SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
0: MOV OUT[0], IN[0]
1: MOV OUT[1], IN[1]
2: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = add i32 %5, %7
%14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
%15 = extractelement <4 x float> %14, i32 0
%16 = extractelement <4 x float> %14, i32 1
%17 = extractelement <4 x float> %14, i32 2
%18 = extractelement <4 x float> %14, i32 3
%19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
%21 = add i32 %5, %7
%22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
%23 = extractelement <4 x float> %22, i32 0
%24 = extractelement <4 x float> %22, i32 1
%25 = extractelement <4 x float> %22, i32 2
%26 = extractelement <4 x float> %22, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
ret void
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
s_waitcnt vmcnt(0) ; BF8C0770
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 12
Code Size: 56 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
0: TEX OUT[0], IN[0], SAMP[0], 2D
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11)
%28 = bitcast float %26 to i32
%29 = bitcast float %27 to i32
%30 = insertelement <2 x i32> undef, i32 %28, i32 0
%31 = insertelement <2 x i32> %30, i32 %29, i32 1
%32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = call i32 @llvm.SI.packf16(float %33, float %34)
%38 = bitcast i32 %37 to float
%39 = call i32 @llvm.SI.packf16(float %35, float %36)
%40 = bitcast i32 %39 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002
s_waitcnt vmcnt(0) ; BF8C0770
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 68 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x0
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLAudioManager: Using ALSA: default
Begin MonoManager ReloadAssembly
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp-firstpass.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp-firstpass.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-CSharp.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-UnityScript.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Assembly-UnityScript.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.UI.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityEngine.UI.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.ClipperLib.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.ClipperLib.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Ionic.Zip.Reduced.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Ionic.Zip.Reduced.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.JsonFx.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.JsonFx.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Poly2Tri.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Pathfinding.Poly2Tri.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.Core.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.Core.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.dll (this message is harmless)
Loading /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Jboy.dll into Unity Child Domain
Platform assembly: /media/bigdata/games/steam/steamapps/common/PSHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
0: MOV OUT[0], IN[0]
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5)
%23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5)
%24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5)
%25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5)
%26 = call i32 @llvm.SI.packf16(float %22, float %23)
%27 = bitcast i32 %26 to float
%28 = call i32 @llvm.SI.packf16(float %24, float %25)
%29 = bitcast i32 %28 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
s_mov_b32 m0, s9 ; BEFC0309
v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
0: MOV OUT[0], IN[0]
1: MOV OUT[1], IN[1]
2: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = add i32 %5, %7
%14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13)
%15 = extractelement <4 x float> %14, i32 0
%16 = extractelement <4 x float> %14, i32 1
%17 = extractelement <4 x float> %14, i32 2
%18 = extractelement <4 x float> %14, i32 3
%19 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%20 = load <16 x i8>, <16 x i8> addrspace(2)* %19, align 16, !tbaa !0
%21 = add i32 %5, %7
%22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21)
%23 = extractelement <4 x float> %22, i32 0
%24 = extractelement <4 x float> %22, i32 1
%25 = extractelement <4 x float> %22, i32 2
%26 = extractelement <4 x float> %22, i32 3
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18)
ret void
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
s_waitcnt vmcnt(0) ; BF8C0770
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 12
Code Size: 56 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
0: TEX OUT[0], IN[0], SAMP[0], 2D
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11requesting resize 1920 x 1080
requesting fullscreen 1920 x 1080 at 0 Hz
Desktop is 1920 x 1080 @ 60 Hz
Using libudev for joystick management
Importing game controller configs
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Core.dll (this message is harmless)
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.dll (this message is harmless)
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Xml.dll (this message is harmless)
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Data.dll (this message is harmless)
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/UnityScript.Lang.dll (this message is harmless)
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/Boo.Lang.dll (this message is harmless)
Steam Version
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Resolution[1920X1080] fs[True]
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Platform assembly: /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Managed/System.Transactions.dll (this message is harmless)
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Connected to db
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libc.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libc.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/./libsqlite.0.dylib.so
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libsqlite.0.dylib
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/sqlite3
Fallback handler could not load library /media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_DatSetting breakpad minidump AppID = 237870
Steam_SetMinidumpSteamID: Caching Steam ID: 76561198118363848 [API loaded no]
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
0: MOV OUT[0], IN[0]
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5)
%23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5)
%24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5)
%25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5)
%26 = call i32 @llvm.SI.packf16(float %22, float %23)
%27 = bitcast i32 %26 to float
%28 = call i32 @llvm.SI.packf16(float %24, float %25)
%29 = bitcast i32 %28 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
s_mov_b32 m0, s9 ; BEFC0309
v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202
v_interp_mov_f32 v2, P0, 3, 0, [m0] ; C80A0302
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], COLOR
DCL OUT[2], TEXCOORD[0]
DCL CONST[0..3]
DCL TEMP[0]
0: MUL TEMP[0], IN[0].xxxx, CONST[0]
1: MAD TEMP[0], IN[0].yyyy, CONST[1], TEMP[0]
2: MAD TEMP[0], IN[0].zzzz, CONST[2], TEMP[0]
3: MAD OUT[0], IN[0].wwww, CONST[3], TEMP[0]
4: MOV_SAT OUT[1], IN[1]
5: MOV OUT[2], IN[2]
6: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
%45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0
%47 = add i32 %5, %7
%48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47)
%49 = extractelement <4 x float> %48, i32 0
%50 = extractelement <4 x float> %48, i32 1
%51 = extractelement <4 x float> %48, i32 2
%52 = extractelement <4 x float> %48, i32 3
%53 = fmul float %33, %13
%54 = fmul float %33, %14
%55 = fmul float %33, %15
%56 = fmul float %33, %16
%57 = fmul float %34, %17
%58 = fadd float %57, %53
%59 = fmul float %34, %18
%60 = fadd float %59, %54
%61 = fmul float %34, %19
%62 = fadd float %61, %55
%63 = fmul float %34, %20
%64 = fadd float %63, %56
%65 = fmul float %35, %21
%66 = fadd float %65, %58
%67 = fmul float %35, %22
%68 = fadd float %67, %60
%69 = fmul float %35, %23
%70 = fadd float %69, %62
%71 = fmul float %35, %24
%72 = fadd float %71, %64
%73 = fmul float %36, %25
%74 = fadd float %73, %66
%75 = fmul float %36, %26
%76 = fadd float %75, %68
%77 = fmul float %36, %27
%78 = fadd float %77, %70
%79 = fmul float %36, %28
%80 = fadd float %79, %72
%81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00)
%82 = call float @llvm.AMDIL.clamp.(float %42, float 0.000000e+00, float 1.000000e+00)
%83 = call float @llvm.AMDIL.clamp.(float %43, float 0.000000e+00, float 1.000000e+00)
%84 = call float @llvm.AMDIL.clamp.(float %44, float 0.000000e+00, float 1.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %81, float %82, float %83, float %84)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %49, float %50, float %51, float %52)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %76, float %78, float %80)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
v_mac_f32_e32 v0, s8, v2 ; 3E000408
v_mul_f32_e32 v13, s5, v1 ; 101A0205
v_mac_f32_e32 v13, s9, v2 ; 3E1A0409
v_mul_f32_e32 v14, s6, v1 ; 101C0206
v_mac_f32_e32 v14, s10, v2 ; 3E1C040A
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v13, s13, v3 ; 3E1A060D
v_mac_f32_e32 v14, s14, v3 ; 3E1C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v13, s17, v4 ; 3E1A0811
v_mac_f32_e32 v14, s18, v4 ; 3E1C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
v_add_f32_e64 v2, 0, v5 clamp ; D2060802 00020A80
v_add_f32_e64 v3, 0, v6 clamp ; D2060803 00020C80
v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00requesting resize 1920 x 1080
requesting fullscreen 1920 x 1080 at 0 Hz
Desktop is 1920 x 1080 @ 60 Hz
MUTEX Created.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Changing real window size to 1920 x 1080
Unloading 6 Unused Serialized files (Serialized files now loaded: 0)
Unloading 107 unused Assets to reduce memory usage. Loaded Objects now: 23007.
Total: 85.608994 ms (FindLiveObjects: 2.482000 ms CreateObjectMapping: 1.400000 ms MarkObjects: 81.138000 ms DeleteObjects: 0.588000 ms)
Unloading 3 Unused Serialized files (Serialized files now loaded: 0)
UnloadTime: 2.956000 ms
Unloading 1 unused Assets to reduce memory usage. Loaded Objects now: 25899.
Total: 96.442001 ms (FindLiveObjects: 2.711000 ms CreateObjectMapping: 2.635000 ms MarkObjects: 90.958000 ms DeleteObjects: 0.138000 ms)
Changing real window size to 1920 x 1080
Changing real window size to 1920 x 1080
error version:3; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.PeGameSummary+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:3; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:3; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:2; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:3; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:2; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:1; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:1; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:1; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:2; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:0; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:0; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
error version:0; need version:4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Unloading 3 Unused Serialized files (Serialized files now loaded: 0)
Unloading 0 unused Assets to reduce memory usage. Loaded Objects now: 26114.
Total: 93.065002 ms (FindLiveObjects: 3.224000 ms CreateObjectMapping: 1.868000 ms MarkObjects: 87.834999 ms DeleteObjects: 0.138000 ms)
Unloading 4 Unused Serialized files (Serialized files now loaded: 0)
WARNING: Shader Unsupported: 'Hidden/Dof/DX11Dof' - Setting to default shader.
UnloadTime: 8.448000 ms
-----------------------------clear singleton, count:6
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.ArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
temp path:/tmp/
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.SinglePlayerTypeArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
****************load saved adventure****************
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.PeGameSummary+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
game summary
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.InitArchiveSummary, progress:0.03448276
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Unloading 47 unused Assets to reduce memory usage. Loaded Objects now: 23297.
Total: 87.361000 ms (FindLiveObjects: 1.398000 ms CreateObjectMapping: 1.325000 ms MarkObjects: 81.345001 ms DeleteObjects: 3.292000 ms)
-----------------------------register singleton:Pathea.PeCreature
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.MainPlayer
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.EntityMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:InputManager
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:ReputationSystem
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadReputation, progress:0.06896552
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomItemMgr, progress:0.1034483
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.RandomMapConfigArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
SeedString:Planet MariaterrainHeight:128mapsize: 4, riverdensity: 1, riverwidth: 1
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Test: CT_Dry
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Test: waterHeight:8.5
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Test: c_fWaterLvl:8.5
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
VoxelPaintXMLParser: No sectionmap found.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomTerrainParam, progress:0.137931
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 763040
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 1658970
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 1323810
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 753380
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 1183810
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 865620
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 6855300
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 2840080
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 454560
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 555180
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 881780
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 1379770
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 1898380
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
loadIso Time: 215400
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomTown, progress:0.1724138
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
set player spawn pos:(-203.0, 36.6, -423.0)
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadAdventurePlayerSpawnPos, progress:0.2068966
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadCamera, progress:0.2413793
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.VArtifactTownArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:DetectedTownMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:PeMap.LabelMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.TownNpcArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.VABuildingArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.VoxelTerrainArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomTerrainWithTown, progress:0.2758621
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
[VFDataRTGen]Error: Unrecognized voxel tile,discard the following data.[-2,-17,0,128]:0
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
[VFDataRTGen]Error: Unrecognized voxel tile,discard the following data.[-2,-17,0,128]:0
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.CreationDataArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadCreationData, progress:0.3103448
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.ItemAssetArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:ItemAsset.ItemMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:ItemAsset.ItemProto+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Adjust befor:(-224.0, -4.0, -448.0)
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Adjust befor:(-224.0, 28.0, -448.0)
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Adjust befor:(-224.0, 60.0, -448.0)
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadItemAsset, progress:0.3448276
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadWaveSystem, progress:0.3793103
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.GrassDataSLArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadGrassRandom, progress:0.4137931
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadVETreeProtos, progress:0.4482759
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Creating RSubTerrainMgr!
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.RSubTerrSLArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomTree, progress:0.4827586
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Coroutine RSubTerrCreator[2]::RefreshRegion started
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Coroutine RSubTerrCreator[1]::RefreshRegion started
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Coroutine RSubTerrCreator[0]::RefreshRegion started
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadEnvironment, progress:0.5172414
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.WorldInfoMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadWorldInfo, progress:0.5517241
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Railway.Manager
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRailway, progress:0.5862069
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadEntityCreator, progress:0.6206896
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadCreature, progress:0.6551724
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
[Singleton] An instance of Behave.Runtime.BTLauncher is needed in the scene, so '(singleton) Behave.Runtime.BTLauncher (UnityEngine.GameObject)' was created with DontDestroyOnLoad.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:MousePicker
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadPathFinding, progress:0.6896552
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1560)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
-----------------------------register singleton:PeTipsMsgMan
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
-----------------------------register singleton:PeMap.MaskTile+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadGUI, progress:0.7241379
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:DraggingMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.CSDataMgrArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
The CSClodsMgr is null.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadCSData, progress:0.7586207
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadFarm, progress:0.7931035
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
The CSCloMgr is areadly.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Still have this data inst.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Still have this data inst.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadColony, progress:0.8275862
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.UiHelpArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadUiHelp, progress:0.862069
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:PeMap.StaticPoint+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:PeMap.UserLabel+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
5 3
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
5 4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
5 5
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
6 3
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
6 4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
6 5
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
7 3
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
7 4
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
7 5
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomMap, progress:0.8965517
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:UIBlockSaver
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.InitBuildManager, progress:0.9310345
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
[Singleton] An instance of ForceSetting is needed in the scene, so '(singleton) ForceSetting (UnityEngine.GameObject)' was created with DontDestroyOnLoad.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:MisRepositoryArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:NpcUserDataArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:EntityCreatedArchiveMgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadRandomStory, progress:0.9655172
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:PeMap.MapIcon+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Pathea.GameLoader.LoadSingleAdventureInitData, progress:1
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
born_pos(-203.0, 36.6, -423.0)
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
-----------------------------register singleton:Pathea.NpcAbility+Mgr
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
The referenced script on this Behaviour is missing!
(Filename: Line: 1713)
Aim Transform unassigned in Aim IK solver. Please Assign a Transform (lineal descendant to the last bone in the spine) that you want to be aimed at IKPosition
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
Unloading 8 Unused Serialized files (Serialized files now loaded: 0)
Unloading 2011 unused Assets to reduce memory usage. Loaded Objects now: 129033.
Total: 267.863983 ms (FindLiveObjects: 18.658998 ms CreateObjectMapping: 7.872000 ms MarkObjects: 232.768005 ms DeleteObjects: 8.565001 ms)
[Singleton] An instance of MouseOpMgr is needed in the scene, so '(singleton) MouseOpMgr (UnityEngine.GameObject)' was created with DontDestroyOnLoad.
(Filename: /home/builduser/buildslave/unity/build/artifacts/generated/common/runtime/UnityEngineDebug.gen.cpp Line: 56)
ad.const(<16 x i8> %12, i32 112)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0
%47 = add i32 %5, %7
%48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47)
%49 = extractelement <4 x float> %48, i32 0
%50 = extractelement <4 x float> %48, i32 1
%51 = extractelement <4 x float> %48, i32 2
%52 = extractelement <4 x float> %48, i32 3
%53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0
%55 = add i32 %5, %7
%56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55)
%57 = extractelement <4 x float> %56, i32 0
%58 = extractelement <4 x float> %56, i32 1
%59 = extractelement <4 x float> %56, i32 2
%60 = extractelement <4 x float> %56, i32 3
%61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0
%63 = add i32 %5, %7
%64 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %62, i32 0, i32 %63)
%65 = extractelement <4 x float> %64, i32 0
%66 = extractelement <4 x float> %64, i32 1
%67 = extractelement <4 x float> %64, i32 2
%68 = extractelement <4 x float> %64, i32 3
%69 = fmul float %49, %13
%70 = fmul float %49, %14
%71 = fmul float %49, %15
%72 = fmul float %49, %16
%73 = fmul float %50, %17
%74 = fadd float %73, %69
%75 = fmul float %50, %18
%76 = fadd float %75, %70
%77 = fmul float %50, %19
%78 = fadd float %77, %71
%79 = fmul float %50, %20
%80 = fadd float %79, %72
%81 = fmul float %51, %21
%82 = fadd float %81, %74
%83 = fmul float %51, %22
%84 = fadd float %83, %76
%85 = fmul float %51, %23
%86 = fadd float %85, %78
%87 = fmul float %51, %24
%88 = fadd float %87, %80
%89 = fmul float %52, %25
%90 = fadd float %89, %82
%91 = fmul float %52, %26
%92 = fadd float %91, %84
%93 = fmul float %52, %27
%94 = fadd float %93, %86
%95 = fmul float %52, %28
%96 = fadd float %95, %88
%97 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00)
%98 = call float @llvm.AMDIL.clamp.(float %58, float 0.000000e+00, float 1.000000e+00)
%99 = call float @llvm.AMDIL.clamp.(float %59, float 0.000000e+00, float 1.000000e+00)
%100 = call float @llvm.AMDIL.clamp.(float %60, float 0.000000e+00, float 1.000000e+00)
%101 = fmul float %65, %29
%102 = fmul float %65, %30
%103 = fmul float %65, %31
%104 = fmul float %65, %32
%105 = fmul float %66, %33
%106 = fadd float %105, %101
%107 = fmul float %66, %34
%108 = fadd float %107, %102
%109 = fmul float %66, %35
%110 = fadd float %109, %103
%111 = fmul float %66, %36
%112 = fadd float %111, %104
%113 = fmul float %67, %37
%114 = fadd float %113, %106
%115 = fmul float %67, %38
%116 = fadd float %115, %108
%117 = fmul float %67, %39
%118 = fadd float %117, %110
%119 = fmul float %67, %40
%120 = fadd float %119, %112
%121 = fmul float %68, %41
%122 = fadd float %121, %114
%123 = fmul float %68, %42
%124 = fadd float %123, %116
%125 = fmul float %68, %43
%126 = fadd float %125, %118
%127 = fmul float %68, %44
%128 = fadd float %127, %120
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %97, float %98, float %99, float %100)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %122, float %124, float %126, float %128)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %90, float %92, float %94, float %96)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104
s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v13, s6, v1 ; 101A0206
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
v_mac_f32_e32 v13, s7, v2 ; 3E1A0407
v_mul_f32_e32 v14, s8, v1 ; 101C0208
v_mac_f32_e32 v14, s9, v2 ; 3E1C0409
s_buffer_load_dword s7, s[0:3], 0xa ; C203810A
s_buffer_load_dword s8, s[0:3], 0xb ; C204010B
s_buffer_load_dword s9, s[0:3], 0xc ; C204810C
s_buffer_load_dword s11, s[0:3], 0xd ; C205810D
s_buffer_load_dword s12, s[0:3], 0xe ; C206010E
v_mul_f32_e32 v1, s10, v1 ; 1002020A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s4, v2 ; 3E020404
v_mac_f32_e32 v0, s5, v3 ; 3E000605
v_mac_f32_e32 v13, s6, v3 ; 3E1A0606
s_buffer_load_dword s4, s[0:3], 0xf ; C202010F
v_mac_f32_e32 v14, s7, v3 ; 3E1C0607
v_mac_f32_e32 v1, s8, v3 ; 3E020608
v_mac_f32_e32 v0, s9, v4 ; 3E000809
v_mac_f32_e32 v13, s11, v4 ; 3E1A080B
v_mac_f32_e32 v14, s12, v4 ; 3E1C080C
s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112
s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s4, v4 ; 3E020804
s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114
s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115
s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116
s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117
s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118
s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119
s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A
s_buffer_load_dword s15, s[0:3], 0x1b ; C207811B
s_buffer_load_dword s16, s[0:3], 0x1c ; C208011C
s_buffer_load_dword s17, s[0:3], 0x1d ; C208811D
s_buffer_load_dword s18, s[0:3], 0x1e ; C209011E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
v_mul_f32_e32 v2, s5, v9 ; 10041205
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v2, s4, v10 ; 3E041404
v_mul_f32_e32 v3, s6, v9 ; 10061206
v_mac_f32_e32 v3, s9, v10 ; 3E061409
v_mul_f32_e32 v4, s7, v9 ; 10081207
v_mac_f32_e32 v4, s10, v10 ; 3E08140A
v_mul_f32_e32 v9, s8, v9 ; 10121208
v_mac_f32_e32 v9, s11, v10 ; 3E12140B
v_mac_f32_e32 v2, s12, v11 ; 3E04160C
v_mac_f32_e32 v3, s13, v11 ; 3E06160D
v_mac_f32_e32 v4, s14, v11 ; 3E08160E
v_mac_f32_e32 v9, s15, v11 ; 3E12160F
v_mac_f32_e32 v2, s16, v12 ; 3E041810
v_mac_f32_e32 v3, s17, v12 ; 3E061811
v_mac_f32_e32 v4, s18, v12 ; 3E081812
v_mac_f32_e32 v9, s0, v12 ; 3E121800
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80
v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80
v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
exp 15, 33, 0, 0, 0, v2, v3, v4, v9 ; F800021F 09040302
exp 15, 12, 0, 1, 0, v0, v13, v14, v1 ; F80008CF 010E0D00
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 384 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL IN[0], COLOR, COLOR
DCL IN[1], TEXCOORD[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0], LOCAL
0: MOV TEMP[0].xy, IN[1].xyyy
1: MOV TEMP[0].w, IN[1].wwww
2: TXP TEMP[0], TEMP[0], SAMP[0], 2D
3: MUL TEMP[0], TEMP[0], IN[0]
4: MOV OUT[0], TEMP[0]
5: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%33 = fdiv float %30, %32
%34 = fdiv float %31, %32
%35 = bitcast float %33 to i32
%36 = bitcast float %34 to i32
%37 = insertelement <2 x i32> undef, i32 %35, i32 0
%38 = insertelement <2 x i32> %37, i32 %36, i32 1
%39 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %38, <32 x i8> %23, <16 x i8> %25, i32 2)
%40 = extractelement <4 x float> %39, i32 0
%41 = extractelement <4 x float> %39, i32 1
%42 = extractelement <4 x float> %39, i32 2
%43 = extractelement <4 x float> %39, i32 3
%44 = fmul float %40, %26
%45 = fmul float %41, %27
%46 = fmul float %42, %28
%47 = fmul float %43, %29
%48 = call i32 @llvm.SI.packf16(float %44, float %45)
%49 = bitcast i32 %48 to float
%50 = call i32 @llvm.SI.packf16(float %46, float %47)
%51 = bitcast i32 %50 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700
v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701
v_mov_b32_e32 v1, 0x6f800000 ; 7E0202FF 6F800000
v_cmp_gt_f32_e64 vcc, |v0|, v1 ; D008016A 00020300
v_mov_b32_e32 v1, 0x2f800000 ; 7E0202FF 2F800000
v_cndmask_b32_e32 v1, 1.0, v1 ; 000202F2
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_rcp_f32_e32 v0, v0 ; 7E005500
v_mul_f32_e32 v6, v0, v6 ; 100C0D00
v_mul_f32_e32 v0, v0, v7 ; 10000F00
v_mul_f32_e32 v6, v6, v1 ; 100C0306
v_mul_f32_e32 v7, v0, v1 ; 100E0300
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020606
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v0, v2, v6 ; 10000D02
v_mul_f32_e32 v1, v3, v7 ; 10020F03
v_mul_f32_e32 v2, v4, v8 ; 10041104
v_mul_f32_e32 v3, v5, v9 ; 10061305
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 176 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..7]
DCL TEMP[0..3], LOCAL
0: MOV TEMP[0].x, CONST[0].xxxx
1: MOV TEMP[0].y, CONST[1].xxxx
2: MOV TEMP[0].z, CONST[2].xxxx
3: MOV TEMP[1].x, CONST[0].yyyy
4: MOV TEMP[1].y, CONST[1].yyyy
5: MOV TEMP[1].z, CONST[2].yyyy
6: MOV TEMP[2].x, CONST[0].zzzz
7: MOV TEMP[2].y, CONST[1].zzzz
8: MOV TEMP[2].z, CONST[2].zzzz
9: MUL TEMP[3], CONST[4], IN[0].xxxx
10: MAD TEMP[3], CONST[5], IN[0].yyyy, TEMP[3]
11: MAD TEMP[3], CONST[6], IN[0].zzzz, TEMP[3]
12: MAD TEMP[3], CONST[7], IN[0].wwww, TEMP[3]
13: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[1].xxxx
14: MAD TEMP[0].xyz, TEMP[1].xyzz, IN[1].yyyy, TEMP[0].xyzz
15: MAD TEMP[0].xyz, TEMP[2].xyzz, IN[1].zzzz, TEMP[0].xyzz
16: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
17: RSQ TEMP[1].x, TEMP[1].xxxx
18: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
19: MOV OUT[1], TEMP[0]
20: MOV OUT[0], TEMP[3]
21: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%38 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0
%40 = add i32 %5, %7
%41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40)
%42 = extractelement <4 x float> %41, i32 0
%43 = extractelement <4 x float> %41, i32 1
%44 = extractelement <4 x float> %41, i32 2
%45 = extractelement <4 x float> %41, i32 3
%46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = add i32 %5, %7
%49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48)
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = fmul float %22, %42
%54 = fmul float %23, %42
%55 = fmul float %24, %42
%56 = fmul float %25, %42
%57 = fmul float %26, %43
%58 = fadd float %57, %53
%59 = fmul float %27, %43
%60 = fadd float %59, %54
%61 = fmul float %28, %43
%62 = fadd float %61, %55
%63 = fmul float %29, %43
%64 = fadd float %63, %56
%65 = fmul float %30, %44
%66 = fadd float %65, %58
%67 = fmul float %31, %44
%68 = fadd float %67, %60
%69 = fmul float %32, %44
%70 = fadd float %69, %62
%71 = fmul float %33, %44
%72 = fadd float %71, %64
%73 = fmul float %34, %45
%74 = fadd float %73, %66
%75 = fmul float %35, %45
%76 = fadd float %75, %68
%77 = fmul float %36, %45
%78 = fadd float %77, %70
%79 = fmul float %37, %45
%80 = fadd float %79, %72
%81 = fmul float %13, %50
%82 = fmul float %16, %50
%83 = fmul float %19, %50
%84 = fmul float %14, %51
%85 = fadd float %84, %81
%86 = fmul float %17, %51
%87 = fadd float %86, %82
%88 = fmul float %20, %51
%89 = fadd float %88, %83
%90 = fmul float %15, %52
%91 = fadd float %90, %85
%92 = fmul float %18, %52
%93 = fadd float %92, %87
%94 = fmul float %21, %52
%95 = fadd float %94, %89
%96 = fmul float %91, %91
%97 = fmul float %93, %93
%98 = fadd float %97, %96
%99 = fmul float %95, %95
%100 = fadd float %98, %99
%101 = call float @llvm.AMDGPU.rsq.clamped.f32(float %100)
%102 = fmul float %91, %101
%103 = fmul float %93, %101
%104 = fmul float %95, %101
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %102, float %103, float %104, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %76, float %78, float %80)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106
s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111
s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112
s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113
s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114
s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115
s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116
s_buffer_load_dword s20, s[0:3], 0x17 ; C20A0117
s_buffer_load_dword s21, s[0:3], 0x18 ; C20A8118
s_buffer_load_dword s22, s[0:3], 0x19 ; C20B0119
s_buffer_load_dword s23, s[0:3], 0x1a ; C20B811A
s_buffer_load_dword s24, s[0:3], 0x1b ; C20C011B
s_buffer_load_dword s25, s[0:3], 0x1c ; C20C811C
s_buffer_load_dword s26, s[0:3], 0x1d ; C20D011D
s_buffer_load_dword s27, s[0:3], 0x1e ; C20D811E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s11, v1 ; 1000020B
v_mul_f32_e32 v8, s12, v5 ; 10100A0C
v_mac_f32_e32 v8, s13, v6 ; 3E100C0D
v_mul_f32_e32 v9, s5, v5 ; 10120A05
v_mac_f32_e32 v9, s6, v6 ; 3E120C06
v_mul_f32_e32 v5, s8, v5 ; 100A0A08
v_mac_f32_e32 v5, s9, v6 ; 3E0A0C09
v_mac_f32_e32 v8, s4, v7 ; 3E100E04
v_mac_f32_e32 v9, s7, v7 ; 3E120E07
v_mac_f32_e32 v5, s10, v7 ; 3E0A0E0A
v_mac_f32_e32 v0, s17, v2 ; 3E000411
v_mul_f32_e32 v6, s14, v1 ; 100C020E
v_mac_f32_e32 v6, s18, v2 ; 3E0C0412
v_mul_f32_e32 v7, s15, v1 ; 100E020F
v_mac_f32_e32 v7, s19, v2 ; 3E0E0413
v_mul_f32_e32 v1, s16, v1 ; 10020210
v_mac_f32_e32 v1, s20, v2 ; 3E020414
v_mac_f32_e32 v0, s21, v3 ; 3E000615
v_mac_f32_e32 v6, s22, v3 ; 3E0C0616
v_mac_f32_e32 v7, s23, v3 ; 3E0E0617
v_mac_f32_e32 v1, s24, v3 ; 3E020618
v_mac_f32_e32 v0, s25, v4 ; 3E000819
v_mul_f32_e32 v2, v8, v8 ; 10041108
v_mac_f32_e32 v2, v9, v9 ; 3E041309
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mac_f32_e32 v6, s26, v4 ; 3E0C081A
v_mac_f32_e32 v7, s27, v4 ; 3E0E081B
v_mac_f32_e32 v1, s0, v4 ; 3E020800
v_mul_f32_e32 v3, v2, v8 ; 10061102
v_mul_f32_e32 v4, v2, v9 ; 10081302
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mov_b32_e32 v5, 0 ; 7E0A0280
exp 15, 32, 0, 0, 0, v3, v4, v2, v5 ; F800020F 05020403
exp 15, 12, 0, 1, 0, v0, v6, v7, v1 ; F80008CF 01070600
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 12
Code Size: 292 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0]
DCL TEMP[0], LOCAL
IMM[0] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000}
0: MAD TEMP[0].xyz, IN[0].xyzz, IMM[0].xxxx, IMM[0].xxxx
1: MOV TEMP[0].w, CONST[0].xxxx
2: MOV OUT[0], TEMP[0]
3: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%28 = fmul float %25, 5.000000e-01
%29 = fadd float %28, 5.000000e-01
%30 = fmul float %26, 5.000000e-01
%31 = fadd float %30, 5.000000e-01
%32 = fmul float %27, 5.000000e-01
%33 = fadd float %32, 5.000000e-01
%34 = call i32 @llvm.SI.packf16(float %29, float %31)
%35 = bitcast i32 %34 to float
%36 = call i32 @llvm.SI.packf16(float %33, float %24)
%37 = bitcast i32 %36 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %35, float %37, float %35, float %37)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200
v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201
v_mad_f32 v1, 0.5, v2, 0.5 ; D2820001 03C204F0
v_mad_f32 v2, 0.5, v3, 0.5 ; D2820002 03C206F0
v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cvt_pkrtz_f16_f32_e64 v0, v0, s0 ; D25E0000 00000100
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 92 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x0
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
0: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
call void @llvm.SI.export(i32 0, i32 1, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
ret void
}
declare void @llvm.SI.export(i32, i32, i32, i32, i32, i32, i32, i32, i32)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
Shader Disassembly:
v_mov_b32_e32 v0, 0 ; 7E000280
exp 0, 0, 0, 1, 1, v0, v0, v0, v0 ; F8001800 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 16 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x0
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL OUT[1], POSITION
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxy
1: TEX OUT[1].z, IN[0], SAMP[0], 2D
2: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11)
%28 = bitcast float %26 to i32
%29 = bitcast float %27 to i32
%30 = insertelement <2 x i32> undef, i32 %28, i32 0
%31 = insertelement <2 x i32> %30, i32 %29, i32 1
%32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
%33 = extractelement <4 x float> %32, i32 2
call void @llvm.SI.export(i32 1, i32 0, i32 0, i32 8, i32 0, float %33, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_mov_b32_e32 v0, 0 ; 7E000280
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v1, 4, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800400 00030102
s_waitcnt vmcnt(0) ; BF8C0770
exp 1, 8, 0, 0, 0, v1, v0, v0, v0 ; F8000081 00000001
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v1, 1.0 ; 7E0202F2
exp 15, 0, 0, 1, 1, v0, v0, v0, v1 ; F800180F 01000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 80 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MUL TEMP[2], CONST[6], IN[0].xxxx
11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2]
14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz
15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz
16: MOV OUT[1], TEMP[1]
17: MOV OUT[2], TEMP[2]
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = add i32 %5, %7
%49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48)
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = extractelement <4 x float> %49, i32 3
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = fmul float %15, %50
%62 = fmul float %16, %50
%63 = fmul float %17, %50
%64 = fmul float %18, %50
%65 = fmul float %19, %51
%66 = fadd float %65, %61
%67 = fmul float %20, %51
%68 = fadd float %67, %62
%69 = fmul float %21, %51
%70 = fadd float %69, %63
%71 = fmul float %22, %51
%72 = fadd float %71, %64
%73 = fmul float %23, %52
%74 = fadd float %73, %66
%75 = fmul float %24, %52
%76 = fadd float %75, %68
%77 = fmul float %25, %52
%78 = fadd float %77, %70
%79 = fmul float %26, %52
%80 = fadd float %79, %72
%81 = fmul float %27, %53
%82 = fadd float %81, %74
%83 = fmul float %28, %53
%84 = fadd float %83, %76
%85 = fmul float %29, %53
%86 = fadd float %85, %78
%87 = fmul float %30, %53
%88 = fadd float %87, %80
%89 = fmul float %82, 5.000000e-01
%90 = fmul float %84, 5.000000e-01
%91 = fmul float %88, 5.000000e-01
%92 = fmul float %90, %13
%93 = fadd float %89, %91
%94 = fadd float %92, %91
%95 = fmul float %31, %50
%96 = fmul float %32, %50
%97 = fmul float %33, %50
%98 = fmul float %34, %50
%99 = fmul float %35, %51
%100 = fadd float %99, %95
%101 = fmul float %36, %51
%102 = fadd float %101, %96
%103 = fmul float %37, %51
%104 = fadd float %103, %97
%105 = fmul float %38, %51
%106 = fadd float %105, %98
%107 = fmul float %39, %52
%108 = fadd float %107, %100
%109 = fmul float %40, %52
%110 = fadd float %109, %102
%111 = fmul float %41, %52
%112 = fadd float %111, %104
%113 = fmul float %42, %52
%114 = fadd float %113, %106
%115 = fmul float %43, %53
%116 = fadd float %115, %108
%117 = fmul float %44, %53
%118 = fadd float %117, %110
%119 = fmul float %45, %53
%120 = fadd float %119, %112
%121 = fsub float -0.000000e+00, %116
%122 = fsub float -0.000000e+00, %118
%123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121)
%124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122)
%125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
s_buffer_load_dword s4, s[0:3], 0xe ; C202010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s7, v2 ; 3E100407
s_buffer_load_dword s5, s[0:3], 0xb ; C202810B
v_mul_f32_e32 v9, s8, v1 ; 10120208
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C
s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119
s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D
s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A
s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v2 ; 3E120404
s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_mul_f32_e32 v11, s6, v1 ; 10160206
v_mac_f32_e32 v11, s7, v2 ; 3E160407
v_mul_f32_e32 v12, s8, v1 ; 10180208
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v13, s11, v1 ; 101A020B
v_mac_f32_e32 v13, s12, v2 ; 3E1A040C
s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s4, v1 ; 10020204
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112
s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113
s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120
s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123
v_mac_f32_e32 v1, s5, v2 ; 3E020405
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s4, v3 ; 3E000604
v_mac_f32_e32 v8, s6, v3 ; 3E100606
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mac_f32_e32 v11, s9, v3 ; 3E160609
v_mac_f32_e32 v12, s10, v3 ; 3E18060A
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116
s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117
s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124
s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125
s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126
v_mac_f32_e32 v13, s11, v3 ; 3E1A060B
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
v_mac_f32_e32 v0, s5, v4 ; 3E000805
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v8, s4, v4 ; 3E100804
v_mac_f32_e32 v9, s6, v4 ; 3E120806
v_mac_f32_e32 v10, s7, v4 ; 3E140807
v_mac_f32_e32 v11, s8, v4 ; 3E160808
v_mac_f32_e32 v12, s9, v4 ; 3E180809
v_mac_f32_e32 v13, s10, v4 ; 3E1A080A
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2
v_mul_f32_e32 v3, v11, v2 ; 1006050B
v_mul_f32_e32 v4, v12, v2 ; 1008050C
v_mul_f32_e32 v2, v13, v2 ; 1004050D
v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B
v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B
v_mac_f32_e32 v2, s11, v7 ; 3E040E0B
v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0
v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0
v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00
exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607
exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403
exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Code Size: 408 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[6..12]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000}
0: RCP TEMP[0].x, IN[0].wwww
1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
2: RCP TEMP[1].x, IN[1].zzzz
3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx
4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
5: MOV TEMP[3].xy, TEMP[0].xyyy
6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D
7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy
8: RCP TEMP[3].x, TEMP[3].xxxx
9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx
10: MUL TEMP[2], CONST[9], TEMP[1].xxxx
11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2]
13: ADD TEMP[2].xyz, TEMP[2], CONST[12]
14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz
15: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[6].xyzz
16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
17: RSQ TEMP[5].x, TEMP[5].xxxx
18: MUL TEMP[5].xyz, TEMP[4].xyzz, TEMP[5].xxxx
19: MOV TEMP[5].xyz, -TEMP[5].xyzx
20: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz
21: MUL TEMP[4].x, TEMP[4].xxxx, CONST[6].wwww
22: MOV TEMP[4].xy, TEMP[4].xxxx
23: TEX TEMP[4].w, TEMP[4], SAMP[1], 2D
24: MOV TEMP[0].xy, TEMP[0].xyyy
25: TEX TEMP[0], TEMP[0], SAMP[2], 2D
26: MAD TEMP[6].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz
27: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz
28: RSQ TEMP[7].x, TEMP[7].xxxx
29: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx
30: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[6].xyzz
31: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
32: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[4].wwww
33: MUL TEMP[7].xyz, CONST[7].xyzz, TEMP[7].xxxx
34: MUL TEMP[8].xyz, CONST[7].xyzz, CONST[4].xyzz
35: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz
36: DP3 TEMP[9].x, TEMP[2].xyzz, TEMP[2].xyzz
37: RSQ TEMP[9].x, TEMP[9].xxxx
38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xxxx
39: ADD TEMP[2].xyz, TEMP[5].xyzz, -TEMP[2].xyzz
40: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
41: RSQ TEMP[5].x, TEMP[5].xxxx
42: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
43: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[6].xyzz
44: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx
45: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx
46: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
47: MOV_SAT TEMP[2].x, TEMP[4].wwww
48: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
49: ADD TEMP[2].x, TEMP[8].xxxx, TEMP[8].zzzz
50: MUL TEMP[2].x, TEMP[8].yyyy, TEMP[2].xxxx
51: SQRT TEMP[2].x, TEMP[2].xxxx
52: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
53: ADD TEMP[4].x, TEMP[8].xxxx, TEMP[8].yyyy
54: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[8].zzzz
55: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx
56: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
57: MOV TEMP[7].w, TEMP[0].xxxx
58: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz
59: SQRT TEMP[0].x, TEMP[0].xxxx
60: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz
61: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww
62: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
63: MOV_SAT TEMP[0].x, TEMP[0].xxxx
64: MUL TEMP[0], TEMP[7], TEMP[0].xxxx
65: MOV OUT[0], TEMP[0]
66: END
; ModuleID = 'tgsi'
; LLVM IR emitted for the TGSI FRAG program listed directly above. The "N:"
; references in the comments below are that listing's instruction numbers.
; Arguments referenced by this body:
;   %1     - element 0 is loaded and used as the buffer operand of every
;            llvm.SI.load.const call
;   %2, %3 - elements 0..2 supply the <16 x i8> / <32 x i8> operands of the
;            three llvm.SI.sample.v2i32 calls
;   %5, %7 - forwarded unchanged to every llvm.SI.fs.interp call
; No other argument is referenced by this body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Scalar constant loads. Judging by how each value is consumed below, offset
; 16*i + 4*c corresponds to CONST[i] component c (x=0, y=1, z=2, w=3).
; Only the components the TGSI program reads are loaded.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; CONST[0].xyz
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
; CONST[1].z
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
; CONST[2].xy
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
; CONST[3].xyzw
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
; CONST[4].xyzw
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
; CONST[6].xyzw
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108)
; CONST[7].xyz
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
; CONST[8].zw
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
; CONST[9].xyz
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
; CONST[10].xyz
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
; CONST[11].xyz
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184)
; CONST[12].xyz
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
; Operand pairs for the three texture fetches: element k of %3 (as <32 x i8>)
; and element k of %2 (as <16 x i8>) are used together for SAMP[k], k = 0..2.
%59 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0
%61 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0
%63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)*
%65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0
%66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)*
%68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0
%69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)*
%71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0
%72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)*
%74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0
; Interpolated inputs. The first two operands are (component, input index):
; %75..%77 = IN[0].x, .y, .w and %78..%80 = IN[1].x, .y, .z.
%75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
; 0-1: TEMP[0].xy = IN[0].xy / IN[0].w
%81 = fdiv float 1.000000e+00, %77
%82 = fmul float %75, %81
%83 = fmul float %76, %81
; 2-4: TEMP[2].xyz = IN[1].xyz * (CONST[1].z / IN[1].z)
%84 = fdiv float 1.000000e+00, %80
%85 = fmul float %27, %84
%86 = fmul float %78, %85
%87 = fmul float %79, %85
%88 = fmul float %80, %85
; 5-6: sample SAMP[0] at TEMP[0].xy; only result component 0 is kept
%89 = bitcast float %82 to i32
%90 = bitcast float %83 to i32
%91 = insertelement <2 x i32> undef, i32 %89, i32 0
%92 = insertelement <2 x i32> %91, i32 %90, i32 1
%93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %60, <16 x i8> %62, i32 2)
%94 = extractelement <4 x float> %93, i32 0
; 7-8: TEMP[3].x = 1 / (CONST[2].x * sample + CONST[2].y)
%95 = fmul float %28, %94
%96 = fadd float %95, %29
%97 = fdiv float 1.000000e+00, %96
; 9: TEMP[1].xyz = TEMP[2].xyz * TEMP[3].x
%98 = fmul float %86, %97
%99 = fmul float %87, %97
%100 = fmul float %88, %97
; 10-12: TEMP[2] = CONST[9]*TEMP[1].x + CONST[10]*TEMP[1].y + CONST[11]*TEMP[1].z
; (only the x, y, z lanes are computed here)
%101 = fmul float %47, %98
%102 = fmul float %48, %98
%103 = fmul float %49, %98
%104 = fmul float %50, %99
%105 = fadd float %104, %101
%106 = fmul float %51, %99
%107 = fadd float %106, %102
%108 = fmul float %52, %99
%109 = fadd float %108, %103
%110 = fmul float %53, %100
%111 = fadd float %110, %105
%112 = fmul float %54, %100
%113 = fadd float %112, %107
%114 = fmul float %55, %100
%115 = fadd float %114, %109
; 13: TEMP[2].xyz += CONST[12].xyz
%116 = fadd float %111, %56
%117 = fadd float %113, %57
%118 = fadd float %115, %58
; 14: TEMP[3].xyz = TEMP[2].xyz - CONST[3].xyz
%119 = fsub float %116, %30
%120 = fsub float %117, %31
%121 = fsub float %118, %32
; 15: TEMP[4].xyz = TEMP[2].xyz - CONST[6].xyz
%122 = fsub float %116, %38
%123 = fsub float %117, %39
%124 = fsub float %118, %40
; 16-18: TEMP[5].xyz = TEMP[4].xyz * rsq(dot3(TEMP[4], TEMP[4]))
%125 = fmul float %122, %122
%126 = fmul float %123, %123
%127 = fadd float %126, %125
%128 = fmul float %124, %124
%129 = fadd float %127, %128
%130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129)
%131 = fmul float %122, %130
%132 = fmul float %123, %130
%133 = fmul float %124, %130
; 19 (TEMP[5] = -TEMP[5]) has no instruction of its own; the sign is applied
; at the points where TEMP[5] is consumed (see 30 and 39 below).
; 20: the same dot3(TEMP[4], TEMP[4]) is recomputed (duplicates %125..%129)
%134 = fmul float %122, %122
%135 = fmul float %123, %123
%136 = fadd float %135, %134
%137 = fmul float %124, %124
%138 = fadd float %136, %137
; 21: TEMP[4].x *= CONST[6].w
%139 = fmul float %138, %41
; 22-23: sample SAMP[1] with the same value in both coordinates; only result
; component 3 is kept (TEMP[4].w)
%140 = bitcast float %139 to i32
%141 = bitcast float %139 to i32
%142 = insertelement <2 x i32> undef, i32 %140, i32 0
%143 = insertelement <2 x i32> %142, i32 %141, i32 1
%144 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %65, <16 x i8> %68, i32 2)
%145 = extractelement <4 x float> %144, i32 3
; 24-25: sample SAMP[2] at TEMP[0].xy; all four result components are kept
%146 = bitcast float %82 to i32
%147 = bitcast float %83 to i32
%148 = insertelement <2 x i32> undef, i32 %146, i32 0
%149 = insertelement <2 x i32> %148, i32 %147, i32 1
%150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %71, <16 x i8> %74, i32 2)
%151 = extractelement <4 x float> %150, i32 0
%152 = extractelement <4 x float> %150, i32 1
%153 = extractelement <4 x float> %150, i32 2
%154 = extractelement <4 x float> %150, i32 3
; 26: TEMP[6].xyz = sample.xyz * 2 - 1
%155 = fmul float %151, 2.000000e+00
%156 = fadd float %155, -1.000000e+00
%157 = fmul float %152, 2.000000e+00
%158 = fadd float %157, -1.000000e+00
%159 = fmul float %153, 2.000000e+00
%160 = fadd float %159, -1.000000e+00
; 27-29: TEMP[6].xyz *= rsq(dot3(TEMP[6], TEMP[6]))
%161 = fmul float %156, %156
%162 = fmul float %158, %158
%163 = fadd float %162, %161
%164 = fmul float %160, %160
%165 = fadd float %163, %164
%166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165)
%167 = fmul float %156, %166
%168 = fmul float %158, %166
%169 = fmul float %160, %166
; 30: TEMP[7].x = dot3(TEMP[5], TEMP[6]); the negation from 19 is realised by
; subtracting each product instead of adding it
%170 = fmul float %131, %167
%171 = fsub float -0.000000e+00, %170
%172 = fmul float %132, %168
%173 = fsub float %171, %172
%174 = fmul float %133, %169
%175 = fsub float %173, %174
; 31-32: TEMP[7].x = max(0, TEMP[7].x) * TEMP[4].w
%176 = call float @llvm.maxnum.f32(float %175, float 0.000000e+00)
%177 = fmul float %176, %145
; 33: TEMP[7].xyz = CONST[7].xyz * TEMP[7].x
%178 = fmul float %42, %177
%179 = fmul float %43, %177
%180 = fmul float %44, %177
; 34: TEMP[8].xyz = CONST[7].xyz * CONST[4].xyz
%181 = fmul float %42, %34
%182 = fmul float %43, %35
%183 = fmul float %44, %36
; 35: TEMP[2].xyz -= CONST[0].xyz
%184 = fsub float %116, %24
%185 = fsub float %117, %25
%186 = fsub float %118, %26
; 36-38: TEMP[2].xyz *= rsq(dot3(TEMP[2], TEMP[2]))
%187 = fmul float %184, %184
%188 = fmul float %185, %185
%189 = fadd float %188, %187
%190 = fmul float %186, %186
%191 = fadd float %189, %190
%192 = call float @llvm.AMDGPU.rsq.clamped.f32(float %191)
%193 = fmul float %184, %192
%194 = fmul float %185, %192
%195 = fmul float %186, %192
; 39: TEMP[2].xyz = TEMP[5].xyz - TEMP[2].xyz, written as (-TEMP[2]) - (value
; from 18) because TEMP[5] is the negated value from 18
%196 = fsub float -0.000000e+00, %193
%197 = fsub float %196, %131
%198 = fsub float -0.000000e+00, %194
%199 = fsub float %198, %132
%200 = fsub float -0.000000e+00, %195
%201 = fsub float %200, %133
; 40-42: TEMP[2].xyz *= rsq(dot3(TEMP[2], TEMP[2]))
%202 = fmul float %197, %197
%203 = fmul float %199, %199
%204 = fadd float %203, %202
%205 = fmul float %201, %201
%206 = fadd float %204, %205
%207 = call float @llvm.AMDGPU.rsq.clamped.f32(float %206)
%208 = fmul float %197, %207
%209 = fmul float %199, %207
%210 = fmul float %201, %207
; 43-44: TEMP[2].x = max(0, dot3(TEMP[2], TEMP[6]))
%211 = fmul float %208, %167
%212 = fmul float %209, %168
%213 = fadd float %212, %211
%214 = fmul float %210, %169
%215 = fadd float %213, %214
%216 = call float @llvm.maxnum.f32(float %215, float 0.000000e+00)
; 45-46: TEMP[0].x = pow(TEMP[2].x, TEMP[0].w * 128)
%217 = fmul float %154, 1.280000e+02
%218 = call float @llvm.pow.f32(float %216, float %217)
; 47-48: TEMP[0].x *= clamp(TEMP[4].w, 0, 1)
%219 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00)
%220 = fmul float %218, %219
; 49-52: TEMP[2].x = 2 * sqrt(TEMP[8].y * (TEMP[8].x + TEMP[8].z))
%221 = fadd float %181, %183
%222 = fmul float %182, %221
%223 = call float @llvm.sqrt.f32(float %222)
%224 = fmul float %223, 2.000000e+00
; 53-54: TEMP[4].x = TEMP[8].x + TEMP[8].y + TEMP[8].z
%225 = fadd float %181, %182
%226 = fadd float %225, %183
; 55: TEMP[2].x = TEMP[2].x * CONST[4].w + TEMP[4].x
%227 = fmul float %224, %37
%228 = fadd float %227, %226
; 56-57: TEMP[7].w = TEMP[0].x * TEMP[2].x
%229 = fmul float %220, %228
; 58-59: TEMP[0].x = sqrt(dot3(TEMP[3], TEMP[3]))
%230 = fmul float %119, %119
%231 = fmul float %120, %120
%232 = fadd float %231, %230
%233 = fmul float %121, %121
%234 = fadd float %232, %233
%235 = call float @llvm.sqrt.f32(float %234)
; 60: LRP with operands CONST[3].w, TEMP[0].x, TEMP[1].z
%236 = call float @llvm.AMDGPU.lrp(float %33, float %235, float %100)
; 61-63: TEMP[0].x = clamp(1 - (TEMP[0].x * CONST[8].z + CONST[8].w), 0, 1)
%237 = fmul float %236, %45
%238 = fadd float %237, %46
%239 = fsub float 1.000000e+00, %238
%240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00)
; 64: TEMP[0] = TEMP[7] * TEMP[0].x
%241 = fmul float %178, %240
%242 = fmul float %179, %240
%243 = fmul float %180, %240
%244 = fmul float %229, %240
; 65: OUT[0] is emitted by packing (x, y) and (z, w) with llvm.SI.packf16 and
; passing the two packed words, repeated, to a single llvm.SI.export call.
%245 = call i32 @llvm.SI.packf16(float %241, float %242)
%246 = bitcast i32 %245 to float
%247 = call i32 @llvm.SI.packf16(float %243, float %244)
%248 = bitcast i32 %247 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %246, float %248, float %246, float %248)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_rcp_f32_e32 v4, v4 ; 7E085504
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500
s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s36, s[0:3], 0x9 ; C2120109
s_buffer_load_dword s37, s[0:3], 0x6 ; C2128106
s_buffer_load_dword s38, s[0:3], 0x8 ; C2130108
s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504
s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508
s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708
s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710
image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[28:35], s[24:27] ; F0800100 00C70301
s_buffer_load_dword s4, s[0:3], 0xc ; C202010C
s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
s_buffer_load_dword s6, s[0:3], 0xe ; C203010E
s_buffer_load_dword s7, s[0:3], 0xf ; C203810F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v4, s36 ; 7E080224
v_rcp_f32_e32 v7, v0 ; 7E0E5500
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v4, s38, v3 ; 3E080626
s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124
s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125
v_mul_f32_e32 v3, s37, v7 ; 10060E25
s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126
v_rcp_f32_e32 v4, v4 ; 7E085504
v_mul_f32_e32 v5, v3, v5 ; 100A0B03
s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128
s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129
v_mul_f32_e32 v5, v4, v5 ; 100A0B04
s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v7, s24, v5 ; 100E0A18
v_mul_f32_e32 v8, s25, v5 ; 10100A19
s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C
v_mul_f32_e32 v5, s26, v5 ; 100A0A1A
v_mul_f32_e32 v6, v3, v6 ; 100C0D03
v_mul_f32_e32 v6, v4, v6 ; 100C0D04
v_mac_f32_e32 v7, s27, v6 ; 3E0E0C1B
v_mac_f32_e32 v8, s28, v6 ; 3E100C1C
s_buffer_load_dword s25, s[0:3], 0x2d ; C20C812D
v_mac_f32_e32 v5, s29, v6 ; 3E0A0C1D
v_mul_f32_e32 v0, v3, v0 ; 10000103
s_buffer_load_dword s26, s[0:3], 0x2e ; C20D012E
s_buffer_load_dword s27, s[0:3], 0x30 ; C20D8130
s_buffer_load_dword s28, s[0:3], 0x31 ; C20E0131
v_mul_f32_e32 v0, v4, v0 ; 10000104
s_buffer_load_dword s29, s[0:3], 0x32 ; C20E8132
s_buffer_load_dword s30, s[0:3], 0x18 ; C20F0118
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v7, s24, v0 ; 3E0E0018
v_mac_f32_e32 v8, s25, v0 ; 3E100019
s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119
s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A
v_mac_f32_e32 v5, s26, v0 ; 3E0A001A
v_add_f32_e32 v3, s27, v7 ; 06060E1B
v_add_f32_e32 v4, s28, v8 ; 0608101C
s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B
v_add_f32_e32 v5, s29, v5 ; 060A0A1D
v_subrev_f32_e32 v6, s30, v3 ; 0A0C061E
s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C
s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v7, s24, v4 ; 0A0E0818
v_subrev_f32_e32 v8, s25, v5 ; 0A100A19
v_mul_f32_e32 v9, v6, v6 ; 10120D06
v_mac_f32_e32 v9, v7, v7 ; 3E120F07
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_mul_f32_e32 v10, s26, v9 ; 1014121A
v_mov_b32_e32 v11, v10 ; 7E16030A
s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112
s_buffer_load_dword s25, s[0:3], 0x1e ; C20C811E
s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110
s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111
image_sample v10, 8, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[40:43] ; F0800800 01420A0A
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[44:47] ; F0800F00 01640B01
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113
s_buffer_load_dword s12, s[0:3], 0x22 ; C2060122
s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v1, s24 ; 7E020218
v_mul_f32_e32 v1, s25, v1 ; 10020219
v_mov_b32_e32 v2, s26 ; 7E04021A
v_mac_f32_e32 v1, s27, v2 ; 3E02041B
v_mov_b32_e32 v2, s26 ; 7E04021A
v_mov_b32_e32 v15, s29 ; 7E1E021D
v_mul_f32_e32 v15, s28, v15 ; 101E1E1C
v_mul_f32_e32 v1, v1, v15 ; 10021F01
v_mac_f32_e32 v15, s27, v2 ; 3E1E041B
v_mov_b32_e32 v2, s24 ; 7E040218
v_mac_f32_e32 v15, s25, v2 ; 3E1E0419
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v2, s8, v3 ; 0A040608
v_subrev_f32_e32 v16, s9, v4 ; 0A200809
v_subrev_f32_e32 v17, s10, v5 ; 0A220A0A
v_mul_f32_e32 v18, v2, v2 ; 10240502
v_mac_f32_e32 v18, v16, v16 ; 3E242110
v_mac_f32_e32 v18, v17, v17 ; 3E242311
v_rsq_clamp_f32_e32 v18, v18 ; 7E245912
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_add_f32_e32 v1, v1, v1 ; 06020301
v_mac_f32_e32 v15, s11, v1 ; 3E1E020B
v_mul_f32_e32 v1, v18, v2 ; 10020512
v_mul_f32_e32 v2, v18, v16 ; 10042112
v_mul_f32_e32 v16, v18, v17 ; 10202312
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4
v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4
v_mul_f32_e32 v17, v11, v11 ; 1022170B
v_mac_f32_e32 v17, v12, v12 ; 3E22190C
v_mad_f32 v13, 2.0, v13, -1.0 ; D282000D 03CE1AF4
v_mac_f32_e32 v17, v13, v13 ; 3E221B0D
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
v_mad_f32 v1, -v6, v9, -v1 ; D2820001 A4061306
v_mul_f32_e32 v6, v9, v6 ; 100C0D09
v_mad_f32 v2, -v7, v9, -v2 ; D2820002 A40A1307
v_mul_f32_e32 v7, v9, v7 ; 100E0F09
v_mul_f32_e32 v11, v17, v11 ; 10161711
v_mad_f32 v16, -v8, v9, -v16 ; D2820010 A4421308
v_mul_f32_e32 v18, v1, v1 ; 10240301
v_mac_f32_e32 v18, v2, v2 ; 3E240502
v_mac_f32_e32 v18, v16, v16 ; 3E242110
v_rsq_clamp_f32_e32 v18, v18 ; 7E245912
v_mul_f32_e32 v12, v17, v12 ; 10181911
v_mul_f32_e32 v6, v11, v6 ; 100C0D0B
v_mad_f32 v6, -v7, v12, -v6 ; D2820006 A41A1907
v_mul_f32_e32 v1, v18, v1 ; 10020312
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mul_f32_e32 v2, v18, v2 ; 10040512
v_mac_f32_e32 v1, v12, v2 ; 3E02050C
v_mul_f32_e32 v2, v9, v8 ; 10041109
v_mul_f32_e32 v7, v17, v13 ; 100E1B11
v_mad_f32 v2, -v2, v7, v6 ; D2820002 241A0F02
v_mul_f32_e32 v6, v18, v16 ; 100C2112
v_mac_f32_e32 v1, v7, v6 ; 3E020D07
v_subrev_f32_e32 v3, s4, v3 ; 0A060604
v_subrev_f32_e32 v4, s5, v4 ; 0A080805
v_subrev_f32_e32 v5, s6, v5 ; 0A0A0A06
v_max_f32_e32 v2, 0, v2 ; 20040480
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mul_f32_e32 v6, s27, v2 ; 100C041B
v_mul_f32_e32 v7, s28, v2 ; 100E041C
v_mul_f32_e32 v2, s25, v2 ; 10040419
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mac_f32_e32 v3, v4, v4 ; 3E060904
v_mac_f32_e32 v3, v5, v5 ; 3E060B05
v_sub_f32_e64 v4, 1.0, s7 ; D2080004 00000EF2
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_max_f32_e32 v1, 0, v1 ; 20020280
v_log_f32_e32 v1, v1 ; 7E024F01
v_sqrt_f32_e32 v3, v3 ; 7E066703
v_mac_f32_e32 v0, s7, v3 ; 3E000607
v_mov_b32_e32 v3, s0 ; 7E060200
v_mac_f32_e32 v3, s12, v0 ; 3E06000C
v_mul_f32_e32 v0, 0x43000000, v14 ; 10001CFF 43000000
v_mul_legacy_f32_e32 v0, v0, v1 ; 0E000300
v_exp_f32_e32 v0, v0 ; 7E004B00
v_add_f32_e64 v1, 0, v10 clamp ; D2060801 00021480
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mul_f32_e32 v0, v15, v0 ; 1000010F
v_sub_f32_e32 v1, 1.0, v3 ; 080206F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v3, v1, v6 ; 10060D01
v_mul_f32_e32 v4, v1, v7 ; 10080F01
v_mul_f32_e32 v2, v1, v2 ; 10040501
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_cvt_pkrtz_f16_f32_e32 v1, v3, v4 ; 5E020903
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 20
Code Size: 824 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[0], TEMP[0]
5: END
; ModuleID = 'tgsi'
; LLVM IR emitted for the TGSI VERT program listed directly above. The "N:"
; references in the comments below are that listing's instruction numbers.
; Arguments referenced by this body:
;   %1     - element 0 is loaded and used as the buffer operand of every
;            llvm.SI.load.const call
;   %4     - element 0 is loaded and passed to llvm.SI.vs.load.input
;   %5, %7 - summed to form the last operand of llvm.SI.vs.load.input
; No other argument is referenced by this body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Scalar constant loads at offsets 0..60 in steps of 4. Judging by how they
; are consumed below, %13..%16 = CONST[0].xyzw, %17..%20 = CONST[1].xyzw,
; %21..%24 = CONST[2].xyzw and %25..%28 = CONST[3].xyzw.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Fetch IN[0] as a <4 x float> and split it into x, y, z, w (%33..%36).
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; 0: TEMP[0] = CONST[0] * IN[0].x
%37 = fmul float %13, %33
%38 = fmul float %14, %33
%39 = fmul float %15, %33
%40 = fmul float %16, %33
; 1: TEMP[0] = CONST[1] * IN[0].y + TEMP[0]
%41 = fmul float %17, %34
%42 = fadd float %41, %37
%43 = fmul float %18, %34
%44 = fadd float %43, %38
%45 = fmul float %19, %34
%46 = fadd float %45, %39
%47 = fmul float %20, %34
%48 = fadd float %47, %40
; 2: TEMP[0] = CONST[2] * IN[0].z + TEMP[0]
%49 = fmul float %21, %35
%50 = fadd float %49, %42
%51 = fmul float %22, %35
%52 = fadd float %51, %44
%53 = fmul float %23, %35
%54 = fadd float %53, %46
%55 = fmul float %24, %35
%56 = fadd float %55, %48
; 3: TEMP[0] = CONST[3] * IN[0].w + TEMP[0]
%57 = fmul float %25, %36
%58 = fadd float %57, %50
%59 = fmul float %26, %36
%60 = fadd float %59, %52
%61 = fmul float %27, %36
%62 = fadd float %61, %54
%63 = fmul float %28, %36
%64 = fadd float %63, %56
; 4: OUT[0] (declared POSITION) = TEMP[0], emitted with a single export call
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listed for the @main function directly above: one vertex fetch,
; sixteen scalar constant loads, sixteen multiply / multiply-accumulate
; instructions and one export. The trailing hex on each line is the dump's
; instruction encoding.
; Four dwords read via s[8:9] into s[4:7]; used as the resource operand of the
; buffer_load_format_xyzw below.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
; v0 = s10 + v0: the fetch index (the IR's add of %5 and %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; Vertex input lands in v0..v3 (x, y, z, w).
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
; Four dwords read via s[2:3] into s[0:3]; operand of every s_buffer_load_dword.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Sixteen constants, offsets 0x0..0xf (dword units: the IR reads bytes 0..60).
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
; The last constant reuses s0, overwriting part of the s[0:3] operand that is
; no longer needed after this load.
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
; x and y terms of the four sums, accumulated in v4, v5, v6 and v0.
v_mul_f32_e32 v4, s4, v0 ; 10080004
v_mac_f32_e32 v4, s8, v1 ; 3E080208
v_mul_f32_e32 v5, s5, v0 ; 100A0005
v_mac_f32_e32 v5, s9, v1 ; 3E0A0209
v_mul_f32_e32 v6, s6, v0 ; 100C0006
v_mac_f32_e32 v6, s10, v1 ; 3E0C020A
; Last read of input x in v0; v0 becomes the fourth accumulator from here on.
v_mul_f32_e32 v0, s7, v0 ; 10000007
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
; z terms.
v_mac_f32_e32 v4, s12, v2 ; 3E08040C
v_mac_f32_e32 v5, s13, v2 ; 3E0A040D
v_mac_f32_e32 v6, s14, v2 ; 3E0C040E
v_mac_f32_e32 v0, s15, v2 ; 3E00040F
; w terms.
v_mac_f32_e32 v4, s16, v3 ; 3E080610
v_mac_f32_e32 v5, s17, v3 ; 3E0A0611
v_mac_f32_e32 v6, s18, v3 ; 3E0C0612
v_mac_f32_e32 v0, s0, v3 ; 3E000600
; Export the four sums (target 12, matching the IR's llvm.SI.export call).
exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xyxx
1: END
; ModuleID = 'tgsi'
; Fragment shader entry point (attribute #0 carries "ShaderType"="0").
; Implements the TGSI program "MOV OUT[0], IMM[0].xyxx" with
; IMM[0] = {1, 0, 0, 0}, i.e. the constant colour (1, 0, 1, 1). None of the
; function arguments is read.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Colour x/y = (1.0, 0.0) packed by llvm.SI.packf16 into one i32, then
; reinterpreted as float for the export call.
%22 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
; Colour z/w = (1.0, 1.0), packed the same way.
%24 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00)
%25 = bitcast i32 %24 to float
; The two packed values are passed twice (xy, zw, xy, zw).
; NOTE(review): the packed form presumably follows from export_16bpc = 0x3 in
; the shader key - confirm against the driver.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
; Machine code listed for the constant-colour fragment shader above.
; v0 = packed (1.0, 0), v1 = packed (1.0, 1.0): the two llvm.SI.packf16 calls.
v_cvt_pkrtz_f16_f32_e64 v0, 1.0, 0 ; D25E0000 000100F2
v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2
; Export with the same operand order as the IR: v0, v1, v0, v1.
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 28 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MUL TEMP[2], CONST[6], IN[0].xxxx
11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2]
14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz
15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz
16: MOV OUT[1], TEMP[1]
17: MOV OUT[2], TEMP[2]
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
; Vertex shader entry point (attribute #0 carries "ShaderType"="1").
; LLVM translation of the 20-instruction TGSI program listed just above this
; module. "TGSI n" in the comments below refers to instruction n of that
; listing. Outputs: OUT[0] (POSITION) via export target 12, OUT[1] via target
; 32 and OUT[2] via target 33.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of argument %1: the operand of every llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant loads by byte offset (16 bytes per CONST register):
;   %13      = CONST[0].x        (offset 0)
;   %14      = CONST[1].x        (offset 16)
;   %15..%30 = CONST[2..5].xyzw  (offsets 32..92)
;   %31..%45 = CONST[6].x .. CONST[9].z (offsets 96..152); CONST[9].w is not
;              loaded because TGSI 13 only writes .xyz.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; IN[0]: fetched through element 0 of argument %4 at index %5 + %7.
; %50 = x, %51 = y, %52 = z, %53 = w.
%46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = add i32 %5, %7
%49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48)
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = extractelement <4 x float> %49, i32 3
; IN[1]: fetched through element 1 of argument %4 at the same index. Only
; x, y, z are extracted (TGSI 15 reads IN[1].xyzz).
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
; TGSI 0-3: TEMP[0] = CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z
; + CONST[5]*IN[0].w. Final components: %82 = x, %84 = y, %86 = z, %88 = w.
%61 = fmul float %15, %50
%62 = fmul float %16, %50
%63 = fmul float %17, %50
%64 = fmul float %18, %50
%65 = fmul float %19, %51
%66 = fadd float %65, %61
%67 = fmul float %20, %51
%68 = fadd float %67, %62
%69 = fmul float %21, %51
%70 = fadd float %69, %63
%71 = fmul float %22, %51
%72 = fadd float %71, %64
%73 = fmul float %23, %52
%74 = fadd float %73, %66
%75 = fmul float %24, %52
%76 = fadd float %75, %68
%77 = fmul float %25, %52
%78 = fadd float %77, %70
%79 = fmul float %26, %52
%80 = fadd float %79, %72
%81 = fmul float %27, %53
%82 = fadd float %81, %74
%83 = fmul float %28, %53
%84 = fadd float %83, %76
%85 = fmul float %29, %53
%86 = fadd float %85, %78
%87 = fmul float %30, %53
%88 = fadd float %87, %80
; TGSI 4: TEMP[1].xyw = TEMP[0].xyw * 0.5.
%89 = fmul float %82, 5.000000e-01
%90 = fmul float %84, 5.000000e-01
%91 = fmul float %88, 5.000000e-01
; TGSI 6: the halved y is additionally scaled by CONST[0].x.
%92 = fmul float %90, %13
; TGSI 8: TEMP[1].xy = (halved x, scaled y) + halved w.
%93 = fadd float %89, %91
%94 = fadd float %92, %91
; TGSI 10-12: TEMP[2] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z
; for all four components (%108, %110, %112, %114).
%95 = fmul float %31, %50
%96 = fmul float %32, %50
%97 = fmul float %33, %50
%98 = fmul float %34, %50
%99 = fmul float %35, %51
%100 = fadd float %99, %95
%101 = fmul float %36, %51
%102 = fadd float %101, %96
%103 = fmul float %37, %51
%104 = fadd float %103, %97
%105 = fmul float %38, %51
%106 = fadd float %105, %98
%107 = fmul float %39, %52
%108 = fadd float %107, %100
%109 = fmul float %40, %52
%110 = fadd float %109, %102
%111 = fmul float %41, %52
%112 = fadd float %111, %104
%113 = fmul float %42, %52
%114 = fadd float %113, %106
; TGSI 13: the CONST[9]*IN[0].w term is added to x, y, z only; %114 stays as
; the w component.
%115 = fmul float %43, %53
%116 = fadd float %115, %108
%117 = fmul float %44, %53
%118 = fadd float %117, %110
%119 = fmul float %45, %53
%120 = fadd float %119, %112
; TGSI 14: multiply by IMM[0].yyzz = (-1, -1, 1): x and y are negated, z is
; used unchanged.
%121 = fsub float -0.000000e+00, %116
%122 = fsub float -0.000000e+00, %118
; TGSI 15: LRP with CONST[1].x as the factor, IN[1].xyz as the first source
; and TEMP[2].xyz as the second.
%123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121)
%124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122)
%125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120)
; TGSI 16: OUT[1] = TEMP[1]; z and w come straight from TEMP[0] (TGSI 9).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88)
; TGSI 17: OUT[2] = TEMP[2].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114)
; TGSI 18: OUT[0] (POSITION) = TEMP[0]. This is the only export whose third
; argument is 1.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listed for the vertex shader @main directly above. Constant
; loads are interleaved with the arithmetic and scalar registers are reused,
; so the same sN holds different constants at different points.
; s_buffer_load_dword offsets are in dword units (0x26 * 4 = 152, the highest
; byte offset used by the IR).
; Accumulators: v0, v8, v9, v10 = TEMP[0].xyzw; v11, v12, v13, v1 = TEMP[2].xyzw.
; Two 4-dword operands read via s[8:9] (offsets 0x0 and 0x4), one for each
; vertex fetch; v0 = s10 + v0 is the shared fetch index.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; IN[0] -> v1..v4, IN[1] -> v5..v8 (v8 is later reused as an accumulator).
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
; Operand for all s_buffer_load_dword instructions.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_waitcnt lgkmcnt(0) ; BF8C007F
; x and y terms of TEMP[0] (v0, v8, v9, v10) and TEMP[2] (v11, v12, v13, v1).
v_mul_f32_e32 v0, s4, v1 ; 10000204
s_buffer_load_dword s4, s[0:3], 0xe ; C202010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s7, v2 ; 3E100407
s_buffer_load_dword s5, s[0:3], 0xb ; C202810B
v_mul_f32_e32 v9, s8, v1 ; 10120208
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C
s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119
s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D
s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A
s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v2 ; 3E120404
s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_mul_f32_e32 v11, s6, v1 ; 10160206
v_mac_f32_e32 v11, s7, v2 ; 3E160407
v_mul_f32_e32 v12, s8, v1 ; 10180208
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v13, s11, v1 ; 101A020B
v_mac_f32_e32 v13, s12, v2 ; 3E1A040C
s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Last read of IN[0].x in v1; v1 becomes the TEMP[2].w accumulator.
v_mul_f32_e32 v1, s4, v1 ; 10020204
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112
s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113
s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120
s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123
v_mac_f32_e32 v1, s5, v2 ; 3E020405
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_waitcnt lgkmcnt(0) ; BF8C007F
; z terms (multiplied by v3 = IN[0].z).
v_mac_f32_e32 v0, s4, v3 ; 3E000604
v_mac_f32_e32 v8, s6, v3 ; 3E100606
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mac_f32_e32 v11, s9, v3 ; 3E160609
v_mac_f32_e32 v12, s10, v3 ; 3E18060A
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116
s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117
s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124
s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125
s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126
v_mac_f32_e32 v13, s11, v3 ; 3E1A060B
; s11 = dword 0x4 = CONST[1].x, the LRP factor.
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
; w terms (multiplied by v4 = IN[0].w); v1 (TEMP[2].w) gets no w term.
v_mac_f32_e32 v0, s5, v4 ; 3E000805
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v8, s4, v4 ; 3E100804
v_mac_f32_e32 v9, s6, v4 ; 3E120806
v_mac_f32_e32 v10, s7, v4 ; 3E140807
v_mac_f32_e32 v11, s8, v4 ; 3E160808
v_mac_f32_e32 v12, s9, v4 ; 3E180809
v_mac_f32_e32 v13, s10, v4 ; 3E1A080A
; s0 = dword 0x0 = CONST[0].x; this overwrites part of the s[0:3] operand
; after the final constant load.
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
; Expanded llvm.AMDGPU.lrp: v2 = 1 - factor, result = factor * IN[1]
; + (1 - factor) * TEMP[2]. The x and y results subtract the product, which
; applies the negation of TEMP[2].xy from the IR.
v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2
v_mul_f32_e32 v3, v11, v2 ; 1006050B
v_mul_f32_e32 v4, v12, v2 ; 1008050C
v_mul_f32_e32 v2, v13, v2 ; 1004050D
v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B
v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B
v_mac_f32_e32 v2, s11, v7 ; 3E040E0B
; OUT[1].xy: v7 = 0.5 * x + 0.5 * w, v6 = 0.5 * w + CONST[0].x * (0.5 * y).
v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0
v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0
v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00
; Exports in the same order as the IR: targets 32, 33, then 12.
exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607
exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403
exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Code Size: 408 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[6..12]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000}
0: RCP TEMP[0].x, IN[0].wwww
1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
2: RCP TEMP[1].x, IN[1].zzzz
3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx
4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
5: MOV TEMP[3].xy, TEMP[0].xyyy
6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D
7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy
8: RCP TEMP[3].x, TEMP[3].xxxx
9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx
10: MUL TEMP[2], CONST[9], TEMP[1].xxxx
11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2]
13: ADD TEMP[2].xyz, TEMP[2], CONST[12]
14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz
15: MOV TEMP[4].xyz, -CONST[6].xyzx
16: MOV TEMP[0].xy, TEMP[0].xyyy
17: TEX TEMP[0], TEMP[0], SAMP[1], 2D
18: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz
19: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz
20: RSQ TEMP[6].x, TEMP[6].xxxx
21: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
22: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz
23: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
24: MUL TEMP[6].xyz, CONST[7].xyzz, TEMP[6].xxxx
25: MUL TEMP[7].xyz, CONST[7].xyzz, CONST[4].xyzz
26: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz
27: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz
28: RSQ TEMP[8].x, TEMP[8].xxxx
29: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
30: ADD TEMP[2].xyz, TEMP[4].xyzz, -TEMP[2].xyzz
31: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz
32: RSQ TEMP[4].x, TEMP[4].xxxx
33: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
34: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[5].xyzz
35: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx
36: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx
37: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
38: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].zzzz
39: MUL TEMP[2].x, TEMP[7].yyyy, TEMP[2].xxxx
40: SQRT TEMP[2].x, TEMP[2].xxxx
41: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
42: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[7].yyyy
43: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[7].zzzz
44: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx
45: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
46: MOV TEMP[6].w, TEMP[0].xxxx
47: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz
48: SQRT TEMP[0].x, TEMP[0].xxxx
49: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz
50: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww
51: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
52: MOV_SAT TEMP[0].x, TEMP[0].xxxx
53: MUL TEMP[0], TEMP[6], TEMP[0].xxxx
54: MOV OUT[0], TEMP[0]
55: END
; ModuleID = 'tgsi'
; Fragment shader entry point (attribute #0 carries "ShaderType"="0").
; LLVM translation of the 56-instruction TGSI program listed just above this
; module. "TGSI n" in the comments below refers to instruction n of that
; listing. The shader samples two 2D textures at the same projected
; coordinate, derives two terms from the second sample (TEMP[6].xyz and
; TEMP[6].w), scales all four by a clamped factor and exports them as two
; packed f16 pairs.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %1: the operand of every llvm.SI.load.const below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Constant loads by byte offset (16 bytes per CONST register):
;   %24..%26 = CONST[0].xyz      %27      = CONST[1].z
;   %28, %29 = CONST[2].xy       %30..%33 = CONST[3].xyzw
;   %34..%37 = CONST[4].xyzw     %38..%40 = CONST[6].xyz
;   %41..%43 = CONST[7].xyz      %44, %45 = CONST[8].zw
;   %46..%48 = CONST[9].xyz      %49..%51 = CONST[10].xyz
;   %52..%54 = CONST[11].xyz     %55..%57 = CONST[12].xyz
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
; Operands of the first sample call (TGSI 6, SAMP[0]): element 0 of argument
; %3 (32 bytes) and element 0 of argument %2 (16 bytes).
%58 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0
%60 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0
; Operands of the second sample call (TGSI 17, SAMP[1]): element 1 of the
; same two arrays.
%62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)*
%64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0
%65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)*
%67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0
; Interpolated inputs (first argument = channel, second = attribute index):
; %68 = IN[0].x, %69 = IN[0].y, %70 = IN[0].w, %71..%73 = IN[1].xyz.
%68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
; TGSI 0-1: TEMP[0].xy = IN[0].xy / IN[0].w (the texture coordinate).
%74 = fdiv float 1.000000e+00, %70
%75 = fmul float %68, %74
%76 = fmul float %69, %74
; TGSI 2-4: TEMP[2].xyz = IN[1].xyz * (CONST[1].z / IN[1].z).
%77 = fdiv float 1.000000e+00, %73
%78 = fmul float %27, %77
%79 = fmul float %71, %78
%80 = fmul float %72, %78
%81 = fmul float %73, %78
; TGSI 5-6: sample SAMP[0] at TEMP[0].xy; only the x component is used.
%82 = bitcast float %75 to i32
%83 = bitcast float %76 to i32
%84 = insertelement <2 x i32> undef, i32 %82, i32 0
%85 = insertelement <2 x i32> %84, i32 %83, i32 1
%86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %59, <16 x i8> %61, i32 2)
%87 = extractelement <4 x float> %86, i32 0
; TGSI 7-8: %90 = 1 / (CONST[2].x * sample + CONST[2].y).
%88 = fmul float %28, %87
%89 = fadd float %88, %29
%90 = fdiv float 1.000000e+00, %89
; TGSI 9: TEMP[1].xyz = TEMP[2].xyz * %90.
%91 = fmul float %79, %90
%92 = fmul float %80, %90
%93 = fmul float %81, %90
; TGSI 10-12: TEMP[2].xyz = CONST[9]*TEMP[1].x + CONST[10]*TEMP[1].y
; + CONST[11]*TEMP[1].z (only x, y, z are computed).
%94 = fmul float %46, %91
%95 = fmul float %47, %91
%96 = fmul float %48, %91
%97 = fmul float %49, %92
%98 = fadd float %97, %94
%99 = fmul float %50, %92
%100 = fadd float %99, %95
%101 = fmul float %51, %92
%102 = fadd float %101, %96
%103 = fmul float %52, %93
%104 = fadd float %103, %98
%105 = fmul float %53, %93
%106 = fadd float %105, %100
%107 = fmul float %54, %93
%108 = fadd float %107, %102
; TGSI 13: TEMP[2].xyz += CONST[12].xyz.
%109 = fadd float %104, %55
%110 = fadd float %106, %56
%111 = fadd float %108, %57
; TGSI 14: TEMP[3].xyz = TEMP[2].xyz - CONST[3].xyz.
%112 = fsub float %109, %30
%113 = fsub float %110, %31
%114 = fsub float %111, %32
; TGSI 16-17: sample SAMP[1] at the same coordinate; all four components
; are used (%120..%123 = x, y, z, w).
%115 = bitcast float %75 to i32
%116 = bitcast float %76 to i32
%117 = insertelement <2 x i32> undef, i32 %115, i32 0
%118 = insertelement <2 x i32> %117, i32 %116, i32 1
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %64, <16 x i8> %67, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = extractelement <4 x float> %119, i32 3
; TGSI 18: TEMP[5].xyz = sample.xyz * 2 - 1.
%124 = fmul float %120, 2.000000e+00
%125 = fadd float %124, -1.000000e+00
%126 = fmul float %121, 2.000000e+00
%127 = fadd float %126, -1.000000e+00
%128 = fmul float %122, 2.000000e+00
%129 = fadd float %128, -1.000000e+00
; TGSI 19-21: normalize TEMP[5].xyz (DP3 with itself, RSQ, scale).
%130 = fmul float %125, %125
%131 = fmul float %127, %127
%132 = fadd float %131, %130
%133 = fmul float %129, %129
%134 = fadd float %132, %133
%135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134)
%136 = fmul float %125, %135
%137 = fmul float %127, %135
%138 = fmul float %129, %135
; TGSI 22: DP3 of TEMP[4] (= -CONST[6].xyz, TGSI 15) with the normalized
; TEMP[5]; the negation is folded into the subtractions.
%139 = fmul float %38, %136
%140 = fsub float -0.000000e+00, %139
%141 = fmul float %39, %137
%142 = fsub float %140, %141
%143 = fmul float %40, %138
%144 = fsub float %142, %143
; TGSI 23: clamp the dot product to be non-negative.
%145 = call float @llvm.maxnum.f32(float %144, float 0.000000e+00)
; TGSI 24: TEMP[6].xyz = CONST[7].xyz * %145.
%146 = fmul float %41, %145
%147 = fmul float %42, %145
%148 = fmul float %43, %145
; TGSI 25: TEMP[7].xyz = CONST[7].xyz * CONST[4].xyz.
%149 = fmul float %41, %34
%150 = fmul float %42, %35
%151 = fmul float %43, %36
; TGSI 26-29: TEMP[2].xyz = normalize(TEMP[2].xyz - CONST[0].xyz).
%152 = fsub float %109, %24
%153 = fsub float %110, %25
%154 = fsub float %111, %26
%155 = fmul float %152, %152
%156 = fmul float %153, %153
%157 = fadd float %156, %155
%158 = fmul float %154, %154
%159 = fadd float %157, %158
%160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159)
%161 = fmul float %152, %160
%162 = fmul float %153, %160
%163 = fmul float %154, %160
; TGSI 30: TEMP[4].xyz - TEMP[2].xyz, emitted as (-TEMP[2]) - CONST[6]
; because TEMP[4] is -CONST[6].
%164 = fsub float -0.000000e+00, %161
%165 = fsub float %164, %38
%166 = fsub float -0.000000e+00, %162
%167 = fsub float %166, %39
%168 = fsub float -0.000000e+00, %163
%169 = fsub float %168, %40
; TGSI 31-33: normalize that difference.
%170 = fmul float %165, %165
%171 = fmul float %167, %167
%172 = fadd float %171, %170
%173 = fmul float %169, %169
%174 = fadd float %172, %173
%175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174)
%176 = fmul float %165, %175
%177 = fmul float %167, %175
%178 = fmul float %169, %175
; TGSI 34-35: dot product with the normalized TEMP[5], clamped at zero.
%179 = fmul float %176, %136
%180 = fmul float %177, %137
%181 = fadd float %180, %179
%182 = fmul float %178, %138
%183 = fadd float %181, %182
%184 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00)
; TGSI 36-37: raise it to the power (second sample's w * 128).
%185 = fmul float %123, 1.280000e+02
%186 = call float @llvm.pow.f32(float %184, float %185)
; TGSI 38-41: %190 = 2 * sqrt(TEMP[7].y * (TEMP[7].x + TEMP[7].z)).
%187 = fadd float %149, %151
%188 = fmul float %150, %187
%189 = call float @llvm.sqrt.f32(float %188)
%190 = fmul float %189, 2.000000e+00
; TGSI 42-43: %192 = TEMP[7].x + TEMP[7].y + TEMP[7].z.
%191 = fadd float %149, %150
%192 = fadd float %191, %151
; TGSI 44: %194 = %190 * CONST[4].w + %192.
%193 = fmul float %190, %37
%194 = fadd float %193, %192
; TGSI 45-46: TEMP[6].w = pow result * %194.
%195 = fmul float %186, %194
; TGSI 47-48: length of TEMP[3].xyz.
%196 = fmul float %112, %112
%197 = fmul float %113, %113
%198 = fadd float %197, %196
%199 = fmul float %114, %114
%200 = fadd float %198, %199
%201 = call float @llvm.sqrt.f32(float %200)
; TGSI 49: LRP with CONST[3].w as the factor, between that length and
; TEMP[1].z.
%202 = call float @llvm.AMDGPU.lrp(float %33, float %201, float %93)
; TGSI 50: multiply-add with CONST[8].z and CONST[8].w.
%203 = fmul float %202, %44
%204 = fadd float %203, %45
; TGSI 51-52: %206 = clamp(1 - %204, 0, 1).
%205 = fsub float 1.000000e+00, %204
%206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00)
; TGSI 53: scale all four components of TEMP[6] by %206.
%207 = fmul float %146, %206
%208 = fmul float %147, %206
%209 = fmul float %148, %206
%210 = fmul float %195, %206
; TGSI 54: OUT[0]. The components are packed pairwise (xy, zw) by
; llvm.SI.packf16 and each packed value is passed twice to the export.
; NOTE(review): the packed form presumably follows from export_16bpc = 0x3 in
; the shader key - confirm against the driver.
%211 = call i32 @llvm.SI.packf16(float %207, float %208)
%212 = bitcast i32 %211 to float
%213 = call i32 @llvm.SI.packf16(float %209, float %210)
%214 = bitcast i32 %213 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %212, float %214, float %212, float %214)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_rcp_f32_e32 v4, v4 ; 7E085504
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[8:11] ; F0800100 00440301
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[24:31], s[12:15] ; F0800F00 00660701
s_buffer_load_dword s6, s[0:3], 0xc ; C203010C
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xe ; C204010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v1, s4 ; 7E020204
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v1, s5, v3 ; 3E020605
v_rcp_f32_e32 v1, v1 ; 7E025501
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111
s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112
s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113
s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118
s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119
s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A
s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C
s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D
s_buffer_load_dword s21, s[0:3], 0x1e ; C20A811E
s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122
s_buffer_load_dword s23, s[0:3], 0x23 ; C20B8123
s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124
s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125
s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126
s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128
s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129
s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A
s_buffer_load_dword s30, s[0:3], 0x2c ; C20F012C
s_buffer_load_dword s31, s[0:3], 0x2d ; C20F812D
v_rcp_f32_e32 v2, v0 ; 7E045500
s_buffer_load_dword s32, s[0:3], 0x2e ; C210012E
s_buffer_load_dword s33, s[0:3], 0x30 ; C2108130
s_buffer_load_dword s34, s[0:3], 0x31 ; C2110131
s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v2, s11, v2 ; 1004040B
v_mul_f32_e32 v3, v2, v5 ; 10060B02
v_mul_f32_e32 v3, v1, v3 ; 10060701
v_mul_f32_e32 v4, s24, v3 ; 10080618
v_mul_f32_e32 v5, s25, v3 ; 100A0619
v_mul_f32_e32 v3, s26, v3 ; 1006061A
v_mul_f32_e32 v6, v2, v6 ; 100C0D02
v_mul_f32_e32 v6, v1, v6 ; 100C0D01
v_mac_f32_e32 v4, s27, v6 ; 3E080C1B
v_mac_f32_e32 v5, s28, v6 ; 3E0A0C1C
v_mac_f32_e32 v3, s29, v6 ; 3E060C1D
v_mov_b32_e32 v6, s14 ; 7E0C020E
v_mul_f32_e32 v6, s21, v6 ; 100C0C15
v_mov_b32_e32 v11, s12 ; 7E16020C
v_mac_f32_e32 v6, s19, v11 ; 3E0C1613
v_mov_b32_e32 v11, s12 ; 7E16020C
v_mov_b32_e32 v12, s13 ; 7E18020D
v_mul_f32_e32 v12, s20, v12 ; 10181814
v_mul_f32_e32 v6, v6, v12 ; 100C1906
v_mac_f32_e32 v12, s19, v11 ; 3E181613
v_mov_b32_e32 v11, s14 ; 7E16020E
v_mac_f32_e32 v12, s21, v11 ; 3E181615
v_sqrt_f32_e32 v6, v6 ; 7E0C6706
v_add_f32_e32 v6, v6, v6 ; 060C0D06
v_mac_f32_e32 v12, s15, v6 ; 3E180C0F
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mac_f32_e32 v4, s30, v0 ; 3E08001E
v_mac_f32_e32 v5, s31, v0 ; 3E0A001F
v_mac_f32_e32 v3, s32, v0 ; 3E060020
v_add_f32_e32 v1, s33, v4 ; 06020821
v_add_f32_e32 v2, s34, v5 ; 06040A22
v_add_f32_e32 v3, s0, v3 ; 06060600
v_subrev_f32_e32 v4, s4, v1 ; 0A080204
v_subrev_f32_e32 v5, s5, v2 ; 0A0A0405
v_subrev_f32_e32 v6, s10, v3 ; 0A0C060A
v_mad_f32 v7, 2.0, v7, -1.0 ; D2820007 03CE0EF4
v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4
v_mul_f32_e32 v11, v7, v7 ; 10160F07
v_mac_f32_e32 v11, v8, v8 ; 3E161108
v_mul_f32_e32 v13, v4, v4 ; 101A0904
v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05
v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4
v_mac_f32_e32 v11, v9, v9 ; 3E161309
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
v_mad_f32 v4, -v4, v13, -s16 ; D2820004 A0421B04
v_mad_f32 v5, -v5, v13, -s17 ; D2820005 A0461B05
v_mad_f32 v6, -v6, v13, -s18 ; D2820006 A04A1B06
v_mul_f32_e32 v7, v11, v7 ; 100E0F0B
v_mul_f32_e32 v13, v4, v4 ; 101A0904
v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05
v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mul_f32_e32 v14, s16, v7 ; 101C0E10
v_mul_f32_e32 v8, v11, v8 ; 1010110B
v_mad_f32 v14, -s17, v8, -v14 ; D282000E A43A1011
v_mul_f32_e32 v4, v13, v4 ; 1008090D
v_mul_f32_e32 v4, v7, v4 ; 10080907
v_mul_f32_e32 v5, v13, v5 ; 100A0B0D
v_mac_f32_e32 v4, v8, v5 ; 3E080B08
v_subrev_f32_e32 v1, s6, v1 ; 0A020206
v_subrev_f32_e32 v2, s7, v2 ; 0A040407
v_subrev_f32_e32 v3, s8, v3 ; 0A060608
v_mul_f32_e32 v5, v11, v9 ; 100A130B
v_mad_f32 v7, -s18, v5, v14 ; D2820007 243A0A12
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_mul_f32_e32 v8, s19, v7 ; 10100E13
v_mul_f32_e32 v9, s20, v7 ; 10120E14
v_mul_f32_e32 v7, s21, v7 ; 100E0E15
v_mul_f32_e32 v6, v13, v6 ; 100C0D0D
v_mac_f32_e32 v4, v5, v6 ; 3E080D05
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v2, v2 ; 3E020502
v_mac_f32_e32 v1, v3, v3 ; 3E020703
v_sub_f32_e64 v2, 1.0, s9 ; D2080002 000012F2
v_mul_f32_e32 v0, v0, v2 ; 10000500
v_max_f32_e32 v2, 0, v4 ; 20040880
v_log_f32_e32 v2, v2 ; 7E044F02
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mac_f32_e32 v0, s9, v1 ; 3E000209
v_mov_b32_e32 v1, s23 ; 7E020217
v_mac_f32_e32 v1, s22, v0 ; 3E020016
v_mul_f32_e32 v0, 0x43000000, v10 ; 100014FF 43000000
v_mul_legacy_f32_e32 v0, v0, v2 ; 0E000500
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mul_f32_e32 v0, v12, v0 ; 1000010C
v_sub_f32_e32 v1, 1.0, v1 ; 080202F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v2, v1, v8 ; 10041101
v_mul_f32_e32 v3, v1, v9 ; 10061301
v_mul_f32_e32 v4, v1, v7 ; 10080F01
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 712 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..17]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[14], IN[0].xxxx
1: MAD TEMP[0], CONST[15], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[16], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[17], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MOV TEMP[2].x, CONST[8].xxxx
11: MOV TEMP[2].y, CONST[9].xxxx
12: MOV TEMP[2].z, CONST[10].xxxx
13: MOV TEMP[3].x, CONST[8].yyyy
14: MOV TEMP[3].y, CONST[9].yyyy
15: MOV TEMP[3].z, CONST[10].yyyy
16: MOV TEMP[4].x, CONST[8].zzzz
17: MOV TEMP[4].y, CONST[9].zzzz
18: MOV TEMP[4].z, CONST[10].zzzz
19: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx
20: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz
21: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz
22: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
23: RSQ TEMP[3].x, TEMP[3].xxxx
24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
25: MOV TEMP[3].w, IMM[0].yyyy
26: MOV TEMP[3].xyz, TEMP[2].xyzx
27: DP4 TEMP[4].x, CONST[1], TEMP[3]
28: DP4 TEMP[5].x, CONST[2], TEMP[3]
29: MOV TEMP[4].y, TEMP[5].xxxx
30: DP4 TEMP[3].x, CONST[3], TEMP[3]
31: MOV TEMP[4].z, TEMP[3].xxxx
32: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx
33: DP4 TEMP[5].x, CONST[4], TEMP[3]
34: DP4 TEMP[6].x, CONST[5], TEMP[3]
35: MOV TEMP[5].y, TEMP[6].xxxx
36: DP4 TEMP[3].x, CONST[6], TEMP[3]
37: MOV TEMP[5].z, TEMP[3].xxxx
38: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy
39: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx
40: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz
41: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz
42: MAD TEMP[3].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
43: MOV TEMP[3].w, TEMP[2].xxxx
44: MOV TEMP[2].xy, TEMP[2].yzyy
45: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww
46: MOV TEMP[3].z, TEMP[4].xxxx
47: MOV OUT[2], TEMP[3]
48: MOV OUT[1], TEMP[1]
49: MOV OUT[0], TEMP[0]
50: MOV OUT[3], TEMP[2]
51: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0
%74 = add i32 %5, %7
%75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74)
%76 = extractelement <4 x float> %75, i32 0
%77 = extractelement <4 x float> %75, i32 1
%78 = extractelement <4 x float> %75, i32 2
%79 = extractelement <4 x float> %75, i32 3
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = add i32 %5, %7
%90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89)
%91 = extractelement <4 x float> %90, i32 0
%92 = extractelement <4 x float> %90, i32 1
%93 = fmul float %56, %76
%94 = fmul float %57, %76
%95 = fmul float %58, %76
%96 = fmul float %59, %76
%97 = fmul float %60, %77
%98 = fadd float %97, %93
%99 = fmul float %61, %77
%100 = fadd float %99, %94
%101 = fmul float %62, %77
%102 = fadd float %101, %95
%103 = fmul float %63, %77
%104 = fadd float %103, %96
%105 = fmul float %64, %78
%106 = fadd float %105, %98
%107 = fmul float %65, %78
%108 = fadd float %107, %100
%109 = fmul float %66, %78
%110 = fadd float %109, %102
%111 = fmul float %67, %78
%112 = fadd float %111, %104
%113 = fmul float %68, %79
%114 = fadd float %113, %106
%115 = fmul float %69, %79
%116 = fadd float %115, %108
%117 = fmul float %70, %79
%118 = fadd float %117, %110
%119 = fmul float %71, %79
%120 = fadd float %119, %112
%121 = fmul float %114, 5.000000e-01
%122 = fmul float %116, 5.000000e-01
%123 = fmul float %120, 5.000000e-01
%124 = fmul float %122, %13
%125 = fadd float %121, %123
%126 = fadd float %124, %123
%127 = fmul float %41, %84
%128 = fmul float %44, %84
%129 = fmul float %47, %84
%130 = fmul float %42, %85
%131 = fadd float %130, %127
%132 = fmul float %45, %85
%133 = fadd float %132, %128
%134 = fmul float %48, %85
%135 = fadd float %134, %129
%136 = fmul float %43, %86
%137 = fadd float %136, %131
%138 = fmul float %46, %86
%139 = fadd float %138, %133
%140 = fmul float %49, %86
%141 = fadd float %140, %135
%142 = fmul float %137, %137
%143 = fmul float %139, %139
%144 = fadd float %143, %142
%145 = fmul float %141, %141
%146 = fadd float %144, %145
%147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146)
%148 = fmul float %137, %147
%149 = fmul float %139, %147
%150 = fmul float %141, %147
%151 = fmul float %14, %148
%152 = fmul float %15, %149
%153 = fadd float %151, %152
%154 = fmul float %16, %150
%155 = fadd float %153, %154
%156 = fadd float %155, %17
%157 = fmul float %18, %148
%158 = fmul float %19, %149
%159 = fadd float %157, %158
%160 = fmul float %20, %150
%161 = fadd float %159, %160
%162 = fadd float %161, %21
%163 = fmul float %22, %148
%164 = fmul float %23, %149
%165 = fadd float %163, %164
%166 = fmul float %24, %150
%167 = fadd float %165, %166
%168 = fadd float %167, %25
%169 = fmul float %148, %149
%170 = fmul float %149, %150
%171 = fmul float %150, %150
%172 = fmul float %150, %148
%173 = fmul float %26, %169
%174 = fmul float %27, %170
%175 = fadd float %173, %174
%176 = fmul float %28, %171
%177 = fadd float %175, %176
%178 = fmul float %29, %172
%179 = fadd float %177, %178
%180 = fmul float %30, %169
%181 = fmul float %31, %170
%182 = fadd float %180, %181
%183 = fmul float %32, %171
%184 = fadd float %182, %183
%185 = fmul float %33, %172
%186 = fadd float %184, %185
%187 = fmul float %34, %169
%188 = fmul float %35, %170
%189 = fadd float %187, %188
%190 = fmul float %36, %171
%191 = fadd float %189, %190
%192 = fmul float %37, %172
%193 = fadd float %191, %192
%194 = fmul float %149, %149
%195 = fmul float %148, %148
%196 = fsub float %195, %194
%197 = fmul float %38, %196
%198 = fadd float %197, %179
%199 = fmul float %39, %196
%200 = fadd float %199, %186
%201 = fmul float %40, %196
%202 = fadd float %201, %193
%203 = fadd float %198, %156
%204 = fadd float %200, %162
%205 = fadd float %202, %168
%206 = fmul float %91, %52
%207 = fadd float %206, %54
%208 = fmul float %92, %53
%209 = fadd float %208, %55
%210 = fmul float %118, %50
%211 = fadd float %210, %51
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %125, float %126, float %118, float %120)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %207, float %209, float %211, float %203)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %204, float %205, float %205, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %116, float %118, float %120)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[12:15], 0x1c ; C2000D1C
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800
s_buffer_load_dword s1, s[12:15], 0x1d ; C2008D1D
s_buffer_load_dword s2, s[12:15], 0x1e ; C2010D1E
s_buffer_load_dword s5, s[12:15], 0x20 ; C2028D20
s_buffer_load_dword s6, s[12:15], 0x21 ; C2030D21
s_buffer_load_dword s7, s[12:15], 0x22 ; C2038D22
s_buffer_load_dword s8, s[12:15], 0x24 ; C2040D24
s_buffer_load_dword s9, s[12:15], 0x25 ; C2048D25
s_buffer_load_dword s10, s[12:15], 0x26 ; C2050D26
s_buffer_load_dword s11, s[12:15], 0x28 ; C2058D28
s_buffer_load_dword s16, s[12:15], 0x29 ; C2080D29
s_buffer_load_dword s17, s[12:15], 0x2a ; C2088D2A
s_buffer_load_dword s3, s[12:15], 0x32 ; C2018D32
s_buffer_load_dword s4, s[12:15], 0x33 ; C2020D33
s_buffer_load_dword s18, s[12:15], 0x34 ; C2090D34
s_buffer_load_dword s19, s[12:15], 0x35 ; C2098D35
s_buffer_load_dword s20, s[12:15], 0x36 ; C20A0D36
s_buffer_load_dword s21, s[12:15], 0x37 ; C20A8D37
s_buffer_load_dword s22, s[12:15], 0x38 ; C20B0D38
s_buffer_load_dword s23, s[12:15], 0x39 ; C20B8D39
s_buffer_load_dword s24, s[12:15], 0x3a ; C20C0D3A
s_buffer_load_dword s25, s[12:15], 0x3b ; C20C8D3B
s_buffer_load_dword s26, s[12:15], 0x3c ; C20D0D3C
s_buffer_load_dword s27, s[12:15], 0x3d ; C20D8D3D
s_buffer_load_dword s28, s[12:15], 0x3e ; C20E0D3E
s_buffer_load_dword s29, s[12:15], 0x3f ; C20E8D3F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s20 ; 7E000214
s_buffer_load_dword s20, s[12:15], 0x40 ; C20A0D40
v_mov_b32_e32 v10, s21 ; 7E140215
s_buffer_load_dword s21, s[12:15], 0x41 ; C20A8D41
s_buffer_load_dword s30, s[12:15], 0x42 ; C20F0D42
s_buffer_load_dword s31, s[12:15], 0x43 ; C20F8D43
s_buffer_load_dword s32, s[12:15], 0x44 ; C2100D44
s_buffer_load_dword s33, s[12:15], 0x45 ; C2108D45
s_buffer_load_dword s34, s[12:15], 0x46 ; C2110D46
s_buffer_load_dword s35, s[12:15], 0x47 ; C2118D47
v_mul_f32_e32 v11, s22, v1 ; 10160216
v_mul_f32_e32 v12, s5, v5 ; 10180A05
v_mac_f32_e32 v0, s18, v8 ; 3E001012
v_mac_f32_e32 v10, s19, v9 ; 3E141213
v_mac_f32_e32 v12, s6, v6 ; 3E180C06
v_mul_f32_e32 v8, s8, v5 ; 10100A08
v_mac_f32_e32 v8, s9, v6 ; 3E100C09
v_mul_f32_e32 v5, s11, v5 ; 100A0A0B
v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10
v_mac_f32_e32 v12, s7, v7 ; 3E180E07
v_mac_f32_e32 v8, s10, v7 ; 3E100E0A
v_mac_f32_e32 v5, s17, v7 ; 3E0A0E11
v_mac_f32_e32 v11, s26, v2 ; 3E16041A
v_mul_f32_e32 v6, s23, v1 ; 100C0217
v_mac_f32_e32 v6, s27, v2 ; 3E0C041B
v_mul_f32_e32 v7, s24, v1 ; 100E0218
v_mac_f32_e32 v7, s28, v2 ; 3E0E041C
v_mul_f32_e32 v1, s25, v1 ; 10020219
v_mac_f32_e32 v1, s29, v2 ; 3E02041D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v11, s20, v3 ; 3E160614
v_mac_f32_e32 v6, s21, v3 ; 3E0C0615
v_mac_f32_e32 v7, s30, v3 ; 3E0E061E
v_mac_f32_e32 v1, s31, v3 ; 3E02061F
v_mac_f32_e32 v11, s32, v4 ; 3E160820
v_mac_f32_e32 v6, s33, v4 ; 3E0C0821
v_mac_f32_e32 v7, s34, v4 ; 3E0E0822
v_mac_f32_e32 v1, s35, v4 ; 3E020823
s_buffer_load_dword s5, s[12:15], 0x0 ; C2028D00
s_buffer_load_dword s6, s[12:15], 0x4 ; C2030D04
s_buffer_load_dword s7, s[12:15], 0x5 ; C2038D05
s_buffer_load_dword s8, s[12:15], 0x6 ; C2040D06
s_buffer_load_dword s9, s[12:15], 0x7 ; C2048D07
s_buffer_load_dword s10, s[12:15], 0x8 ; C2050D08
s_buffer_load_dword s11, s[12:15], 0x9 ; C2058D09
s_buffer_load_dword s16, s[12:15], 0xa ; C2080D0A
s_buffer_load_dword s17, s[12:15], 0xb ; C2088D0B
s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C
s_buffer_load_dword s19, s[12:15], 0xd ; C2098D0D
s_buffer_load_dword s20, s[12:15], 0xe ; C20A0D0E
s_buffer_load_dword s21, s[12:15], 0xf ; C20A8D0F
s_buffer_load_dword s22, s[12:15], 0x10 ; C20B0D10
s_buffer_load_dword s23, s[12:15], 0x11 ; C20B8D11
s_buffer_load_dword s24, s[12:15], 0x12 ; C20C0D12
s_buffer_load_dword s25, s[12:15], 0x13 ; C20C8D13
s_buffer_load_dword s26, s[12:15], 0x14 ; C20D0D14
s_buffer_load_dword s27, s[12:15], 0x15 ; C20D8D15
s_buffer_load_dword s28, s[12:15], 0x16 ; C20E0D16
s_buffer_load_dword s29, s[12:15], 0x17 ; C20E8D17
s_buffer_load_dword s30, s[12:15], 0x18 ; C20F0D18
s_buffer_load_dword s31, s[12:15], 0x19 ; C20F8D19
s_buffer_load_dword s32, s[12:15], 0x1a ; C2100D1A
s_buffer_load_dword s12, s[12:15], 0x1b ; C2060D1B
v_mul_f32_e32 v2, v12, v12 ; 1004190C
v_mac_f32_e32 v2, v8, v8 ; 3E041108
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mul_f32_e32 v3, 0.5, v6 ; 10060CF0
v_mul_f32_e32 v4, 0.5, v1 ; 100802F0
v_mad_f32 v9, 0.5, v11, v4 ; D2820009 041216F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v4, s5, v3 ; 3E080605
exp 15, 32, 0, 0, 0, v9, v4, v7, v1 ; F800020F 01070409
v_mul_f32_e32 v3, v2, v8 ; 10061102
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v4, v2, v5 ; 10080B02
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v8, s23, v5 ; 10100A17
v_mul_f32_e32 v9, s27, v5 ; 10120A1B
v_mul_f32_e32 v5, s31, v5 ; 100A0A1F
v_mul_f32_e32 v2, v2, v12 ; 10041902
v_mul_f32_e32 v12, v3, v2 ; 10180503
v_mac_f32_e32 v8, s22, v12 ; 3E101816
v_mac_f32_e32 v9, s26, v12 ; 3E12181A
v_mac_f32_e32 v5, s30, v12 ; 3E0A181E
v_mul_f32_e32 v12, v4, v4 ; 10180904
v_mac_f32_e32 v8, s24, v12 ; 3E101818
v_mac_f32_e32 v9, s28, v12 ; 3E12181C
v_mac_f32_e32 v5, s32, v12 ; 3E0A1820
v_mul_f32_e32 v12, s7, v3 ; 10180607
v_mac_f32_e32 v12, s6, v2 ; 3E180406
v_mul_f32_e32 v13, s11, v3 ; 101A060B
v_mac_f32_e32 v13, s10, v2 ; 3E1A040A
v_mul_f32_e32 v14, s19, v3 ; 101C0613
v_mac_f32_e32 v14, s18, v2 ; 3E1C0412
v_mac_f32_e32 v12, s8, v4 ; 3E180808
v_mac_f32_e32 v13, s16, v4 ; 3E1A0810
v_mac_f32_e32 v14, s20, v4 ; 3E1C0814
v_mul_f32_e32 v4, v2, v4 ; 10080902
v_mac_f32_e32 v8, s25, v4 ; 3E100819
v_mac_f32_e32 v9, s29, v4 ; 3E12081D
v_mac_f32_e32 v5, s12, v4 ; 3E0A080C
v_mov_b32_e32 v4, s4 ; 7E080204
v_mac_f32_e32 v4, s3, v7 ; 3E080E03
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mad_f32 v2, v2, v2, -v3 ; D2820002 840E0502
v_add_f32_e32 v3, s9, v12 ; 06061809
v_mac_f32_e32 v8, s0, v2 ; 3E100400
v_mac_f32_e32 v9, s1, v2 ; 3E120401
v_mac_f32_e32 v5, s2, v2 ; 3E0A0402
v_add_f32_e32 v2, v3, v8 ; 06041103
exp 15, 33, 0, 0, 0, v0, v10, v4, v2 ; F800021F 02040A00
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, s17, v13 ; 06001A11
v_add_f32_e32 v2, s21, v14 ; 06041C15
v_add_f32_e32 v0, v0, v9 ; 06001300
v_add_f32_e32 v2, v2, v5 ; 06040B02
v_mov_b32_e32 v3, 0 ; 7E060280
exp 15, 34, 0, 0, 0, v0, v2, v2, v3 ; F800022F 03020200
exp 15, 12, 0, 1, 0, v11, v6, v7, v1 ; F80008CF 0107060B
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 668 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..1]
DCL CONST[3]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].yz, IN[2].yxyy
2: MOV TEMP[1].xy, IN[1].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MOV TEMP[2].xy, IN[0].xyyy
5: MOV TEMP[2].w, IN[0].wwww
6: TXP TEMP[2], TEMP[2], SAMP[1], 2D
7: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz
8: MUL TEMP[3].xyz, TEMP[0].xyzz, CONST[1].xyzz
9: MUL TEMP[2].x, TEMP[2].wwww, TEMP[1].wwww
10: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[3].xyzz
11: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz
12: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[2].xxxx, TEMP[0].xyzz
13: MOV_SAT TEMP[1].x, IN[1].zzzz
14: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz
15: MOV TEMP[0].w, IMM[0].xxxx
16: MOV OUT[0], TEMP[0]
17: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%33 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0
%35 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%38 = bitcast <8 x i32> addrspace(2)* %37 to <32 x i8> addrspace(2)*
%39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0
%40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%41 = bitcast <4 x i32> addrspace(2)* %40 to <16 x i8> addrspace(2)*
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%48 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%49 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%50 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%51 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%52 = bitcast float %46 to i32
%53 = bitcast float %47 to i32
%54 = insertelement <2 x i32> undef, i32 %52, i32 0
%55 = insertelement <2 x i32> %54, i32 %53, i32 1
%56 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %55, <32 x i8> %34, <16 x i8> %36, i32 2)
%57 = extractelement <4 x float> %56, i32 0
%58 = extractelement <4 x float> %56, i32 1
%59 = extractelement <4 x float> %56, i32 2
%60 = extractelement <4 x float> %56, i32 3
%61 = fdiv float %43, %45
%62 = fdiv float %44, %45
%63 = bitcast float %61 to i32
%64 = bitcast float %62 to i32
%65 = insertelement <2 x i32> undef, i32 %63, i32 0
%66 = insertelement <2 x i32> %65, i32 %64, i32 1
%67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %66, <32 x i8> %39, <16 x i8> %42, i32 2)
%68 = extractelement <4 x float> %67, i32 0
%69 = extractelement <4 x float> %67, i32 1
%70 = extractelement <4 x float> %67, i32 2
%71 = extractelement <4 x float> %67, i32 3
%72 = fadd float %68, %49
%73 = fadd float %69, %50
%74 = fadd float %70, %51
%75 = fmul float %72, %27
%76 = fmul float %73, %28
%77 = fmul float %74, %29
%78 = fmul float %71, %60
%79 = fmul float %57, %30
%80 = fmul float %58, %31
%81 = fmul float %59, %32
%82 = fmul float %79, %72
%83 = fmul float %80, %73
%84 = fmul float %81, %74
%85 = fmul float %75, %78
%86 = fadd float %85, %82
%87 = fmul float %76, %78
%88 = fadd float %87, %83
%89 = fmul float %77, %78
%90 = fadd float %89, %84
%91 = call float @llvm.AMDIL.clamp.(float %48, float 0.000000e+00, float 1.000000e+00)
%92 = call float @llvm.AMDGPU.lrp(float %91, float %86, float %24)
%93 = call float @llvm.AMDGPU.lrp(float %91, float %88, float %25)
%94 = call float @llvm.AMDGPU.lrp(float %91, float %90, float %26)
%95 = call i32 @llvm.SI.packf16(float %92, float %93)
%96 = bitcast i32 %95 to float
%97 = call i32 @llvm.SI.packf16(float %94, float 1.000000e+00)
%98 = bitcast i32 %97 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %96, float %98, float %96, float %98)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_mov_b32_e32 v5, 0x6f800000 ; 7E0A02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v4|, v5 ; D008016A 00020B04
v_mov_b32_e32 v5, 0x2f800000 ; 7E0A02FF 2F800000
v_cndmask_b32_e32 v5, 1.0, v5 ; 000A0AF2
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_mul_f32_e32 v4, v5, v4 ; 10080905
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
v_rcp_f32_e32 v4, v4 ; 7E085504
v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900
v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640B06
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
v_mul_f32_e32 v3, v1, v5 ; 10060B01
v_mul_f32_e32 v4, v2, v5 ; 10080B02
s_buffer_load_dword s4, s[0:3], 0xc ; C202010C
image_sample v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[8:11] ; F0800F00 00460103
s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
s_buffer_load_dword s6, s[0:3], 0xe ; C203010E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_add_f32_e32 v1, v9, v1 ; 06020309
v_add_f32_e32 v2, v10, v2 ; 0604050A
v_add_f32_e32 v0, v0, v3 ; 06000700
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
v_mul_f32_e32 v3, v14, v4 ; 1006090E
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
v_mul_f32_e32 v4, s4, v11 ; 10081604
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
v_mul_f32_e32 v5, s5, v12 ; 100A1805
v_mul_f32_e32 v6, s6, v13 ; 100C1A06
s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100
s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101
s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v7, s7, v1 ; 100E0207
v_mul_f32_e32 v1, v1, v4 ; 10020901
v_mul_f32_e32 v4, s8, v2 ; 10080408
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mul_f32_e32 v5, s4, v0 ; 100A0004
v_mul_f32_e32 v0, v0, v6 ; 10000D00
v_mac_f32_e32 v1, v3, v7 ; 3E020F03
v_mac_f32_e32 v2, v3, v4 ; 3E040903
v_mac_f32_e32 v0, v3, v5 ; 3E000B03
v_add_f32_e64 v3, 0, v8 clamp ; D2060803 00021080
v_sub_f32_e32 v4, 1.0, v3 ; 080806F2
v_mul_f32_e32 v5, s5, v4 ; 100A0805
v_mac_f32_e32 v5, v1, v3 ; 3E0A0701
v_mul_f32_e32 v1, s6, v4 ; 10020806
v_mac_f32_e32 v1, v2, v3 ; 3E020702
v_mul_f32_e32 v2, s0, v4 ; 10040800
v_mac_f32_e32 v2, v0, v3 ; 3E040700
v_cvt_pkrtz_f16_f32_e32 v0, v5, v1 ; 5E000305
v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 340 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xyz, IN[1].xyzx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Entry point emitted for the TGSI VERT listing printed just above this module
; (MUL/MAD of CONST[0..3] with the components of IN[0], plus a copy of IN[1].xyz).
; It reads 16 floats through the descriptor at element 0 of argument %1, fetches
; two vertex inputs through the descriptors at elements 0 and 1 of argument %4,
; and issues two llvm.SI.export calls.
; NOTE(review): the fourth export operand (32 / 12) is presumably the export
; target (first parameter slot / position) — confirm against the backend's
; export-target encoding.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor loaded from element 0 of argument %1; %13..%28 are the floats read
; through it at byte offsets 0..60 in steps of 4.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; First vertex input: descriptor from element 0 of argument %4, fetched at
; index %5 + %7 and split into its four components %33..%36.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Second vertex input: descriptor from element 1 of argument %4, same index;
; only components 0..2 are extracted (%41..%43).
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
; Component 0 of the first input times the floats at offsets 0..12.
%44 = fmul float %13, %33
%45 = fmul float %14, %33
%46 = fmul float %15, %33
%47 = fmul float %16, %33
; Add component 1 times the floats at offsets 16..28.
%48 = fmul float %17, %34
%49 = fadd float %48, %44
%50 = fmul float %18, %34
%51 = fadd float %50, %45
%52 = fmul float %19, %34
%53 = fadd float %52, %46
%54 = fmul float %20, %34
%55 = fadd float %54, %47
; Add component 2 times the floats at offsets 32..44.
%56 = fmul float %21, %35
%57 = fadd float %56, %49
%58 = fmul float %22, %35
%59 = fadd float %58, %51
%60 = fmul float %23, %35
%61 = fadd float %60, %53
%62 = fmul float %24, %35
%63 = fadd float %62, %55
; Add component 3 times the floats at offsets 48..60; %65, %67, %69 and %71
; are the four finished sums.
%64 = fmul float %25, %36
%65 = fadd float %64, %57
%66 = fmul float %26, %36
%67 = fadd float %66, %59
%68 = fmul float %27, %36
%69 = fadd float %68, %61
%70 = fmul float %28, %36
%71 = fadd float %70, %63
; First export carries the second input's three components with a constant 0.0
; in the fourth slot; second export carries the four accumulated sums.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v9, s13, v2 ; 1012040D
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mul_f32_e32 v10, s4, v2 ; 10140404
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v9, s11, v4 ; 3E12080B
v_mac_f32_e32 v10, s14, v4 ; 3E14080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v9, s17, v5 ; 3E120A11
v_mac_f32_e32 v10, s18, v5 ; 3E140A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v8, v1 ; F800020F 01080706
exp 15, 12, 0, 1, 0, v0, v9, v10, v2 ; F80008CF 020A0900
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0]
DCL TEMP[0], LOCAL
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MUL TEMP[0], TEMP[0], CONST[0]
3: MOV OUT[0], TEMP[0]
4: END
; ModuleID = 'tgsi'
; Entry point emitted for the TGSI FRAG listing printed just above this module
; (sample a 2D texture at IN[0].xy and multiply the texel by CONST[0]).
; It reads four floats through the descriptor at element 0 of argument %1,
; interpolates two values, samples with them, scales the four result components,
; packs them pairwise with llvm.SI.packf16 and issues one llvm.SI.export.
; NOTE(review): the first two llvm.SI.fs.interp operands are presumably the
; channel and attribute indices, and the trailing i32 2 of the sample call
; presumably selects the 2D target — confirm against the intrinsic definitions.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Descriptor loaded from element 0 of argument %1; %24..%27 are the floats read
; through it at byte offsets 0, 4, 8 and 12.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
; The first 32 bytes behind argument %3 and the first 16 bytes behind argument
; %2 are loaded; both values are passed to the sample call below.
%28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0
%30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0
; Two interpolated values (first operand 0 and 1), both using %5 and %7.
%32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; The sample intrinsic takes its address as <2 x i32>, so the two floats are
; bit-cast (not converted) and inserted into a vector.
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %29, <16 x i8> %31, i32 2)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
; Component-wise product of the sampled value with the four loaded floats.
%43 = fmul float %39, %24
%44 = fmul float %40, %25
%45 = fmul float %41, %26
%46 = fmul float %42, %27
; Pack components (0,1) and (2,3) into one i32 each, then bit-cast to float
; because llvm.SI.export only takes float operands.
%47 = call i32 @llvm.SI.packf16(float %43, float %44)
%48 = bitcast i32 %47 to float
%49 = call i32 @llvm.SI.packf16(float %45, float %46)
%50 = bitcast i32 %49 to float
; The two packed values are passed twice (slots 0/1 and 2/3).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102
s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[20:23] ; F0800F00 00A30002
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v0, s6, v0 ; 10000006
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mul_f32_e32 v3, s0, v3 ; 10060600
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 108 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xyz, IN[1].xyzx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Entry point emitted for the TGSI VERT listing printed just above this module
; (MUL/MAD of CONST[0..3] with the components of IN[0], plus a copy of IN[1].xyz).
; It reads 16 floats through the descriptor at element 0 of argument %1, fetches
; two vertex inputs through the descriptors at elements 0 and 1 of argument %4,
; and issues two llvm.SI.export calls.
; NOTE(review): the fourth export operand (32 / 12) is presumably the export
; target (first parameter slot / position) — confirm against the backend's
; export-target encoding.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor loaded from element 0 of argument %1; %13..%28 are the floats read
; through it at byte offsets 0..60 in steps of 4.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; First vertex input: descriptor from element 0 of argument %4, fetched at
; index %5 + %7 and split into its four components %33..%36.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Second vertex input: descriptor from element 1 of argument %4, same index;
; only components 0..2 are extracted (%41..%43).
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
; Component 0 of the first input times the floats at offsets 0..12.
%44 = fmul float %13, %33
%45 = fmul float %14, %33
%46 = fmul float %15, %33
%47 = fmul float %16, %33
; Add component 1 times the floats at offsets 16..28.
%48 = fmul float %17, %34
%49 = fadd float %48, %44
%50 = fmul float %18, %34
%51 = fadd float %50, %45
%52 = fmul float %19, %34
%53 = fadd float %52, %46
%54 = fmul float %20, %34
%55 = fadd float %54, %47
; Add component 2 times the floats at offsets 32..44.
%56 = fmul float %21, %35
%57 = fadd float %56, %49
%58 = fmul float %22, %35
%59 = fadd float %58, %51
%60 = fmul float %23, %35
%61 = fadd float %60, %53
%62 = fmul float %24, %35
%63 = fadd float %62, %55
; Add component 3 times the floats at offsets 48..60; %65, %67, %69 and %71
; are the four finished sums.
%64 = fmul float %25, %36
%65 = fadd float %64, %57
%66 = fmul float %26, %36
%67 = fadd float %66, %59
%68 = fmul float %27, %36
%69 = fadd float %68, %61
%70 = fmul float %28, %36
%71 = fadd float %70, %63
; First export carries the second input's three components with a constant 0.0
; in the fourth slot; second export carries the four accumulated sums.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v9, s13, v2 ; 1012040D
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mul_f32_e32 v10, s4, v2 ; 10140404
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v9, s11, v4 ; 3E12080B
v_mac_f32_e32 v10, s14, v4 ; 3E14080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v9, s17, v5 ; 3E120A11
v_mac_f32_e32 v10, s18, v5 ; 3E140A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v8, v1 ; F800020F 01080706
exp 15, 12, 0, 1, 0, v0, v9, v10, v2 ; F80008CF 020A0900
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..1]
DCL TEMP[0], LOCAL
0: MUL TEMP[0].xy, IN[0].xyyy, CONST[1].xxxx
1: MOV TEMP[0].xy, TEMP[0].xyyy
2: TEX TEMP[0], TEMP[0], SAMP[0], 2D
3: MUL TEMP[0], TEMP[0], CONST[0]
4: MOV OUT[0], TEMP[0]
5: END
; ModuleID = 'tgsi'
; Entry point emitted for the TGSI FRAG listing printed just above this module
; (scale IN[0].xy by CONST[1].x, sample a 2D texture there, multiply the texel
; by CONST[0]).
; It reads five floats through the descriptor at element 0 of argument %1,
; interpolates two values and scales both by the fifth float, samples with them,
; scales the four result components by the first four floats, packs them
; pairwise with llvm.SI.packf16 and issues one llvm.SI.export.
; NOTE(review): the first two llvm.SI.fs.interp operands are presumably the
; channel and attribute indices, and the trailing i32 2 of the sample call
; presumably selects the 2D target — confirm against the intrinsic definitions.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Descriptor loaded from element 0 of argument %1; %24..%27 are the floats at
; byte offsets 0..12 and %28 is the float at byte offset 16.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
; The first 32 bytes behind argument %3 and the first 16 bytes behind argument
; %2 are loaded; both values are passed to the sample call below.
%29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0
%31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
; Two interpolated values (first operand 0 and 1), both using %5 and %7.
%33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; Both interpolated values are scaled by the same float (%28) before sampling.
%35 = fmul float %33, %28
%36 = fmul float %34, %28
; The sample intrinsic takes its address as <2 x i32>, so the two floats are
; bit-cast (not converted) and inserted into a vector.
%37 = bitcast float %35 to i32
%38 = bitcast float %36 to i32
%39 = insertelement <2 x i32> undef, i32 %37, i32 0
%40 = insertelement <2 x i32> %39, i32 %38, i32 1
%41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %30, <16 x i8> %32, i32 2)
%42 = extractelement <4 x float> %41, i32 0
%43 = extractelement <4 x float> %41, i32 1
%44 = extractelement <4 x float> %41, i32 2
%45 = extractelement <4 x float> %41, i32 3
; Component-wise product of the sampled value with the floats at offsets 0..12.
%46 = fmul float %42, %24
%47 = fmul float %43, %25
%48 = fmul float %44, %26
%49 = fmul float %45, %27
; Pack components (0,1) and (2,3) into one i32 each, then bit-cast to float
; because llvm.SI.export only takes float operands.
%50 = call i32 @llvm.SI.packf16(float %46, float %47)
%51 = bitcast i32 %50 to float
%52 = call i32 @llvm.SI.packf16(float %48, float %49)
%53 = bitcast i32 %52 to float
; The two packed values are passed twice (slots 0/1 and 2/3).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %51, float %53, float %51, float %53)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101
s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102
s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100
v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s4, v2 ; 10020404
v_mul_f32_e32 v2, s4, v0 ; 10040004
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[12:15] ; F0800F00 00640001
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v0, s5, v0 ; 10000005
v_mul_f32_e32 v1, s6, v1 ; 10020206
v_mul_f32_e32 v2, s7, v2 ; 10040407
v_mul_f32_e32 v3, s0, v3 ; 10060600
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 120 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..19]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[16], IN[0].xxxx
1: MAD TEMP[0], CONST[17], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[18], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[19], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[7], IN[0].xxxx
5: MAD TEMP[1], CONST[8], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[9], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1].xyz, CONST[10], IN[0].wwww, TEMP[1]
8: MOV TEMP[2].x, CONST[11].xxxx
9: MOV TEMP[2].y, CONST[12].xxxx
10: MOV TEMP[2].z, CONST[13].xxxx
11: MOV TEMP[3].x, CONST[11].yyyy
12: MOV TEMP[3].y, CONST[12].yyyy
13: MOV TEMP[3].z, CONST[13].yyyy
14: MOV TEMP[4].x, CONST[11].zzzz
15: MOV TEMP[4].y, CONST[12].zzzz
16: MOV TEMP[4].z, CONST[13].zzzz
17: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx
18: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz
19: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz
20: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
21: RSQ TEMP[3].x, TEMP[3].xxxx
22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
23: MUL TEMP[3].xyz, CONST[7].xyzz, IN[3].xxxx
24: MAD TEMP[3].xyz, CONST[8].xyzz, IN[3].yyyy, TEMP[3].xyzz
25: MAD TEMP[3].xyz, CONST[9].xyzz, IN[3].zzzz, TEMP[3].xyzz
26: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
27: RSQ TEMP[4].x, TEMP[4].xxxx
28: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
29: MUL TEMP[4].xyz, TEMP[2].zxyy, TEMP[3].yzxx
30: MAD TEMP[4].xyz, TEMP[2].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz
31: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[3].wwww
32: MOV TEMP[5].x, TEMP[3].xxxx
33: MOV TEMP[5].y, TEMP[4].xxxx
34: MOV TEMP[5].z, TEMP[2].xxxx
35: MOV TEMP[5].w, TEMP[1].xxxx
36: MOV TEMP[6].x, TEMP[3].yyyy
37: MOV TEMP[6].y, TEMP[4].yyyy
38: MOV TEMP[6].z, TEMP[2].yyyy
39: MOV TEMP[6].w, TEMP[1].yyyy
40: MOV TEMP[3].x, TEMP[3].zzzz
41: MOV TEMP[3].y, TEMP[4].zzzz
42: MOV TEMP[3].z, TEMP[2].zzzz
43: MOV TEMP[3].w, TEMP[1].zzzz
44: MOV TEMP[1].w, IMM[0].xxxx
45: MOV TEMP[1].xyz, TEMP[2].xyzx
46: DP4 TEMP[4].x, CONST[0], TEMP[1]
47: DP4 TEMP[7].x, CONST[1], TEMP[1]
48: MOV TEMP[4].y, TEMP[7].xxxx
49: DP4 TEMP[1].x, CONST[2], TEMP[1]
50: MOV TEMP[4].z, TEMP[1].xxxx
51: MUL TEMP[1], TEMP[2].xyzz, TEMP[2].yzzx
52: DP4 TEMP[7].x, CONST[3], TEMP[1]
53: DP4 TEMP[8].x, CONST[4], TEMP[1]
54: MOV TEMP[7].y, TEMP[8].xxxx
55: DP4 TEMP[1].x, CONST[5], TEMP[1]
56: MOV TEMP[7].z, TEMP[1].xxxx
57: MUL TEMP[1].x, TEMP[2].yyyy, TEMP[2].yyyy
58: MAD TEMP[1].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[1].xxxx
59: MAD TEMP[1].xyz, CONST[6].xyzz, TEMP[1].xxxx, TEMP[7].xyzz
60: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xyzz
61: MAD TEMP[2].xy, IN[2].xyyy, CONST[15].xyyy, CONST[15].zwww
62: MOV TEMP[2].w, TEMP[1].xxxx
63: MOV TEMP[1].xy, TEMP[1].yzyy
64: MOV TEMP[2].z, TEMP[0].zzzz
65: MOV OUT[4], TEMP[2]
66: MOV OUT[1], TEMP[5]
67: MOV OUT[2], TEMP[6]
68: MOV OUT[3], TEMP[3]
69: MOV OUT[0], TEMP[0]
70: MOV OUT[5], TEMP[1]
71: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
%108 = extractelement <4 x float> %105, i32 2
%109 = extractelement <4 x float> %105, i32 3
%110 = fmul float %65, %85
%111 = fmul float %66, %85
%112 = fmul float %67, %85
%113 = fmul float %68, %85
%114 = fmul float %69, %86
%115 = fadd float %114, %110
%116 = fmul float %70, %86
%117 = fadd float %116, %111
%118 = fmul float %71, %86
%119 = fadd float %118, %112
%120 = fmul float %72, %86
%121 = fadd float %120, %113
%122 = fmul float %73, %87
%123 = fadd float %122, %115
%124 = fmul float %74, %87
%125 = fadd float %124, %117
%126 = fmul float %75, %87
%127 = fadd float %126, %119
%128 = fmul float %76, %87
%129 = fadd float %128, %121
%130 = fmul float %77, %88
%131 = fadd float %130, %123
%132 = fmul float %78, %88
%133 = fadd float %132, %125
%134 = fmul float %79, %88
%135 = fadd float %134, %127
%136 = fmul float %80, %88
%137 = fadd float %136, %129
%138 = fmul float %40, %85
%139 = fmul float %41, %85
%140 = fmul float %42, %85
%141 = fmul float %43, %86
%142 = fadd float %141, %138
%143 = fmul float %44, %86
%144 = fadd float %143, %139
%145 = fmul float %45, %86
%146 = fadd float %145, %140
%147 = fmul float %46, %87
%148 = fadd float %147, %142
%149 = fmul float %47, %87
%150 = fadd float %149, %144
%151 = fmul float %48, %87
%152 = fadd float %151, %146
%153 = fmul float %49, %88
%154 = fadd float %153, %148
%155 = fmul float %50, %88
%156 = fadd float %155, %150
%157 = fmul float %51, %88
%158 = fadd float %157, %152
%159 = fmul float %52, %93
%160 = fmul float %55, %93
%161 = fmul float %58, %93
%162 = fmul float %53, %94
%163 = fadd float %162, %159
%164 = fmul float %56, %94
%165 = fadd float %164, %160
%166 = fmul float %59, %94
%167 = fadd float %166, %161
%168 = fmul float %54, %95
%169 = fadd float %168, %163
%170 = fmul float %57, %95
%171 = fadd float %170, %165
%172 = fmul float %60, %95
%173 = fadd float %172, %167
%174 = fmul float %169, %169
%175 = fmul float %171, %171
%176 = fadd float %175, %174
%177 = fmul float %173, %173
%178 = fadd float %176, %177
%179 = call float @llvm.AMDGPU.rsq.clamped.f32(float %178)
%180 = fmul float %169, %179
%181 = fmul float %171, %179
%182 = fmul float %173, %179
%183 = fmul float %40, %106
%184 = fmul float %41, %106
%185 = fmul float %42, %106
%186 = fmul float %43, %107
%187 = fadd float %186, %183
%188 = fmul float %44, %107
%189 = fadd float %188, %184
%190 = fmul float %45, %107
%191 = fadd float %190, %185
%192 = fmul float %46, %108
%193 = fadd float %192, %187
%194 = fmul float %47, %108
%195 = fadd float %194, %189
%196 = fmul float %48, %108
%197 = fadd float %196, %191
%198 = fmul float %193, %193
%199 = fmul float %195, %195
%200 = fadd float %199, %198
%201 = fmul float %197, %197
%202 = fadd float %200, %201
%203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202)
%204 = fmul float %193, %203
%205 = fmul float %195, %203
%206 = fmul float %197, %203
%207 = fmul float %182, %205
%208 = fmul float %180, %206
%209 = fmul float %181, %204
%210 = fmul float %181, %206
%211 = fsub float %210, %207
%212 = fmul float %182, %204
%213 = fsub float %212, %208
%214 = fmul float %180, %205
%215 = fsub float %214, %209
%216 = fmul float %211, %109
%217 = fmul float %213, %109
%218 = fmul float %215, %109
%219 = fmul float %13, %180
%220 = fmul float %14, %181
%221 = fadd float %219, %220
%222 = fmul float %15, %182
%223 = fadd float %221, %222
%224 = fadd float %223, %16
%225 = fmul float %17, %180
%226 = fmul float %18, %181
%227 = fadd float %225, %226
%228 = fmul float %19, %182
%229 = fadd float %227, %228
%230 = fadd float %229, %20
%231 = fmul float %21, %180
%232 = fmul float %22, %181
%233 = fadd float %231, %232
%234 = fmul float %23, %182
%235 = fadd float %233, %234
%236 = fadd float %235, %24
%237 = fmul float %180, %181
%238 = fmul float %181, %182
%239 = fmul float %182, %182
%240 = fmul float %182, %180
%241 = fmul float %25, %237
%242 = fmul float %26, %238
%243 = fadd float %241, %242
%244 = fmul float %27, %239
%245 = fadd float %243, %244
%246 = fmul float %28, %240
%247 = fadd float %245, %246
%248 = fmul float %29, %237
%249 = fmul float %30, %238
%250 = fadd float %248, %249
%251 = fmul float %31, %239
%252 = fadd float %250, %251
%253 = fmul float %32, %240
%254 = fadd float %252, %253
%255 = fmul float %33, %237
%256 = fmul float %34, %238
%257 = fadd float %255, %256
%258 = fmul float %35, %239
%259 = fadd float %257, %258
%260 = fmul float %36, %240
%261 = fadd float %259, %260
%262 = fmul float %181, %181
%263 = fmul float %180, %180
%264 = fsub float %263, %262
%265 = fmul float %37, %264
%266 = fadd float %265, %247
%267 = fmul float %38, %264
%268 = fadd float %267, %254
%269 = fmul float %39, %264
%270 = fadd float %269, %261
%271 = fadd float %266, %224
%272 = fadd float %268, %230
%273 = fadd float %270, %236
%274 = fmul float %100, %61
%275 = fadd float %274, %63
%276 = fmul float %101, %62
%277 = fadd float %276, %64
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %204, float %216, float %180, float %154)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %205, float %217, float %181, float %156)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %206, float %218, float %182, float %158)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %275, float %277, float %135, float %271)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %272, float %273, float %273, float %240)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %131, float %133, float %135, float %137)
ret void
}
; External intrinsics, attribute groups and metadata for the module whose
; function body ends just above.
;
; Takes a <16 x i8> value plus an i32 and returns one float.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Takes a <16 x i8> value plus two i32 operands and returns a <4 x float>;
; the function above extracts the four components individually.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Called above on a sum of squares; the result scales the three components
; that were squared.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Five i32 operands followed by four float values. Unlike the declarations
; above it carries no attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; NOTE(review): #0 is presumably the group attached to this module's define
; line, which is outside this part of the dump - confirm.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
; #1 is the group referenced by the three readnone declarations above.
attributes #1 = { nounwind readnone }
; Metadata node that loads in this dump reference as !tbaa !0.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the LLVM IR function that ends just above.
; Notation used in these notes:
;   c[0xNN]  value fetched by s_buffer_load_dword with offset operand 0xNN
;   v_mac_f32 accumulates into its destination (dst += src0 * src1), so a
;   v_mul/v_mov followed by v_mac instructions builds one sum of products.
;
; Fetch four 4-dword descriptors through the pointer in s[8:9] and form the
; index v0 = s10 + v0 used by the idxen buffer loads. The last s_load
; overwrites s[8:11], so s10 is consumed by the add before that.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four xyzw fetches indexed by v0. The destination ranges overlap: the third
; fetch rewrites v8 and the fourth rewrites v10/v11, so only v5..v7 of the
; second fetch and v8..v9 of the third survive.
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[10:13], v0, s[8:11], 0 idxen ; E00C2000 80020A00
; Descriptor read through s[2:3]; every s_buffer_load_dword below uses it.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x3e ; C202013E
s_buffer_load_dword s5, s[0:3], 0x3f ; C202813F
s_buffer_load_dword s6, s[0:3], 0x3c ; C203013C
s_buffer_load_dword s7, s[0:3], 0x3d ; C203813D
s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C
s_buffer_load_dword s9, s[0:3], 0x40 ; C2048140
s_buffer_load_dword s10, s[0:3], 0x41 ; C2050141
s_buffer_load_dword s11, s[0:3], 0x42 ; C2058142
s_waitcnt lgkmcnt(0) ; BF8C007F
; Scale-and-bias of the third fetch's first two components:
;   v0 = c[0x3e] + c[0x3c] * v8
;   v8 = c[0x3f] + c[0x3d] * v9
; Scalar registers are reloaded with new constants as soon as their old
; value has been consumed, so loads and arithmetic are interleaved.
;
; Three-term sums over the second fetch (v5, v6, v7):
;   v9  = c[0x2c]*v5 + c[0x2d]*v6 + c[0x2e]*v7
;   v14 = c[0x30]*v5 + c[0x31]*v6 + c[0x32]*v7
;   v5  = c[0x34]*v5 + c[0x35]*v6 + c[0x36]*v7   (v5 is overwritten last)
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D
v_mac_f32_e32 v0, s6, v8 ; 3E001006
v_mov_b32_e32 v8, s5 ; 7E100205
s_buffer_load_dword s5, s[0:3], 0x30 ; C2028130
s_buffer_load_dword s6, s[0:3], 0x31 ; C2030131
v_mac_f32_e32 v8, s7, v9 ; 3E101207
s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134
v_mul_f32_e32 v9, s8, v5 ; 10120A08
s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135
s_buffer_load_dword s12, s[0:3], 0x2e ; C206012E
s_buffer_load_dword s13, s[0:3], 0x32 ; C2068132
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v6 ; 3E120C04
s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136
v_mul_f32_e32 v14, s5, v5 ; 101C0A05
v_mac_f32_e32 v14, s6, v6 ; 3E1C0C06
s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144
v_mul_f32_e32 v5, s7, v5 ; 100A0A07
s_buffer_load_dword s6, s[0:3], 0x45 ; C2030145
v_mac_f32_e32 v5, s8, v6 ; 3E0A0C08
v_mac_f32_e32 v9, s12, v7 ; 3E120E0C
s_buffer_load_dword s7, s[0:3], 0x46 ; C2038146
v_mac_f32_e32 v14, s13, v7 ; 3E1C0E0D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s4, v7 ; 3E0A0E04
; Four-term sums over the first fetch (v1..v4); for k = 0..3 the results
; v6, v7, v15, v16 are
;   c[0x40+k]*v1 + c[0x44+k]*v2 + c[0x48+k]*v3 + c[0x4c+k]*v4
; These four values are the operands of the final "exp 15, 12" below.
v_mul_f32_e32 v6, s9, v1 ; 100C0209
s_buffer_load_dword s4, s[0:3], 0x47 ; C2020147
v_mac_f32_e32 v6, s5, v2 ; 3E0C0405
v_mul_f32_e32 v7, s10, v1 ; 100E020A
v_mac_f32_e32 v7, s6, v2 ; 3E0E0406
v_mul_f32_e32 v15, s11, v1 ; 101E020B
s_buffer_load_dword s5, s[0:3], 0x43 ; C2028143
v_mac_f32_e32 v15, s7, v2 ; 3E1E0407
s_buffer_load_dword s6, s[0:3], 0x48 ; C2030148
s_buffer_load_dword s7, s[0:3], 0x49 ; C2038149
s_buffer_load_dword s8, s[0:3], 0x4a ; C204014A
s_buffer_load_dword s9, s[0:3], 0x4b ; C204814B
s_buffer_load_dword s10, s[0:3], 0x4c ; C205014C
s_buffer_load_dword s11, s[0:3], 0x4d ; C205814D
s_buffer_load_dword s12, s[0:3], 0x4e ; C206014E
s_buffer_load_dword s13, s[0:3], 0x4f ; C206814F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v16, s5, v1 ; 10200205
v_mac_f32_e32 v16, s4, v2 ; 3E200404
v_mac_f32_e32 v6, s6, v3 ; 3E0C0606
v_mac_f32_e32 v7, s7, v3 ; 3E0E0607
v_mac_f32_e32 v15, s8, v3 ; 3E1E0608
v_mac_f32_e32 v16, s9, v3 ; 3E200609
v_mac_f32_e32 v6, s10, v4 ; 3E0C080A
v_mac_f32_e32 v7, s11, v4 ; 3E0E080B
v_mac_f32_e32 v15, s12, v4 ; 3E1E080C
v_mac_f32_e32 v16, s13, v4 ; 3E20080D
; Constants c[0x1c..0x2a] are used twice:
;  - four-term sums over the first fetch (v1..v4), for k = 0..2 giving
;    v17, v18, v1 = c[0x1c+k]*v1 + c[0x20+k]*v2 + c[0x24+k]*v3 + c[0x28+k]*v4
;    (v1 is overwritten by the last of these);
;  - three-term sums over the fourth fetch (v10, v11, v12), giving
;    v2, v3, v4 = c[0x1c+k]*v10 + c[0x20+k]*v11 + c[0x24+k]*v12
s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C
s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120
s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121
s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122
s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124
s_buffer_load_dword s9, s[0:3], 0x28 ; C2048128
s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D
s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E
s_buffer_load_dword s12, s[0:3], 0x25 ; C2060125
s_buffer_load_dword s13, s[0:3], 0x29 ; C2068129
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v17, s4, v1 ; 10220204
v_mac_f32_e32 v17, s5, v2 ; 3E220405
s_buffer_load_dword s14, s[0:3], 0x26 ; C2070126
s_buffer_load_dword s15, s[0:3], 0x2a ; C207812A
v_mac_f32_e32 v17, s8, v3 ; 3E220608
v_mac_f32_e32 v17, s9, v4 ; 3E220809
v_mul_f32_e32 v18, s10, v1 ; 1024020A
v_mac_f32_e32 v18, s6, v2 ; 3E240406
v_mac_f32_e32 v18, s12, v3 ; 3E24060C
v_mac_f32_e32 v18, s13, v4 ; 3E24080D
v_mul_f32_e32 v1, s11, v1 ; 1002020B
v_mac_f32_e32 v1, s7, v2 ; 3E020407
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s14, v3 ; 3E02060E
v_mac_f32_e32 v1, s15, v4 ; 3E02080F
v_mul_f32_e32 v2, s4, v10 ; 10041404
v_mac_f32_e32 v2, s5, v11 ; 3E041605
v_mul_f32_e32 v3, s10, v10 ; 1006140A
v_mac_f32_e32 v3, s6, v11 ; 3E061606
v_mul_f32_e32 v4, s11, v10 ; 1008140B
v_mac_f32_e32 v4, s7, v11 ; 3E081607
v_mac_f32_e32 v2, s8, v12 ; 3E041808
v_mac_f32_e32 v3, s12, v12 ; 3E06180C
v_mac_f32_e32 v4, s14, v12 ; 3E08180E
; Scale both 3-vectors by the clamped reciprocal square root of their own
; squared length:
;   (v9, v14, v5) -> (v9, v12, v5)   scale factor held in v10
;   (v2, v3, v4)  -> (v2, v3, v4)    scale factor held in v11
v_mul_f32_e32 v10, v9, v9 ; 10141309
v_mac_f32_e32 v10, v14, v14 ; 3E141D0E
v_mac_f32_e32 v10, v5, v5 ; 3E140B05
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
v_mul_f32_e32 v11, v2, v2 ; 10160502
v_mac_f32_e32 v11, v3, v3 ; 3E160703
v_mac_f32_e32 v11, v4, v4 ; 3E160904
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v12, v10, v14 ; 10181D0A
v_mul_f32_e32 v5, v10, v5 ; 100A0B0A
v_mul_f32_e32 v2, v11, v2 ; 1004050B
v_mul_f32_e32 v3, v11, v3 ; 1006070B
v_mul_f32_e32 v4, v11, v4 ; 1008090B
; Cross product (v9, v12, v5) x (v2, v3, v4); each component is a v_mul
; followed by a v_mad_f32 with a negated addend:
;   v10 = v12*v4 - v3*v5
;   v11 = v5*v2  - v4*v9
;   v14 = v9*v3  - v2*v12
; The result is then scaled by v13, the last component of the fourth fetch,
; leaving the three components in v10, v11, v13.
v_mul_f32_e32 v10, v3, v5 ; 10140B03
v_mad_f32 v10, v12, v4, -v10 ; D282000A 842A090C
v_mul_f32_e32 v11, v4, v9 ; 10161304
v_mad_f32 v11, v5, v2, -v11 ; D282000B 842E0505
v_mul_f32_e32 v14, v2, v12 ; 101C1902
v_mad_f32 v14, v9, v3, -v14 ; D282000E 843A0709
v_mul_f32_e32 v10, v13, v10 ; 1014150D
v_mul_f32_e32 v11, v13, v11 ; 1016170D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
; Bulk load of c[0x00..0x1a] into s0 and s4..s29. The final load targets s0,
; which is part of the descriptor pair s[0:3]; no buffer load follows it.
s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119
s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102
s_buffer_load_dword s9, s[0:3], 0x3 ; C2048103
s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104
s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105
s_buffer_load_dword s12, s[0:3], 0x6 ; C2060106
s_buffer_load_dword s13, s[0:3], 0x7 ; C2068107
s_buffer_load_dword s14, s[0:3], 0x8 ; C2070108
s_buffer_load_dword s15, s[0:3], 0x9 ; C2078109
s_buffer_load_dword s16, s[0:3], 0xa ; C208010A
s_buffer_load_dword s17, s[0:3], 0xb ; C208810B
s_buffer_load_dword s18, s[0:3], 0xc ; C209010C
s_buffer_load_dword s19, s[0:3], 0xd ; C209810D
s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E
s_buffer_load_dword s21, s[0:3], 0x14 ; C20A8114
s_buffer_load_dword s22, s[0:3], 0x15 ; C20B0115
s_buffer_load_dword s23, s[0:3], 0xf ; C20B810F
s_buffer_load_dword s24, s[0:3], 0x10 ; C20C0110
s_buffer_load_dword s25, s[0:3], 0x11 ; C20C8111
s_buffer_load_dword s26, s[0:3], 0x12 ; C20D0112
s_buffer_load_dword s27, s[0:3], 0x13 ; C20D8113
s_buffer_load_dword s28, s[0:3], 0x16 ; C20E0116
s_buffer_load_dword s29, s[0:3], 0x17 ; C20E8117
s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118
; Exports 32 and 33 are issued while the scalar loads above are in flight.
exp 15, 32, 0, 0, 0, v2, v10, v9, v17 ; F800020F 11090A02
exp 15, 33, 0, 0, 0, v3, v11, v12, v18 ; F800021F 120C0B03
; Wait on both the export and scalar-load counters: the code below
; overwrites v2, v3, v10, v11, v17 and v18, all of which were export
; operands, and reads the constants just requested.
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
; With x = v9, y = v12, z = v5 (the first scaled vector), build three
; polynomials in x, y, z:
;   quadratic part (accumulated in v3, v10, v2):
;     v3  = c[0x0c]*x*y + c[0x0d]*y*z + c[0x0e]*z*z + c[0x0f]*z*x + c[0x18]*(x*x - y*y)
;     v10 = c[0x10]*x*y + c[0x11]*y*z + c[0x12]*z*z + c[0x13]*z*x + c[0x19]*(x*x - y*y)
;     v2  = c[0x14]*x*y + c[0x15]*y*z + c[0x16]*z*z + c[0x17]*z*x + c[0x1a]*(x*x - y*y)
;   linear part (accumulated in v11, v14, v17, then biased into v9, v11, v12):
;     v9  = c[0x00]*x + c[0x01]*y + c[0x02]*z + c[0x03]
;     v11 = c[0x04]*x + c[0x05]*y + c[0x06]*z + c[0x07]
;     v12 = c[0x08]*x + c[0x09]*y + c[0x0a]*z + c[0x0b]
; The linear terms that read x and y are issued before v12 and v9 are
; overwritten with y*y and x*x - y*y. v18 keeps the product z*x and is
; reused as the last operand of export 36.
v_mul_f32_e32 v2, v5, v12 ; 10041905
v_mul_f32_e32 v3, s19, v2 ; 10060413
v_mul_f32_e32 v10, s25, v2 ; 10140419
v_mul_f32_e32 v2, s22, v2 ; 10040416
v_mul_f32_e32 v11, v12, v9 ; 1016130C
v_mac_f32_e32 v3, s18, v11 ; 3E061612
v_mac_f32_e32 v10, s24, v11 ; 3E141618
v_mac_f32_e32 v2, s21, v11 ; 3E041615
v_mul_f32_e32 v11, v5, v5 ; 10160B05
v_mac_f32_e32 v3, s20, v11 ; 3E061614
v_mac_f32_e32 v10, s26, v11 ; 3E14161A
v_mac_f32_e32 v2, s28, v11 ; 3E04161C
v_mul_f32_e32 v11, s7, v12 ; 10161807
v_mac_f32_e32 v11, s6, v9 ; 3E161206
v_mul_f32_e32 v14, s11, v12 ; 101C180B
v_mac_f32_e32 v14, s10, v9 ; 3E1C120A
v_mul_f32_e32 v17, s15, v12 ; 1022180F
v_mac_f32_e32 v17, s14, v9 ; 3E22120E
v_mul_f32_e32 v18, v9, v5 ; 10240B09
v_mac_f32_e32 v3, s23, v18 ; 3E062417
v_mac_f32_e32 v10, s27, v18 ; 3E14241B
v_mac_f32_e32 v2, s29, v18 ; 3E04241D
v_mul_f32_e32 v12, v12, v12 ; 1018190C
v_mad_f32 v9, v9, v9, -v12 ; D2820009 84321309
v_mac_f32_e32 v3, s0, v9 ; 3E061200
v_mac_f32_e32 v10, s4, v9 ; 3E141204
v_mac_f32_e32 v2, s5, v9 ; 3E041205
v_mac_f32_e32 v11, s8, v5 ; 3E160A08
v_mac_f32_e32 v14, s12, v5 ; 3E1C0A0C
v_mac_f32_e32 v17, s16, v5 ; 3E220A10
v_add_f32_e32 v9, s9, v11 ; 06121609
v_add_f32_e32 v11, s13, v14 ; 06161C0D
v_add_f32_e32 v12, s17, v17 ; 06182211
; Export 34 still reads v1 (third four-term sum); v1 and v0 are only reused
; as scratch after the following expcnt waits.
exp 15, 34, 0, 0, 0, v4, v13, v5, v1 ; F800022F 01050D04
s_waitcnt expcnt(0) ; BF8C070F
; Sum of linear and quadratic parts: first polynomial goes out as the last
; operand of export 35, the other two as the leading operands of export 36
; (v1 is passed twice there).
v_add_f32_e32 v1, v9, v3 ; 06020709
exp 15, 35, 0, 0, 0, v0, v8, v15, v1 ; F800023F 010F0800
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, v11, v10 ; 0600150B
v_add_f32_e32 v1, v12, v2 ; 0602050C
exp 15, 36, 0, 0, 0, v0, v1, v1, v18 ; F800024F 12010100
; Final export, target 12, of the four-term sums v6, v7, v15, v16; the first
; shader in this dump uses the same target for its POSITION output.
exp 15, 12, 0, 1, 0, v6, v7, v15, v16 ; F80008CF 100F0706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 20
Code Size: 884 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..4]
DCL TEMP[0..12], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 1.5000}
IMM[1] FLT32 { 4.0000, 8.0000, 16.0000, 32.0000}
IMM[2] FLT32 { 1.2000, 0.6000, 0.0000, 1.0000}
IMM[3] FLT32 { 50.0000, 3.0000, 0.3500, -0.9200}
0: MOV TEMP[0].x, IN[3].wwww
1: MOV TEMP[0].yz, IN[4].yxyy
2: MOV TEMP[1].x, IN[0].wwww
3: MOV TEMP[1].y, IN[1].wwww
4: MOV TEMP[1].z, IN[2].wwww
5: MOV TEMP[2].xy, IN[3].xyyy
6: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D
7: MAD TEMP[2].xy, TEMP[2].wyyy, IMM[0].xxxx, IMM[0].yyyy
8: DP2 TEMP[3].x, TEMP[2].xyyy, TEMP[2].xyyy
9: MOV_SAT TEMP[3].x, TEMP[3].xxxx
10: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx
11: SQRT TEMP[3].x, TEMP[3].xxxx
12: MOV TEMP[2].z, TEMP[3].xxxx
13: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
14: RSQ TEMP[3].x, TEMP[3].xxxx
15: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
16: MOV TEMP[3].xz, TEMP[2].xxzx
17: MOV TEMP[3].y, -TEMP[2].yyyy
18: MUL TEMP[3].xy, TEMP[3].xyyy, IMM[0].xxxx
19: MOV TEMP[2].xy, IN[3].xyyy
20: TEX TEMP[2].x, TEMP[2], SAMP[2], 2D
21: MUL TEMP[4].x, TEMP[2].xxxx, IMM[0].wwww
22: MOV_SAT TEMP[4].x, TEMP[4].xxxx
23: MUL TEMP[5].xy, IN[3].xyyy, IMM[1].xxxx
24: MOV TEMP[6].xy, TEMP[5].xyyy
25: TEX TEMP[6].yw, TEMP[6], SAMP[4], 2D
26: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[0].xxxx, IMM[0].yyyy
27: DP2 TEMP[7].x, TEMP[6].xyyy, TEMP[6].xyyy
28: MOV_SAT TEMP[7].x, TEMP[7].xxxx
29: ADD TEMP[7].x, IMM[0].zzzz, -TEMP[7].xxxx
30: SQRT TEMP[7].x, TEMP[7].xxxx
31: MOV TEMP[6].z, TEMP[7].xxxx
32: MUL TEMP[7].xy, IN[3].xyyy, IMM[1].yyyy
33: MOV TEMP[8].xy, TEMP[7].xyyy
34: TEX TEMP[8].yw, TEMP[8], SAMP[4], 2D
35: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[0].xxxx, IMM[0].yyyy
36: DP2 TEMP[9].x, TEMP[8].xyyy, TEMP[8].xyyy
37: MOV_SAT TEMP[9].x, TEMP[9].xxxx
38: ADD TEMP[9].x, IMM[0].zzzz, -TEMP[9].xxxx
39: SQRT TEMP[9].x, TEMP[9].xxxx
40: MOV TEMP[8].z, TEMP[9].xxxx
41: MUL TEMP[9].xy, IN[3].xyyy, IMM[1].zzzz
42: MOV TEMP[10].xy, TEMP[9].xyyy
43: TEX TEMP[10].yw, TEMP[10], SAMP[4], 2D
44: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[0].xxxx, IMM[0].yyyy
45: DP2 TEMP[11].x, TEMP[10].xyyy, TEMP[10].xyyy
46: MOV_SAT TEMP[11].x, TEMP[11].xxxx
47: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx
48: SQRT TEMP[11].x, TEMP[11].xxxx
49: MOV TEMP[10].z, TEMP[11].xxxx
50: MUL TEMP[11].xy, IN[3].xyyy, IMM[1].wwww
51: MOV TEMP[11].xy, TEMP[11].xyyy
52: TEX TEMP[11].yw, TEMP[11], SAMP[4], 2D
53: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[0].xxxx, IMM[0].yyyy
54: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy
55: MOV_SAT TEMP[12].x, TEMP[12].xxxx
56: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[12].xxxx
57: SQRT TEMP[12].x, TEMP[12].xxxx
58: MOV TEMP[11].z, TEMP[12].xxxx
59: MOV TEMP[12].xy, IN[3].xyyy
60: TEX TEMP[12].xyz, TEMP[12], SAMP[0], 2D
61: MOV TEMP[5].xy, TEMP[5].xyyy
62: TEX TEMP[5].x, TEMP[5], SAMP[3], 2D
63: MUL TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
64: MOV_SAT TEMP[5].x, TEMP[5].xxxx
65: LRP TEMP[5].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[5].xxxx
66: MUL TEMP[5].xyz, TEMP[12].xyzz, TEMP[5].xxxx
67: MOV TEMP[7].xy, TEMP[7].xyyy
68: TEX TEMP[7].x, TEMP[7], SAMP[3], 2D
69: MUL TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx
70: MOV_SAT TEMP[7].x, TEMP[7].xxxx
71: LRP TEMP[7].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[7].xxxx
72: MOV TEMP[9].xy, TEMP[9].xyyy
73: TEX TEMP[9].x, TEMP[9], SAMP[3], 2D
74: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].xxxx
75: MOV_SAT TEMP[9].x, TEMP[9].xxxx
76: LRP TEMP[9].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[9].xxxx
77: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[9].xxxx
78: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[7].xxxx
79: ADD TEMP[6].xyz, TEMP[6].xyzz, TEMP[8].xyzz
80: ADD TEMP[7].xyz, TEMP[10].xyzz, TEMP[11].xyzz
81: ADD TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xyzz
82: MAD TEMP[3].xyz, TEMP[6].xyzz, IMM[2].yyyy, TEMP[3].xyzz
83: LRP TEMP[3].xyz, TEMP[4].xxxx, IMM[2].zzww, TEMP[3].xyzz
84: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
85: RSQ TEMP[4].x, TEMP[4].xxxx
86: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
87: MOV TEMP[4].w, IMM[2].zzzz
88: DP3 TEMP[6].x, IN[0].xyzz, TEMP[3].xyzz
89: DP3 TEMP[7].x, IN[1].xyzz, TEMP[3].xyzz
90: MOV TEMP[6].y, TEMP[7].xxxx
91: DP3 TEMP[3].x, IN[2].xyzz, TEMP[3].xyzz
92: MOV TEMP[6].z, TEMP[3].xxxx
93: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[0].xyzz
94: DP3 TEMP[0].x, TEMP[6].xyzz, CONST[1].xyzz
95: MOV_SAT TEMP[0].x, TEMP[0].xxxx
96: ADD TEMP[1].xyz, CONST[0].xyzz, -TEMP[1].xyzz
97: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
98: RSQ TEMP[3].x, TEMP[3].xxxx
99: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx, CONST[1].xyzz
100: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
101: RSQ TEMP[3].x, TEMP[3].xxxx
102: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
103: DP3 TEMP[1].x, TEMP[6].xyzz, TEMP[1].xyzz
104: MAX TEMP[1].x, IMM[2].zzzz, TEMP[1].xxxx
105: MUL TEMP[3].x, IMM[3].xxxx, TEMP[2].xxxx
106: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
107: MOV_SAT TEMP[1].x, TEMP[1].xxxx
108: MUL TEMP[3].x, IMM[0].xxxx, TEMP[1].xxxx
109: ADD TEMP[3].x, IMM[3].yyyy, -TEMP[3].xxxx
110: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx
111: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
112: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
113: MUL TEMP[1].x, TEMP[1].xxxx, IMM[3].zzzz
114: ADD TEMP[2].x, TEMP[2].xxxx, IMM[3].wwww
115: MOV_SAT TEMP[2].x, TEMP[2].xxxx
116: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx, TEMP[2].xxxx
117: MUL TEMP[2].xyz, TEMP[5].xyzz, CONST[4].xyzz
118: MUL TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xxxx
119: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[1].xxxx, TEMP[0].xyzz
120: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx
121: MOV TEMP[0].w, IMM[0].zzzz
122: ADD TEMP[0].xyz, TEMP[4], TEMP[0]
123: MAD TEMP[1].x, IN[3].zzzz, CONST[3].zzzz, CONST[3].wwww
124: MOV_SAT TEMP[1].x, TEMP[1].xxxx
125: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz
126: MOV TEMP[4].w, IMM[0].zzzz
127: MOV OUT[0], TEMP[4]
128: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0
%40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)*
%44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0
%45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)*
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)*
%50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0
%51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)*
%53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0
%54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0
%56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0
%58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0
%60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0
%62 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%74 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%75 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%79 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%80 = bitcast float %74 to i32
%81 = bitcast float %75 to i32
%82 = insertelement <2 x i32> undef, i32 %80, i32 0
%83 = insertelement <2 x i32> %82, i32 %81, i32 1
%84 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %83, <32 x i8> %44, <16 x i8> %47, i32 2)
%85 = extractelement <4 x float> %84, i32 1
%86 = extractelement <4 x float> %84, i32 3
%87 = fmul float %86, 2.000000e+00
%88 = fadd float %87, -1.000000e+00
%89 = fmul float %85, 2.000000e+00
%90 = fadd float %89, -1.000000e+00
%91 = fmul float %88, %88
%92 = fmul float %90, %90
%93 = fadd float %91, %92
%94 = call float @llvm.AMDIL.clamp.(float %93, float 0.000000e+00, float 1.000000e+00)
%95 = fsub float 1.000000e+00, %94
%96 = call float @llvm.sqrt.f32(float %95)
%97 = fmul float %88, %88
%98 = fmul float %90, %90
%99 = fadd float %98, %97
%100 = fmul float %96, %96
%101 = fadd float %99, %100
%102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101)
%103 = fmul float %88, %102
%104 = fmul float %90, %102
%105 = fmul float %96, %102
%106 = fmul float %103, 2.000000e+00
%107 = fmul float %104, -2.000000e+00
%108 = bitcast float %74 to i32
%109 = bitcast float %75 to i32
%110 = insertelement <2 x i32> undef, i32 %108, i32 0
%111 = insertelement <2 x i32> %110, i32 %109, i32 1
%112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %111, <32 x i8> %50, <16 x i8> %53, i32 2)
%113 = extractelement <4 x float> %112, i32 0
%114 = fmul float %113, 1.500000e+00
%115 = call float @llvm.AMDIL.clamp.(float %114, float 0.000000e+00, float 1.000000e+00)
%116 = fmul float %74, 4.000000e+00
%117 = fmul float %75, 4.000000e+00
%118 = bitcast float %116 to i32
%119 = bitcast float %117 to i32
%120 = insertelement <2 x i32> undef, i32 %118, i32 0
%121 = insertelement <2 x i32> %120, i32 %119, i32 1
%122 = bitcast <8 x i32> %59 to <32 x i8>
%123 = bitcast <4 x i32> %61 to <16 x i8>
%124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %122, <16 x i8> %123, i32 2)
%125 = extractelement <4 x float> %124, i32 1
%126 = extractelement <4 x float> %124, i32 3
%127 = fmul float %126, 2.000000e+00
%128 = fadd float %127, -1.000000e+00
%129 = fmul float %125, 2.000000e+00
%130 = fadd float %129, -1.000000e+00
%131 = fmul float %128, %128
%132 = fmul float %130, %130
%133 = fadd float %131, %132
%134 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00)
%135 = fsub float 1.000000e+00, %134
%136 = call float @llvm.sqrt.f32(float %135)
%137 = fmul float %74, 8.000000e+00
%138 = fmul float %75, 8.000000e+00
%139 = bitcast float %137 to i32
%140 = bitcast float %138 to i32
%141 = insertelement <2 x i32> undef, i32 %139, i32 0
%142 = insertelement <2 x i32> %141, i32 %140, i32 1
%143 = bitcast <8 x i32> %59 to <32 x i8>
%144 = bitcast <4 x i32> %61 to <16 x i8>
%145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %142, <32 x i8> %143, <16 x i8> %144, i32 2)
%146 = extractelement <4 x float> %145, i32 1
%147 = extractelement <4 x float> %145, i32 3
%148 = fmul float %147, 2.000000e+00
%149 = fadd float %148, -1.000000e+00
%150 = fmul float %146, 2.000000e+00
%151 = fadd float %150, -1.000000e+00
%152 = fmul float %149, %149
%153 = fmul float %151, %151
%154 = fadd float %152, %153
%155 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00)
%156 = fsub float 1.000000e+00, %155
%157 = call float @llvm.sqrt.f32(float %156)
%158 = fmul float %74, 1.600000e+01
%159 = fmul float %75, 1.600000e+01
%160 = bitcast float %158 to i32
%161 = bitcast float %159 to i32
%162 = insertelement <2 x i32> undef, i32 %160, i32 0
%163 = insertelement <2 x i32> %162, i32 %161, i32 1
%164 = bitcast <8 x i32> %59 to <32 x i8>
%165 = bitcast <4 x i32> %61 to <16 x i8>
%166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %163, <32 x i8> %164, <16 x i8> %165, i32 2)
%167 = extractelement <4 x float> %166, i32 1
%168 = extractelement <4 x float> %166, i32 3
%169 = fmul float %168, 2.000000e+00
%170 = fadd float %169, -1.000000e+00
%171 = fmul float %167, 2.000000e+00
%172 = fadd float %171, -1.000000e+00
%173 = fmul float %170, %170
%174 = fmul float %172, %172
%175 = fadd float %173, %174
%176 = call float @llvm.AMDIL.clamp.(float %175, float 0.000000e+00, float 1.000000e+00)
%177 = fsub float 1.000000e+00, %176
%178 = call float @llvm.sqrt.f32(float %177)
%179 = fmul float %74, 3.200000e+01
%180 = fmul float %75, 3.200000e+01
%181 = bitcast float %179 to i32
%182 = bitcast float %180 to i32
%183 = insertelement <2 x i32> undef, i32 %181, i32 0
%184 = insertelement <2 x i32> %183, i32 %182, i32 1
%185 = bitcast <8 x i32> %59 to <32 x i8>
%186 = bitcast <4 x i32> %61 to <16 x i8>
%187 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %184, <32 x i8> %185, <16 x i8> %186, i32 2)
%188 = extractelement <4 x float> %187, i32 1
%189 = extractelement <4 x float> %187, i32 3
%190 = fmul float %189, 2.000000e+00
%191 = fadd float %190, -1.000000e+00
%192 = fmul float %188, 2.000000e+00
%193 = fadd float %192, -1.000000e+00
%194 = fmul float %191, %191
%195 = fmul float %193, %193
%196 = fadd float %194, %195
%197 = call float @llvm.AMDIL.clamp.(float %196, float 0.000000e+00, float 1.000000e+00)
%198 = fsub float 1.000000e+00, %197
%199 = call float @llvm.sqrt.f32(float %198)
%200 = bitcast float %74 to i32
%201 = bitcast float %75 to i32
%202 = insertelement <2 x i32> undef, i32 %200, i32 0
%203 = insertelement <2 x i32> %202, i32 %201, i32 1
%204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %39, <16 x i8> %41, i32 2)
%205 = extractelement <4 x float> %204, i32 0
%206 = extractelement <4 x float> %204, i32 1
%207 = extractelement <4 x float> %204, i32 2
%208 = bitcast float %116 to i32
%209 = bitcast float %117 to i32
%210 = insertelement <2 x i32> undef, i32 %208, i32 0
%211 = insertelement <2 x i32> %210, i32 %209, i32 1
%212 = bitcast <8 x i32> %55 to <32 x i8>
%213 = bitcast <4 x i32> %57 to <16 x i8>
%214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %211, <32 x i8> %212, <16 x i8> %213, i32 2)
%215 = extractelement <4 x float> %214, i32 0
%216 = fmul float %215, 0x3FF3333340000000
%217 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00)
%218 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %217)
%219 = fmul float %205, %218
%220 = fmul float %206, %218
%221 = fmul float %207, %218
%222 = bitcast float %137 to i32
%223 = bitcast float %138 to i32
%224 = insertelement <2 x i32> undef, i32 %222, i32 0
%225 = insertelement <2 x i32> %224, i32 %223, i32 1
%226 = bitcast <8 x i32> %55 to <32 x i8>
%227 = bitcast <4 x i32> %57 to <16 x i8>
%228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2)
%229 = extractelement <4 x float> %228, i32 0
%230 = fmul float %229, 0x3FF3333340000000
%231 = call float @llvm.AMDIL.clamp.(float %230, float 0.000000e+00, float 1.000000e+00)
%232 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %231)
%233 = bitcast float %158 to i32
%234 = bitcast float %159 to i32
%235 = insertelement <2 x i32> undef, i32 %233, i32 0
%236 = insertelement <2 x i32> %235, i32 %234, i32 1
%237 = bitcast <8 x i32> %55 to <32 x i8>
%238 = bitcast <4 x i32> %57 to <16 x i8>
%239 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %236, <32 x i8> %237, <16 x i8> %238, i32 2)
%240 = extractelement <4 x float> %239, i32 0
%241 = fmul float %240, 0x3FF3333340000000
%242 = call float @llvm.AMDIL.clamp.(float %241, float 0.000000e+00, float 1.000000e+00)
%243 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %242)
%244 = fmul float %232, %243
%245 = fmul float %219, %244
%246 = fmul float %220, %244
%247 = fmul float %221, %244
%248 = fadd float %128, %149
%249 = fadd float %130, %151
%250 = fadd float %136, %157
%251 = fadd float %170, %191
%252 = fadd float %172, %193
%253 = fadd float %178, %199
%254 = fadd float %248, %251
%255 = fadd float %249, %252
%256 = fadd float %250, %253
%257 = fmul float %254, 0x3FE3333340000000
%258 = fadd float %257, %106
%259 = fmul float %255, 0x3FE3333340000000
%260 = fadd float %259, %107
%261 = fmul float %256, 0x3FE3333340000000
%262 = fadd float %261, %105
%263 = call float @llvm.AMDGPU.lrp(float %115, float 0.000000e+00, float %258)
%264 = call float @llvm.AMDGPU.lrp(float %115, float 0.000000e+00, float %260)
%265 = call float @llvm.AMDGPU.lrp(float %115, float 1.000000e+00, float %262)
%266 = fmul float %263, %263
%267 = fmul float %264, %264
%268 = fadd float %267, %266
%269 = fmul float %265, %265
%270 = fadd float %268, %269
%271 = call float @llvm.AMDGPU.rsq.clamped.f32(float %270)
%272 = fmul float %263, %271
%273 = fmul float %264, %271
%274 = fmul float %265, %271
%275 = fmul float %62, %272
%276 = fmul float %63, %273
%277 = fadd float %276, %275
%278 = fmul float %64, %274
%279 = fadd float %277, %278
%280 = fmul float %66, %272
%281 = fmul float %67, %273
%282 = fadd float %281, %280
%283 = fmul float %68, %274
%284 = fadd float %282, %283
%285 = fmul float %70, %272
%286 = fmul float %71, %273
%287 = fadd float %286, %285
%288 = fmul float %72, %274
%289 = fadd float %287, %288
%290 = fmul float %245, %77
%291 = fmul float %246, %78
%292 = fmul float %247, %79
%293 = fmul float %279, %27
%294 = fmul float %284, %28
%295 = fadd float %294, %293
%296 = fmul float %289, %29
%297 = fadd float %295, %296
%298 = call float @llvm.AMDIL.clamp.(float %297, float 0.000000e+00, float 1.000000e+00)
%299 = fsub float %24, %65
%300 = fsub float %25, %69
%301 = fsub float %26, %73
%302 = fmul float %299, %299
%303 = fmul float %300, %300
%304 = fadd float %303, %302
%305 = fmul float %301, %301
%306 = fadd float %304, %305
%307 = call float @llvm.AMDGPU.rsq.clamped.f32(float %306)
%308 = fmul float %299, %307
%309 = fadd float %308, %27
%310 = fmul float %300, %307
%311 = fadd float %310, %28
%312 = fmul float %301, %307
%313 = fadd float %312, %29
%314 = fmul float %309, %309
%315 = fmul float %311, %311
%316 = fadd float %315, %314
%317 = fmul float %313, %313
%318 = fadd float %316, %317
%319 = call float @llvm.AMDGPU.rsq.clamped.f32(float %318)
%320 = fmul float %309, %319
%321 = fmul float %311, %319
%322 = fmul float %313, %319
%323 = fmul float %279, %320
%324 = fmul float %284, %321
%325 = fadd float %324, %323
%326 = fmul float %289, %322
%327 = fadd float %325, %326
%328 = call float @llvm.maxnum.f32(float %327, float 0.000000e+00)
%329 = fmul float %113, 5.000000e+01
%330 = call float @llvm.pow.f32(float %328, float %329)
%331 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00)
%332 = fmul float %331, 2.000000e+00
%333 = fsub float 3.000000e+00, %332
%334 = fmul float %331, %333
%335 = fmul float %331, %334
%336 = fmul float %335, %113
%337 = fmul float %336, 0x3FD6666660000000
%338 = fadd float %113, 0xBFED70A3E0000000
%339 = call float @llvm.AMDIL.clamp.(float %338, float 0.000000e+00, float 1.000000e+00)
%340 = fmul float %337, %298
%341 = fadd float %340, %339
%342 = fmul float %245, %35
%343 = fmul float %246, %36
%344 = fmul float %247, %37
%345 = fmul float %342, %298
%346 = fmul float %343, %298
%347 = fmul float %344, %298
%348 = fmul float %35, %341
%349 = fadd float %348, %345
%350 = fmul float %36, %341
%351 = fadd float %350, %346
%352 = fmul float %37, %341
%353 = fadd float %352, %347
%354 = fmul float %349, 2.000000e+00
%355 = fmul float %351, 2.000000e+00
%356 = fmul float %353, 2.000000e+00
%357 = fadd float %290, %354
%358 = fadd float %291, %355
%359 = fadd float %292, %356
%360 = fmul float %76, %33
%361 = fadd float %360, %34
%362 = call float @llvm.AMDIL.clamp.(float %361, float 0.000000e+00, float 1.000000e+00)
%363 = call float @llvm.AMDGPU.lrp(float %362, float %357, float %30)
%364 = call float @llvm.AMDGPU.lrp(float %362, float %358, float %31)
%365 = call float @llvm.AMDGPU.lrp(float %362, float %359, float %32)
%366 = call i32 @llvm.SI.packf16(float %363, float %364)
%367 = bitcast i32 %366 to float
%368 = call i32 @llvm.SI.packf16(float %365, float 1.000000e+00)
%369 = bitcast i32 %368 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %367, float %369, float %367, float %369)
ret void
}
; ---------------------------------------------------------------------------
; External intrinsic declarations, attribute groups and metadata for this
; module. Each declaration references an attribute group (#1 or #2) defined
; at the bottom of this section; llvm.SI.export carries no attribute group.
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; No attribute group: the export call is not marked readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0: attribute group of a function definition ("ShaderType"="0", NaN-free FP math enabled).
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 / #2: groups applied to the intrinsic declarations above.
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; !0: metadata node referenced by the `!tbaa !0` annotations on loads in this dump.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; ---------------------------------------------------------------------------
; Machine code for one shader (NOTE(review): presumably the code generated for
; the LLVM IR that precedes it in this dump - the constants 0.6, 1.2, 50.0,
; 0.35 and -0.92 and the final packf16 + export match the IR tail).
; Each line is "instruction ; hex encoding". Comment lines below only
; summarise what the visible instructions do.
; ---------------------------------------------------------------------------
; Prologue: whole-quad mode, then m0 = s9 for the v_interp instructions.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Input interpolation. Every value is produced by a p1 (with v0) / p2 (with v1)
; pair. The two immediates step through 0..3 for each of the groups 0..3, then
; 0..1 for group 4, filling v2-v5, v6-v9, v10-v13, v14-v17, v18 and finally v0.
; Two descriptor loads (s[0:3], s[20:23]) are interleaved with the first pairs.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00
v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01
v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00
v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01
v_interp_p1_f32 v14, v0, 0, 3, [m0] ; C8380C00
v_interp_p2_f32 v14, [v14], v1, 0, 3, [m0] ; C8390C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
v_interp_p1_f32 v18, v0, 0, 4, [m0] ; C8481000
v_interp_p2_f32 v18, [v18], v1, 0, 4, [m0] ; C8491001
; Last pair overwrites v0 itself; v1 is reused as a temporary further down.
v_interp_p1_f32 v0, v0, 1, 4, [m0] ; C8001100
v_interp_p2_f32 v0, [v0], v1, 1, 4, [m0] ; C8011101
; v[19:20] = 4.0 * v[14:15] (scaled copy of the coordinate pair).
v_mul_f32_e32 v19, 4.0, v14 ; 10261CF6
v_mul_f32_e32 v20, 4.0, v15 ; 10281EF6
; Load four 4-dword descriptors via s[4:5] and four 8-dword descriptors via
; s[6:7]; they are used as the last two operands of the image_sample
; instructions below (presumably sampler state and image resources).
s_load_dwordx4 s[48:51], s[4:5], 0x4 ; C0980504
s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508
s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C
s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510
s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708
s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710
s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718
s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720
s_waitcnt lgkmcnt(0) ; BF8C007F
; Two-channel sample at v[14:15]; both results are remapped with 2*x - 1.
image_sample v[21:22], 10, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[52:59], s[48:51] ; F0800A00 018D150E
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v1, 2.0, v22, -1.0 ; D2820001 03CE2CF4
v_mad_f32 v21, 2.0, v21, -1.0 ; D2820015 03CE2AF4
; v22 = v21^2 + v1^2; v23 = sqrt(1 - clamp(v22)); v22 = rsq_clamp(v22 + v23^2).
v_mul_f32_e32 v22, v21, v21 ; 102C2B15
v_mac_f32_e32 v22, v1, v1 ; 3E2C0301
v_add_f32_e64 v23, 0, v22 clamp ; D2060817 00022C80
v_sub_f32_e32 v23, 1.0, v23 ; 082E2EF2
v_sqrt_f32_e32 v23, v23 ; 7E2E6717
v_mac_f32_e32 v22, v23, v23 ; 3E2C2F17
v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916
; Single-channel sample into v24; it is used later as a scalar factor.
image_sample v24, 1, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[40:47], s[36:39] ; F0800100 012A180E
; v25 = 2 * (v22 * v1): the mul and the following mac add the same product.
v_mul_f32_e32 v25, v22, v1 ; 10320316
v_mac_f32_e32 v25, v22, v1 ; 3E320316
v_mul_f32_e32 v1, v22, v21 ; 10022B16
v_mul_f32_e32 v21, v22, v23 ; 102A2F16
; Four two-channel samples of the same image (s[28:35] / s[24:27]) at the
; coordinates scaled by 4, 8 (0x41000000), 16 (0x41800000) and 32 (0x42000000);
; each result pair is remapped with 2*x - 1.
image_sample v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[28:35], s[24:27] ; F0800A00 00C71613
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v23, 2.0, v23, -1.0 ; D2820017 03CE2EF4
v_mad_f32 v22, 2.0, v22, -1.0 ; D2820016 03CE2CF4
v_mov_b32_e32 v26, 0x41000000 ; 7E3402FF 41000000
v_mul_f32_e32 v27, v26, v14 ; 10361D1A
v_mul_f32_e32 v28, v26, v15 ; 10381F1A
image_sample v[29:30], 10, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[28:35], s[24:27] ; F0800A00 00C71D1B
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v26, 2.0, v30, -1.0 ; D282001A 03CE3CF4
v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4
v_mov_b32_e32 v30, 0x41800000 ; 7E3C02FF 41800000
v_mul_f32_e32 v31, v30, v14 ; 103E1D1E
v_mul_f32_e32 v32, v30, v15 ; 10401F1E
image_sample v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[28:35], s[24:27] ; F0800A00 00C7211F
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v30, 2.0, v34, -1.0 ; D282001E 03CE44F4
v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4
v_mov_b32_e32 v34, 0x42000000 ; 7E4402FF 42000000
v_mul_f32_e32 v35, v34, v14 ; 10461D22
v_mul_f32_e32 v36, v34, v15 ; 10481F22
image_sample v[34:35], 10, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[28:35], s[24:27] ; F0800A00 00C72223
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v35, 2.0, v35, -1.0 ; D2820023 03CE46F4
v_mad_f32 v34, 2.0, v34, -1.0 ; D2820022 03CE44F4
; For each of the four remapped pairs compute sqrt(1 - clamp(x^2 + y^2)), and
; accumulate the component sums: v22 (first components), v23 (second
; components) and v26 (the sqrt terms).
v_mul_f32_e32 v36, v22, v22 ; 10482D16
v_mac_f32_e32 v36, v23, v23 ; 3E482F17
v_mul_f32_e32 v37, v29, v29 ; 104A3B1D
v_mac_f32_e32 v37, v26, v26 ; 3E4A351A
v_add_f32_e32 v23, v26, v23 ; 062E2F1A
v_add_f32_e32 v22, v29, v22 ; 062C2D1D
v_add_f32_e64 v26, 0, v36 clamp ; D206081A 00024880
v_sub_f32_e32 v26, 1.0, v26 ; 083434F2
v_sqrt_f32_e32 v26, v26 ; 7E34671A
v_add_f32_e64 v29, 0, v37 clamp ; D206081D 00024A80
v_sub_f32_e32 v29, 1.0, v29 ; 083A3AF2
v_sqrt_f32_e32 v29, v29 ; 7E3A671D
v_add_f32_e32 v26, v29, v26 ; 0634351D
v_mul_f32_e32 v29, v33, v33 ; 103A4321
v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E
v_mul_f32_e32 v36, v34, v34 ; 10484522
v_mac_f32_e32 v36, v35, v35 ; 3E484723
v_add_f32_e32 v30, v35, v30 ; 063C3D23
v_add_f32_e32 v33, v34, v33 ; 06424322
v_add_f32_e64 v29, 0, v29 clamp ; D206081D 00023A80
v_sub_f32_e32 v29, 1.0, v29 ; 083A3AF2
v_sqrt_f32_e32 v29, v29 ; 7E3A671D
v_add_f32_e64 v34, 0, v36 clamp ; D2060822 00024880
v_sub_f32_e32 v34, 1.0, v34 ; 084444F2
v_sqrt_f32_e32 v34, v34 ; 7E446722
; s[24:31] is reloaded here (offset 0x0); the earlier contents of s[24:27]
; are not read again after this point.
s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700
v_add_f32_e32 v29, v34, v29 ; 063A3B22
v_add_f32_e32 v23, v30, v23 ; 062E2F1E
v_add_f32_e32 v22, v33, v22 ; 062C2D21
v_add_f32_e32 v26, v29, v26 ; 0634351D
; Blend the sums in with weight 0.6 (0x3f19999a):
; v25 += 0.6*v23; v1 = -2*v1 + 0.6*v22; v21 += 0.6*v26.
v_mov_b32_e32 v29, 0x3f19999a ; 7E3A02FF 3F19999A
v_mac_f32_e32 v25, v29, v23 ; 3E322F1D
v_mul_f32_e32 v1, -2.0, v1 ; 100202F5
v_mac_f32_e32 v1, v29, v22 ; 3E022D1D
v_mac_f32_e32 v21, v29, v26 ; 3E2A351D
s_waitcnt lgkmcnt(0) ; BF8C007F
; Three-channel sample into v[33:35] and a single-channel sample into v14.
image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[24:31], s[20:23] ; F0800700 00A6210E
image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800100 00430E13
; v15 = clamp(1.5 * v24) is the blend weight; v20 = 1 - v15; v19 = 1.2.
v_mul_f32_e32 v15, 0x3fc00000, v24 ; 101E30FF 3FC00000
v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80
v_mov_b32_e32 v19, 0x3f99999a ; 7E2602FF 3F99999A
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v14, v19, v14 ; 101C1D13
v_add_f32_e64 v14, 0, v14 clamp ; D206080E 00021C80
v_sub_f32_e32 v20, 1.0, v15 ; 08281EF2
; Linear blends of the form x*(1 - v15) + c*v15 with c = 1 (v14, v21) or
; c = 0 (v22, v1).
v_mul_f32_e32 v14, v14, v20 ; 101C290E
v_mac_f32_e32 v14, 1.0, v15 ; 3E1C1EF2
v_mul_f32_e32 v22, v25, v20 ; 102C2919
v_mac_f32_e32 v22, 0, v15 ; 3E2C1E80
v_mul_f32_e32 v1, v1, v20 ; 10022901
v_mac_f32_e32 v1, 0, v15 ; 3E021E80
v_mul_f32_e32 v21, v21, v20 ; 102A2915
v_mac_f32_e32 v21, 1.0, v15 ; 3E2A1EF2
; v23 = rsq_clamp(v22^2 + v1^2 + v21^2): scale factor that normalises
; the vector (v22, v1, v21).
v_mul_f32_e32 v23, v22, v22 ; 102E2D16
v_mac_f32_e32 v23, v1, v1 ; 3E2E0301
v_mac_f32_e32 v23, v21, v21 ; 3E2E2B15
v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917
; Scale the three sampled channels v[33:35] by v14.
v_mul_f32_e32 v25, v14, v33 ; 1032430E
v_mul_f32_e32 v26, v14, v34 ; 1034450E
v_mul_f32_e32 v14, v14, v35 ; 101C470E
; Three dot products of the normalised vector with the interpolated inputs
; (v2,v3,v4), (v6,v7,v8) and (v10,v11,v12); results land in v2, v3 and v6.
; The third components are added further down, once v10 holds v23*v21.
v_mul_f32_e32 v22, v23, v22 ; 102C2D17
v_mul_f32_e32 v1, v23, v1 ; 10020317
v_mul_f32_e32 v2, v22, v2 ; 10040516
v_mac_f32_e32 v2, v1, v3 ; 3E040701
v_mul_f32_e32 v3, v22, v6 ; 10060D16
v_mac_f32_e32 v3, v1, v7 ; 3E060F01
v_mul_f32_e32 v6, v22, v10 ; 100C1516
v_mac_f32_e32 v6, v1, v11 ; 3E0C1701
; Scalar constant loads from the buffer described by s[0:3].
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
; Two more single-channel samples, at the x8 and x16 coordinates.
image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[12:19], s[8:11] ; F0800100 0043011B
image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[12:19], s[8:11] ; F0800100 0043071F
v_mul_f32_e32 v10, v23, v21 ; 10142B17
v_mac_f32_e32 v2, v10, v4 ; 3E04090A
v_mac_f32_e32 v3, v10, v8 ; 3E06110A
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_waitcnt lgkmcnt(0) ; BF8C007F
; (v4, v5, v8) = (s4, s5, s6) - (v5, v9, v13); v9 = rsq_clamp of its squared length.
v_sub_f32_e32 v4, s4, v5 ; 08080A04
v_sub_f32_e32 v5, s5, v9 ; 080A1205
v_sub_f32_e32 v8, s6, v13 ; 08101A06
v_mul_f32_e32 v9, v4, v4 ; 10120904
v_mac_f32_e32 v9, v5, v5 ; 3E120B05
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Remaining constants; s4-s6 are reused and s0 is overwritten by the last load.
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s9, s[0:3], 0xa ; C204810A
s_buffer_load_dword s10, s[0:3], 0xe ; C205010E
s_buffer_load_dword s11, s[0:3], 0xf ; C205810F
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111
s_buffer_load_dword s0, s[0:3], 0x12 ; C2000112
s_waitcnt vmcnt(0) ; BF8C0770
; Normalised difference vector plus (s7, s8, s4), then normalised again (v9).
v_mad_f32 v4, v4, v9, s7 ; D2820004 001E1304
v_mad_f32 v5, v5, v9, s8 ; D2820005 00221305
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mad_f32 v8, v8, v9, s4 ; D2820008 00121308
v_mul_f32_e32 v9, v4, v4 ; 10120904
v_mac_f32_e32 v9, v5, v5 ; 3E120B05
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mac_f32_e32 v6, v10, v12 ; 3E0C190A
; v10 = dot((s7, s8, s4), (v2, v3, v6));
; v2  = dot(normalised (v4, v5, v8), (v2, v3, v6)).
v_mul_f32_e32 v10, s7, v2 ; 10140407
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mul_f32_e32 v4, v9, v4 ; 10080909
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v4, v9, v5 ; 10080B09
v_mac_f32_e32 v2, v4, v3 ; 3E040704
v_mac_f32_e32 v10, s4, v6 ; 3E140C04
v_mul_f32_e32 v3, v9, v8 ; 10061109
v_mac_f32_e32 v2, v3, v6 ; 3E040D03
; The two extra samples get the same treatment as v14 (clamp(1.2*x), blend
; toward 1 by v15); their product scales v25, v26 and v14.
v_mul_f32_e32 v1, v19, v1 ; 10020313
v_mul_f32_e32 v3, v19, v7 ; 10060F13
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v1, v1, v20 ; 10022901
v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680
v_mul_f32_e32 v3, v3, v20 ; 10062903
v_mac_f32_e32 v1, 1.0, v15 ; 3E021EF2
v_mac_f32_e32 v3, 1.0, v15 ; 3E061EF2
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_mul_f32_e32 v3, v1, v25 ; 10063301
; Power function expanded as exp(log(max(0, v2)) * (50.0 * v24)).
v_max_f32_e32 v2, 0, v2 ; 20040480
v_log_f32_e32 v2, v2 ; 7E044F02
v_mul_f32_e32 v4, v1, v26 ; 10083501
v_mul_f32_e32 v1, v1, v14 ; 10021D01
v_mul_f32_e32 v5, 0x42480000, v24 ; 100A30FF 42480000
v_mul_legacy_f32_e32 v2, v5, v2 ; 0E040505
v_exp_f32_e32 v2, v2 ; 7E044B02
; t = clamp(pow); v2 = t*t*(3 - 2*t) * v24 * 0.35 (0x3eb33333).
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_madak_f32_e32 v5, -2.0, v2, 0x40400000 ; 420A04F5 40400000
v_mul_f32_e32 v5, v5, v2 ; 100A0505
v_mul_f32_e32 v2, v5, v2 ; 10040505
v_mul_f32_e32 v2, v24, v2 ; 10040518
; v5 = clamp(v24 - 0.92) + clamp(v10) * v2; v6 = clamp(v10).
v_mov_b32_e32 v5, 0xbf6b851f ; 7E0A02FF BF6B851F
v_add_f32_e32 v5, v24, v5 ; 060A0B18
v_add_f32_e64 v6, 0, v10 clamp ; D2060806 00021480
v_mul_f32_e32 v2, 0x3eb33333, v2 ; 100404FF 3EB33333
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mac_f32_e32 v5, v6, v2 ; 3E0A0506
; Per output channel (constants s12, s13, s0; colours v3, v4, v1):
;   acc = 2 * (v6 * (const * colour) + const * v5) + input * colour
; where the doubling comes from adding each term twice and the inputs are
; v17, v18 and v0. Results: v7, v8, v9.
v_mul_f32_e32 v2, s12, v3 ; 1004060C
v_mul_f32_e32 v7, v6, v2 ; 100E0506
v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C
v_mac_f32_e32 v7, v6, v2 ; 3E0E0506
v_mul_f32_e32 v2, s13, v4 ; 1004080D
v_mul_f32_e32 v8, v6, v2 ; 10100506
v_mac_f32_e32 v8, s13, v5 ; 3E100A0D
v_mac_f32_e32 v8, v6, v2 ; 3E100506
v_mul_f32_e32 v2, s0, v1 ; 10040200
v_mul_f32_e32 v9, v6, v2 ; 10120506
v_mac_f32_e32 v9, s0, v5 ; 3E120A00
v_mac_f32_e32 v9, v6, v2 ; 3E120506
v_mac_f32_e32 v7, s12, v5 ; 3E0E0A0C
v_mac_f32_e32 v7, v17, v3 ; 3E0E0711
v_mac_f32_e32 v8, s13, v5 ; 3E100A0D
v_mac_f32_e32 v8, v18, v4 ; 3E100912
v_mac_f32_e32 v9, s0, v5 ; 3E120A00
v_mac_f32_e32 v9, v0, v1 ; 3E120300
; v0 = clamp(s10 * v16 + s11) is the final blend factor; each channel becomes
; const*(1 - v0) + acc*v0 with constants s5, s6, s9.
v_mov_b32_e32 v0, s11 ; 7E00020B
v_mac_f32_e32 v0, s10, v16 ; 3E00200A
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s5, v1 ; 10040205
v_mac_f32_e32 v2, v7, v0 ; 3E040107
v_mul_f32_e32 v3, s6, v1 ; 10060206
v_mac_f32_e32 v3, v8, v0 ; 3E060108
v_mul_f32_e32 v1, s9, v1 ; 10020209
v_mac_f32_e32 v1, v9, v0 ; 3E020109
; Pack (v2, v3) and (v1, 1.0) into half-float pairs, export them, and end.
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 40
Code Size: 1256 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..36]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.0000}
0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[14].xyzz
1: MUL TEMP[1], CONST[15], TEMP[0].xxxx
2: MAD TEMP[1], CONST[16], TEMP[0].yyyy, TEMP[1]
3: MAD TEMP[1].xyz, CONST[17], TEMP[0].zzzz, TEMP[1]
4: LRP TEMP[0].xyz, IN[2].wwww, TEMP[1].xyzz, TEMP[0].xyzz
5: DP3 TEMP[1].x, CONST[19].xyzz, TEMP[0].xyzz
6: ADD TEMP[1].x, TEMP[1].xxxx, CONST[19].wwww
7: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[19].xyzz
8: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
9: LRP TEMP[0].xyz, CONST[20].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
10: MUL TEMP[1], CONST[33], TEMP[0].xxxx
11: MAD TEMP[1], CONST[34], TEMP[0].yyyy, TEMP[1]
12: MAD TEMP[1], CONST[35], TEMP[0].zzzz, TEMP[1]
13: ADD TEMP[1].xyz, TEMP[1], CONST[36]
14: MUL TEMP[2], CONST[29], TEMP[0].xxxx
15: MAD TEMP[2], CONST[30], TEMP[0].yyyy, TEMP[2]
16: MAD TEMP[0], CONST[31], TEMP[0].zzzz, TEMP[2]
17: ADD TEMP[0], TEMP[0], CONST[32]
18: MOV TEMP[2].w, IMM[0].yyyy
19: MUL TEMP[3], CONST[28], IMM[0].zzzz
20: MUL TEMP[4].xyz, TEMP[1].xyzz, CONST[4].wwww
21: ADD TEMP[4].xyz, CONST[4].xyzz, -TEMP[4].xyzz
22: MOV TEMP[5].xy, TEMP[4].xyxx
23: MOV TEMP[5].z, -TEMP[4].zzzz
24: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[5].xyzz
25: RSQ TEMP[4].x, TEMP[4].xxxx
26: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx
27: MOV TEMP[2].xyz, CONST[0].xyzx
28: MUL TEMP[6].x, CONST[21].xxxx, IN[4].wwww
29: MUL TEMP[7].xyz, CONST[24].xyzz, TEMP[4].xxxx
30: MAD TEMP[7].xyz, CONST[25].xyzz, TEMP[4].yyyy, TEMP[7].xyzz
31: MAD TEMP[4].xyz, CONST[26].xyzz, TEMP[4].zzzz, TEMP[7].xyzz
32: DP3 TEMP[4].x, IN[1].xyzz, TEMP[4].xyzz
33: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx
34: ADD TEMP[7].x, TEMP[6].xxxx, CONST[22].xxxx
35: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx
36: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz
37: MAD TEMP[5].x, TEMP[5].xxxx, CONST[8].zzzz, IMM[0].xxxx
38: RCP TEMP[5].x, TEMP[5].xxxx
39: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
40: MAD TEMP[3], TEMP[2], TEMP[4].xxxx, TEMP[3]
41: MUL TEMP[4].xyz, TEMP[1].xyzz, CONST[5].wwww
42: ADD TEMP[4].xyz, CONST[5].xyzz, -TEMP[4].xyzz
43: MOV TEMP[5].xy, TEMP[4].xyxx
44: MOV TEMP[5].z, -TEMP[4].zzzz
45: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[5].xyzz
46: RSQ TEMP[4].x, TEMP[4].xxxx
47: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx
48: MOV TEMP[2].xyz, CONST[1].xyzx
49: MUL TEMP[7].xyz, CONST[24].xyzz, TEMP[4].xxxx
50: MAD TEMP[7].xyz, CONST[25].xyzz, TEMP[4].yyyy, TEMP[7].xyzz
51: MAD TEMP[4].xyz, CONST[26].xyzz, TEMP[4].zzzz, TEMP[7].xyzz
52: DP3 TEMP[4].x, IN[1].xyzz, TEMP[4].xyzz
53: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx
54: ADD TEMP[7].x, TEMP[6].xxxx, CONST[22].xxxx
55: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx
56: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz
57: MAD TEMP[5].x, TEMP[5].xxxx, CONST[9].zzzz, IMM[0].xxxx
58: RCP TEMP[5].x, TEMP[5].xxxx
59: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
60: MAD TEMP[3], TEMP[2], TEMP[4].xxxx, TEMP[3]
61: MUL TEMP[4].xyz, TEMP[1].xyzz, CONST[6].wwww
62: ADD TEMP[4].xyz, CONST[6].xyzz, -TEMP[4].xyzz
63: MOV TEMP[5].xy, TEMP[4].xyxx
64: MOV TEMP[5].z, -TEMP[4].zzzz
65: DP3 TEMP[4].x, TEMP[5].xyzz, TEMP[5].xyzz
66: RSQ TEMP[4].x, TEMP[4].xxxx
67: MUL TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xxxx
68: MOV TEMP[2].xyz, CONST[2].xyzx
69: MUL TEMP[7].xyz, CONST[24].xyzz, TEMP[4].xxxx
70: MAD TEMP[7].xyz, CONST[25].xyzz, TEMP[4].yyyy, TEMP[7].xyzz
71: MAD TEMP[4].xyz, CONST[26].xyzz, TEMP[4].zzzz, TEMP[7].xyzz
72: DP3 TEMP[4].x, IN[1].xyzz, TEMP[4].xyzz
73: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx
74: ADD TEMP[7].x, TEMP[6].xxxx, CONST[22].xxxx
75: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx
76: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[5].xyzz
77: MAD TEMP[5].x, TEMP[5].xxxx, CONST[10].zzzz, IMM[0].xxxx
78: RCP TEMP[5].x, TEMP[5].xxxx
79: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
80: MAD TEMP[3], TEMP[2], TEMP[4].xxxx, TEMP[3]
81: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[7].wwww
82: ADD TEMP[1].xyz, CONST[7].xyzz, -TEMP[1].xyzz
83: MOV TEMP[4].xy, TEMP[1].xyxx
84: MOV TEMP[4].z, -TEMP[1].zzzz
85: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[4].xyzz
86: RSQ TEMP[1].x, TEMP[1].xxxx
87: MUL TEMP[1].xyz, TEMP[4].xyzz, TEMP[1].xxxx
88: MOV TEMP[2].xyz, CONST[3].xyzx
89: MUL TEMP[5].xyz, CONST[24].xyzz, TEMP[1].xxxx
90: MAD TEMP[5].xyz, CONST[25].xyzz, TEMP[1].yyyy, TEMP[5].xyzz
91: MAD TEMP[1].xyz, CONST[26].xyzz, TEMP[1].zzzz, TEMP[5].xyzz
92: DP3 TEMP[1].x, IN[1].xyzz, TEMP[1].xyzz
93: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
94: ADD TEMP[5].x, TEMP[6].xxxx, CONST[22].xxxx
95: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx
96: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz
97: MAD TEMP[4].x, TEMP[4].xxxx, CONST[11].zzzz, IMM[0].xxxx
98: RCP TEMP[4].x, TEMP[4].xxxx
99: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx
100: MAD TEMP[1].xyz, TEMP[2], TEMP[1].xxxx, TEMP[3]
101: MOV TEMP[3].xyz, TEMP[1].xyzx
102: MOV TEMP[3].w, IMM[0].xxxx
103: MUL TEMP[1], TEMP[3], CONST[23]
104: MUL TEMP[1], TEMP[1], CONST[13]
105: MAD TEMP[2].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww
106: MOV OUT[1], IN[3]
107: MOV OUT[2], TEMP[1]
108: MOV OUT[3], TEMP[2]
109: MOV OUT[0], TEMP[0]
110: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 464)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 468)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 472)
%89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 476)
%90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 480)
%91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 484)
%92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 488)
%93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 492)
%94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 496)
%95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 500)
%96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 504)
%97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 508)
%98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 512)
%99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 516)
%100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 520)
%101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 524)
%102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 528)
%103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 532)
%104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 536)
%105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 544)
%106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 548)
%107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 552)
%108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 560)
%109 = call float @llvm.SI.load.const(<16 x i8> %12, i32 564)
%110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 568)
%111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 576)
%112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 580)
%113 = call float @llvm.SI.load.const(<16 x i8> %12, i32 584)
%114 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%115 = load <16 x i8>, <16 x i8> addrspace(2)* %114, align 16, !tbaa !0
%116 = add i32 %5, %7
%117 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %115, i32 0, i32 %116)
%118 = extractelement <4 x float> %117, i32 0
%119 = extractelement <4 x float> %117, i32 1
%120 = extractelement <4 x float> %117, i32 2
%121 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%122 = load <16 x i8>, <16 x i8> addrspace(2)* %121, align 16, !tbaa !0
%123 = add i32 %5, %7
%124 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %122, i32 0, i32 %123)
%125 = extractelement <4 x float> %124, i32 0
%126 = extractelement <4 x float> %124, i32 1
%127 = extractelement <4 x float> %124, i32 2
%128 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%129 = load <16 x i8>, <16 x i8> addrspace(2)* %128, align 16, !tbaa !0
%130 = add i32 %5, %7
%131 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %129, i32 0, i32 %130)
%132 = extractelement <4 x float> %131, i32 3
%133 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%134 = load <16 x i8>, <16 x i8> addrspace(2)* %133, align 16, !tbaa !0
%135 = add i32 %5, %7
%136 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %134, i32 0, i32 %135)
%137 = extractelement <4 x float> %136, i32 0
%138 = extractelement <4 x float> %136, i32 1
%139 = extractelement <4 x float> %136, i32 2
%140 = extractelement <4 x float> %136, i32 3
%141 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%142 = load <16 x i8>, <16 x i8> addrspace(2)* %141, align 16, !tbaa !0
%143 = add i32 %5, %7
%144 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %142, i32 0, i32 %143)
%145 = extractelement <4 x float> %144, i32 3
%146 = fmul float %118, %51
%147 = fmul float %119, %52
%148 = fmul float %120, %53
%149 = fmul float %54, %146
%150 = fmul float %55, %146
%151 = fmul float %56, %146
%152 = fmul float %57, %147
%153 = fadd float %152, %149
%154 = fmul float %58, %147
%155 = fadd float %154, %150
%156 = fmul float %59, %147
%157 = fadd float %156, %151
%158 = fmul float %60, %148
%159 = fadd float %158, %153
%160 = fmul float %61, %148
%161 = fadd float %160, %155
%162 = fmul float %62, %148
%163 = fadd float %162, %157
%164 = call float @llvm.AMDGPU.lrp(float %132, float %159, float %146)
%165 = call float @llvm.AMDGPU.lrp(float %132, float %161, float %147)
%166 = call float @llvm.AMDGPU.lrp(float %132, float %163, float %148)
%167 = fmul float %63, %164
%168 = fmul float %64, %165
%169 = fadd float %168, %167
%170 = fmul float %65, %166
%171 = fadd float %169, %170
%172 = fadd float %171, %66
%173 = fmul float %172, %63
%174 = fmul float %172, %64
%175 = fmul float %172, %65
%176 = fsub float %164, %173
%177 = fsub float %165, %174
%178 = fsub float %166, %175
%179 = call float @llvm.AMDGPU.lrp(float %67, float %164, float %176)
%180 = call float @llvm.AMDGPU.lrp(float %67, float %165, float %177)
%181 = call float @llvm.AMDGPU.lrp(float %67, float %166, float %178)
%182 = fmul float %102, %179
%183 = fmul float %103, %179
%184 = fmul float %104, %179
%185 = fmul float %105, %180
%186 = fadd float %185, %182
%187 = fmul float %106, %180
%188 = fadd float %187, %183
%189 = fmul float %107, %180
%190 = fadd float %189, %184
%191 = fmul float %108, %181
%192 = fadd float %191, %186
%193 = fmul float %109, %181
%194 = fadd float %193, %188
%195 = fmul float %110, %181
%196 = fadd float %195, %190
%197 = fadd float %192, %111
%198 = fadd float %194, %112
%199 = fadd float %196, %113
%200 = fmul float %86, %179
%201 = fmul float %87, %179
%202 = fmul float %88, %179
%203 = fmul float %89, %179
%204 = fmul float %90, %180
%205 = fadd float %204, %200
%206 = fmul float %91, %180
%207 = fadd float %206, %201
%208 = fmul float %92, %180
%209 = fadd float %208, %202
%210 = fmul float %93, %180
%211 = fadd float %210, %203
%212 = fmul float %94, %181
%213 = fadd float %212, %205
%214 = fmul float %95, %181
%215 = fadd float %214, %207
%216 = fmul float %96, %181
%217 = fadd float %216, %209
%218 = fmul float %97, %181
%219 = fadd float %218, %211
%220 = fadd float %213, %98
%221 = fadd float %215, %99
%222 = fadd float %217, %100
%223 = fadd float %219, %101
%224 = fmul float %83, 2.000000e+00
%225 = fmul float %84, 2.000000e+00
%226 = fmul float %85, 2.000000e+00
%227 = fmul float %197, %28
%228 = fmul float %198, %28
%229 = fmul float %199, %28
%230 = fsub float %25, %227
%231 = fsub float %26, %228
%232 = fsub float %27, %229
%233 = fmul float %230, %230
%234 = fmul float %231, %231
%235 = fadd float %234, %233
%236 = fmul float %232, %232
%237 = fadd float %235, %236
%238 = call float @llvm.AMDGPU.rsq.clamped.f32(float %237)
%239 = fmul float %230, %238
%240 = fmul float %231, %238
%241 = fmul float %232, %238
%242 = fsub float -0.000000e+00, %241
%243 = fmul float %68, %145
%244 = fmul float %74, %239
%245 = fmul float %75, %239
%246 = fmul float %76, %239
%247 = fmul float %77, %240
%248 = fadd float %247, %244
%249 = fmul float %78, %240
%250 = fadd float %249, %245
%251 = fmul float %79, %240
%252 = fadd float %251, %246
%253 = fmul float %80, %242
%254 = fadd float %253, %248
%255 = fmul float %81, %242
%256 = fadd float %255, %250
%257 = fmul float %82, %242
%258 = fadd float %257, %252
%259 = fmul float %125, %254
%260 = fmul float %126, %256
%261 = fadd float %260, %259
%262 = fmul float %127, %258
%263 = fadd float %261, %262
%264 = call float @llvm.maxnum.f32(float %263, float 0.000000e+00)
%265 = fadd float %243, %69
%266 = fmul float %264, %265
%267 = fmul float %230, %230
%268 = fmul float %231, %231
%269 = fadd float %268, %267
%270 = fmul float %232, %232
%271 = fadd float %269, %270
%272 = fmul float %271, %41
%273 = fadd float %272, 1.000000e+00
%274 = fdiv float 1.000000e+00, %273
%275 = fmul float %266, %274
%276 = fmul float %13, %275
%277 = fadd float %276, %224
%278 = fmul float %14, %275
%279 = fadd float %278, %225
%280 = fmul float %15, %275
%281 = fadd float %280, %226
%282 = fmul float %197, %32
%283 = fmul float %198, %32
%284 = fmul float %199, %32
%285 = fsub float %29, %282
%286 = fsub float %30, %283
%287 = fsub float %31, %284
%288 = fmul float %285, %285
%289 = fmul float %286, %286
%290 = fadd float %289, %288
%291 = fmul float %287, %287
%292 = fadd float %290, %291
%293 = call float @llvm.AMDGPU.rsq.clamped.f32(float %292)
%294 = fmul float %285, %293
%295 = fmul float %286, %293
%296 = fmul float %287, %293
%297 = fsub float -0.000000e+00, %296
%298 = fmul float %74, %294
%299 = fmul float %75, %294
%300 = fmul float %76, %294
%301 = fmul float %77, %295
%302 = fadd float %301, %298
%303 = fmul float %78, %295
%304 = fadd float %303, %299
%305 = fmul float %79, %295
%306 = fadd float %305, %300
%307 = fmul float %80, %297
%308 = fadd float %307, %302
%309 = fmul float %81, %297
%310 = fadd float %309, %304
%311 = fmul float %82, %297
%312 = fadd float %311, %306
%313 = fmul float %125, %308
%314 = fmul float %126, %310
%315 = fadd float %314, %313
%316 = fmul float %127, %312
%317 = fadd float %315, %316
%318 = call float @llvm.maxnum.f32(float %317, float 0.000000e+00)
%319 = fadd float %243, %69
%320 = fmul float %318, %319
%321 = fmul float %285, %285
%322 = fmul float %286, %286
%323 = fadd float %322, %321
%324 = fmul float %287, %287
%325 = fadd float %323, %324
%326 = fmul float %325, %42
%327 = fadd float %326, 1.000000e+00
%328 = fdiv float 1.000000e+00, %327
%329 = fmul float %320, %328
%330 = fmul float %16, %329
%331 = fadd float %330, %277
%332 = fmul float %17, %329
%333 = fadd float %332, %279
%334 = fmul float %18, %329
%335 = fadd float %334, %281
%336 = fmul float %197, %36
%337 = fmul float %198, %36
%338 = fmul float %199, %36
%339 = fsub float %33, %336
%340 = fsub float %34, %337
%341 = fsub float %35, %338
%342 = fmul float %339, %339
%343 = fmul float %340, %340
%344 = fadd float %343, %342
%345 = fmul float %341, %341
%346 = fadd float %344, %345
%347 = call float @llvm.AMDGPU.rsq.clamped.f32(float %346)
%348 = fmul float %339, %347
%349 = fmul float %340, %347
%350 = fmul float %341, %347
%351 = fsub float -0.000000e+00, %350
%352 = fmul float %74, %348
%353 = fmul float %75, %348
%354 = fmul float %76, %348
%355 = fmul float %77, %349
%356 = fadd float %355, %352
%357 = fmul float %78, %349
%358 = fadd float %357, %353
%359 = fmul float %79, %349
%360 = fadd float %359, %354
%361 = fmul float %80, %351
%362 = fadd float %361, %356
%363 = fmul float %81, %351
%364 = fadd float %363, %358
%365 = fmul float %82, %351
%366 = fadd float %365, %360
%367 = fmul float %125, %362
%368 = fmul float %126, %364
%369 = fadd float %368, %367
%370 = fmul float %127, %366
%371 = fadd float %369, %370
%372 = call float @llvm.maxnum.f32(float %371, float 0.000000e+00)
%373 = fadd float %243, %69
%374 = fmul float %372, %373
%375 = fmul float %339, %339
%376 = fmul float %340, %340
%377 = fadd float %376, %375
%378 = fmul float %341, %341
%379 = fadd float %377, %378
%380 = fmul float %379, %43
%381 = fadd float %380, 1.000000e+00
%382 = fdiv float 1.000000e+00, %381
%383 = fmul float %374, %382
%384 = fmul float %19, %383
%385 = fadd float %384, %331
%386 = fmul float %20, %383
%387 = fadd float %386, %333
%388 = fmul float %21, %383
%389 = fadd float %388, %335
%390 = fmul float %197, %40
%391 = fmul float %198, %40
%392 = fmul float %199, %40
%393 = fsub float %37, %390
%394 = fsub float %38, %391
%395 = fsub float %39, %392
%396 = fmul float %393, %393
%397 = fmul float %394, %394
%398 = fadd float %397, %396
%399 = fmul float %395, %395
%400 = fadd float %398, %399
%401 = call float @llvm.AMDGPU.rsq.clamped.f32(float %400)
%402 = fmul float %393, %401
%403 = fmul float %394, %401
%404 = fmul float %395, %401
%405 = fsub float -0.000000e+00, %404
%406 = fmul float %74, %402
%407 = fmul float %75, %402
%408 = fmul float %76, %402
%409 = fmul float %77, %403
%410 = fadd float %409, %406
%411 = fmul float %78, %403
%412 = fadd float %411, %407
%413 = fmul float %79, %403
%414 = fadd float %413, %408
%415 = fmul float %80, %405
%416 = fadd float %415, %410
%417 = fmul float %81, %405
%418 = fadd float %417, %412
%419 = fmul float %82, %405
%420 = fadd float %419, %414
%421 = fmul float %125, %416
%422 = fmul float %126, %418
%423 = fadd float %422, %421
%424 = fmul float %127, %420
%425 = fadd float %423, %424
%426 = call float @llvm.maxnum.f32(float %425, float 0.000000e+00)
%427 = fadd float %243, %69
%428 = fmul float %426, %427
%429 = fmul float %393, %393
%430 = fmul float %394, %394
%431 = fadd float %430, %429
%432 = fmul float %395, %395
%433 = fadd float %431, %432
%434 = fmul float %433, %44
%435 = fadd float %434, 1.000000e+00
%436 = fdiv float 1.000000e+00, %435
%437 = fmul float %428, %436
%438 = fmul float %22, %437
%439 = fadd float %438, %385
%440 = fmul float %23, %437
%441 = fadd float %440, %387
%442 = fmul float %24, %437
%443 = fadd float %442, %389
%444 = fmul float %439, %70
%445 = fmul float %441, %71
%446 = fmul float %443, %72
%447 = fmul float %444, %47
%448 = fmul float %445, %48
%449 = fmul float %446, %49
%450 = fmul float %73, %50
%451 = fmul float %222, %45
%452 = fadd float %451, %46
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %137, float %138, float %139, float %140)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %447, float %448, float %449, float %450)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %452, float %23, float %24, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %220, float %221, float %222, float %223)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[52:55], s[2:3], 0x0 ; C09A0300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s1, s[52:55], 0x35 ; C200B535
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_buffer_load_dword s0, s[52:55], 0x36 ; C2003536
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700
buffer_load_format_xyzw v[11:14], v0, s[20:23], 0 idxen ; E00C2000 80050B00
buffer_load_format_xyzw v[15:18], v0, s[8:11], 0 idxen ; E00C2000 80020F00
s_buffer_load_dword s2, s[52:55], 0x37 ; C2013537
s_buffer_load_dword s56, s[52:55], 0x38 ; C21C3538
s_buffer_load_dword s57, s[52:55], 0x39 ; C21CB539
s_buffer_load_dword s58, s[52:55], 0x3a ; C21D353A
s_buffer_load_dword s59, s[52:55], 0x3c ; C21DB53C
s_buffer_load_dword s60, s[52:55], 0x3d ; C21E353D
s_buffer_load_dword s61, s[52:55], 0x3e ; C21EB53E
s_buffer_load_dword s62, s[52:55], 0x40 ; C21F3540
s_buffer_load_dword s63, s[52:55], 0x41 ; C21FB541
s_buffer_load_dword s64, s[52:55], 0x42 ; C2203542
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s2 ; 7E000202
s_buffer_load_dword s65, s[52:55], 0x44 ; C220B544
s_buffer_load_dword s66, s[52:55], 0x45 ; C2213545
s_buffer_load_dword s67, s[52:55], 0x46 ; C221B546
s_buffer_load_dword s68, s[52:55], 0x4c ; C222354C
s_buffer_load_dword s69, s[52:55], 0x4d ; C222B54D
s_buffer_load_dword s70, s[52:55], 0x4e ; C223354E
s_buffer_load_dword s71, s[52:55], 0x4f ; C223B54F
s_buffer_load_dword s72, s[52:55], 0x50 ; C2243550
s_buffer_load_dword s3, s[52:55], 0x80 ; C201B580
s_buffer_load_dword s2, s[52:55], 0x81 ; C2013581
s_buffer_load_dword s7, s[52:55], 0x82 ; C203B582
s_buffer_load_dword s4, s[52:55], 0x83 ; C2023583
s_buffer_load_dword s73, s[52:55], 0x84 ; C224B584
s_buffer_load_dword s74, s[52:55], 0x85 ; C2253585
s_buffer_load_dword s75, s[52:55], 0x86 ; C225B586
s_buffer_load_dword s76, s[52:55], 0x88 ; C2263588
s_buffer_load_dword s77, s[52:55], 0x89 ; C226B589
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e64 v7, 1.0, s72 ; D2080007 000090F2
s_buffer_load_dword s78, s[52:55], 0x8a ; C227358A
s_buffer_load_dword s79, s[52:55], 0x8c ; C227B58C
s_buffer_load_dword s80, s[52:55], 0x8d ; C228358D
s_buffer_load_dword s81, s[52:55], 0x8e ; C228B58E
s_buffer_load_dword s82, s[52:55], 0x90 ; C2293590
s_buffer_load_dword s83, s[52:55], 0x91 ; C229B591
s_buffer_load_dword s6, s[52:55], 0xd ; C203350D
s_buffer_load_dword s5, s[52:55], 0xe ; C202B50E
s_buffer_load_dword s8, s[52:55], 0x10 ; C2043510
s_buffer_load_dword s9, s[52:55], 0x11 ; C204B511
s_buffer_load_dword s10, s[52:55], 0x12 ; C2053512
s_buffer_load_dword s84, s[52:55], 0x13 ; C22A3513
s_buffer_load_dword s11, s[52:55], 0x14 ; C205B514
s_buffer_load_dword s12, s[52:55], 0x15 ; C2063515
s_buffer_load_dword s13, s[52:55], 0x16 ; C206B516
s_buffer_load_dword s51, s[52:55], 0x17 ; C219B517
s_buffer_load_dword s85, s[52:55], 0x92 ; C22AB592
s_buffer_load_dword s16, s[52:55], 0x18 ; C2083518
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v8, s8 ; 7E100208
s_buffer_load_dword s8, s[52:55], 0x19 ; C2043519
v_mov_b32_e32 v9, s9 ; 7E120209
s_buffer_load_dword s9, s[52:55], 0x1a ; C204B51A
v_mov_b32_e32 v15, s10 ; 7E1E020A
s_buffer_load_dword s50, s[52:55], 0x1b ; C219351B
s_buffer_load_dword s10, s[52:55], 0x1c ; C205351C
v_mov_b32_e32 v16, s11 ; 7E20020B
s_buffer_load_dword s11, s[52:55], 0x1d ; C205B51D
v_mov_b32_e32 v17, s12 ; 7E22020C
s_buffer_load_dword s12, s[52:55], 0x1e ; C206351E
v_mov_b32_e32 v19, s13 ; 7E26020D
s_buffer_load_dword s49, s[52:55], 0x1f ; C218B51F
s_buffer_load_dword s15, s[52:55], 0x22 ; C207B522
s_buffer_load_dword s14, s[52:55], 0x26 ; C2073526
v_mov_b32_e32 v20, s16 ; 7E280210
s_buffer_load_dword s13, s[52:55], 0x5f ; C206B55F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v21, s8 ; 7E2A0208
s_buffer_load_dword s42, s[52:55], 0x60 ; C2153560
v_mov_b32_e32 v22, s9 ; 7E2C0209
s_buffer_load_dword s41, s[52:55], 0x61 ; C214B561
s_buffer_load_dword s40, s[52:55], 0x62 ; C2143562
v_mov_b32_e32 v23, s10 ; 7E2E020A
s_buffer_load_dword s37, s[52:55], 0x64 ; C212B564
v_mov_b32_e32 v24, s11 ; 7E30020B
s_buffer_load_dword s36, s[52:55], 0x65 ; C2123565
v_mov_b32_e32 v25, s12 ; 7E32020C
s_buffer_load_dword s35, s[52:55], 0x66 ; C211B566
s_buffer_load_dword s32, s[52:55], 0x68 ; C2103568
s_buffer_load_dword s30, s[52:55], 0x69 ; C20F3569
s_buffer_load_dword s31, s[52:55], 0x6a ; C20FB56A
v_mul_f32_e32 v0, s13, v0 ; 1000000D
s_buffer_load_dword s17, s[52:55], 0x2a ; C208B52A
s_buffer_load_dword s13, s[52:55], 0x2e ; C206B52E
s_buffer_load_dword s12, s[52:55], 0x32 ; C2063532
s_buffer_load_dword s16, s[52:55], 0x33 ; C2083533
s_buffer_load_dword s8, s[52:55], 0x34 ; C2043534
s_buffer_load_dword s29, s[52:55], 0x54 ; C20EB554
s_buffer_load_dword s18, s[52:55], 0x58 ; C2093558
s_buffer_load_dword s11, s[52:55], 0x5c ; C205B55C
s_buffer_load_dword s10, s[52:55], 0x5d ; C205355D
s_buffer_load_dword s9, s[52:55], 0x5e ; C204B55E
s_buffer_load_dword s19, s[52:55], 0x70 ; C209B570
s_buffer_load_dword s20, s[52:55], 0x71 ; C20A3571
s_buffer_load_dword s23, s[52:55], 0x72 ; C20BB572
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v26, s16 ; 7E340210
s_buffer_load_dword s28, s[52:55], 0x74 ; C20E3574
s_buffer_load_dword s27, s[52:55], 0x75 ; C20DB575
s_buffer_load_dword s24, s[52:55], 0x0 ; C20C3500
v_mov_b32_e32 v27, s18 ; 7E360212
s_buffer_load_dword s25, s[52:55], 0x1 ; C20CB501
s_buffer_load_dword s26, s[52:55], 0x2 ; C20D3502
s_buffer_load_dword s21, s[52:55], 0x4 ; C20AB504
s_buffer_load_dword s22, s[52:55], 0x5 ; C20B3505
v_add_f32_e64 v28, s19, s19 ; D206001C 00002613
v_add_f32_e64 v29, s20, s20 ; D206001D 00002814
v_add_f32_e64 v30, s23, s23 ; D206001E 00002E17
s_buffer_load_dword s23, s[52:55], 0x6 ; C20BB506
s_buffer_load_dword s18, s[52:55], 0x8 ; C2093508
s_buffer_load_dword s19, s[52:55], 0x9 ; C209B509
s_buffer_load_dword s20, s[52:55], 0xa ; C20A350A
s_buffer_load_dword s16, s[52:55], 0xc ; C208350C
s_buffer_load_dword s44, s[52:55], 0x76 ; C2163576
s_buffer_load_dword s45, s[52:55], 0x77 ; C216B577
s_buffer_load_dword s46, s[52:55], 0x78 ; C2173578
s_buffer_load_dword s47, s[52:55], 0x79 ; C217B579
s_buffer_load_dword s48, s[52:55], 0x7a ; C218357A
s_buffer_load_dword s43, s[52:55], 0x7b ; C215B57B
s_buffer_load_dword s38, s[52:55], 0x7c ; C213357C
s_buffer_load_dword s33, s[52:55], 0x7d ; C210B57D
s_buffer_load_dword s39, s[52:55], 0x7e ; C213B57E
s_buffer_load_dword s34, s[52:55], 0x7f ; C211357F
v_mul_f32_e32 v1, s56, v1 ; 10020238
v_mul_f32_e32 v2, s57, v2 ; 10040439
v_mul_f32_e32 v3, s58, v3 ; 1006063A
v_mul_f32_e32 v31, s59, v1 ; 103E023B
v_mul_f32_e32 v32, s60, v1 ; 1040023C
v_mul_f32_e32 v33, s61, v1 ; 1042023D
v_mac_f32_e32 v31, s62, v2 ; 3E3E043E
v_mac_f32_e32 v32, s63, v2 ; 3E40043F
v_mac_f32_e32 v33, s64, v2 ; 3E420440
v_mac_f32_e32 v31, s65, v3 ; 3E3E0641
v_mac_f32_e32 v32, s66, v3 ; 3E400642
v_mac_f32_e32 v33, s67, v3 ; 3E420643
v_sub_f32_e32 v34, 1.0, v10 ; 084414F2
v_mul_f32_e32 v1, v1, v34 ; 10024501
v_mul_f32_e32 v2, v2, v34 ; 10044502
v_mul_f32_e32 v3, v3, v34 ; 10064503
v_mac_f32_e32 v1, v31, v10 ; 3E02151F
v_mac_f32_e32 v2, v32, v10 ; 3E041520
v_mac_f32_e32 v3, v33, v10 ; 3E061521
v_mul_f32_e32 v10, s68, v1 ; 10140244
v_mac_f32_e32 v10, s69, v2 ; 3E140445
v_mac_f32_e32 v10, s70, v3 ; 3E140646
v_add_f32_e32 v10, s71, v10 ; 06141447
v_mad_f32 v31, -v10, s68, v1 ; D282001F 2404890A
v_mad_f32 v32, -v10, s69, v2 ; D2820020 24088B0A
v_mad_f32 v10, -v10, s70, v3 ; D282000A 240C8D0A
v_mul_f32_e32 v31, v31, v7 ; 103E0F1F
v_mul_f32_e32 v32, v32, v7 ; 10400F20
v_mul_f32_e32 v7, v10, v7 ; 100E0F0A
v_mac_f32_e32 v31, s72, v1 ; 3E3E0248
v_mac_f32_e32 v32, s72, v2 ; 3E400448
v_mac_f32_e32 v7, s72, v3 ; 3E0E0648
v_mul_f32_e32 v1, s73, v31 ; 10023E49
v_mul_f32_e32 v2, s74, v31 ; 10043E4A
v_mul_f32_e32 v3, s75, v31 ; 10063E4B
v_mac_f32_e32 v1, s76, v32 ; 3E02404C
v_mac_f32_e32 v2, s77, v32 ; 3E04404D
v_mac_f32_e32 v3, s78, v32 ; 3E06404E
v_mac_f32_e32 v1, s79, v7 ; 3E020E4F
v_mac_f32_e32 v2, s80, v7 ; 3E040E50
v_mac_f32_e32 v3, s81, v7 ; 3E060E51
v_add_f32_e32 v1, s82, v1 ; 06020252
v_add_f32_e32 v2, s83, v2 ; 06040453
v_add_f32_e32 v3, s85, v3 ; 06060655
v_mad_f32 v8, -v1, s84, v8 ; D2820008 2420A901
v_mad_f32 v9, -v2, s84, v9 ; D2820009 2424A902
v_mad_f32 v10, -v3, s84, v15 ; D282000A 243CA903
v_mad_f32 v15, -v1, s51, v16 ; D282000F 24406701
v_mad_f32 v16, -v2, s51, v17 ; D2820010 24446702
v_mad_f32 v17, -v3, s51, v19 ; D2820011 244C6703
v_mad_f32 v19, -v1, s50, v20 ; D2820013 24506501
v_mad_f32 v20, -v2, s50, v21 ; D2820014 24546502
v_mad_f32 v21, -v3, s50, v22 ; D2820015 24586503
v_mad_f32 v1, -v1, s49, v23 ; D2820001 245C6301
v_mad_f32 v2, -v2, s49, v24 ; D2820002 24606302
v_mad_f32 v3, -v3, s49, v25 ; D2820003 24646303
v_mul_f32_e32 v22, v8, v8 ; 102C1108
v_mac_f32_e32 v22, v9, v9 ; 3E2C1309
v_mul_f32_e32 v23, v15, v15 ; 102E1F0F
v_mac_f32_e32 v23, v16, v16 ; 3E2E2110
v_mul_f32_e32 v24, v19, v19 ; 10302713
v_mac_f32_e32 v24, v20, v20 ; 3E302914
v_mul_f32_e32 v25, v1, v1 ; 10320301
v_mac_f32_e32 v25, v2, v2 ; 3E320502
v_mac_f32_e32 v22, v10, v10 ; 3E2C150A
v_mac_f32_e32 v23, v17, v17 ; 3E2E2311
v_mac_f32_e32 v24, v21, v21 ; 3E302B15
v_mac_f32_e32 v25, v3, v3 ; 3E320703
v_rsq_clamp_f32_e32 v33, v22 ; 7E425916
v_rsq_clamp_f32_e32 v34, v23 ; 7E445917
v_rsq_clamp_f32_e32 v35, v24 ; 7E465918
v_rsq_clamp_f32_e32 v36, v25 ; 7E485919
v_mul_f32_e32 v8, v33, v8 ; 10101121
v_mul_f32_e32 v15, v34, v15 ; 101E1F22
v_mul_f32_e32 v19, v35, v19 ; 10262723
v_mul_f32_e32 v1, v36, v1 ; 10020324
v_mul_f32_e32 v37, s42, v8 ; 104A102A
v_mul_f32_e32 v38, s42, v15 ; 104C1E2A
v_mul_f32_e32 v39, s42, v19 ; 104E262A
v_mul_f32_e32 v40, s42, v1 ; 1050022A
v_mul_f32_e32 v41, s41, v8 ; 10521029
v_mul_f32_e32 v42, s41, v15 ; 10541E29
v_mul_f32_e32 v43, s41, v19 ; 10562629
v_mul_f32_e32 v44, s41, v1 ; 10580229
v_mul_f32_e32 v8, s40, v8 ; 10101028
v_mul_f32_e32 v15, s40, v15 ; 101E1E28
v_mul_f32_e32 v19, s40, v19 ; 10262628
v_mul_f32_e32 v1, s40, v1 ; 10020228
v_mul_f32_e32 v9, v33, v9 ; 10121321
v_mul_f32_e32 v16, v34, v16 ; 10202122
v_mul_f32_e32 v20, v35, v20 ; 10282923
v_mul_f32_e32 v2, v36, v2 ; 10040524
v_mac_f32_e32 v37, s37, v9 ; 3E4A1225
v_mac_f32_e32 v38, s37, v16 ; 3E4C2025
v_mac_f32_e32 v39, s37, v20 ; 3E4E2825
v_mac_f32_e32 v40, s37, v2 ; 3E500425
v_mac_f32_e32 v41, s36, v9 ; 3E521224
v_mac_f32_e32 v42, s36, v16 ; 3E542024
v_mac_f32_e32 v43, s36, v20 ; 3E562824
v_mac_f32_e32 v44, s36, v2 ; 3E580424
v_mul_f32_e32 v10, v33, v10 ; 10141521
v_mul_f32_e32 v17, v34, v17 ; 10222322
v_mul_f32_e32 v21, v35, v21 ; 102A2B23
v_mul_f32_e32 v3, v36, v3 ; 10060724
v_mac_f32_e32 v8, s35, v9 ; 3E101223
v_mac_f32_e32 v15, s35, v16 ; 3E1E2023
v_mac_f32_e32 v19, s35, v20 ; 3E262823
v_mac_f32_e32 v1, s35, v2 ; 3E020423
v_mad_f32 v2, -s32, v10, v37 ; D2820002 24961420
v_mad_f32 v9, -s30, v10, v41 ; D2820009 24A6141E
v_mad_f32 v8, -s31, v10, v8 ; D2820008 2422141F
v_mad_f32 v10, -s32, v17, v38 ; D282000A 249A2220
v_mad_f32 v16, -s30, v17, v42 ; D2820010 24AA221E
v_mad_f32 v15, -s31, v17, v15 ; D282000F 243E221F
v_mad_f32 v17, -s32, v21, v39 ; D2820011 249E2A20
v_mad_f32 v20, -s30, v21, v43 ; D2820014 24AE2A1E
v_mad_f32 v19, -s31, v21, v19 ; D2820013 244E2A1F
v_mad_f32 v21, -s32, v3, v40 ; D2820015 24A20620
v_mad_f32 v33, -s30, v3, v44 ; D2820021 24B2061E
v_mad_f32 v1, -s31, v3, v1 ; D2820001 2406061F
v_mul_f32_e32 v2, v2, v4 ; 10040902
v_mac_f32_e32 v2, v9, v5 ; 3E040B09
v_mul_f32_e32 v3, v10, v4 ; 1006090A
v_mac_f32_e32 v3, v16, v5 ; 3E060B10
v_mul_f32_e32 v9, v17, v4 ; 10120911
v_mac_f32_e32 v9, v20, v5 ; 3E120B14
v_mul_f32_e32 v4, v21, v4 ; 10080915
v_mac_f32_e32 v4, v33, v5 ; 3E080B21
v_mac_f32_e32 v2, v8, v6 ; 3E040D08
v_mac_f32_e32 v3, v15, v6 ; 3E060D0F
v_mac_f32_e32 v9, v19, v6 ; 3E120D13
v_mac_f32_e32 v4, v1, v6 ; 3E080D01
v_mac_f32_e32 v27, s29, v18 ; 3E36241D
exp 15, 32, 0, 0, 0, v11, v12, v13, v14 ; F800020F 0E0D0C0B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s28, v31 ; 10023E1C
v_mul_f32_e32 v5, s27, v31 ; 100A3E1B
v_mul_f32_e32 v6, s44, v31 ; 100C3E2C
v_mul_f32_e32 v8, s45, v31 ; 10103E2D
v_mac_f32_e32 v1, s46, v32 ; 3E02402E
v_mac_f32_e32 v5, s47, v32 ; 3E0A402F
v_mac_f32_e32 v6, s48, v32 ; 3E0C4030
v_mad_f32 v10, v22, s15, 1.0 ; D282000A 03C81F16
v_rcp_f32_e32 v10, v10 ; 7E14550A
v_mac_f32_e32 v8, s43, v32 ; 3E10402B
v_max_f32_e32 v2, 0, v2 ; 20040480
v_mul_f32_e32 v2, v27, v2 ; 1004051B
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mac_f32_e32 v28, s24, v2 ; 3E380418
v_mac_f32_e32 v29, s25, v2 ; 3E3A0419
v_mac_f32_e32 v30, s26, v2 ; 3E3C041A
v_mad_f32 v2, v23, s14, 1.0 ; D2820002 03C81D17
v_rcp_f32_e32 v2, v2 ; 7E045502
v_mad_f32 v10, v24, s17, 1.0 ; D282000A 03C82318
v_max_f32_e32 v3, 0, v3 ; 20060680
v_mul_f32_e32 v3, v27, v3 ; 1006071B
v_mul_f32_e32 v2, v2, v3 ; 10040702
v_mac_f32_e32 v28, s21, v2 ; 3E380415
v_mac_f32_e32 v29, s22, v2 ; 3E3A0416
v_rcp_f32_e32 v3, v10 ; 7E06550A
v_mac_f32_e32 v30, s23, v2 ; 3E3C0417
v_max_f32_e32 v2, 0, v9 ; 20041280
v_mul_f32_e32 v2, v27, v2 ; 1004051B
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_mac_f32_e32 v28, s18, v2 ; 3E380412
v_mac_f32_e32 v29, s19, v2 ; 3E3A0413
v_mac_f32_e32 v30, s20, v2 ; 3E3C0414
v_mac_f32_e32 v1, s38, v7 ; 3E020E26
v_mac_f32_e32 v6, s39, v7 ; 3E0C0E27
v_add_f32_e32 v2, s7, v6 ; 06040C07
v_mad_f32 v3, v25, s13, 1.0 ; D2820003 03C81B19
v_rcp_f32_e32 v3, v3 ; 7E065503
v_mac_f32_e32 v26, s12, v2 ; 3E34040C
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v4, v27, v4 ; 1008091B
v_mul_f32_e32 v3, v3, v4 ; 10060903
v_mac_f32_e32 v28, s16, v3 ; 3E380610
v_mul_f32_e32 v4, s11, v28 ; 1008380B
v_mac_f32_e32 v29, s6, v3 ; 3E3A0606
v_mul_f32_e32 v6, s10, v29 ; 100C3A0A
v_mac_f32_e32 v30, s5, v3 ; 3E3C0605
v_mul_f32_e32 v3, s9, v30 ; 10063C09
v_mul_f32_e32 v4, s8, v4 ; 10080808
v_mul_f32_e32 v6, s1, v6 ; 100C0C01
v_mov_b32_e32 v9, s6 ; 7E120206
v_mov_b32_e32 v10, s5 ; 7E140205
v_mul_f32_e32 v3, s0, v3 ; 10060600
exp 15, 33, 0, 0, 0, v4, v6, v3, v0 ; F800021F 00030604
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v0, 0 ; 7E000280
exp 15, 34, 0, 0, 0, v26, v9, v10, v0 ; F800022F 000A091A
v_mac_f32_e32 v5, s33, v7 ; 3E0A0E21
v_mac_f32_e32 v8, s34, v7 ; 3E100E22
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, s3, v1 ; 06000203
v_add_f32_e32 v1, s2, v5 ; 06020A02
v_add_f32_e32 v3, s4, v8 ; 06061004
exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 88
VGPRS: 48
Code Size: 1536 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[1].xyz, TEMP[0], SAMP[0], 2D
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[1].xyzz
3: MOV_SAT TEMP[1].x, IN[2].xxxx
4: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz
5: MOV TEMP[0].w, IMM[0].xxxx
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Fragment shader entry point ("ShaderType"="0") for the TGSI listing above:
; sample a 2D texture at IN[0].xy, modulate by IN[1].xyz, blend towards
; CONST[0].xyz using the saturated IN[2].x as weight, force alpha to 1.0 and
; export the result as two packed half-float pairs.
; Arguments used by this body: %1 (array holding the constant-buffer
; descriptor), %2 and %3 (arrays holding the two descriptors passed to the
; sample intrinsic), %5 and %7 (forwarded unchanged to every
; llvm.SI.fs.interp call). The remaining arguments are not referenced here.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Load element 0 of the descriptor array in %1 and read three floats from it
; at byte offsets 0, 4 and 8 (the CONST[0].xyz operand of the TGSI LRP).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
; Load the first 32-byte entry of %3 and the first 16-byte entry of %2; both
; are only consumed by the sample call below.
; NOTE(review): presumably the image resource and sampler-state descriptors
; for SAMP[0]/SVIEW[0] - confirm against the radeonsi descriptor layout.
%27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0
%29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
; Interpolate the inputs that are actually read. The (first, second) i32
; pairs used are (0,0) (1,0) | (0,1) (1,1) (2,1) | (0,2), which lines up with
; IN[0].xy, IN[1].xyz and IN[2].x, i.e. first = component, second = input
; index.
%31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
; The sample intrinsic takes its coordinates as <2 x i32>, so the two
; interpolated floats are bit-cast (not converted) and packed into a vector.
%37 = bitcast float %31 to i32
%38 = bitcast float %32 to i32
%39 = insertelement <2 x i32> undef, i32 %37, i32 0
%40 = insertelement <2 x i32> %39, i32 %38, i32 1
; TGSI 1: TEX - only the first three result components are extracted.
%41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %28, <16 x i8> %30, i32 2)
%42 = extractelement <4 x float> %41, i32 0
%43 = extractelement <4 x float> %41, i32 1
%44 = extractelement <4 x float> %41, i32 2
; TGSI 2: MUL - modulate the texel by the interpolated IN[1].xyz.
%45 = fmul float %33, %42
%46 = fmul float %34, %43
%47 = fmul float %35, %44
; TGSI 3: MOV_SAT - clamp IN[2].x to [0.0, 1.0].
%48 = call float @llvm.AMDIL.clamp.(float %36, float 0.000000e+00, float 1.000000e+00)
; TGSI 4: LRP - per-channel blend with weight %48 between the modulated
; texel and the constant. The disassembly below expands each call to
; const * (1.0 - w) + texel * w.
%49 = call float @llvm.AMDGPU.lrp(float %48, float %45, float %24)
%50 = call float @llvm.AMDGPU.lrp(float %48, float %46, float %25)
%51 = call float @llvm.AMDGPU.lrp(float %48, float %47, float %26)
; Pack (x, y) and (z, 1.0) into two i32 values via packf16 and bit-cast them
; to float because the export intrinsic only accepts float operands. The
; constant 1.0 is the alpha from TGSI 5 (MOV TEMP[0].w, IMM[0].xxxx).
%52 = call i32 @llvm.SI.packf16(float %49, float %50)
%53 = bitcast i32 %52 to float
%54 = call i32 @llvm.SI.packf16(float %51, float 1.000000e+00)
%55 = bitcast i32 %54 to float
; TGSI 6: MOV OUT[0] - single export; the two packed values are passed twice
; (as operands 1/3 and 2/4).
; NOTE(review): the five leading i32 operands are not decoded here - see the
; llvm.SI.export definition for their meaning.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53, float %55, float %53, float %55)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader @main above. Register roles below are
; read off the operands in this listing.
; exec is rewritten by s_wqm_b64 before any interpolation or sampling.
s_wqm_b64 exec, exec ; BEFE0A7E
; s[0:3] <- 4 dwords via the pointer in s[2:3]; used further down as the base
; operand of the three s_buffer_load_dword constant reads.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; m0 is an operand of every v_interp_* below. s9 is copied out here before
; the next load overwrites s[8:11].
s_mov_b32 m0, s9 ; BEFC0309
; s[8:11] <- 4 dwords via s[4:5]; last operand of image_sample.
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
; Two-step interpolation (p1 consumes v0, p2 consumes v1):
;   v2, v3     <- input 0, components 0 and 1 (texture coordinates)
;   v4, v5, v6 <- input 1, components 0, 1 and 2
;   v0         <- input 2, component 0 (overwrites the p1 source, so it is
;                 interpolated last)
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
; s[12:19] <- 8 dwords via s[6:7]; second-to-last operand of image_sample.
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800
; Wait for the scalar loads above before s[0:3], s[8:11] and s[12:19] are
; used.
s_waitcnt lgkmcnt(0) ; BF8C007F
; s4, s5, s0 <- constants at offsets 0x0, 0x1, 0x2 from s[0:3] (the three
; llvm.SI.load.const values). The last load overwrites s0, so it is issued
; last.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
; Texture fetch: coordinates in v[2:3], result written to v[1:3].
image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430102
s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102
; Wait for both the texel (vmcnt) and the constants (lgkmcnt).
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; texel.xyz * input 1 .xyz
v_mul_f32_e32 v1, v1, v4 ; 10020901
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mul_f32_e32 v3, v3, v6 ; 10060D03
; Saturate the blend weight: add 0 with the clamp modifier set.
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
; v4 = 1.0 - weight, shared by the three blends below.
v_sub_f32_e32 v4, 1.0, v0 ; 080800F2
; Each blend is a v_mul (constant * v4) followed by a v_mac with the
; modulated texel and weight v0. Results land in v5, v1 and v2.
v_mul_f32_e32 v5, s4, v4 ; 100A0804
v_mac_f32_e32 v5, v1, v0 ; 3E0A0101
v_mul_f32_e32 v1, s5, v4 ; 10020805
v_mac_f32_e32 v1, v2, v0 ; 3E020102
v_mul_f32_e32 v2, s0, v4 ; 10040800
v_mac_f32_e32 v2, v3, v0 ; 3E040103
; Pack (v5, v1) into v0 and (v2, 1.0) into v1 as half-float pairs, matching
; the two llvm.SI.packf16 calls.
v_cvt_pkrtz_f16_f32_e32 v0, v5, v1 ; 5E000305
v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502
; Export the two packed registers (each listed twice) and end the program.
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 168 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: DP3 TEMP[0].x, IN[0].xyzz, IN[0].xyzz
1: RSQ TEMP[1].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[0].xyzz, TEMP[1].xxxx
3: MUL TEMP[1], CONST[0], TEMP[0].xxxx
4: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1]
5: MAD TEMP[0].xyz, CONST[2], TEMP[0].zzzz, TEMP[1]
6: MUL TEMP[1], CONST[4], IN[0].xxxx
7: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[1]
8: MAD TEMP[1], CONST[6], IN[0].zzzz, TEMP[1]
9: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[1]
10: MUL TEMP[2], CONST[0], IN[0].xxxx
11: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[3], IN[0].wwww, TEMP[2]
14: MOV TEMP[2].xyz, TEMP[2].xyzx
15: MOV TEMP[2].w, TEMP[0].xxxx
16: MOV TEMP[0].xy, TEMP[0].yzyy
17: MOV OUT[2], TEMP[0]
18: MOV OUT[1], TEMP[2]
19: MOV OUT[0], TEMP[1]
20: END
; ModuleID = 'tgsi'
; Vertex shader entry point. The instruction stream mirrors the TGSI VERT
; listing printed immediately before this module.
; Arguments actually referenced by the body:
;   %1      - array of 16-byte descriptors; element 0 feeds llvm.SI.load.const
;   %4      - array of 16-byte descriptors; element 0 feeds llvm.SI.vs.load.input
;   %5, %7  - summed to form the index operand of llvm.SI.vs.load.input
; No other argument is read.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor 0 of %1; every llvm.SI.load.const below reads through it.
; The second operand looks like a byte offset: 16*n + 4*c lines up with
; CONST[n].c in the TGSI listing (and with dword offset 4*n + c in the
; disassembly that follows).
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0-56: x/y/z of CONST[0], CONST[1], CONST[2], CONST[3]
; (the .w components at offsets 12, 28, 44, 60 are never loaded).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
; Offsets 64-124: all four components of CONST[4], CONST[5], CONST[6], CONST[7].
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
; Fetch the single vertex input as a <4 x float> through descriptor 0 of %4,
; indexed by %5 + %7, then split it into scalars: %45..%48 = x, y, z, w.
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
; %53 = x*x + y*y + z*z; %54 = clamped reciprocal square root of it;
; %55..%57 = input xyz scaled by %54 (TGSI instructions 1-2).
%49 = fmul float %45, %45
%50 = fmul float %46, %46
%51 = fadd float %50, %49
%52 = fmul float %47, %47
%53 = fadd float %51, %52
%54 = call float @llvm.AMDGPU.rsq.clamped.f32(float %53)
%55 = fmul float %45, %54
%56 = fmul float %46, %54
%57 = fmul float %47, %54
; Scaled vector combined with CONST[0..2].xyz (TGSI instructions 3-5):
;   %68 = x result, %70 = y result, %72 = z result.
%58 = fmul float %13, %55
%59 = fmul float %14, %55
%60 = fmul float %15, %55
%61 = fmul float %16, %56
%62 = fadd float %61, %58
%63 = fmul float %17, %56
%64 = fadd float %63, %59
%65 = fmul float %18, %56
%66 = fadd float %65, %60
%67 = fmul float %19, %57
%68 = fadd float %67, %62
%69 = fmul float %20, %57
%70 = fadd float %69, %64
%71 = fmul float %21, %57
%72 = fadd float %71, %66
; Raw input xyzw combined with CONST[4..7] (TGSI instructions 6-9):
;   %94, %96, %98, %100 = x, y, z, w of the result.
%73 = fmul float %25, %45
%74 = fmul float %26, %45
%75 = fmul float %27, %45
%76 = fmul float %28, %45
%77 = fmul float %29, %46
%78 = fadd float %77, %73
%79 = fmul float %30, %46
%80 = fadd float %79, %74
%81 = fmul float %31, %46
%82 = fadd float %81, %75
%83 = fmul float %32, %46
%84 = fadd float %83, %76
%85 = fmul float %33, %47
%86 = fadd float %85, %78
%87 = fmul float %34, %47
%88 = fadd float %87, %80
%89 = fmul float %35, %47
%90 = fadd float %89, %82
%91 = fmul float %36, %47
%92 = fadd float %91, %84
%93 = fmul float %37, %48
%94 = fadd float %93, %86
%95 = fmul float %38, %48
%96 = fadd float %95, %88
%97 = fmul float %39, %48
%98 = fadd float %97, %90
%99 = fmul float %40, %48
%100 = fadd float %99, %92
; Raw input xyzw combined with CONST[0..3].xyz (TGSI instructions 10-13):
;   %117, %119, %121 = x, y, z of the result.
%101 = fmul float %13, %45
%102 = fmul float %14, %45
%103 = fmul float %15, %45
%104 = fmul float %16, %46
%105 = fadd float %104, %101
%106 = fmul float %17, %46
%107 = fadd float %106, %102
%108 = fmul float %18, %46
%109 = fadd float %108, %103
%110 = fmul float %19, %47
%111 = fadd float %110, %105
%112 = fmul float %20, %47
%113 = fadd float %112, %107
%114 = fmul float %21, %47
%115 = fadd float %114, %109
%116 = fmul float %22, %48
%117 = fadd float %116, %111
%118 = fmul float %23, %48
%119 = fadd float %118, %113
%120 = fmul float %24, %48
%121 = fadd float %120, %115
; Exports. The fourth i32 operand differs per call (32, 33, 12) and shows up
; as the second operand of the matching `exp` in the disassembly; it
; presumably selects the export target - confirm against the intrinsic.
; Target 32 <- OUT[1]: (%117, %119, %121) with %68 placed in w (TGSI 14-15, 18).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %117, float %119, float %121, float %68)
; Target 33 <- OUT[2]: (%70, %72, %72, 0.0); the .yzyy swizzle of TGSI 16
; moves y/z into x/y and z keeps its old value.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %70, float %72, float %72, float 0.000000e+00)
; Target 12 <- OUT[0]: the CONST[4..7] result. Only this call has the third
; i32 operand set to 1 (presumably a "last export" flag - confirm).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %94, float %96, float %98, float %100)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listing for the vertex shader @main printed above.
; The hex words after each ';' are part of the original dump (presumably the
; instruction encodings) and are left untouched.
; Load four dwords from the pointer in s[8:9] into s[4:7], form the fetch
; index s10 + v0 (the `add i32 %5, %7` of the IR) and fetch the xyzw vertex
; attribute into v[0:3] using s[4:7] as the resource.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
; Load four dwords from the pointer in s[2:3] into s[0:3]; s[0:3] is the
; resource operand of every s_buffer_load_dword below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; 28 scalar constant loads, one per llvm.SI.load.const in the IR. Offsets
; here count dwords: 0x4 corresponds to IR byte offset 16, 0x1f to 124.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s13, s[0:3], 0xc ; C206810C
s_buffer_load_dword s14, s[0:3], 0xd ; C207010D
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113
s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114
s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117
s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118
s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119
s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A
s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B
s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C
s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D
s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E
; The final load writes s0, which is part of the resource s[0:3]; no further
; s_buffer_load follows in this shader.
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt lgkmcnt(0) ; BF8C007F
; v4..v7 = attribute xyzw (v0..v3) combined with the constants at dwords
; 0x10-0x1f (CONST[4..7]); these feed the target-12 export at the end.
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mac_f32_e32 v4, s20, v1 ; 3E080214
v_mul_f32_e32 v5, s17, v0 ; 100A0011
v_mac_f32_e32 v5, s21, v1 ; 3E0A0215
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mac_f32_e32 v7, s23, v1 ; 3E0E0217
v_mac_f32_e32 v4, s24, v2 ; 3E080418
v_mac_f32_e32 v5, s25, v2 ; 3E0A0419
v_mac_f32_e32 v6, s26, v2 ; 3E0C041A
v_mac_f32_e32 v7, s27, v2 ; 3E0E041B
v_mac_f32_e32 v4, s28, v3 ; 3E08061C
v_mac_f32_e32 v5, s29, v3 ; 3E0A061D
v_mac_f32_e32 v6, s30, v3 ; 3E0C061E
v_mac_f32_e32 v7, s0, v3 ; 3E0E0600
; v8, v9 = x and y of the attribute combined with CONST[0..3]
; (dwords 0x0/0x4/0x8/0xc and 0x1/0x5/0x9/0xd).
v_mul_f32_e32 v8, s4, v0 ; 10100004
v_mac_f32_e32 v8, s7, v1 ; 3E100207
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mac_f32_e32 v8, s13, v3 ; 3E10060D
v_mul_f32_e32 v9, s5, v0 ; 10120005
v_mac_f32_e32 v9, s8, v1 ; 3E120208
v_mac_f32_e32 v9, s11, v2 ; 3E12040B
v_mac_f32_e32 v9, s14, v3 ; 3E12060E
; v10 = rsq_clamp(v0*v0 + v1*v1 + v2*v2).
v_mul_f32_e32 v10, v0, v0 ; 10140100
v_mac_f32_e32 v10, v1, v1 ; 3E140301
v_mac_f32_e32 v10, v2, v2 ; 3E140502
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
; v11 = z of the CONST[0..3] combination; this is the last read of the raw
; attribute values before v0..v3 are overwritten.
v_mul_f32_e32 v11, s6, v0 ; 10160006
v_mac_f32_e32 v11, s9, v1 ; 3E160209
v_mac_f32_e32 v11, s12, v2 ; 3E16040C
v_mac_f32_e32 v11, s15, v3 ; 3E16060F
; Scale xyz by v10 in place, then v3 = s4*v0 + s7*v1 + s10*v2 (%68 in the IR).
v_mul_f32_e32 v0, v10, v0 ; 1000010A
v_mul_f32_e32 v1, v10, v1 ; 1002030A
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mul_f32_e32 v3, s4, v0 ; 10060004
v_mac_f32_e32 v3, s7, v1 ; 3E060207
v_mac_f32_e32 v3, s10, v2 ; 3E06040A
; Export to target 32: (v8, v9, v11, v3).
exp 15, 32, 0, 0, 0, v8, v9, v11, v3 ; F800020F 030B0908
; v3 was a source of the export above and is rewritten by the next
; instruction, hence the wait on the export counter.
s_waitcnt expcnt(0) ; BF8C070F
; v3 and v0 = remaining two components of the scaled-vector combination
; (%70 and %72 in the IR); v1 is then reused to hold the constant 0.
v_mul_f32_e32 v3, s5, v0 ; 10060005
v_mul_f32_e32 v0, s6, v0 ; 10000006
v_mac_f32_e32 v3, s8, v1 ; 3E060208
v_mac_f32_e32 v0, s9, v1 ; 3E000209
v_mac_f32_e32 v3, s11, v2 ; 3E06040B
v_mac_f32_e32 v0, s12, v2 ; 3E00040C
v_mov_b32_e32 v1, 0 ; 7E020280
; Export to target 33: (v3, v0, v0, 0), then to target 12: (v4, v5, v6, v7).
exp 15, 33, 0, 0, 0, v3, v0, v0, v1 ; F800021F 01000003
exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 356 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..10]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 1.0000, -1.0000, 2.0000, 1.3000}
0: MOV TEMP[0].x, IN[0].wwww
1: MOV TEMP[0].yz, IN[1].yxyy
2: MOV TEMP[1].xyz, -CONST[0].xyzx
3: ADD TEMP[2].xyz, CONST[1].xyzz, TEMP[1].xyzz
4: ADD TEMP[1].xyz, IN[0].xyzz, TEMP[1].xyzz
5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
6: RSQ TEMP[3].x, TEMP[3].xxxx
7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
8: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
9: RSQ TEMP[4].x, TEMP[4].xxxx
10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
11: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
12: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx
13: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx
14: SQRT TEMP[1].x, TEMP[1].xxxx
15: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
16: SQRT TEMP[2].x, TEMP[2].xxxx
17: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
18: RCP TEMP[2].x, CONST[1].wwww
19: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
20: ADD TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy
21: ABS TEMP[2].x, TEMP[2].xxxx
22: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz
23: RSQ TEMP[3].x, TEMP[3].xxxx
24: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx
25: DP3 TEMP[3].x, CONST[10].xyzz, CONST[10].xyzz
26: RSQ TEMP[3].x, TEMP[3].xxxx
27: MUL TEMP[3].xyz, CONST[10].xyzz, TEMP[3].xxxx
28: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[3].xyzz
29: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].zzzz
30: POW TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww
31: FSLT TEMP[1].x, IMM[0].xxxx, TEMP[1].xxxx
32: UIF TEMP[1].xxxx :0
33: ADD TEMP[1].x, CONST[2].zzzz, -TEMP[2].xxxx
34: RCP TEMP[3].x, CONST[2].zzzz
35: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
36: MOV_SAT TEMP[1].x, TEMP[1].xxxx
37: POW TEMP[1].x, TEMP[1].xxxx, CONST[2].wwww
38: ADD TEMP[3].x, CONST[2].xxxx, -TEMP[2].xxxx
39: RCP TEMP[4].x, CONST[2].xxxx
40: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
41: MOV_SAT TEMP[3].x, TEMP[3].xxxx
42: POW TEMP[3].x, TEMP[3].xxxx, CONST[2].yyyy
43: MUL TEMP[3], CONST[3], TEMP[3].xxxx
44: MAD TEMP[1], CONST[4], TEMP[1].xxxx, TEMP[3]
45: MUL TEMP[1], TEMP[1], CONST[9].xxxx
46: MUL TEMP[1], TEMP[1], TEMP[0].xxxx
47: ELSE :0
48: ADD TEMP[3].x, CONST[5].zzzz, -TEMP[2].xxxx
49: RCP TEMP[4].x, CONST[5].zzzz
50: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
51: MOV_SAT TEMP[3].x, TEMP[3].xxxx
52: POW TEMP[3].x, TEMP[3].xxxx, CONST[5].wwww
53: ADD TEMP[2].x, CONST[5].xxxx, -TEMP[2].xxxx
54: RCP TEMP[4].x, CONST[5].xxxx
55: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
56: MOV_SAT TEMP[2].x, TEMP[2].xxxx
57: POW TEMP[2].x, TEMP[2].xxxx, CONST[5].yyyy
58: MUL TEMP[2], CONST[6], TEMP[2].xxxx
59: MAD TEMP[2], CONST[7], TEMP[3].xxxx, TEMP[2]
60: ADD TEMP[2], TEMP[2], CONST[8]
61: MUL TEMP[2], TEMP[2], CONST[9].xxxx
62: MUL TEMP[1], TEMP[2], TEMP[0].xxxx
63: ENDIF
64: MOV OUT[0], TEMP[1]
65: END
; ModuleID = 'tgsi'
; Fragment shader entry point. The instruction stream mirrors the TGSI FRAG
; listing printed immediately before this module.
; Arguments actually referenced by the body:
;   %1      - array of 16-byte descriptors; element 0 feeds llvm.SI.load.const
;   %5, %7  - passed unchanged as the last two operands of every
;             llvm.SI.fs.interp call
; No other argument is read.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Descriptor 0 of %1; every llvm.SI.load.const reads through it. The second
; operand looks like a byte offset: 16*n + 4*c lines up with CONST[n].c in
; the TGSI listing.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Constants needed on both sides of the branch:
;   %24..%26 = CONST[0].xyz      %27..%29 = CONST[1].xyz   %30 = CONST[1].w
;   %31, %32 = CONST[2].x, .z    %33, %34 = CONST[5].x, .z
;   %35      = CONST[9].x        %36..%38 = CONST[10].xyz
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
; Interpolated inputs. Matching these against the TGSI uses, the first i32
; operand is the component and the second the input index:
;   %39..%42 = IN[0].x, .y, .z, .w      %43, %44 = IN[1].x, .y
%39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
; %45..%47 = CONST[1].xyz - CONST[0].xyz   (TGSI 2-3)
; %48..%50 = IN[0].xyz    - CONST[0].xyz   (TGSI 4)
%45 = fsub float %27, %24
%46 = fsub float %28, %25
%47 = fsub float %29, %26
%48 = fsub float %39, %24
%49 = fsub float %40, %25
%50 = fsub float %41, %26
; %57..%59 = (%45..%47) scaled by the clamped rsq of its squared length (TGSI 5-7).
%51 = fmul float %45, %45
%52 = fmul float %46, %46
%53 = fadd float %52, %51
%54 = fmul float %47, %47
%55 = fadd float %53, %54
%56 = call float @llvm.AMDGPU.rsq.clamped.f32(float %55)
%57 = fmul float %45, %56
%58 = fmul float %46, %56
%59 = fmul float %47, %56
; %66..%68 = (%48..%50) scaled the same way (TGSI 8-10).
%60 = fmul float %48, %48
%61 = fmul float %49, %49
%62 = fadd float %61, %60
%63 = fmul float %50, %50
%64 = fadd float %62, %63
%65 = call float @llvm.AMDGPU.rsq.clamped.f32(float %64)
%66 = fmul float %48, %65
%67 = fmul float %49, %65
%68 = fmul float %50, %65
; %73 = dot product of the two scaled vectors; %76 = sqrt(1 - %73 * %73) (TGSI 11-14).
%69 = fmul float %57, %66
%70 = fmul float %58, %67
%71 = fadd float %70, %69
%72 = fmul float %59, %68
%73 = fadd float %71, %72
%74 = fmul float %73, %73
%75 = fsub float 1.000000e+00, %74
%76 = call float @llvm.sqrt.f32(float %75)
; %82 = sqrt of the squared length of (%45..%47); the squares are recomputed
; here rather than reusing %55 (TGSI 15-16).
%77 = fmul float %45, %45
%78 = fmul float %46, %46
%79 = fadd float %78, %77
%80 = fmul float %47, %47
%81 = fadd float %79, %80
%82 = call float @llvm.sqrt.f32(float %81)
; %85 = %76 * %82 * (1 / CONST[1].w); %87 = |%85 - 1| (TGSI 17-21).
%83 = fmul float %76, %82
%84 = fdiv float 1.000000e+00, %30
%85 = fmul float %83, %84
%86 = fadd float %85, -1.000000e+00
%87 = call float @llvm.fabs.f32(float %86)
; %94..%96 = (IN[0].w, IN[1].x, IN[1].y) scaled by the clamped rsq of its
; squared length (TGSI 0-1, 22-24).
%88 = fmul float %42, %42
%89 = fmul float %43, %43
%90 = fadd float %89, %88
%91 = fmul float %44, %44
%92 = fadd float %90, %91
%93 = call float @llvm.AMDGPU.rsq.clamped.f32(float %92)
%94 = fmul float %42, %93
%95 = fmul float %43, %93
%96 = fmul float %44, %93
; %103..%105 = CONST[10].xyz scaled the same way (TGSI 25-27).
%97 = fmul float %36, %36
%98 = fmul float %37, %37
%99 = fadd float %98, %97
%100 = fmul float %38, %38
%101 = fadd float %99, %100
%102 = call float @llvm.AMDGPU.rsq.clamped.f32(float %101)
%103 = fmul float %36, %102
%104 = fmul float %37, %102
%105 = fmul float %38, %102
; %112 = pow(dot(%94..%96, %103..%105) + 2.0, 1.3); the hex literal is the
; IMM[0].w value 1.3 from the TGSI listing (TGSI 28-30).
%106 = fmul float %94, %103
%107 = fmul float %95, %104
%108 = fadd float %107, %106
%109 = fmul float %96, %105
%110 = fadd float %108, %109
%111 = fadd float %110, 2.000000e+00
%112 = call float @llvm.pow.f32(float %111, float 0x3FF4CCCCC0000000)
; TGSI 31-32: FSLT 1.0 < %85, expressed here as %85 > 1.0.
%113 = fcmp ogt float %85, 1.000000e+00
br i1 %113, label %IF, label %ELSE
; IF side (TGSI 33-44). Branch-local constants:
;   %114..%117 = CONST[4].w, .z, .y, .x    %118..%121 = CONST[3].w, .z, .y, .x
;   %122 = CONST[2].w                      %123 = CONST[2].y
IF: ; preds = %main_body
%114 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%115 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%116 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%117 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%118 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%119 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%120 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%121 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%122 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%123 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
; %128 = pow(clamp((CONST[2].z - %87) / CONST[2].z, 0, 1), CONST[2].w)
; The llvm.AMDIL.clamp. call with bounds 0.0 and 1.0 stands for TGSI MOV_SAT.
%124 = fsub float %32, %87
%125 = fdiv float 1.000000e+00, %32
%126 = fmul float %124, %125
%127 = call float @llvm.AMDIL.clamp.(float %126, float 0.000000e+00, float 1.000000e+00)
%128 = call float @llvm.pow.f32(float %127, float %122)
; %133 = pow(clamp((CONST[2].x - %87) / CONST[2].x, 0, 1), CONST[2].y)
%129 = fsub float %31, %87
%130 = fdiv float 1.000000e+00, %31
%131 = fmul float %129, %130
%132 = call float @llvm.AMDIL.clamp.(float %131, float 0.000000e+00, float 1.000000e+00)
%133 = call float @llvm.pow.f32(float %132, float %123)
; (%139, %141, %143, %145) = CONST[4] * %128 + CONST[3] * %133, per component.
%134 = fmul float %121, %133
%135 = fmul float %120, %133
%136 = fmul float %119, %133
%137 = fmul float %118, %133
%138 = fmul float %117, %128
%139 = fadd float %138, %134
%140 = fmul float %116, %128
%141 = fadd float %140, %135
%142 = fmul float %115, %128
%143 = fadd float %142, %136
%144 = fmul float %114, %128
%145 = fadd float %144, %137
br label %ENDIF
; ELSE side (TGSI 48-60). Branch-local constants:
;   %146..%149 = CONST[8].w, .z, .y, .x    %150..%153 = CONST[7].w, .z, .y, .x
;   %154..%157 = CONST[6].w, .z, .y, .x
;   %158 = CONST[5].w                      %159 = CONST[5].y
ELSE: ; preds = %main_body
%146 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
%147 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%148 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%149 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%150 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124)
%151 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%152 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%153 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%154 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108)
%155 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%156 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%157 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%158 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%159 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
; %164 = pow(clamp((CONST[5].z - %87) / CONST[5].z, 0, 1), CONST[5].w)
%160 = fsub float %34, %87
%161 = fdiv float 1.000000e+00, %34
%162 = fmul float %160, %161
%163 = call float @llvm.AMDIL.clamp.(float %162, float 0.000000e+00, float 1.000000e+00)
%164 = call float @llvm.pow.f32(float %163, float %158)
; %169 = pow(clamp((CONST[5].x - %87) / CONST[5].x, 0, 1), CONST[5].y)
%165 = fsub float %33, %87
%166 = fdiv float 1.000000e+00, %33
%167 = fmul float %165, %166
%168 = call float @llvm.AMDIL.clamp.(float %167, float 0.000000e+00, float 1.000000e+00)
%169 = call float @llvm.pow.f32(float %168, float %159)
; (%182..%185) = CONST[7] * %164 + CONST[6] * %169 + CONST[8], per component.
%170 = fmul float %157, %169
%171 = fmul float %156, %169
%172 = fmul float %155, %169
%173 = fmul float %154, %169
%174 = fmul float %153, %164
%175 = fadd float %174, %170
%176 = fmul float %152, %164
%177 = fadd float %176, %171
%178 = fmul float %151, %164
%179 = fadd float %178, %172
%180 = fmul float %150, %164
%181 = fadd float %180, %173
%182 = fadd float %175, %149
%183 = fadd float %177, %148
%184 = fadd float %179, %147
%185 = fadd float %181, %146
br label %ENDIF
; Join point. The phis pick the x, y, z, w result of whichever side ran
; (%.sink25 = x, %.sink24 = y, %.sink = z, %.sink23 = w). The two trailing
; multiplies that TGSI lists in both branches (by CONST[9].x, then by the
; pow result %112) appear once here, after the phis.
ENDIF: ; preds = %ELSE, %IF
%.sink25 = phi float [ %139, %IF ], [ %182, %ELSE ]
%.sink24 = phi float [ %141, %IF ], [ %183, %ELSE ]
%.sink = phi float [ %143, %IF ], [ %184, %ELSE ]
%.sink23 = phi float [ %145, %IF ], [ %185, %ELSE ]
%186 = fmul float %.sink25, %35
%187 = fmul float %.sink24, %35
%188 = fmul float %.sink, %35
%189 = fmul float %.sink23, %35
%190 = fmul float %186, %112
%191 = fmul float %187, %112
%192 = fmul float %188, %112
%193 = fmul float %189, %112
; Pack (x, y) and (z, w) with llvm.SI.packf16 and reinterpret each i32 as a
; float so it fits the export signature. The two packed values are passed
; twice: (%195, %197, %195, %197).
%194 = call i32 @llvm.SI.packf16(float %190, float %191)
%195 = bitcast i32 %194 to float
%196 = call i32 @llvm.SI.packf16(float %192, float %193)
%197 = bitcast i32 %196 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %195, float %197, float %195, float %197)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listing for the fragment shader @main printed above.
; The hex words after each ';' are part of the original dump (presumably the
; instruction encodings) and are left untouched.
; m0 is set from s9 and named by every v_interp instruction; s[0:3] receives
; four dwords from the pointer in s[2:3] and is the resource of every
; s_buffer_load_dword below.
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Each input component is produced by a v_interp_p1 (using v0) followed by a
; v_interp_p2 (using v1). v2, v3 = the first two components of input 0.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant offsets count dwords (0x4 corresponds to IR byte offset 16).
; s4..s6 = dwords 0-2 (CONST[0].xyz), s7..s9 = dwords 4-6 (CONST[1].xyz),
; interleaved with the interpolation of v4, v5 (input 0, components 2 and 3)
; and v6 (input 1, component 0).
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_waitcnt lgkmcnt(0) ; BF8C007F
; v2..v4 = input 0 xyz minus CONST[0].xyz (IR %48..%50);
; v7..v9 = CONST[1].xyz minus CONST[0].xyz (IR %45..%47).
v_subrev_f32_e32 v2, s4, v2 ; 0A040404
v_mov_b32_e32 v7, s4 ; 7E0E0204
v_sub_f32_e32 v7, s7, v7 ; 080E0E07
v_subrev_f32_e32 v3, s5, v3 ; 0A060605
v_mov_b32_e32 v8, s5 ; 7E100205
v_sub_f32_e32 v8, s8, v8 ; 08101008
v_subrev_f32_e32 v4, s6, v4 ; 0A080806
v_mov_b32_e32 v9, s6 ; 7E120206
; s4..s7 are reloaded with new constants: dword 7 (CONST[1].w) and dwords
; 0x28-0x2a (CONST[10].xyz). s9 still holds CONST[1].z for the v_sub below.
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x28 ; C2028128
s_buffer_load_dword s6, s[0:3], 0x29 ; C2030129
s_buffer_load_dword s7, s[0:3], 0x2a ; C203812A
v_sub_f32_e32 v9, s9, v9 ; 08121209
; v10 = squared length of (v7, v8, v9); v11 = squared length of (v2, v3, v4);
; v12 and v11 then hold the clamped reciprocal square roots.
v_mul_f32_e32 v10, v7, v7 ; 10140F07
v_mac_f32_e32 v10, v8, v8 ; 3E141108
v_mac_f32_e32 v10, v9, v9 ; 3E141309
v_mul_f32_e32 v11, v2, v2 ; 10160502
v_mac_f32_e32 v11, v3, v3 ; 3E160703
v_mac_f32_e32 v11, v4, v4 ; 3E160904
v_rsq_clamp_f32_e32 v12, v10 ; 7E18590A
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
; Last interpolation: v0 = input 1, component 1. It overwrites v0, and v1 is
; overwritten right after, so no v_interp can follow.
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
; v1 = dot product of the two scaled vectors (IR %73).
v_mul_f32_e32 v1, v12, v7 ; 10020F0C
v_mul_f32_e32 v2, v11, v2 ; 1004050B
v_mul_f32_e32 v1, v2, v1 ; 10020302
v_mul_f32_e32 v2, v12, v8 ; 1004110C
v_mul_f32_e32 v3, v11, v3 ; 1006070B
v_mac_f32_e32 v1, v3, v2 ; 3E020503
v_mul_f32_e32 v2, v12, v9 ; 1004130C
v_mul_f32_e32 v3, v11, v4 ; 1006090B
v_mac_f32_e32 v1, v3, v2 ; 3E020503
; v1 = sqrt(1 - v1*v1) * sqrt(v10) (IR %83); the squared length in v10 is
; reused here instead of being recomputed as in the IR.
v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_sqrt_f32_e32 v2, v10 ; 7E04670A
v_mul_f32_e32 v1, v2, v1 ; 10020302
; v2 = rsq_clamp of the squared length of (v5, v6, v0).
v_mul_f32_e32 v2, v5, v5 ; 10040B05
v_mac_f32_e32 v2, v6, v6 ; 3E040D06
v_mac_f32_e32 v2, v0, v0 ; 3E040100
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
s_waitcnt lgkmcnt(0) ; BF8C007F
; v3 = rsq_clamp of the squared length of CONST[10].xyz (s5, s6, s7).
v_mul_f32_e64 v3, s5, s5 ; D2100003 00000A05
v_mac_f32_e64 v3, s6, s6 ; D23E0003 00000C06
v_mac_f32_e64 v3, s7, s7 ; D23E0003 00000E07
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
; Scale both vectors, take v7 = 1 / CONST[1].w, and form
; v0 = dot(scaled vectors) + 2.0 (IR %111).
v_mul_f32_e32 v4, v2, v5 ; 10080B02
v_mul_f32_e32 v5, v2, v6 ; 100A0D02
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mul_f32_e32 v2, s5, v3 ; 10040605
v_mul_f32_e32 v6, s6, v3 ; 100C0606
v_mul_f32_e32 v3, s7, v3 ; 10060607
v_rcp_f32_e32 v7, s4 ; 7E0E5404
v_mul_f32_e32 v2, v2, v4 ; 10040902
v_mac_f32_e32 v2, v6, v5 ; 3E040B06
v_mac_f32_e32 v2, v3, v0 ; 3E040103
v_add_f32_e32 v0, 2.0, v2 ; 060004F4
; The IR's pow(%111, 1.3) is emitted as log, multiply by the literal
; 0x3fa66666, exp. Interleaved: v2 = v7 * v1 (IR %85, the compared value)
; and v1 = |v1 * v7 - 1| (IR %87; the AND clears the top bit).
v_log_f32_e32 v0, v0 ; 7E004F00
v_mul_f32_e32 v2, v7, v1 ; 10040307
v_mad_f32 v1, v1, v7, -1.0 ; D2820001 03CE0F01
v_and_b32_e32 v1, 0x7fffffff, v1 ; 360202FF 7FFFFFFF
v_mul_legacy_f32_e32 v0, 0x3fa66666, v0 ; 0E0000FF 3FA66666
v_exp_f32_e32 v0, v0 ; 7E004B00
; The compare is the negation of the IR's `%85 > 1.0`, so the code that runs
; first is the IR's ELSE block (it loads dwords 0x14-0x23, i.e. IR byte
; offsets 80-140). s[4:5] keeps the mask needed to run the other side later.
; The BB0_1 / BB0_4 branch targets are not printed as labels in this listing.
v_cmp_nlt_f32_e32 vcc, 1.0, v2 ; 7C1C04F2
s_and_saveexec_b64 s[4:5], vcc ; BE84246A
s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E
s_cbranch_execz BB0_1 ; BF880000
; --- IR ELSE block: constants CONST[5]..CONST[8] ---
s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114
s_buffer_load_dword s7, s[0:3], 0x15 ; C2038115
s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116
s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117
s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118
s_buffer_load_dword s11, s[0:3], 0x23 ; C2058123
s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E
s_buffer_load_dword s13, s[0:3], 0x1f ; C206811F
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x21 ; C2078121
s_buffer_load_dword s16, s[0:3], 0x22 ; C2080122
s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119
s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A
s_buffer_load_dword s19, s[0:3], 0x1b ; C209811B
s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C
s_waitcnt lgkmcnt(0) ; BF8C007F
; s11 (dword 0x23, CONST[8].w) is copied to v2 so that s11 can be reloaded
; with dword 0x1d (CONST[7].y).
v_mov_b32_e32 v2, s11 ; 7E04020B
s_buffer_load_dword s11, s[0:3], 0x1d ; C205811D
; v6 = pow(clamp((CONST[5].z - v1) / CONST[5].z), CONST[5].w)   (IR %164)
; v7 = pow(clamp((CONST[5].x - v1) / CONST[5].x), CONST[5].y)   (IR %169)
; Each pow is a log / multiply / exp sequence; the clamp is the `clamp`
; modifier on an add with 0.
v_rcp_f32_e32 v3, s8 ; 7E065408
v_rcp_f32_e32 v4, s6 ; 7E085406
v_sub_f32_e32 v5, s8, v1 ; 080A0208
v_mul_f32_e32 v3, v3, v5 ; 10060B03
v_sub_f32_e32 v5, s6, v1 ; 080A0206
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680
v_log_f32_e32 v3, v3 ; 7E064F03
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_log_f32_e32 v4, v4 ; 7E084F04
v_mul_legacy_f32_e32 v3, s9, v3 ; 0E060609
v_exp_f32_e32 v6, v3 ; 7E0C4B03
v_mul_legacy_f32_e32 v3, s7, v4 ; 0E060807
v_exp_f32_e32 v7, v3 ; 7E0E4B03
; (v3, v4, v5, v2) = CONST[8] + CONST[6] * v7 + CONST[7] * v6, with each
; accumulator seeded from the CONST[8] component.
v_mov_b32_e32 v3, s14 ; 7E06020E
v_mac_f32_e32 v3, s10, v7 ; 3E060E0A
v_mac_f32_e32 v3, s20, v6 ; 3E060C14
v_mov_b32_e32 v4, s15 ; 7E08020F
v_mac_f32_e32 v4, s17, v7 ; 3E080E11
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v4, s11, v6 ; 3E080C0B
v_mov_b32_e32 v5, s16 ; 7E0A0210
v_mac_f32_e32 v5, s18, v7 ; 3E0A0E12
v_mac_f32_e32 v5, s12, v6 ; 3E0A0C0C
v_mac_f32_e32 v2, s19, v7 ; 3E040E13
v_mac_f32_e32 v2, s13, v6 ; 3E040C0D
; Switch exec to the lanes that skipped the block above. In between, s6 is
; loaded with dword 0x24 (CONST[9].x, IR %35), which is only consumed after
; the two sides rejoin.
s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504
s_buffer_load_dword s6, s[0:3], 0x24 ; C2030124
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[4:5] ; 89FE047E
s_cbranch_execz BB0_4 ; BF880000
; --- IR IF block: constants CONST[2]..CONST[4] (dwords 0x8-0x13) ---
s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108
s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109
s_buffer_load_dword s9, s[0:3], 0xa ; C204810A
s_buffer_load_dword s10, s[0:3], 0xb ; C205010B
s_buffer_load_dword s11, s[0:3], 0xc ; C205810C
s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112
s_buffer_load_dword s13, s[0:3], 0x13 ; C2068113
s_buffer_load_dword s14, s[0:3], 0xd ; C207010D
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
s_buffer_load_dword s16, s[0:3], 0xf ; C208010F
s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110
s_buffer_load_dword s18, s[0:3], 0x11 ; C2090111
s_waitcnt lgkmcnt(0) ; BF8C007F
; v6 = pow(clamp((CONST[2].z - v1) / CONST[2].z), CONST[2].w)   (IR %128)
; v1 = pow(clamp((CONST[2].x - v1) / CONST[2].x), CONST[2].y)   (IR %133)
v_rcp_f32_e32 v2, s9 ; 7E045409
v_sub_f32_e32 v3, s9, v1 ; 08060209
v_mul_f32_e32 v2, v2, v3 ; 10040702
v_rcp_f32_e32 v3, s7 ; 7E065407
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_log_f32_e32 v2, v2 ; 7E044F02
v_sub_f32_e32 v1, s7, v1 ; 08020207
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_log_f32_e32 v1, v1 ; 7E024F01
v_mul_legacy_f32_e32 v2, s10, v2 ; 0E04040A
v_exp_f32_e32 v6, v2 ; 7E0C4B02
v_mul_legacy_f32_e32 v1, s8, v1 ; 0E020208
v_exp_f32_e32 v1, v1 ; 7E024B01
; (v3, v4, v5, v2) = CONST[3] * v1 + CONST[4] * v6, the same registers the
; other side wrote.
v_mul_f32_e32 v3, s11, v1 ; 1006020B
v_mul_f32_e32 v4, s14, v1 ; 1008020E
v_mul_f32_e32 v5, s15, v1 ; 100A020F
v_mul_f32_e32 v2, s16, v1 ; 10040210
v_mac_f32_e32 v3, s17, v6 ; 3E060C11
v_mac_f32_e32 v4, s18, v6 ; 3E080C12
v_mac_f32_e32 v5, s12, v6 ; 3E0A0C0C
v_mac_f32_e32 v2, s13, v6 ; 3E040C0D
; Rejoin: OR the saved mask back into exec, then scale all four components
; by s6 (CONST[9].x) and by v0 (the pow result computed before the branch).
s_or_b64 exec, exec, s[4:5] ; 88FE047E
v_mul_f32_e32 v1, s6, v3 ; 10020606
v_mul_f32_e32 v3, s6, v4 ; 10060806
v_mul_f32_e32 v4, s6, v5 ; 10080A06
v_mul_f32_e32 v2, s6, v2 ; 10040406
v_mul_f32_e32 v1, v0, v1 ; 10020300
v_mul_f32_e32 v3, v0, v3 ; 10060700
v_mul_f32_e32 v4, v0, v4 ; 10080900
v_mul_f32_e32 v0, v0, v2 ; 10000500
; Pack (x, y) into v1 and (z, w) into v0 (the IR's llvm.SI.packf16 calls)
; and export the two packed registers, each listed twice.
v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701
v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 780 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: DP3 TEMP[0].x, IN[0].xyzz, IN[0].xyzz
1: RSQ TEMP[1].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[0].xyzz, TEMP[1].xxxx
3: MUL TEMP[1], CONST[0], TEMP[0].xxxx
4: MAD TEMP[1], CONST[1], TEMP[0].yyyy, TEMP[1]
5: MAD TEMP[0].xyz, CONST[2], TEMP[0].zzzz, TEMP[1]
6: MUL TEMP[1], CONST[4], IN[0].xxxx
7: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[1]
8: MAD TEMP[1], CONST[6], IN[0].zzzz, TEMP[1]
9: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[1]
10: MUL TEMP[2], CONST[0], IN[0].xxxx
11: MAD TEMP[2], CONST[1], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[2], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[3], IN[0].wwww, TEMP[2]
14: MOV TEMP[2].xyz, TEMP[2].xyzx
15: MOV TEMP[2].w, TEMP[0].xxxx
16: MOV TEMP[0].xy, TEMP[0].yzyy
17: MOV OUT[2], TEMP[0]
18: MOV OUT[1], TEMP[2]
19: MOV OUT[0], TEMP[1]
20: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI VERT listing directly
; above (instructions 0-20).
; Arguments actually referenced by the body:
;   %1      - array of 16-byte descriptors; element 0 feeds llvm.SI.load.const
;   %4      - array of 16-byte descriptors; element 0 feeds llvm.SI.vs.load.input
;   %5, %7  - summed to form the index operand of llvm.SI.vs.load.input
; No other argument is read.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor 0 of %1; every llvm.SI.load.const below reads through it.
; The second operand looks like a byte offset: 16*n + 4*c lines up with
; CONST[n].c of the `DCL CONST[0..7]` range in the TGSI listing.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0-56: x/y/z of CONST[0], CONST[1], CONST[2], CONST[3]
; (the .w components at offsets 12, 28, 44, 60 are never loaded).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
; Offsets 64-124: all four components of CONST[4], CONST[5], CONST[6], CONST[7].
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
; Fetch IN[0] as a <4 x float> through descriptor 0 of %4, indexed by
; %5 + %7, then split it into scalars: %45..%48 = x, y, z, w.
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
; TGSI 0-2: %53 = DP3 of IN[0].xyz with itself, %54 = clamped reciprocal
; square root, %55..%57 = IN[0].xyz scaled by %54.
%49 = fmul float %45, %45
%50 = fmul float %46, %46
%51 = fadd float %50, %49
%52 = fmul float %47, %47
%53 = fadd float %51, %52
%54 = call float @llvm.AMDGPU.rsq.clamped.f32(float %53)
%55 = fmul float %45, %54
%56 = fmul float %46, %54
%57 = fmul float %47, %54
; TGSI 3-5: scaled vector combined with CONST[0..2].xyz;
;   %68 = x result, %70 = y result, %72 = z result.
%58 = fmul float %13, %55
%59 = fmul float %14, %55
%60 = fmul float %15, %55
%61 = fmul float %16, %56
%62 = fadd float %61, %58
%63 = fmul float %17, %56
%64 = fadd float %63, %59
%65 = fmul float %18, %56
%66 = fadd float %65, %60
%67 = fmul float %19, %57
%68 = fadd float %67, %62
%69 = fmul float %20, %57
%70 = fadd float %69, %64
%71 = fmul float %21, %57
%72 = fadd float %71, %66
; TGSI 6-9: IN[0].xyzw combined with CONST[4..7];
;   %94, %96, %98, %100 = x, y, z, w of the result.
%73 = fmul float %25, %45
%74 = fmul float %26, %45
%75 = fmul float %27, %45
%76 = fmul float %28, %45
%77 = fmul float %29, %46
%78 = fadd float %77, %73
%79 = fmul float %30, %46
%80 = fadd float %79, %74
%81 = fmul float %31, %46
%82 = fadd float %81, %75
%83 = fmul float %32, %46
%84 = fadd float %83, %76
%85 = fmul float %33, %47
%86 = fadd float %85, %78
%87 = fmul float %34, %47
%88 = fadd float %87, %80
%89 = fmul float %35, %47
%90 = fadd float %89, %82
%91 = fmul float %36, %47
%92 = fadd float %91, %84
%93 = fmul float %37, %48
%94 = fadd float %93, %86
%95 = fmul float %38, %48
%96 = fadd float %95, %88
%97 = fmul float %39, %48
%98 = fadd float %97, %90
%99 = fmul float %40, %48
%100 = fadd float %99, %92
; TGSI 10-13: IN[0].xyzw combined with CONST[0..3].xyz;
;   %117, %119, %121 = x, y, z of the result.
%101 = fmul float %13, %45
%102 = fmul float %14, %45
%103 = fmul float %15, %45
%104 = fmul float %16, %46
%105 = fadd float %104, %101
%106 = fmul float %17, %46
%107 = fadd float %106, %102
%108 = fmul float %18, %46
%109 = fadd float %108, %103
%110 = fmul float %19, %47
%111 = fadd float %110, %105
%112 = fmul float %20, %47
%113 = fadd float %112, %107
%114 = fmul float %21, %47
%115 = fadd float %114, %109
%116 = fmul float %22, %48
%117 = fadd float %116, %111
%118 = fmul float %23, %48
%119 = fadd float %118, %113
%120 = fmul float %24, %48
%121 = fadd float %120, %115
; Exports. The fourth i32 operand differs per call (32, 33, 12); it
; presumably selects the export target - confirm against the intrinsic.
; Target 32 <- OUT[1] (GENERIC[0]): (%117, %119, %121) with %68 in w (TGSI 14-15, 18).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %117, float %119, float %121, float %68)
; Target 33 <- OUT[2] (GENERIC[1]): (%70, %72, %72, 0.0). TGSI 16 moves y/z
; into x/y, z keeps its value, and no TGSI instruction writes TEMP[0].w.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %70, float %72, float %72, float 0.000000e+00)
; Target 12 <- OUT[0] (POSITION): the CONST[4..7] result. Only this call has
; the third i32 operand set to 1 (presumably a "last export" flag - confirm).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %94, float %96, float %98, float %100)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s13, s[0:3], 0xc ; C206810C
s_buffer_load_dword s14, s[0:3], 0xd ; C207010D
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113
s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114
s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117
s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118
s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119
s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A
s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B
s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C
s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D
s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mac_f32_e32 v4, s20, v1 ; 3E080214
v_mul_f32_e32 v5, s17, v0 ; 100A0011
v_mac_f32_e32 v5, s21, v1 ; 3E0A0215
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mac_f32_e32 v7, s23, v1 ; 3E0E0217
v_mac_f32_e32 v4, s24, v2 ; 3E080418
v_mac_f32_e32 v5, s25, v2 ; 3E0A0419
v_mac_f32_e32 v6, s26, v2 ; 3E0C041A
v_mac_f32_e32 v7, s27, v2 ; 3E0E041B
v_mac_f32_e32 v4, s28, v3 ; 3E08061C
v_mac_f32_e32 v5, s29, v3 ; 3E0A061D
v_mac_f32_e32 v6, s30, v3 ; 3E0C061E
v_mac_f32_e32 v7, s0, v3 ; 3E0E0600
v_mul_f32_e32 v8, s4, v0 ; 10100004
v_mac_f32_e32 v8, s7, v1 ; 3E100207
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mac_f32_e32 v8, s13, v3 ; 3E10060D
v_mul_f32_e32 v9, s5, v0 ; 10120005
v_mac_f32_e32 v9, s8, v1 ; 3E120208
v_mac_f32_e32 v9, s11, v2 ; 3E12040B
v_mac_f32_e32 v9, s14, v3 ; 3E12060E
v_mul_f32_e32 v10, v0, v0 ; 10140100
v_mac_f32_e32 v10, v1, v1 ; 3E140301
v_mac_f32_e32 v10, v2, v2 ; 3E140502
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
v_mul_f32_e32 v11, s6, v0 ; 10160006
v_mac_f32_e32 v11, s9, v1 ; 3E160209
v_mac_f32_e32 v11, s12, v2 ; 3E16040C
v_mac_f32_e32 v11, s15, v3 ; 3E16060F
v_mul_f32_e32 v0, v10, v0 ; 1000010A
v_mul_f32_e32 v1, v10, v1 ; 1002030A
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mul_f32_e32 v3, s4, v0 ; 10060004
v_mac_f32_e32 v3, s7, v1 ; 3E060207
v_mac_f32_e32 v3, s10, v2 ; 3E06040A
exp 15, 32, 0, 0, 0, v8, v9, v11, v3 ; F800020F 030B0908
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, s5, v0 ; 10060005
v_mul_f32_e32 v0, s6, v0 ; 10000006
v_mac_f32_e32 v3, s8, v1 ; 3E060208
v_mac_f32_e32 v0, s9, v1 ; 3E000209
v_mac_f32_e32 v3, s11, v2 ; 3E06040B
v_mac_f32_e32 v0, s12, v2 ; 3E00040C
v_mov_b32_e32 v1, 0 ; 7E020280
exp 15, 33, 0, 0, 0, v3, v0, v0, v1 ; F800021F 01000003
exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 356 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..4]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 2.0000, 1.3000, 0.5000, 1.0000}
IMM[1] FLT32 { -1.0000, 3.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[0].wwww
1: MOV TEMP[0].yz, IN[1].yxyy
2: MOV TEMP[1].xyz, -CONST[0].xyzx
3: ADD TEMP[2].xyz, CONST[1].xyzz, TEMP[1].xyzz
4: ADD TEMP[1].xyz, IN[0].xyzz, TEMP[1].xyzz
5: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
6: RSQ TEMP[3].x, TEMP[3].xxxx
7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
8: DP3 TEMP[4].x, TEMP[1].xyzz, TEMP[1].xyzz
9: RSQ TEMP[4].x, TEMP[4].xxxx
10: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
11: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
12: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz
13: RSQ TEMP[3].x, TEMP[3].xxxx
14: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx
15: DP3 TEMP[3].x, CONST[4].xyzz, CONST[4].xyzz
16: RSQ TEMP[3].x, TEMP[3].xxxx
17: MUL TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx
18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[3].xyzz
19: ADD TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
20: POW TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy
21: MUL TEMP[3].x, CONST[3].xxxx, TEMP[0].xxxx
22: MUL TEMP[0], CONST[2], TEMP[0].xxxx
23: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz
24: MOV TEMP[0].xyz, TEMP[0].xyzx
25: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx
26: ADD TEMP[1].x, IMM[0].wwww, -TEMP[1].xxxx
27: SQRT TEMP[1].x, TEMP[1].xxxx
28: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
31: RCP TEMP[2].x, CONST[1].wwww
32: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx, IMM[1].xxxx
33: ABS TEMP[1].x, TEMP[1].xxxx
34: ADD TEMP[1].x, TEMP[3].xxxx, -TEMP[1].xxxx
35: RCP TEMP[2].x, TEMP[3].xxxx
36: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
37: MOV_SAT TEMP[1].x, TEMP[1].xxxx
38: POW TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy
39: MOV TEMP[0].w, TEMP[1].xxxx
40: MOV OUT[0], TEMP[0]
41: END
; ModuleID = 'tgsi'
; Fragment shader entry point: LLVM IR form of the TGSI FRAG program printed
; immediately before this module. It reads scalar constants through
; llvm.SI.load.const, interpolates six input channels, evaluates the TGSI
; arithmetic and exports one packed-f16 colour.
; NOTE(review): the mapping "byte offset = 16 * CONST index + 4 * component"
; used in the comments below is inferred from the TGSI listing - confirm.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Load element 0 of the array behind argument %1; every load.const below
; reads from this 16-byte value.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Offsets 0/4/8: presumably CONST[0].xyz.
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
; Offsets 16..28: presumably CONST[1].xyzw.
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
; Offsets 32..40: presumably CONST[2].xyz (offset 44 is never loaded here).
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
; Offset 48: presumably CONST[3].x.
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
; Offsets 64..72: presumably CONST[4].xyz.
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
; Interpolated inputs. First argument looks like the channel and the second
; the attribute index, i.e. %38..%41 = IN[0].xyzw and %42..%43 = IN[1].xy
; (TODO confirm against the intrinsic definition).
%38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
; Vector A = (%27,%28,%29) - (%24,%25,%26)   (TGSI 2-3).
%44 = fsub float %27, %24
%45 = fsub float %28, %25
%46 = fsub float %29, %26
; Vector B = (%38,%39,%40) - (%24,%25,%26)   (TGSI 4).
%47 = fsub float %38, %24
%48 = fsub float %39, %25
%49 = fsub float %40, %26
; Scale A by rsq(dot(A, A))   (TGSI 5-7).
%50 = fmul float %44, %44
%51 = fmul float %45, %45
%52 = fadd float %51, %50
%53 = fmul float %46, %46
%54 = fadd float %52, %53
%55 = call float @llvm.AMDGPU.rsq.clamped.f32(float %54)
%56 = fmul float %44, %55
%57 = fmul float %45, %55
%58 = fmul float %46, %55
; Scale B by rsq(dot(B, B))   (TGSI 8-10).
%59 = fmul float %47, %47
%60 = fmul float %48, %48
%61 = fadd float %60, %59
%62 = fmul float %49, %49
%63 = fadd float %61, %62
%64 = call float @llvm.AMDGPU.rsq.clamped.f32(float %63)
%65 = fmul float %47, %64
%66 = fmul float %48, %64
%67 = fmul float %49, %64
; %72 = dot of the two scaled vectors   (TGSI 11).
%68 = fmul float %56, %65
%69 = fmul float %57, %66
%70 = fadd float %69, %68
%71 = fmul float %58, %67
%72 = fadd float %70, %71
; Scale the vector (%41, %42, %43) by rsq of its squared length   (TGSI 12-14).
%73 = fmul float %41, %41
%74 = fmul float %42, %42
%75 = fadd float %74, %73
%76 = fmul float %43, %43
%77 = fadd float %75, %76
%78 = call float @llvm.AMDGPU.rsq.clamped.f32(float %77)
%79 = fmul float %41, %78
%80 = fmul float %42, %78
%81 = fmul float %43, %78
; Scale the constant vector (%35, %36, %37) the same way   (TGSI 15-17).
%82 = fmul float %35, %35
%83 = fmul float %36, %36
%84 = fadd float %83, %82
%85 = fmul float %37, %37
%86 = fadd float %84, %85
%87 = call float @llvm.AMDGPU.rsq.clamped.f32(float %86)
%88 = fmul float %35, %87
%89 = fmul float %36, %87
%90 = fmul float %37, %87
; %95 = dot of those two scaled vectors   (TGSI 18).
%91 = fmul float %79, %88
%92 = fmul float %80, %89
%93 = fadd float %92, %91
%94 = fmul float %81, %90
%95 = fadd float %93, %94
; %97 = pow(%95 + 2.0, e); the exponent literal is TGSI IMM[0].y, printed
; there as 1.3000   (TGSI 19-20).
%96 = fadd float %95, 2.000000e+00
%97 = call float @llvm.pow.f32(float %96, float 0x3FF4CCCCC0000000)
; %98 = %34 * %97 (TGSI 21); %99..%101 = (%31,%32,%33) * %97 (TGSI 22).
%98 = fmul float %34, %97
%99 = fmul float %31, %97
%100 = fmul float %32, %97
%101 = fmul float %33, %97
; Halve the three colour channels   (TGSI 23).
%102 = fmul float %99, 5.000000e-01
%103 = fmul float %100, 5.000000e-01
%104 = fmul float %101, 5.000000e-01
; %107 = sqrt(1 - %72 * %72)   (TGSI 25-27).
%105 = fmul float %72, %72
%106 = fsub float 1.000000e+00, %105
%107 = call float @llvm.sqrt.f32(float %106)
; %113 = sqrt(dot(A, A)); recomputes the same sum of squares as %50..%54
; (TGSI 28-29).
%108 = fmul float %44, %44
%109 = fmul float %45, %45
%110 = fadd float %109, %108
%111 = fmul float %46, %46
%112 = fadd float %110, %111
%113 = call float @llvm.sqrt.f32(float %112)
; %118 = | %107 * %113 * (1 / %30) - 1 |   (TGSI 30-33).
%114 = fmul float %107, %113
%115 = fdiv float 1.000000e+00, %30
%116 = fmul float %114, %115
%117 = fadd float %116, -1.000000e+00
%118 = call float @llvm.fabs.f32(float %117)
; %121 = (%98 - %118) * (1 / %98), then clamped to [0, 1] and raised to the
; power 3.0   (TGSI 34-38). The result is the output w channel (TGSI 39).
%119 = fsub float %98, %118
%120 = fdiv float 1.000000e+00, %98
%121 = fmul float %119, %120
%122 = call float @llvm.AMDIL.clamp.(float %121, float 0.000000e+00, float 1.000000e+00)
%123 = call float @llvm.pow.f32(float %122, float 3.000000e+00)
; Pack (x, y) and (z, w) into two i32 values with llvm.SI.packf16 and
; reinterpret them as float so they fit the export signature.
%124 = call i32 @llvm.SI.packf16(float %102, float %103)
%125 = bitcast i32 %124 to float
%126 = call i32 @llvm.SI.packf16(float %104, float %123)
%127 = bitcast i32 %126 to float
; Single export; each packed value is passed twice (%125, %127, %125, %127).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %125, float %127, float %125, float %127)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_mov_b32 m0, s9 ; BEFC0309
s_buffer_load_dword s9, s[0:3], 0xc ; C204810C
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111
s_buffer_load_dword s0, s[0:3], 0x12 ; C2000112
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v1, s4, v2 ; 0A020404
v_mov_b32_e32 v2, s4 ; 7E040204
v_sub_f32_e32 v2, s7, v2 ; 08040407
v_subrev_f32_e32 v3, s5, v3 ; 0A060605
v_mov_b32_e32 v7, s5 ; 7E0E0205
v_sub_f32_e32 v7, s8, v7 ; 080E0E08
v_subrev_f32_e32 v4, s6, v4 ; 0A080806
v_mov_b32_e32 v8, s6 ; 7E100206
v_sub_f32_e32 v8, s10, v8 ; 0810100A
v_mul_f32_e32 v9, v2, v2 ; 10120502
v_mac_f32_e32 v9, v7, v7 ; 3E120F07
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_rsq_clamp_f32_e32 v10, v9 ; 7E145909
v_mul_f32_e32 v11, v1, v1 ; 10160301
v_mac_f32_e32 v11, v3, v3 ; 3E160703
v_mac_f32_e32 v11, v4, v4 ; 3E160904
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mul_f32_e32 v7, v10, v7 ; 100E0F0A
v_mul_f32_e32 v8, v10, v8 ; 1010110A
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mul_f32_e32 v1, v1, v2 ; 10020501
v_mul_f32_e32 v2, v11, v3 ; 1004070B
v_mac_f32_e32 v1, v2, v7 ; 3E020F02
v_mul_f32_e32 v2, v5, v5 ; 10040B05
v_mac_f32_e32 v2, v6, v6 ; 3E040D06
v_mac_f32_e32 v2, v0, v0 ; 3E040100
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mul_f32_e64 v3, s15, s15 ; D2100003 00001E0F
v_mac_f32_e64 v3, s16, s16 ; D23E0003 00002010
v_mac_f32_e64 v3, s0, s0 ; D23E0003 00000000
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mac_f32_e32 v1, v4, v8 ; 3E021104
v_mul_f32_e32 v4, v2, v5 ; 10080B02
v_mul_f32_e32 v5, s15, v3 ; 100A060F
v_mul_f32_e32 v4, v5, v4 ; 10080905
v_mul_f32_e32 v5, v2, v6 ; 100A0D02
v_mul_f32_e32 v6, s16, v3 ; 100C0610
v_mac_f32_e32 v4, v6, v5 ; 3E080B06
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mul_f32_e32 v2, s0, v3 ; 10040600
v_mac_f32_e32 v4, v2, v0 ; 3E080102
v_add_f32_e32 v0, 2.0, v4 ; 060008F4
v_log_f32_e32 v0, v0 ; 7E004F00
v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301
v_rcp_f32_e32 v2, s11 ; 7E04540B
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_sqrt_f32_e32 v3, v9 ; 7E066709
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_mad_f32 v1, v1, v2, -1.0 ; D2820001 03CE0501
v_mul_legacy_f32_e32 v0, 0x3fa66666, v0 ; 0E0000FF 3FA66666
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mad_f32 v1, s9, v0, -|v1| ; D2820401 84060009
v_mul_f32_e32 v2, s9, v0 ; 10040009
v_rcp_f32_e32 v2, v2 ; 7E045502
v_mul_f32_e32 v3, s12, v0 ; 1006000C
v_mul_f32_e32 v4, s13, v0 ; 1008000D
v_mul_f32_e32 v0, s14, v0 ; 1000000E
v_mul_f32_e32 v1, v2, v1 ; 10020302
v_mul_f32_e32 v2, 0.5, v3 ; 100406F0
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_log_f32_e32 v1, v1 ; 7E024F01
v_mul_f32_e32 v3, 0.5, v4 ; 100608F0
v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702
v_mul_f32_e32 v0, 0.5, v0 ; 100000F0
v_mul_legacy_f32_e32 v1, 0x40400000, v1 ; 0E0202FF 40400000
v_exp_f32_e32 v1, v1 ; 7E024B01
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 444 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MAD TEMP[1].xy, IN[2].xyyy, CONST[1].xyyy, CONST[1].zwww
5: MAD TEMP[2].x, TEMP[0].zzzz, CONST[0].zzzz, CONST[0].wwww
6: MOV TEMP[1].z, TEMP[2].xxxx
7: MOV OUT[2], TEMP[1]
8: MOV OUT[0], TEMP[0]
9: MOV OUT[1], IN[1]
10: END
; ModuleID = 'tgsi'
; Vertex shader entry point: LLVM IR form of the TGSI VERT program printed
; immediately before this module. It fetches three vertex inputs, computes a
; 4x4 constant-matrix transform of input 0, a scale/bias of input 2's xy and
; one extra scalar, then issues three exports.
; NOTE(review): the mapping "byte offset = 16 * CONST index + 4 * component"
; used in the comments below is inferred from the TGSI listing - confirm.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load element 0 of the array behind argument %1; every load.const below
; reads from this 16-byte value.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 8/12: presumably CONST[0].zw.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
; Offsets 16..28: presumably CONST[1].xyzw.
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
; Offsets 32..92: presumably CONST[2] through CONST[5], four floats each.
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; Vertex input 0: element 0 of the array behind %4, fetched at index %5 + %7.
; All four components are used (IN[0] in the TGSI).
%35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = add i32 %5, %7
%38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
; Vertex input 1: element 1 of the same array, same index expression
; (%45 repeats %37). Passed through unchanged to an export below.
%43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = add i32 %5, %7
%46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
%49 = extractelement <4 x float> %46, i32 2
%50 = extractelement <4 x float> %46, i32 3
; Vertex input 2: element 2; only components 0 and 1 are extracted.
%51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0
%53 = add i32 %5, %7
%54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53)
%55 = extractelement <4 x float> %54, i32 0
%56 = extractelement <4 x float> %54, i32 1
; Transform of input 0 (TGSI 0-3):
;   (%78,%80,%82,%84) = (%19..%22)*x + (%23..%26)*y + (%27..%30)*z + (%31..%34)*w
; with (x, y, z, w) = (%39, %40, %41, %42).
%57 = fmul float %19, %39
%58 = fmul float %20, %39
%59 = fmul float %21, %39
%60 = fmul float %22, %39
%61 = fmul float %23, %40
%62 = fadd float %61, %57
%63 = fmul float %24, %40
%64 = fadd float %63, %58
%65 = fmul float %25, %40
%66 = fadd float %65, %59
%67 = fmul float %26, %40
%68 = fadd float %67, %60
%69 = fmul float %27, %41
%70 = fadd float %69, %62
%71 = fmul float %28, %41
%72 = fadd float %71, %64
%73 = fmul float %29, %41
%74 = fadd float %73, %66
%75 = fmul float %30, %41
%76 = fadd float %75, %68
%77 = fmul float %31, %42
%78 = fadd float %77, %70
%79 = fmul float %32, %42
%80 = fadd float %79, %72
%81 = fmul float %33, %42
%82 = fadd float %81, %74
%83 = fmul float %34, %42
%84 = fadd float %83, %76
; Scale and bias input 2's xy: %86 = %55*%15 + %17, %88 = %56*%16 + %18
; (TGSI 4).
%85 = fmul float %55, %15
%86 = fadd float %85, %17
%87 = fmul float %56, %16
%88 = fadd float %87, %18
; %90 = transformed z (%82) * %13 + %14   (TGSI 5).
%89 = fmul float %82, %13
%90 = fadd float %89, %14
; Three exports. The fourth i32 argument (32, 33, 12) differs per call and
; presumably selects the target: 32/33 for the two generic outputs and 12 for
; POSITION, judging by the TGSI declarations - TODO confirm.
; Input 1 passed through unchanged (TGSI 9).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %49, float %50)
; Scaled xy plus %90; the w slot is the literal 0.0 (TGSI 4-7 never write
; TEMP[1].w).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float %90, float 0.000000e+00)
; Transformed input 0 (TGSI 8).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900
s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
s_buffer_load_dword s9, s[0:3], 0xd ; C204810D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s16 ; 7E000210
s_buffer_load_dword s10, s[0:3], 0xe ; C205010E
s_buffer_load_dword s11, s[0:3], 0xf ; C205810F
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_buffer_load_dword s13, s[0:3], 0x2 ; C2068102
s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106
s_buffer_load_dword s15, s[0:3], 0x4 ; C2078104
s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v11, s14 ; 7E16020E
v_mac_f32_e32 v11, s15, v9 ; 3E16120F
v_mac_f32_e32 v0, s16, v10 ; 3E001410
s_buffer_load_dword s14, s[0:3], 0x3 ; C2070103
s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111
s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112
s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113
s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114
s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115
s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
v_mul_f32_e32 v9, s4, v1 ; 10120204
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_mul_f32_e32 v12, s6, v1 ; 10180206
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v9, s12, v3 ; 3E12060C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v10, s15, v3 ; 3E14060F
v_mac_f32_e32 v12, s16, v3 ; 3E180610
v_mac_f32_e32 v1, s17, v3 ; 3E020611
v_mac_f32_e32 v9, s18, v4 ; 3E120812
v_mac_f32_e32 v10, s19, v4 ; 3E140813
v_mac_f32_e32 v12, s20, v4 ; 3E180814
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
v_mov_b32_e32 v2, s14 ; 7E04020E
v_mac_f32_e32 v2, s13, v12 ; 3E04180D
v_mov_b32_e32 v3, 0 ; 7E060280
exp 15, 33, 0, 0, 0, v11, v0, v2, v3 ; F800021F 0302000B
exp 15, 12, 0, 1, 0, v9, v10, v12, v1 ; F80008CF 010C0A09
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 268 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..1], LOCAL
0: MOV TEMP[0].xy, IN[1].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MUL TEMP[0], IN[0], TEMP[0]
3: MOV TEMP[1].w, TEMP[0].wwww
4: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[0].wwww
5: MOV_SAT TEMP[0].x, IN[1].zzzz
6: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xxxx
7: MOV OUT[0], TEMP[1]
8: END
; ModuleID = 'tgsi'
; Fragment shader entry point: LLVM IR form of the TGSI FRAG program printed
; immediately before this module. It samples one 2D texture, multiplies the
; texel by the interpolated IN[0], multiplies rgb by the resulting alpha and
; by a clamped IN[1].z, and exports one packed-f16 colour.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; First 32 bytes behind %3 and first 16 bytes behind %2; both are handed to
; the sample intrinsic below. Presumably the texture resource and the sampler
; state for SAMP[0]/SVIEW[0] - TODO confirm.
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
; Interpolated inputs. First argument looks like the channel and the second
; the attribute index, i.e. %26..%29 = IN[0].xyzw and %30..%32 = IN[1].xyz
; (TODO confirm against the intrinsic definition).
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
; Build the <2 x i32> coordinate operand from the bit patterns of %30 and %31
; (the sample intrinsic takes its coordinates as integers).
%33 = bitcast float %30 to i32
%34 = bitcast float %31 to i32
%35 = insertelement <2 x i32> undef, i32 %33, i32 0
%36 = insertelement <2 x i32> %35, i32 %34, i32 1
; Texture fetch (TGSI 1) and split into four channels.
%37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %23, <16 x i8> %25, i32 2)
%38 = extractelement <4 x float> %37, i32 0
%39 = extractelement <4 x float> %37, i32 1
%40 = extractelement <4 x float> %37, i32 2
%41 = extractelement <4 x float> %37, i32 3
; Per-channel product of (%26..%29) and the texel (TGSI 2).
%42 = fmul float %26, %38
%43 = fmul float %27, %39
%44 = fmul float %28, %40
%45 = fmul float %29, %41
; Multiply the three colour channels by the alpha product %45 (TGSI 4).
%46 = fmul float %42, %45
%47 = fmul float %43, %45
%48 = fmul float %44, %45
; Clamp %32 to [0, 1] (TGSI 5) and scale the colour channels by it (TGSI 6).
%49 = call float @llvm.AMDIL.clamp.(float %32, float 0.000000e+00, float 1.000000e+00)
%50 = fmul float %46, %49
%51 = fmul float %47, %49
%52 = fmul float %48, %49
; Pack (x, y) and (z, w) with llvm.SI.packf16; w is the alpha product %45,
; which is not scaled by %49 (TGSI 3). The bitcasts make the packed values
; fit the float export signature.
%53 = call i32 @llvm.SI.packf16(float %50, float %51)
%54 = bitcast i32 %53 to float
%55 = call i32 @llvm.SI.packf16(float %52, float %45)
%56 = bitcast i32 %55 to float
; Single export; each packed value is passed twice (%54, %56, %54, %56).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %54, float %56, float %54, float %56)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020606
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, v6, v2 ; 10020506
v_mul_f32_e32 v2, v7, v3 ; 10040707
v_mul_f32_e32 v3, v8, v4 ; 10060908
v_mul_f32_e32 v4, v9, v5 ; 10080B09
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v1, v0, v1 ; 10020300
v_mul_f32_e32 v2, v0, v2 ; 10040500
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v0, v4 ; 5E000900
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 12
Code Size: 156 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL CONST[0..4]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 0.0000, -0.5000, 0.5000, 0.0000}
0: MOV TEMP[0].zw, IMM[0].xxxx
1: ADD TEMP[0].xy, IN[1].xyyy, CONST[0].xyyy
2: MOV TEMP[1].zw, IMM[0].xxxx
3: MAD TEMP[1].xy, CONST[0].xyyy, IMM[0].yyyy, IN[1].xyyy
4: MOV TEMP[2].zw, IMM[0].xxxx
5: MAD TEMP[2].xy, CONST[0].xyyy, IMM[0].zyyy, IN[1].xyyy
6: MOV TEMP[3].zw, IMM[0].xxxx
7: MAD TEMP[3].xy, CONST[0].xyyy, IMM[0].yzzz, IN[1].xyyy
8: MUL TEMP[4], CONST[1], IN[0].xxxx
9: MAD TEMP[4], CONST[2], IN[0].yyyy, TEMP[4]
10: MAD TEMP[4], CONST[3], IN[0].zzzz, TEMP[4]
11: MAD TEMP[4], CONST[4], IN[0].wwww, TEMP[4]
12: MOV OUT[1], TEMP[0]
13: MOV OUT[2], TEMP[1]
14: MOV OUT[3], TEMP[2]
15: MOV OUT[4], TEMP[3]
16: MOV OUT[0], TEMP[4]
17: END
; ModuleID = 'tgsi'
; Vertex shader entry point for the TGSI program listed directly above.
; It exports four 2D coordinate sets built from IN[1].xy and CONST[0].xy
; (export targets 32..35, matching OUT[1..4]) and a position obtained by
; multiply-accumulating CONST[1..4] with IN[0].x/.y/.z/.w (export target 12,
; matching OUT[0]).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte descriptor at element 0 of argument %1; every constant
; below is read through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0 and 4 are used below where the TGSI reads CONST[0].xy.
; NOTE(review): offsets look like bytes with 16 per vec4 - confirm.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
; Offsets 16..28, 32..44, 48..60 and 64..76 are used as CONST[1], CONST[2],
; CONST[3] and CONST[4] (x, y, z, w each) in the multiply-accumulate chain.
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; Fetch the first vertex input (used as IN[0]) via descriptor element 0 of
; argument %4, indexed by %5 + %7.
%31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = add i32 %5, %7
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
; Fetch the second vertex input (used as IN[1]) via descriptor element 1;
; only its x and y components are extracted.
%39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = add i32 %5, %7
%42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
; TEMP[0].xy = IN[1].xy + CONST[0].xy
%45 = fadd float %43, %13
%46 = fadd float %44, %14
; TEMP[1].xy = CONST[0].xy * (-0.5, -0.5) + IN[1].xy
%47 = fmul float %13, -5.000000e-01
%48 = fadd float %47, %43
%49 = fmul float %14, -5.000000e-01
%50 = fadd float %49, %44
; TEMP[2].xy = CONST[0].xy * (0.5, -0.5) + IN[1].xy
%51 = fmul float %13, 5.000000e-01
%52 = fadd float %51, %43
%53 = fmul float %14, -5.000000e-01
%54 = fadd float %53, %44
; TEMP[3].xy = CONST[0].xy * (-0.5, 0.5) + IN[1].xy
%55 = fmul float %13, -5.000000e-01
%56 = fadd float %55, %43
%57 = fmul float %14, 5.000000e-01
%58 = fadd float %57, %44
; TEMP[4] = CONST[1] * IN[0].x
%59 = fmul float %15, %35
%60 = fmul float %16, %35
%61 = fmul float %17, %35
%62 = fmul float %18, %35
; TEMP[4] += CONST[2] * IN[0].y
%63 = fmul float %19, %36
%64 = fadd float %63, %59
%65 = fmul float %20, %36
%66 = fadd float %65, %60
%67 = fmul float %21, %36
%68 = fadd float %67, %61
%69 = fmul float %22, %36
%70 = fadd float %69, %62
; TEMP[4] += CONST[3] * IN[0].z
%71 = fmul float %23, %37
%72 = fadd float %71, %64
%73 = fmul float %24, %37
%74 = fadd float %73, %66
%75 = fmul float %25, %37
%76 = fadd float %75, %68
%77 = fmul float %26, %37
%78 = fadd float %77, %70
; TEMP[4] += CONST[4] * IN[0].w
%79 = fmul float %27, %38
%80 = fadd float %79, %72
%81 = fmul float %28, %38
%82 = fadd float %81, %74
%83 = fmul float %29, %38
%84 = fadd float %83, %76
%85 = fmul float %30, %38
%86 = fadd float %85, %78
; Export TEMP[0..3] (z and w are the 0.0 immediates) to targets 32..35, then
; TEMP[4] to target 12. Only the last export passes 1 as its third argument.
; NOTE(review): presumably the "done" flag - confirm against the intrinsic.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %48, float %50, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %52, float %54, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %56, float %58, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %80, float %82, float %84, float %86)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose IR precedes this listing.
; Read two 4-dword descriptors from s[8:9] (offsets 0x0 and 0x4) and fetch
; both vertex inputs at index s10 + v0. v[1:4] is used below as IN[0] and
; v[5:6] as IN[1].xy.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
; Descriptor used for all s_buffer_load_dword constant reads below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Constant dwords 0x0/0x1 (used as CONST[0].xy) and 0x4..0x6 (used as
; CONST[1].xyz). v0 holds the 0.0 exported in the z/w slots.
; NOTE(review): dword offsets appear to be the IR byte offsets / 4 - confirm.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
v_mov_b32_e32 v0, 0 ; 7E000280
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_waitcnt lgkmcnt(0) ; BF8C007F
; Target 32: IN[1].xy + CONST[0].xy
v_add_f32_e32 v7, s4, v5 ; 060E0A04
v_add_f32_e32 v8, s5, v6 ; 06100C05
exp 15, 32, 0, 0, 0, v7, v8, v0, v0 ; F800020F 00000807
s_waitcnt expcnt(0) ; BF8C070F
; Targets 33..35 reuse shared terms: v7 = x - 0.5*CONST[0].x (targets 33 and
; 35), v8 = y - 0.5*CONST[0].y (targets 33 and 34), v5 = x + 0.5*CONST[0].x,
; v6 = y + 0.5*CONST[0].y.
v_mad_f32 v7, -0.5, s4, v5 ; D2820007 041408F1
v_mad_f32 v5, 0.5, s4, v5 ; D2820005 041408F0
v_mad_f32 v8, -0.5, s5, v6 ; D2820008 04180AF1
exp 15, 33, 0, 0, 0, v7, v8, v0, v0 ; F800021F 00000807
exp 15, 34, 0, 0, 0, v5, v8, v0, v0 ; F800022F 00000805
v_mac_f32_e64 v6, 0.5, s5 ; D23E0006 00000AF0
exp 15, 35, 0, 0, 0, v7, v6, v0, v0 ; F800023F 00000607
; Remaining constants: dword 0x7 (CONST[1].w, reusing s4), 0x8..0xb
; (CONST[2], with s5 reused for .x), 0xc..0xf (CONST[3]) and 0x10..0x13
; (CONST[4], with the last dword overwriting s0).
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xb ; C205810B
s_buffer_load_dword s12, s[0:3], 0xc ; C206010C
s_buffer_load_dword s13, s[0:3], 0xd ; C206810D
s_buffer_load_dword s14, s[0:3], 0xe ; C207010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
; Position accumulation into v0, v5, v6, v1 (x, y, z, w):
; CONST[1]*IN[0].x, then += CONST[2]*IN[0].y, CONST[3]*IN[0].z, CONST[4]*IN[0].w.
v_mul_f32_e32 v0, s6, v1 ; 10000206
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v5, s7, v1 ; 100A0207
v_mac_f32_e32 v5, s9, v2 ; 3E0A0409
v_mul_f32_e32 v6, s8, v1 ; 100C0208
v_mac_f32_e32 v6, s10, v2 ; 3E0C040A
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; Final export: the accumulated position to target 12.
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 280 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[1].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: ADD TEMP[0], TEMP[0], TEMP[1]
5: MOV TEMP[1].xy, IN[2].xyyy
6: TEX TEMP[1], TEMP[1], SAMP[0], 2D
7: MOV TEMP[2].xy, IN[3].xyyy
8: TEX TEMP[2], TEMP[2], SAMP[0], 2D
9: ADD TEMP[1], TEMP[1], TEMP[2]
10: ADD TEMP[0], TEMP[0], TEMP[1]
11: MUL TEMP[0], TEMP[0], IMM[0].xxxx
12: MAX TEMP[0], TEMP[0], IMM[0].yyyy
13: MOV OUT[0], TEMP[0]
14: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI program listed directly above.
; It samples the same texture at the four interpolated coordinates
; IN[0..3].xy, sums the results, scales by 0.25, clamps each channel to be
; at least 0.0, packs the result to f16 pairs and exports it.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %3 (<8 x i32>) and element 0 of argument %2
; (<4 x i32>) are passed to every sample call below.
; NOTE(review): presumably the SVIEW[0] resource and SAMP[0] state - confirm.
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
; Interpolate two values for each of four inputs. The first argument
; selects 0 or 1 and the second runs 0..3, matching the .xy reads of
; IN[0..3] in the TGSI.
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
; Sample 1: coordinates IN[0].xy, bit-cast to i32 because the sample
; intrinsic takes a <2 x i32> address.
%34 = bitcast float %26 to i32
%35 = bitcast float %27 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
; Sample 2: coordinates IN[1].xy.
%45 = bitcast float %28 to i32
%46 = bitcast float %29 to i32
%47 = insertelement <2 x i32> undef, i32 %45, i32 0
%48 = insertelement <2 x i32> %47, i32 %46, i32 1
%49 = bitcast <8 x i32> %23 to <32 x i8>
%50 = bitcast <4 x i32> %25 to <16 x i8>
%51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2)
%52 = extractelement <4 x float> %51, i32 0
%53 = extractelement <4 x float> %51, i32 1
%54 = extractelement <4 x float> %51, i32 2
%55 = extractelement <4 x float> %51, i32 3
; Partial sum: sample 1 + sample 2.
%56 = fadd float %41, %52
%57 = fadd float %42, %53
%58 = fadd float %43, %54
%59 = fadd float %44, %55
; Sample 3: coordinates IN[2].xy.
%60 = bitcast float %30 to i32
%61 = bitcast float %31 to i32
%62 = insertelement <2 x i32> undef, i32 %60, i32 0
%63 = insertelement <2 x i32> %62, i32 %61, i32 1
%64 = bitcast <8 x i32> %23 to <32 x i8>
%65 = bitcast <4 x i32> %25 to <16 x i8>
%66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = extractelement <4 x float> %66, i32 2
%70 = extractelement <4 x float> %66, i32 3
; Sample 4: coordinates IN[3].xy.
%71 = bitcast float %32 to i32
%72 = bitcast float %33 to i32
%73 = insertelement <2 x i32> undef, i32 %71, i32 0
%74 = insertelement <2 x i32> %73, i32 %72, i32 1
%75 = bitcast <8 x i32> %23 to <32 x i8>
%76 = bitcast <4 x i32> %25 to <16 x i8>
%77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2)
%78 = extractelement <4 x float> %77, i32 0
%79 = extractelement <4 x float> %77, i32 1
%80 = extractelement <4 x float> %77, i32 2
%81 = extractelement <4 x float> %77, i32 3
; Partial sum: sample 3 + sample 4, then the total of all four samples.
%82 = fadd float %67, %78
%83 = fadd float %68, %79
%84 = fadd float %69, %80
%85 = fadd float %70, %81
%86 = fadd float %56, %82
%87 = fadd float %57, %83
%88 = fadd float %58, %84
%89 = fadd float %59, %85
; Scale by IMM[0].x (0.25) to turn the sum into an average.
%90 = fmul float %86, 2.500000e-01
%91 = fmul float %87, 2.500000e-01
%92 = fmul float %88, 2.500000e-01
%93 = fmul float %89, 2.500000e-01
; MAX with IMM[0].y (0.0) per channel.
%94 = call float @llvm.maxnum.f32(float %90, float 0.000000e+00)
%95 = call float @llvm.maxnum.f32(float %91, float 0.000000e+00)
%96 = call float @llvm.maxnum.f32(float %92, float 0.000000e+00)
%97 = call float @llvm.maxnum.f32(float %93, float 0.000000e+00)
; Pack (x, y) and (z, w) into two 32-bit values and export them; the same
; two packed values fill all four export slots.
%98 = call i32 @llvm.SI.packf16(float %94, float %95)
%99 = bitcast i32 %98 to float
%100 = call i32 @llvm.SI.packf16(float %96, float %97)
%101 = bitcast i32 %100 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %99, float %101, float %99, float %101)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the 4-sample averaging fragment shader whose IR precedes
; this listing.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Interpolate the coordinate pairs into v[2:3], v[4:5], v[6:7] and v[8:9]
; (inputs 0..3, two values each). The two scalar loads in the middle fetch
; the 8-dword and 4-dword descriptors that every image_sample uses.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800
v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801
v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900
v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901
v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00
v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01
v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00
v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01
s_waitcnt lgkmcnt(0) ; BF8C007F
; Issue all four samples back to back; results land in v[0:3], v[10:13],
; v[4:7] and v[14:17].
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020002
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800F00 00020A04
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020406
image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08
; Sum of samples 1 and 2.
; NOTE(review): vmcnt(2) presumably waits for only the first two - confirm.
s_waitcnt vmcnt(2) ; BF8C0772
v_add_f32_e32 v0, v10, v0 ; 0600010A
v_add_f32_e32 v1, v11, v1 ; 0602030B
v_add_f32_e32 v2, v12, v2 ; 0604050C
v_add_f32_e32 v3, v13, v3 ; 0606070D
; Sum of samples 3 and 4, then the total of all four in v[0:3].
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v4, v14, v4 ; 0608090E
v_add_f32_e32 v5, v15, v5 ; 060A0B0F
v_add_f32_e32 v6, v16, v6 ; 060C0D10
v_add_f32_e32 v7, v17, v7 ; 060E0F11
v_add_f32_e32 v0, v4, v0 ; 06000104
v_add_f32_e32 v1, v5, v1 ; 06020305
v_add_f32_e32 v2, v6, v2 ; 06040506
v_add_f32_e32 v3, v7, v3 ; 06060707
; 0x3e800000 is the 0.25 scale factor (IMM[0].x in the TGSI).
v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
; max(value, 0) per channel.
v_max_f32_e32 v0, 0, v0 ; 20000080
v_max_f32_e32 v1, 0, v1 ; 20020280
v_max_f32_e32 v2, 0, v2 ; 20040480
v_max_f32_e32 v3, 0, v3 ; 20060680
; Pack (x, y) into v0 and (z, w) into v1 as f16 pairs, then export.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 20
Code Size: 232 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].zw, IMM[0].xxxx
1: MOV TEMP[0].xy, IN[1].xyxx
2: MOV TEMP[1].zw, IMM[0].xxxx
3: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].yxxx
4: MUL TEMP[1].xy, TEMP[2].xyyy, CONST[1].xxxx
5: MUL TEMP[2], CONST[2], IN[0].xxxx
6: MAD TEMP[2], CONST[3], IN[0].yyyy, TEMP[2]
7: MAD TEMP[2], CONST[4], IN[0].zzzz, TEMP[2]
8: MAD TEMP[2], CONST[5], IN[0].wwww, TEMP[2]
9: MOV OUT[1], TEMP[0]
10: MOV OUT[2], TEMP[1]
11: MOV OUT[0], TEMP[2]
12: END
; ModuleID = 'tgsi'
; Vertex shader entry point for the TGSI program listed directly above.
; It exports IN[1].xy with z = w = 1.0 (target 32, OUT[1]), the pair
; (CONST[0].x * 0.0 * CONST[1].x, CONST[0].y * CONST[1].x) with z = w = 1.0
; (target 33, OUT[2]), and a position built by multiply-accumulating
; CONST[2..5] with IN[0].x/.y/.z/.w (target 12, OUT[0]).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte descriptor at element 0 of argument %1; every constant
; below is read through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0 and 4 are used as CONST[0].xy and offset 16 as CONST[1].x.
; NOTE(review): offsets look like bytes with 16 per vec4 - confirm.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; Offsets 32..44, 48..60, 64..76 and 80..92 are used as CONST[2], CONST[3],
; CONST[4] and CONST[5] (x, y, z, w each).
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; Fetch the first vertex input (used as IN[0]) via descriptor element 0 of
; argument %4, indexed by %5 + %7.
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
%34 = add i32 %5, %7
%35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34)
%36 = extractelement <4 x float> %35, i32 0
%37 = extractelement <4 x float> %35, i32 1
%38 = extractelement <4 x float> %35, i32 2
%39 = extractelement <4 x float> %35, i32 3
; Fetch the second vertex input (used as IN[1]) via descriptor element 1;
; only its x and y components are extracted.
%40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = add i32 %5, %7
%43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
; TEMP[1].x = (CONST[0].x * 0.0) * CONST[1].x. The multiply by the 0.0
; immediate is kept as an instruction rather than folded to a constant.
%46 = fmul float %13, 0.000000e+00
%47 = fmul float %46, %15
; TEMP[1].y = CONST[0].y * CONST[1].x (the TGSI multiply by the 1.0
; immediate does not appear here).
%48 = fmul float %14, %15
; TEMP[2] = CONST[2] * IN[0].x
%49 = fmul float %16, %36
%50 = fmul float %17, %36
%51 = fmul float %18, %36
%52 = fmul float %19, %36
; TEMP[2] += CONST[3] * IN[0].y
%53 = fmul float %20, %37
%54 = fadd float %53, %49
%55 = fmul float %21, %37
%56 = fadd float %55, %50
%57 = fmul float %22, %37
%58 = fadd float %57, %51
%59 = fmul float %23, %37
%60 = fadd float %59, %52
; TEMP[2] += CONST[4] * IN[0].z
%61 = fmul float %24, %38
%62 = fadd float %61, %54
%63 = fmul float %25, %38
%64 = fadd float %63, %56
%65 = fmul float %26, %38
%66 = fadd float %65, %58
%67 = fmul float %27, %38
%68 = fadd float %67, %60
; TEMP[2] += CONST[5] * IN[0].w
%69 = fmul float %28, %39
%70 = fadd float %69, %62
%71 = fmul float %29, %39
%72 = fadd float %71, %64
%73 = fmul float %30, %39
%74 = fadd float %73, %66
%75 = fmul float %31, %39
%76 = fadd float %75, %68
; Export TEMP[0] and TEMP[1] (z and w are the 1.0 immediates) to targets 32
; and 33, then TEMP[2] to target 12. Only the last export passes 1 as its
; third argument.
; NOTE(review): presumably the "done" flag - confirm against the intrinsic.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %47, float %48, float 1.000000e+00, float 1.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose IR precedes this listing.
; s[0:3] becomes the descriptor for the constant reads. s[4:7] and s[8:11]
; are the descriptors for the two vertex-input fetches at index s10 + v0.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Vertex fetches (v[1:4] used as IN[0], v[5:6] as IN[1].xy) interleaved with
; constant reads: dword 0x0 -> s12 and 0x1 -> s4 (used as CONST[0].xy),
; 0x4 -> s5 (used as CONST[1].x), 0x8..0x17 -> CONST[2..5].
; NOTE(review): dword offsets appear to be the IR byte offsets / 4 - confirm.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_buffer_load_dword s9, s[0:3], 0xb ; C204810B
s_buffer_load_dword s10, s[0:3], 0xc ; C205010C
s_buffer_load_dword s11, s[0:3], 0xd ; C205810D
s_buffer_load_dword s13, s[0:3], 0xe ; C206810E
s_buffer_load_dword s14, s[0:3], 0xf ; C207010F
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111
s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112
s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113
s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114
s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115
s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Position accumulation into v0, v7, v8, v1 (x, y, z, w):
; CONST[2]*IN[0].x, then += CONST[3]*IN[0].y, CONST[4]*IN[0].z, CONST[5]*IN[0].w.
v_mul_f32_e32 v0, s6, v1 ; 10000206
v_mac_f32_e32 v0, s10, v2 ; 3E00040A
v_mul_f32_e32 v7, s7, v1 ; 100E0207
v_mac_f32_e32 v7, s11, v2 ; 3E0E040B
v_mul_f32_e32 v8, s8, v1 ; 10100208
v_mac_f32_e32 v8, s13, v2 ; 3E10040D
v_mul_f32_e32 v1, s9, v1 ; 10020209
v_mac_f32_e32 v1, s14, v2 ; 3E02040E
v_mac_f32_e32 v0, s15, v3 ; 3E00060F
v_mac_f32_e32 v7, s16, v3 ; 3E0E0610
v_mac_f32_e32 v8, s17, v3 ; 3E100611
v_mac_f32_e32 v1, s18, v3 ; 3E020612
v_mac_f32_e32 v0, s19, v4 ; 3E000813
v_mac_f32_e32 v7, s20, v4 ; 3E0E0814
v_mac_f32_e32 v8, s21, v4 ; 3E100815
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; Target 32: IN[1].xy with 1.0 in z and w.
v_mov_b32_e32 v2, 1.0 ; 7E0402F2
exp 15, 32, 0, 0, 0, v5, v6, v2, v2 ; F800020F 02020605
; Target 33: x = (0 * CONST[0].x) * CONST[1].x, y = CONST[0].y * CONST[1].x.
; s5 is copied into v4 before the second multiply.
v_mul_f32_e64 v3, 0, s12 ; D2100003 00001880
v_mul_f32_e32 v3, s5, v3 ; 10060605
v_mov_b32_e32 v4, s5 ; 7E080205
v_mul_f32_e32 v4, s4, v4 ; 10080804
exp 15, 33, 0, 0, 0, v3, v4, v2, v2 ; F800021F 02020403
; Final export: the accumulated position to target 12.
exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 232 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855}
IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000}
0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx
1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy
2: MOV TEMP[1].xy, TEMP[0].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MUL TEMP[1], TEMP[1], IMM[0].yyyz
5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
6: MOV TEMP[2].xy, TEMP[0].xyyy
7: TEX TEMP[2], TEMP[2], SAMP[0], 2D
8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
10: MOV TEMP[2].xy, TEMP[0].xyyy
11: TEX TEMP[2], TEMP[2], SAMP[0], 2D
12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
14: MOV TEMP[2].xy, TEMP[0].xyyy
15: TEX TEMP[2], TEMP[2], SAMP[0], 2D
16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1]
17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
18: MOV TEMP[2].xy, TEMP[0].xyyy
19: TEX TEMP[2], TEMP[2], SAMP[0], 2D
20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
22: MOV TEMP[2].xy, TEMP[0].xyyy
23: TEX TEMP[2], TEMP[2], SAMP[0], 2D
24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
26: MOV TEMP[0].xy, TEMP[0].xyyy
27: TEX TEMP[0], TEMP[0], SAMP[0], 2D
28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1]
29: MOV OUT[0], TEMP[1]
30: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI program listed directly above.
; It takes seven texture samples along a line: the first coordinate is
; IN[0].xy - 3 * IN[1].xy and each following one adds IN[1].xy. The first
; three result channels are weighted 0.0205, 0.0855, 0.2320, 0.3240, 0.2320,
; 0.0855, 0.0205 (TGSI immediates). The fourth channel is weighted 0.0
; everywhere except the middle sample, which is added unscaled. The
; accumulated result is packed to f16 pairs and exported.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %3 (<8 x i32>) and element 0 of argument %2
; (<4 x i32>) are passed to every sample call below.
; NOTE(review): presumably the SVIEW[0] resource and SAMP[0] state - confirm.
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
; %26/%27 are used as IN[0].xy (base coordinate) and %28/%29 as IN[1].xy
; (per-sample step).
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
; Sample 1 coordinate: IN[0].xy - 3.0 * IN[1].xy.
%30 = fmul float %28, 3.000000e+00
%31 = fmul float %29, 3.000000e+00
%32 = fsub float %26, %30
%33 = fsub float %27, %31
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
; Sample 1 weights: IMM[0].yyyz = (0.0205, 0.0205, 0.0205, 0.0). This starts
; the accumulator.
%45 = fmul float %41, 0x3F94FDF3C0000000
%46 = fmul float %42, 0x3F94FDF3C0000000
%47 = fmul float %43, 0x3F94FDF3C0000000
%48 = fmul float %44, 0.000000e+00
; Sample 2: step the coordinate by IN[1].xy.
%49 = fadd float %32, %28
%50 = fadd float %33, %29
%51 = bitcast float %49 to i32
%52 = bitcast float %50 to i32
%53 = insertelement <2 x i32> undef, i32 %51, i32 0
%54 = insertelement <2 x i32> %53, i32 %52, i32 1
%55 = bitcast <8 x i32> %23 to <32 x i8>
%56 = bitcast <4 x i32> %25 to <16 x i8>
%57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = extractelement <4 x float> %57, i32 3
; Sample 2 weights: IMM[0].wwwz = (0.0855, 0.0855, 0.0855, 0.0).
%62 = fmul float %58, 0x3FB5E35400000000
%63 = fadd float %62, %45
%64 = fmul float %59, 0x3FB5E35400000000
%65 = fadd float %64, %46
%66 = fmul float %60, 0x3FB5E35400000000
%67 = fadd float %66, %47
%68 = fmul float %61, 0.000000e+00
%69 = fadd float %68, %48
; Sample 3: step the coordinate by IN[1].xy.
%70 = fadd float %49, %28
%71 = fadd float %50, %29
%72 = bitcast float %70 to i32
%73 = bitcast float %71 to i32
%74 = insertelement <2 x i32> undef, i32 %72, i32 0
%75 = insertelement <2 x i32> %74, i32 %73, i32 1
%76 = bitcast <8 x i32> %23 to <32 x i8>
%77 = bitcast <4 x i32> %25 to <16 x i8>
%78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
; Sample 3 weights: IMM[1].xxxy = (0.2320, 0.2320, 0.2320, 0.0).
%83 = fmul float %79, 0x3FCDB22D00000000
%84 = fadd float %83, %63
%85 = fmul float %80, 0x3FCDB22D00000000
%86 = fadd float %85, %65
%87 = fmul float %81, 0x3FCDB22D00000000
%88 = fadd float %87, %67
%89 = fmul float %82, 0.000000e+00
%90 = fadd float %89, %69
; Sample 4 (middle sample): step the coordinate by IN[1].xy.
%91 = fadd float %70, %28
%92 = fadd float %71, %29
%93 = bitcast float %91 to i32
%94 = bitcast float %92 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = bitcast <8 x i32> %23 to <32 x i8>
%98 = bitcast <4 x i32> %25 to <16 x i8>
%99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = extractelement <4 x float> %99, i32 2
%103 = extractelement <4 x float> %99, i32 3
; Sample 4 weights: IMM[1].zzzw = (0.3240, 0.3240, 0.3240, 1.0). The 1.0
; weight on the fourth channel shows up as a plain add with no multiply.
%104 = fmul float %100, 0x3FD4BC6A80000000
%105 = fadd float %104, %84
%106 = fmul float %101, 0x3FD4BC6A80000000
%107 = fadd float %106, %86
%108 = fmul float %102, 0x3FD4BC6A80000000
%109 = fadd float %108, %88
%110 = fadd float %103, %90
; Sample 5: step the coordinate by IN[1].xy.
%111 = fadd float %91, %28
%112 = fadd float %92, %29
%113 = bitcast float %111 to i32
%114 = bitcast float %112 to i32
%115 = insertelement <2 x i32> undef, i32 %113, i32 0
%116 = insertelement <2 x i32> %115, i32 %114, i32 1
%117 = bitcast <8 x i32> %23 to <32 x i8>
%118 = bitcast <4 x i32> %25 to <16 x i8>
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = extractelement <4 x float> %119, i32 3
; Sample 5 weights: IMM[1].xxxy = (0.2320, 0.2320, 0.2320, 0.0).
%124 = fmul float %120, 0x3FCDB22D00000000
%125 = fadd float %124, %105
%126 = fmul float %121, 0x3FCDB22D00000000
%127 = fadd float %126, %107
%128 = fmul float %122, 0x3FCDB22D00000000
%129 = fadd float %128, %109
%130 = fmul float %123, 0.000000e+00
%131 = fadd float %130, %110
; Sample 6: step the coordinate by IN[1].xy.
%132 = fadd float %111, %28
%133 = fadd float %112, %29
%134 = bitcast float %132 to i32
%135 = bitcast float %133 to i32
%136 = insertelement <2 x i32> undef, i32 %134, i32 0
%137 = insertelement <2 x i32> %136, i32 %135, i32 1
%138 = bitcast <8 x i32> %23 to <32 x i8>
%139 = bitcast <4 x i32> %25 to <16 x i8>
%140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2)
%141 = extractelement <4 x float> %140, i32 0
%142 = extractelement <4 x float> %140, i32 1
%143 = extractelement <4 x float> %140, i32 2
%144 = extractelement <4 x float> %140, i32 3
; Sample 6 weights: IMM[0].wwwz = (0.0855, 0.0855, 0.0855, 0.0).
%145 = fmul float %141, 0x3FB5E35400000000
%146 = fadd float %145, %125
%147 = fmul float %142, 0x3FB5E35400000000
%148 = fadd float %147, %127
%149 = fmul float %143, 0x3FB5E35400000000
%150 = fadd float %149, %129
%151 = fmul float %144, 0.000000e+00
%152 = fadd float %151, %131
; Sample 7: step the coordinate by IN[1].xy.
%153 = fadd float %132, %28
%154 = fadd float %133, %29
%155 = bitcast float %153 to i32
%156 = bitcast float %154 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = bitcast <8 x i32> %23 to <32 x i8>
%160 = bitcast <4 x i32> %25 to <16 x i8>
%161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2)
%162 = extractelement <4 x float> %161, i32 0
%163 = extractelement <4 x float> %161, i32 1
%164 = extractelement <4 x float> %161, i32 2
%165 = extractelement <4 x float> %161, i32 3
; Sample 7 weights: IMM[0].yyyz = (0.0205, 0.0205, 0.0205, 0.0).
%166 = fmul float %162, 0x3F94FDF3C0000000
%167 = fadd float %166, %146
%168 = fmul float %163, 0x3F94FDF3C0000000
%169 = fadd float %168, %148
%170 = fmul float %164, 0x3F94FDF3C0000000
%171 = fadd float %170, %150
%172 = fmul float %165, 0.000000e+00
%173 = fadd float %172, %152
; Pack (x, y) and (z, w) into two 32-bit values and export them; the same
; two packed values fill all four export slots.
%174 = call i32 @llvm.SI.packf16(float %167, float %169)
%175 = bitcast i32 %174 to float
%176 = call i32 @llvm.SI.packf16(float %171, float %173)
%177 = bitcast i32 %176 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000
v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304
v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300
v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, v1, v7 ; 10040F01
v_mul_f32_e32 v3, v1, v8 ; 10061101
v_mul_f32_e32 v7, v1, v9 ; 100E1301
v_mul_f32_e32 v8, 0, v10 ; 10101480
v_add_f32_e32 v9, v4, v5 ; 06120B04
v_add_f32_e32 v10, v0, v6 ; 06140D00
v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v5, v11 ; 3E041705
v_mac_f32_e32 v3, v5, v12 ; 3E061905
v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05
v_mac_f32_e32 v8, 0, v14 ; 3E101C80
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v6, v13 ; 3E041B06
v_mac_f32_e32 v3, v6, v14 ; 3E061D06
v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06
v_mac_f32_e32 v8, 0, v16 ; 3E102080
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354
image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v11, v12 ; 3E04190B
v_mac_f32_e32 v3, v11, v13 ; 3E061B0B
v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B
v_add_f32_e32 v8, v8, v15 ; 06101F08
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_add_f32_e32 v13, v4, v9 ; 061A1304
v_add_f32_e32 v14, v0, v10 ; 061C1500
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909
image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D
s_waitcnt vmcnt(2) ; BF8C0772
v_mac_f32_e32 v2, v6, v15 ; 3E041F06
v_mac_f32_e32 v3, v6, v16 ; 3E062106
v_mac_f32_e32 v7, v6, v17 ; 3E0E2306
v_mac_f32_e32 v8, 0, v18 ; 3E102480
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v2, v5, v9 ; 3E041305
v_mac_f32_e32 v3, v5, v10 ; 3E061505
v_mac_f32_e32 v7, v5, v11 ; 3E0E1705
v_mac_f32_e32 v8, 0, v12 ; 3E101880
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v1, v19 ; 3E042701
v_mac_f32_e32 v3, v1, v20 ; 3E062901
v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01
v_mac_f32_e32 v8, 0, v22 ; 3E102C80
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 24
Code Size: 372 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].zw, IMM[0].xxxx
1: MOV TEMP[0].xy, IN[1].xyxx
2: MOV TEMP[1].zw, IMM[0].xxxx
3: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].xyyy
4: MUL TEMP[1].xy, TEMP[2].xyyy, CONST[1].xxxx
5: MUL TEMP[2], CONST[2], IN[0].xxxx
6: MAD TEMP[2], CONST[3], IN[0].yyyy, TEMP[2]
7: MAD TEMP[2], CONST[4], IN[0].zzzz, TEMP[2]
8: MAD TEMP[2], CONST[5], IN[0].wwww, TEMP[2]
9: MOV OUT[1], TEMP[0]
10: MOV OUT[2], TEMP[1]
11: MOV OUT[0], TEMP[2]
12: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
%34 = add i32 %5, %7
%35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34)
%36 = extractelement <4 x float> %35, i32 0
%37 = extractelement <4 x float> %35, i32 1
%38 = extractelement <4 x float> %35, i32 2
%39 = extractelement <4 x float> %35, i32 3
%40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = add i32 %5, %7
%43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = fmul float %14, 0.000000e+00
%47 = fmul float %13, %15
%48 = fmul float %46, %15
%49 = fmul float %16, %36
%50 = fmul float %17, %36
%51 = fmul float %18, %36
%52 = fmul float %19, %36
%53 = fmul float %20, %37
%54 = fadd float %53, %49
%55 = fmul float %21, %37
%56 = fadd float %55, %50
%57 = fmul float %22, %37
%58 = fadd float %57, %51
%59 = fmul float %23, %37
%60 = fadd float %59, %52
%61 = fmul float %24, %38
%62 = fadd float %61, %54
%63 = fmul float %25, %38
%64 = fadd float %63, %56
%65 = fmul float %26, %38
%66 = fadd float %65, %58
%67 = fmul float %27, %38
%68 = fadd float %67, %60
%69 = fmul float %28, %39
%70 = fadd float %69, %62
%71 = fmul float %29, %39
%72 = fadd float %71, %64
%73 = fmul float %30, %39
%74 = fadd float %73, %66
%75 = fmul float %31, %39
%76 = fadd float %75, %68
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %47, float %48, float 1.000000e+00, float 1.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_buffer_load_dword s9, s[0:3], 0xb ; C204810B
s_buffer_load_dword s10, s[0:3], 0xc ; C205010C
s_buffer_load_dword s11, s[0:3], 0xd ; C205810D
s_buffer_load_dword s13, s[0:3], 0xe ; C206810E
s_buffer_load_dword s14, s[0:3], 0xf ; C207010F
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111
s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112
s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113
s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114
s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115
s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s6, v1 ; 10000206
v_mac_f32_e32 v0, s10, v2 ; 3E00040A
v_mul_f32_e32 v7, s7, v1 ; 100E0207
v_mac_f32_e32 v7, s11, v2 ; 3E0E040B
v_mul_f32_e32 v8, s8, v1 ; 10100208
v_mac_f32_e32 v8, s13, v2 ; 3E10040D
v_mul_f32_e32 v1, s9, v1 ; 10020209
v_mac_f32_e32 v1, s14, v2 ; 3E02040E
v_mac_f32_e32 v0, s15, v3 ; 3E00060F
v_mac_f32_e32 v7, s16, v3 ; 3E0E0610
v_mac_f32_e32 v8, s17, v3 ; 3E100611
v_mac_f32_e32 v1, s18, v3 ; 3E020612
v_mac_f32_e32 v0, s19, v4 ; 3E000813
v_mac_f32_e32 v7, s20, v4 ; 3E0E0814
v_mac_f32_e32 v8, s21, v4 ; 3E100815
v_mac_f32_e32 v1, s0, v4 ; 3E020800
v_mov_b32_e32 v2, 1.0 ; 7E0402F2
exp 15, 32, 0, 0, 0, v5, v6, v2, v2 ; F800020F 02020605
v_mul_f32_e64 v3, 0, s4 ; D2100003 00000880
v_mul_f32_e32 v3, s5, v3 ; 10060605
v_mov_b32_e32 v4, s5 ; 7E080205
v_mul_f32_e32 v4, s12, v4 ; 1008080C
exp 15, 33, 0, 0, 0, v4, v3, v2, v2 ; F800021F 02020304
exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 232 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855}
IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000}
0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx
1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy
2: MOV TEMP[1].xy, TEMP[0].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MUL TEMP[1], TEMP[1], IMM[0].yyyz
5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
6: MOV TEMP[2].xy, TEMP[0].xyyy
7: TEX TEMP[2], TEMP[2], SAMP[0], 2D
8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
10: MOV TEMP[2].xy, TEMP[0].xyyy
11: TEX TEMP[2], TEMP[2], SAMP[0], 2D
12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
14: MOV TEMP[2].xy, TEMP[0].xyyy
15: TEX TEMP[2], TEMP[2], SAMP[0], 2D
16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1]
17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
18: MOV TEMP[2].xy, TEMP[0].xyyy
19: TEX TEMP[2], TEMP[2], SAMP[0], 2D
20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
22: MOV TEMP[2].xy, TEMP[0].xyyy
23: TEX TEMP[2], TEMP[2], SAMP[0], 2D
24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
26: MOV TEMP[0].xy, TEMP[0].xyyy
27: TEX TEMP[0], TEMP[0], SAMP[0], 2D
28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1]
29: MOV OUT[0], TEMP[1]
30: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%30 = fmul float %28, 3.000000e+00
%31 = fmul float %29, 3.000000e+00
%32 = fsub float %26, %30
%33 = fsub float %27, %31
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
%45 = fmul float %41, 0x3F94FDF3C0000000
%46 = fmul float %42, 0x3F94FDF3C0000000
%47 = fmul float %43, 0x3F94FDF3C0000000
%48 = fmul float %44, 0.000000e+00
%49 = fadd float %32, %28
%50 = fadd float %33, %29
%51 = bitcast float %49 to i32
%52 = bitcast float %50 to i32
%53 = insertelement <2 x i32> undef, i32 %51, i32 0
%54 = insertelement <2 x i32> %53, i32 %52, i32 1
%55 = bitcast <8 x i32> %23 to <32 x i8>
%56 = bitcast <4 x i32> %25 to <16 x i8>
%57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = extractelement <4 x float> %57, i32 3
%62 = fmul float %58, 0x3FB5E35400000000
%63 = fadd float %62, %45
%64 = fmul float %59, 0x3FB5E35400000000
%65 = fadd float %64, %46
%66 = fmul float %60, 0x3FB5E35400000000
%67 = fadd float %66, %47
%68 = fmul float %61, 0.000000e+00
%69 = fadd float %68, %48
%70 = fadd float %49, %28
%71 = fadd float %50, %29
%72 = bitcast float %70 to i32
%73 = bitcast float %71 to i32
%74 = insertelement <2 x i32> undef, i32 %72, i32 0
%75 = insertelement <2 x i32> %74, i32 %73, i32 1
%76 = bitcast <8 x i32> %23 to <32 x i8>
%77 = bitcast <4 x i32> %25 to <16 x i8>
%78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
%83 = fmul float %79, 0x3FCDB22D00000000
%84 = fadd float %83, %63
%85 = fmul float %80, 0x3FCDB22D00000000
%86 = fadd float %85, %65
%87 = fmul float %81, 0x3FCDB22D00000000
%88 = fadd float %87, %67
%89 = fmul float %82, 0.000000e+00
%90 = fadd float %89, %69
%91 = fadd float %70, %28
%92 = fadd float %71, %29
%93 = bitcast float %91 to i32
%94 = bitcast float %92 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = bitcast <8 x i32> %23 to <32 x i8>
%98 = bitcast <4 x i32> %25 to <16 x i8>
%99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = extractelement <4 x float> %99, i32 2
%103 = extractelement <4 x float> %99, i32 3
%104 = fmul float %100, 0x3FD4BC6A80000000
%105 = fadd float %104, %84
%106 = fmul float %101, 0x3FD4BC6A80000000
%107 = fadd float %106, %86
%108 = fmul float %102, 0x3FD4BC6A80000000
%109 = fadd float %108, %88
%110 = fadd float %103, %90
%111 = fadd float %91, %28
%112 = fadd float %92, %29
%113 = bitcast float %111 to i32
%114 = bitcast float %112 to i32
%115 = insertelement <2 x i32> undef, i32 %113, i32 0
%116 = insertelement <2 x i32> %115, i32 %114, i32 1
%117 = bitcast <8 x i32> %23 to <32 x i8>
%118 = bitcast <4 x i32> %25 to <16 x i8>
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = extractelement <4 x float> %119, i32 3
%124 = fmul float %120, 0x3FCDB22D00000000
%125 = fadd float %124, %105
%126 = fmul float %121, 0x3FCDB22D00000000
%127 = fadd float %126, %107
%128 = fmul float %122, 0x3FCDB22D00000000
%129 = fadd float %128, %109
%130 = fmul float %123, 0.000000e+00
%131 = fadd float %130, %110
%132 = fadd float %111, %28
%133 = fadd float %112, %29
%134 = bitcast float %132 to i32
%135 = bitcast float %133 to i32
%136 = insertelement <2 x i32> undef, i32 %134, i32 0
%137 = insertelement <2 x i32> %136, i32 %135, i32 1
%138 = bitcast <8 x i32> %23 to <32 x i8>
%139 = bitcast <4 x i32> %25 to <16 x i8>
%140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2)
%141 = extractelement <4 x float> %140, i32 0
%142 = extractelement <4 x float> %140, i32 1
%143 = extractelement <4 x float> %140, i32 2
%144 = extractelement <4 x float> %140, i32 3
%145 = fmul float %141, 0x3FB5E35400000000
%146 = fadd float %145, %125
%147 = fmul float %142, 0x3FB5E35400000000
%148 = fadd float %147, %127
%149 = fmul float %143, 0x3FB5E35400000000
%150 = fadd float %149, %129
%151 = fmul float %144, 0.000000e+00
%152 = fadd float %151, %131
%153 = fadd float %132, %28
%154 = fadd float %133, %29
%155 = bitcast float %153 to i32
%156 = bitcast float %154 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = bitcast <8 x i32> %23 to <32 x i8>
%160 = bitcast <4 x i32> %25 to <16 x i8>
%161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2)
%162 = extractelement <4 x float> %161, i32 0
%163 = extractelement <4 x float> %161, i32 1
%164 = extractelement <4 x float> %161, i32 2
%165 = extractelement <4 x float> %161, i32 3
%166 = fmul float %162, 0x3F94FDF3C0000000
%167 = fadd float %166, %146
%168 = fmul float %163, 0x3F94FDF3C0000000
%169 = fadd float %168, %148
%170 = fmul float %164, 0x3F94FDF3C0000000
%171 = fadd float %170, %150
%172 = fmul float %165, 0.000000e+00
%173 = fadd float %172, %152
%174 = call i32 @llvm.SI.packf16(float %167, float %169)
%175 = bitcast i32 %174 to float
%176 = call i32 @llvm.SI.packf16(float %171, float %173)
%177 = bitcast i32 %176 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000
v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304
v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300
v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, v1, v7 ; 10040F01
v_mul_f32_e32 v3, v1, v8 ; 10061101
v_mul_f32_e32 v7, v1, v9 ; 100E1301
v_mul_f32_e32 v8, 0, v10 ; 10101480
v_add_f32_e32 v9, v4, v5 ; 06120B04
v_add_f32_e32 v10, v0, v6 ; 06140D00
v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v5, v11 ; 3E041705
v_mac_f32_e32 v3, v5, v12 ; 3E061905
v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05
v_mac_f32_e32 v8, 0, v14 ; 3E101C80
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v6, v13 ; 3E041B06
v_mac_f32_e32 v3, v6, v14 ; 3E061D06
v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06
v_mac_f32_e32 v8, 0, v16 ; 3E102080
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354
image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v11, v12 ; 3E04190B
v_mac_f32_e32 v3, v11, v13 ; 3E061B0B
v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B
v_add_f32_e32 v8, v8, v15 ; 06101F08
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_add_f32_e32 v13, v4, v9 ; 061A1304
v_add_f32_e32 v14, v0, v10 ; 061C1500
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909
image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D
s_waitcnt vmcnt(2) ; BF8C0772
v_mac_f32_e32 v2, v6, v15 ; 3E041F06
v_mac_f32_e32 v3, v6, v16 ; 3E062106
v_mac_f32_e32 v7, v6, v17 ; 3E0E2306
v_mac_f32_e32 v8, 0, v18 ; 3E102480
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v2, v5, v9 ; 3E041305
v_mac_f32_e32 v3, v5, v10 ; 3E061505
v_mac_f32_e32 v7, v5, v11 ; 3E0E1705
v_mac_f32_e32 v8, 0, v12 ; 3E101880
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v1, v19 ; 3E042701
v_mac_f32_e32 v3, v1, v20 ; 3E062901
v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01
v_mac_f32_e32 v8, 0, v22 ; 3E102C80
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 24
Code Size: 372 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].zw, IMM[0].xxxx
1: MOV TEMP[0].xy, IN[1].xyxx
2: MUL TEMP[1], CONST[0], IN[0].xxxx
3: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1]
4: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1]
5: MAD TEMP[1], CONST[3], IN[0].wwww, TEMP[1]
6: MOV OUT[1], TEMP[0]
7: MOV OUT[0], TEMP[1]
8: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 1.000000e+00, float 1.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 1.0 ; 7E0202F2
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SAMP[6]
DCL SAMP[7]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL SVIEW[6], 2D, FLOAT
DCL SVIEW[7], 2D, FLOAT
DCL CONST[8..9]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.2300, 0.3500, 0.4500, 0.6000}
IMM[1] FLT32 { 0.5000, 0.4545, 0.8000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].w, TEMP[0].wwww
3: MOV TEMP[2].xy, IN[0].xyyy
4: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D
5: MOV TEMP[3].xy, IN[0].xyyy
6: TEX TEMP[3].xyz, TEMP[3], SAMP[2], 2D
7: MOV TEMP[4].xy, IN[0].xyyy
8: TEX TEMP[4].xyz, TEMP[4], SAMP[3], 2D
9: MOV TEMP[5].xy, IN[0].xyyy
10: TEX TEMP[5].xyz, TEMP[5], SAMP[4], 2D
11: MOV TEMP[6].xy, IN[0].xyyy
12: TEX TEMP[6].xyz, TEMP[6], SAMP[5], 2D
13: MOV TEMP[7].xy, IN[0].xyyy
14: TEX TEMP[7].xyz, TEMP[7], SAMP[6], 2D
15: MUL TEMP[8].xyz, TEMP[2].xyzz, IMM[1].xxxx
16: MAD TEMP[8].xyz, IMM[0].wwww, TEMP[3].xyzz, TEMP[8].xyzz
17: MAD TEMP[8].xyz, TEMP[4].xyzz, IMM[0].wwww, TEMP[8].xyzz
18: MAD TEMP[8].xyz, TEMP[5].xyzz, IMM[0].zzzz, TEMP[8].xyzz
19: MAD TEMP[8].xyz, TEMP[6].xyzz, IMM[0].yyyy, TEMP[8].xyzz
20: MAD TEMP[8].xyz, TEMP[7].xyzz, IMM[0].xxxx, TEMP[8].xyzz
21: MUL TEMP[8].xyz, TEMP[8].xyzz, IMM[1].yyyy
22: LRP TEMP[1].xyz, CONST[8].xxxx, TEMP[8].xyzz, TEMP[0].xyzz
23: MAD TEMP[0].xyz, TEMP[3].xyzz, IMM[1].zzzz, TEMP[2].xyzz
24: MAD TEMP[0].xyz, TEMP[4].xyzz, IMM[0].wwww, TEMP[0].xyzz
25: MAD TEMP[0].xyz, TEMP[5].xyzz, IMM[0].zzzz, TEMP[0].xyzz
26: MAD TEMP[0].xyz, TEMP[6].xyzz, IMM[0].yyyy, TEMP[0].xyzz
27: MAD TEMP[0].xyz, TEMP[7].xyzz, IMM[0].xxxx, TEMP[0].xyzz
28: MOV TEMP[2].xy, IN[0].xyyy
29: TEX TEMP[2].xyz, TEMP[2], SAMP[7], 2D
30: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[9].xxxx
31: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz
32: LRP TEMP[1].xyz, TEMP[2].xyzz, TEMP[0].xyzz, TEMP[1].xyzz
33: MOV OUT[0], TEMP[1]
34: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0
%28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
%30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)*
%32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0
%33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)*
%35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0
%36 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%37 = bitcast <8 x i32> addrspace(2)* %36 to <32 x i8> addrspace(2)*
%38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0
%39 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%40 = bitcast <4 x i32> addrspace(2)* %39 to <16 x i8> addrspace(2)*
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)*
%44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0
%45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)*
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)*
%50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0
%51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)*
%53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0
%54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)*
%56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0
%57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)*
%59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0
%60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6
%61 = bitcast <8 x i32> addrspace(2)* %60 to <32 x i8> addrspace(2)*
%62 = load <32 x i8>, <32 x i8> addrspace(2)* %61, align 32, !tbaa !0
%63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6
%64 = bitcast <4 x i32> addrspace(2)* %63 to <16 x i8> addrspace(2)*
%65 = load <16 x i8>, <16 x i8> addrspace(2)* %64, align 16, !tbaa !0
%66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7
%67 = bitcast <8 x i32> addrspace(2)* %66 to <32 x i8> addrspace(2)*
%68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0
%69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7
%70 = bitcast <4 x i32> addrspace(2)* %69 to <16 x i8> addrspace(2)*
%71 = load <16 x i8>, <16 x i8> addrspace(2)* %70, align 16, !tbaa !0
%72 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%74 = bitcast float %72 to i32
%75 = bitcast float %73 to i32
%76 = insertelement <2 x i32> undef, i32 %74, i32 0
%77 = insertelement <2 x i32> %76, i32 %75, i32 1
%78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %27, <16 x i8> %29, i32 2)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
%83 = bitcast float %72 to i32
%84 = bitcast float %73 to i32
%85 = insertelement <2 x i32> undef, i32 %83, i32 0
%86 = insertelement <2 x i32> %85, i32 %84, i32 1
%87 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %86, <32 x i8> %32, <16 x i8> %35, i32 2)
%88 = extractelement <4 x float> %87, i32 0
%89 = extractelement <4 x float> %87, i32 1
%90 = extractelement <4 x float> %87, i32 2
%91 = bitcast float %72 to i32
%92 = bitcast float %73 to i32
%93 = insertelement <2 x i32> undef, i32 %91, i32 0
%94 = insertelement <2 x i32> %93, i32 %92, i32 1
%95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %38, <16 x i8> %41, i32 2)
%96 = extractelement <4 x float> %95, i32 0
%97 = extractelement <4 x float> %95, i32 1
%98 = extractelement <4 x float> %95, i32 2
%99 = bitcast float %72 to i32
%100 = bitcast float %73 to i32
%101 = insertelement <2 x i32> undef, i32 %99, i32 0
%102 = insertelement <2 x i32> %101, i32 %100, i32 1
%103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %44, <16 x i8> %47, i32 2)
%104 = extractelement <4 x float> %103, i32 0
%105 = extractelement <4 x float> %103, i32 1
%106 = extractelement <4 x float> %103, i32 2
%107 = bitcast float %72 to i32
%108 = bitcast float %73 to i32
%109 = insertelement <2 x i32> undef, i32 %107, i32 0
%110 = insertelement <2 x i32> %109, i32 %108, i32 1
%111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %110, <32 x i8> %50, <16 x i8> %53, i32 2)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
%114 = extractelement <4 x float> %111, i32 2
%115 = bitcast float %72 to i32
%116 = bitcast float %73 to i32
%117 = insertelement <2 x i32> undef, i32 %115, i32 0
%118 = insertelement <2 x i32> %117, i32 %116, i32 1
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %56, <16 x i8> %59, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = bitcast float %72 to i32
%124 = bitcast float %73 to i32
%125 = insertelement <2 x i32> undef, i32 %123, i32 0
%126 = insertelement <2 x i32> %125, i32 %124, i32 1
%127 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %62, <16 x i8> %65, i32 2)
%128 = extractelement <4 x float> %127, i32 0
%129 = extractelement <4 x float> %127, i32 1
%130 = extractelement <4 x float> %127, i32 2
%131 = fmul float %88, 5.000000e-01
%132 = fmul float %89, 5.000000e-01
%133 = fmul float %90, 5.000000e-01
%134 = fmul float %96, 0x3FE3333340000000
%135 = fadd float %134, %131
%136 = fmul float %97, 0x3FE3333340000000
%137 = fadd float %136, %132
%138 = fmul float %98, 0x3FE3333340000000
%139 = fadd float %138, %133
%140 = fmul float %104, 0x3FE3333340000000
%141 = fadd float %140, %135
%142 = fmul float %105, 0x3FE3333340000000
%143 = fadd float %142, %137
%144 = fmul float %106, 0x3FE3333340000000
%145 = fadd float %144, %139
%146 = fmul float %112, 0x3FDCCCCCC0000000
%147 = fadd float %146, %141
%148 = fmul float %113, 0x3FDCCCCCC0000000
%149 = fadd float %148, %143
%150 = fmul float %114, 0x3FDCCCCCC0000000
%151 = fadd float %150, %145
%152 = fmul float %120, 0x3FD6666660000000
%153 = fadd float %152, %147
%154 = fmul float %121, 0x3FD6666660000000
%155 = fadd float %154, %149
%156 = fmul float %122, 0x3FD6666660000000
%157 = fadd float %156, %151
%158 = fmul float %128, 0x3FCD70A3E0000000
%159 = fadd float %158, %153
%160 = fmul float %129, 0x3FCD70A3E0000000
%161 = fadd float %160, %155
%162 = fmul float %130, 0x3FCD70A3E0000000
%163 = fadd float %162, %157
%164 = fmul float %159, 0x3FDD1745C0000000
%165 = fmul float %161, 0x3FDD1745C0000000
%166 = fmul float %163, 0x3FDD1745C0000000
%167 = call float @llvm.AMDGPU.lrp(float %24, float %164, float %79)
%168 = call float @llvm.AMDGPU.lrp(float %24, float %165, float %80)
%169 = call float @llvm.AMDGPU.lrp(float %24, float %166, float %81)
%170 = fmul float %96, 0x3FE99999A0000000
%171 = fadd float %170, %88
%172 = fmul float %97, 0x3FE99999A0000000
%173 = fadd float %172, %89
%174 = fmul float %98, 0x3FE99999A0000000
%175 = fadd float %174, %90
%176 = fmul float %104, 0x3FE3333340000000
%177 = fadd float %176, %171
%178 = fmul float %105, 0x3FE3333340000000
%179 = fadd float %178, %173
%180 = fmul float %106, 0x3FE3333340000000
%181 = fadd float %180, %175
%182 = fmul float %112, 0x3FDCCCCCC0000000
%183 = fadd float %182, %177
%184 = fmul float %113, 0x3FDCCCCCC0000000
%185 = fadd float %184, %179
%186 = fmul float %114, 0x3FDCCCCCC0000000
%187 = fadd float %186, %181
%188 = fmul float %120, 0x3FD6666660000000
%189 = fadd float %188, %183
%190 = fmul float %121, 0x3FD6666660000000
%191 = fadd float %190, %185
%192 = fmul float %122, 0x3FD6666660000000
%193 = fadd float %192, %187
%194 = fmul float %128, 0x3FCD70A3E0000000
%195 = fadd float %194, %189
%196 = fmul float %129, 0x3FCD70A3E0000000
%197 = fadd float %196, %191
%198 = fmul float %130, 0x3FCD70A3E0000000
%199 = fadd float %198, %193
%200 = bitcast float %72 to i32
%201 = bitcast float %73 to i32
%202 = insertelement <2 x i32> undef, i32 %200, i32 0
%203 = insertelement <2 x i32> %202, i32 %201, i32 1
%204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %68, <16 x i8> %71, i32 2)
%205 = extractelement <4 x float> %204, i32 0
%206 = extractelement <4 x float> %204, i32 1
%207 = extractelement <4 x float> %204, i32 2
%208 = fmul float %205, %25
%209 = fmul float %206, %25
%210 = fmul float %207, %25
%211 = call float @llvm.AMDIL.clamp.(float %208, float 0.000000e+00, float 1.000000e+00)
%212 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00)
%213 = call float @llvm.AMDIL.clamp.(float %210, float 0.000000e+00, float 1.000000e+00)
%214 = call float @llvm.AMDGPU.lrp(float %211, float %195, float %167)
%215 = call float @llvm.AMDGPU.lrp(float %212, float %197, float %168)
%216 = call float @llvm.AMDGPU.lrp(float %213, float %199, float %169)
%217 = call i32 @llvm.SI.packf16(float %214, float %215)
%218 = bitcast i32 %217 to float
%219 = call i32 @llvm.SI.packf16(float %216, float %82)
%220 = bitcast i32 %219 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %218, float %220, float %218, float %220)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_mov_b32_e32 v2, 0x3f19999a ; 7E0402FF 3F19999A
v_mov_b32_e32 v3, 0x3ee66666 ; 7E0602FF 3EE66666
v_mov_b32_e32 v4, 0x3eb33333 ; 7E0802FF 3EB33333
v_mov_b32_e32 v5, 0x3e6b851f ; 7E0A02FF 3E6B851F
v_mov_b32_e32 v6, 0x3ee8ba2e ; 7E0C02FF 3EE8BA2E
v_mov_b32_e32 v7, 0x3f4ccccd ; 7E0E02FF 3F4CCCCD
v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000
v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001
v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100
v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508
s_load_dwordx4 s[40:43], s[4:5], 0xc ; C094050C
s_load_dwordx4 s[44:47], s[4:5], 0x10 ; C0960510
s_load_dwordx4 s[48:51], s[4:5], 0x14 ; C0980514
s_load_dwordx4 s[0:3], s[4:5], 0x18 ; C0800518
s_load_dwordx4 s[8:11], s[4:5], 0x1c ; C084051C
s_load_dwordx8 s[52:59], s[6:7], 0x0 ; C0DA0700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[12:15], 0x20 ; C2020D20
s_buffer_load_dword s5, s[12:15], 0x24 ; C2028D24
s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708
s_load_dwordx8 s[68:75], s[6:7], 0x10 ; C0E20710
s_load_dwordx8 s[76:83], s[6:7], 0x18 ; C0E60718
s_load_dwordx8 s[84:91], s[6:7], 0x20 ; C0EA0720
s_load_dwordx8 s[92:99], s[6:7], 0x28 ; C0EE0728
s_load_dwordx8 s[12:19], s[6:7], 0x30 ; C0C60730
s_load_dwordx8 s[20:27], s[6:7], 0x38 ; C0CA0738
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[52:59], s[28:31] ; F0800F00 00ED0A08
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e64 v0, 1.0, s4 ; D2080000 000008F2
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, v10, v0 ; 1002010A
image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[60:67], s[32:35] ; F0800700 010F0E08
image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[68:75], s[36:39] ; F0800700 01311108
image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[76:83], s[40:43] ; F0800700 01531408
image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[84:91], s[44:47] ; F0800700 01751708
s_waitcnt vmcnt(3) ; BF8C0773
v_mul_f32_e32 v10, 0.5, v14 ; 10141CF0
s_waitcnt vmcnt(2) ; BF8C0772
v_mac_f32_e32 v10, v2, v17 ; 3E142302
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v10, v2, v20 ; 3E142902
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v10, v3, v23 ; 3E142F03
v_mul_f32_e32 v26, 0.5, v15 ; 10341EF0
v_mac_f32_e32 v26, v2, v18 ; 3E342502
v_mac_f32_e32 v26, v2, v21 ; 3E342B02
v_mac_f32_e32 v26, v3, v24 ; 3E343103
v_mul_f32_e32 v27, 0.5, v16 ; 103620F0
v_mac_f32_e32 v27, v2, v19 ; 3E362702
v_mac_f32_e32 v27, v2, v22 ; 3E362D02
v_mac_f32_e32 v27, v3, v25 ; 3E363303
v_mad_f32 v14, v7, v17, v14 ; D282000E 043A2307
v_mac_f32_e32 v14, v2, v20 ; 3E1C2902
v_mac_f32_e32 v14, v3, v23 ; 3E1C2F03
v_mad_f32 v15, v7, v18, v15 ; D282000F 043E2507
v_mac_f32_e32 v16, v7, v19 ; 3E202707
v_mac_f32_e32 v15, v2, v21 ; 3E1E2B02
v_mac_f32_e32 v16, v2, v22 ; 3E202D02
v_mac_f32_e32 v15, v3, v24 ; 3E1E3103
v_mac_f32_e32 v16, v3, v25 ; 3E203303
image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[92:99], s[48:51] ; F0800700 01971108
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v10, v4, v17 ; 3E142304
v_mac_f32_e32 v26, v4, v18 ; 3E342504
v_mac_f32_e32 v27, v4, v19 ; 3E362704
v_mac_f32_e32 v14, v4, v17 ; 3E1C2304
v_mac_f32_e32 v15, v4, v18 ; 3E1E2504
v_mac_f32_e32 v16, v4, v19 ; 3E202704
image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[12:19], s[0:3] ; F0800700 00030208
image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[20:27], s[8:11] ; F0800700 00450708
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v10, v5, v2 ; 3E140505
v_mac_f32_e32 v26, v5, v3 ; 3E340705
v_mac_f32_e32 v27, v5, v4 ; 3E360905
v_mul_f32_e32 v10, v6, v10 ; 10141506
v_mac_f32_e32 v1, s4, v10 ; 3E021404
v_mul_f32_e32 v10, v6, v26 ; 10143506
v_mul_f32_e32 v6, v6, v27 ; 100C3706
v_mac_f32_e32 v14, v5, v2 ; 3E1C0505
v_mac_f32_e32 v15, v5, v3 ; 3E1E0705
v_mac_f32_e32 v16, v5, v4 ; 3E200905
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, s5, v7 ; 10040E05
v_mul_f32_e32 v3, s5, v8 ; 10061005
v_mul_f32_e32 v4, s5, v9 ; 10081205
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_sub_f32_e32 v5, 1.0, v2 ; 080A04F2
v_mul_f32_e32 v1, v1, v5 ; 10020B01
v_mac_f32_e32 v1, v14, v2 ; 3E02050E
v_mul_f32_e32 v2, v11, v0 ; 1004010B
v_mac_f32_e32 v2, s4, v10 ; 3E041404
v_mul_f32_e32 v0, v12, v0 ; 1000010C
v_mac_f32_e32 v0, s4, v6 ; 3E000C04
v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680
v_sub_f32_e32 v5, 1.0, v3 ; 080A06F2
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_mac_f32_e32 v2, v15, v3 ; 3E04070F
v_sub_f32_e32 v3, 1.0, v4 ; 080608F2
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_mac_f32_e32 v0, v16, v4 ; 3E000910
v_cvt_pkrtz_f16_f32_e32 v0, v0, v13 ; 5E001B00
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 28
Code Size: 528 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0..37], LOCAL
IMM[0] FLT32 { 0.0000, -1.0000, 1.0000, 1.9632}
IMM[1] FLT32 { 0.0417, 0.1250, 0.7500, 0.2500}
IMM[2] FLT32 { -0.2500, 1.3333, 0.1111, -0.5000}
IMM[3] FLT32 { 0.5000, -1.5000, 1.5000, 2.0000}
IMM[4] UINT32 {0, 4294967295, 0, 0}
IMM[5] INT32 {0, 8, 1, 0}
0: MOV TEMP[0].xy, CONST[1].xyxx
1: MOV TEMP[1].zw, IMM[0].xxxx
2: MAD TEMP[1].xy, IMM[0].xyyy, CONST[1].xyyy, IN[0].xyyy
3: MOV TEMP[1].xy, TEMP[1].xyyy
4: MOV TEMP[1].w, IMM[0].xxxx
5: TXL TEMP[1], TEMP[1], SAMP[0], 2D
6: MOV TEMP[2], TEMP[1]
7: MOV TEMP[0].zw, IMM[0].xxxx
8: MAD TEMP[0].xy, IMM[0].yxxx, CONST[1].xyyy, IN[0].xyyy
9: MOV TEMP[0].xy, TEMP[0].xyyy
10: MOV TEMP[0].w, IMM[0].xxxx
11: TXL TEMP[0], TEMP[0], SAMP[0], 2D
12: MOV TEMP[3], TEMP[0]
13: MOV TEMP[4].xy, IN[0].xyyy
14: MOV TEMP[4].w, IMM[0].xxxx
15: TXL TEMP[4], TEMP[4], SAMP[0], 2D
16: MOV TEMP[5], TEMP[4]
17: MOV TEMP[6].zw, IMM[0].xxxx
18: MAD TEMP[6].xy, IMM[0].zxxx, CONST[1].xyyy, IN[0].xyyy
19: MOV TEMP[6].xy, TEMP[6].xyyy
20: MOV TEMP[6].w, IMM[0].xxxx
21: TXL TEMP[6], TEMP[6], SAMP[0], 2D
22: MOV TEMP[7], TEMP[6]
23: MOV TEMP[8].zw, IMM[0].xxxx
24: MAD TEMP[8].xy, IMM[0].xzzz, CONST[1].xyyy, IN[0].xyyy
25: MOV TEMP[8].xy, TEMP[8].xyyy
26: MOV TEMP[8].w, IMM[0].xxxx
27: TXL TEMP[8], TEMP[8], SAMP[0], 2D
28: MOV TEMP[9], TEMP[8]
29: MAD TEMP[10].x, TEMP[1].yyyy, IMM[0].wwww, TEMP[1].xxxx
30: MOV TEMP[2].x, TEMP[10].xxxx
31: MAD TEMP[11].x, TEMP[0].yyyy, IMM[0].wwww, TEMP[0].xxxx
32: MAD TEMP[12].x, TEMP[4].yyyy, IMM[0].wwww, TEMP[4].xxxx
33: MAD TEMP[13].x, TEMP[6].yyyy, IMM[0].wwww, TEMP[6].xxxx
34: MAD TEMP[14].x, TEMP[8].yyyy, IMM[0].wwww, TEMP[8].xxxx
35: MOV TEMP[3].x, TEMP[14].xxxx
36: MAX TEMP[15].x, TEMP[12].xxxx, TEMP[10].xxxx
37: MAX TEMP[16].x, TEMP[11].xxxx, TEMP[14].xxxx
38: MAX TEMP[16].x, TEMP[16].xxxx, TEMP[13].xxxx
39: MAX TEMP[15].x, TEMP[15].xxxx, TEMP[16].xxxx
40: MIN TEMP[16].x, TEMP[12].xxxx, TEMP[10].xxxx
41: MIN TEMP[5].x, TEMP[11].xxxx, TEMP[14].xxxx
42: MIN TEMP[5].x, TEMP[5].xxxx, TEMP[13].xxxx
43: MIN TEMP[16].x, TEMP[16].xxxx, TEMP[5].xxxx
44: ADD TEMP[16].x, TEMP[15].xxxx, -TEMP[16].xxxx
45: MUL TEMP[15].x, TEMP[15].xxxx, IMM[1].yyyy
46: MAX TEMP[15].x, IMM[1].xxxx, TEMP[15].xxxx
47: FSLT TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
48: UIF TEMP[15].xxxx :0
49: MOV TEMP[15].xyz, TEMP[4].xyzx
50: ELSE :0
51: ADD TEMP[5].x, TEMP[10].xxxx, TEMP[11].xxxx
52: ADD TEMP[17].x, TEMP[13].xxxx, TEMP[14].xxxx
53: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[17].xxxx
54: MAD TEMP[5].x, TEMP[5].xxxx, IMM[1].wwww, -TEMP[12].xxxx
55: ABS TEMP[5].x, TEMP[5].xxxx
56: RCP TEMP[16].x, TEMP[16].xxxx
57: MAD TEMP[16].x, TEMP[5].xxxx, TEMP[16].xxxx, IMM[2].xxxx
58: MAX TEMP[16].x, IMM[0].xxxx, TEMP[16].xxxx
59: MUL TEMP[16].x, TEMP[16].xxxx, IMM[2].yyyy
60: MIN TEMP[16].x, IMM[1].zzzz, TEMP[16].xxxx
61: MOV TEMP[5].zw, IMM[0].xxxx
62: ADD TEMP[5].xy, IN[0].xyyy, -CONST[1].xyyy
63: MOV TEMP[5].xy, TEMP[5].xyyy
64: MOV TEMP[5].w, IMM[0].xxxx
65: TXL TEMP[5], TEMP[5], SAMP[0], 2D
66: MOV TEMP[18], TEMP[5]
67: MOV TEMP[17].zw, IMM[0].xxxx
68: MAD TEMP[17].xy, IMM[0].zyyy, CONST[1].xyyy, IN[0].xyyy
69: MOV TEMP[17].xy, TEMP[17].xyyy
70: MOV TEMP[17].w, IMM[0].xxxx
71: TXL TEMP[17], TEMP[17], SAMP[0], 2D
72: MOV TEMP[19], TEMP[17]
73: MOV TEMP[20].zw, IMM[0].xxxx
74: MAD TEMP[20].xy, IMM[0].yzzz, CONST[1].xyyy, IN[0].xyyy
75: MOV TEMP[20].xy, TEMP[20].xyyy
76: MOV TEMP[20].w, IMM[0].xxxx
77: TXL TEMP[20], TEMP[20], SAMP[0], 2D
78: MOV TEMP[21], TEMP[20]
79: MOV TEMP[22].zw, IMM[0].xxxx
80: ADD TEMP[22].xy, IN[0].xyyy, CONST[1].xyyy
81: MOV TEMP[22].xy, TEMP[22].xyyy
82: MOV TEMP[22].w, IMM[0].xxxx
83: TXL TEMP[22], TEMP[22], SAMP[0], 2D
84: MOV TEMP[23], TEMP[22]
85: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz
86: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[6].xyzz
87: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz
88: ADD TEMP[0].xyz, TEMP[8].xyzz, TEMP[5].xyzz
89: ADD TEMP[4].xyz, TEMP[17].xyzz, TEMP[20].xyzz
90: ADD TEMP[4].xyz, TEMP[4].xyzz, TEMP[22].xyzz
91: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xyzz
92: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[0].xyzz
93: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[2].zzzz
94: MAD TEMP[0].x, TEMP[5].yyyy, IMM[0].wwww, TEMP[5].xxxx
95: MAD TEMP[4].x, TEMP[17].yyyy, IMM[0].wwww, TEMP[17].xxxx
96: MAD TEMP[6].x, TEMP[20].yyyy, IMM[0].wwww, TEMP[20].xxxx
97: MAD TEMP[8].x, TEMP[22].yyyy, IMM[0].wwww, TEMP[22].xxxx
98: MUL TEMP[5].x, IMM[1].wwww, TEMP[0].xxxx
99: MAD TEMP[5].x, IMM[2].wwww, TEMP[11].xxxx, TEMP[5].xxxx
100: MAD TEMP[5].x, IMM[1].wwww, TEMP[6].xxxx, TEMP[5].xxxx
101: ABS TEMP[5].x, TEMP[5].xxxx
102: MAD TEMP[17].x, IMM[3].xxxx, TEMP[10].xxxx, -TEMP[12].xxxx
103: MAD TEMP[17].x, IMM[3].xxxx, TEMP[14].xxxx, TEMP[17].xxxx
104: ABS TEMP[17].x, TEMP[17].xxxx
105: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[17].xxxx
106: MUL TEMP[17].x, IMM[1].wwww, TEMP[4].xxxx
107: MAD TEMP[17].x, IMM[2].wwww, TEMP[13].xxxx, TEMP[17].xxxx
108: MAD TEMP[17].x, IMM[1].wwww, TEMP[8].xxxx, TEMP[17].xxxx
109: ABS TEMP[17].x, TEMP[17].xxxx
110: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[17].xxxx
111: MUL TEMP[0].x, IMM[1].wwww, TEMP[0].xxxx
112: MAD TEMP[0].x, IMM[2].wwww, TEMP[10].xxxx, TEMP[0].xxxx
113: MAD TEMP[0].x, IMM[1].wwww, TEMP[4].xxxx, TEMP[0].xxxx
114: ABS TEMP[0].x, TEMP[0].xxxx
115: MAD TEMP[4].x, IMM[3].xxxx, TEMP[11].xxxx, -TEMP[12].xxxx
116: MAD TEMP[4].x, IMM[3].xxxx, TEMP[13].xxxx, TEMP[4].xxxx
117: ABS TEMP[4].x, TEMP[4].xxxx
118: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx
119: MUL TEMP[4].x, IMM[1].wwww, TEMP[6].xxxx
120: MAD TEMP[4].x, IMM[2].wwww, TEMP[14].xxxx, TEMP[4].xxxx
121: MAD TEMP[4].x, IMM[1].wwww, TEMP[8].xxxx, TEMP[4].xxxx
122: ABS TEMP[4].x, TEMP[4].xxxx
123: ADD TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx
124: FSGE TEMP[0].x, TEMP[5].xxxx, TEMP[0].xxxx
125: UIF TEMP[0].xxxx :0
126: MOV TEMP[4].x, -CONST[1].yyyy
127: ELSE :0
128: MOV TEMP[4].x, -CONST[1].xxxx
129: ENDIF
130: MOV TEMP[6].x, TEMP[4].xxxx
131: NOT TEMP[8].x, TEMP[0].xxxx
132: UIF TEMP[8].xxxx :0
133: MOV TEMP[2].x, TEMP[11].xxxx
134: ENDIF
135: NOT TEMP[8].x, TEMP[0].xxxx
136: UIF TEMP[8].xxxx :0
137: MOV TEMP[3].x, TEMP[13].xxxx
138: ENDIF
139: ADD TEMP[8].x, TEMP[2].xxxx, -TEMP[12].xxxx
140: ABS TEMP[8].x, TEMP[8].xxxx
141: MOV TEMP[10].x, TEMP[8].xxxx
142: ADD TEMP[11].x, TEMP[3].xxxx, -TEMP[12].xxxx
143: ABS TEMP[11].x, TEMP[11].xxxx
144: ADD TEMP[13].x, TEMP[2].xxxx, TEMP[12].xxxx
145: MUL TEMP[2].x, TEMP[13].xxxx, IMM[3].xxxx
146: ADD TEMP[13].x, TEMP[3].xxxx, TEMP[12].xxxx
147: MUL TEMP[13].x, TEMP[13].xxxx, IMM[3].xxxx
148: MOV TEMP[3].x, TEMP[13].xxxx
149: FSGE TEMP[8].x, TEMP[8].xxxx, TEMP[11].xxxx
150: NOT TEMP[14].x, TEMP[8].xxxx
151: UIF TEMP[14].xxxx :0
152: MOV TEMP[2].x, TEMP[13].xxxx
153: ENDIF
154: NOT TEMP[13].x, TEMP[8].xxxx
155: UIF TEMP[13].xxxx :0
156: MOV TEMP[10].x, TEMP[11].xxxx
157: ENDIF
158: NOT TEMP[8].x, TEMP[8].xxxx
159: UIF TEMP[8].xxxx :0
160: MOV TEMP[6].x, -TEMP[4].xxxx
161: ENDIF
162: UIF TEMP[0].xxxx :0
163: MOV TEMP[4].x, IMM[0].xxxx
164: ELSE :0
165: MUL TEMP[4].x, TEMP[6].xxxx, IMM[3].xxxx
166: ENDIF
167: ADD TEMP[4].x, IN[0].xxxx, TEMP[4].xxxx
168: UIF TEMP[0].xxxx :0
169: MUL TEMP[8].x, TEMP[6].xxxx, IMM[3].xxxx
170: ELSE :0
171: MOV TEMP[8].x, IMM[0].xxxx
172: ENDIF
173: ADD TEMP[8].x, IN[0].yyyy, TEMP[8].xxxx
174: MOV TEMP[4].y, TEMP[8].xxxx
175: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww
176: MOV TEMP[8].xy, TEMP[4].xyxx
177: UIF TEMP[0].xxxx :0
178: MOV TEMP[11].y, IMM[0].xxxx
179: MOV TEMP[11].x, CONST[1].xxxx
180: MOV TEMP[11].xy, TEMP[11].xyxx
181: ELSE :0
182: MOV TEMP[13].x, IMM[0].xxxx
183: MOV TEMP[13].y, CONST[1].yyyy
184: MOV TEMP[11].xy, TEMP[13].xyxx
185: ENDIF
186: MOV TEMP[13].x, TEMP[2].xxxx
187: MOV TEMP[14].x, TEMP[2].xxxx
188: MOV TEMP[3].x, IMM[4].xxxx
189: MOV TEMP[5].x, IMM[4].xxxx
190: MAD TEMP[4].xy, TEMP[11].xyyy, IMM[3].yyyy, TEMP[4].xyyy
191: MAD TEMP[8].xy, TEMP[11].xyyy, IMM[3].zzzz, TEMP[8].xyyy
192: MUL TEMP[11].xy, TEMP[11].xyyy, IMM[3].wwww
193: MOV TEMP[17].x, IMM[5].xxxx
194: BGNLOOP :0
195: ISGE TEMP[20].x, TEMP[17].xxxx, IMM[5].yyyy
196: UIF TEMP[20].xxxx :0
197: BRK
198: ENDIF
199: NOT TEMP[22].x, TEMP[3].xxxx
200: UIF TEMP[22].xxxx :0
201: MOV TEMP[24].xy, TEMP[4].xyyy
202: TXD TEMP[7], TEMP[24], TEMP[11].xyyy, TEMP[11].xyyy, SAMP[0], 2D
203: MOV TEMP[25], TEMP[7]
204: MAD TEMP[13].x, TEMP[7].yyyy, IMM[0].wwww, TEMP[7].xxxx
205: ENDIF
206: NOT TEMP[26].x, TEMP[5].xxxx
207: UIF TEMP[26].xxxx :0
208: MOV TEMP[27].xy, TEMP[8].xyyy
209: TXD TEMP[28], TEMP[27], TEMP[11].xyyy, TEMP[11].xyyy, SAMP[0], 2D
210: MOV TEMP[29], TEMP[28]
211: MAD TEMP[14].x, TEMP[28].yyyy, IMM[0].wwww, TEMP[28].xxxx
212: ENDIF
213: UIF TEMP[3].xxxx :0
214: MOV TEMP[30].x, IMM[4].yyyy
215: ELSE :0
216: ADD TEMP[9].x, TEMP[13].xxxx, -TEMP[2].xxxx
217: ABS TEMP[31].x, TEMP[9].xxxx
218: FSGE TEMP[30].x, TEMP[31].xxxx, TEMP[10].xxxx
219: ENDIF
220: MOV TEMP[3].x, TEMP[30].xxxx
221: UIF TEMP[5].xxxx :0
222: MOV TEMP[32].x, IMM[4].yyyy
223: ELSE :0
224: ADD TEMP[33].x, TEMP[14].xxxx, -TEMP[2].xxxx
225: ABS TEMP[34].x, TEMP[33].xxxx
226: FSGE TEMP[32].x, TEMP[34].xxxx, TEMP[10].xxxx
227: ENDIF
228: MOV TEMP[5].x, TEMP[32].xxxx
229: AND TEMP[35].x, TEMP[30].xxxx, TEMP[32].xxxx
230: UIF TEMP[35].xxxx :0
231: BRK
232: ENDIF
233: NOT TEMP[36].x, TEMP[30].xxxx
234: UIF TEMP[36].xxxx :0
235: ADD TEMP[4].xy, TEMP[4].xyyy, -TEMP[11].xyyy
236: ENDIF
237: NOT TEMP[37].x, TEMP[32].xxxx
238: UIF TEMP[37].xxxx :0
239: ADD TEMP[8].xy, TEMP[8].xyyy, TEMP[11].xyyy
240: ENDIF
241: UADD TEMP[17].x, TEMP[17].xxxx, IMM[5].zzzz
242: ENDLOOP :0
243: UIF TEMP[0].xxxx :0
244: ADD TEMP[10].x, IN[0].xxxx, -TEMP[4].xxxx
245: ELSE :0
246: ADD TEMP[10].x, IN[0].yyyy, -TEMP[4].yyyy
247: ENDIF
248: UIF TEMP[0].xxxx :0
249: ADD TEMP[4].x, TEMP[8].xxxx, -IN[0].xxxx
250: ELSE :0
251: ADD TEMP[4].x, TEMP[8].yyyy, -IN[0].yyyy
252: ENDIF
253: FSLT TEMP[8].x, TEMP[10].xxxx, TEMP[4].xxxx
254: UIF TEMP[8].xxxx :0
255: MOV TEMP[11].x, TEMP[13].xxxx
256: ELSE :0
257: MOV TEMP[11].x, TEMP[14].xxxx
258: ENDIF
259: FSLT TEMP[12].x, TEMP[12].xxxx, TEMP[2].xxxx
260: FSLT TEMP[2].x, TEMP[11].xxxx, TEMP[2].xxxx
261: USEQ TEMP[2].x, TEMP[12].xxxx, TEMP[2].xxxx
262: UIF TEMP[2].xxxx :0
263: MOV TEMP[6].x, IMM[0].xxxx
264: ENDIF
265: ADD TEMP[2].x, TEMP[4].xxxx, TEMP[10].xxxx
266: UIF TEMP[8].xxxx :0
267: MOV TEMP[8].x, TEMP[10].xxxx
268: ELSE :0
269: MOV TEMP[8].x, TEMP[4].xxxx
270: ENDIF
271: RCP TEMP[4].x, TEMP[2].xxxx
272: MAD TEMP[4].x, TEMP[8].xxxx, -TEMP[4].xxxx, IMM[3].xxxx
273: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[6].xxxx
274: UIF TEMP[0].xxxx :0
275: MOV TEMP[6].x, IMM[0].xxxx
276: ELSE :0
277: MOV TEMP[6].x, TEMP[4].xxxx
278: ENDIF
279: UIF TEMP[0].xxxx :0
280: MOV TEMP[0].x, TEMP[4].xxxx
281: ELSE :0
282: MOV TEMP[0].x, IMM[0].xxxx
283: ENDIF
284: ADD TEMP[4].x, IN[0].xxxx, TEMP[6].xxxx
285: ADD TEMP[0].x, IN[0].yyyy, TEMP[0].xxxx
286: MOV TEMP[4].y, TEMP[0].xxxx
287: MOV TEMP[0].xy, TEMP[4].xyyy
288: MOV TEMP[0].w, IMM[0].xxxx
289: TXL TEMP[0].xyz, TEMP[0], SAMP[0], 2D
290: MOV TEMP[4].x, -TEMP[16].xxxx
291: MOV TEMP[4].y, -TEMP[16].xxxx
292: MOV TEMP[4].z, -TEMP[16].xxxx
293: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[16].xxxx, TEMP[0].xyzz
294: MAD TEMP[15].xyz, TEMP[4].xyzz, TEMP[0].xyzz, TEMP[1].xyzz
295: ENDIF
296: MOV TEMP[1].w, IMM[0].xxxx
297: MOV TEMP[1].xyz, TEMP[15].xyzx
298: MOV OUT[0], TEMP[1]
299: END
; ModuleID = 'tgsi'
; @main: samples one 2D image around an interpolated coordinate, measures the local
; contrast of a weighted (y * ~1.9632 + x) channel sum, and exports either the centre
; texel unchanged or a blend of a re-sampled texel with the 3x3 neighbourhood average.
; NOTE(review): the structure (5+4 tap neighbourhood, contrast threshold, bounded
; two-direction search loop) looks like an FXAA-style anti-aliasing pass — confirm
; against the originating shader.
; Arguments referenced in the body:
;   %1      - descriptor array; element 0 is read through llvm.SI.load.const
;   %2      - descriptor array; element 0 is the third operand of every sample call
;   %3      - descriptor array; element 0 is the second operand of every sample call
;   %5, %7  - forwarded to llvm.SI.fs.interp
; All other arguments are unused in this function.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; %24 / %25: two floats read from the buffer at byte offsets 16 and 20. They are used
; below as the x / y step between neighbouring taps (presumably 1/width and 1/height
; — TODO confirm against the constant upload).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
; %27 and %29 are the two descriptors handed to every sample call in this function.
%26 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%27 = load <8 x i32>, <8 x i32> addrspace(2)* %26, align 32, !tbaa !0
%28 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%29 = load <4 x i32>, <4 x i32> addrspace(2)* %28, align 16, !tbaa !0
; %30 / %31: interpolated channels 0 and 1 of attribute 0 — the centre coordinate (x, y).
%30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; Tap at (x + 0*%24, y - %25) -> %42. The third coordinate lane is the constant 0.
%32 = fmul float %24, 0.000000e+00
%33 = fadd float %32, %30
%34 = fsub float %31, %25
%35 = bitcast float %33 to i32
%36 = bitcast float %34 to i32
%37 = insertelement <4 x i32> undef, i32 %35, i32 0
%38 = insertelement <4 x i32> %37, i32 %36, i32 1
%39 = insertelement <4 x i32> %38, i32 0, i32 2
%40 = bitcast <8 x i32> %27 to <32 x i8>
%41 = bitcast <4 x i32> %29 to <16 x i8>
%42 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
; Tap at (x - %24, y + 0*%25) -> %55.
%45 = fsub float %30, %24
%46 = fmul float %25, 0.000000e+00
%47 = fadd float %46, %31
%48 = bitcast float %45 to i32
%49 = bitcast float %47 to i32
%50 = insertelement <4 x i32> undef, i32 %48, i32 0
%51 = insertelement <4 x i32> %50, i32 %49, i32 1
%52 = insertelement <4 x i32> %51, i32 0, i32 2
%53 = bitcast <8 x i32> %27 to <32 x i8>
%54 = bitcast <4 x i32> %29 to <16 x i8>
%55 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %52, <32 x i8> %53, <16 x i8> %54, i32 2)
%56 = extractelement <4 x float> %55, i32 0
%57 = extractelement <4 x float> %55, i32 1
; Centre tap at (x, y) -> %65; its x/y/z channels (%66..%68) are the early-out result.
%58 = bitcast float %30 to i32
%59 = bitcast float %31 to i32
%60 = insertelement <4 x i32> undef, i32 %58, i32 0
%61 = insertelement <4 x i32> %60, i32 %59, i32 1
%62 = insertelement <4 x i32> %61, i32 0, i32 2
%63 = bitcast <8 x i32> %27 to <32 x i8>
%64 = bitcast <4 x i32> %29 to <16 x i8>
%65 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %62, <32 x i8> %63, <16 x i8> %64, i32 2)
%66 = extractelement <4 x float> %65, i32 0
%67 = extractelement <4 x float> %65, i32 1
%68 = extractelement <4 x float> %65, i32 2
; Tap at (x + %24, y + 0*%25) -> %79.
%69 = fadd float %24, %30
%70 = fmul float %25, 0.000000e+00
%71 = fadd float %70, %31
%72 = bitcast float %69 to i32
%73 = bitcast float %71 to i32
%74 = insertelement <4 x i32> undef, i32 %72, i32 0
%75 = insertelement <4 x i32> %74, i32 %73, i32 1
%76 = insertelement <4 x i32> %75, i32 0, i32 2
%77 = bitcast <8 x i32> %27 to <32 x i8>
%78 = bitcast <4 x i32> %29 to <16 x i8>
%79 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2)
%80 = extractelement <4 x float> %79, i32 0
%81 = extractelement <4 x float> %79, i32 1
; Tap at (x + 0*%24, y + %25) -> %92.
%82 = fmul float %24, 0.000000e+00
%83 = fadd float %82, %30
%84 = fadd float %25, %31
%85 = bitcast float %83 to i32
%86 = bitcast float %84 to i32
%87 = insertelement <4 x i32> undef, i32 %85, i32 0
%88 = insertelement <4 x i32> %87, i32 %86, i32 1
%89 = insertelement <4 x i32> %88, i32 0, i32 2
%90 = bitcast <8 x i32> %27 to <32 x i8>
%91 = bitcast <4 x i32> %29 to <16 x i8>
%92 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
; Per-tap scalar w = y * 0x3FFF695000000000 (~1.9632) + x:
;   %96 <- %42, %98 <- %55, %100 <- centre %65, %102 <- %79, %104 <- %92.
; NOTE(review): presumably a luma estimate — confirm.
%95 = fmul float %44, 0x3FFF695000000000
%96 = fadd float %95, %43
%97 = fmul float %57, 0x3FFF695000000000
%98 = fadd float %97, %56
%99 = fmul float %67, 0x3FFF695000000000
%100 = fadd float %99, %66
%101 = fmul float %81, 0x3FFF695000000000
%102 = fadd float %101, %80
%103 = fmul float %94, 0x3FFF695000000000
%104 = fadd float %103, %93
; %108 = max and %112 = min of the five scalars; %113 = their range.
%105 = call float @llvm.maxnum.f32(float %100, float %96)
%106 = call float @llvm.maxnum.f32(float %98, float %104)
%107 = call float @llvm.maxnum.f32(float %106, float %102)
%108 = call float @llvm.maxnum.f32(float %105, float %107)
%109 = call float @llvm.minnum.f32(float %100, float %96)
%110 = call float @llvm.minnum.f32(float %98, float %104)
%111 = call float @llvm.minnum.f32(float %110, float %102)
%112 = call float @llvm.minnum.f32(float %109, float %111)
%113 = fsub float %108, %112
; Early out: if range < max(max * 0.125, ~1/24) jump to ENDIF and export the centre texel.
%114 = fmul float %108, 1.250000e-01
%115 = call float @llvm.maxnum.f32(float %114, float 0x3FA5555580000000)
%116 = fcmp olt float %113, %115
br i1 %116, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
; z channels of the four non-centre taps fetched in main_body.
%117 = extractelement <4 x float> %92, i32 2
%118 = extractelement <4 x float> %79, i32 2
%119 = extractelement <4 x float> %55, i32 2
%120 = extractelement <4 x float> %42, i32 2
; %132 = min(max(|mean(%96,%98,%102,%104) - %100| / %113 - 0.25, 0) * ~4/3, 0.75).
; It is used in ENDLOOP as the weight of the neighbourhood average.
%121 = fadd float %96, %98
%122 = fadd float %102, %104
%123 = fadd float %121, %122
%124 = fmul float %123, 2.500000e-01
%125 = fsub float %124, %100
%126 = call float @llvm.fabs.f32(float %125)
%127 = fdiv float 1.000000e+00, %113
%128 = fmul float %126, %127
%129 = fadd float %128, -2.500000e-01
%130 = call float @llvm.maxnum.f32(float %129, float 0.000000e+00)
%131 = fmul float %130, 0x3FF5555500000000
%132 = call float @llvm.minnum.f32(float %131, float 7.500000e-01)
; Diagonal tap at (x - %24, y - %25) -> %142.
%133 = fsub float %30, %24
%134 = fsub float %31, %25
%135 = bitcast float %133 to i32
%136 = bitcast float %134 to i32
%137 = insertelement <4 x i32> undef, i32 %135, i32 0
%138 = insertelement <4 x i32> %137, i32 %136, i32 1
%139 = insertelement <4 x i32> %138, i32 0, i32 2
%140 = bitcast <8 x i32> %27 to <32 x i8>
%141 = bitcast <4 x i32> %29 to <16 x i8>
%142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2)
%143 = extractelement <4 x float> %142, i32 0
%144 = extractelement <4 x float> %142, i32 1
%145 = extractelement <4 x float> %142, i32 2
; Diagonal tap at (x + %24, y - %25) -> %155.
%146 = fadd float %24, %30
%147 = fsub float %31, %25
%148 = bitcast float %146 to i32
%149 = bitcast float %147 to i32
%150 = insertelement <4 x i32> undef, i32 %148, i32 0
%151 = insertelement <4 x i32> %150, i32 %149, i32 1
%152 = insertelement <4 x i32> %151, i32 0, i32 2
%153 = bitcast <8 x i32> %27 to <32 x i8>
%154 = bitcast <4 x i32> %29 to <16 x i8>
%155 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %152, <32 x i8> %153, <16 x i8> %154, i32 2)
%156 = extractelement <4 x float> %155, i32 0
%157 = extractelement <4 x float> %155, i32 1
%158 = extractelement <4 x float> %155, i32 2
; Diagonal tap at (x - %24, y + %25) -> %168.
%159 = fsub float %30, %24
%160 = fadd float %25, %31
%161 = bitcast float %159 to i32
%162 = bitcast float %160 to i32
%163 = insertelement <4 x i32> undef, i32 %161, i32 0
%164 = insertelement <4 x i32> %163, i32 %162, i32 1
%165 = insertelement <4 x i32> %164, i32 0, i32 2
%166 = bitcast <8 x i32> %27 to <32 x i8>
%167 = bitcast <4 x i32> %29 to <16 x i8>
%168 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %165, <32 x i8> %166, <16 x i8> %167, i32 2)
%169 = extractelement <4 x float> %168, i32 0
%170 = extractelement <4 x float> %168, i32 1
%171 = extractelement <4 x float> %168, i32 2
; Diagonal tap at (x + %24, y + %25) -> %181.
%172 = fadd float %30, %24
%173 = fadd float %31, %25
%174 = bitcast float %172 to i32
%175 = bitcast float %173 to i32
%176 = insertelement <4 x i32> undef, i32 %174, i32 0
%177 = insertelement <4 x i32> %176, i32 %175, i32 1
%178 = insertelement <4 x i32> %177, i32 0, i32 2
%179 = bitcast <8 x i32> %27 to <32 x i8>
%180 = bitcast <4 x i32> %29 to <16 x i8>
%181 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %178, <32 x i8> %179, <16 x i8> %180, i32 2)
%182 = extractelement <4 x float> %181, i32 0
%183 = extractelement <4 x float> %181, i32 1
%184 = extractelement <4 x float> %181, i32 2
; Per-channel sum of all nine taps (%206..%208), scaled by 0x3FBC71C6E0000000 (~1/9):
; %209, %210, %211 = neighbourhood average x, y, z.
%185 = fadd float %43, %56
%186 = fadd float %44, %57
%187 = fadd float %120, %119
%188 = fadd float %66, %80
%189 = fadd float %67, %81
%190 = fadd float %68, %118
%191 = fadd float %185, %188
%192 = fadd float %186, %189
%193 = fadd float %187, %190
%194 = fadd float %93, %143
%195 = fadd float %94, %144
%196 = fadd float %117, %145
%197 = fadd float %156, %169
%198 = fadd float %157, %170
%199 = fadd float %158, %171
%200 = fadd float %197, %182
%201 = fadd float %198, %183
%202 = fadd float %199, %184
%203 = fadd float %194, %200
%204 = fadd float %195, %201
%205 = fadd float %196, %202
%206 = fadd float %191, %203
%207 = fadd float %192, %204
%208 = fadd float %193, %205
%209 = fmul float %206, 0x3FBC71C6E0000000
%210 = fmul float %207, 0x3FBC71C6E0000000
%211 = fmul float %208, 0x3FBC71C6E0000000
; Weighted scalar (same y * ~1.9632 + x form) for the four diagonal taps:
; %213 <- %142, %215 <- %155, %217 <- %168, %219 <- %181.
%212 = fmul float %144, 0x3FFF695000000000
%213 = fadd float %212, %143
%214 = fmul float %157, 0x3FFF695000000000
%215 = fadd float %214, %156
%216 = fmul float %170, 0x3FFF695000000000
%217 = fadd float %216, %169
%218 = fmul float %183, 0x3FFF695000000000
%219 = fadd float %218, %182
; %238: sum of three |0.25*a - 0.5*b + 0.25*c|-style second differences taken along y
; (columns x-step, x, x+step; the centre column uses weights 0.5 / -1 / 0.5).
%220 = fmul float %213, 2.500000e-01
%221 = fmul float %98, -5.000000e-01
%222 = fadd float %221, %220
%223 = fmul float %217, 2.500000e-01
%224 = fadd float %223, %222
%225 = call float @llvm.fabs.f32(float %224)
%226 = fmul float %96, 5.000000e-01
%227 = fsub float %226, %100
%228 = fmul float %104, 5.000000e-01
%229 = fadd float %228, %227
%230 = call float @llvm.fabs.f32(float %229)
%231 = fadd float %225, %230
%232 = fmul float %215, 2.500000e-01
%233 = fmul float %102, -5.000000e-01
%234 = fadd float %233, %232
%235 = fmul float %219, 2.500000e-01
%236 = fadd float %235, %234
%237 = call float @llvm.fabs.f32(float %236)
%238 = fadd float %231, %237
; %257: the same measure taken along x (rows y-step, y, y+step).
%239 = fmul float %213, 2.500000e-01
%240 = fmul float %96, -5.000000e-01
%241 = fadd float %240, %239
%242 = fmul float %215, 2.500000e-01
%243 = fadd float %242, %241
%244 = call float @llvm.fabs.f32(float %243)
%245 = fmul float %98, 5.000000e-01
%246 = fsub float %245, %100
%247 = fmul float %102, 5.000000e-01
%248 = fadd float %247, %246
%249 = call float @llvm.fabs.f32(float %248)
%250 = fadd float %244, %249
%251 = fmul float %217, 2.500000e-01
%252 = fmul float %104, -5.000000e-01
%253 = fadd float %252, %251
%254 = fmul float %219, 2.500000e-01
%255 = fadd float %254, %253
%256 = call float @llvm.fabs.f32(float %255)
%257 = fadd float %250, %256
; %258 selects the orientation for everything below: true picks the y step (%25) and
; the y-neighbours (%96 / %104), false picks the x step (%24) and the x-neighbours
; (%98 / %102).
%258 = fcmp oge float %238, %257
%.sink = select i1 %258, float %25, float %24
%259 = fsub float -0.000000e+00, %.sink
%. = select i1 %258, float %96, float %98
%temp12.0 = select i1 %258, float %104, float %102
; %264: whether the first neighbour differs from the centre at least as much as the
; second one. It picks the neighbour (%.234.v.v), the larger difference (%temp40.0)
; and the signed step (%temp24.0 = -step or +step).
%260 = fsub float %., %100
%261 = call float @llvm.fabs.f32(float %260)
%262 = fsub float %temp12.0, %100
%263 = call float @llvm.fabs.f32(float %262)
%264 = fcmp oge float %261, %263
%.234.v.v = select i1 %264, float %., float %temp12.0
%.234.v = fadd float %.234.v.v, %100
; %.234 = average of the centre scalar and the chosen neighbour scalar.
%.234 = fmul float %.234.v, 5.000000e-01
%temp40.0 = select i1 %264, float %261, float %263
%temp24.0 = select i1 %264, float %259, float %.sink
; (%266, %268): centre coordinate moved half a signed step along the chosen axis.
%265 = fmul float %temp24.0, 5.000000e-01
%temp16.1 = select i1 %258, float 0.000000e+00, float %265
%266 = fadd float %30, %temp16.1
%267 = fmul float %temp24.0, 5.000000e-01
%temp32.0 = select i1 %258, float %267, float 0.000000e+00
%268 = fadd float %31, %temp32.0
; %269 = 0.25 * larger difference: the threshold tested inside the loop.
%269 = fmul float %temp40.0, 2.500000e-01
; (%.236, %.235): search direction — (%24, 0) when %258 is true, (0, %25) otherwise.
%.235 = select i1 %258, float 0.000000e+00, float %25
%.236 = select i1 %258, float %24, float 0.000000e+00
; Loop start points: (%271, %273) = base - 1.5 * dir, (%275, %277) = base + 1.5 * dir.
%270 = fmul float %.236, -1.500000e+00
%271 = fadd float %270, %266
%272 = fmul float %.235, -1.500000e+00
%273 = fadd float %272, %268
%274 = fmul float %.236, 1.500000e+00
%275 = fadd float %274, %266
%276 = fmul float %.235, 1.500000e+00
%277 = fadd float %276, %268
; (%278, %279) = 2 * dir: the per-iteration step, also placed in lanes 0..3 of the
; llvm.SI.sampled address vectors %287 / %297 (lanes 4 and 5 are filled in the loop).
%278 = fmul float %.236, 2.000000e+00
%279 = fmul float %.235, 2.000000e+00
%280 = bitcast float %278 to i32
%281 = bitcast float %279 to i32
%282 = bitcast float %278 to i32
%283 = bitcast float %279 to i32
%284 = insertelement <8 x i32> undef, i32 %280, i32 0
%285 = insertelement <8 x i32> %284, i32 %281, i32 1
%286 = insertelement <8 x i32> %285, i32 %282, i32 2
%287 = insertelement <8 x i32> %286, i32 %283, i32 3
%288 = bitcast <8 x i32> %27 to <32 x i8>
%289 = bitcast <4 x i32> %29 to <16 x i8>
%290 = bitcast float %278 to i32
%291 = bitcast float %279 to i32
%292 = bitcast float %278 to i32
%293 = bitcast float %279 to i32
%294 = insertelement <8 x i32> undef, i32 %290, i32 0
%295 = insertelement <8 x i32> %294, i32 %291, i32 1
%296 = insertelement <8 x i32> %295, i32 %292, i32 2
%297 = insertelement <8 x i32> %296, i32 %293, i32 3
%298 = bitcast <8 x i32> %27 to <32 x i8>
%299 = bitcast <4 x i32> %29 to <16 x i8>
br label %LOOP
ENDIF: ; preds = %main_body, %ENDLOOP
; Function exit: the colour is the centre texel (early-out edge from main_body) or the
; blended result from ENDLOOP. It is packed to half floats and exported as
; (xy, z0, xy, z0).
%temp60.0 = phi float [ %338, %ENDLOOP ], [ %66, %main_body ]
%temp61.0 = phi float [ %340, %ENDLOOP ], [ %67, %main_body ]
%temp62.0 = phi float [ %342, %ENDLOOP ], [ %68, %main_body ]
%300 = call i32 @llvm.SI.packf16(float %temp60.0, float %temp61.0)
%301 = bitcast i32 %300 to float
%302 = call i32 @llvm.SI.packf16(float %temp62.0, float 0.000000e+00)
%303 = bitcast i32 %302 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %301, float %303, float %301, float %303)
ret void
LOOP: ; preds = %ENDIF194, %ELSE
; Loop-carried state:
;   temp52.0 / temp56.0  - last scalar sampled on the negative / positive side
;   temp68.0             - iteration counter (integer bits carried in a float)
;   temp16.2, temp17.0   - current negative-side coordinate
;   temp32.1, temp33.0   - current positive-side coordinate
;   temp12.1 / temp20.0  - negative / positive "done" flags (0 or -1 as integer bits)
%temp56.0 = phi float [ %.234, %ELSE ], [ %temp56.2, %ENDIF194 ]
%temp52.0 = phi float [ %.234, %ELSE ], [ %temp52.2, %ENDIF194 ]
%temp68.0 = phi float [ 0.000000e+00, %ELSE ], [ %393, %ENDIF194 ]
%temp33.0 = phi float [ %277, %ELSE ], [ %temp33.1, %ENDIF194 ]
%temp32.1 = phi float [ %275, %ELSE ], [ %temp32.2, %ENDIF194 ]
%temp20.0 = phi float [ 0.000000e+00, %ELSE ], [ %temp128.0, %ENDIF194 ]
%temp17.0 = phi float [ %273, %ELSE ], [ %temp17.1, %ENDIF194 ]
%temp16.2 = phi float [ %271, %ELSE ], [ %temp16.3, %ENDIF194 ]
%temp12.1 = phi float [ 0.000000e+00, %ELSE ], [ %temp120.0, %ENDIF194 ]
; Leave the loop once the counter exceeds 7, i.e. after at most 8 iterations.
%304 = bitcast float %temp68.0 to i32
%305 = icmp sgt i32 %304, 7
br i1 %305, label %ENDLOOP, label %ENDIF179
ENDLOOP: ; preds = %ENDIF191, %LOOP
%temp56.1 = phi float [ %temp56.0, %LOOP ], [ %temp56.2, %ENDIF191 ]
%temp52.1 = phi float [ %temp52.0, %LOOP ], [ %temp52.2, %ENDIF191 ]
; Distance from the centre to each search end point along the chosen axis:
; %temp40.1 = negative side, %temp16.4 = positive side.
%306 = fsub float %30, %temp16.2
%307 = fsub float %31, %temp17.0
%temp40.1 = select i1 %258, float %306, float %307
%308 = fsub float %temp32.1, %30
%309 = fsub float %temp33.0, %31
%temp16.4 = select i1 %258, float %308, float %309
; %310: the negative side is nearer. Take that side's last sampled scalar.
%310 = fcmp olt float %temp40.1, %temp16.4
%temp52.1.temp56.1 = select i1 %310, float %temp52.1, float %temp56.1
; The signed step is kept only when the centre scalar and the nearer end scalar lie on
; different sides of %.234 (xor of the two comparisons); otherwise it becomes 0.
%311 = fcmp olt float %100, %.234
%312 = fcmp olt float %temp52.1.temp56.1, %.234
%tmp = xor i1 %311, %312
%temp24.1 = select i1 %tmp, float %temp24.0, float 0.000000e+00
; %317 = (0.5 - nearer_distance / (neg + pos distance)) * step: the offset applied to
; the centre coordinate on the axis perpendicular to the search direction.
%313 = fadd float %temp16.4, %temp40.1
%temp40.1.temp16.4 = select i1 %310, float %temp40.1, float %temp16.4
%314 = fdiv float 1.000000e+00, %313
%315 = fmul float %314, %temp40.1.temp16.4
%316 = fsub float 5.000000e-01, %315
%317 = fmul float %316, %temp24.1
%temp24.2 = select i1 %258, float 0.000000e+00, float %317
%.237 = select i1 %258, float %317, float 0.000000e+00
%318 = fadd float %30, %temp24.2
%319 = fadd float %31, %.237
; Re-sample at the shifted coordinate -> %327.
%320 = bitcast float %318 to i32
%321 = bitcast float %319 to i32
%322 = insertelement <4 x i32> undef, i32 %320, i32 0
%323 = insertelement <4 x i32> %322, i32 %321, i32 1
%324 = insertelement <4 x i32> %323, i32 0, i32 2
%325 = bitcast <8 x i32> %27 to <32 x i8>
%326 = bitcast <4 x i32> %29 to <16 x i8>
%327 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %324, <32 x i8> %325, <16 x i8> %326, i32 2)
%328 = extractelement <4 x float> %327, i32 0
%329 = extractelement <4 x float> %327, i32 1
%330 = extractelement <4 x float> %327, i32 2
; Per channel: result = avg * %132 + texel - %132 * texel, i.e. a linear blend from
; the re-sampled texel towards the neighbourhood average with weight %132.
%331 = fmul float %209, %132
%332 = fadd float %331, %328
%333 = fmul float %210, %132
%334 = fadd float %333, %329
%335 = fmul float %211, %132
%336 = fadd float %335, %330
%337 = fmul float %132, %328
%338 = fsub float %332, %337
%339 = fmul float %132, %329
%340 = fsub float %334, %339
%341 = fmul float %132, %330
%342 = fsub float %336, %341
br label %ENDIF
ENDIF179: ; preds = %LOOP
; Skip the negative-side fetch when its done flag is already -1.
%343 = bitcast float %temp12.1 to i32
%344 = icmp eq i32 %343, -1
br i1 %344, label %ENDIF182, label %IF183
IF183: ; preds = %ENDIF179
; Negative side: explicit-derivative sample at (temp16.2, temp17.0), then the same
; y * ~1.9632 + x scalar -> %353.
%345 = bitcast float %temp16.2 to i32
%346 = bitcast float %temp17.0 to i32
%347 = insertelement <8 x i32> %287, i32 %345, i32 4
%348 = insertelement <8 x i32> %347, i32 %346, i32 5
%349 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %348, <32 x i8> %288, <16 x i8> %289, i32 2)
%350 = extractelement <4 x float> %349, i32 0
%351 = extractelement <4 x float> %349, i32 1
%352 = fmul float %351, 0x3FFF695000000000
%353 = fadd float %352, %350
br label %ENDIF182
ENDIF182: ; preds = %ENDIF179, %IF183
%temp52.2 = phi float [ %353, %IF183 ], [ %temp52.0, %ENDIF179 ]
; Skip the positive-side fetch when its done flag is already -1.
%354 = bitcast float %temp20.0 to i32
%355 = icmp eq i32 %354, -1
br i1 %355, label %ENDIF185, label %IF186
IF186: ; preds = %ENDIF182
; Positive side: explicit-derivative sample at (temp32.1, temp33.0) -> scalar %364.
%356 = bitcast float %temp32.1 to i32
%357 = bitcast float %temp33.0 to i32
%358 = insertelement <8 x i32> %297, i32 %356, i32 4
%359 = insertelement <8 x i32> %358, i32 %357, i32 5
%360 = call <4 x float> @llvm.SI.sampled.v8i32(<8 x i32> %359, <32 x i8> %298, <16 x i8> %299, i32 2)
%361 = extractelement <4 x float> %360, i32 0
%362 = extractelement <4 x float> %360, i32 1
%363 = fmul float %362, 0x3FFF695000000000
%364 = fadd float %363, %361
br label %ENDIF185
ENDIF185: ; preds = %ENDIF182, %IF186
%temp56.2 = phi float [ %364, %IF186 ], [ %temp56.0, %ENDIF182 ]
; Only re-evaluate the negative done flag while it is still 0.
%365 = bitcast float %temp12.1 to i32
%366 = icmp eq i32 %365, 0
br i1 %366, label %ELSE190, label %ENDIF188
ELSE190: ; preds = %ENDIF185
; New negative flag = (|scalar - %.234| >= %269), sign-extended to 0 / -1.
%367 = fsub float %temp52.2, %.234
%368 = call float @llvm.fabs.f32(float %367)
%369 = fcmp oge float %368, %269
%370 = sext i1 %369 to i32
%371 = bitcast i32 %370 to float
br label %ENDIF188
ENDIF188: ; preds = %ENDIF185, %ELSE190
; 0xFFFFFFFFE0000000 is the float whose bit pattern is 0xFFFFFFFF, i.e. integer -1.
%temp120.0 = phi float [ %371, %ELSE190 ], [ 0xFFFFFFFFE0000000, %ENDIF185 ]
; Only re-evaluate the positive done flag while it is still 0.
%372 = bitcast float %temp20.0 to i32
%373 = icmp eq i32 %372, 0
br i1 %373, label %ELSE193, label %ENDIF191
ELSE193: ; preds = %ENDIF188
; New positive flag = (|scalar - %.234| >= %269), sign-extended to 0 / -1.
%374 = fsub float %temp56.2, %.234
%375 = call float @llvm.fabs.f32(float %374)
%376 = fcmp oge float %375, %269
%377 = sext i1 %376 to i32
%378 = bitcast i32 %377 to float
br label %ENDIF191
ENDIF191: ; preds = %ENDIF188, %ELSE193
%temp128.0 = phi float [ %378, %ELSE193 ], [ 0xFFFFFFFFE0000000, %ENDIF188 ]
; Break out of the loop when the bitwise AND of the two flags is non-zero.
%379 = bitcast float %temp120.0 to i32
%380 = bitcast float %temp128.0 to i32
%381 = and i32 %379, %380
%382 = icmp eq i32 %381, 0
br i1 %382, label %ENDIF194, label %ENDLOOP
ENDIF194: ; preds = %ENDIF191
; Advance each side that is not done: negative side moves by -(%278, %279), positive
; side by +(%278, %279). Then increment the counter and iterate.
%383 = bitcast float %temp120.0 to i32
%384 = icmp ne i32 %383, -1
%385 = fsub float %temp16.2, %278
%386 = fsub float %temp17.0, %279
%temp17.1 = select i1 %384, float %386, float %temp17.0
%temp16.3 = select i1 %384, float %385, float %temp16.2
%387 = bitcast float %temp128.0 to i32
%388 = icmp ne i32 %387, -1
%389 = fadd float %temp32.1, %278
%390 = fadd float %temp33.0, %279
%temp33.1 = select i1 %388, float %390, float %temp33.0
%temp32.2 = select i1 %388, float %389, float %temp32.1
%391 = bitcast float %temp68.0 to i32
%392 = add i32 %391, 1
%393 = bitcast i32 %392 to float
br label %LOOP
}
; Intrinsics referenced by @main. Every one except llvm.SI.export carries attribute
; group #1 (nounwind readnone).
; Reads one float from a buffer descriptor at a byte offset (@main uses 16 and 20).
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Interpolates one channel of one input attribute (@main uses channels 0 and 1 of attribute 0).
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Image sample taking a <4 x i32> address vector.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Image sample taking an <8 x i32> address vector (four derivative lanes plus the coordinate in @main).
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sampled.v8i32(<8 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Packs two floats into one i32 (used for the half-float colour export in @main).
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Not marked readnone: this is the call that emits the shader's output.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0 is attached to @main. NOTE(review): "ShaderType"="0" presumably
; denotes a pixel/fragment shader (the vertex shader earlier in this dump uses "1") — confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
; TBAA node attached to the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Disassembly listing; presumably the compiled form of the preceding @main.
; Branch targets (BB0_3, BB0_4, ...) are referenced, but no label lines are printed
; in this listing and every branch encodes a zero offset, so control flow has to be
; read from the exec-mask save/restore pairs.
; Load descriptors and two scalar constants (s8, s10), then interpolate attribute 0
; channels 0 and 1 into v2 and v3.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
; Build the offset coordinates from v2/v3 and s8/s10 and issue five image_sample_l
; fetches (centre plus four axis neighbours).
v_mad_f32 v7, 0, s8, v2 ; D2820007 04081080
v_subrev_f32_e32 v8, s10, v3 ; 0A10060A
v_subrev_f32_e32 v10, s8, v2 ; 0A140408
v_mad_f32 v11, 0, s10, v3 ; D282000B 040C1480
v_mov_b32_e32 v9, 0 ; 7E120280
v_mov_b32_e32 v12, v9 ; 7E180309
v_mov_b32_e32 v4, v9 ; 7E080309
image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900F00 00641407
s_waitcnt vmcnt(0) ; BF8C0770
image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 0064170A
v_add_f32_e32 v10, s8, v2 ; 06140408
v_mov_b32_e32 v12, v9 ; 7E180309
image_sample_l v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[16:23], s[12:15] ; F0900700 00640402
image_sample_l v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 00640D0A
v_add_f32_e32 v8, s10, v3 ; 0610060A
s_waitcnt vmcnt(0) ; BF8C0770
image_sample_l v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900F00 00641007
; Per-tap scalar y * 0x3ffb4a80 (~1.9632) + x into v8, v10, v0 (centre), v11, v12.
v_mov_b32_e32 v1, 0x3ffb4a80 ; 7E0202FF 3FFB4A80
v_mad_f32 v8, v1, v21, v20 ; D2820008 04522B01
v_mad_f32 v10, v1, v24, v23 ; D282000A 045E3101
v_mad_f32 v0, v1, v5, v4 ; D2820000 04120B01
v_mad_f32 v11, v1, v14, v13 ; D282000B 04361D01
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v12, v1, v17, v16 ; D282000C 04422301
; Max and min of the five scalars, their range (v1) and the threshold
; max(0.125 * max, 0x3d2aaaac ~ 1/24). Lanes whose range is not below the threshold
; stay active for the region that ends at the s_or_b64 exec restore from s[6:7].
v_max3_f32 v1, v10, v12, v11 ; D2A80001 042E190A
v_max3_f32 v7, v0, v8, v1 ; D2A80007 04061100
v_min3_f32 v1, v10, v12, v11 ; D2A20001 042E190A
v_min3_f32 v1, v0, v8, v1 ; D2A20001 04061100
v_subrev_f32_e32 v1, v1, v7 ; 0A020F01
v_mul_f32_e32 v7, 0x3e000000, v7 ; 100E0EFF 3E000000
v_max_f32_e32 v7, 0x3d2aaaac, v7 ; 200E0EFF 3D2AAAAC
v_cmp_nlt_f32_e32 vcc, v1, v7 ; 7C1C0F01
s_and_saveexec_b64 s[6:7], vcc ; BE86246A
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_3 ; BF880000
; Blend weight in v1: min(max(|mean4 - centre| / range - 0.25, 0) * ~4/3, 0.75).
v_rcp_f32_e32 v1, v1 ; 7E025501
v_mov_b32_e32 v19, s8 ; 7E260208
v_mov_b32_e32 v28, s10 ; 7E38020A
v_add_f32_e32 v7, v10, v8 ; 060E110A
v_add_f32_e32 v9, v12, v11 ; 0612170C
v_add_f32_e32 v7, v9, v7 ; 060E0F09
v_mov_b32_e32 v29, 0x3e800000 ; 7E3A02FF 3E800000
v_mad_f32 v7, v7, v29, -v0 ; D2820007 84023B07
v_mov_b32_e32 v9, 0xbe800000 ; 7E1202FF BE800000
v_mad_f32 v1, |v7|, v1, v9 ; D2820101 04260307
v_max_f32_e32 v1, 0, v1 ; 20020280
v_mul_f32_e32 v1, 0x3faaaaa8, v1 ; 100202FF 3FAAAAA8
v_min_f32_e32 v1, 0x3f400000, v1 ; 1E0202FF 3F400000
; Four diagonal fetches interleaved with the per-channel accumulation of all taps.
; The totals are scaled by 0x3de38e37 (~1/9) into v6, v5, v4.
v_add_f32_e32 v7, v23, v20 ; 060E2917
v_add_f32_e32 v9, v24, v21 ; 06122B18
v_add_f32_e32 v20, v25, v22 ; 06282D19
v_subrev_f32_e32 v21, s8, v2 ; 0A2A0408
v_subrev_f32_e32 v22, s10, v3 ; 0A2C060A
v_mov_b32_e32 v23, 0 ; 7E2E0280
v_add_f32_e32 v4, v13, v4 ; 0608090D
v_add_f32_e32 v5, v14, v5 ; 060A0B0E
v_add_f32_e32 v24, s8, v2 ; 06300408
v_add_f32_e32 v6, v15, v6 ; 060C0D0F
v_mov_b32_e32 v25, v22 ; 7E320316
image_sample_l v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0900700 00640D15
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v16, v13, v16 ; 0620210D
v_add_f32_e32 v17, v14, v17 ; 0622230E
v_add_f32_e32 v15, v15, v18 ; 061E250F
v_mov_b32_e32 v26, v23 ; 7E340317
v_add_f32_e32 v22, s10, v3 ; 062C060A
image_sample_l v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[16:23], s[12:15] ; F0900700 00641E18
v_mov_b32_e32 v25, v22 ; 7E320316
v_mov_b32_e32 v18, 0x3ffb4a80 ; 7E2402FF 3FFB4A80
v_mad_f32 v13, v18, v14, v13 ; D282000D 04361D12
v_mov_b32_e32 v26, v23 ; 7E340317
image_sample_l v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0900700 00641515
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v14, v21, v30 ; 061C3D15
v_add_f32_e32 v27, v22, v31 ; 06363F16
v_add_f32_e32 v23, v23, v32 ; 062E4117
v_mad_f32 v30, v18, v31, v30 ; D282001E 047A3F12
v_mad_f32 v21, v18, v22, v21 ; D2820015 04562D12
image_sample_l v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[16:23], s[12:15] ; F0900700 00641818
v_add_f32_e32 v4, v4, v7 ; 06080F04
v_add_f32_e32 v5, v5, v9 ; 060A1305
v_add_f32_e32 v6, v6, v20 ; 060C2906
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v7, v24, v14 ; 060E1D18
v_add_f32_e32 v7, v7, v16 ; 060E2107
v_add_f32_e32 v9, v25, v27 ; 06123719
v_add_f32_e32 v9, v9, v17 ; 06122309
v_add_f32_e32 v14, v26, v23 ; 061C2F1A
v_add_f32_e32 v14, v14, v15 ; 061C1F0E
v_add_f32_e32 v4, v7, v4 ; 06080907
v_add_f32_e32 v5, v9, v5 ; 060A0B09
v_add_f32_e32 v7, v14, v6 ; 060E0D0E
v_mov_b32_e32 v9, 0x3de38e37 ; 7E1202FF 3DE38E37
v_mul_f32_e32 v6, v9, v4 ; 100C0909
v_mul_f32_e32 v5, v9, v5 ; 100A0B09
v_mul_f32_e32 v4, v9, v7 ; 10080F09
; Two sums of absolute second differences (v7 and v9), compared into vcc to choose
; the orientation.
v_mad_f32 v9, v18, v25, v24 ; D2820009 04623312
v_mul_f32_e32 v13, v29, v13 ; 101A1B1D
v_mad_f32 v7, -0.5, v10, v13 ; D2820007 043614F1
v_mul_f32_e32 v14, v29, v21 ; 101C2B1D
v_mac_f32_e32 v7, v29, v21 ; 3E0E2B1D
v_mad_f32 v15, 0.5, v8, -v0 ; D282000F 840210F0
v_mac_f32_e32 v15, 0.5, v12 ; 3E1E18F0
v_add_f32_e64 v7, |v7|, |v15| ; D2060307 00021F07
v_mul_f32_e32 v15, v29, v30 ; 101E3D1D
v_mac_f32_e32 v15, -0.5, v11 ; 3E1E16F1
v_mac_f32_e32 v15, v29, v9 ; 3E1E131D
v_add_f32_e64 v7, v7, |v15| ; D2060207 00021F07
v_mac_f32_e32 v13, -0.5, v8 ; 3E1A10F1
v_mac_f32_e32 v13, v29, v30 ; 3E1A3D1D
v_mad_f32 v15, 0.5, v10, -v0 ; D282000F 840214F0
v_mac_f32_e32 v15, 0.5, v11 ; 3E1E16F0
v_add_f32_e64 v13, |v13|, |v15| ; D206030D 00021F0D
v_mac_f32_e32 v14, -0.5, v12 ; 3E1C18F1
v_mac_f32_e32 v14, v29, v9 ; 3E1C131D
v_add_f32_e64 v9, v13, |v14| ; D2060209 00021D0D
v_cmp_ge_f32_e32 vcc, v7, v9 ; 7C0C1307
; Orientation-dependent selects (step, neighbour scalars, signed half step), the
; +/-1.5 step start points, the 2x step (v14..v17), and initial loop state:
; v19 = counter, v24/v25 = done flags, v12/v13 = last sampled scalars.
v_cndmask_b32_e32 v13, v19, v28 ; 001A3913
v_cndmask_b32_e32 v8, v10, v8 ; 0010110A
v_cndmask_b32_e32 v10, v11, v12 ; 0014190B
v_subrev_f32_e32 v11, v0, v8 ; 0A161100
v_mov_b32_e32 v12, 0x7fffffff ; 7E1802FF 7FFFFFFF
v_and_b32_e32 v14, v11, v12 ; 361C190B
v_subrev_f32_e32 v15, v0, v10 ; 0A1E1500
v_and_b32_e32 v12, v15, v12 ; 3618190F
v_cmp_ge_f32_e64 s[0:1], |v11|, |v15| ; D00C0300 00021F0B
v_cndmask_b32_e64 v10, v10, v8, s[0:1] ; D200000A 0002110A
v_xor_b32_e32 v8, 0x80000000, v13 ; 3A101AFF 80000000
v_cndmask_b32_e64 v11, v12, v14, s[0:1] ; D200000B 00021D0C
v_cndmask_b32_e64 v8, v13, v8, s[0:1] ; D2000008 0002110D
v_mul_f32_e32 v12, 0.5, v8 ; 101810F0
v_cndmask_b32_e64 v13, v12, 0, vcc ; D200000D 01A9010C
v_add_f32_e32 v26, v13, v2 ; 0634050D
v_cndmask_b32_e32 v12, 0, v12 ; 00181880
v_add_f32_e32 v27, v12, v3 ; 0636070C
v_mul_f32_e32 v11, v29, v11 ; 1016171D
v_cndmask_b32_e64 v12, v28, 0, vcc ; D200000C 01A9011C
v_cndmask_b32_e32 v13, 0, v19 ; 001A2680
v_mov_b32_e32 v14, 0xbfc00000 ; 7E1C02FF BFC00000
v_mad_f32 v18, v14, v13, v26 ; D2820012 046A1B0E
v_mov_b32_e32 v15, 0x3fc00000 ; 7E1E02FF 3FC00000
v_mac_f32_e32 v26, v15, v13 ; 3E341B0F
v_mad_f32 v28, v14, v12, v27 ; D282001C 046E190E
v_mac_f32_e32 v27, v15, v12 ; 3E36190F
v_add_f32_e32 v14, v13, v13 ; 061C1B0D
v_add_f32_e32 v15, v12, v12 ; 061E190C
v_add_f32_e32 v10, v0, v10 ; 06141500
v_mov_b32_e32 v16, v14 ; 7E20030E
v_mul_f32_e32 v10, 0.5, v10 ; 101414F0
v_mov_b32_e32 v17, v15 ; 7E22030F
v_mov_b32_e32 v19, 0 ; 7E260280
s_mov_b64 s[0:1], 0 ; BE800480
v_mov_b32_e32 v12, v10 ; 7E18030A
v_mov_b32_e32 v13, v10 ; 7E1A030A
v_mov_b32_e32 v24, 0 ; 7E300280
v_mov_b32_e32 v25, 0 ; 7E320280
; NOTE(review): the loop header (target of the BB0_4 back branch) presumably sits
; around here; the exact label position is not shown in this listing.
v_mov_b32_e32 v23, v18 ; 7E2E0312
v_mov_b32_e32 v22, v28 ; 7E2C031C
v_mov_b32_e32 v21, v26 ; 7E2A031A
v_mov_b32_e32 v20, v27 ; 7E28031B
; Iteration bound: lanes continue only while 8 > counter (v19).
v_cmp_gt_i32_e32 vcc, 8, v19 ; 7D082688
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
s_cbranch_execz BB0_9 ; BF880000
; First derivative sample, executed only where flag v25 != -1; scalar result in v13.
v_cmp_ne_i32_e32 vcc, -1, v25 ; 7D0A32C1
s_and_saveexec_b64 s[4:5], vcc ; BE84246A
s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E
s_cbranch_execz BB0_10 ; BF880000
v_mov_b32_e32 v18, v23 ; 7E240317
v_mov_b32_e32 v26, v14 ; 7E34030E
v_mov_b32_e32 v27, v15 ; 7E36030F
v_mov_b32_e32 v28, v16 ; 7E380310
v_mov_b32_e32 v29, v17 ; 7E3A0311
v_mov_b32_e32 v30, v18 ; 7E3C0312
v_mov_b32_e32 v31, v19 ; 7E3E0313
v_mov_b32_e32 v32, v20 ; 7E400314
v_mov_b32_e32 v33, v21 ; 7E420315
v_mov_b32_e32 v31, v22 ; 7E3E0316
image_sample_d v[26:27], 3, 0, 0, 0, 0, 0, 0, 0, v[26:33], s[16:23], s[12:15] ; F0880300 00641A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v13, v27, v26, 0x3ffb4a80 ; 401A351B 3FFB4A80
s_or_b64 exec, exec, s[4:5] ; 88FE047E
; Second derivative sample, executed only where flag v24 != -1; scalar result in v12.
v_cmp_ne_i32_e32 vcc, -1, v24 ; 7D0A30C1
s_and_saveexec_b64 s[4:5], vcc ; BE84246A
s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E
s_cbranch_execz BB0_12 ; BF880000
v_mov_b32_e32 v26, v14 ; 7E34030E
v_mov_b32_e32 v27, v15 ; 7E36030F
v_mov_b32_e32 v28, v16 ; 7E380310
v_mov_b32_e32 v29, v17 ; 7E3A0311
v_mov_b32_e32 v30, v18 ; 7E3C0312
v_mov_b32_e32 v31, v19 ; 7E3E0313
v_mov_b32_e32 v32, v20 ; 7E400314
v_mov_b32_e32 v33, v21 ; 7E420315
v_mov_b32_e32 v30, v21 ; 7E3C0315
v_mov_b32_e32 v31, v20 ; 7E3E0314
image_sample_d v[26:27], 3, 0, 0, 0, 0, 0, 0, 0, v[26:33], s[16:23], s[12:15] ; F0880300 00641A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v27, v26, 0x3ffb4a80 ; 4018351B 3FFB4A80
s_or_b64 exec, exec, s[4:5] ; 88FE047E
; Flag update: each flag defaults to -1. Lanes where it was 0 get
; (|scalar - v10| >= v11) as 0 / -1 instead.
v_cmp_eq_i32_e32 vcc, 0, v25 ; 7D043280
v_mov_b32_e32 v25, -1 ; 7E3202C1
s_and_saveexec_b64 s[4:5], vcc ; BE84246A
s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E
v_subrev_f32_e32 v18, v10, v13 ; 0A241B0A
v_cmp_ge_f32_e64 s[24:25], |v18|, v11 ; D00C0118 00021712
v_cndmask_b32_e64 v25, 0, -1, s[24:25] ; D2000019 00618280
s_or_b64 exec, exec, s[4:5] ; 88FE047E
v_cmp_eq_i32_e32 vcc, 0, v24 ; 7D043080
v_mov_b32_e32 v24, -1 ; 7E3002C1
s_and_saveexec_b64 s[4:5], vcc ; BE84246A
s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E
v_subrev_f32_e32 v18, v10, v12 ; 0A24190A
v_cmp_ge_f32_e64 s[24:25], |v18|, v11 ; D00C0118 00021712
v_cndmask_b32_e64 v24, 0, -1, s[24:25] ; D2000018 00618280
s_or_b64 exec, exec, s[4:5] ; 88FE047E
; Lanes with (v24 & v25) == 0 keep searching: step the coordinates of each side that
; is not done by -/+ (v14, v15) and increment the counter.
v_and_b32_e32 v18, v24, v25 ; 36243318
v_cmp_eq_i32_e32 vcc, 0, v18 ; 7D042480
s_and_saveexec_b64 s[4:5], vcc ; BE84246A
s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E
s_cbranch_execz BB0_5 ; BF880000
v_cmp_ne_i32_e32 vcc, -1, v25 ; 7D0A32C1
v_subrev_f32_e32 v18, v14, v23 ; 0A242F0E
v_subrev_f32_e32 v26, v15, v22 ; 0A342D0F
v_cndmask_b32_e32 v28, v22, v26 ; 00383516
v_cndmask_b32_e32 v18, v23, v18 ; 00242517
v_cmp_ne_i32_e32 vcc, -1, v24 ; 7D0A30C1
v_add_f32_e32 v26, v14, v21 ; 06342B0E
v_add_f32_e32 v27, v15, v20 ; 0636290F
v_cndmask_b32_e32 v27, v20, v27 ; 00363714
v_cndmask_b32_e32 v26, v21, v26 ; 00343515
v_add_i32_e32 v19, 1, v19 ; 4A262681
; Accumulate the exited lanes in s[0:1], clear them from exec, and branch back while
; any lane is still active.
s_or_b64 exec, exec, s[4:5] ; 88FE047E
s_or_b64 s[0:1], s[4:5], s[0:1] ; 88800004
s_or_b64 exec, exec, s[2:3] ; 88FE027E
s_or_b64 s[0:1], s[2:3], s[0:1] ; 88800002
s_andn2_b64 exec, exec, s[0:1] ; 8AFE007E
s_cbranch_execnz BB0_4 ; BF890000
s_or_b64 exec, exec, s[0:1] ; 88FE007E
; After the loop: distances to both end points, nearer side, reciprocal of the span,
; and the final coordinate offset.
; NOTE(review): the 0x6f800000 / 0x2f800000 compare-and-scale around v_rcp_f32 looks
; like the usual range-scaling expansion of a float division — confirm.
v_cmp_ge_f32_e32 vcc, v7, v9 ; 7C0C1307
v_subrev_f32_e32 v7, v23, v2 ; 0A0E0517
v_subrev_f32_e32 v9, v22, v3 ; 0A120716
v_cndmask_b32_e32 v7, v9, v7 ; 000E0F09
v_subrev_f32_e32 v9, v2, v21 ; 0A122B02
v_subrev_f32_e32 v11, v3, v20 ; 0A162903
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_add_f32_e32 v11, v7, v9 ; 06161307
v_mov_b32_e32 v14, 0x6f800000 ; 7E1C02FF 6F800000
v_cmp_gt_f32_e64 s[0:1], |v11|, v14 ; D0080100 00021D0B
v_mov_b32_e32 v14, 0x2f800000 ; 7E1C02FF 2F800000
v_cndmask_b32_e64 v14, 1.0, v14, s[0:1] ; D200000E 00021CF2
v_cmp_lt_f32_e64 s[0:1], v7, v9 ; D0020000 00021307
v_cndmask_b32_e64 v12, v12, v13, s[0:1] ; D200000C 00021B0C
v_cmp_lt_f32_e64 s[2:3], v0, v10 ; D0020002 00021500
v_cmp_lt_f32_e64 s[4:5], v12, v10 ; D0020004 0002150C
v_mul_f32_e32 v0, v14, v11 ; 1000170E
v_rcp_f32_e32 v0, v0 ; 7E005500
s_xor_b64 s[2:3], s[2:3], s[4:5] ; 89820402
v_cndmask_b32_e64 v8, 0, v8, s[2:3] ; D2000008 000A1080
v_cndmask_b32_e64 v7, v9, v7, s[0:1] ; D2000007 00020F09
v_mul_f32_e32 v0, v0, v14 ; 10001D00
v_mad_f32 v0, -v0, v7, 0.5 ; D2820000 23C20F00
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_cndmask_b32_e64 v7, v0, 0, vcc ; D2000007 01A90100
v_cndmask_b32_e32 v0, 0, v0 ; 00000080
v_add_f32_e32 v7, v7, v2 ; 060E0507
v_add_f32_e32 v8, v0, v3 ; 06100700
v_mov_b32_e32 v9, 0 ; 7E120280
; Final fetch at the shifted coordinate and blend with the neighbourhood average:
; out = avg * v1 + texel - v1 * texel, per channel, into v4, v5, v6.
image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900700 00640707
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v0, v1, v6, v7 ; D2820000 041E0D01
v_mad_f32 v2, v1, v5, v8 ; D2820002 04220B01
v_mad_f32 v3, v1, v4, v9 ; D2820003 04260901
v_mad_f32 v4, -v1, v7, v0 ; D2820004 24020F01
v_mad_f32 v5, -v1, v8, v2 ; D2820005 240A1101
v_mad_f32 v6, -v1, v9, v3 ; D2820006 240E1301
; Re-enable the lanes masked off at the early-out test, pack v4/v5/v6 to half floats
; and export them.
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04
v_cvt_pkrtz_f16_f32_e64 v1, v6, 0 ; D25E0001 00010106
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 36
Code Size: 1504 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MAD TEMP[1].xy, IN[2].xyyy, CONST[1].xyyy, CONST[1].zwww
5: MAD TEMP[2].x, TEMP[0].zzzz, CONST[0].zzzz, CONST[0].wwww
6: MOV TEMP[1].z, TEMP[2].xxxx
7: MOV OUT[2], TEMP[1]
8: MOV OUT[0], TEMP[0]
9: MOV OUT[1], IN[1]
10: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = add i32 %5, %7
%38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
%43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = add i32 %5, %7
%46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
%49 = extractelement <4 x float> %46, i32 2
%50 = extractelement <4 x float> %46, i32 3
%51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0
%53 = add i32 %5, %7
%54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53)
%55 = extractelement <4 x float> %54, i32 0
%56 = extractelement <4 x float> %54, i32 1
%57 = fmul float %19, %39
%58 = fmul float %20, %39
%59 = fmul float %21, %39
%60 = fmul float %22, %39
%61 = fmul float %23, %40
%62 = fadd float %61, %57
%63 = fmul float %24, %40
%64 = fadd float %63, %58
%65 = fmul float %25, %40
%66 = fadd float %65, %59
%67 = fmul float %26, %40
%68 = fadd float %67, %60
%69 = fmul float %27, %41
%70 = fadd float %69, %62
%71 = fmul float %28, %41
%72 = fadd float %71, %64
%73 = fmul float %29, %41
%74 = fadd float %73, %66
%75 = fmul float %30, %41
%76 = fadd float %75, %68
%77 = fmul float %31, %42
%78 = fadd float %77, %70
%79 = fmul float %32, %42
%80 = fadd float %79, %72
%81 = fmul float %33, %42
%82 = fadd float %81, %74
%83 = fmul float %34, %42
%84 = fadd float %83, %76
%85 = fmul float %55, %15
%86 = fadd float %85, %17
%87 = fmul float %56, %16
%88 = fadd float %87, %18
%89 = fmul float %82, %13
%90 = fadd float %89, %14
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %49, float %50)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float %90, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s16, s[0:3], 0x7 ; C2080107
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[8:11], 0 idxen ; E00C2000 80020900
s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
s_buffer_load_dword s9, s[0:3], 0xd ; C204810D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s16 ; 7E000210
s_buffer_load_dword s10, s[0:3], 0xe ; C205010E
s_buffer_load_dword s11, s[0:3], 0xf ; C205810F
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_buffer_load_dword s13, s[0:3], 0x2 ; C2068102
s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106
s_buffer_load_dword s15, s[0:3], 0x4 ; C2078104
s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v11, s14 ; 7E16020E
v_mac_f32_e32 v11, s15, v9 ; 3E16120F
v_mac_f32_e32 v0, s16, v10 ; 3E001410
s_buffer_load_dword s14, s[0:3], 0x3 ; C2070103
s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111
s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112
s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113
s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114
s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115
s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
v_mul_f32_e32 v9, s4, v1 ; 10120204
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_mul_f32_e32 v12, s6, v1 ; 10180206
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v9, s12, v3 ; 3E12060C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v10, s15, v3 ; 3E14060F
v_mac_f32_e32 v12, s16, v3 ; 3E180610
v_mac_f32_e32 v1, s17, v3 ; 3E020611
v_mac_f32_e32 v9, s18, v4 ; 3E120812
v_mac_f32_e32 v10, s19, v4 ; 3E140813
v_mac_f32_e32 v12, s20, v4 ; 3E180814
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
v_mov_b32_e32 v2, s14 ; 7E04020E
v_mac_f32_e32 v2, s13, v12 ; 3E04180D
v_mov_b32_e32 v3, 0 ; 7E060280
exp 15, 33, 0, 0, 0, v11, v0, v2, v3 ; F800021F 0302000B
exp 15, 12, 0, 1, 0, v9, v10, v12, v1 ; F80008CF 010C0A09
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 268 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 4
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 2.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], IMM[0].xxxx, IN[0]
1: MOV TEMP[1].xy, IN[1].xyyy
2: TEX TEMP[1], TEMP[1], SAMP[0], 2D
3: MUL TEMP[1], CONST[1], TEMP[1]
4: MUL TEMP[0], TEMP[0], TEMP[1]
5: MOV TEMP[1].w, TEMP[0].wwww
6: MOV_SAT TEMP[2].x, IN[1].zzzz
7: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xxxx
8: MOV OUT[0], TEMP[1]
9: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0
%30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0
%32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%39 = fmul float %32, 2.000000e+00
%40 = fmul float %33, 2.000000e+00
%41 = fmul float %34, 2.000000e+00
%42 = fmul float %35, 2.000000e+00
%43 = bitcast float %36 to i32
%44 = bitcast float %37 to i32
%45 = insertelement <2 x i32> undef, i32 %43, i32 0
%46 = insertelement <2 x i32> %45, i32 %44, i32 1
%47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %29, <16 x i8> %31, i32 2)
%48 = extractelement <4 x float> %47, i32 0
%49 = extractelement <4 x float> %47, i32 1
%50 = extractelement <4 x float> %47, i32 2
%51 = extractelement <4 x float> %47, i32 3
%52 = fmul float %24, %48
%53 = fmul float %25, %49
%54 = fmul float %26, %50
%55 = fmul float %27, %51
%56 = fmul float %39, %52
%57 = fmul float %40, %53
%58 = fmul float %41, %54
%59 = fmul float %42, %55
%60 = call float @llvm.AMDIL.clamp.(float %38, float 0.000000e+00, float 1.000000e+00)
%61 = fmul float %56, %60
%62 = fmul float %57, %60
%63 = fmul float %58, %60
%64 = fcmp ugt float %59, %4
%65 = select i1 %64, float 1.000000e+00, float -1.000000e+00
call void @llvm.AMDGPU.kill(float %65)
%66 = call i32 @llvm.SI.packf16(float %61, float %62)
%67 = bitcast i32 %66 to float
%68 = call i32 @llvm.SI.packf16(float %63, float %59)
%69 = bitcast i32 %68 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %67, float %69, float %67, float %69)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640606
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, s4, v6 ; 10020C04
v_mul_f32_e32 v6, s5, v7 ; 100C0E05
v_mul_f32_e32 v7, s9, v8 ; 100E1009
v_mul_f32_e32 v8, s0, v9 ; 10101200
v_add_f32_e32 v2, v2, v2 ; 06040502
v_mul_f32_e32 v1, v1, v2 ; 10020501
v_add_f32_e32 v2, v3, v3 ; 06040703
v_mul_f32_e32 v2, v6, v2 ; 10040506
v_add_f32_e32 v3, v4, v4 ; 06060904
v_mul_f32_e32 v3, v7, v3 ; 10060707
v_add_f32_e32 v4, v5, v5 ; 06080B05
v_mul_f32_e32 v4, v8, v4 ; 10080908
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v1, v0, v1 ; 10020300
v_mul_f32_e32 v2, v0, v2 ; 10040500
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_cmp_nge_f32_e32 vcc, s8, v4 ; 7C120808
v_cndmask_b32_e64 v3, -1.0, 1.0, vcc ; D2000003 01A9E4F3
v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v0, v4 ; 5E000900
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 12
Code Size: 216 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..4]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[1], IN[0].xxxx
1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[2].xyxx
5: MAD TEMP[2].xy, IN[0].xyyy, CONST[0].xyyy, CONST[0].zwww
6: MOV TEMP[1].zw, TEMP[2].yyxy
7: MOV OUT[2], TEMP[1]
8: MOV OUT[0], TEMP[0]
9: MOV OUT[1], IN[1]
10: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
%35 = add i32 %5, %7
%36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35)
%37 = extractelement <4 x float> %36, i32 0
%38 = extractelement <4 x float> %36, i32 1
%39 = extractelement <4 x float> %36, i32 2
%40 = extractelement <4 x float> %36, i32 3
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
%49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
%51 = add i32 %5, %7
%52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
%55 = fmul float %17, %37
%56 = fmul float %18, %37
%57 = fmul float %19, %37
%58 = fmul float %20, %37
%59 = fmul float %21, %38
%60 = fadd float %59, %55
%61 = fmul float %22, %38
%62 = fadd float %61, %56
%63 = fmul float %23, %38
%64 = fadd float %63, %57
%65 = fmul float %24, %38
%66 = fadd float %65, %58
%67 = fmul float %25, %39
%68 = fadd float %67, %60
%69 = fmul float %26, %39
%70 = fadd float %69, %62
%71 = fmul float %27, %39
%72 = fadd float %71, %64
%73 = fmul float %28, %39
%74 = fadd float %73, %66
%75 = fmul float %29, %40
%76 = fadd float %75, %68
%77 = fmul float %30, %40
%78 = fadd float %77, %70
%79 = fmul float %31, %40
%80 = fadd float %79, %72
%81 = fmul float %32, %40
%82 = fadd float %81, %74
%83 = fmul float %37, %13
%84 = fadd float %83, %15
%85 = fmul float %38, %14
%86 = fadd float %85, %16
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %47, float %48)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %84, float %86)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %76, float %78, float %80, float %82)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s4 ; 7E000204
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v5, s5 ; 7E0A0205
v_mac_f32_e32 v0, s6, v1 ; 3E000206
v_mac_f32_e32 v5, s7, v2 ; 3E0A0407
exp 15, 33, 0, 0, 0, v9, v10, v0, v5 ; F800021F 05000A09
s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105
s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106
s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107
s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xb ; C205810B
s_buffer_load_dword s12, s[0:3], 0xc ; C206010C
s_buffer_load_dword s13, s[0:3], 0xd ; C206810D
s_buffer_load_dword s14, s[0:3], 0xe ; C207010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s8, v1 ; 10000208
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s7, v2 ; 3E000407
v_mul_f32_e32 v5, s4, v1 ; 100A0204
v_mac_f32_e32 v5, s9, v2 ; 3E0A0409
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s10, v2 ; 3E0C040A
v_mul_f32_e32 v1, s6, v1 ; 10020206
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 256 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: ABS TEMP[0].xy, IN[1].zwww
1: MAX TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy
2: FSLT TEMP[0].x, IMM[0].xxxx, TEMP[0].xxxx
3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
4: KILL_IF -TEMP[0].xxxx
5: MOV TEMP[0].xy, IN[1].xyyy
6: TEX TEMP[0], TEMP[0], SAMP[0], 2D
7: MUL TEMP[0], TEMP[0], IN[0]
8: MOV OUT[0], TEMP[0]
9: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%34 = call float @llvm.fabs.f32(float %32)
%35 = call float @llvm.fabs.f32(float %33)
%36 = call float @llvm.maxnum.f32(float %34, float %35)
%37 = fcmp ogt float %36, 1.000000e+00
%38 = select i1 %37, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %38)
%39 = bitcast float %30 to i32
%40 = bitcast float %31 to i32
%41 = insertelement <2 x i32> undef, i32 %39, i32 0
%42 = insertelement <2 x i32> %41, i32 %40, i32 1
%43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %23, <16 x i8> %25, i32 2)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = extractelement <4 x float> %43, i32 3
%48 = fmul float %44, %26
%49 = fmul float %45, %27
%50 = fmul float %46, %28
%51 = fmul float %47, %29
%52 = call i32 @llvm.SI.packf16(float %48, float %49)
%53 = bitcast i32 %52 to float
%54 = call i32 @llvm.SI.packf16(float %50, float %51)
%55 = bitcast i32 %54 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %53, float %55, float %53, float %55)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700
v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701
v_max_f32_e64 v0, |v8|, |v0| ; D2200300 00020108
v_cmp_lt_f32_e32 vcc, 1.0, v0 ; 7C0200F2
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010606
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v0, v2, v6 ; 10000D02
v_mul_f32_e32 v1, v3, v7 ; 10020F03
v_mul_f32_e32 v2, v4, v8 ; 10041104
v_mul_f32_e32 v3, v5, v9 ; 10061305
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 12
Code Size: 156 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point (attribute group #0 sets "ShaderType"="1").
; This is the LLVM IR form of the TGSI listing printed directly above it in
; this dump: vertex input 0 is combined with sixteen constants through one
; multiply followed by three multiply-adds per component, and the first two
; components of vertex input 1 are forwarded unchanged. Both results leave
; the shader through @llvm.SI.export.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of argument %1 is the 16-byte descriptor used by every
; @llvm.SI.load.const call below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0..60, i.e. four groups of four.
; Offsets 16..28, 32..44 and 48..60 are the groups the TGSI listing names
; CONST[1], CONST[2] and CONST[3] (they are multiplied by input .y, .z and .w
; below). Offsets 0..12 are presumably CONST[0]; TGSI instruction 0 is not
; reproduced next to this comment, so confirm against the listing.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Vertex input 0: descriptor at element 0 of argument %4, fetched at index
; %5 + %7; all four components are extracted (%33..%36 = x, y, z, w).
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Vertex input 1: descriptor at element 1 of argument %4, same index
; (%39 recomputes the same sum as %31). Only x and y are extracted,
; matching "MOV TEMP[1].xy, IN[1].xyxx" in the TGSI listing.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; First constant group (offsets 0..12) times input0.x.
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI instruction 1: CONST[1] * input0.y + previous result, expanded into a
; separate fmul and fadd per component.
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI instruction 2: CONST[2] * input0.z + previous result.
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI instruction 3: CONST[3] * input0.w + previous result.
; %64, %66, %68, %70 are the final x, y, z, w of TEMP[0].
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Export with fourth argument 32: input1.x, input1.y and two 0.0 constants.
; This carries TEMP[1], which the TGSI listing moves to OUT[1]; only .xy of
; TEMP[1] was ever written, and zeros are passed for z and w.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
; Export with fourth argument 12: the four accumulated sums, i.e. TEMP[0],
; which the TGSI listing moves to OUT[0].
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0]
DCL CONST[4..12]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 1.0000, 0.0156, 0.0500, 0.0800}
IMM[1] FLT32 { 0.5000, 2.0000, -1.0000, 4.0000}
0: MUL TEMP[0].x, IN[0].xxxx, CONST[4].xxxx
1: FSLT TEMP[1].x, TEMP[0].xxxx, CONST[5].xxxx
2: UIF TEMP[1].xxxx :0
3: RCP TEMP[1].x, CONST[4].zzzz
4: MUL TEMP[1].x, TEMP[0].xxxx, TEMP[1].xxxx
5: ELSE :0
6: ADD TEMP[2].x, CONST[4].xxxx, -TEMP[0].xxxx
7: FSLT TEMP[2].x, TEMP[2].xxxx, CONST[5].zzzz
8: UIF TEMP[2].xxxx :0
9: ADD TEMP[2].x, CONST[4].xxxx, -TEMP[0].xxxx
10: RCP TEMP[3].x, CONST[4].zzzz
11: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx
12: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[2].xxxx
13: ELSE :0
14: MOV TEMP[2].x, -CONST[5].xxxx
15: MOV TEMP[3].x, -CONST[5].zzzz
16: ADD TEMP[4].x, TEMP[0].xxxx, TEMP[2].xxxx
17: ADD TEMP[5].x, CONST[4].xxxx, TEMP[2].xxxx
18: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx
19: RCP TEMP[5].x, TEMP[5].xxxx
20: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
21: ADD TEMP[2].x, CONST[4].zzzz, TEMP[2].xxxx
22: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx
23: MAD TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx, CONST[5].xxxx
24: RCP TEMP[3].x, CONST[4].zzzz
25: MUL TEMP[1].x, TEMP[2].xxxx, TEMP[3].xxxx
26: ENDIF
27: ENDIF
28: MOV TEMP[0].x, TEMP[1].xxxx
29: MUL TEMP[1].x, IN[0].yyyy, CONST[4].yyyy
30: FSLT TEMP[2].x, TEMP[1].xxxx, CONST[5].wwww
31: UIF TEMP[2].xxxx :0
32: RCP TEMP[2].x, CONST[4].wwww
33: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
34: ELSE :0
35: ADD TEMP[3].x, CONST[4].yyyy, -TEMP[1].xxxx
36: FSLT TEMP[3].x, TEMP[3].xxxx, CONST[5].yyyy
37: UIF TEMP[3].xxxx :0
38: ADD TEMP[3].x, CONST[4].yyyy, -TEMP[1].xxxx
39: RCP TEMP[4].x, CONST[4].wwww
40: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
41: ADD TEMP[2].x, IMM[0].xxxx, -TEMP[3].xxxx
42: ELSE :0
43: MOV TEMP[3].x, -CONST[5].wwww
44: MOV TEMP[4].x, -CONST[5].yyyy
45: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
46: ADD TEMP[5].x, CONST[4].yyyy, TEMP[3].xxxx
47: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
48: RCP TEMP[5].x, TEMP[5].xxxx
49: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx
50: ADD TEMP[3].x, CONST[4].wwww, TEMP[3].xxxx
51: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
52: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx, CONST[5].wwww
53: RCP TEMP[3].x, CONST[4].wwww
54: MUL TEMP[2].x, TEMP[1].xxxx, TEMP[3].xxxx
55: ENDIF
56: ENDIF
57: MOV TEMP[0].y, TEMP[2].xxxx
58: MOV TEMP[0].xy, TEMP[0].xyyy
59: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D
60: ADD TEMP[1].xy, IN[0].xyyy, CONST[6].xxxx
61: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[4].xyyy
62: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
63: MOV TEMP[1].xy, TEMP[1].xyyy
64: TEX TEMP[1].xy, TEMP[1], SAMP[2], 2D
65: MAD TEMP[1].xy, TEMP[1].xyyy, CONST[12].xxxx, IN[0].xyyy
66: MUL TEMP[2].x, CONST[6].xxxx, IMM[0].zzzz
67: ADD TEMP[2].x, TEMP[1].xxxx, -TEMP[2].xxxx
68: MAD TEMP[1].x, CONST[6].xxxx, IMM[0].wwww, TEMP[1].yyyy
69: MOV TEMP[2].y, TEMP[1].xxxx
70: MUL TEMP[1].xy, TEMP[2].xyyy, IMM[1].xxxx
71: FRC TEMP[1].xy, TEMP[1].xyyy
72: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[1].yyyy, IMM[1].zzzz
73: ABS TEMP[1].xy, TEMP[1].xyyy
74: LRP TEMP[2], TEMP[1].xxxx, CONST[8], CONST[7]
75: LRP TEMP[3], TEMP[1].xxxx, CONST[10], CONST[9]
76: LRP TEMP[2], TEMP[1].yyyy, TEMP[3], TEMP[2]
77: MOV TEMP[1].xy, TEMP[1].xyyy
78: TEX TEMP[1], TEMP[1], SAMP[0], 2D
79: MUL TEMP[1], TEMP[2], TEMP[1]
80: MUL TEMP[2].x, TEMP[0].xxxx, CONST[11].xxxx
81: ADD TEMP[3].x, IN[0].xxxx, IN[0].yyyy
82: MAD TEMP[3].x, CONST[0].xxxx, CONST[12].wwww, TEMP[3].xxxx
83: MUL TEMP[3].x, TEMP[3].xxxx, CONST[12].zzzz
84: SIN TEMP[3].x, TEMP[3].xxxx
85: ADD TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy
86: ADD TEMP[0].x, TEMP[0].xxxx, -CONST[12].yyyy
87: MOV_SAT TEMP[0].x, TEMP[0].xxxx
88: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].wwww
89: LRP TEMP[0].x, TEMP[0].xxxx, TEMP[3].xxxx, IMM[0].xxxx
90: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
91: MUL TEMP[0], TEMP[1], TEMP[0].xxxx
92: MOV OUT[0], TEMP[0]
93: END
; ModuleID = 'tgsi'
; Fragment shader entry point (attribute group #0 sets "ShaderType"="0").
; This is the LLVM IR form of the FRAG TGSI listing printed directly above it
; (instructions 0..93). In outline it:
;   1. remaps the interpolated x and y of IN[0] through a three-way piecewise
;      function driven by CONST[4] and CONST[5], then samples texture unit 1;
;   2. samples texture unit 2 at a scaled coordinate and uses the result to
;      build a second coordinate that is folded into a 0..1 triangle wave;
;   3. blends CONST[7]..CONST[10] bilinearly with that coordinate, samples
;      texture unit 0 with it and multiplies the two;
;   4. scales everything by a factor derived from the unit-1 sample and a
;      sine term, packs the result to half floats and exports it.
; Note the block layout: IF31 and ELSE32 are emitted after the `ret void`
; of ENDIF27 but branch back into ENDIF27.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %1 is the descriptor used by every constant load.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Constant loads. Matching the uses below against the TGSI listing, each
; CONST[n] occupies 16 bytes starting at byte offset 16*n:
;   %24      offset 0         CONST[0].x
;   %25..%28 offsets 64..76   CONST[4].xyzw
;   %29..%32 offsets 80..92   CONST[5].xyzw
;   %33      offset 96        CONST[6].x
;   %34..%37 offsets 112..124 CONST[7].xyzw
;   %38..%41 offsets 128..140 CONST[8].xyzw
;   %42..%45 offsets 144..156 CONST[9].xyzw
;   %46..%49 offsets 160..172 CONST[10].xyzw
;   %50      offset 176       CONST[11].x
;   %51..%54 offsets 192..204 CONST[12].xyzw
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204)
; Texture descriptors: a 32-byte entry from argument %3 paired with a
; 16-byte entry from argument %2, for indices 0 (%56/%58), 1 (%61/%64) and
; 2 (%67/%70). Each pair is passed together to one sample call below.
%55 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0
%57 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0
%59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%60 = bitcast <8 x i32> addrspace(2)* %59 to <32 x i8> addrspace(2)*
%61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0
%62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%63 = bitcast <4 x i32> addrspace(2)* %62 to <16 x i8> addrspace(2)*
%64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0
%65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)*
%67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0
%68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)*
%70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0
; Interpolated IN[0].x (%71) and IN[0].y (%72): the first argument selects
; the component, the second the attribute.
%71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; TGSI 0-2: %73 = IN[0].x * CONST[4].x; take IF when %73 < CONST[5].x.
%73 = fmul float %71, %25
%74 = fcmp olt float %73, %29
br i1 %74, label %IF, label %ELSE
IF: ; preds = %main_body
; TGSI 3-4: low range of the x remap, %73 * (1 / CONST[4].z).
%75 = fdiv float 1.000000e+00, %27
%76 = fmul float %73, %75
br label %ENDIF
ELSE: ; preds = %main_body
; TGSI 6-8: take IF25 when (CONST[4].x - %73) < CONST[5].z.
%77 = fsub float %25, %73
%78 = fcmp olt float %77, %31
br i1 %78, label %IF25, label %ELSE26
ENDIF: ; preds = %IF25, %ELSE26, %IF
; %temp4.0 is the remapped x coordinate from whichever branch ran.
; TGSI 29-31 then start the same procedure for y:
; %79 = IN[0].y * CONST[4].y; take IF28 when %79 < CONST[5].w.
%temp4.0 = phi float [ %76, %IF ], [ %84, %IF25 ], [ %95, %ELSE26 ]
%79 = fmul float %72, %26
%80 = fcmp olt float %79, %32
br i1 %80, label %IF28, label %ELSE29
IF25: ; preds = %ELSE
; TGSI 9-12: high range of the x remap,
; 1 - (CONST[4].x - %73) * (1 / CONST[4].z).
%81 = fsub float %25, %73
%82 = fdiv float 1.000000e+00, %27
%83 = fmul float %81, %82
%84 = fsub float 1.000000e+00, %83
br label %ENDIF
ELSE26: ; preds = %ELSE
; TGSI 14-25: middle range of the x remap. With a = CONST[5].x and
; b = CONST[5].z:
;   t      = (%73 - a) / (CONST[4].x - a - b)
;   result = (t * (CONST[4].z - a - b) + a) / CONST[4].z
; The negated MOVs of the TGSI listing appear here as fsub.
%85 = fsub float %73, %29
%86 = fsub float %25, %29
%87 = fsub float %86, %31
%88 = fdiv float 1.000000e+00, %87
%89 = fmul float %85, %88
%90 = fsub float %27, %29
%91 = fsub float %90, %31
%92 = fmul float %89, %91
%93 = fadd float %92, %29
%94 = fdiv float 1.000000e+00, %27
%95 = fmul float %93, %94
br label %ENDIF
IF28: ; preds = %ENDIF
; TGSI 32-33: low range of the y remap, %79 * (1 / CONST[4].w).
%96 = fdiv float 1.000000e+00, %28
%97 = fmul float %79, %96
br label %ENDIF27
ELSE29: ; preds = %ENDIF
; TGSI 35-37: take IF31 when (CONST[4].y - %79) < CONST[5].y.
%98 = fsub float %26, %79
%99 = fcmp olt float %98, %30
br i1 %99, label %IF31, label %ELSE32
ENDIF27: ; preds = %IF31, %ELSE32, %IF28
; %temp8.0 is the remapped y coordinate. The rest of the shader is
; straight-line code in this block.
%temp8.0 = phi float [ %97, %IF28 ], [ %187, %IF31 ], [ %198, %ELSE32 ]
; TGSI 57-59: sample with descriptor pair 1 at (remapped x, remapped y);
; only component 0 of the result (%105) is used.
%100 = bitcast float %temp4.0 to i32
%101 = bitcast float %temp8.0 to i32
%102 = insertelement <2 x i32> undef, i32 %100, i32 0
%103 = insertelement <2 x i32> %102, i32 %101, i32 1
%104 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %61, <16 x i8> %64, i32 2)
%105 = extractelement <4 x float> %104, i32 0
; TGSI 60-64: coordinate (IN[0].xy + CONST[6].x) * CONST[4].xy * 0.015625
; (IMM[0].y, printed rounded as 0.0156), sampled with descriptor pair 2;
; components 0 and 1 of the result are kept.
%106 = fadd float %71, %33
%107 = fadd float %72, %33
%108 = fmul float %106, %25
%109 = fmul float %107, %26
%110 = fmul float %108, 1.562500e-02
%111 = fmul float %109, 1.562500e-02
%112 = bitcast float %110 to i32
%113 = bitcast float %111 to i32
%114 = insertelement <2 x i32> undef, i32 %112, i32 0
%115 = insertelement <2 x i32> %114, i32 %113, i32 1
%116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %115, <32 x i8> %67, <16 x i8> %70, i32 2)
%117 = extractelement <4 x float> %116, i32 0
%118 = extractelement <4 x float> %116, i32 1
; TGSI 65: sample.xy * CONST[12].x + IN[0].xy.
%119 = fmul float %117, %51
%120 = fadd float %119, %71
%121 = fmul float %118, %51
%122 = fadd float %121, %72
; TGSI 66-68: x -= CONST[6].x * IMM[0].z; y += CONST[6].x * IMM[0].w.
; The hex literals are the float immediates the TGSI listing prints as
; 0.0500 and 0.0800.
%123 = fmul float %33, 0x3FA99999A0000000
%124 = fsub float %120, %123
%125 = fmul float %33, 0x3FB47AE140000000
%126 = fadd float %125, %122
; TGSI 70-73: v = |frac(v * 0.5) * 2 - 1| for both components.
; FRC is expanded as v - floor(v).
%127 = fmul float %124, 5.000000e-01
%128 = fmul float %126, 5.000000e-01
%129 = call float @llvm.floor.f32(float %127)
%130 = fsub float %127, %129
%131 = call float @llvm.floor.f32(float %128)
%132 = fsub float %128, %131
%133 = fmul float %130, 2.000000e+00
%134 = fadd float %133, -1.000000e+00
%135 = fmul float %132, 2.000000e+00
%136 = fadd float %135, -1.000000e+00
%137 = call float @llvm.fabs.f32(float %134)
%138 = call float @llvm.fabs.f32(float %136)
; TGSI 74: lrp(%137, CONST[8], CONST[7]) per component.
%139 = call float @llvm.AMDGPU.lrp(float %137, float %38, float %34)
%140 = call float @llvm.AMDGPU.lrp(float %137, float %39, float %35)
%141 = call float @llvm.AMDGPU.lrp(float %137, float %40, float %36)
%142 = call float @llvm.AMDGPU.lrp(float %137, float %41, float %37)
; TGSI 75: lrp(%137, CONST[10], CONST[9]) per component.
%143 = call float @llvm.AMDGPU.lrp(float %137, float %46, float %42)
%144 = call float @llvm.AMDGPU.lrp(float %137, float %47, float %43)
%145 = call float @llvm.AMDGPU.lrp(float %137, float %48, float %44)
%146 = call float @llvm.AMDGPU.lrp(float %137, float %49, float %45)
; TGSI 76: blend the two rows above by %138.
%147 = call float @llvm.AMDGPU.lrp(float %138, float %143, float %139)
%148 = call float @llvm.AMDGPU.lrp(float %138, float %144, float %140)
%149 = call float @llvm.AMDGPU.lrp(float %138, float %145, float %141)
%150 = call float @llvm.AMDGPU.lrp(float %138, float %146, float %142)
; TGSI 77-78: sample with descriptor pair 0 at (%137, %138); all four
; components are used.
%151 = bitcast float %137 to i32
%152 = bitcast float %138 to i32
%153 = insertelement <2 x i32> undef, i32 %151, i32 0
%154 = insertelement <2 x i32> %153, i32 %152, i32 1
%155 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %154, <32 x i8> %56, <16 x i8> %58, i32 2)
%156 = extractelement <4 x float> %155, i32 0
%157 = extractelement <4 x float> %155, i32 1
%158 = extractelement <4 x float> %155, i32 2
%159 = extractelement <4 x float> %155, i32 3
; TGSI 79: blended constants times the sampled value.
%160 = fmul float %147, %156
%161 = fmul float %148, %157
%162 = fmul float %149, %158
%163 = fmul float %150, %159
; TGSI 80: unit-1 sample times CONST[11].x.
%164 = fmul float %105, %50
; TGSI 81-85:
; sin((CONST[0].x * CONST[12].w + IN[0].x + IN[0].y) * CONST[12].z) + 2.0.
%165 = fadd float %71, %72
%166 = fmul float %24, %54
%167 = fadd float %166, %165
%168 = fmul float %167, %53
%169 = call float @llvm.sin.f32(float %168)
%170 = fadd float %169, 2.000000e+00
; TGSI 86-89: clamp(unit-1 sample - CONST[12].y, 0, 1) * 4 is the weight
; of an lrp between the sine term (%170) and 1.0.
%171 = fsub float %105, %52
%172 = call float @llvm.AMDIL.clamp.(float %171, float 0.000000e+00, float 1.000000e+00)
%173 = fmul float %172, 4.000000e+00
%174 = call float @llvm.AMDGPU.lrp(float %173, float %170, float 1.000000e+00)
; TGSI 90-91: combine the two scalar factors and scale all four components.
%175 = fmul float %164, %174
%176 = fmul float %160, %175
%177 = fmul float %161, %175
%178 = fmul float %162, %175
%179 = fmul float %163, %175
; TGSI 92 (MOV OUT[0]): the four results are packed pairwise with
; @llvm.SI.packf16 and the two packed words are handed to the export, each
; passed twice (arguments 6/8 and 7/9).
%180 = call i32 @llvm.SI.packf16(float %176, float %177)
%181 = bitcast i32 %180 to float
%182 = call i32 @llvm.SI.packf16(float %178, float %179)
%183 = bitcast i32 %182 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %181, float %183, float %181, float %183)
ret void
IF31: ; preds = %ELSE29
; TGSI 38-41: high range of the y remap,
; 1 - (CONST[4].y - %79) * (1 / CONST[4].w).
%184 = fsub float %26, %79
%185 = fdiv float 1.000000e+00, %28
%186 = fmul float %184, %185
%187 = fsub float 1.000000e+00, %186
br label %ENDIF27
ELSE32: ; preds = %ELSE29
; TGSI 43-54: middle range of the y remap. With a = CONST[5].w and
; b = CONST[5].y:
;   t      = (%79 - a) / (CONST[4].y - a - b)
;   result = (t * (CONST[4].w - a - b) + a) / CONST[4].w
%188 = fsub float %79, %32
%189 = fsub float %26, %32
%190 = fsub float %189, %30
%191 = fdiv float 1.000000e+00, %190
%192 = fmul float %188, %191
%193 = fsub float %28, %32
%194 = fsub float %193, %30
%195 = fmul float %192, %194
%196 = fadd float %195, %32
%197 = fdiv float 1.000000e+00, %28
%198 = fmul float %196, %197
br label %ENDIF27
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sin.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[12:15], 0x10 ; C2000D10
s_buffer_load_dword s1, s[12:15], 0x14 ; C2008D14
s_buffer_load_dword s2, s[12:15], 0x12 ; C2010D12
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100
v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s0, v2 ; 10020400
v_cmp_ngt_f32_e32 vcc, s1, v1 ; 7C160201
s_and_saveexec_b64 s[8:9], vcc ; BE88246A
s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E
s_cbranch_execz BB0_1 ; BF880000
s_buffer_load_dword s3, s[12:15], 0x16 ; C2018D16
v_sub_f32_e32 v3, s0, v1 ; 08060200
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_ngt_f32_e32 vcc, s3, v3 ; 7C160603
s_and_saveexec_b64 s[10:11], vcc ; BE8A246A
s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E
s_cbranch_execz BB0_6 ; BF880000
v_mov_b32_e32 v3, s1 ; 7E060201
v_sub_f32_e32 v4, s0, v3 ; 08080600
v_subrev_f32_e32 v4, s3, v4 ; 0A080803
v_rcp_f32_e32 v4, v4 ; 7E085504
v_subrev_f32_e32 v5, s1, v1 ; 0A0A0201
v_rcp_f32_e32 v6, s2 ; 7E0C5402
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_sub_f32_e32 v5, s2, v3 ; 080A0602
v_subrev_f32_e32 v5, s3, v5 ; 0A0A0A03
v_mad_f32 v3, v5, v4, v3 ; D2820003 040E0905
v_mul_f32_e32 v3, v6, v3 ; 10060706
s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A
s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E
v_rcp_f32_e32 v3, s2 ; 7E065402
v_sub_f32_e32 v4, s0, v1 ; 08080200
v_mad_f32 v3, -v4, v3, 1.0 ; D2820003 23CA0704
s_or_b64 exec, exec, s[10:11] ; 88FE0A7E
s_or_saveexec_b64 s[8:9], s[8:9] ; BE882508
s_buffer_load_dword s16, s[12:15], 0x11 ; C2080D11
s_buffer_load_dword s25, s[12:15], 0x17 ; C20C8D17
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[8:9] ; 89FE087E
v_rcp_f32_e32 v3, s2 ; 7E065402
v_mul_f32_e32 v3, v3, v1 ; 10060303
s_or_b64 exec, exec, s[8:9] ; 88FE087E
s_buffer_load_dword s30, s[12:15], 0x13 ; C20F0D13
s_buffer_load_dword s31, s[12:15], 0x33 ; C20F8D33
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_cmp_ngt_f32_e32 vcc, s25, v4 ; 7C160819
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_9 ; BF880000
s_buffer_load_dword s1, s[12:15], 0x15 ; C2008D15
v_sub_f32_e32 v1, s16, v4 ; 08020810
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_ngt_f32_e32 vcc, s1, v1 ; 7C160201
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
s_cbranch_execz BB0_14 ; BF880000
v_mov_b32_e32 v1, s25 ; 7E020219
v_sub_f32_e32 v5, s16, v1 ; 080A0210
v_subrev_f32_e32 v5, s1, v5 ; 0A0A0A01
v_rcp_f32_e32 v5, v5 ; 7E0A5505
v_subrev_f32_e32 v6, s25, v4 ; 0A0C0819
v_rcp_f32_e32 v7, s30 ; 7E0E541E
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_sub_f32_e32 v6, s30, v1 ; 080C021E
v_subrev_f32_e32 v6, s1, v6 ; 0A0C0C01
v_mad_f32 v1, v6, v5, v1 ; D2820001 04060B06
v_mul_f32_e32 v5, v7, v1 ; 100A0307
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_rcp_f32_e32 v1, s30 ; 7E02541E
v_sub_f32_e32 v5, s16, v4 ; 080A0810
v_mad_f32 v5, -v5, v1, 1.0 ; D2820005 23CA0305
s_or_b64 exec, exec, s[2:3] ; 88FE027E
s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520
s_buffer_load_dword s1, s[12:15], 0x0 ; C2008D00
s_buffer_load_dword s34, s[12:15], 0x18 ; C2110D18
s_buffer_load_dword s8, s[12:15], 0x1c ; C2040D1C
s_buffer_load_dword s9, s[12:15], 0x1d ; C2048D1D
s_buffer_load_dword s10, s[12:15], 0x1e ; C2050D1E
s_buffer_load_dword s11, s[12:15], 0x1f ; C2058D1F
s_buffer_load_dword s17, s[12:15], 0x20 ; C2088D20
s_buffer_load_dword s18, s[12:15], 0x21 ; C2090D21
s_buffer_load_dword s19, s[12:15], 0x22 ; C2098D22
s_buffer_load_dword s20, s[12:15], 0x23 ; C20A0D23
s_buffer_load_dword s21, s[12:15], 0x24 ; C20A8D24
s_buffer_load_dword s22, s[12:15], 0x25 ; C20B0D25
s_buffer_load_dword s23, s[12:15], 0x26 ; C20B8D26
s_buffer_load_dword s24, s[12:15], 0x27 ; C20C0D27
s_buffer_load_dword s26, s[12:15], 0x28 ; C20D0D28
s_buffer_load_dword s27, s[12:15], 0x29 ; C20D8D29
s_buffer_load_dword s28, s[12:15], 0x2a ; C20E0D2A
s_buffer_load_dword s29, s[12:15], 0x2b ; C20E8D2B
s_buffer_load_dword s3, s[12:15], 0x2c ; C2018D2C
s_buffer_load_dword s35, s[12:15], 0x30 ; C2118D30
s_buffer_load_dword s2, s[12:15], 0x31 ; C2010D31
s_buffer_load_dword s12, s[12:15], 0x32 ; C2060D32
v_mov_b32_e32 v1, s31 ; 7E02021F
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[32:33] ; 89FE207E
v_rcp_f32_e32 v5, s30 ; 7E0A541E
v_mul_f32_e32 v5, v5, v4 ; 100A0905
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mov_b32_e32 v4, v5 ; 7E080305
v_add_f32_e32 v5, s34, v2 ; 060A0422
v_mul_f32_e32 v5, s0, v5 ; 100A0A00
v_add_f32_e32 v6, s34, v0 ; 060C0022
v_mul_f32_e32 v6, s16, v6 ; 100C0C10
v_mov_b32_e32 v7, 0x3c800000 ; 7E0E02FF 3C800000
v_mul_f32_e32 v8, v7, v5 ; 10100B07
v_mul_f32_e32 v9, v7, v6 ; 10120D07
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508
s_load_dwordx8 s[44:51], s[6:7], 0x8 ; C0D60708
s_load_dwordx8 s[52:59], s[6:7], 0x10 ; C0DA0710
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[44:51], s[36:39] ; F0800100 012B0303
image_sample v[4:5], 3, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[52:59], s[40:43] ; F0800300 014D0408
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v4, s35, v4, v2 ; D2820004 040A0823
v_mad_f32 v5, s35, v5, v0 ; D2820005 04020A23
v_mov_b32_e32 v6, 0xbd4ccccd ; 7E0C02FF BD4CCCCD
v_mac_f32_e32 v4, s34, v6 ; 3E080C22
v_mov_b32_e32 v6, 0x3da3d70a ; 7E0C02FF 3DA3D70A
v_mac_f32_e32 v5, s34, v6 ; 3E0A0C22
v_mul_f32_e32 v6, 0.5, v4 ; 100C08F0
v_floor_f32_e32 v6, v6 ; 7E0C4906
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mad_f32 v4, 0.5, v4, -v6 ; D2820004 841A08F0
v_mul_f32_e32 v6, 0.5, v5 ; 100C0AF0
v_floor_f32_e32 v6, v6 ; 7E0C4906
v_mad_f32 v5, 0.5, v5, -v6 ; D2820005 841A0AF0
v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4
v_mad_f32 v5, 2.0, v5, -1.0 ; D2820005 03CE0AF4
v_mov_b32_e32 v6, 0x7fffffff ; 7E0C02FF 7FFFFFFF
v_and_b32_e32 v7, v4, v6 ; 360E0D04
v_and_b32_e32 v8, v5, v6 ; 36100D05
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[36:43], s[32:35] ; F0800F00 01090607
v_sub_f32_e64 v10, 1.0, |v4| ; D208020A 000208F2
v_mul_f32_e32 v11, s8, v10 ; 10161408
v_mad_f32 v11, |v4|, s17, v11 ; D282010B 042C2304
v_mul_f32_e32 v12, s9, v10 ; 10181409
v_mad_f32 v12, |v4|, s18, v12 ; D282010C 04302504
v_mul_f32_e32 v13, s10, v10 ; 101A140A
v_mad_f32 v13, |v4|, s19, v13 ; D282010D 04342704
v_mul_f32_e32 v14, s11, v10 ; 101C140B
v_mad_f32 v14, |v4|, s20, v14 ; D282010E 04382904
v_mul_f32_e32 v15, s21, v10 ; 101E1415
v_mad_f32 v15, |v4|, s26, v15 ; D282010F 043C3504
v_mul_f32_e32 v16, s22, v10 ; 10201416
v_mad_f32 v16, |v4|, s27, v16 ; D2820110 04403704
v_mul_f32_e32 v17, s23, v10 ; 10221417
v_mad_f32 v17, |v4|, s28, v17 ; D2820111 04443904
v_mul_f32_e32 v10, s24, v10 ; 10141418
v_mad_f32 v4, |v4|, s29, v10 ; D2820104 04283B04
v_sub_f32_e64 v10, 1.0, |v5| ; D208020A 00020AF2
v_mul_f32_e32 v11, v11, v10 ; 1016150B
v_mad_f32 v11, |v5|, v15, v11 ; D282010B 042E1F05
v_mul_f32_e32 v12, v12, v10 ; 1018150C
v_mad_f32 v12, |v5|, v16, v12 ; D282010C 04322105
v_mul_f32_e32 v13, v13, v10 ; 101A150D
v_mad_f32 v13, |v5|, v17, v13 ; D282010D 04362305
v_mul_f32_e32 v10, v14, v10 ; 1014150E
v_mad_f32 v4, |v5|, v4, v10 ; D2820104 042A0905
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v5, v6, v11 ; 100A1706
v_mul_f32_e32 v6, v7, v12 ; 100C1907
v_mul_f32_e32 v7, v8, v13 ; 100E1B08
v_mul_f32_e32 v4, v9, v4 ; 10080909
v_mul_f32_e32 v8, s3, v3 ; 10100603
v_add_f32_e32 v0, v0, v2 ; 06000500
v_mac_f32_e32 v0, s1, v1 ; 3E000201
v_mul_f32_e32 v0, s12, v0 ; 1000000C
v_mul_f32_e32 v0, 0x3e22f983, v0 ; 100000FF 3E22F983
v_fract_f32_e32 v0, v0 ; 7E004100
v_sin_f32_e32 v0, v0 ; 7E006B00
v_add_f32_e32 v0, 2.0, v0 ; 060000F4
v_subrev_f32_e32 v1, s2, v3 ; 0A020602
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v2, 4.0, v1 ; 100402F6
v_mad_f32 v1, 4.0, -v1, 1.0 ; D2820001 43CA02F6
v_mac_f32_e32 v1, v0, v2 ; 3E020500
v_mul_f32_e32 v0, v1, v8 ; 10001101
v_mul_f32_e32 v1, v0, v5 ; 10020B00
v_mul_f32_e32 v2, v0, v6 ; 10040D00
v_mul_f32_e32 v3, v0, v7 ; 10060F00
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 20
Code Size: 940 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MAD TEMP[1].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww
5: MAD TEMP[2].x, TEMP[0].zzzz, CONST[0].zzzz, CONST[0].wwww
6: MOV TEMP[1].z, TEMP[2].xxxx
7: MOV OUT[1], TEMP[1]
8: MOV OUT[0], TEMP[0]
9: END
; ModuleID = 'tgsi'
; Vertex shader entry point (attribute group #0 sets "ShaderType"="1").
; This is the LLVM IR form of the VERT TGSI listing printed directly above it
; (instructions 0..9): vertex input 0 is combined with CONST[2]..CONST[5]
; into TEMP[0], input1.xy is scaled and offset by CONST[1], and TEMP[0].z is
; scaled and offset by CONST[0].zw. Both outputs leave the shader through
; @llvm.SI.export.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of argument %1 is the descriptor used by every constant load.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant loads. Matching the uses below against the TGSI listing, each
; CONST[n] occupies 16 bytes starting at byte offset 16*n:
;   %13, %14 offsets 8, 12   CONST[0].z, CONST[0].w
;   %15..%18 offsets 16..28  CONST[1].xyzw
;   %19..%22 offsets 32..44  CONST[2].xyzw
;   %23..%26 offsets 48..60  CONST[3].xyzw
;   %27..%30 offsets 64..76  CONST[4].xyzw
;   %31..%34 offsets 80..92  CONST[5].xyzw
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; Vertex input 0: descriptor at element 0 of argument %4, fetched at index
; %5 + %7; all four components are extracted (%39..%42 = x, y, z, w).
%35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = add i32 %5, %7
%38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
; Vertex input 1: descriptor at element 1 of argument %4, same index
; (%45 recomputes the same sum as %37). Only x and y are extracted.
%43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = add i32 %5, %7
%46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
; TGSI 0: TEMP[0] = CONST[2] * input0.x.
%49 = fmul float %19, %39
%50 = fmul float %20, %39
%51 = fmul float %21, %39
%52 = fmul float %22, %39
; TGSI 1: TEMP[0] += CONST[3] * input0.y (each MAD is expanded into a
; separate fmul and fadd).
%53 = fmul float %23, %40
%54 = fadd float %53, %49
%55 = fmul float %24, %40
%56 = fadd float %55, %50
%57 = fmul float %25, %40
%58 = fadd float %57, %51
%59 = fmul float %26, %40
%60 = fadd float %59, %52
; TGSI 2: TEMP[0] += CONST[4] * input0.z.
%61 = fmul float %27, %41
%62 = fadd float %61, %54
%63 = fmul float %28, %41
%64 = fadd float %63, %56
%65 = fmul float %29, %41
%66 = fadd float %65, %58
%67 = fmul float %30, %41
%68 = fadd float %67, %60
; TGSI 3: TEMP[0] += CONST[5] * input0.w.
; %70, %72, %74, %76 are the final x, y, z, w of TEMP[0].
%69 = fmul float %31, %42
%70 = fadd float %69, %62
%71 = fmul float %32, %42
%72 = fadd float %71, %64
%73 = fmul float %33, %42
%74 = fadd float %73, %66
%75 = fmul float %34, %42
%76 = fadd float %75, %68
; TGSI 4: TEMP[1].xy = input1.xy * CONST[1].xy + CONST[1].zw.
%77 = fmul float %47, %15
%78 = fadd float %77, %17
%79 = fmul float %48, %16
%80 = fadd float %79, %18
; TGSI 5-6: TEMP[1].z = TEMP[0].z * CONST[0].z + CONST[0].w.
%81 = fmul float %74, %13
%82 = fadd float %81, %14
; Export with fourth argument 32: TEMP[1] (x, y, z as computed above, and a
; constant 0.0 for the never-written w), which TGSI 7 moves to OUT[1].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %78, float %80, float %82, float 0.000000e+00)
; Export with fourth argument 12: TEMP[0], which TGSI 8 moves to OUT[0]
; (declared POSITION in the TGSI listing).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107
s_buffer_load_dword s13, s[0:3], 0x8 ; C2068108
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
s_buffer_load_dword s5, s[0:3], 0xa ; C202810A
s_buffer_load_dword s6, s[0:3], 0xb ; C203010B
s_buffer_load_dword s7, s[0:3], 0xc ; C203810C
s_buffer_load_dword s8, s[0:3], 0xd ; C204010D
s_buffer_load_dword s9, s[0:3], 0xe ; C204810E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s12 ; 7E00020C
s_buffer_load_dword s10, s[0:3], 0xf ; C205010F
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x6 ; C2060106
s_buffer_load_dword s14, s[0:3], 0x4 ; C2070104
s_buffer_load_dword s15, s[0:3], 0x5 ; C2078105
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v7, s12 ; 7E0E020C
v_mac_f32_e32 v7, s14, v5 ; 3E0E0A0E
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s14, s[0:3], 0x3 ; C2070103
s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111
s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112
s_buffer_load_dword s17, s[0:3], 0x13 ; C2088113
s_buffer_load_dword s18, s[0:3], 0x14 ; C2090114
s_buffer_load_dword s19, s[0:3], 0x15 ; C2098115
s_buffer_load_dword s20, s[0:3], 0x16 ; C20A0116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
v_mul_f32_e32 v5, s13, v1 ; 100A020D
v_mac_f32_e32 v5, s7, v2 ; 3E0A0407
v_mul_f32_e32 v6, s4, v1 ; 100C0204
v_mac_f32_e32 v6, s8, v2 ; 3E0C0408
v_mul_f32_e32 v8, s5, v1 ; 10100205
v_mac_f32_e32 v8, s9, v2 ; 3E100409
v_mul_f32_e32 v1, s6, v1 ; 10020206
v_mac_f32_e32 v1, s10, v2 ; 3E02040A
v_mac_f32_e32 v5, s11, v3 ; 3E0A060B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s15, v3 ; 3E0C060F
v_mac_f32_e32 v8, s16, v3 ; 3E100610
v_mac_f32_e32 v1, s17, v3 ; 3E020611
v_mac_f32_e32 v5, s18, v4 ; 3E0A0812
v_mac_f32_e32 v6, s19, v4 ; 3E0C0813
v_mac_f32_e32 v8, s20, v4 ; 3E100814
v_mac_f32_e32 v1, s0, v4 ; 3E020800
v_mov_b32_e32 v2, s14 ; 7E04020E
v_mac_f32_e32 v2, s12, v8 ; 3E04100C
v_mov_b32_e32 v3, 0 ; 7E060280
exp 15, 32, 0, 0, 0, v7, v0, v2, v3 ; F800020F 03020007
exp 15, 12, 0, 1, 0, v5, v6, v8, v1 ; F80008CF 01080605
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 248 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
2: MOV_SAT TEMP[1].x, IN[0].zzzz
3: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz
4: MOV TEMP[0].w, IMM[0].xxxx
5: MOV OUT[0], TEMP[0]
6: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0
%29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%34 = bitcast float %31 to i32
%35 = bitcast float %32 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %28, <16 x i8> %30, i32 2)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = call float @llvm.AMDIL.clamp.(float %33, float 0.000000e+00, float 1.000000e+00)
%43 = call float @llvm.AMDGPU.lrp(float %42, float %39, float %24)
%44 = call float @llvm.AMDGPU.lrp(float %42, float %40, float %25)
%45 = call float @llvm.AMDGPU.lrp(float %42, float %41, float %26)
%46 = call i32 @llvm.SI.packf16(float %43, float %44)
%47 = bitcast i32 %46 to float
%48 = call i32 @llvm.SI.packf16(float %45, float 1.000000e+00)
%49 = bitcast i32 %48 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %47, float %49, float %47, float %49)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200
v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201
image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800700 00230102
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v4, 1.0, v0 ; 080800F2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s8, v4 ; 100A0808
v_mul_f32_e32 v6, s9, v4 ; 100C0809
v_mul_f32_e32 v4, s0, v4 ; 10080800
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v5, v1, v0 ; 3E0A0101
v_mac_f32_e32 v6, v2, v0 ; 3E0C0102
v_mac_f32_e32 v4, v3, v0 ; 3E080103
v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05
v_cvt_pkrtz_f16_f32_e64 v1, v4, 1.0 ; D25E0001 0001E504
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 136 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3..4]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 0.4000, 1.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[0].xyyy
3: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
4: MAD TEMP[1].x, CONST[0].xxxx, TEMP[1].xxxx, CONST[0].yyyy
5: RCP TEMP[1].x, TEMP[1].xxxx
6: ADD TEMP[2].xy, CONST[4].xyyy, -IN[0].xyyy
7: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy
8: SQRT TEMP[2].x, TEMP[2].xxxx
9: ADD TEMP[2].x, CONST[4].wwww, -TEMP[2].xxxx
10: MOV_SAT TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3], IMM[0].xxxx
12: FSLT TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
13: UIF TEMP[1].xxxx :0
14: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[3].xyzz
15: MAX TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx
16: DP3 TEMP[0].x, TEMP[0].xyzz, IMM[0].zzzz
17: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
18: MOV TEMP[3], TEMP[0].xxxx
19: ENDIF
20: MOV OUT[0], TEMP[3]
21: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0
%31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%34 = bitcast <8 x i32> addrspace(2)* %33 to <32 x i8> addrspace(2)*
%35 = load <32 x i8>, <32 x i8> addrspace(2)* %34, align 32, !tbaa !0
%36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%37 = bitcast <4 x i32> addrspace(2)* %36 to <16 x i8> addrspace(2)*
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%41 = bitcast float %39 to i32
%42 = bitcast float %40 to i32
%43 = insertelement <2 x i32> undef, i32 %41, i32 0
%44 = insertelement <2 x i32> %43, i32 %42, i32 1
%45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %44, <32 x i8> %30, <16 x i8> %32, i32 2)
%46 = bitcast float %39 to i32
%47 = bitcast float %40 to i32
%48 = insertelement <2 x i32> undef, i32 %46, i32 0
%49 = insertelement <2 x i32> %48, i32 %47, i32 1
%50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %35, <16 x i8> %38, i32 2)
%51 = extractelement <4 x float> %50, i32 0
%52 = fmul float %24, %51
%53 = fadd float %52, %25
%54 = fdiv float 1.000000e+00, %53
%55 = fsub float %26, %39
%56 = fsub float %27, %40
%57 = fmul float %55, %55
%58 = fmul float %56, %56
%59 = fadd float %57, %58
%60 = call float @llvm.sqrt.f32(float %59)
%61 = fsub float %28, %60
%62 = call float @llvm.AMDIL.clamp.(float %61, float 0.000000e+00, float 1.000000e+00)
%63 = fcmp ogt float %54, 0x3FD99999A0000000
br i1 %63, label %IF, label %ENDIF
IF: ; preds = %main_body
%64 = extractelement <4 x float> %45, i32 2
%65 = extractelement <4 x float> %45, i32 1
%66 = extractelement <4 x float> %45, i32 0
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%70 = fsub float %66, %69
%71 = fsub float %65, %68
%72 = fsub float %64, %67
%73 = call float @llvm.maxnum.f32(float %70, float 0.000000e+00)
%74 = call float @llvm.maxnum.f32(float %71, float 0.000000e+00)
%75 = call float @llvm.maxnum.f32(float %72, float 0.000000e+00)
%76 = fadd float %74, %73
%77 = fadd float %76, %75
%78 = fmul float %77, %62
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp12.0 = phi float [ %78, %IF ], [ 0.000000e+00, %main_body ]
%79 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp12.0)
%80 = bitcast i32 %79 to float
%81 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp12.0)
%82 = bitcast i32 %81 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %80, float %82, float %80, float %82)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_mov_b32_e32 v5, 0x3ecccccd ; 7E0A02FF 3ECCCCCD
v_mov_b32_e32 v2, 0 ; 7E040280
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x1 ; C2060101
s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708
s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100
v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000
v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001
v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100
v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[8:11] ; F0800100 00440003
v_mov_b32_e32 v1, s12 ; 7E02020C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v1, s13, v0 ; 3E02000D
v_rcp_f32_e32 v0, v1 ; 7E005501
v_cmp_lt_f32_e32 vcc, v5, v0 ; 7C020105
s_and_saveexec_b64 s[8:9], vcc ; BE88246A
s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s10, s[0:3], 0x13 ; C2050113
s_buffer_load_dword s11, s[0:3], 0xc ; C205810C
s_buffer_load_dword s12, s[0:3], 0xd ; C206010D
s_buffer_load_dword s13, s[0:3], 0xe ; C206810E
s_buffer_load_dword s14, s[0:3], 0x10 ; C2070110
s_buffer_load_dword s15, s[0:3], 0x11 ; C2078111
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v0, s14, v3 ; 0800060E
v_sub_f32_e32 v1, s15, v4 ; 0802080F
v_mul_f32_e32 v1, v1, v1 ; 10020301
image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[20:27], s[16:19] ; F0800700 00850203
v_mac_f32_e32 v1, v0, v0 ; 3E020100
v_sqrt_f32_e32 v0, v1 ; 7E006701
v_sub_f32_e32 v0, s10, v0 ; 0800000A
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v1, s11, v2 ; 0A02040B
v_subrev_f32_e32 v2, s12, v3 ; 0A04060C
v_subrev_f32_e32 v3, s13, v4 ; 0A06080D
v_max_f32_e32 v1, 0, v1 ; 20020280
v_max_f32_e32 v2, 0, v2 ; 20040480
v_max_f32_e32 v3, 0, v3 ; 20060680
v_add_f32_e32 v1, v1, v2 ; 06020501
v_add_f32_e32 v1, v3, v1 ; 06020303
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v2, v0, v1 ; 10040300
s_or_b64 exec, exec, s[8:9] ; 88FE087E
v_cvt_pkrtz_f16_f32_e32 v0, v2, v2 ; 5E000502
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 8
Code Size: 240 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[0], TEMP[0]
5: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = fmul float %13, %33
%38 = fmul float %14, %33
%39 = fmul float %15, %33
%40 = fmul float %16, %33
%41 = fmul float %17, %34
%42 = fadd float %41, %37
%43 = fmul float %18, %34
%44 = fadd float %43, %38
%45 = fmul float %19, %34
%46 = fadd float %45, %39
%47 = fmul float %20, %34
%48 = fadd float %47, %40
%49 = fmul float %21, %35
%50 = fadd float %49, %42
%51 = fmul float %22, %35
%52 = fadd float %51, %44
%53 = fmul float %23, %35
%54 = fadd float %53, %46
%55 = fmul float %24, %35
%56 = fadd float %55, %48
%57 = fmul float %25, %36
%58 = fadd float %57, %50
%59 = fmul float %26, %36
%60 = fadd float %59, %52
%61 = fmul float %27, %36
%62 = fadd float %61, %54
%63 = fmul float %28, %36
%64 = fadd float %63, %56
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s4, v0 ; 10080004
v_mac_f32_e32 v4, s8, v1 ; 3E080208
v_mul_f32_e32 v5, s5, v0 ; 100A0005
v_mac_f32_e32 v5, s9, v1 ; 3E0A0209
v_mul_f32_e32 v6, s6, v0 ; 100C0006
v_mac_f32_e32 v6, s10, v1 ; 3E0C020A
v_mul_f32_e32 v0, s7, v0 ; 10000007
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
v_mac_f32_e32 v4, s12, v2 ; 3E08040C
v_mac_f32_e32 v5, s13, v2 ; 3E0A040D
v_mac_f32_e32 v6, s14, v2 ; 3E0C040E
v_mac_f32_e32 v0, s15, v2 ; 3E00040F
v_mac_f32_e32 v4, s16, v3 ; 3E080610
v_mac_f32_e32 v5, s17, v3 ; 3E0A0611
v_mac_f32_e32 v6, s18, v3 ; 3E0C0612
v_mac_f32_e32 v0, s0, v3 ; 3E000600
exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxx
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
%24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%25 = bitcast i32 %24 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 20 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: ADD TEMP[2].xy, CONST[1].xyyy, -IN[1].xyyy
6: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[0].xyyy
7: MOV TEMP[1].zw, TEMP[2].yyxy
8: MOV OUT[1], TEMP[1]
9: MOV OUT[0], TEMP[0]
10: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
%35 = add i32 %5, %7
%36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35)
%37 = extractelement <4 x float> %36, i32 0
%38 = extractelement <4 x float> %36, i32 1
%39 = extractelement <4 x float> %36, i32 2
%40 = extractelement <4 x float> %36, i32 3
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = fmul float %17, %37
%48 = fmul float %18, %37
%49 = fmul float %19, %37
%50 = fmul float %20, %37
%51 = fmul float %21, %38
%52 = fadd float %51, %47
%53 = fmul float %22, %38
%54 = fadd float %53, %48
%55 = fmul float %23, %38
%56 = fadd float %55, %49
%57 = fmul float %24, %38
%58 = fadd float %57, %50
%59 = fmul float %25, %39
%60 = fadd float %59, %52
%61 = fmul float %26, %39
%62 = fadd float %61, %54
%63 = fmul float %27, %39
%64 = fadd float %63, %56
%65 = fmul float %28, %39
%66 = fadd float %65, %58
%67 = fmul float %29, %40
%68 = fadd float %67, %60
%69 = fmul float %30, %40
%70 = fadd float %69, %62
%71 = fmul float %31, %40
%72 = fadd float %71, %64
%73 = fmul float %32, %40
%74 = fadd float %73, %66
%75 = fsub float %15, %45
%76 = fsub float %16, %46
%77 = fmul float %75, %13
%78 = fmul float %76, %14
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %77, float %78)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Load two 4-dword descriptors through the pointer in s[8:9]; they are the
; resource operands of the two buffer_load_format_xyzw fetches below.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
; v0 = s10 + v0 is the index (idxen) shared by both fetches.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; v[1:4] = first input (xyzw), v[5:8] = second input (xyzw).
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
; s[0:3] is the descriptor that every s_buffer_load_dword below reads from.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Constants at dword offsets 4, 5, 0 and 1 feed the first export; dword 8 is
; fetched here as well and is first used by the multiply chain further down.
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108
s_waitcnt lgkmcnt(0) ; BF8C007F
; v0 = (s4 - v5) * s6 and v7 = (s5 - v6) * s7.
v_sub_f32_e32 v0, s4, v5 ; 08000A04
v_sub_f32_e32 v7, s5, v6 ; 080E0C05
v_mul_f32_e32 v0, s6, v0 ; 10000006
v_mul_f32_e32 v7, s7, v7 ; 100E0E07
; Export target 32: the second input's x/y unchanged, followed by the two
; products computed above.
exp 15, 32, 0, 0, 0, v5, v6, v0, v7 ; F800020F 07000605
; Remaining constants (dword offsets 0x9..0x17) for the multiply-accumulate
; chain over the first input.
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
s_buffer_load_dword s5, s[0:3], 0xa ; C202810A
s_buffer_load_dword s6, s[0:3], 0xb ; C203010B
s_buffer_load_dword s7, s[0:3], 0xc ; C203810C
s_buffer_load_dword s9, s[0:3], 0xd ; C204810D
s_buffer_load_dword s10, s[0:3], 0xe ; C205010E
s_buffer_load_dword s11, s[0:3], 0xf ; C205810F
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111
s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112
s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113
s_buffer_load_dword s16, s[0:3], 0x14 ; C2080114
s_buffer_load_dword s17, s[0:3], 0x15 ; C2088115
s_buffer_load_dword s18, s[0:3], 0x16 ; C2090116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
; v0 was a source operand of the export above and is overwritten next;
; presumably this wait guards that reuse (NOTE(review): confirm against ISA doc).
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s8, v1 ; 10000208
s_waitcnt lgkmcnt(0) ; BF8C007F
; Each of v0, v5, v6, v1 accumulates const*v1 + const*v2 + const*v3 + const*v4,
; where v[1:4] is the first input. v1 is consumed as a source before it is
; reused as the fourth accumulator.
v_mac_f32_e32 v0, s7, v2 ; 3E000407
v_mul_f32_e32 v5, s4, v1 ; 100A0204
v_mac_f32_e32 v5, s9, v2 ; 3E0A0409
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s10, v2 ; 3E0C040A
v_mul_f32_e32 v1, s6, v1 ; 10020206
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; Export target 12 with the four accumulated sums, then end the program.
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 232 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.1667, 0.0000, 0.0000, 0.0000}
0: ADD TEMP[0].xy, IN[0].xyyy, IN[0].zwww
1: MOV TEMP[1].xy, IN[0].xyyy
2: TEX TEMP[1], TEMP[1], SAMP[0], 2D
3: MOV TEMP[2].xy, TEMP[0].xyyy
4: TEX TEMP[2], TEMP[2], SAMP[0], 2D
5: ADD TEMP[1], TEMP[1], TEMP[2]
6: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww
7: MOV TEMP[2].xy, TEMP[0].xyyy
8: TEX TEMP[2], TEMP[2], SAMP[0], 2D
9: ADD TEMP[1], TEMP[1], TEMP[2]
10: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww
11: MOV TEMP[2].xy, TEMP[0].xyyy
12: TEX TEMP[2], TEMP[2], SAMP[0], 2D
13: ADD TEMP[1], TEMP[1], TEMP[2]
14: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww
15: MOV TEMP[2].xy, TEMP[0].xyyy
16: TEX TEMP[2], TEMP[2], SAMP[0], 2D
17: ADD TEMP[1], TEMP[1], TEMP[2]
18: ADD TEMP[0].xy, TEMP[0].xyyy, IN[0].zwww
19: MOV TEMP[0].xy, TEMP[0].xyyy
20: TEX TEMP[0], TEMP[0], SAMP[0], 2D
21: ADD TEMP[1], TEMP[1], TEMP[0]
22: MUL TEMP[0], TEMP[1], IMM[0].xxxx
23: MOV OUT[0], TEMP[0]
24: END
; ModuleID = 'tgsi'
; LLVM IR for the FRAG TGSI program listed directly above in this dump.
; It samples one texture six times, starting at the interpolated (x, y) of
; IN[0] and stepping by the interpolated (z, w) of IN[0] between taps, sums
; the six results per channel, scales the sum by the TGSI immediate 0.1667,
; and exports the result as two packed f16 pairs.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; %23 / %25: element 0 of the <8 x i32> array (argument %3) and of the
; <4 x i32> array (argument %2). The same pair is passed to every sample call.
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
; Interpolate the four channels of IN[0]: (%26, %27) is the base coordinate,
; (%28, %29) is the per-tap step.
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
; Coordinate of tap 2 = base + step.
%30 = fadd float %26, %28
%31 = fadd float %27, %29
; Tap 1 at the base coordinate. The float coordinates are bitcast to i32
; because the sample intrinsic takes a <2 x i32> address operand.
%32 = bitcast float %26 to i32
%33 = bitcast float %27 to i32
%34 = insertelement <2 x i32> undef, i32 %32, i32 0
%35 = insertelement <2 x i32> %34, i32 %33, i32 1
%36 = bitcast <8 x i32> %23 to <32 x i8>
%37 = bitcast <4 x i32> %25 to <16 x i8>
%38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
; Tap 2 at base + 1*step.
%43 = bitcast float %30 to i32
%44 = bitcast float %31 to i32
%45 = insertelement <2 x i32> undef, i32 %43, i32 0
%46 = insertelement <2 x i32> %45, i32 %44, i32 1
%47 = bitcast <8 x i32> %23 to <32 x i8>
%48 = bitcast <4 x i32> %25 to <16 x i8>
%49 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %47, <16 x i8> %48, i32 2)
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = extractelement <4 x float> %49, i32 3
; Running sum after taps 1 and 2.
%54 = fadd float %39, %50
%55 = fadd float %40, %51
%56 = fadd float %41, %52
%57 = fadd float %42, %53
; Tap 3 at base + 2*step.
%58 = fadd float %30, %28
%59 = fadd float %31, %29
%60 = bitcast float %58 to i32
%61 = bitcast float %59 to i32
%62 = insertelement <2 x i32> undef, i32 %60, i32 0
%63 = insertelement <2 x i32> %62, i32 %61, i32 1
%64 = bitcast <8 x i32> %23 to <32 x i8>
%65 = bitcast <4 x i32> %25 to <16 x i8>
%66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = extractelement <4 x float> %66, i32 2
%70 = extractelement <4 x float> %66, i32 3
%71 = fadd float %54, %67
%72 = fadd float %55, %68
%73 = fadd float %56, %69
%74 = fadd float %57, %70
; Tap 4 at base + 3*step.
%75 = fadd float %58, %28
%76 = fadd float %59, %29
%77 = bitcast float %75 to i32
%78 = bitcast float %76 to i32
%79 = insertelement <2 x i32> undef, i32 %77, i32 0
%80 = insertelement <2 x i32> %79, i32 %78, i32 1
%81 = bitcast <8 x i32> %23 to <32 x i8>
%82 = bitcast <4 x i32> %25 to <16 x i8>
%83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %80, <32 x i8> %81, <16 x i8> %82, i32 2)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
%88 = fadd float %71, %84
%89 = fadd float %72, %85
%90 = fadd float %73, %86
%91 = fadd float %74, %87
; Tap 5 at base + 4*step.
%92 = fadd float %75, %28
%93 = fadd float %76, %29
%94 = bitcast float %92 to i32
%95 = bitcast float %93 to i32
%96 = insertelement <2 x i32> undef, i32 %94, i32 0
%97 = insertelement <2 x i32> %96, i32 %95, i32 1
%98 = bitcast <8 x i32> %23 to <32 x i8>
%99 = bitcast <4 x i32> %25 to <16 x i8>
%100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %98, <16 x i8> %99, i32 2)
%101 = extractelement <4 x float> %100, i32 0
%102 = extractelement <4 x float> %100, i32 1
%103 = extractelement <4 x float> %100, i32 2
%104 = extractelement <4 x float> %100, i32 3
%105 = fadd float %88, %101
%106 = fadd float %89, %102
%107 = fadd float %90, %103
%108 = fadd float %91, %104
; Tap 6 at base + 5*step.
%109 = fadd float %92, %28
%110 = fadd float %93, %29
%111 = bitcast float %109 to i32
%112 = bitcast float %110 to i32
%113 = insertelement <2 x i32> undef, i32 %111, i32 0
%114 = insertelement <2 x i32> %113, i32 %112, i32 1
%115 = bitcast <8 x i32> %23 to <32 x i8>
%116 = bitcast <4 x i32> %25 to <16 x i8>
%117 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %115, <16 x i8> %116, i32 2)
%118 = extractelement <4 x float> %117, i32 0
%119 = extractelement <4 x float> %117, i32 1
%120 = extractelement <4 x float> %117, i32 2
%121 = extractelement <4 x float> %117, i32 3
%122 = fadd float %105, %118
%123 = fadd float %106, %119
%124 = fadd float %107, %120
%125 = fadd float %108, %121
; Scale the six-tap sum. 0x3FC5555560000000 is the constant that TGSI prints
; as IMM[0].x = 0.1667 (the disassembly shows it as the f32 literal 0x3e2aaaab).
%126 = fmul float %122, 0x3FC5555560000000
%127 = fmul float %123, 0x3FC5555560000000
%128 = fmul float %124, 0x3FC5555560000000
%129 = fmul float %125, 0x3FC5555560000000
; Pack (x, y) and (z, w) into one i32 each and export; the two packed values
; are passed twice to fill the four float operands of the export intrinsic.
%130 = call i32 @llvm.SI.packf16(float %126, float %127)
%131 = bitcast i32 %130 to float
%132 = call i32 @llvm.SI.packf16(float %128, float %129)
%133 = bitcast i32 %132 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %131, float %133, float %131, float %133)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the six-tap fragment shader: six image_sample fetches of
; the same texture, summed per channel, scaled by the literal 0x3e2aaaab and
; exported as two packed half-float pairs.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; s[8:15] (8 dwords) and s[0:3] (4 dwords) are the two descriptor operands
; shared by every image_sample below.
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
; Interpolate attribute channels 0..3 into v2, v3, v4 and v0 (p1/p2 pairs).
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300
v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301
; (v5, v6) = (v2, v3) + (v4, v0): coordinate of the second tap.
v_add_f32_e32 v5, v4, v2 ; 060A0504
v_add_f32_e32 v6, v0, v3 ; 060C0700
s_waitcnt lgkmcnt(0) ; BF8C007F
; Taps 1 and 2, at v[2:3] and v[5:6].
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020702
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020B05
s_waitcnt vmcnt(0) ; BF8C0770
; Running sum of taps 1 + 2 in v1, v2, v3, v7.
v_add_f32_e32 v1, v11, v7 ; 06020F0B
v_add_f32_e32 v2, v12, v8 ; 0604110C
v_add_f32_e32 v3, v13, v9 ; 0606130D
v_add_f32_e32 v7, v14, v10 ; 060E150E
; Advance the coordinate by (v4, v0) four more times, giving
; v[8:9], v[5:6], v[10:11] and v[12:13].
v_add_f32_e32 v8, v4, v5 ; 06100B04
v_add_f32_e32 v9, v0, v6 ; 06120D00
v_add_f32_e32 v5, v4, v8 ; 060A1104
v_add_f32_e32 v6, v0, v9 ; 060C1300
v_add_f32_e32 v10, v4, v5 ; 06140B04
v_add_f32_e32 v11, v0, v6 ; 06160D00
v_add_f32_e32 v12, v4, v10 ; 06181504
v_add_f32_e32 v13, v0, v11 ; 061A1700
; Taps 3..6, all issued back to back.
image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08
image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00021205
image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[0:3] ; F0800F00 0002080A
image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[8:15], s[0:3] ; F0800F00 0002160C
; The vmcnt argument steps from 3 down to 0; after each wait one more tap
; result (v[14:17], v[18:21], v[8:11], v[22:25]) is added into v[0:3].
s_waitcnt vmcnt(3) ; BF8C0773
v_add_f32_e32 v0, v14, v1 ; 0600030E
v_add_f32_e32 v1, v15, v2 ; 0602050F
v_add_f32_e32 v2, v16, v3 ; 06040710
v_add_f32_e32 v3, v17, v7 ; 06060F11
s_waitcnt vmcnt(2) ; BF8C0772
v_add_f32_e32 v0, v18, v0 ; 06000112
v_add_f32_e32 v1, v19, v1 ; 06020313
v_add_f32_e32 v2, v20, v2 ; 06040514
v_add_f32_e32 v3, v21, v3 ; 06060715
s_waitcnt vmcnt(1) ; BF8C0771
v_add_f32_e32 v0, v8, v0 ; 06000108
v_add_f32_e32 v1, v9, v1 ; 06020309
v_add_f32_e32 v2, v10, v2 ; 0604050A
v_add_f32_e32 v3, v11, v3 ; 0606070B
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v23, v1 ; 06020317
v_add_f32_e32 v2, v24, v2 ; 06040518
v_add_f32_e32 v3, v25, v3 ; 06060719
; Scale all four sums by the literal 0x3e2aaaab held in v4.
v_mov_b32_e32 v4, 0x3e2aaaab ; 7E0802FF 3E2AAAAB
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
; Pack (v0, v1) and (v2, v3) to f16 pairs and export them, then end.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 28
Code Size: 284 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; LLVM IR for the VERT TGSI program listed directly above in this dump.
; It computes OUT[0] as CONST[0]*IN[0].x + CONST[1]*IN[0].y + CONST[2]*IN[0].z
; + CONST[3]*IN[0].w and forwards IN[1].xy (padded with zeros) as OUT[1].
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %12: element 0 of the array behind argument %1; all constant loads read it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0..60: %13-%16 = CONST[0], %17-%20 = CONST[1],
; %21-%24 = CONST[2], %25-%28 = CONST[3] (four components each).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; IN[0]: fetched through element 0 of argument %4's array at index %5 + %7.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; IN[1]: same index, element 1 of argument %4's array; only x and y are used.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI instruction 0: TEMP[0] = CONST[0] * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI instruction 1: TEMP[0] += CONST[1] * IN[0].y
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI instruction 2: TEMP[0] += CONST[2] * IN[0].z
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI instruction 3: TEMP[0] += CONST[3] * IN[0].w
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Export with fourth operand 32 carries OUT[1] = (IN[1].x, IN[1].y, 0, 0);
; export with fourth operand 12 carries the transformed OUT[0].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the transform + texcoord pass-through vertex shader.
; v0 = s10 + v0 is the index (idxen) used by both vertex fetches.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[0:3]: descriptor read by every s_buffer_load_dword below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 0 supplies the two zero components of the first export.
v_mov_b32_e32 v1, 0 ; 7E020280
; s[4:7] and s[8:11]: resource operands of the two vertex fetches.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
; v[2:5] = first input (xyzw), v[6:9] = second input (only v6, v7 are exported).
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
; The descriptor registers s[4:11] are reused as destinations for the
; constants at dword offsets 0x2..0xf.
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; Each of v0, v8, v9, v2 accumulates const*v2 + const*v3 + const*v4 + const*v5
; over the first input. v8 and v9 (unused components of the second fetch) are
; overwritten only after the vmcnt(0) wait.
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; Export target 32: (v6, v7, 0, 0). Export target 12: the four accumulated sums.
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[2]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: ADD TEMP[0], IMM[0].xxxx, -TEMP[0]
3: MOV TEMP[1].xy, IN[0].xyyy
4: TEX TEMP[1], TEMP[1], SAMP[1], 2D
5: MUL TEMP[1], TEMP[1], CONST[2]
6: MOV_SAT TEMP[1], TEMP[1]
7: ADD TEMP[1], IMM[0].xxxx, -TEMP[1]
8: MUL TEMP[0], TEMP[0], TEMP[1]
9: ADD TEMP[0], IMM[0].xxxx, -TEMP[0]
10: MOV OUT[0], TEMP[0]
11: END
; ModuleID = 'tgsi'
; LLVM IR for the FRAG TGSI program listed directly above in this dump.
; With a = texture 0 sample and b = texture 1 sample at the same coordinate,
; each channel is computed as 1 - (1 - a) * (1 - clamp(b * CONST[2], 0, 1))
; and the result is exported as two packed f16 pairs.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; %24-%27: four floats at byte offsets 32..44 of element 0 behind argument %1,
; i.e. the four components of CONST[2].
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
; %29 / %31: element 0 of the arrays behind arguments %3 and %2, used by the
; first sample call.
%28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0
%30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0
; %34 / %37: element 1 of the same two arrays, used by the second sample call.
%32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)*
%34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0
%35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)*
%37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0
; Interpolated IN[0].xy: the coordinate shared by both samples.
%38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; First sample (TGSI SAMP[0]), then 1 - a per channel.
%40 = bitcast float %38 to i32
%41 = bitcast float %39 to i32
%42 = insertelement <2 x i32> undef, i32 %40, i32 0
%43 = insertelement <2 x i32> %42, i32 %41, i32 1
%44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %29, <16 x i8> %31, i32 2)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
%49 = fsub float 1.000000e+00, %45
%50 = fsub float 1.000000e+00, %46
%51 = fsub float 1.000000e+00, %47
%52 = fsub float 1.000000e+00, %48
; Second sample (TGSI SAMP[1]) at the same coordinate.
%53 = bitcast float %38 to i32
%54 = bitcast float %39 to i32
%55 = insertelement <2 x i32> undef, i32 %53, i32 0
%56 = insertelement <2 x i32> %55, i32 %54, i32 1
%57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %34, <16 x i8> %37, i32 2)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = extractelement <4 x float> %57, i32 3
; b * CONST[2], clamped to [0, 1] (TGSI MUL followed by MOV_SAT).
%62 = fmul float %58, %24
%63 = fmul float %59, %25
%64 = fmul float %60, %26
%65 = fmul float %61, %27
%66 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
%67 = call float @llvm.AMDIL.clamp.(float %63, float 0.000000e+00, float 1.000000e+00)
%68 = call float @llvm.AMDIL.clamp.(float %64, float 0.000000e+00, float 1.000000e+00)
%69 = call float @llvm.AMDIL.clamp.(float %65, float 0.000000e+00, float 1.000000e+00)
; 1 - clamped value.
%70 = fsub float 1.000000e+00, %66
%71 = fsub float 1.000000e+00, %67
%72 = fsub float 1.000000e+00, %68
%73 = fsub float 1.000000e+00, %69
; (1 - a) * (1 - clamped b), then the final 1 - product.
%74 = fmul float %49, %70
%75 = fmul float %50, %71
%76 = fmul float %51, %72
%77 = fmul float %52, %73
%78 = fsub float 1.000000e+00, %74
%79 = fsub float 1.000000e+00, %75
%80 = fsub float 1.000000e+00, %76
%81 = fsub float 1.000000e+00, %77
; Pack (x, y) and (z, w) into one i32 each and export; the two packed values
; are passed twice to fill the four float operands of the export intrinsic.
%82 = call i32 @llvm.SI.packf16(float %78, float %79)
%83 = bitcast i32 %82 to float
%84 = call i32 @llvm.SI.packf16(float %80, float %81)
%85 = bitcast i32 %84 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %83, float %85, float %83, float %85)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the two-texture fragment shader. Per channel it computes
; 1 - (1 - tex0) * (1 - clamp(tex1 * const)) and exports packed half floats.
s_wqm_b64 exec, exec ; BEFE0A7E
; s[0:3]: descriptor read by the four s_buffer_load_dword below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; s[12:15] / s[16:19]: 4-dword operands of the first / second image_sample.
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504
s_mov_b32 m0, s9 ; BEFC0309
; s[20:27] / s[4:11]: 8-dword operands of the first / second image_sample.
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four constants at dword offsets 0x8..0xb.
s_buffer_load_dword s28, s[0:3], 0x8 ; C20E0108
s_buffer_load_dword s29, s[0:3], 0x9 ; C20E8109
s_buffer_load_dword s30, s[0:3], 0xa ; C20F010A
s_buffer_load_dword s0, s[0:3], 0xb ; C200010B
; Interpolate attribute channels 0 and 1 into v2, v3: the shared coordinate.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
; v[4:7] = first texture sample, v[0:3] = second texture sample.
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[12:15] ; F0800F00 00650402
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[16:19] ; F0800F00 00810002
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Second sample scaled by the four constants.
v_mul_f32_e32 v0, s28, v0 ; 1000001C
v_mul_f32_e32 v1, s29, v1 ; 1002021D
v_mul_f32_e32 v2, s30, v2 ; 1004041E
v_mul_f32_e32 v3, s0, v3 ; 10060600
; v[4:7] = 1.0 - first sample.
v_sub_f32_e32 v4, 1.0, v4 ; 080808F2
v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2
v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2
v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2
; Per channel: add 0 with the clamp modifier, take 1.0 - that, then
; v_mad with a negated first operand gives 1.0 - v[4..7] * v[0..3].
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_mad_f32 v0, -v4, v0, 1.0 ; D2820000 23CA0104
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_sub_f32_e32 v1, 1.0, v1 ; 080202F2
v_mad_f32 v1, -v5, v1, 1.0 ; D2820001 23CA0305
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_sub_f32_e32 v2, 1.0, v2 ; 080404F2
v_mad_f32 v2, -v6, v2, 1.0 ; D2820002 23CA0506
v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680
v_sub_f32_e32 v3, 1.0, v3 ; 080606F2
v_mad_f32 v3, -v7, v3, 1.0 ; D2820003 23CA0707
; Pack (v0, v1) and (v2, v3) to f16 pairs and export them, then end.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 8
Code Size: 216 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..4]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, 1.0000, -1.0000, 0.0000}
0: MUL TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx
1: MUL TEMP[1].xy, CONST[0].xyyy, IMM[0].yzzz
2: MUL TEMP[2], CONST[1], IN[0].xxxx
3: MAD TEMP[2], CONST[2], IN[0].yyyy, TEMP[2]
4: MAD TEMP[2], CONST[3], IN[0].zzzz, TEMP[2]
5: MAD TEMP[2], CONST[4], IN[0].wwww, TEMP[2]
6: ADD TEMP[3].xy, IN[1].xyyy, TEMP[0].xyyy
7: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy
8: MOV TEMP[3].zw, TEMP[0].yyxy
9: MUL TEMP[0].xy, TEMP[1].xyyy, IMM[0].xxxx
10: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy
11: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[0].xxxx, IN[1].xyyy
12: MOV TEMP[0].zw, TEMP[1].yyxy
13: MOV TEMP[1].xy, IN[1].xyxx
14: MOV OUT[1], TEMP[3]
15: MOV OUT[2], TEMP[0]
16: MOV OUT[3], TEMP[1]
17: MOV OUT[0], TEMP[2]
18: END
; ModuleID = 'tgsi'
; LLVM IR for the VERT TGSI program listed directly above in this dump.
; OUT[0] is IN[0] multiplied through CONST[1..4]. OUT[1] and OUT[2] hold
; IN[1].xy shifted by +/- half of CONST[0].xy in four sign combinations.
; OUT[3] is IN[1].xy padded with zeros.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %12: element 0 of the array behind argument %1; all constant loads read it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; %13, %14 = CONST[0].xy (byte offsets 0 and 4; .zw are never loaded).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
; %15-%30 = CONST[1..4], sixteen floats at byte offsets 16..76.
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; IN[0]: fetched through element 0 of argument %4's array at index %5 + %7.
%31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = add i32 %5, %7
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
; IN[1]: same index, element 1 of argument %4's array; only x and y are used.
%39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = add i32 %5, %7
%42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
; TGSI instruction 0: TEMP[0].xy = CONST[0].xy * 0.5
%45 = fmul float %13, 5.000000e-01
%46 = fmul float %14, 5.000000e-01
; TGSI instructions 2-5: TEMP[2] = CONST[1]*IN[0].x + CONST[2]*IN[0].y
; + CONST[3]*IN[0].z + CONST[4]*IN[0].w
%47 = fmul float %15, %35
%48 = fmul float %16, %35
%49 = fmul float %17, %35
%50 = fmul float %18, %35
%51 = fmul float %19, %36
%52 = fadd float %51, %47
%53 = fmul float %20, %36
%54 = fadd float %53, %48
%55 = fmul float %21, %36
%56 = fadd float %55, %49
%57 = fmul float %22, %36
%58 = fadd float %57, %50
%59 = fmul float %23, %37
%60 = fadd float %59, %52
%61 = fmul float %24, %37
%62 = fadd float %61, %54
%63 = fmul float %25, %37
%64 = fadd float %63, %56
%65 = fmul float %26, %37
%66 = fadd float %65, %58
%67 = fmul float %27, %38
%68 = fadd float %67, %60
%69 = fmul float %28, %38
%70 = fadd float %69, %62
%71 = fmul float %29, %38
%72 = fadd float %71, %64
%73 = fmul float %30, %38
%74 = fadd float %73, %66
; OUT[1] = (IN[1].xy + half, IN[1].xy - half), with half = 0.5 * CONST[0].xy.
%75 = fadd float %43, %45
%76 = fadd float %44, %46
%77 = fsub float %43, %45
%78 = fsub float %44, %46
; OUT[2].xy = IN[1].xy - (0.5*CONST[0].x, -0.5*CONST[0].y). The (1, -1) and
; 0.5 factors from TGSI instructions 1 and 9 appear folded into single
; constants, and %79 repeats the product already held in %45.
%79 = fmul float %13, 5.000000e-01
%80 = fmul float %14, -5.000000e-01
%81 = fsub float %43, %79
%82 = fsub float %44, %80
; OUT[2].zw = (0.5*CONST[0].x, -0.5*CONST[0].y) + IN[1].xy (TGSI instruction 11).
%83 = fmul float %13, 5.000000e-01
%84 = fadd float %83, %43
%85 = fmul float %14, -5.000000e-01
%86 = fadd float %85, %44
; Exports with fourth operand 32, 33 and 34 carry OUT[1], OUT[2] and OUT[3];
; the export with fourth operand 12 carries the transformed OUT[0].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %78)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %82, float %84, float %86)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106
s_buffer_load_dword s7, s[0:3], 0x7 ; C2038107
s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xb ; C205810B
s_buffer_load_dword s14, s[0:3], 0xc ; C207010C
s_buffer_load_dword s15, s[0:3], 0xd ; C207810D
s_buffer_load_dword s16, s[0:3], 0xe ; C208010E
s_buffer_load_dword s17, s[0:3], 0xf ; C208810F
s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110
s_buffer_load_dword s19, s[0:3], 0x11 ; C2098111
s_buffer_load_dword s20, s[0:3], 0x12 ; C20A0112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s4, v2 ; 10000404
v_mad_f32 v8, 0.5, s12, v6 ; D2820008 041818F0
v_mad_f32 v9, 0.5, -s12, v6 ; D2820009 441818F0
v_mad_f32 v10, 0.5, s13, v7 ; D282000A 041C1AF0
v_mad_f32 v11, 0.5, -s13, v7 ; D282000B 441C1AF0
exp 15, 32, 0, 0, 0, v8, v10, v9, v11 ; F800020F 0B090A08
s_waitcnt expcnt(0) ; BF8C070F
v_mad_f32 v10, -0.5, -s13, v7 ; D282000A 441C1AF1
v_mad_f32 v11, -0.5, s13, v7 ; D282000B 041C1AF1
exp 15, 33, 0, 0, 0, v9, v10, v8, v11 ; F800021F 0B080A09
v_mac_f32_e32 v0, s8, v3 ; 3E000608
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v8, s5, v2 ; 10100405
v_mac_f32_e32 v8, s9, v3 ; 3E100609
v_mul_f32_e32 v9, s6, v2 ; 10120406
v_mac_f32_e32 v9, s10, v3 ; 3E12060A
v_mul_f32_e32 v2, s7, v2 ; 10040407
v_mac_f32_e32 v2, s11, v3 ; 3E04060B
v_mac_f32_e32 v0, s14, v4 ; 3E00080E
v_mac_f32_e32 v8, s15, v4 ; 3E10080F
v_mac_f32_e32 v9, s16, v4 ; 3E120810
v_mac_f32_e32 v2, s17, v4 ; 3E040811
v_mac_f32_e32 v0, s18, v5 ; 3E000A12
v_mac_f32_e32 v8, s19, v5 ; 3E100A13
v_mac_f32_e32 v9, s20, v5 ; 3E120A14
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 34, 0, 0, 0, v6, v7, v1, v1 ; F800022F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 272 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
0: MOV TEMP[0].xy, IN[2].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[0].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MAX TEMP[0], TEMP[0], TEMP[1]
5: MOV TEMP[1].xy, IN[0].zwww
6: TEX TEMP[1], TEMP[1], SAMP[0], 2D
7: MOV TEMP[2].xy, IN[1].xyyy
8: TEX TEMP[2], TEMP[2], SAMP[0], 2D
9: MAX TEMP[1], TEMP[1], TEMP[2]
10: MOV TEMP[2].xy, IN[1].zwww
11: TEX TEMP[2], TEMP[2], SAMP[0], 2D
12: MAX TEMP[1], TEMP[1], TEMP[2]
13: MAX TEMP[0], TEMP[0], TEMP[1]
14: MOV OUT[0], TEMP[0]
15: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%36 = bitcast float %34 to i32
%37 = bitcast float %35 to i32
%38 = insertelement <2 x i32> undef, i32 %36, i32 0
%39 = insertelement <2 x i32> %38, i32 %37, i32 1
%40 = bitcast <8 x i32> %23 to <32 x i8>
%41 = bitcast <4 x i32> %25 to <16 x i8>
%42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
%45 = extractelement <4 x float> %42, i32 2
%46 = extractelement <4 x float> %42, i32 3
%47 = bitcast float %26 to i32
%48 = bitcast float %27 to i32
%49 = insertelement <2 x i32> undef, i32 %47, i32 0
%50 = insertelement <2 x i32> %49, i32 %48, i32 1
%51 = bitcast <8 x i32> %23 to <32 x i8>
%52 = bitcast <4 x i32> %25 to <16 x i8>
%53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2)
%54 = extractelement <4 x float> %53, i32 0
%55 = extractelement <4 x float> %53, i32 1
%56 = extractelement <4 x float> %53, i32 2
%57 = extractelement <4 x float> %53, i32 3
%58 = call float @llvm.maxnum.f32(float %43, float %54)
%59 = call float @llvm.maxnum.f32(float %44, float %55)
%60 = call float @llvm.maxnum.f32(float %45, float %56)
%61 = call float @llvm.maxnum.f32(float %46, float %57)
%62 = bitcast float %28 to i32
%63 = bitcast float %29 to i32
%64 = insertelement <2 x i32> undef, i32 %62, i32 0
%65 = insertelement <2 x i32> %64, i32 %63, i32 1
%66 = bitcast <8 x i32> %23 to <32 x i8>
%67 = bitcast <4 x i32> %25 to <16 x i8>
%68 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %66, <16 x i8> %67, i32 2)
%69 = extractelement <4 x float> %68, i32 0
%70 = extractelement <4 x float> %68, i32 1
%71 = extractelement <4 x float> %68, i32 2
%72 = extractelement <4 x float> %68, i32 3
%73 = bitcast float %30 to i32
%74 = bitcast float %31 to i32
%75 = insertelement <2 x i32> undef, i32 %73, i32 0
%76 = insertelement <2 x i32> %75, i32 %74, i32 1
%77 = bitcast <8 x i32> %23 to <32 x i8>
%78 = bitcast <4 x i32> %25 to <16 x i8>
%79 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %76, <32 x i8> %77, <16 x i8> %78, i32 2)
%80 = extractelement <4 x float> %79, i32 0
%81 = extractelement <4 x float> %79, i32 1
%82 = extractelement <4 x float> %79, i32 2
%83 = extractelement <4 x float> %79, i32 3
%84 = call float @llvm.maxnum.f32(float %69, float %80)
%85 = call float @llvm.maxnum.f32(float %70, float %81)
%86 = call float @llvm.maxnum.f32(float %71, float %82)
%87 = call float @llvm.maxnum.f32(float %72, float %83)
%88 = bitcast float %32 to i32
%89 = bitcast float %33 to i32
%90 = insertelement <2 x i32> undef, i32 %88, i32 0
%91 = insertelement <2 x i32> %90, i32 %89, i32 1
%92 = bitcast <8 x i32> %23 to <32 x i8>
%93 = bitcast <4 x i32> %25 to <16 x i8>
%94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %92, <16 x i8> %93, i32 2)
%95 = extractelement <4 x float> %94, i32 0
%96 = extractelement <4 x float> %94, i32 1
%97 = extractelement <4 x float> %94, i32 2
%98 = extractelement <4 x float> %94, i32 3
%99 = call float @llvm.maxnum.f32(float %84, float %95)
%100 = call float @llvm.maxnum.f32(float %85, float %96)
%101 = call float @llvm.maxnum.f32(float %86, float %97)
%102 = call float @llvm.maxnum.f32(float %87, float %98)
%103 = call float @llvm.maxnum.f32(float %58, float %99)
%104 = call float @llvm.maxnum.f32(float %59, float %100)
%105 = call float @llvm.maxnum.f32(float %60, float %101)
%106 = call float @llvm.maxnum.f32(float %61, float %102)
%107 = call i32 @llvm.SI.packf16(float %103, float %104)
%108 = bitcast i32 %107 to float
%109 = call i32 @llvm.SI.packf16(float %105, float %106)
%110 = bitcast i32 %109 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %108, float %110, float %108, float %110)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[4:11], s[0:3] ; F0800F00 00010A0A
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002
image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800F00 00010E04
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800F00 00010406
image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[0:3] ; F0800F00 00011208
s_waitcnt vmcnt(0) ; BF8C0770
v_max3_f32 v4, v14, v4, v18 ; D2A80004 044A090E
v_max3_f32 v5, v15, v5, v19 ; D2A80005 044E0B0F
v_max3_f32 v6, v16, v6, v20 ; D2A80006 04520D10
v_max3_f32 v7, v17, v7, v21 ; D2A80007 04560F11
v_max3_f32 v0, v10, v0, v4 ; D2A80000 0412010A
v_max3_f32 v1, v11, v1, v5 ; D2A80001 0416030B
v_max3_f32 v2, v12, v2, v6 ; D2A80002 041A050C
v_max3_f32 v3, v13, v3, v7 ; D2A80003 041E070D
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 24
Code Size: 228 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..4]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, 1.0000, -1.0000, 0.0000}
0: MUL TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx
1: MUL TEMP[1].xy, CONST[0].xyyy, IMM[0].yzzz
2: MUL TEMP[2], CONST[1], IN[0].xxxx
3: MAD TEMP[2], CONST[2], IN[0].yyyy, TEMP[2]
4: MAD TEMP[2], CONST[3], IN[0].zzzz, TEMP[2]
5: MAD TEMP[2], CONST[4], IN[0].wwww, TEMP[2]
6: ADD TEMP[3].xy, IN[1].xyyy, TEMP[0].xyyy
7: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy
8: MOV TEMP[3].zw, TEMP[0].yyxy
9: MUL TEMP[0].xy, TEMP[1].xyyy, IMM[0].xxxx
10: ADD TEMP[0].xy, IN[1].xyyy, -TEMP[0].xyyy
11: MAD TEMP[1].xy, TEMP[1].xyyy, IMM[0].xxxx, IN[1].xyyy
12: MOV TEMP[0].zw, TEMP[1].yyxy
13: MOV OUT[1], TEMP[3]
14: MOV OUT[2], TEMP[0]
15: MOV OUT[0], TEMP[2]
16: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = add i32 %5, %7
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
%39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = add i32 %5, %7
%42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
%45 = fmul float %13, 5.000000e-01
%46 = fmul float %14, 5.000000e-01
%47 = fmul float %15, %35
%48 = fmul float %16, %35
%49 = fmul float %17, %35
%50 = fmul float %18, %35
%51 = fmul float %19, %36
%52 = fadd float %51, %47
%53 = fmul float %20, %36
%54 = fadd float %53, %48
%55 = fmul float %21, %36
%56 = fadd float %55, %49
%57 = fmul float %22, %36
%58 = fadd float %57, %50
%59 = fmul float %23, %37
%60 = fadd float %59, %52
%61 = fmul float %24, %37
%62 = fadd float %61, %54
%63 = fmul float %25, %37
%64 = fadd float %63, %56
%65 = fmul float %26, %37
%66 = fadd float %65, %58
%67 = fmul float %27, %38
%68 = fadd float %67, %60
%69 = fmul float %28, %38
%70 = fadd float %69, %62
%71 = fmul float %29, %38
%72 = fadd float %71, %64
%73 = fmul float %30, %38
%74 = fadd float %73, %66
%75 = fadd float %43, %45
%76 = fadd float %44, %46
%77 = fsub float %43, %45
%78 = fsub float %44, %46
%79 = fmul float %13, 5.000000e-01
%80 = fmul float %14, -5.000000e-01
%81 = fsub float %43, %79
%82 = fsub float %44, %80
%83 = fmul float %13, 5.000000e-01
%84 = fadd float %83, %43
%85 = fmul float %14, -5.000000e-01
%86 = fadd float %85, %44
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %78)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %82, float %84, float %86)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mad_f32 v0, 0.5, s4, v5 ; D2820000 041408F0
v_mad_f32 v5, 0.5, -s4, v5 ; D2820005 441408F0
v_mad_f32 v7, 0.5, s5, v6 ; D2820007 04180AF0
v_mad_f32 v8, 0.5, -s5, v6 ; D2820008 44180AF0
exp 15, 32, 0, 0, 0, v0, v7, v5, v8 ; F800020F 08050700
s_waitcnt expcnt(0) ; BF8C070F
v_mad_f32 v7, -0.5, -s5, v6 ; D2820007 44180AF1
v_mac_f32_e64 v6, -0.5, s5 ; D23E0006 00000AF1
exp 15, 33, 0, 0, 0, v5, v7, v0, v6 ; F800021F 06000705
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xb ; C205810B
s_buffer_load_dword s12, s[0:3], 0xc ; C206010C
s_buffer_load_dword s13, s[0:3], 0xd ; C206810D
s_buffer_load_dword s14, s[0:3], 0xe ; C207010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s6, v1 ; 10000206
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v5, s7, v1 ; 100A0207
v_mac_f32_e32 v5, s9, v2 ; 3E0A0409
v_mul_f32_e32 v6, s8, v1 ; 100C0208
v_mac_f32_e32 v6, s10, v2 ; 3E0C040A
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 268 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[0].zwww
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: ADD TEMP[0], TEMP[0], TEMP[1]
5: MOV TEMP[1].xy, IN[1].xyyy
6: TEX TEMP[1], TEMP[1], SAMP[0], 2D
7: MOV TEMP[2].xy, IN[1].zwww
8: TEX TEMP[2], TEMP[2], SAMP[0], 2D
9: ADD TEMP[1], TEMP[1], TEMP[2]
10: ADD TEMP[0], TEMP[0], TEMP[1]
11: MUL TEMP[0], TEMP[0], IMM[0].xxxx
12: MOV OUT[0], TEMP[0]
13: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%34 = bitcast float %26 to i32
%35 = bitcast float %27 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
%45 = bitcast float %28 to i32
%46 = bitcast float %29 to i32
%47 = insertelement <2 x i32> undef, i32 %45, i32 0
%48 = insertelement <2 x i32> %47, i32 %46, i32 1
%49 = bitcast <8 x i32> %23 to <32 x i8>
%50 = bitcast <4 x i32> %25 to <16 x i8>
%51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2)
%52 = extractelement <4 x float> %51, i32 0
%53 = extractelement <4 x float> %51, i32 1
%54 = extractelement <4 x float> %51, i32 2
%55 = extractelement <4 x float> %51, i32 3
%56 = fadd float %41, %52
%57 = fadd float %42, %53
%58 = fadd float %43, %54
%59 = fadd float %44, %55
%60 = bitcast float %30 to i32
%61 = bitcast float %31 to i32
%62 = insertelement <2 x i32> undef, i32 %60, i32 0
%63 = insertelement <2 x i32> %62, i32 %61, i32 1
%64 = bitcast <8 x i32> %23 to <32 x i8>
%65 = bitcast <4 x i32> %25 to <16 x i8>
%66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = extractelement <4 x float> %66, i32 2
%70 = extractelement <4 x float> %66, i32 3
%71 = bitcast float %32 to i32
%72 = bitcast float %33 to i32
%73 = insertelement <2 x i32> undef, i32 %71, i32 0
%74 = insertelement <2 x i32> %73, i32 %72, i32 1
%75 = bitcast <8 x i32> %23 to <32 x i8>
%76 = bitcast <4 x i32> %25 to <16 x i8>
%77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2)
%78 = extractelement <4 x float> %77, i32 0
%79 = extractelement <4 x float> %77, i32 1
%80 = extractelement <4 x float> %77, i32 2
%81 = extractelement <4 x float> %77, i32 3
%82 = fadd float %67, %78
%83 = fadd float %68, %79
%84 = fadd float %69, %80
%85 = fadd float %70, %81
%86 = fadd float %56, %82
%87 = fadd float %57, %83
%88 = fadd float %58, %84
%89 = fadd float %59, %85
%90 = fmul float %86, 2.500000e-01
%91 = fmul float %87, 2.500000e-01
%92 = fmul float %88, 2.500000e-01
%93 = fmul float %89, 2.500000e-01
%94 = call i32 @llvm.SI.packf16(float %90, float %91)
%95 = bitcast i32 %94 to float
%96 = call i32 @llvm.SI.packf16(float %92, float %93)
%97 = bitcast i32 %96 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020002
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800F00 00020A04
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020406
image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08
s_waitcnt vmcnt(2) ; BF8C0772
v_add_f32_e32 v0, v10, v0 ; 0600010A
v_add_f32_e32 v1, v11, v1 ; 0602030B
v_add_f32_e32 v2, v12, v2 ; 0604050C
v_add_f32_e32 v3, v13, v3 ; 0606070D
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v4, v14, v4 ; 0608090E
v_add_f32_e32 v5, v15, v5 ; 060A0B0F
v_add_f32_e32 v6, v16, v6 ; 060C0D10
v_add_f32_e32 v7, v17, v7 ; 060E0F11
v_add_f32_e32 v0, v4, v0 ; 06000104
v_add_f32_e32 v1, v5, v1 ; 06020305
v_add_f32_e32 v2, v6, v2 ; 06040506
v_add_f32_e32 v3, v7, v3 ; 06060707
v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 20
Code Size: 216 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].w, TEMP[0].wwww
3: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[1].xyzz
4: MAX TEMP[1].xyz, IMM[0].xxxx, TEMP[0].xyzz
5: MOV OUT[0], TEMP[1]
6: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the FRAG TGSI listing that precedes this module.
; It interpolates channels 0 and 1 of attribute 0, samples one texture at that coordinate,
; subtracts three constants from the first three result channels, clamps those to >= 0 with
; maxnum, packs the four results into two f16 pairs and issues a single export.
; Only arguments %1, %2, %3, %5 and %7 are read by the body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %1 is the <16 x i8> value handed to llvm.SI.load.const below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Byte offsets 16/20/24 correspond to CONST[1].xyz in the TGSI (16 bytes per vec4 constant).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
; First element of argument %3 (as <32 x i8>) and of argument %2 (as <16 x i8>); both feed the
; sample intrinsic - presumably the texture resource and sampler state for SAMP[0], TODO confirm.
%27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0
%29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
; Interpolate IN[0].x and IN[0].y (first operand = channel, second = attribute index).
%31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; The sample intrinsic takes its coordinates as <2 x i32>, so the floats are bitcast, not converted.
%33 = bitcast float %31 to i32
%34 = bitcast float %32 to i32
%35 = insertelement <2 x i32> undef, i32 %33, i32 0
%36 = insertelement <2 x i32> %35, i32 %34, i32 1
; TGSI instruction 1: TEX ..., SAMP[0], 2D.
%37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %36, <32 x i8> %28, <16 x i8> %30, i32 2)
%38 = extractelement <4 x float> %37, i32 0
%39 = extractelement <4 x float> %37, i32 1
%40 = extractelement <4 x float> %37, i32 2
%41 = extractelement <4 x float> %37, i32 3
; TGSI instruction 3: texel.xyz - CONST[1].xyz.
%42 = fsub float %38, %24
%43 = fsub float %39, %25
%44 = fsub float %40, %26
; TGSI instruction 4: MAX against the 0.0 immediate; the fourth channel (%41) is passed through unclamped.
%45 = call float @llvm.maxnum.f32(float %42, float 0.000000e+00)
%46 = call float @llvm.maxnum.f32(float %43, float 0.000000e+00)
%47 = call float @llvm.maxnum.f32(float %44, float 0.000000e+00)
; Pack (x, y) and (z, w) into one i32 each; the disassembly shows v_cvt_pkrtz_f16_f32 for these calls.
%48 = call i32 @llvm.SI.packf16(float %45, float %46)
%49 = bitcast i32 %48 to float
%50 = call i32 @llvm.SI.packf16(float %47, float %41)
%51 = bitcast i32 %50 to float
; Single export of the two packed values, each supplied twice (%49, %51, %49, %51).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105
s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800F00 00230002
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_subrev_f32_e32 v0, s8, v0 ; 0A000008
v_subrev_f32_e32 v1, s10, v1 ; 0A02020A
v_subrev_f32_e32 v2, s0, v2 ; 0A040400
v_max_f32_e32 v2, 0, v2 ; 20040480
v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702
v_max_f32_e32 v0, 0, v0 ; 20000080
v_max_f32_e32 v1, 0, v1 ; 20020280
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 108 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..4]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 1.0000, -1.0000, 2.0000, 3.0000}
IMM[1] FLT32 { 5.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[0].xyxy, IMM[0].xxyy
1: MUL TEMP[1], CONST[1], IN[0].xxxx
2: MAD TEMP[1], CONST[2], IN[0].yyyy, TEMP[1]
3: MAD TEMP[1], CONST[3], IN[0].zzzz, TEMP[1]
4: MAD TEMP[1], CONST[4], IN[0].wwww, TEMP[1]
5: ADD TEMP[2], IN[1].xyxy, TEMP[0]
6: MAD TEMP[3], TEMP[0], IMM[0].zzzz, IN[1].xyxy
7: MAD TEMP[4], TEMP[0], IMM[0].wwww, IN[1].xyxy
8: MAD TEMP[0], TEMP[0], IMM[1].xxxx, IN[1].xyxy
9: MOV TEMP[5].xy, IN[1].xyxx
10: MOV OUT[1], TEMP[2]
11: MOV OUT[5], TEMP[5]
12: MOV OUT[2], TEMP[3]
13: MOV OUT[3], TEMP[4]
14: MOV OUT[4], TEMP[0]
15: MOV OUT[0], TEMP[1]
16: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the VERT TGSI listing that precedes this module.
; It computes the position as CONST[1]*in0.x + CONST[2]*in0.y + CONST[3]*in0.z + CONST[4]*in0.w
; and emits five parameter exports: the IN[1].xy coordinate offset by +/-1, +/-2, +/-3 and +/-5
; times CONST[0].xy, plus the unmodified coordinate.
; Only arguments %1, %4, %5 and %7 are read by the body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of argument %1 is the <16 x i8> value handed to llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Byte offsets 0 and 4: CONST[0].xy, the per-step coordinate offset.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
; Byte offsets 16..76: CONST[1]..CONST[4], four consecutive vec4 constants (16 bytes each).
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; IN[0]: element 0 of argument %4, fetched at index %5 + %7; all four components are used.
%31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = add i32 %5, %7
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
; IN[1]: element 1 of argument %4, same index (%41 recomputes %33); only x and y are extracted.
%39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = add i32 %5, %7
%42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
; TGSI instruction 1: CONST[1] * IN[0].x
%45 = fmul float %15, %35
%46 = fmul float %16, %35
%47 = fmul float %17, %35
%48 = fmul float %18, %35
; TGSI instruction 2: += CONST[2] * IN[0].y
%49 = fmul float %19, %36
%50 = fadd float %49, %45
%51 = fmul float %20, %36
%52 = fadd float %51, %46
%53 = fmul float %21, %36
%54 = fadd float %53, %47
%55 = fmul float %22, %36
%56 = fadd float %55, %48
; TGSI instruction 3: += CONST[3] * IN[0].z
%57 = fmul float %23, %37
%58 = fadd float %57, %50
%59 = fmul float %24, %37
%60 = fadd float %59, %52
%61 = fmul float %25, %37
%62 = fadd float %61, %54
%63 = fmul float %26, %37
%64 = fadd float %63, %56
; TGSI instruction 4: += CONST[4] * IN[0].w  -> %66, %68, %70, %72 hold the position.
%65 = fmul float %27, %38
%66 = fadd float %65, %58
%67 = fmul float %28, %38
%68 = fadd float %67, %60
%69 = fmul float %29, %38
%70 = fadd float %69, %62
%71 = fmul float %30, %38
%72 = fadd float %71, %64
; TGSI instruction 5: coordinate +/- 1 * CONST[0].xy  (xy = plus, zw = minus).
%73 = fadd float %43, %13
%74 = fadd float %44, %14
%75 = fsub float %43, %13
%76 = fsub float %44, %14
; TGSI instruction 6: coordinate +/- 2 * CONST[0].xy.
%77 = fmul float %13, 2.000000e+00
%78 = fadd float %77, %43
%79 = fmul float %14, 2.000000e+00
%80 = fadd float %79, %44
%81 = fmul float %13, -2.000000e+00
%82 = fadd float %81, %43
%83 = fmul float %14, -2.000000e+00
%84 = fadd float %83, %44
; TGSI instruction 7: coordinate +/- 3 * CONST[0].xy.
%85 = fmul float %13, 3.000000e+00
%86 = fadd float %85, %43
%87 = fmul float %14, 3.000000e+00
%88 = fadd float %87, %44
%89 = fmul float %13, -3.000000e+00
%90 = fadd float %89, %43
%91 = fmul float %14, -3.000000e+00
%92 = fadd float %91, %44
; TGSI instruction 8: coordinate +/- 5 * CONST[0].xy (there is no 4x step in the TGSI).
%93 = fmul float %13, 5.000000e+00
%94 = fadd float %93, %43
%95 = fmul float %14, 5.000000e+00
%96 = fadd float %95, %44
%97 = fmul float %13, -5.000000e+00
%98 = fadd float %97, %43
%99 = fmul float %14, -5.000000e+00
%100 = fadd float %99, %44
; Export targets 32..35 carry OUT[1]..OUT[4] (GENERIC[0..3]); target 36 carries OUT[5] (GENERIC[4]),
; the raw coordinate with z/w zero-filled; target 12 carries OUT[0] (POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %75, float %76)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %78, float %80, float %82, float %84)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %86, float %88, float %90, float %92)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %94, float %96, float %98, float %100)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %66, float %68, float %70, float %72)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104
s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105
s_buffer_load_dword s16, s[0:3], 0x6 ; C2080106
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_add_f32_e32 v0, s8, v5 ; 06000A08
v_add_f32_e32 v7, s9, v6 ; 060E0C09
v_subrev_f32_e32 v8, s8, v5 ; 0A100A08
v_subrev_f32_e32 v9, s9, v6 ; 0A120C09
exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700
s_waitcnt expcnt(0) ; BF8C070F
v_mad_f32 v0, 2.0, s8, v5 ; D2820000 041410F4
v_mad_f32 v7, 2.0, s9, v6 ; D2820007 041812F4
v_mad_f32 v8, -2.0, s8, v5 ; D2820008 041410F5
v_mad_f32 v9, -2.0, s9, v6 ; D2820009 041812F5
exp 15, 33, 0, 0, 0, v0, v7, v8, v9 ; F800021F 09080700
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v0, 0x40400000 ; 7E0002FF 40400000
v_mad_f32 v7, s8, v0, v5 ; D2820007 04160008
v_mad_f32 v0, s9, v0, v6 ; D2820000 041A0009
v_mov_b32_e32 v8, 0xc0400000 ; 7E1002FF C0400000
v_mad_f32 v9, s8, v8, v5 ; D2820009 04161008
v_mad_f32 v8, s9, v8, v6 ; D2820008 041A1009
exp 15, 34, 0, 0, 0, v7, v0, v9, v8 ; F800022F 08090007
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v0, 0x40a00000 ; 7E0002FF 40A00000
v_mad_f32 v7, s8, v0, v5 ; D2820007 04160008
v_mad_f32 v0, s9, v0, v6 ; D2820000 041A0009
v_mov_b32_e32 v8, 0xc0a00000 ; 7E1002FF C0A00000
v_mad_f32 v9, s8, v8, v5 ; D2820009 04161008
v_mad_f32 v8, s9, v8, v6 ; D2820008 041A1009
exp 15, 35, 0, 0, 0, v7, v0, v9, v8 ; F800023F 08090007
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v0, 0 ; 7E000280
exp 15, 36, 0, 0, 0, v5, v6, v0, v0 ; F800024F 00000605
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xa ; C203810A
s_buffer_load_dword s8, s[0:3], 0xb ; C204010B
s_buffer_load_dword s9, s[0:3], 0xc ; C204810C
s_buffer_load_dword s12, s[0:3], 0xd ; C206010D
s_buffer_load_dword s13, s[0:3], 0xe ; C206810E
s_buffer_load_dword s14, s[0:3], 0xf ; C207010F
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s10, v1 ; 1000020A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v5, s11, v1 ; 100A020B
v_mac_f32_e32 v5, s6, v2 ; 3E0A0406
v_mul_f32_e32 v6, s16, v1 ; 100C0210
v_mac_f32_e32 v6, s7, v2 ; 3E0C0407
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, s8, v2 ; 3E020408
v_mac_f32_e32 v0, s9, v3 ; 3E000609
v_mac_f32_e32 v5, s12, v3 ; 3E0A060C
v_mac_f32_e32 v6, s13, v3 ; 3E0C060D
v_mac_f32_e32 v1, s14, v3 ; 3E02060E
v_mac_f32_e32 v0, s15, v4 ; 3E00080F
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 400 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.0525, 0.0750, 0.1100, 0.1500}
IMM[1] FLT32 { 0.2250, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[3].zwww
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[3].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MOV TEMP[2].xy, IN[2].zwww
5: TEX TEMP[2], TEMP[2], SAMP[0], 2D
6: MOV TEMP[3].xy, IN[2].xyyy
7: TEX TEMP[3], TEMP[3], SAMP[0], 2D
8: MOV TEMP[4].xy, IN[1].zwww
9: TEX TEMP[4], TEMP[4], SAMP[0], 2D
10: MOV TEMP[5].xy, IN[1].xyyy
11: TEX TEMP[5], TEMP[5], SAMP[0], 2D
12: MOV TEMP[6].xy, IN[0].zwww
13: TEX TEMP[6], TEMP[6], SAMP[0], 2D
14: MOV TEMP[7].xy, IN[0].xyyy
15: TEX TEMP[7], TEMP[7], SAMP[0], 2D
16: MOV TEMP[8].xy, IN[4].xyyy
17: TEX TEMP[8], TEMP[8], SAMP[0], 2D
18: MUL TEMP[8], IMM[1].xxxx, TEMP[8]
19: MAD TEMP[7], IMM[0].wwww, TEMP[7], TEMP[8]
20: MAD TEMP[6], IMM[0].wwww, TEMP[6], TEMP[7]
21: MAD TEMP[5], IMM[0].zzzz, TEMP[5], TEMP[6]
22: MAD TEMP[4], IMM[0].zzzz, TEMP[4], TEMP[5]
23: MAD TEMP[3], IMM[0].yyyy, TEMP[3], TEMP[4]
24: MAD TEMP[2], IMM[0].yyyy, TEMP[2], TEMP[3]
25: MAD TEMP[1], IMM[0].xxxx, TEMP[1], TEMP[2]
26: MAD TEMP[0], IMM[0].xxxx, TEMP[0], TEMP[1]
27: MOV OUT[0], TEMP[0]
28: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the FRAG TGSI listing that precedes this module.
; It samples the same texture nine times - at the xy and zw halves of attributes 0..3 and at
; attribute 4.xy - and accumulates a weighted sum per channel. The weights, as printed in the
; TGSI immediates, are 0.2250 for the attribute-4 tap and 0.1500 / 0.1100 / 0.0750 / 0.0525 for
; the two taps of attributes 0 / 1 / 2 / 3 respectively. The result is packed to f16 pairs
; and exported once. Only arguments %2, %3, %5 and %7 are read by the body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %3 (<8 x i32>) and of argument %2 (<4 x i32>); both are bitcast and passed
; to every sample call below - presumably texture resource and sampler state, TODO confirm.
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
; Interpolate all inputs up front (first operand = channel, second = attribute index):
; attributes 0..3 use all four channels, attribute 4 only x and y.
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
; Tap at IN[3].zw (TGSI TEMP[0]). Every tap repeats the same pattern: bitcast the two
; coordinates to i32, build a <2 x i32>, re-bitcast %23/%25, sample, extract four channels.
%44 = bitcast float %40 to i32
%45 = bitcast float %41 to i32
%46 = insertelement <2 x i32> undef, i32 %44, i32 0
%47 = insertelement <2 x i32> %46, i32 %45, i32 1
%48 = bitcast <8 x i32> %23 to <32 x i8>
%49 = bitcast <4 x i32> %25 to <16 x i8>
%50 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %47, <32 x i8> %48, <16 x i8> %49, i32 2)
%51 = extractelement <4 x float> %50, i32 0
%52 = extractelement <4 x float> %50, i32 1
%53 = extractelement <4 x float> %50, i32 2
%54 = extractelement <4 x float> %50, i32 3
; Tap at IN[3].xy (TGSI TEMP[1]).
%55 = bitcast float %38 to i32
%56 = bitcast float %39 to i32
%57 = insertelement <2 x i32> undef, i32 %55, i32 0
%58 = insertelement <2 x i32> %57, i32 %56, i32 1
%59 = bitcast <8 x i32> %23 to <32 x i8>
%60 = bitcast <4 x i32> %25 to <16 x i8>
%61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %58, <32 x i8> %59, <16 x i8> %60, i32 2)
%62 = extractelement <4 x float> %61, i32 0
%63 = extractelement <4 x float> %61, i32 1
%64 = extractelement <4 x float> %61, i32 2
%65 = extractelement <4 x float> %61, i32 3
; Tap at IN[2].zw (TGSI TEMP[2]).
%66 = bitcast float %36 to i32
%67 = bitcast float %37 to i32
%68 = insertelement <2 x i32> undef, i32 %66, i32 0
%69 = insertelement <2 x i32> %68, i32 %67, i32 1
%70 = bitcast <8 x i32> %23 to <32 x i8>
%71 = bitcast <4 x i32> %25 to <16 x i8>
%72 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %70, <16 x i8> %71, i32 2)
%73 = extractelement <4 x float> %72, i32 0
%74 = extractelement <4 x float> %72, i32 1
%75 = extractelement <4 x float> %72, i32 2
%76 = extractelement <4 x float> %72, i32 3
; Tap at IN[2].xy (TGSI TEMP[3]).
%77 = bitcast float %34 to i32
%78 = bitcast float %35 to i32
%79 = insertelement <2 x i32> undef, i32 %77, i32 0
%80 = insertelement <2 x i32> %79, i32 %78, i32 1
%81 = bitcast <8 x i32> %23 to <32 x i8>
%82 = bitcast <4 x i32> %25 to <16 x i8>
%83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %80, <32 x i8> %81, <16 x i8> %82, i32 2)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
; Tap at IN[1].zw (TGSI TEMP[4]).
%88 = bitcast float %32 to i32
%89 = bitcast float %33 to i32
%90 = insertelement <2 x i32> undef, i32 %88, i32 0
%91 = insertelement <2 x i32> %90, i32 %89, i32 1
%92 = bitcast <8 x i32> %23 to <32 x i8>
%93 = bitcast <4 x i32> %25 to <16 x i8>
%94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %92, <16 x i8> %93, i32 2)
%95 = extractelement <4 x float> %94, i32 0
%96 = extractelement <4 x float> %94, i32 1
%97 = extractelement <4 x float> %94, i32 2
%98 = extractelement <4 x float> %94, i32 3
; Tap at IN[1].xy (TGSI TEMP[5]).
%99 = bitcast float %30 to i32
%100 = bitcast float %31 to i32
%101 = insertelement <2 x i32> undef, i32 %99, i32 0
%102 = insertelement <2 x i32> %101, i32 %100, i32 1
%103 = bitcast <8 x i32> %23 to <32 x i8>
%104 = bitcast <4 x i32> %25 to <16 x i8>
%105 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %102, <32 x i8> %103, <16 x i8> %104, i32 2)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
%108 = extractelement <4 x float> %105, i32 2
%109 = extractelement <4 x float> %105, i32 3
; Tap at IN[0].zw (TGSI TEMP[6]).
%110 = bitcast float %28 to i32
%111 = bitcast float %29 to i32
%112 = insertelement <2 x i32> undef, i32 %110, i32 0
%113 = insertelement <2 x i32> %112, i32 %111, i32 1
%114 = bitcast <8 x i32> %23 to <32 x i8>
%115 = bitcast <4 x i32> %25 to <16 x i8>
%116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %114, <16 x i8> %115, i32 2)
%117 = extractelement <4 x float> %116, i32 0
%118 = extractelement <4 x float> %116, i32 1
%119 = extractelement <4 x float> %116, i32 2
%120 = extractelement <4 x float> %116, i32 3
; Tap at IN[0].xy (TGSI TEMP[7]).
%121 = bitcast float %26 to i32
%122 = bitcast float %27 to i32
%123 = insertelement <2 x i32> undef, i32 %121, i32 0
%124 = insertelement <2 x i32> %123, i32 %122, i32 1
%125 = bitcast <8 x i32> %23 to <32 x i8>
%126 = bitcast <4 x i32> %25 to <16 x i8>
%127 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %124, <32 x i8> %125, <16 x i8> %126, i32 2)
%128 = extractelement <4 x float> %127, i32 0
%129 = extractelement <4 x float> %127, i32 1
%130 = extractelement <4 x float> %127, i32 2
%131 = extractelement <4 x float> %127, i32 3
; Tap at IN[4].xy (TGSI TEMP[8]).
%132 = bitcast float %42 to i32
%133 = bitcast float %43 to i32
%134 = insertelement <2 x i32> undef, i32 %132, i32 0
%135 = insertelement <2 x i32> %134, i32 %133, i32 1
%136 = bitcast <8 x i32> %23 to <32 x i8>
%137 = bitcast <4 x i32> %25 to <16 x i8>
%138 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %136, <16 x i8> %137, i32 2)
%139 = extractelement <4 x float> %138, i32 0
%140 = extractelement <4 x float> %138, i32 1
%141 = extractelement <4 x float> %138, i32 2
%142 = extractelement <4 x float> %138, i32 3
; TGSI instruction 18: IN[4] tap * IMM[1].x (0.2250) starts the accumulator.
%143 = fmul float %139, 0x3FCCCCCCC0000000
%144 = fmul float %140, 0x3FCCCCCCC0000000
%145 = fmul float %141, 0x3FCCCCCCC0000000
%146 = fmul float %142, 0x3FCCCCCCC0000000
; TGSI instruction 19: += IN[0].xy tap * IMM[0].w (0.1500).
%147 = fmul float %128, 0x3FC3333340000000
%148 = fadd float %147, %143
%149 = fmul float %129, 0x3FC3333340000000
%150 = fadd float %149, %144
%151 = fmul float %130, 0x3FC3333340000000
%152 = fadd float %151, %145
%153 = fmul float %131, 0x3FC3333340000000
%154 = fadd float %153, %146
; TGSI instruction 20: += IN[0].zw tap * IMM[0].w (0.1500).
%155 = fmul float %117, 0x3FC3333340000000
%156 = fadd float %155, %148
%157 = fmul float %118, 0x3FC3333340000000
%158 = fadd float %157, %150
%159 = fmul float %119, 0x3FC3333340000000
%160 = fadd float %159, %152
%161 = fmul float %120, 0x3FC3333340000000
%162 = fadd float %161, %154
; TGSI instruction 21: += IN[1].xy tap * IMM[0].z (0.1100).
%163 = fmul float %106, 0x3FBC28F5C0000000
%164 = fadd float %163, %156
%165 = fmul float %107, 0x3FBC28F5C0000000
%166 = fadd float %165, %158
%167 = fmul float %108, 0x3FBC28F5C0000000
%168 = fadd float %167, %160
%169 = fmul float %109, 0x3FBC28F5C0000000
%170 = fadd float %169, %162
; TGSI instruction 22: += IN[1].zw tap * IMM[0].z (0.1100).
%171 = fmul float %95, 0x3FBC28F5C0000000
%172 = fadd float %171, %164
%173 = fmul float %96, 0x3FBC28F5C0000000
%174 = fadd float %173, %166
%175 = fmul float %97, 0x3FBC28F5C0000000
%176 = fadd float %175, %168
%177 = fmul float %98, 0x3FBC28F5C0000000
%178 = fadd float %177, %170
; TGSI instruction 23: += IN[2].xy tap * IMM[0].y (0.0750).
%179 = fmul float %84, 0x3FB3333340000000
%180 = fadd float %179, %172
%181 = fmul float %85, 0x3FB3333340000000
%182 = fadd float %181, %174
%183 = fmul float %86, 0x3FB3333340000000
%184 = fadd float %183, %176
%185 = fmul float %87, 0x3FB3333340000000
%186 = fadd float %185, %178
; TGSI instruction 24: += IN[2].zw tap * IMM[0].y (0.0750).
%187 = fmul float %73, 0x3FB3333340000000
%188 = fadd float %187, %180
%189 = fmul float %74, 0x3FB3333340000000
%190 = fadd float %189, %182
%191 = fmul float %75, 0x3FB3333340000000
%192 = fadd float %191, %184
%193 = fmul float %76, 0x3FB3333340000000
%194 = fadd float %193, %186
; TGSI instruction 25: += IN[3].xy tap * IMM[0].x (0.0525).
%195 = fmul float %62, 0x3FAAE147A0000000
%196 = fadd float %195, %188
%197 = fmul float %63, 0x3FAAE147A0000000
%198 = fadd float %197, %190
%199 = fmul float %64, 0x3FAAE147A0000000
%200 = fadd float %199, %192
%201 = fmul float %65, 0x3FAAE147A0000000
%202 = fadd float %201, %194
; TGSI instruction 26: += IN[3].zw tap * IMM[0].x (0.0525) -> %204, %206, %208, %210 are the final channels.
%203 = fmul float %51, 0x3FAAE147A0000000
%204 = fadd float %203, %196
%205 = fmul float %52, 0x3FAAE147A0000000
%206 = fadd float %205, %198
%207 = fmul float %53, 0x3FAAE147A0000000
%208 = fadd float %207, %200
%209 = fmul float %54, 0x3FAAE147A0000000
%210 = fadd float %209, %202
; Pack (x, y) and (z, w) into one i32 each and export the two packed values, each supplied twice.
%211 = call i32 @llvm.SI.packf16(float %204, float %206)
%212 = bitcast i32 %211 to float
%213 = call i32 @llvm.SI.packf16(float %208, float %210)
%214 = bitcast i32 %213 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %212, float %214, float %212, float %214)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_mov_b32_e32 v2, 0x3e666666 ; 7E0402FF 3E666666
v_mov_b32_e32 v3, 0x3e19999a ; 7E0602FF 3E19999A
v_mov_b32_e32 v4, 0x3de147ae ; 7E0802FF 3DE147AE
v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000
v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001
v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100
v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101
v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200
v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201
v_interp_p1_f32 v8, v0, 3, 0, [m0] ; C8200300
v_interp_p2_f32 v8, [v8], v1, 3, 0, [m0] ; C8210301
v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400
v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401
v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500
v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501
v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600
v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601
v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700
v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701
v_interp_p1_f32 v13, v0, 0, 2, [m0] ; C8340800
v_interp_p2_f32 v13, [v13], v1, 0, 2, [m0] ; C8350801
v_interp_p1_f32 v14, v0, 1, 2, [m0] ; C8380900
v_interp_p2_f32 v14, [v14], v1, 1, 2, [m0] ; C8390901
v_interp_p1_f32 v15, v0, 2, 2, [m0] ; C83C0A00
v_interp_p2_f32 v15, [v15], v1, 2, 2, [m0] ; C83D0A01
v_interp_p1_f32 v16, v0, 3, 2, [m0] ; C8400B00
v_interp_p2_f32 v16, [v16], v1, 3, 2, [m0] ; C8410B01
v_interp_p1_f32 v17, v0, 0, 3, [m0] ; C8440C00
v_interp_p2_f32 v17, [v17], v1, 0, 3, [m0] ; C8450C01
v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00
v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01
v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00
v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01
v_interp_p1_f32 v20, v0, 3, 3, [m0] ; C8500F00
v_interp_p2_f32 v20, [v20], v1, 3, 3, [m0] ; C8510F01
v_interp_p1_f32 v21, v0, 0, 4, [m0] ; C8541000
v_interp_p2_f32 v21, [v21], v1, 0, 4, [m0] ; C8551001
v_interp_p1_f32 v22, v0, 1, 4, [m0] ; C8581100
v_interp_p2_f32 v22, [v22], v1, 1, 4, [m0] ; C8591101
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[4:11], s[0:3] ; F0800F00 00011713
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[4:11], s[0:3] ; F0800F00 00011111
image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[4:11], s[0:3] ; F0800F00 00011B0F
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[0:3] ; F0800F00 00010D0D
image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800F00 00011F0B
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[0:3] ; F0800F00 00010909
image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[4:11], s[0:3] ; F0800F00 00012307
image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[4:11], s[0:3] ; F0800F00 00010505
image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[4:11], s[0:3] ; F0800F00 00012715
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v0, v2, v39 ; 10004F02
v_mul_f32_e32 v1, v2, v40 ; 10025102
v_mul_f32_e32 v21, v2, v41 ; 102A5302
v_mul_f32_e32 v2, v2, v42 ; 10045502
v_mac_f32_e32 v0, v3, v5 ; 3E000B03
v_mac_f32_e32 v1, v3, v6 ; 3E020D03
v_mac_f32_e32 v21, v3, v7 ; 3E2A0F03
v_mac_f32_e32 v2, v3, v8 ; 3E041103
v_mac_f32_e32 v0, v3, v35 ; 3E004703
v_mac_f32_e32 v1, v3, v36 ; 3E024903
v_mac_f32_e32 v21, v3, v37 ; 3E2A4B03
v_mac_f32_e32 v2, v3, v38 ; 3E044D03
v_mac_f32_e32 v0, v4, v9 ; 3E001304
v_mac_f32_e32 v1, v4, v10 ; 3E021504
v_mac_f32_e32 v21, v4, v11 ; 3E2A1704
v_mac_f32_e32 v2, v4, v12 ; 3E041904
v_mac_f32_e32 v0, v4, v31 ; 3E003F04
v_mac_f32_e32 v1, v4, v32 ; 3E024104
v_mac_f32_e32 v21, v4, v33 ; 3E2A4304
v_mac_f32_e32 v2, v4, v34 ; 3E044504
v_mov_b32_e32 v3, 0x3d99999a ; 7E0602FF 3D99999A
v_mac_f32_e32 v0, v3, v13 ; 3E001B03
v_mac_f32_e32 v1, v3, v14 ; 3E021D03
v_mac_f32_e32 v21, v3, v15 ; 3E2A1F03
v_mac_f32_e32 v2, v3, v16 ; 3E042103
v_mac_f32_e32 v0, v3, v27 ; 3E003703
v_mac_f32_e32 v1, v3, v28 ; 3E023903
v_mac_f32_e32 v21, v3, v29 ; 3E2A3B03
v_mac_f32_e32 v2, v3, v30 ; 3E043D03
v_mov_b32_e32 v3, 0x3d570a3d ; 7E0602FF 3D570A3D
v_mac_f32_e32 v0, v3, v17 ; 3E002303
v_mac_f32_e32 v1, v3, v18 ; 3E022503
v_mac_f32_e32 v21, v3, v19 ; 3E2A2703
v_mac_f32_e32 v2, v3, v20 ; 3E042903
v_mac_f32_e32 v0, v3, v23 ; 3E002F03
v_mac_f32_e32 v1, v3, v24 ; 3E023103
v_mac_f32_e32 v21, v3, v25 ; 3E2A3303
v_mac_f32_e32 v2, v3, v26 ; 3E043503
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v21, v2 ; 5E020515
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 44
Code Size: 444 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the VERT TGSI program dumped above.
; It fetches two vertex inputs, combines input 0 with sixteen constant floats
; (the MUL/MAD chain on CONST[0..3]) and exports that result together with the
; x/y components of input 1.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte value at element 0 of argument %1; every llvm.SI.load.const
; call below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0..60, i.e. four groups of four that are used
; below as CONST[0], CONST[1], CONST[2] and CONST[3] (x, y, z, w each).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Vertex input 0: fetched through element 0 of argument %4 at index %5 + %7,
; then split into its four components (%33..%36 = x, y, z, w).
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Vertex input 1: fetched through element 1 of argument %4. %39 recomputes the
; same sum as %31. Only the x and y components are extracted.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI instruction 0 (MUL): CONST[0].xyzw * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI instruction 1 (MAD): CONST[1].xyzw * IN[0].y + previous
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI instruction 2 (MAD): CONST[2].xyzw * IN[0].z + previous
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI instruction 3 (MAD): CONST[3].xyzw * IN[0].w + previous
; %64, %66, %68, %70 are the final x, y, z, w of TEMP[0].
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Export with fourth argument 32: input 1's x/y plus constant 0.0 for z/w
; (the TGSI program only writes TEMP[1].xy before moving it to OUT[1]).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
; Export with fourth argument 12: the transformed vector (OUT[0], POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the vertex shader IR above. The hex words after the
; ';' on each instruction line are the encodings printed by the dump.
; v0 = s10 + v0; presumably the `add i32 %5, %7` fetch index (TODO confirm).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 0, used as the z/w operands of the first export below.
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Sixteen dword loads through s[0:3] at offsets 0x0..0xf; they line up with the
; sixteen llvm.SI.load.const calls (byte offsets 0..60) in the IR. The two
; buffer_load_format_xyzw fetches are interleaved with them.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
; Two four-component fetches indexed by v0 (one per llvm.SI.vs.load.input call):
; v[2:5] through s[4:7] and v[6:9] through s[8:11].
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; Four accumulators, each built from one v_mul and three v_mac with v2..v5
; (the components of the first fetch):
;   v0 uses constants 0x0, 0x4, 0x8, 0xc   v8 uses 0x1, 0x5, 0x9, 0xd
;   v9 uses constants 0x2, 0x6, 0xa, 0xe   v2 uses 0x3, 0x7, 0xb, 0xf
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; Export 32: x/y of the second fetch plus the zero in v1 for z/w.
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
; Export 12: the four accumulated components.
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0], LOCAL
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV OUT[0], TEMP[0]
3: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the FRAG TGSI program dumped
; above: interpolate two channels of IN[0], use them as 2D texture coordinates,
; sample once, pack the four result channels into two half-float pairs and
; export them.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; First 32 bytes behind argument %3 and first 16 bytes behind argument %2; both
; are handed to the sample call below (presumably the SVIEW[0] resource and the
; SAMP[0] state -- confirm against the driver).
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
; Two interpolated values; the calls differ only in their first argument (0, 1),
; which lines up with IN[0].x and IN[0].y in the TGSI program.
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; Reinterpret the two floats as i32 and build the <2 x i32> coordinate operand.
%28 = bitcast float %26 to i32
%29 = bitcast float %27 to i32
%30 = insertelement <2 x i32> undef, i32 %28, i32 0
%31 = insertelement <2 x i32> %30, i32 %29, i32 1
; Texture fetch (TGSI instruction 1, TEX ... 2D) and component extraction.
%32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Pack (x, y) and (z, w) into one i32 each; the bitcasts back to float only
; satisfy the all-float signature of llvm.SI.export.
%37 = call i32 @llvm.SI.packf16(float %33, float %34)
%38 = bitcast i32 %37 to float
%39 = call i32 @llvm.SI.packf16(float %35, float %36)
%40 = bitcast i32 %39 to float
; The two packed values are passed twice, as (xy, zw, xy, zw).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the fragment shader IR above. The hex words after
; the ';' on each instruction line are the encodings printed by the dump.
s_wqm_b64 exec, exec ; BEFE0A7E
; An 8-dword and a 4-dword load; their results s[12:19] and s[0:3] are the last
; two operands of image_sample below.
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_mov_b32 m0, s9 ; BEFC0309
; Two p1/p2 interpolation pairs, one per llvm.SI.fs.interp call; they leave the
; texture coordinates in v2 and v3.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
; Texture fetch with coordinates v[2:3]; the result lands in v[0:3].
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002
s_waitcnt vmcnt(0) ; BF8C0770
; Pack (v0, v1) and (v2, v3) -- the two llvm.SI.packf16 calls in the IR.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
; Export the two packed registers, each passed twice.
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 68 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the VERT TGSI program dumped above.
; It fetches two vertex inputs, combines input 0 with sixteen constant floats
; (the MUL/MAD chain on CONST[0..3]) and exports that result together with the
; x/y components of input 1.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte value at element 0 of argument %1; every llvm.SI.load.const
; call below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0..60, i.e. four groups of four that are used
; below as CONST[0], CONST[1], CONST[2] and CONST[3] (x, y, z, w each).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Vertex input 0: fetched through element 0 of argument %4 at index %5 + %7,
; then split into its four components (%33..%36 = x, y, z, w).
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Vertex input 1: fetched through element 1 of argument %4. %39 recomputes the
; same sum as %31. Only the x and y components are extracted.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI instruction 0 (MUL): CONST[0].xyzw * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI instruction 1 (MAD): CONST[1].xyzw * IN[0].y + previous
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI instruction 2 (MAD): CONST[2].xyzw * IN[0].z + previous
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI instruction 3 (MAD): CONST[3].xyzw * IN[0].w + previous
; %64, %66, %68, %70 are the final x, y, z, w of TEMP[0].
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Export with fourth argument 32: input 1's x/y plus constant 0.0 for z/w
; (the TGSI program only writes TEMP[1].xy before moving it to OUT[1]).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
; Export with fourth argument 12: the transformed vector (OUT[0], POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the vertex shader IR above. The hex words after the
; ';' on each instruction line are the encodings printed by the dump.
; v0 = s10 + v0; presumably the `add i32 %5, %7` fetch index (TODO confirm).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 0, used as the z/w operands of the first export below.
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Sixteen dword loads through s[0:3] at offsets 0x0..0xf; they line up with the
; sixteen llvm.SI.load.const calls (byte offsets 0..60) in the IR. The two
; buffer_load_format_xyzw fetches are interleaved with them.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
; Two four-component fetches indexed by v0 (one per llvm.SI.vs.load.input call):
; v[2:5] through s[4:7] and v[6:9] through s[8:11].
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; Four accumulators, each built from one v_mul and three v_mac with v2..v5
; (the components of the first fetch):
;   v0 uses constants 0x0, 0x4, 0x8, 0xc   v8 uses 0x1, 0x5, 0x9, 0xd
;   v9 uses constants 0x2, 0x6, 0xa, 0xe   v2 uses 0x3, 0x7, 0xb, 0xf
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; Export 32: x/y of the second fetch plus the zero in v1 for z/w.
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
; Export 12: the four accumulated components.
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0], LOCAL
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV OUT[0], TEMP[0]
3: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the FRAG TGSI program dumped
; above: interpolate two channels of IN[0], use them as 2D texture coordinates,
; sample once, pack the four result channels into two half-float pairs and
; export them.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; First 32 bytes behind argument %3 and first 16 bytes behind argument %2; both
; are handed to the sample call below (presumably the SVIEW[0] resource and the
; SAMP[0] state -- confirm against the driver).
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
; Two interpolated values; the calls differ only in their first argument (0, 1),
; which lines up with IN[0].x and IN[0].y in the TGSI program.
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; Reinterpret the two floats as i32 and build the <2 x i32> coordinate operand.
%28 = bitcast float %26 to i32
%29 = bitcast float %27 to i32
%30 = insertelement <2 x i32> undef, i32 %28, i32 0
%31 = insertelement <2 x i32> %30, i32 %29, i32 1
; Texture fetch (TGSI instruction 1, TEX ... 2D) and component extraction.
%32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Pack (x, y) and (z, w) into one i32 each; the bitcasts back to float only
; satisfy the all-float signature of llvm.SI.export.
%37 = call i32 @llvm.SI.packf16(float %33, float %34)
%38 = bitcast i32 %37 to float
%39 = call i32 @llvm.SI.packf16(float %35, float %36)
%40 = bitcast i32 %39 to float
; The two packed values are passed twice, as (xy, zw, xy, zw).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the fragment shader IR above. The hex words after
; the ';' on each instruction line are the encodings printed by the dump.
s_wqm_b64 exec, exec ; BEFE0A7E
; An 8-dword and a 4-dword load; their results s[12:19] and s[0:3] are the last
; two operands of image_sample below.
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_mov_b32 m0, s9 ; BEFC0309
; Two p1/p2 interpolation pairs, one per llvm.SI.fs.interp call; they leave the
; texture coordinates in v2 and v3.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
; Texture fetch with coordinates v[2:3]; the result lands in v[0:3].
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030002
s_waitcnt vmcnt(0) ; BF8C0770
; Pack (v0, v1) and (v2, v3) -- the two llvm.SI.packf16 calls in the IR.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
; Export the two packed registers, each passed twice.
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 68 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[1], IN[1].xyxy
5: MOV OUT[0], TEMP[0]
6: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the VERT TGSI program dumped above.
; It fetches two vertex inputs, combines input 0 with sixteen constant floats
; (the MUL/MAD chain on CONST[0..3]) and exports that result together with
; input 1 swizzled as x, y, x, y.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte value at element 0 of argument %1; every llvm.SI.load.const
; call below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0..60, i.e. four groups of four that are used
; below as CONST[0], CONST[1], CONST[2] and CONST[3] (x, y, z, w each).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Vertex input 0: fetched through element 0 of argument %4 at index %5 + %7,
; then split into its four components (%33..%36 = x, y, z, w).
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Vertex input 1: fetched through element 1 of argument %4. %39 recomputes the
; same sum as %31. Only the x and y components are extracted.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI instruction 0 (MUL): CONST[0].xyzw * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI instruction 1 (MAD): CONST[1].xyzw * IN[0].y + previous
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI instruction 2 (MAD): CONST[2].xyzw * IN[0].z + previous
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI instruction 3 (MAD): CONST[3].xyzw * IN[0].w + previous
; %64, %66, %68, %70 are the final x, y, z, w of TEMP[0].
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Export with fourth argument 32: input 1 as (x, y, x, y), matching
; `MOV OUT[1], IN[1].xyxy` in the TGSI program.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %41, float %42)
; Export with fourth argument 12: the transformed vector (OUT[0], POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the vertex shader IR above. The hex words after the
; ';' on each instruction line are the encodings printed by the dump.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
; v0 = s10 + v0; presumably the `add i32 %5, %7` fetch index (TODO confirm).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Two four-component fetches indexed by v0 (one per llvm.SI.vs.load.input call):
; v[1:4] through s[4:7] and v[5:8] through s[8:11].
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; In this listing both vmcnt and lgkmcnt are waited to zero before the constant
; loads are issued.
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Sixteen dword loads through s[0:3] at offsets 0x0..0xf; they line up with the
; sixteen llvm.SI.load.const calls (byte offsets 0..60) in the IR.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four accumulators, each built from one v_mul and three v_mac with v1..v4
; (the components of the first fetch):
;   v0 uses constants 0x0, 0x4, 0x8, 0xc   v7 uses 0x1, 0x5, 0x9, 0xd
;   v8 uses constants 0x2, 0x6, 0xa, 0xe   v1 uses 0x3, 0x7, 0xb, 0xf
v_mul_f32_e32 v0, s4, v1 ; 10000204
v_mac_f32_e32 v0, s8, v2 ; 3E000408
v_mul_f32_e32 v7, s5, v1 ; 100E0205
v_mac_f32_e32 v7, s9, v2 ; 3E0E0409
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v7, s13, v3 ; 3E0E060D
v_mac_f32_e32 v8, s14, v3 ; 3E10060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v7, s17, v4 ; 3E0E0811
v_mac_f32_e32 v8, s18, v4 ; 3E100812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; Export 32: x/y of the second fetch, repeated as (x, y, x, y).
exp 15, 32, 0, 0, 0, v5, v6, v5, v6 ; F800020F 06050605
; Export 12: the four accumulated components.
exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 192 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[2]
DCL TEMP[0..1], LOCAL
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[1], 2D
2: MOV TEMP[1].xy, IN[0].zwww
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MAD TEMP[0], CONST[2].xxxx, TEMP[0], TEMP[1]
5: MOV OUT[0], TEMP[0]
6: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the FRAG TGSI program dumped
; above: sample two 2D textures (coordinates IN[0].xy and IN[0].zw), scale the
; first sample by CONST[2].x, add the second, pack to half-float pairs and
; export.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant at byte offset 32 read through element 0 of argument %1; it is the
; multiplier in the MAD below (CONST[2].x in the TGSI program).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
; Element 0 of arguments %3 (32 bytes) and %2 (16 bytes): operands of the
; second sample call (SAMP[0] in the TGSI program).
%25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0
%27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0
; Element 1 of the same two arguments: operands of the first sample call
; (SAMP[1] in the TGSI program).
%29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%30 = bitcast <8 x i32> addrspace(2)* %29 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%33 = bitcast <4 x i32> addrspace(2)* %32 to <16 x i8> addrspace(2)*
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
; Four interpolated values; the calls differ only in their first argument
; (0..3), which lines up with IN[0].x, .y, .z and .w in the TGSI program.
%35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
; First fetch (TGSI instruction 1): coordinates (%35, %36), element-1 operands.
%39 = bitcast float %35 to i32
%40 = bitcast float %36 to i32
%41 = insertelement <2 x i32> undef, i32 %39, i32 0
%42 = insertelement <2 x i32> %41, i32 %40, i32 1
%43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %31, <16 x i8> %34, i32 2)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = extractelement <4 x float> %43, i32 3
; Second fetch (TGSI instruction 3): coordinates (%37, %38), element-0 operands.
%48 = bitcast float %37 to i32
%49 = bitcast float %38 to i32
%50 = insertelement <2 x i32> undef, i32 %48, i32 0
%51 = insertelement <2 x i32> %50, i32 %49, i32 1
%52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %26, <16 x i8> %28, i32 2)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
%55 = extractelement <4 x float> %52, i32 2
%56 = extractelement <4 x float> %52, i32 3
; TGSI instruction 4 (MAD), per channel: %24 * first sample + second sample.
%57 = fmul float %24, %44
%58 = fadd float %57, %53
%59 = fmul float %24, %45
%60 = fadd float %59, %54
%61 = fmul float %24, %46
%62 = fadd float %61, %55
%63 = fmul float %24, %47
%64 = fadd float %63, %56
; Pack (x, y) and (z, w) into one i32 each; the bitcasts back to float only
; satisfy the all-float signature of llvm.SI.export.
%65 = call i32 @llvm.SI.packf16(float %58, float %60)
%66 = bitcast i32 %65 to float
%67 = call i32 @llvm.SI.packf16(float %62, float %64)
%68 = bitcast i32 %67 to float
; The two packed values are passed twice, as (xy, zw, xy, zw).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the fragment shader IR above. The hex words after
; the ';' on each instruction line are the encodings printed by the dump.
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Two 4-dword loads from s[4:5] (offsets 0x0 and 0x4); they become the last
; operand of the second and first image_sample respectively.
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
; s0 = constant dword 0x8 (byte offset 32 in the IR); the multiplier used by
; the v_mad/v_mac instructions below. This overwrites s0 of the s[0:3] source.
s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108
; Two 8-dword loads from s[6:7] (offsets 0x8 and 0x0), used by the first and
; second image_sample respectively.
s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
; Four p1/p2 interpolation pairs, one per llvm.SI.fs.interp call; results in
; v2..v5.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
s_waitcnt lgkmcnt(0) ; BF8C007F
; First fetch: coordinates v[2:3], result in v[0:3].
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[16:19] ; F0800F00 00850002
; Second fetch: coordinates v[4:5], result in v[4:7].
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[12:15] ; F0800F00 00610404
s_waitcnt vmcnt(0) ; BF8C0770
; Per channel: s0 * first sample + second sample (the fmul/fadd pairs in the
; IR). x, y, z land in v0, v1, v2; w is accumulated into v7.
v_mad_f32 v0, s0, v0, v4 ; D2820000 04120000
v_mad_f32 v1, s0, v1, v5 ; D2820001 04160200
v_mad_f32 v2, s0, v2, v6 ; D2820002 041A0400
v_mac_f32_e32 v7, s0, v3 ; 3E0E0600
; Pack (z, w) into v2 and (x, y) into v0.
v_cvt_pkrtz_f16_f32_e32 v2, v2, v7 ; 5E040F02
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
; Export the two packed registers, each passed twice.
exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 8
Code Size: 140 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL CONST[0..4]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 0.5000, 4.0000, 0.0000, 0.0000}
0: MUL TEMP[0].xy, CONST[0].xyyy, IMM[0].xxxx
1: ADD TEMP[1].xy, IN[1].xyyy, -TEMP[0].xyyy
2: ADD TEMP[2].xy, IN[1].xyyy, TEMP[0].xyyy
3: MOV TEMP[1].zw, TEMP[2].yyxy
4: MUL TEMP[2].xy, -CONST[0].xyyy, IMM[0].xxxx
5: MOV TEMP[2].zw, TEMP[0].yyxy
6: MUL TEMP[3].xy, TEMP[2].xyyy, IMM[0].yyyy
7: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy
8: MOV TEMP[3].zw, TEMP[0].yyxy
9: MUL TEMP[0], CONST[1], IN[0].xxxx
10: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0]
11: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0]
12: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0]
13: MOV TEMP[4].xy, IN[1].xyxx
14: MOV OUT[1], TEMP[1]
15: MOV OUT[4], TEMP[4]
16: MOV OUT[2], TEMP[2]
17: MOV OUT[3], TEMP[3]
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
; Vertex shader entry point (attribute group #0 carries "ShaderType"="1").
; Reads one descriptor from argument %1 (used as a constant buffer) and two
; descriptors from argument %4 (used as per-vertex inputs), then issues five
; llvm.SI.export calls: targets 32..35 and target 12.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of the %1 array is the descriptor passed to every llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Byte offsets 0 and 4: first two floats of the buffer (presumably CONST[0].xy;
; the TGSI that consumes them is above the visible part of the listing).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
; Byte offsets 16..76: sixteen consecutive floats. They are multiplied with the
; x/y/z/w components of input 0 further down, matching CONST[1]..CONST[4] in the
; TGSI MUL/MAD chain (instructions 9-12).
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; Input 0: descriptor at index 0 of the %4 array, fetched at index %5 + %7.
; All four components are extracted (%35..%38).
%31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = add i32 %5, %7
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
; Input 1: descriptor at index 1 of the %4 array, same fetch index (%41 repeats
; the %33 computation). Only components 0 and 1 are used.
%39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = add i32 %5, %7
%42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
; Half of the two leading constants, then input1.xy minus / plus that half.
%45 = fmul float %13, 5.000000e-01
%46 = fmul float %14, 5.000000e-01
%47 = fsub float %43, %45
%48 = fsub float %44, %46
%49 = fadd float %43, %45
%50 = fadd float %44, %46
; Negative half of the same constants, then both halves scaled by 4.
%51 = fmul float %13, -5.000000e-01
%52 = fmul float %14, -5.000000e-01
%53 = fmul float %51, 4.000000e+00
%54 = fmul float %52, 4.000000e+00
%55 = fmul float %45, 4.000000e+00
%56 = fmul float %46, 4.000000e+00
; Four-term multiply/accumulate per output component:
;   result = c[16..28]*in0.x + c[32..44]*in0.y + c[48..60]*in0.z + c[64..76]*in0.w
; (TGSI instructions 9-12: MUL followed by three MADs into TEMP[0]).
%57 = fmul float %15, %35
%58 = fmul float %16, %35
%59 = fmul float %17, %35
%60 = fmul float %18, %35
%61 = fmul float %19, %36
%62 = fadd float %61, %57
%63 = fmul float %20, %36
%64 = fadd float %63, %58
%65 = fmul float %21, %36
%66 = fadd float %65, %59
%67 = fmul float %22, %36
%68 = fadd float %67, %60
%69 = fmul float %23, %37
%70 = fadd float %69, %62
%71 = fmul float %24, %37
%72 = fadd float %71, %64
%73 = fmul float %25, %37
%74 = fadd float %73, %66
%75 = fmul float %26, %37
%76 = fadd float %75, %68
%77 = fmul float %27, %38
%78 = fadd float %77, %70
%79 = fmul float %28, %38
%80 = fadd float %79, %72
%81 = fmul float %29, %38
%82 = fadd float %81, %74
%83 = fmul float %30, %38
%84 = fadd float %83, %76
; Target 32: (in1.xy - half, in1.xy + half).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %47, float %48, float %49, float %50)
; Target 33: (-half.xy, +half.xy).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %51, float %52, float %45, float %46)
; Target 34: the target-33 values multiplied by 4.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %53, float %54, float %55, float %56)
; Target 35: in1.xy with zeros in the last two channels (TGSI OUT[4] <- TEMP[4].xy).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %43, float %44, float 0.000000e+00, float 0.000000e+00)
; Target 12: the accumulated product of input 0 with the constants (TGSI OUT[0] <- TEMP[0]).
; This is the only export whose third argument is 1.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %78, float %80, float %82, float %84)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104
s_buffer_load_dword s11, s[0:3], 0x5 ; C2058105
s_buffer_load_dword s16, s[0:3], 0x6 ; C2080106
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mad_f32 v0, 0.5, -s8, v5 ; D2820000 441410F0
v_mad_f32 v7, 0.5, -s9, v6 ; D2820007 441812F0
v_mad_f32 v8, 0.5, s8, v5 ; D2820008 041410F0
v_mad_f32 v9, 0.5, s9, v6 ; D2820009 041812F0
exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e64 v0, 0.5, s8 ; D2100000 000010F0
v_mul_f32_e64 v7, -0.5, s8 ; D2100007 000010F1
v_mul_f32_e64 v8, 0.5, s9 ; D2100008 000012F0
v_mul_f32_e64 v9, -0.5, s9 ; D2100009 000012F1
exp 15, 33, 0, 0, 0, v7, v9, v0, v8 ; F800021F 08000907
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v7, 4.0, v7 ; 100E0EF6
v_mul_f32_e32 v9, 4.0, v9 ; 101212F6
v_mul_f32_e32 v0, 4.0, v0 ; 100000F6
v_mul_f32_e32 v8, 4.0, v8 ; 101010F6
exp 15, 34, 0, 0, 0, v7, v9, v0, v8 ; F800022F 08000907
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v0, 0 ; 7E000280
exp 15, 35, 0, 0, 0, v5, v6, v0, v0 ; F800023F 00000605
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xa ; C203810A
s_buffer_load_dword s8, s[0:3], 0xb ; C204010B
s_buffer_load_dword s9, s[0:3], 0xc ; C204810C
s_buffer_load_dword s12, s[0:3], 0xd ; C206010D
s_buffer_load_dword s13, s[0:3], 0xe ; C206810E
s_buffer_load_dword s14, s[0:3], 0xf ; C207010F
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s10, v1 ; 1000020A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v5, s11, v1 ; 100A020B
v_mac_f32_e32 v5, s6, v2 ; 3E0A0406
v_mul_f32_e32 v6, s16, v1 ; 100C0210
v_mac_f32_e32 v6, s7, v2 ; 3E0C0407
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, s8, v2 ; 3E020408
v_mac_f32_e32 v0, s9, v3 ; 3E000609
v_mac_f32_e32 v5, s12, v3 ; 3E0A060C
v_mac_f32_e32 v6, s13, v3 ; 3E0C060D
v_mac_f32_e32 v1, s14, v3 ; 3E02060E
v_mac_f32_e32 v0, s15, v4 ; 3E00080F
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 324 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0]
DCL CONST[2..4]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.0000, 2.0000, 0.0026, -2.0000}
IMM[1] FLT32 { 0.2500, 0.5000, 1.0000, 0.0000}
IMM[2] UINT32 {4294967295, 0, 0, 0}
0: MOV TEMP[0].xy, IN[0].xyyy
1: MOV TEMP[0].w, IMM[0].xxxx
2: TXL TEMP[0].xyz, TEMP[0], SAMP[0], 2D
3: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[0].xyzz
4: ADD TEMP[1].x, TEMP[0].xxxx, TEMP[0].zzzz
5: MUL TEMP[1].x, TEMP[0].yyyy, TEMP[1].xxxx
6: SQRT TEMP[1].x, TEMP[1].xxxx
7: MUL TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
8: ADD TEMP[2].x, TEMP[0].xxxx, TEMP[0].yyyy
9: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[0].zzzz
10: MAD TEMP[0].x, TEMP[1].xxxx, CONST[0].wwww, TEMP[0].xxxx
11: MOV TEMP[1].xy, IN[0].xwww
12: MOV TEMP[1].w, IMM[0].xxxx
13: TXL TEMP[1].xyz, TEMP[1], SAMP[0], 2D
14: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[0].xyzz
15: ADD TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz
16: MUL TEMP[2].x, TEMP[1].yyyy, TEMP[2].xxxx
17: SQRT TEMP[2].x, TEMP[2].xxxx
18: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
19: ADD TEMP[3].x, TEMP[1].xxxx, TEMP[1].yyyy
20: ADD TEMP[1].x, TEMP[3].xxxx, TEMP[1].zzzz
21: MAD TEMP[1].x, TEMP[2].xxxx, CONST[0].wwww, TEMP[1].xxxx
22: MOV TEMP[2].xy, IN[0].zyyy
23: MOV TEMP[2].w, IMM[0].xxxx
24: TXL TEMP[2].xyz, TEMP[2], SAMP[0], 2D
25: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[0].xyzz
26: MOV TEMP[3].xy, IN[0].zwww
27: MOV TEMP[3].w, IMM[0].xxxx
28: TXL TEMP[3].xyz, TEMP[3], SAMP[0], 2D
29: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[0].xyzz
30: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz
31: MUL TEMP[4].x, TEMP[3].yyyy, TEMP[4].xxxx
32: SQRT TEMP[4].x, TEMP[4].xxxx
33: MUL TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx
34: ADD TEMP[5].x, TEMP[3].xxxx, TEMP[3].yyyy
35: ADD TEMP[3].x, TEMP[5].xxxx, TEMP[3].zzzz
36: MAD TEMP[3].x, TEMP[4].xxxx, CONST[0].wwww, TEMP[3].xxxx
37: MOV TEMP[4].xy, IN[3].xyyy
38: MOV TEMP[4].w, IMM[0].xxxx
39: TXL TEMP[4].xyz, TEMP[4], SAMP[0], 2D
40: MUL TEMP[5].xyz, TEMP[4].xyzz, CONST[0].xyzz
41: ADD TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz
42: MUL TEMP[6].x, TEMP[5].yyyy, TEMP[6].xxxx
43: SQRT TEMP[6].x, TEMP[6].xxxx
44: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
45: ADD TEMP[7].x, TEMP[5].xxxx, TEMP[5].yyyy
46: ADD TEMP[5].x, TEMP[7].xxxx, TEMP[5].zzzz
47: MAD TEMP[5].x, TEMP[6].xxxx, CONST[0].wwww, TEMP[5].xxxx
48: ADD TEMP[6].x, TEMP[2].xxxx, TEMP[2].zzzz
49: MUL TEMP[6].x, TEMP[2].yyyy, TEMP[6].xxxx
50: SQRT TEMP[6].x, TEMP[6].xxxx
51: MUL TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
52: ADD TEMP[7].x, TEMP[2].xxxx, TEMP[2].yyyy
53: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[2].zzzz
54: MAD TEMP[2].x, TEMP[6].xxxx, CONST[0].wwww, TEMP[2].xxxx
55: ADD TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz
56: MAX TEMP[6].x, TEMP[2].xxxx, TEMP[3].xxxx
57: MAX TEMP[7].x, TEMP[0].xxxx, TEMP[1].xxxx
58: MAX TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx
59: MIN TEMP[7].x, TEMP[2].xxxx, TEMP[3].xxxx
60: MIN TEMP[8].x, TEMP[0].xxxx, TEMP[1].xxxx
61: MIN TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx
62: MUL TEMP[8].x, TEMP[6].xxxx, CONST[3].xxxx
63: MAX TEMP[8].x, CONST[2].xxxx, TEMP[8].xxxx
64: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[2].xxxx
65: MAX TEMP[2].x, TEMP[6].xxxx, TEMP[5].xxxx
66: MIN TEMP[5].x, TEMP[7].xxxx, TEMP[5].xxxx
67: ADD TEMP[2].x, TEMP[2].xxxx, -TEMP[5].xxxx
68: ADD TEMP[0].x, TEMP[3].xxxx, -TEMP[0].xxxx
69: FSLT TEMP[2].x, TEMP[2].xxxx, TEMP[8].xxxx
70: UIF TEMP[2].xxxx :0
71: MOV TEMP[2].xyz, TEMP[4].xyzx
72: ELSE :0
73: ADD TEMP[3].x, TEMP[1].xxxx, TEMP[0].xxxx
74: ADD TEMP[0].x, TEMP[1].xxxx, -TEMP[0].xxxx
75: MOV TEMP[3].y, TEMP[0].xxxx
76: DP2 TEMP[0].x, TEMP[3].xyyy, TEMP[3].xyyy
77: RSQ TEMP[0].x, TEMP[0].xxxx
78: MUL TEMP[0].xy, TEMP[3].xyyy, TEMP[0].xxxx
79: MUL TEMP[3].xy, TEMP[0].xyyy, IN[1].zwww
80: ADD TEMP[1].xy, IN[3].xyyy, -TEMP[3].xyyy
81: MAD TEMP[3].xy, TEMP[0].xyyy, IN[1].zwww, IN[3].xyyy
82: ABS TEMP[4].x, TEMP[0].xxxx
83: ABS TEMP[5].x, TEMP[0].yyyy
84: MIN TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
85: MUL TEMP[4].x, TEMP[4].xxxx, CONST[4].xxxx
86: RCP TEMP[4].x, TEMP[4].xxxx
87: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[4].xxxx
88: MAX TEMP[0].xy, TEMP[0].xyyy, IMM[0].wwww
89: MIN TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy
90: MUL TEMP[5].xy, TEMP[0].xyyy, IN[2].zwww
91: ADD TEMP[4].xy, IN[3].xyyy, -TEMP[5].xyyy
92: MAD TEMP[0].xy, TEMP[0].xyyy, IN[2].zwww, IN[3].xyyy
93: MOV TEMP[1].xy, TEMP[1].xyyy
94: MOV TEMP[1].w, IMM[0].xxxx
95: TXL TEMP[1].xyz, TEMP[1], SAMP[0], 2D
96: MOV TEMP[3].xy, TEMP[3].xyyy
97: MOV TEMP[3].w, IMM[0].xxxx
98: TXL TEMP[3].xyz, TEMP[3], SAMP[0], 2D
99: ADD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
100: MOV TEMP[3].xy, TEMP[4].xyyy
101: MOV TEMP[3].w, IMM[0].xxxx
102: TXL TEMP[3].xyz, TEMP[3], SAMP[0], 2D
103: MOV TEMP[0].xy, TEMP[0].xyyy
104: MOV TEMP[0].w, IMM[0].xxxx
105: TXL TEMP[0].xyz, TEMP[0], SAMP[0], 2D
106: ADD TEMP[0].xyz, TEMP[3].xyzz, TEMP[0].xyzz
107: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[1].xxxx
108: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[1].xxxx, TEMP[0].xyzz
109: MUL TEMP[3].xyz, TEMP[1].xyzz, CONST[0].xyzz
110: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz
111: MUL TEMP[4].x, TEMP[3].yyyy, TEMP[4].xxxx
112: SQRT TEMP[4].x, TEMP[4].xxxx
113: MUL TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx
114: ADD TEMP[5].x, TEMP[3].xxxx, TEMP[3].yyyy
115: ADD TEMP[3].x, TEMP[5].xxxx, TEMP[3].zzzz
116: MAD TEMP[3].x, TEMP[4].xxxx, CONST[0].wwww, TEMP[3].xxxx
117: FSLT TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx
118: UIF TEMP[3].xxxx :0
119: MOV TEMP[3].x, IMM[2].xxxx
120: ELSE :0
121: MUL TEMP[4].xyz, TEMP[0].xyzz, CONST[0].xyzz
122: ADD TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz
123: MUL TEMP[5].x, TEMP[4].yyyy, TEMP[5].xxxx
124: SQRT TEMP[5].x, TEMP[5].xxxx
125: MUL TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx
126: ADD TEMP[7].x, TEMP[4].xxxx, TEMP[4].yyyy
127: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[4].zzzz
128: MAD TEMP[4].x, TEMP[5].xxxx, CONST[0].wwww, TEMP[4].xxxx
129: FSLT TEMP[3].x, TEMP[6].xxxx, TEMP[4].xxxx
130: ENDIF
131: UIF TEMP[3].xxxx :0
132: MUL TEMP[2].xyz, TEMP[1].xyzz, IMM[1].yyyy
133: ELSE :0
134: MOV TEMP[2].xyz, TEMP[0].xyzx
135: ENDIF
136: ENDIF
137: MOV TEMP[0].w, IMM[1].zzzz
138: MOV TEMP[0].xyz, TEMP[2].xyzx
139: MOV OUT[0], TEMP[0]
140: END
; ModuleID = 'tgsi'
; Fragment shader entry point (attribute group #0 carries "ShaderType"="0").
; Lowered from the TGSI FRAG listing printed directly above this module.
; Outline, as visible in the IR:
;   1. Sample the texture at five interpolated coordinates and reduce each RGB
;      result to one scalar "metric":
;        metric = 2*sqrt(g' * (r' + b')) * CONST[0].w + r' + g' + b'
;      where (r', g', b') = sample.rgb * CONST[0].xyz.
;   2. If the spread (max - min) of the five metrics is below a threshold, write
;      the centre sample unchanged.
;   3. Otherwise take four more samples along a direction derived from the metric
;      differences and write either the two-sample or the four-sample average.
; NOTE(review): the structure resembles an FXAA-style filter, but nothing in this
; dump names the effect - confirm against the application source.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of the %1 array is the descriptor for all llvm.SI.load.const calls.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Byte offsets 0/4/8/12 -> CONST[0].xyzw, 32 -> CONST[2].x, 48 -> CONST[3].x
; (16 bytes per TGSI constant register).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
; %31 (element 0 of %3) and %33 (element 0 of %2) are the two descriptors handed
; to every llvm.SI.samplel.v4i32 call below (TGSI SAMP[0] / SVIEW[0]).
%30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%31 = load <8 x i32>, <8 x i32> addrspace(2)* %30, align 32, !tbaa !0
%32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%33 = load <4 x i32>, <4 x i32> addrspace(2)* %32, align 16, !tbaa !0
; llvm.SI.fs.interp(channel, attribute, %5, %7):
;   %34..%37 = IN[0].xyzw, %38/%39 = IN[1].zw, %40/%41 = IN[2].zw, %42/%43 = IN[3].xy
%34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
; Sample A at (IN[0].x, IN[0].y); the third address lane is the constant 0
; (TGSI TXL with .w = IMM[0].x = 0.0). Metric A ends up in %65.
%44 = bitcast float %34 to i32
%45 = bitcast float %35 to i32
%46 = insertelement <4 x i32> undef, i32 %44, i32 0
%47 = insertelement <4 x i32> %46, i32 %45, i32 1
%48 = insertelement <4 x i32> %47, i32 0, i32 2
%49 = bitcast <8 x i32> %31 to <32 x i8>
%50 = bitcast <4 x i32> %33 to <16 x i8>
%51 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2)
%52 = extractelement <4 x float> %51, i32 0
%53 = extractelement <4 x float> %51, i32 1
%54 = extractelement <4 x float> %51, i32 2
%55 = fmul float %52, %24
%56 = fmul float %53, %25
%57 = fmul float %54, %26
%58 = fadd float %55, %57
%59 = fmul float %56, %58
%60 = call float @llvm.sqrt.f32(float %59)
%61 = fmul float %60, 2.000000e+00
%62 = fadd float %55, %56
%63 = fadd float %62, %57
%64 = fmul float %61, %27
%65 = fadd float %64, %63
; Sample B at (IN[0].x, IN[0].w); metric B ends up in %87.
%66 = bitcast float %34 to i32
%67 = bitcast float %37 to i32
%68 = insertelement <4 x i32> undef, i32 %66, i32 0
%69 = insertelement <4 x i32> %68, i32 %67, i32 1
%70 = insertelement <4 x i32> %69, i32 0, i32 2
%71 = bitcast <8 x i32> %31 to <32 x i8>
%72 = bitcast <4 x i32> %33 to <16 x i8>
%73 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %70, <32 x i8> %71, <16 x i8> %72, i32 2)
%74 = extractelement <4 x float> %73, i32 0
%75 = extractelement <4 x float> %73, i32 1
%76 = extractelement <4 x float> %73, i32 2
%77 = fmul float %74, %24
%78 = fmul float %75, %25
%79 = fmul float %76, %26
%80 = fadd float %77, %79
%81 = fmul float %78, %80
%82 = call float @llvm.sqrt.f32(float %81)
%83 = fmul float %82, 2.000000e+00
%84 = fadd float %77, %78
%85 = fadd float %84, %79
%86 = fmul float %83, %27
%87 = fadd float %86, %85
; Sample C at (IN[0].z, IN[0].y); only the CONST[0].xyz scaling happens here,
; its metric is finished later (%146..%154).
%88 = bitcast float %36 to i32
%89 = bitcast float %35 to i32
%90 = insertelement <4 x i32> undef, i32 %88, i32 0
%91 = insertelement <4 x i32> %90, i32 %89, i32 1
%92 = insertelement <4 x i32> %91, i32 0, i32 2
%93 = bitcast <8 x i32> %31 to <32 x i8>
%94 = bitcast <4 x i32> %33 to <16 x i8>
%95 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %92, <32 x i8> %93, <16 x i8> %94, i32 2)
%96 = extractelement <4 x float> %95, i32 0
%97 = extractelement <4 x float> %95, i32 1
%98 = extractelement <4 x float> %95, i32 2
%99 = fmul float %96, %24
%100 = fmul float %97, %25
%101 = fmul float %98, %26
; Sample D at (IN[0].z, IN[0].w); metric D ends up in %123.
%102 = bitcast float %36 to i32
%103 = bitcast float %37 to i32
%104 = insertelement <4 x i32> undef, i32 %102, i32 0
%105 = insertelement <4 x i32> %104, i32 %103, i32 1
%106 = insertelement <4 x i32> %105, i32 0, i32 2
%107 = bitcast <8 x i32> %31 to <32 x i8>
%108 = bitcast <4 x i32> %33 to <16 x i8>
%109 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2)
%110 = extractelement <4 x float> %109, i32 0
%111 = extractelement <4 x float> %109, i32 1
%112 = extractelement <4 x float> %109, i32 2
%113 = fmul float %110, %24
%114 = fmul float %111, %25
%115 = fmul float %112, %26
%116 = fadd float %113, %115
%117 = fmul float %114, %116
%118 = call float @llvm.sqrt.f32(float %117)
%119 = fmul float %118, 2.000000e+00
%120 = fadd float %113, %114
%121 = fadd float %120, %115
%122 = fmul float %119, %27
%123 = fadd float %122, %121
; Centre sample at IN[3].xy. Its raw RGB (%132..%134) is the value written when
; the early-out below is taken; its metric ends up in %145.
%124 = bitcast float %42 to i32
%125 = bitcast float %43 to i32
%126 = insertelement <4 x i32> undef, i32 %124, i32 0
%127 = insertelement <4 x i32> %126, i32 %125, i32 1
%128 = insertelement <4 x i32> %127, i32 0, i32 2
%129 = bitcast <8 x i32> %31 to <32 x i8>
%130 = bitcast <4 x i32> %33 to <16 x i8>
%131 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2)
%132 = extractelement <4 x float> %131, i32 0
%133 = extractelement <4 x float> %131, i32 1
%134 = extractelement <4 x float> %131, i32 2
%135 = fmul float %132, %24
%136 = fmul float %133, %25
%137 = fmul float %134, %26
%138 = fadd float %135, %137
%139 = fmul float %136, %138
%140 = call float @llvm.sqrt.f32(float %139)
%141 = fmul float %140, 2.000000e+00
%142 = fadd float %135, %136
%143 = fadd float %142, %137
%144 = fmul float %141, %27
%145 = fadd float %144, %143
; Finish metric C from %99..%101, then add the small constant IMM[0].z
; (printed as 0.0026 in the TGSI listing) to give %154.
%146 = fadd float %99, %101
%147 = fmul float %100, %146
%148 = call float @llvm.sqrt.f32(float %147)
%149 = fmul float %148, 2.000000e+00
%150 = fadd float %99, %100
%151 = fadd float %150, %101
%152 = fmul float %149, %27
%153 = fadd float %152, %151
%154 = fadd float %153, 0x3F65555580000000
; %157 = max and %160 = min over the four outer metrics (A, B, C, D).
%155 = call float @llvm.maxnum.f32(float %154, float %123)
%156 = call float @llvm.maxnum.f32(float %65, float %87)
%157 = call float @llvm.maxnum.f32(float %155, float %156)
%158 = call float @llvm.minnum.f32(float %154, float %123)
%159 = call float @llvm.minnum.f32(float %65, float %87)
%160 = call float @llvm.minnum.f32(float %158, float %159)
; Threshold %162 = max(CONST[2].x, outer_max * CONST[3].x).
%161 = fmul float %157, %29
%162 = call float @llvm.maxnum.f32(float %28, float %161)
; %163 = B - C and %167 = D - A feed the direction computed in ELSE.
%163 = fsub float %87, %154
; %166 = (max - min) over all five metrics, centre included.
%164 = call float @llvm.maxnum.f32(float %157, float %145)
%165 = call float @llvm.minnum.f32(float %160, float %145)
%166 = fsub float %164, %165
%167 = fsub float %123, %65
; Early out: spread below the threshold -> ENDIF receives the centre RGB via the phis.
%168 = fcmp olt float %166, %162
br i1 %168, label %ENDIF, label %ELSE
ELSE: ; preds = %main_body
; CONST[4].x (byte offset 64).
%169 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
; Direction (%170, %171) = (%163 + %167, %163 - %167), scaled to unit length by
; the clamped reciprocal square root of its squared length -> (%176, %177).
%170 = fadd float %163, %167
%171 = fsub float %163, %167
%172 = fmul float %170, %170
%173 = fmul float %171, %171
%174 = fadd float %172, %173
%175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174)
%176 = fmul float %170, %175
%177 = fmul float %171, %175
; First pair of coordinates: IN[3].xy -/+ dir * IN[1].zw
; (%180, %181) is the minus side, (%183, %185) the plus side.
%178 = fmul float %176, %38
%179 = fmul float %177, %39
%180 = fsub float %42, %178
%181 = fsub float %43, %179
%182 = fmul float %176, %38
%183 = fadd float %182, %42
%184 = fmul float %177, %39
%185 = fadd float %184, %43
; Second direction: dir / (min(|dir.x|, |dir.y|) * CONST[4].x), each component
; clamped to [-2, 2] -> (%195, %196).
%186 = call float @llvm.fabs.f32(float %176)
%187 = call float @llvm.fabs.f32(float %177)
%188 = call float @llvm.minnum.f32(float %186, float %187)
%189 = fmul float %188, %169
%190 = fdiv float 1.000000e+00, %189
%191 = fmul float %176, %190
%192 = fmul float %177, %190
%193 = call float @llvm.maxnum.f32(float %191, float -2.000000e+00)
%194 = call float @llvm.maxnum.f32(float %192, float -2.000000e+00)
%195 = call float @llvm.minnum.f32(float %193, float 2.000000e+00)
%196 = call float @llvm.minnum.f32(float %194, float 2.000000e+00)
; Second pair of coordinates: IN[3].xy -/+ dir2 * IN[2].zw
; (%199, %200) is the minus side, (%202, %204) the plus side.
%197 = fmul float %195, %40
%198 = fmul float %196, %41
%199 = fsub float %42, %197
%200 = fsub float %43, %198
%201 = fmul float %195, %40
%202 = fadd float %201, %42
%203 = fmul float %196, %41
%204 = fadd float %203, %43
; Sample at the first-pair minus coordinate.
%205 = bitcast float %180 to i32
%206 = bitcast float %181 to i32
%207 = insertelement <4 x i32> undef, i32 %205, i32 0
%208 = insertelement <4 x i32> %207, i32 %206, i32 1
%209 = insertelement <4 x i32> %208, i32 0, i32 2
%210 = bitcast <8 x i32> %31 to <32 x i8>
%211 = bitcast <4 x i32> %33 to <16 x i8>
%212 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %209, <32 x i8> %210, <16 x i8> %211, i32 2)
%213 = extractelement <4 x float> %212, i32 0
%214 = extractelement <4 x float> %212, i32 1
%215 = extractelement <4 x float> %212, i32 2
; Sample at the first-pair plus coordinate.
%216 = bitcast float %183 to i32
%217 = bitcast float %185 to i32
%218 = insertelement <4 x i32> undef, i32 %216, i32 0
%219 = insertelement <4 x i32> %218, i32 %217, i32 1
%220 = insertelement <4 x i32> %219, i32 0, i32 2
%221 = bitcast <8 x i32> %31 to <32 x i8>
%222 = bitcast <4 x i32> %33 to <16 x i8>
%223 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %220, <32 x i8> %221, <16 x i8> %222, i32 2)
%224 = extractelement <4 x float> %223, i32 0
%225 = extractelement <4 x float> %223, i32 1
%226 = extractelement <4 x float> %223, i32 2
; RGB sum of the first pair -> (%227, %228, %229).
%227 = fadd float %213, %224
%228 = fadd float %214, %225
%229 = fadd float %215, %226
; Sample at the second-pair minus coordinate.
%230 = bitcast float %199 to i32
%231 = bitcast float %200 to i32
%232 = insertelement <4 x i32> undef, i32 %230, i32 0
%233 = insertelement <4 x i32> %232, i32 %231, i32 1
%234 = insertelement <4 x i32> %233, i32 0, i32 2
%235 = bitcast <8 x i32> %31 to <32 x i8>
%236 = bitcast <4 x i32> %33 to <16 x i8>
%237 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %234, <32 x i8> %235, <16 x i8> %236, i32 2)
%238 = extractelement <4 x float> %237, i32 0
%239 = extractelement <4 x float> %237, i32 1
%240 = extractelement <4 x float> %237, i32 2
; Sample at the second-pair plus coordinate.
%241 = bitcast float %202 to i32
%242 = bitcast float %204 to i32
%243 = insertelement <4 x i32> undef, i32 %241, i32 0
%244 = insertelement <4 x i32> %243, i32 %242, i32 1
%245 = insertelement <4 x i32> %244, i32 0, i32 2
%246 = bitcast <8 x i32> %31 to <32 x i8>
%247 = bitcast <4 x i32> %33 to <16 x i8>
%248 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %245, <32 x i8> %246, <16 x i8> %247, i32 2)
%249 = extractelement <4 x float> %248, i32 0
%250 = extractelement <4 x float> %248, i32 1
%251 = extractelement <4 x float> %248, i32 2
; RGB sum of the second pair, then the four-sample average:
;   (%259, %261, %263) = 0.25 * first_pair_sum + 0.25 * second_pair_sum
%252 = fadd float %238, %249
%253 = fadd float %239, %250
%254 = fadd float %240, %251
%255 = fmul float %252, 2.500000e-01
%256 = fmul float %253, 2.500000e-01
%257 = fmul float %254, 2.500000e-01
%258 = fmul float %227, 2.500000e-01
%259 = fadd float %258, %255
%260 = fmul float %228, 2.500000e-01
%261 = fadd float %260, %256
%262 = fmul float %229, 2.500000e-01
%263 = fadd float %262, %257
; Metric of the first-pair SUM (%227..%229, not yet halved), as in TGSI
; instructions 109-116 -> %274.
%264 = fmul float %227, %24
%265 = fmul float %228, %25
%266 = fmul float %229, %26
%267 = fadd float %264, %266
%268 = fmul float %265, %267
%269 = call float @llvm.sqrt.f32(float %268)
%270 = fmul float %269, 2.000000e+00
%271 = fadd float %264, %265
%272 = fadd float %271, %266
%273 = fmul float %270, %27
%274 = fadd float %273, %272
; If that metric is below the outer minimum %160, go straight to ENDIF36 (where
; the phi supplies "true"); otherwise evaluate the second test in ELSE38.
%275 = fcmp olt float %274, %160
br i1 %275, label %ENDIF36, label %ELSE38
ENDIF: ; preds = %IF40, %ENDIF36, %main_body
; Final colour per incoming edge: centre sample (main_body), half of the
; first-pair sum (IF40), or the four-sample average (ENDIF36).
%temp10.0 = phi float [ %134, %main_body ], [ %294, %IF40 ], [ %263, %ENDIF36 ]
%temp9.0 = phi float [ %133, %main_body ], [ %293, %IF40 ], [ %261, %ENDIF36 ]
%temp8.0 = phi float [ %132, %main_body ], [ %292, %IF40 ], [ %259, %ENDIF36 ]
; Pack (r, g) and (b, 1.0) with llvm.SI.packf16 and export the two packed values;
; the fourth colour component is the constant 1.0 (TGSI IMM[1].z).
%276 = call i32 @llvm.SI.packf16(float %temp8.0, float %temp9.0)
%277 = bitcast i32 %276 to float
%278 = call i32 @llvm.SI.packf16(float %temp10.0, float 1.000000e+00)
%279 = bitcast i32 %278 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %277, float %279, float %277, float %279)
ret void
ELSE38: ; preds = %ELSE
; Metric of the four-sample average -> %290; %291 is true when it exceeds the
; outer maximum %157.
%280 = fmul float %259, %24
%281 = fmul float %261, %25
%282 = fmul float %263, %26
%283 = fadd float %280, %282
%284 = fmul float %281, %283
%285 = call float @llvm.sqrt.f32(float %284)
%286 = fmul float %285, 2.000000e+00
%287 = fadd float %280, %281
%288 = fadd float %287, %282
%289 = fmul float %286, %27
%290 = fadd float %289, %288
%291 = fcmp olt float %157, %290
br label %ENDIF36
ENDIF36: ; preds = %ELSE, %ELSE38
; Flag is true when either test fired: first-pair metric below the outer minimum,
; or four-sample metric above the outer maximum.
%temp12.0 = phi i1 [ %291, %ELSE38 ], [ true, %ELSE ]
br i1 %temp12.0, label %IF40, label %ENDIF
IF40: ; preds = %ENDIF36
; Flag set: use the two-sample average, 0.5 * first_pair_sum.
%292 = fmul float %227, 5.000000e-01
%293 = fmul float %228, 5.000000e-01
%294 = fmul float %229, 5.000000e-01
br label %ENDIF
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000
v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001
v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100
v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101
v_interp_p1_f32 v13, v0, 2, 0, [m0] ; C8340200
v_interp_p2_f32 v13, [v13], v1, 2, 0, [m0] ; C8350201
v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301
v_mov_b32_e32 v6, 0 ; 7E0C0280
v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00
v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01
v_mov_b32_e32 v8, v4 ; 7E100304
v_mov_b32_e32 v9, v5 ; 7E120305
v_mov_b32_e32 v10, v6 ; 7E140306
v_mov_b32_e32 v11, v7 ; 7E160307
v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00
v_mov_b32_e32 v9, v7 ; 7E120307
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102
s_buffer_load_dword s16, s[0:3], 0x0 ; C2080100
s_buffer_load_dword s17, s[0:3], 0x1 ; C2088101
v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01
v_mov_b32_e32 v10, v6 ; 7E140306
v_mov_b32_e32 v14, v5 ; 7E1C0305
image_sample_l v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[4:11], s[12:15] ; F0900700 00611004
image_sample_l v[19:21], 7, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[4:11], s[12:15] ; F0900700 00611308
v_mov_b32_e32 v15, v6 ; 7E1E0306
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
image_sample_l v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[4:11], s[12:15] ; F0900700 0061160D
v_mov_b32_e32 v14, v7 ; 7E1C0307
v_mov_b32_e32 v15, v6 ; 7E1E0306
v_mov_b32_e32 v4, v6 ; 7E080306
s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103
s_buffer_load_dword s20, s[0:3], 0x8 ; C20A0108
v_mul_f32_e32 v5, s18, v18 ; 100A2412
v_mac_f32_e32 v5, s16, v16 ; 3E0A2010
v_mul_f32_e32 v10, s17, v17 ; 10142211
v_mul_f32_e32 v5, v5, v10 ; 100A1505
v_mac_f32_e32 v10, s16, v16 ; 3E142010
v_mac_f32_e32 v10, s18, v18 ; 3E142412
s_waitcnt vmcnt(1) ; BF8C0771
v_mul_f32_e32 v6, s18, v21 ; 100C2A12
v_mac_f32_e32 v6, s16, v19 ; 3E0C2610
v_mul_f32_e32 v12, s17, v20 ; 10182811
v_mul_f32_e32 v6, v6, v12 ; 100C1906
v_mac_f32_e32 v12, s16, v19 ; 3E182610
v_mac_f32_e32 v12, s18, v21 ; 3E182A12
image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[13:16], s[4:11], s[12:15] ; F0900700 0061070D
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v13, s18, v9 ; 101A1212
v_mac_f32_e32 v13, s16, v7 ; 3E1A0E10
v_mul_f32_e32 v11, s17, v8 ; 10161011
v_mul_f32_e32 v8, v13, v11 ; 1010170D
v_mac_f32_e32 v11, s16, v7 ; 3E160E10
v_mac_f32_e32 v11, s18, v9 ; 3E161212
v_mul_f32_e32 v7, s18, v24 ; 100E3012
v_mac_f32_e32 v7, s16, v22 ; 3E0E2C10
v_mul_f32_e32 v13, s17, v23 ; 101A2E11
v_mul_f32_e32 v14, v7, v13 ; 101C1B07
v_mac_f32_e32 v13, s16, v22 ; 3E1A2C10
v_mac_f32_e32 v13, s18, v24 ; 3E1A3012
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_add_f32_e32 v5, v5, v5 ; 060A0B05
v_mac_f32_e32 v10, s19, v5 ; 3E140A13
v_sqrt_f32_e32 v5, v6 ; 7E0A6706
v_add_f32_e32 v5, v5, v5 ; 060A0B05
v_mac_f32_e32 v12, s19, v5 ; 3E180A13
v_sqrt_f32_e32 v5, v8 ; 7E0A6708
v_add_f32_e32 v5, v5, v5 ; 060A0B05
v_mac_f32_e32 v11, s19, v5 ; 3E160A13
image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11], s[12:15] ; F0900700 00610702
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v4, s18, v9 ; 10081212
v_mac_f32_e32 v4, s16, v7 ; 3E080E10
v_mul_f32_e32 v15, s17, v8 ; 101E1011
v_mul_f32_e32 v4, v4, v15 ; 10081F04
v_sqrt_f32_e32 v4, v4 ; 7E086704
v_add_f32_e32 v4, v4, v4 ; 06080904
s_buffer_load_dword s21, s[0:3], 0xc ; C20A810C
v_mac_f32_e32 v15, s16, v7 ; 3E1E0E10
v_mac_f32_e32 v15, s18, v9 ; 3E1E1212
v_mac_f32_e32 v15, s19, v4 ; 3E1E0813
v_sqrt_f32_e32 v4, v14 ; 7E08670E
v_add_f32_e32 v4, v4, v4 ; 06080904
v_mac_f32_e32 v13, s19, v4 ; 3E1A0813
v_add_f32_e32 v4, 0x3b2aaaac, v13 ; 06081AFF 3B2AAAAC
v_max_f32_e32 v5, v12, v10 ; 200A150C
v_max3_f32 v5, v4, v11, v5 ; D2A80005 04161704
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s21, v5 ; 100C0A15
v_max_f32_e32 v13, s20, v6 ; 201A0C14
v_min_f32_e32 v6, v12, v10 ; 1E0C150C
v_min3_f32 v6, v4, v11, v6 ; D2A20006 041A1704
v_max_f32_e32 v14, v15, v5 ; 201C0B0F
v_min_f32_e32 v15, v15, v6 ; 1E1E0D0F
v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F
v_cmp_nlt_f32_e32 vcc, v14, v13 ; 7C1C1B0E
s_and_saveexec_b64 s[20:21], vcc ; BE94246A
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
s_cbranch_execz BB0_5 ; BF880000
v_subrev_f32_e32 v4, v4, v12 ; 0A081904
v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600
v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601
v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700
v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701
v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00
v_subrev_f32_e32 v10, v10, v11 ; 0A14170A
s_buffer_load_dword s22, s[0:3], 0x10 ; C20B0110
v_add_f32_e32 v11, v10, v4 ; 0616090A
v_subrev_f32_e32 v4, v10, v4 ; 0A08090A
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mac_f32_e32 v10, v11, v11 ; 3E14170B
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01
v_interp_p1_f32 v0, v0, 3, 2, [m0] ; C8000B00
v_interp_p2_f32 v0, [v0], v1, 3, 2, [m0] ; C8010B01
v_mul_f32_e32 v1, v10, v11 ; 1002170A
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_mad_f32 v11, -v1, v7, v2 ; D282000B 240A0F01
v_min_f32_e64 v10, |v1|, |v4| ; D21E030A 00020901
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v10, s22, v10 ; 10141416
v_rcp_f32_e32 v10, v10 ; 7E14550A
v_mad_f32 v12, -v4, v8, v3 ; D282000C 240E1104
v_mad_f32 v14, v7, v1, v2 ; D282000E 040A0307
v_mad_f32 v15, v8, v4, v3 ; D282000F 040E0908
v_mul_f32_e32 v1, v10, v1 ; 1002030A
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_max_f32_e32 v1, -2.0, v1 ; 200202F5
v_max_f32_e32 v4, -2.0, v4 ; 200808F5
v_min_f32_e32 v1, 2.0, v1 ; 1E0202F4
v_min_f32_e32 v4, 2.0, v4 ; 1E0808F4
v_mad_f32 v17, -v1, v9, v2 ; D2820011 240A1301
v_mad_f32 v18, -v4, v0, v3 ; D2820012 240E0104
v_mac_f32_e32 v2, v9, v1 ; 3E040309
v_mov_b32_e32 v13, 0 ; 7E1A0280
v_mac_f32_e32 v3, v0, v4 ; 3E060900
v_mov_b32_e32 v16, v13 ; 7E20030D
image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[11:14], s[4:11], s[12:15] ; F0900700 0061070B
image_sample_l v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[4:11], s[12:15] ; F0900700 00610A0E
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v0, v10, v7 ; 06000F0A
v_add_f32_e32 v1, v11, v8 ; 0602110B
v_add_f32_e32 v10, v12, v9 ; 0614130C
v_mov_b32_e32 v19, v13 ; 7E26030D
v_mov_b32_e32 v4, v13 ; 7E08030D
image_sample_l v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[4:11], s[12:15] ; F0900700 00610711
image_sample_l v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:5], s[4:11], s[12:15] ; F0900700 00610202
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v2, v2, v7 ; 06040F02
v_add_f32_e32 v3, v3, v8 ; 06061103
v_add_f32_e32 v4, v4, v9 ; 06081304
v_mov_b32_e32 v11, 0x3e800000 ; 7E1602FF 3E800000
v_mul_f32_e32 v7, v11, v2 ; 100E050B
v_mul_f32_e32 v8, v11, v3 ; 1010070B
v_mul_f32_e32 v9, v11, v4 ; 1012090B
v_mac_f32_e32 v7, v11, v0 ; 3E0E010B
v_mac_f32_e32 v8, v11, v1 ; 3E10030B
v_mac_f32_e32 v9, v11, v10 ; 3E12150B
v_mul_f32_e32 v2, s17, v1 ; 10040211
v_mul_f32_e32 v3, s18, v10 ; 10061412
v_mac_f32_e32 v3, s16, v0 ; 3E060010
v_mul_f32_e32 v3, v3, v2 ; 10060503
v_sqrt_f32_e32 v3, v3 ; 7E066703
v_add_f32_e32 v3, v3, v3 ; 06060703
v_mac_f32_e32 v2, s16, v0 ; 3E040010
v_mac_f32_e32 v2, s18, v10 ; 3E041412
v_mac_f32_e32 v2, s19, v3 ; 3E040613
v_cmp_nlt_f32_e32 vcc, v2, v6 ; 7C1C0D02
v_mov_b32_e32 v2, -1 ; 7E0402C1
s_and_saveexec_b64 s[22:23], vcc ; BE96246A
s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E
s_cbranch_execz BB0_6 ; BF880000
v_mul_f32_e32 v2, s17, v8 ; 10041011
v_mul_f32_e32 v3, s18, v9 ; 10061212
v_mac_f32_e32 v3, s16, v7 ; 3E060E10
v_mul_f32_e32 v3, v3, v2 ; 10060503
v_sqrt_f32_e32 v3, v3 ; 7E066703
v_add_f32_e32 v3, v3, v3 ; 06060703
v_mac_f32_e32 v2, s16, v7 ; 3E040E10
v_mac_f32_e32 v2, s18, v9 ; 3E041212
v_mac_f32_e32 v2, s19, v3 ; 3E040613
v_cmp_lt_f32_e32 vcc, v5, v2 ; 7C020505
v_cndmask_b32_e64 v2, 0, -1, vcc ; D2000002 01A98280
s_or_b64 exec, exec, s[22:23] ; 88FE167E
v_cmp_ne_i32_e32 vcc, 0, v2 ; 7D0A0480
s_and_saveexec_b64 s[22:23], vcc ; BE96246A
s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E
v_mul_f32_e32 v7, 0.5, v0 ; 100E00F0
v_mul_f32_e32 v8, 0.5, v1 ; 101002F0
v_mul_f32_e32 v9, 0.5, v10 ; 101214F0
s_or_b64 exec, exec, s[22:23] ; 88FE167E
s_or_b64 exec, exec, s[20:21] ; 88FE147E
v_cvt_pkrtz_f16_f32_e32 v0, v7, v8 ; 5E001107
v_cvt_pkrtz_f16_f32_e64 v1, v9, 1.0 ; D25E0001 0001E509
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 28
Code Size: 896 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point compiled from the TGSI VERT program dumped directly above.
; It exports IN[1].xy unchanged and exports the sum
;   CONST[0]*IN[0].x + CONST[1]*IN[0].y + CONST[2]*IN[0].z + CONST[3]*IN[0].w
; (component-wise) as the position.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte descriptor at element 0 of argument %1; all constant reads below go through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0..60. Matching them against the TGSI above,
; offset = 16*N + 4*component of CONST[N]: %13..%16 = CONST[0].xyzw, %17..%20 = CONST[1].xyzw,
; %21..%24 = CONST[2].xyzw, %25..%28 = CONST[3].xyzw.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; IN[0]: fetched through descriptor 0 of argument %4, indexed by %5 + %7; all four components are used.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; IN[1]: fetched through descriptor 1 of argument %4 with the same index; only x and y are extracted.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI 0: TEMP[0] = CONST[0] * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI 1: TEMP[0] += CONST[1] * IN[0].y (each MAD is emitted as a separate fmul + fadd)
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI 2: TEMP[0] += CONST[2] * IN[0].z
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI 3: TEMP[0] += CONST[3] * IN[0].w; final x,y,z,w are %64, %66, %68, %70
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; OUT[1] (GENERIC[0]) = (IN[1].x, IN[1].y, 0, 0), exported with target operand 32.
; The TGSI only writes TEMP[1].xy, so z and w are filled with 0.0 here.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
; OUT[0] (POSITION) = the accumulated TEMP[0], exported with target operand 12.
; NOTE(review): the third operand is 1 only on this last export - presumably a "done" flag; confirm against the SI export docs.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Disassembly of the vertex shader whose TGSI and LLVM IR are dumped directly above.
; v0 = s10 + v0: the index used by both buffer_load_format_xyzw instructions below
; (the IR computes this as %5 + %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[0:3] = descriptor through which the s_buffer_load_dword constant reads go.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 0, used as the z and w components of the first export.
v_mov_b32_e32 v1, 0 ; 7E020280
; s[4:7] and s[8:11] = descriptors for the two vertex inputs (offsets 0x0 and 0x4 from s[8:9]).
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Sixteen constant dwords at dword offsets 0x0..0xf, interleaved with the two input fetches:
; v[2:5] receives IN[0].xyzw and v[6:9] receives IN[1] (only v6/v7 are consumed below).
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
; The last constant load overwrites s0, part of the descriptor it reads through; no later load uses s[0:3].
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; Four accumulators, one per output component, each built as mul + three mac:
;   v0 (x) = s12*v2 + s6*v3 + s10*v4 + s16*v5   (constant dwords 0x0, 0x4, 0x8, 0xc)
;   v8 (y) = s13*v2 + s7*v3 + s11*v4 + s17*v5   (constant dwords 0x1, 0x5, 0x9, 0xd)
;   v9 (z) = s4*v2  + s8*v3 + s14*v4 + s18*v5   (constant dwords 0x2, 0x6, 0xa, 0xe)
;   v2 (w) = s5*v2  + s9*v3 + s15*v4 + s0*v5    (constant dwords 0x3, 0x7, 0xb, 0xf)
v_mul_f32_e32 v0, s12, v2 ; 1000040C
; v8 and v9 are written below, so the second buffer load into v[6:9] must have finished first.
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
; v2 (IN[0].x) is overwritten here; this is its last use as an input.
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; Target 32: (IN[1].x, IN[1].y, 0, 0) - matches the first llvm.SI.export in the IR above.
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
; Target 12: the transformed position (x, y, z, w) = (v0, v8, v9, v2).
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3..4]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 0.5000, 2.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MUL TEMP[1].xyz, TEMP[0].xyzz, CONST[0].xyzz
3: MOV TEMP[2].y, IMM[0].xxxx
4: ADD TEMP[3].x, TEMP[1].xxxx, TEMP[1].zzzz
5: MUL TEMP[3].x, TEMP[1].yyyy, TEMP[3].xxxx
6: SQRT TEMP[3].x, TEMP[3].xxxx
7: MUL TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx
8: ADD TEMP[4].x, TEMP[1].xxxx, TEMP[1].yyyy
9: ADD TEMP[1].x, TEMP[4].xxxx, TEMP[1].zzzz
10: MAD TEMP[1].x, TEMP[3].xxxx, CONST[0].wwww, TEMP[1].xxxx
11: ADD TEMP[2].x, TEMP[1].xxxx, CONST[3].xxxx
12: MOV TEMP[1].xy, TEMP[2].xyyy
13: TEX TEMP[1].xyz, TEMP[1], SAMP[1], 2D
14: MOV TEMP[2].w, TEMP[0].wwww
15: LRP TEMP[2].xyz, CONST[4].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
16: MOV OUT[0], TEMP[2]
17: END
; ModuleID = 'tgsi'
; Fragment shader entry point compiled from the TGSI FRAG program dumped directly above.
; It samples texture 0 at the interpolated IN[0].xy, derives a scalar from the sampled colour
; scaled by CONST[0], uses that scalar as the x coordinate of a lookup into texture 1, and
; blends the two results with llvm.AMDGPU.lrp using CONST[4].x as the first operand.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Descriptor at element 0 of argument %1; all constant reads below go through it.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; %24..%27 = CONST[0].xyzw (bytes 0..12), %28 = CONST[3].x (byte 48), %29 = CONST[4].x (byte 64).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
; 32-byte and 16-byte descriptors at element 0 of arguments %3 and %2; both are passed to the first sample call.
%30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
; The same pair of descriptors at element 1; both are passed to the second sample call.
%34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)*
%36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0
%37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)*
%39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0
; Interpolate two values (first operand 0 and 1, second operand 0) - the x and y of IN[0] in the TGSI.
%40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; Pack the two coordinates into the <2 x i32> address vector expected by the sample intrinsic.
%42 = bitcast float %40 to i32
%43 = bitcast float %41 to i32
%44 = insertelement <2 x i32> undef, i32 %42, i32 0
%45 = insertelement <2 x i32> %44, i32 %43, i32 1
; TGSI 1: first texture sample; %47..%50 = r, g, b, a of the result.
%46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %31, <16 x i8> %33, i32 2)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
%49 = extractelement <4 x float> %46, i32 2
%50 = extractelement <4 x float> %46, i32 3
; TGSI 2: t = sample.xyz * CONST[0].xyz
%51 = fmul float %47, %24
%52 = fmul float %48, %25
%53 = fmul float %49, %26
; TGSI 4-7: %57 = 2 * sqrt(t.y * (t.x + t.z))
%54 = fadd float %51, %53
%55 = fmul float %52, %54
%56 = call float @llvm.sqrt.f32(float %55)
%57 = fmul float %56, 2.000000e+00
; TGSI 8-11: %62 = (t.x + t.y + t.z) + %57 * CONST[0].w + CONST[3].x
%58 = fadd float %51, %52
%59 = fadd float %58, %53
%60 = fmul float %57, %27
%61 = fadd float %60, %59
%62 = fadd float %61, %28
; Second lookup coordinate: x = %62, y = 1056964608 (0x3F000000, the bit pattern of 0.5 = IMM[0].x).
%63 = bitcast float %62 to i32
%64 = insertelement <2 x i32> undef, i32 %63, i32 0
%65 = insertelement <2 x i32> %64, i32 1056964608, i32 1
; TGSI 13: second texture sample; only x, y, z of the result are used.
%66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %36, <16 x i8> %39, i32 2)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = extractelement <4 x float> %66, i32 2
; TGSI 15: per-channel LRP(CONST[4].x, first sample, second sample).
; The disassembly below expands this as second*(1 - CONST[4].x) + CONST[4].x*first.
%70 = call float @llvm.AMDGPU.lrp(float %29, float %47, float %67)
%71 = call float @llvm.AMDGPU.lrp(float %29, float %48, float %68)
%72 = call float @llvm.AMDGPU.lrp(float %29, float %49, float %69)
; Pack (r, g) and (b, a) into two 32-bit values; alpha (%50) comes straight from the first sample.
%73 = call i32 @llvm.SI.packf16(float %70, float %71)
%74 = bitcast i32 %73 to float
%75 = call i32 @llvm.SI.packf16(float %72, float %50)
%76 = bitcast i32 %75 to float
; OUT[0] (COLOR): export the two packed values with target operand 0; the packed pair is repeated in the last two slots.
; NOTE(review): the last i32 operand (1) presumably marks the export as compressed 16-bit data - confirm against the SI export docs.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %74, float %76, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Disassembly of the fragment shader whose TGSI and LLVM IR are dumped directly above.
s_wqm_b64 exec, exec ; BEFE0A7E
; s[0:3] = constant-buffer descriptor; s[12:15] and s[16:23] = the two descriptors used by the first image_sample.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
; m0 is set from s9 before the v_interp instructions, which all take [m0] as an operand.
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
; v2 and v3 = the two interpolated coordinates (each a p1/p2 pair), fed to the first sample as v[2:3].
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant dwords 0x1, 0x2, 0x0 -> s8, s9, s10 (CONST[0].y, .z, .x in the IR above).
s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101
s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102
s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
; s[24:27] and s[28:35] = the two descriptors used by the second image_sample.
s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504
s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708
; First texture sample: v[0:3] = r, g, b, a.
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800F00 00640002
; Constant dwords 0x3, 0xc, 0x10 -> s4, s5, s0 (CONST[0].w, CONST[3].x, CONST[4].x in the IR above).
s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s0, s[0:3], 0x10 ; C2000110
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v4 = s8*g; v5 = s9*b + s10*r; v5 = v5*v4, the operand of the square root.
v_mul_f32_e32 v4, s8, v1 ; 10080208
v_mul_f32_e32 v5, s9, v2 ; 100A0409
v_mac_f32_e32 v5, s10, v0 ; 3E0A000A
v_mul_f32_e32 v5, v5, v4 ; 100A0905
; v4 = s8*g + s10*r + s9*b, the sum of the three scaled channels.
v_mac_f32_e32 v4, s10, v0 ; 3E08000A
v_mac_f32_e32 v4, s9, v2 ; 3E080409
; v5 = 2*sqrt(v5), the doubling done as v5 + v5.
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_add_f32_e32 v5, v5, v5 ; 060A0B05
; v4 = v4 + s4*v5 + s5: x coordinate of the second lookup; v5 = 0.5 is its y coordinate.
v_mac_f32_e32 v4, s4, v5 ; 3E080A04
v_add_f32_e32 v4, s5, v4 ; 06080805
v_mov_b32_e32 v5, 0.5 ; 7E0A02F0
; Second texture sample: three channels written to v[4:6].
image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[24:27] ; F0800700 00C70404
; v7 = 1.0 - s0, computed before waiting for the sample result.
v_sub_f32_e64 v7, 1.0, s0 ; D2080007 000000F2
s_waitcnt vmcnt(0) ; BF8C0770
; Expanded lrp per channel: v[4:6] = second_sample*(1 - s0) + s0*first_sample.
v_mul_f32_e32 v4, v4, v7 ; 10080F04
v_mul_f32_e32 v5, v5, v7 ; 100A0F05
v_mul_f32_e32 v6, v6, v7 ; 100C0F06
v_mac_f32_e32 v4, s0, v0 ; 3E080000
v_mac_f32_e32 v5, s0, v1 ; 3E0A0200
v_mac_f32_e32 v6, s0, v2 ; 3E0C0400
; Pack (b, a) into v0 and (r, g) into v1, then export them as v1, v0, v1, v0.
v_cvt_pkrtz_f16_f32_e32 v0, v6, v3 ; 5E000706
v_cvt_pkrtz_f16_f32_e32 v1, v4, v5 ; 5E020B04
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 8
Code Size: 192 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..14]
DCL CONST[16..19]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MOV TEMP[3].x, CONST[11].xxxx
43: MOV TEMP[3].y, CONST[12].xxxx
44: MOV TEMP[3].z, CONST[13].xxxx
45: MOV TEMP[4].x, CONST[11].yyyy
46: MOV TEMP[4].y, CONST[12].yyyy
47: MOV TEMP[4].z, CONST[13].yyyy
48: MOV TEMP[5].x, CONST[11].zzzz
49: MOV TEMP[5].y, CONST[12].zzzz
50: MOV TEMP[5].z, CONST[13].zzzz
51: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
52: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
53: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xxxx, TEMP[3].xyzz
54: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz
55: RSQ TEMP[3].x, TEMP[3].xxxx
56: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx
57: MOV TEMP[3].w, IMM[2].xxxx
58: MOV TEMP[3].xyz, TEMP[0].xyzx
59: DP4 TEMP[4].x, CONST[0], TEMP[3]
60: DP4 TEMP[5].x, CONST[1], TEMP[3]
61: MOV TEMP[4].y, TEMP[5].xxxx
62: DP4 TEMP[3].x, CONST[2], TEMP[3]
63: MOV TEMP[4].z, TEMP[3].xxxx
64: MUL TEMP[3], TEMP[0].xyzz, TEMP[0].yzzx
65: DP4 TEMP[5].x, CONST[3], TEMP[3]
66: DP4 TEMP[6].x, CONST[4], TEMP[3]
67: MOV TEMP[5].y, TEMP[6].xxxx
68: DP4 TEMP[3].x, CONST[5], TEMP[3]
69: MOV TEMP[5].z, TEMP[3].xxxx
70: MUL TEMP[3], CONST[16], IN[0].xxxx
71: MAD TEMP[3], CONST[17], IN[0].yyyy, TEMP[3]
72: MAD TEMP[3], CONST[18], IN[0].zzzz, TEMP[3]
73: MAD TEMP[3], CONST[19], IN[0].wwww, TEMP[3]
74: MUL TEMP[6], CONST[7], IN[0].xxxx
75: MAD TEMP[6], CONST[8], IN[0].yyyy, TEMP[6]
76: MAD TEMP[6], CONST[9], IN[0].zzzz, TEMP[6]
77: MAD TEMP[6].xyz, CONST[10], IN[0].wwww, TEMP[6]
78: MUL TEMP[7].x, TEMP[0].yyyy, TEMP[0].yyyy
79: MAD TEMP[7].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[7].xxxx
80: MAD TEMP[5].xyz, CONST[6].xyzz, TEMP[7].xxxx, TEMP[5].xyzz
81: ADD TEMP[4].xyz, TEMP[5].xyzz, TEMP[4].xyzz
82: MOV TEMP[0].xyz, TEMP[0].xyzx
83: MOV TEMP[0].w, TEMP[6].xxxx
84: MOV TEMP[5].xy, TEMP[6].yzyy
85: MOV TEMP[5].zw, TEMP[4].yyxy
86: MOV TEMP[4].x, TEMP[4].zzzz
87: MOV OUT[5], TEMP[4]
88: MOV OUT[1], TEMP[2]
89: MOV OUT[2], TEMP[1]
90: MOV OUT[4], TEMP[5]
91: MOV OUT[3], TEMP[0]
92: MOV OUT[0], TEMP[3]
93: END
; ModuleID = 'tgsi'
; Vertex shader entry point compiled from the 94-instruction TGSI VERT program dumped directly above.
; It decodes integer fields packed into IN[2].xy, performs one texture lookup (TXL) keyed by one of
; those fields, builds and normalises a vector from CONST[11..13] and IN[1].xy, evaluates dot products
; of CONST[0..6] against that vector, transforms IN[0] by CONST[16..19] (position) and by
; CONST[7..10], and exports five generic outputs plus the position.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor at element 0 of argument %1; all constant reads below go through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant reads. Matching against the TGSI above, byte offset = 16*N + 4*component of CONST[N].
; %13..%28 = CONST[0..3].xyzw
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; %29..%36 = CONST[4..5].xyzw
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; %37..%39 = CONST[6].xyz (the w components of CONST[6..13] are never read)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
; %40..%51 = CONST[7..10].xyz
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
; %52..%60 = CONST[11..13].xyz
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
; %61..%76 = CONST[16..19].xyzw (CONST[14] is declared in the TGSI but never read)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
; 32-byte and 16-byte descriptors at element 0 of arguments %3 and %2; both are passed to the samplel call below.
%77 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0
%79 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0
; IN[0]: descriptor 0 of argument %4, indexed by %5 + %7; all four components are used.
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
; IN[1]: descriptor 1, same index; only x and y are extracted.
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
; IN[2]: descriptor 2, same index; only x and y are extracted.
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
; TGSI 0-11: split IN[2] into three integer-valued fields:
;   %105 = float(int(IN[2].x * 0.25))
;   %106 = float(int(IN[2].y) / 256)                   (signed integer division)
;   %109 = float(int(IN[2].y) - 256*(int(IN[2].y)/256)) (the remainder; the multiply by 256 is a shift by 8)
%101 = fmul float %99, 2.500000e-01
%102 = fptosi float %101 to i32
%103 = fptosi float %100 to i32
%104 = sdiv i32 %103, 256
%105 = sitofp i32 %102 to float
%106 = sitofp i32 %104 to float
%107 = shl nsw i32 %104, 8
%108 = sub i32 %103, %107
%109 = sitofp i32 %108 to float
; TGSI 12: subtract 1.0 from each field; these become OUT[1].xyz.
%110 = fadd float %105, -1.000000e+00
%111 = fadd float %106, -1.000000e+00
%112 = fadd float %109, -1.000000e+00
; TGSI 13-16: %117 = int((IN[2].y - float(int(IN[2].y))) * 10 + 0.4999), where 0.4999 is IMM[0].w.
%113 = sitofp i32 %103 to float
%114 = fsub float %100, %113
%115 = fmul float %114, 1.000000e+01
%116 = fadd float %115, 0x3FDFFE5CA0000000
%117 = fptosi float %116 to i32
; TGSI 17-24: flags set to 1.0 for %117 == 0, 1 and 2, else 0.0. At most one can be 1.0; these become OUT[2].xyz.
%118 = icmp eq i32 %117, 0
%119 = select i1 %118, float 1.000000e+00, float 0.000000e+00
%120 = icmp eq i32 %117, 1
%121 = select i1 %120, float 1.000000e+00, float 0.000000e+00
%122 = icmp eq i32 %117, 2
%123 = select i1 %122, float 1.000000e+00, float 0.000000e+00
; TGSI 29: DP4 of the flags with (%105, %106, %109, 0), i.e. it picks the field chosen by the flags.
; The trailing "+ 0.0" is the w term of the DP4.
%124 = fmul float %119, %105
%125 = fmul float %121, %106
%126 = fadd float %124, %125
%127 = fmul float %123, %109
%128 = fadd float %126, %127
%129 = fadd float %128, 0.000000e+00
; TGSI 30: scale by IMM[2].w (printed as 0.0039 in the TGSI) to form the lookup x coordinate.
%130 = fmul float %129, 0x3F70101020000000
; Lookup address: x = %130, y = 1036831949 (0x3DCCCCCD, the bit pattern of 0.1 = IMM[2].z), third lane = 0.
; The fourth lane of the <4 x i32> is left undef.
%131 = bitcast float %130 to i32
%132 = insertelement <4 x i32> undef, i32 %131, i32 0
%133 = insertelement <4 x i32> %132, i32 1036831949, i32 1
%134 = insertelement <4 x i32> %133, i32 0, i32 2
; TGSI 33: TXL texture lookup; only x and y of the result are used.
%135 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %134, <32 x i8> %78, <16 x i8> %80, i32 2)
%136 = extractelement <4 x float> %135, i32 0
%137 = extractelement <4 x float> %135, i32 1
; TGSI 34-37: %139 = lookup.x*16 - 8 (becomes OUT[1].w); %140 = lookup.y*4 (becomes OUT[2].w).
%138 = fmul float %136, 1.600000e+01
%139 = fadd float %138, -8.000000e+00
%140 = fmul float %137, 4.000000e+00
; TGSI 38-41: %144 = IN[2].x - float(4*int(IN[2].x*0.25)) - 2.0; the multiply by 4 is a shift by 2.
%141 = shl i32 %102, 2
%142 = sitofp i32 %141 to float
%143 = fsub float %99, %142
%144 = fadd float %143, -2.000000e+00
; TGSI 42-53: v = (CONST[11..13].x)*IN[1].x + (CONST[11..13].y)*IN[1].y + (CONST[11..13].z)*%144.
; Each output component takes its coefficients from one of CONST[11], [12], [13]; results are %155, %157, %159.
%145 = fmul float %52, %93
%146 = fmul float %55, %93
%147 = fmul float %58, %93
%148 = fmul float %53, %94
%149 = fadd float %148, %145
%150 = fmul float %56, %94
%151 = fadd float %150, %146
%152 = fmul float %59, %94
%153 = fadd float %152, %147
%154 = fmul float %54, %144
%155 = fadd float %154, %149
%156 = fmul float %57, %144
%157 = fadd float %156, %151
%158 = fmul float %60, %144
%159 = fadd float %158, %153
; TGSI 54-56: normalise v. %164 = dot(v, v), %165 = clamped reciprocal square root; n = (%166, %167, %168).
%160 = fmul float %155, %155
%161 = fmul float %157, %157
%162 = fadd float %161, %160
%163 = fmul float %159, %159
%164 = fadd float %162, %163
%165 = call float @llvm.AMDGPU.rsq.clamped.f32(float %164)
%166 = fmul float %155, %165
%167 = fmul float %157, %165
%168 = fmul float %159, %165
; TGSI 59-63: DP4 of CONST[0], CONST[1], CONST[2] with (n, 1.0) -> %174, %180, %186.
; The final fadd of the .w constant in each group is the "* 1.0" term.
%169 = fmul float %13, %166
%170 = fmul float %14, %167
%171 = fadd float %169, %170
%172 = fmul float %15, %168
%173 = fadd float %171, %172
%174 = fadd float %173, %16
%175 = fmul float %17, %166
%176 = fmul float %18, %167
%177 = fadd float %175, %176
%178 = fmul float %19, %168
%179 = fadd float %177, %178
%180 = fadd float %179, %20
%181 = fmul float %21, %166
%182 = fmul float %22, %167
%183 = fadd float %181, %182
%184 = fmul float %23, %168
%185 = fadd float %183, %184
%186 = fadd float %185, %24
; TGSI 64: products q = (n.x*n.y, n.y*n.z, n.z*n.z, n.z*n.x).
%187 = fmul float %166, %167
%188 = fmul float %167, %168
%189 = fmul float %168, %168
%190 = fmul float %168, %166
; TGSI 65-69: DP4 of CONST[3], CONST[4], CONST[5] with q -> %197, %204, %211.
%191 = fmul float %25, %187
%192 = fmul float %26, %188
%193 = fadd float %191, %192
%194 = fmul float %27, %189
%195 = fadd float %193, %194
%196 = fmul float %28, %190
%197 = fadd float %195, %196
%198 = fmul float %29, %187
%199 = fmul float %30, %188
%200 = fadd float %198, %199
%201 = fmul float %31, %189
%202 = fadd float %200, %201
%203 = fmul float %32, %190
%204 = fadd float %202, %203
%205 = fmul float %33, %187
%206 = fmul float %34, %188
%207 = fadd float %205, %206
%208 = fmul float %35, %189
%209 = fadd float %207, %208
%210 = fmul float %36, %190
%211 = fadd float %209, %210
; TGSI 70-73: position = CONST[16]*IN[0].x + CONST[17]*IN[0].y + CONST[18]*IN[0].z + CONST[19]*IN[0].w
; -> (%233, %235, %237, %239).
%212 = fmul float %61, %85
%213 = fmul float %62, %85
%214 = fmul float %63, %85
%215 = fmul float %64, %85
%216 = fmul float %65, %86
%217 = fadd float %216, %212
%218 = fmul float %66, %86
%219 = fadd float %218, %213
%220 = fmul float %67, %86
%221 = fadd float %220, %214
%222 = fmul float %68, %86
%223 = fadd float %222, %215
%224 = fmul float %69, %87
%225 = fadd float %224, %217
%226 = fmul float %70, %87
%227 = fadd float %226, %219
%228 = fmul float %71, %87
%229 = fadd float %228, %221
%230 = fmul float %72, %87
%231 = fadd float %230, %223
%232 = fmul float %73, %88
%233 = fadd float %232, %225
%234 = fmul float %74, %88
%235 = fadd float %234, %227
%236 = fmul float %75, %88
%237 = fadd float %236, %229
%238 = fmul float %76, %88
%239 = fadd float %238, %231
; TGSI 74-77: the same linear combination with CONST[7..10], xyz only -> (%256, %258, %260).
%240 = fmul float %40, %85
%241 = fmul float %41, %85
%242 = fmul float %42, %85
%243 = fmul float %43, %86
%244 = fadd float %243, %240
%245 = fmul float %44, %86
%246 = fadd float %245, %241
%247 = fmul float %45, %86
%248 = fadd float %247, %242
%249 = fmul float %46, %87
%250 = fadd float %249, %244
%251 = fmul float %47, %87
%252 = fadd float %251, %246
%253 = fmul float %48, %87
%254 = fadd float %253, %248
%255 = fmul float %49, %88
%256 = fadd float %255, %250
%257 = fmul float %50, %88
%258 = fadd float %257, %252
%259 = fmul float %51, %88
%260 = fadd float %259, %254
; TGSI 78-79: %263 = n.x*n.x - n.y*n.y
%261 = fmul float %167, %167
%262 = fmul float %166, %166
%263 = fsub float %262, %261
; TGSI 80: add CONST[6].xyz * %263 to the CONST[3..5] dot products.
%264 = fmul float %37, %263
%265 = fadd float %264, %197
%266 = fmul float %38, %263
%267 = fadd float %266, %204
%268 = fmul float %39, %263
%269 = fadd float %268, %211
; TGSI 81: add the CONST[0..2] dot products -> c = (%270, %271, %272).
%270 = fadd float %265, %174
%271 = fadd float %267, %180
%272 = fadd float %269, %186
; Exports. Target operands 32..36 carry OUT[1]..OUT[5] (GENERIC[0]..GENERIC[4]) and 12 carries OUT[0] (POSITION).
; OUT[1] = (field0 - 1, field1 - 1, field2 - 1, lookup.x*16 - 8)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %110, float %111, float %112, float %139)
; OUT[2] = (flag0, flag1, flag2, lookup.y*4)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %119, float %121, float %123, float %140)
; OUT[3] = (n.x, n.y, n.z, x of the CONST[7..10] result)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %166, float %167, float %168, float %256)
; OUT[4] = (y and z of the CONST[7..10] result, c.x, c.y)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %258, float %260, float %270, float %271)
; OUT[5] = (c.z, c.y, c.z, 0.0): TGSI 86 overwrites only TEMP[4].x with c.z, so y and z keep their earlier values.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %272, float %271, float %272, float 0.000000e+00)
; OUT[0] (POSITION).
; NOTE(review): the third operand is 1 only on this last export - presumably a "done" flag; confirm against the SI export docs.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %233, float %235, float %237, float %239)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904
s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s24, s[0:3], 0x2d ; C20C012D
buffer_load_format_xyzw v[2:5], v0, s[12:15], 0 idxen ; E00C2000 80030200
s_buffer_load_dword s10, s[0:3], 0x2e ; C205012E
s_buffer_load_dword s13, s[0:3], 0x30 ; C2068130
buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[8:11], v0, s[20:23], 0 idxen ; E00C2000 80050800
s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131
s_buffer_load_dword s11, s[0:3], 0x32 ; C2058132
s_buffer_load_dword s15, s[0:3], 0x34 ; C2078134
s_buffer_load_dword s16, s[0:3], 0x35 ; C2080135
s_buffer_load_dword s12, s[0:3], 0x36 ; C2060136
s_buffer_load_dword s9, s[0:3], 0x40 ; C2048140
s_buffer_load_dword s8, s[0:3], 0x41 ; C2040141
s_buffer_load_dword s17, s[0:3], 0x2c ; C208812C
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s17, v6 ; 10000C11
v_mac_f32_e32 v0, s24, v7 ; 3E000E18
v_mul_f32_e32 v10, 0x3e800000, v8 ; 101410FF 3E800000
v_cvt_i32_f32_e32 v11, v9 ; 7E161109
v_cvt_i32_f32_e32 v10, v10 ; 7E14110A
v_mul_f32_e32 v12, s13, v6 ; 10180C0D
v_mac_f32_e32 v12, s14, v7 ; 3E180E0E
v_cvt_f32_i32_e32 v13, v11 ; 7E1A0B0B
v_lshlrev_b32_e32 v14, 2, v10 ; 341C1482
v_cvt_f32_i32_e32 v14, v14 ; 7E1C0B0E
v_mul_f32_e32 v6, s15, v6 ; 100C0C0F
v_mac_f32_e32 v6, s16, v7 ; 3E0C0E10
v_subrev_f32_e32 v7, v13, v9 ; 0A0E130D
v_subrev_f32_e32 v8, v14, v8 ; 0A10110E
v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000
v_ashrrev_i32_e32 v7, 31, v11 ; 300E169F
v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_add_i32_e32 v7, v11, v7 ; 4A0E0F0B
v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00
v_sub_i32_e32 v9, v11, v9 ; 4C12130B
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480
v_cvt_f32_i32_e32 v10, v10 ; 7E140B0A
v_ashrrev_i32_e32 v7, 8, v7 ; 300E0E88
v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07
v_cvt_f32_i32_e32 v9, v9 ; 7E120B09
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_mul_f32_e32 v14, v7, v13 ; 101C1B07
v_mac_f32_e32 v14, v10, v11 ; 3E1C170A
v_mac_f32_e32 v14, v9, v1 ; 3E1C0309
v_add_f32_e32 v14, 0, v14 ; 061C1C80
v_mov_b32_e32 v17, 0 ; 7E220280
v_mul_f32_e32 v15, 0x3b808081, v14 ; 101E1CFF 3B808081
v_mov_b32_e32 v16, 0x3dcccccd ; 7E2002FF 3DCCCCCD
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[14:15], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[20:27], s[16:19] ; F0900300 00850E0F
v_add_f32_e32 v10, -1.0, v10 ; 061414F3
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_add_f32_e32 v9, -1.0, v9 ; 061212F3
v_mov_b32_e32 v16, 0xc1000000 ; 7E2002FF C1000000
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v14, v14, v16, 0x41800000 ; 401C210E 41800000
exp 15, 32, 0, 0, 0, v10, v7, v9, v14 ; F800020F 0E09070A
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v7, 4.0, v15 ; 100E1EF6
exp 15, 33, 0, 0, 0, v11, v13, v1, v7 ; F800021F 07010D0B
s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v1, -2.0, v8 ; 060210F5
v_mac_f32_e32 v0, s10, v1 ; 3E00020A
s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120
s_buffer_load_dword s6, s[0:3], 0x1d ; C203011D
s_buffer_load_dword s7, s[0:3], 0x21 ; C2038121
v_mac_f32_e32 v12, s11, v1 ; 3E18020B
s_buffer_load_dword s10, s[0:3], 0x24 ; C2050124
v_mac_f32_e32 v6, s12, v1 ; 3E0C020C
s_buffer_load_dword s11, s[0:3], 0x1e ; C205811E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s4, v2 ; 10020404
s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122
s_buffer_load_dword s12, s[0:3], 0x25 ; C2060125
v_mac_f32_e32 v1, s5, v3 ; 3E020605
v_mul_f32_e32 v7, s6, v2 ; 100E0406
v_mac_f32_e32 v7, s7, v3 ; 3E0E0607
s_buffer_load_dword s5, s[0:3], 0x28 ; C2028128
v_mac_f32_e32 v1, s10, v4 ; 3E02080A
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v12, v12 ; 3E10190C
v_mac_f32_e32 v8, v6, v6 ; 3E100D06
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
s_buffer_load_dword s10, s[0:3], 0x29 ; C2050129
s_buffer_load_dword s13, s[0:3], 0x2a ; C206812A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s5, v5 ; 3E020A05
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v9, v8, v12 ; 10121908
v_mul_f32_e32 v6, v8, v6 ; 100C0D08
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s14, s[0:3], 0x2 ; C2070102
s_buffer_load_dword s15, s[0:3], 0x3 ; C2078103
s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104
v_mul_f32_e32 v8, s7, v9 ; 10101207
v_mac_f32_e32 v8, s6, v0 ; 3E100006
s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106
s_buffer_load_dword s7, s[0:3], 0xc ; C203810C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0x7 ; C2090107
s_buffer_load_dword s19, s[0:3], 0x8 ; C2098108
s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v10, s5, v9 ; 10141205
v_mac_f32_e32 v10, s16, v0 ; 3E140010
v_mac_f32_e32 v8, s14, v6 ; 3E100C0E
s_buffer_load_dword s5, s[0:3], 0xe ; C202810E
v_mac_f32_e32 v10, s6, v6 ; 3E140C06
v_mul_f32_e32 v11, v6, v9 ; 10161306
v_mul_f32_e32 v12, s17, v11 ; 10181611
v_mul_f32_e32 v13, v9, v0 ; 101A0109
v_mac_f32_e32 v12, s7, v13 ; 3E181A07
s_buffer_load_dword s6, s[0:3], 0xf ; C203010F
v_add_f32_e32 v8, s15, v8 ; 0610100F
s_buffer_load_dword s7, s[0:3], 0x18 ; C2038118
v_mul_f32_e32 v14, v6, v6 ; 101C0D06
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v12, s5, v14 ; 3E181C05
s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110
s_buffer_load_dword s14, s[0:3], 0x11 ; C2070111
s_buffer_load_dword s15, s[0:3], 0x12 ; C2078112
s_buffer_load_dword s16, s[0:3], 0x13 ; C2080113
v_mul_f32_e32 v15, v0, v6 ; 101E0D00
v_mac_f32_e32 v12, s6, v15 ; 3E181E06
v_mul_f32_e32 v16, v9, v9 ; 10201309
v_mad_f32 v16, v0, v0, -v16 ; D2820010 84420100
v_mac_f32_e32 v12, s7, v16 ; 3E182007
s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119
exp 15, 34, 0, 0, 0, v0, v9, v6, v1 ; F800022F 01060900
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_add_f32_e32 v1, v8, v12 ; 06021908
v_mul_f32_e32 v8, s14, v11 ; 1010160E
v_mac_f32_e32 v8, s5, v13 ; 3E101A05
s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126
v_add_f32_e32 v10, s18, v10 ; 06141412
v_mac_f32_e32 v8, s15, v14 ; 3E101C0F
v_mac_f32_e32 v8, s16, v15 ; 3E101E10
s_buffer_load_dword s7, s[0:3], 0x1a ; C203811A
v_mac_f32_e32 v8, s6, v16 ; 3E102006
v_add_f32_e32 v8, v10, v8 ; 0610110A
v_mul_f32_e32 v10, s11, v2 ; 1014040B
v_mac_f32_e32 v10, s4, v3 ; 3E140604
v_mac_f32_e32 v7, s12, v4 ; 3E0E080C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v10, s5, v4 ; 3E140805
v_mac_f32_e32 v7, s10, v5 ; 3E0E0A0A
s_buffer_load_dword s4, s[0:3], 0xa ; C202010A
s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115
s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114
s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116
s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117
v_mac_f32_e32 v10, s13, v5 ; 3E140A0D
s_buffer_load_dword s12, s[0:3], 0xb ; C206010B
exp 15, 35, 0, 0, 0, v7, v10, v1, v8 ; F800023F 08010A07
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mul_f32_e32 v1, s20, v9 ; 10021214
v_mac_f32_e32 v1, s19, v0 ; 3E020013
v_mac_f32_e32 v1, s4, v6 ; 3E020C04
v_mul_f32_e32 v0, s5, v11 ; 10001605
v_mac_f32_e32 v0, s6, v13 ; 3E001A06
v_mac_f32_e32 v0, s10, v14 ; 3E001C0A
v_mac_f32_e32 v0, s11, v15 ; 3E001E0B
v_mac_f32_e32 v0, s7, v16 ; 3E002007
v_add_f32_e32 v1, s12, v1 ; 0602020C
v_add_f32_e32 v0, v1, v0 ; 06000101
exp 15, 36, 0, 0, 0, v0, v8, v0, v17 ; F800024F 11000800
s_buffer_load_dword s4, s[0:3], 0x42 ; C2020142
s_buffer_load_dword s5, s[0:3], 0x43 ; C2028143
s_buffer_load_dword s6, s[0:3], 0x44 ; C2030144
s_buffer_load_dword s7, s[0:3], 0x45 ; C2038145
s_buffer_load_dword s10, s[0:3], 0x46 ; C2050146
s_buffer_load_dword s11, s[0:3], 0x47 ; C2058147
s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148
s_buffer_load_dword s13, s[0:3], 0x49 ; C2068149
s_buffer_load_dword s14, s[0:3], 0x4a ; C207014A
s_buffer_load_dword s15, s[0:3], 0x4b ; C207814B
s_buffer_load_dword s16, s[0:3], 0x4c ; C208014C
s_buffer_load_dword s17, s[0:3], 0x4d ; C208814D
s_buffer_load_dword s18, s[0:3], 0x4e ; C209014E
s_buffer_load_dword s0, s[0:3], 0x4f ; C200014F
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s9, v2 ; 10000409
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v1, s8, v2 ; 10020408
v_mac_f32_e32 v1, s7, v3 ; 3E020607
v_mul_f32_e32 v6, s4, v2 ; 100C0404
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v6, s10, v3 ; 3E0C060A
v_mac_f32_e32 v2, s11, v3 ; 3E04060B
v_mac_f32_e32 v0, s12, v4 ; 3E00080C
v_mac_f32_e32 v1, s13, v4 ; 3E02080D
v_mac_f32_e32 v6, s14, v4 ; 3E0C080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v1, s17, v5 ; 3E020A11
v_mac_f32_e32 v6, s18, v5 ; 3E0C0A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 20
Code Size: 960 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SAMP[6]
DCL SAMP[7]
DCL SAMP[8]
DCL SAMP[9]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL SVIEW[6], 2D, FLOAT
DCL SVIEW[7], 2D, FLOAT
DCL SVIEW[8], 2D, FLOAT
DCL SVIEW[9], 2D, FLOAT
DCL CONST[0..2]
DCL CONST[13..21]
DCL TEMP[0..38], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001}
IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[2].wwww
1: MOV TEMP[0].yz, IN[3].yxyy
2: MOV TEMP[1].xy, IN[3].zwzz
3: MOV TEMP[1].z, IN[4].xxxx
4: DP3 TEMP[2].x, CONST[1].xyzz, CONST[1].xyzz
5: RSQ TEMP[2].x, TEMP[2].xxxx
6: MUL TEMP[2].xyz, CONST[1].xyzz, TEMP[2].xxxx
7: ADD TEMP[3].xyz, CONST[0].xyzz, -TEMP[0].xyzz
8: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
9: RSQ TEMP[4].x, TEMP[4].xxxx
10: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
11: ABS TEMP[4].xyz, IN[2].xyzz
12: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
13: RSQ TEMP[5].x, TEMP[5].xxxx
14: MAD TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx, IMM[0].xxxx
15: MUL TEMP[4].xyz, TEMP[4].xyzz, IMM[0].yyyy
16: MAX TEMP[4].xyz, TEMP[4].xyzz, IMM[0].zzzz
17: ADD TEMP[5].x, TEMP[4].xxxx, TEMP[4].yyyy
18: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[4].zzzz
19: RCP TEMP[5].xyz, TEMP[5].xxxx
20: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xyzz
21: ADD TEMP[5], IN[0], IMM[0].wwww
22: FLR TEMP[5].xyz, TEMP[5]
23: MOV TEMP[6].x, CONST[13].xxxx
24: MUL TEMP[7].x, TEMP[5].xxxx, CONST[13].xxxx
25: MOV TEMP[8].x, TEMP[7].xxxx
26: FLR TEMP[7].x, TEMP[7].xxxx
27: MUL TEMP[7].x, TEMP[7].xxxx, CONST[13].xxxx
28: FSGE TEMP[9].x, TEMP[5].xxxx, IMM[1].xxxx
29: UIF TEMP[9].xxxx :0
30: MOV TEMP[6].x, CONST[14].xxxx
31: ADD TEMP[9].x, TEMP[5].xxxx, IMM[1].yyyy
32: MUL TEMP[9].x, TEMP[9].xxxx, CONST[14].xxxx
33: MOV TEMP[8].x, TEMP[9].xxxx
34: FLR TEMP[10].x, TEMP[9].xxxx
35: MUL TEMP[10].x, TEMP[10].xxxx, CONST[14].xxxx
36: MOV TEMP[7].x, TEMP[10].xxxx
37: FRC TEMP[9].x, TEMP[9].xxxx
38: FRC TEMP[11].x, TEMP[10].xxxx
39: MOV TEMP[9].y, TEMP[11].xxxx
40: FLR TEMP[10].x, TEMP[10].xxxx
41: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
42: MOV TEMP[9].z, TEMP[10].xxxx
43: MOV TEMP[9].xyz, TEMP[9].xyzx
44: ELSE :0
45: FRC TEMP[8].x, TEMP[8].xxxx
46: FRC TEMP[10].x, TEMP[7].xxxx
47: MOV TEMP[8].y, TEMP[10].xxxx
48: FLR TEMP[7].x, TEMP[7].xxxx
49: MOV TEMP[8].z, TEMP[7].xxxx
50: MOV TEMP[9].xyz, TEMP[8].xyzx
51: ENDIF
52: MOV TEMP[7].x, CONST[13].xxxx
53: MUL TEMP[8].x, TEMP[5].yyyy, CONST[13].xxxx
54: MOV TEMP[10].x, TEMP[8].xxxx
55: FLR TEMP[8].x, TEMP[8].xxxx
56: MUL TEMP[8].x, TEMP[8].xxxx, CONST[13].xxxx
57: FSGE TEMP[11].x, TEMP[5].yyyy, IMM[1].xxxx
58: UIF TEMP[11].xxxx :0
59: MOV TEMP[7].x, CONST[14].xxxx
60: ADD TEMP[11].x, TEMP[5].yyyy, IMM[1].yyyy
61: MUL TEMP[11].x, TEMP[11].xxxx, CONST[14].xxxx
62: MOV TEMP[10].x, TEMP[11].xxxx
63: FLR TEMP[12].x, TEMP[11].xxxx
64: MUL TEMP[12].x, TEMP[12].xxxx, CONST[14].xxxx
65: MOV TEMP[8].x, TEMP[12].xxxx
66: FRC TEMP[11].x, TEMP[11].xxxx
67: FRC TEMP[13].x, TEMP[12].xxxx
68: MOV TEMP[11].y, TEMP[13].xxxx
69: FLR TEMP[12].x, TEMP[12].xxxx
70: ADD TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
71: MOV TEMP[11].z, TEMP[12].xxxx
72: MOV TEMP[11].xyz, TEMP[11].xyzx
73: ELSE :0
74: FRC TEMP[10].x, TEMP[10].xxxx
75: FRC TEMP[12].x, TEMP[8].xxxx
76: MOV TEMP[10].y, TEMP[12].xxxx
77: FLR TEMP[8].x, TEMP[8].xxxx
78: MOV TEMP[10].z, TEMP[8].xxxx
79: MOV TEMP[11].xyz, TEMP[10].xyzx
80: ENDIF
81: MOV TEMP[8].x, CONST[13].xxxx
82: MUL TEMP[10].x, TEMP[5].zzzz, CONST[13].xxxx
83: MOV TEMP[12].x, TEMP[10].xxxx
84: FLR TEMP[10].x, TEMP[10].xxxx
85: MUL TEMP[10].x, TEMP[10].xxxx, CONST[13].xxxx
86: FSGE TEMP[13].x, TEMP[5].zzzz, IMM[1].xxxx
87: UIF TEMP[13].xxxx :0
88: MOV TEMP[8].x, CONST[14].xxxx
89: ADD TEMP[5].x, TEMP[5].zzzz, IMM[1].yyyy
90: MUL TEMP[5].x, TEMP[5].xxxx, CONST[14].xxxx
91: MOV TEMP[12].x, TEMP[5].xxxx
92: FLR TEMP[13].x, TEMP[5].xxxx
93: MUL TEMP[13].x, TEMP[13].xxxx, CONST[14].xxxx
94: MOV TEMP[10].x, TEMP[13].xxxx
95: FRC TEMP[5].x, TEMP[5].xxxx
96: FRC TEMP[14].x, TEMP[13].xxxx
97: MOV TEMP[5].y, TEMP[14].xxxx
98: FLR TEMP[13].x, TEMP[13].xxxx
99: ADD TEMP[13].x, TEMP[13].xxxx, IMM[1].zzzz
100: MOV TEMP[5].z, TEMP[13].xxxx
101: MOV TEMP[5].xyz, TEMP[5].xyzx
102: ELSE :0
103: FRC TEMP[12].x, TEMP[12].xxxx
104: FRC TEMP[13].x, TEMP[10].xxxx
105: MOV TEMP[12].y, TEMP[13].xxxx
106: FLR TEMP[10].x, TEMP[10].xxxx
107: MOV TEMP[12].z, TEMP[10].xxxx
108: MOV TEMP[5].xyz, TEMP[12].xyzx
109: ENDIF
110: ADD TEMP[10].xyz, TEMP[0].xyzz, -CONST[0].xyzz
111: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz
112: MUL TEMP[10].x, CONST[19].xxxx, TEMP[10].xxxx
113: LG2 TEMP[10].x, TEMP[10].xxxx
114: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww
115: MUL TEMP[10].x, TEMP[10].xxxx, CONST[18].xxxx
116: MOV TEMP[12].xy, TEMP[0].xyxx
117: MOV TEMP[13].x, IMM[2].xxxx
118: FSNE TEMP[14].x, CONST[13].xxxx, TEMP[6].xxxx
119: UIF TEMP[14].xxxx :0
120: MOV TEMP[13].x, IMM[2].yyyy
121: RCP TEMP[14].x, CONST[16].xxxx
122: MUL TEMP[12].xy, TEMP[0].xyyy, TEMP[14].xxxx
123: ELSE :0
124: RCP TEMP[14].x, CONST[15].xxxx
125: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
126: ENDIF
127: FRC TEMP[12].xy, TEMP[12].xyyy
128: MUL TEMP[14].x, CONST[17].xxxx, IMM[2].wwww
129: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
130: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
131: MUL TEMP[13].x, TEMP[13].xxxx, CONST[17].xxxx
132: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
133: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[6].xxxx, TEMP[9].xyyy
134: MOV TEMP[13].xy, TEMP[12].xyyy
135: MOV TEMP[13].w, TEMP[10].xxxx
136: TXL TEMP[13], TEMP[13], SAMP[8], 2D
137: FSEQ TEMP[14].x, TEMP[9].zzzz, IMM[1].zzzz
138: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
139: MOV TEMP[15].xy, TEMP[12].xyyy
140: MOV TEMP[15].w, TEMP[10].xxxx
141: TXL TEMP[15], TEMP[15], SAMP[6], 2D
142: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[3].xxxx
143: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
144: MOV TEMP[17].xy, TEMP[12].xyyy
145: MOV TEMP[17].w, TEMP[10].xxxx
146: TXL TEMP[17], TEMP[17], SAMP[4], 2D
147: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[2].wwww
148: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
149: MOV TEMP[19].xy, TEMP[12].xyyy
150: MOV TEMP[19].w, TEMP[10].xxxx
151: TXL TEMP[19], TEMP[19], SAMP[2], 2D
152: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].zzzz
153: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
154: MOV TEMP[12].xy, TEMP[12].xyyy
155: MOV TEMP[12].w, TEMP[10].xxxx
156: TXL TEMP[12], TEMP[12], SAMP[0], 2D
157: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[3].yyyy
158: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
159: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
160: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
161: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
162: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
163: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12]
164: MOV TEMP[13].xy, IN[3].yxyy
165: MOV TEMP[14].x, IMM[2].xxxx
166: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[6].xxxx
167: UIF TEMP[15].xxxx :0
168: MOV TEMP[14].x, IMM[2].yyyy
169: RCP TEMP[15].x, CONST[16].xxxx
170: MUL TEMP[13].xy, IN[3].yxxx, TEMP[15].xxxx
171: ELSE :0
172: RCP TEMP[15].x, CONST[15].xxxx
173: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
174: ENDIF
175: FRC TEMP[13].xy, TEMP[13].xyyy
176: MUL TEMP[15].x, CONST[17].xxxx, IMM[2].wwww
177: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
178: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
179: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx
180: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
181: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[9].xyyy
182: MOV TEMP[14].xy, TEMP[13].xyyy
183: MOV TEMP[14].w, TEMP[10].xxxx
184: TXL TEMP[14], TEMP[14], SAMP[8], 2D
185: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz
186: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
187: MOV TEMP[16].xy, TEMP[13].xyyy
188: MOV TEMP[16].w, TEMP[10].xxxx
189: TXL TEMP[16], TEMP[16], SAMP[6], 2D
190: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx
191: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
192: MOV TEMP[18].xy, TEMP[13].xyyy
193: MOV TEMP[18].w, TEMP[10].xxxx
194: TXL TEMP[18], TEMP[18], SAMP[4], 2D
195: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww
196: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
197: MOV TEMP[20].xy, TEMP[13].xyyy
198: MOV TEMP[20].w, TEMP[10].xxxx
199: TXL TEMP[20], TEMP[20], SAMP[2], 2D
200: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz
201: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
202: MOV TEMP[13].xy, TEMP[13].xyyy
203: MOV TEMP[13].w, TEMP[10].xxxx
204: TXL TEMP[13], TEMP[13], SAMP[0], 2D
205: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy
206: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
207: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
208: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
209: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
210: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
211: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13]
212: MOV TEMP[14].xy, TEMP[0].zxzz
213: MOV TEMP[15].x, IMM[2].xxxx
214: FSNE TEMP[16].x, CONST[13].xxxx, TEMP[6].xxxx
215: UIF TEMP[16].xxxx :0
216: MOV TEMP[15].x, IMM[2].yyyy
217: RCP TEMP[16].x, CONST[16].xxxx
218: MUL TEMP[14].xy, TEMP[0].zxxx, TEMP[16].xxxx
219: ELSE :0
220: RCP TEMP[16].x, CONST[15].xxxx
221: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
222: ENDIF
223: FRC TEMP[14].xy, TEMP[14].xyyy
224: MUL TEMP[16].x, CONST[17].xxxx, IMM[2].wwww
225: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
226: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
227: MUL TEMP[15].x, TEMP[15].xxxx, CONST[17].xxxx
228: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
229: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[9].xyyy
230: MOV TEMP[15].xy, TEMP[14].xyyy
231: MOV TEMP[15].w, TEMP[10].xxxx
232: TXL TEMP[15], TEMP[15], SAMP[8], 2D
233: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz
234: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
235: MOV TEMP[17].xy, TEMP[14].xyyy
236: MOV TEMP[17].w, TEMP[10].xxxx
237: TXL TEMP[17], TEMP[17], SAMP[6], 2D
238: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx
239: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
240: MOV TEMP[19].xy, TEMP[14].xyyy
241: MOV TEMP[19].w, TEMP[10].xxxx
242: TXL TEMP[19], TEMP[19], SAMP[4], 2D
243: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww
244: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
245: MOV TEMP[21].xy, TEMP[14].xyyy
246: MOV TEMP[21].w, TEMP[10].xxxx
247: TXL TEMP[21], TEMP[21], SAMP[2], 2D
248: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz
249: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
250: MOV TEMP[14].xy, TEMP[14].xyyy
251: MOV TEMP[14].w, TEMP[10].xxxx
252: TXL TEMP[14], TEMP[14], SAMP[0], 2D
253: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy
254: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
255: MUL TEMP[14], TEMP[14], TEMP[23].xxxx
256: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14]
257: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14]
258: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14]
259: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14]
260: MOV TEMP[15].xy, TEMP[0].xyxx
261: MOV TEMP[16].x, IMM[2].xxxx
262: FSNE TEMP[17].x, CONST[13].xxxx, TEMP[7].xxxx
263: UIF TEMP[17].xxxx :0
264: MOV TEMP[16].x, IMM[2].yyyy
265: RCP TEMP[17].x, CONST[16].xxxx
266: MUL TEMP[15].xy, TEMP[0].xyyy, TEMP[17].xxxx
267: ELSE :0
268: RCP TEMP[17].x, CONST[15].xxxx
269: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx
270: ENDIF
271: FRC TEMP[15].xy, TEMP[15].xyyy
272: MUL TEMP[17].x, CONST[17].xxxx, IMM[2].wwww
273: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx
274: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx
275: MUL TEMP[16].x, TEMP[16].xxxx, CONST[17].xxxx
276: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx
277: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[7].xxxx, TEMP[11].xyyy
278: MOV TEMP[16].xy, TEMP[15].xyyy
279: MOV TEMP[16].w, TEMP[10].xxxx
280: TXL TEMP[16], TEMP[16], SAMP[8], 2D
281: FSEQ TEMP[17].x, TEMP[11].zzzz, IMM[1].zzzz
282: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
283: MOV TEMP[18].xy, TEMP[15].xyyy
284: MOV TEMP[18].w, TEMP[10].xxxx
285: TXL TEMP[18], TEMP[18], SAMP[6], 2D
286: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[3].xxxx
287: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
288: MOV TEMP[20].xy, TEMP[15].xyyy
289: MOV TEMP[20].w, TEMP[10].xxxx
290: TXL TEMP[20], TEMP[20], SAMP[4], 2D
291: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[2].wwww
292: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
293: MOV TEMP[22].xy, TEMP[15].xyyy
294: MOV TEMP[22].w, TEMP[10].xxxx
295: TXL TEMP[22], TEMP[22], SAMP[2], 2D
296: FSEQ TEMP[23].x, TEMP[11].zzzz, IMM[2].zzzz
297: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
298: MOV TEMP[15].xy, TEMP[15].xyyy
299: MOV TEMP[15].w, TEMP[10].xxxx
300: TXL TEMP[15], TEMP[15], SAMP[0], 2D
301: FSEQ TEMP[24].x, TEMP[11].zzzz, IMM[3].yyyy
302: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
303: MUL TEMP[15], TEMP[15], TEMP[24].xxxx
304: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15]
305: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15]
306: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15]
307: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15]
308: MOV TEMP[16].xy, IN[3].yxyy
309: MOV TEMP[17].x, IMM[2].xxxx
310: FSNE TEMP[18].x, CONST[13].xxxx, TEMP[7].xxxx
311: UIF TEMP[18].xxxx :0
312: MOV TEMP[17].x, IMM[2].yyyy
313: RCP TEMP[18].x, CONST[16].xxxx
314: MUL TEMP[16].xy, IN[3].yxxx, TEMP[18].xxxx
315: ELSE :0
316: RCP TEMP[18].x, CONST[15].xxxx
317: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx
318: ENDIF
319: FRC TEMP[16].xy, TEMP[16].xyyy
320: MUL TEMP[18].x, CONST[17].xxxx, IMM[2].wwww
321: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx
322: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx
323: MUL TEMP[17].x, TEMP[17].xxxx, CONST[17].xxxx
324: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx
325: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[7].xxxx, TEMP[11].xyyy
326: MOV TEMP[17].xy, TEMP[16].xyyy
327: MOV TEMP[17].w, TEMP[10].xxxx
328: TXL TEMP[17], TEMP[17], SAMP[8], 2D
329: FSEQ TEMP[18].x, TEMP[11].zzzz, IMM[1].zzzz
330: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
331: MOV TEMP[19].xy, TEMP[16].xyyy
332: MOV TEMP[19].w, TEMP[10].xxxx
333: TXL TEMP[19], TEMP[19], SAMP[6], 2D
334: FSEQ TEMP[20].x, TEMP[11].zzzz, IMM[3].xxxx
335: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
336: MOV TEMP[21].xy, TEMP[16].xyyy
337: MOV TEMP[21].w, TEMP[10].xxxx
338: TXL TEMP[21], TEMP[21], SAMP[4], 2D
339: FSEQ TEMP[22].x, TEMP[11].zzzz, IMM[2].wwww
340: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
341: MOV TEMP[23].xy, TEMP[16].xyyy
342: MOV TEMP[23].w, TEMP[10].xxxx
343: TXL TEMP[23], TEMP[23], SAMP[2], 2D
344: FSEQ TEMP[24].x, TEMP[11].zzzz, IMM[2].zzzz
345: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
346: MOV TEMP[16].xy, TEMP[16].xyyy
347: MOV TEMP[16].w, TEMP[10].xxxx
348: TXL TEMP[16], TEMP[16], SAMP[0], 2D
349: FSEQ TEMP[25].x, TEMP[11].zzzz, IMM[3].yyyy
350: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
351: MUL TEMP[16], TEMP[16], TEMP[25].xxxx
352: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16]
353: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16]
354: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16]
355: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16]
356: MOV TEMP[17].xy, TEMP[0].zxzz
357: MOV TEMP[18].x, IMM[2].xxxx
358: FSNE TEMP[19].x, CONST[13].xxxx, TEMP[7].xxxx
359: UIF TEMP[19].xxxx :0
360: MOV TEMP[18].x, IMM[2].yyyy
361: RCP TEMP[19].x, CONST[16].xxxx
362: MUL TEMP[17].xy, TEMP[0].zxxx, TEMP[19].xxxx
363: ELSE :0
364: RCP TEMP[19].x, CONST[15].xxxx
365: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx
366: ENDIF
367: FRC TEMP[17].xy, TEMP[17].xyyy
368: MUL TEMP[19].x, CONST[17].xxxx, IMM[2].wwww
369: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx
370: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx
371: MUL TEMP[18].x, TEMP[18].xxxx, CONST[17].xxxx
372: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx
373: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[7].xxxx, TEMP[11].xyyy
374: MOV TEMP[18].xy, TEMP[17].xyyy
375: MOV TEMP[18].w, TEMP[10].xxxx
376: TXL TEMP[18], TEMP[18], SAMP[8], 2D
377: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[1].zzzz
378: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
379: MOV TEMP[20].xy, TEMP[17].xyyy
380: MOV TEMP[20].w, TEMP[10].xxxx
381: TXL TEMP[20], TEMP[20], SAMP[6], 2D
382: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[3].xxxx
383: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
384: MOV TEMP[22].xy, TEMP[17].xyyy
385: MOV TEMP[22].w, TEMP[10].xxxx
386: TXL TEMP[22], TEMP[22], SAMP[4], 2D
387: FSEQ TEMP[23].x, TEMP[11].zzzz, IMM[2].wwww
388: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
389: MOV TEMP[24].xy, TEMP[17].xyyy
390: MOV TEMP[24].w, TEMP[10].xxxx
391: TXL TEMP[24], TEMP[24], SAMP[2], 2D
392: FSEQ TEMP[25].x, TEMP[11].zzzz, IMM[2].zzzz
393: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
394: MOV TEMP[17].xy, TEMP[17].xyyy
395: MOV TEMP[17].w, TEMP[10].xxxx
396: TXL TEMP[17], TEMP[17], SAMP[0], 2D
397: FSEQ TEMP[26].x, TEMP[11].zzzz, IMM[3].yyyy
398: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
399: MUL TEMP[17], TEMP[17], TEMP[26].xxxx
400: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17]
401: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17]
402: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17]
403: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17]
404: MOV TEMP[18].xy, TEMP[0].xyxx
405: MOV TEMP[19].x, IMM[2].xxxx
406: FSNE TEMP[20].x, CONST[13].xxxx, TEMP[8].xxxx
407: UIF TEMP[20].xxxx :0
408: MOV TEMP[19].x, IMM[2].yyyy
409: RCP TEMP[20].x, CONST[16].xxxx
410: MUL TEMP[18].xy, TEMP[0].xyyy, TEMP[20].xxxx
411: ELSE :0
412: RCP TEMP[20].x, CONST[15].xxxx
413: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx
414: ENDIF
415: FRC TEMP[18].xy, TEMP[18].xyyy
416: MUL TEMP[20].x, CONST[17].xxxx, IMM[2].wwww
417: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx
418: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx
419: MUL TEMP[19].x, TEMP[19].xxxx, CONST[17].xxxx
420: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx
421: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[8].xxxx, TEMP[5].xyyy
422: MOV TEMP[19].xy, TEMP[18].xyyy
423: MOV TEMP[19].w, TEMP[10].xxxx
424: TXL TEMP[19], TEMP[19], SAMP[8], 2D
425: FSEQ TEMP[20].x, TEMP[5].zzzz, IMM[1].zzzz
426: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
427: MOV TEMP[21].xy, TEMP[18].xyyy
428: MOV TEMP[21].w, TEMP[10].xxxx
429: TXL TEMP[21], TEMP[21], SAMP[6], 2D
430: FSEQ TEMP[22].x, TEMP[5].zzzz, IMM[3].xxxx
431: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
432: MOV TEMP[23].xy, TEMP[18].xyyy
433: MOV TEMP[23].w, TEMP[10].xxxx
434: TXL TEMP[23], TEMP[23], SAMP[4], 2D
435: FSEQ TEMP[24].x, TEMP[5].zzzz, IMM[2].wwww
436: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
437: MOV TEMP[25].xy, TEMP[18].xyyy
438: MOV TEMP[25].w, TEMP[10].xxxx
439: TXL TEMP[25], TEMP[25], SAMP[2], 2D
440: FSEQ TEMP[26].x, TEMP[5].zzzz, IMM[2].zzzz
441: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
442: MOV TEMP[18].xy, TEMP[18].xyyy
443: MOV TEMP[18].w, TEMP[10].xxxx
444: TXL TEMP[18], TEMP[18], SAMP[0], 2D
445: FSEQ TEMP[27].x, TEMP[5].zzzz, IMM[3].yyyy
446: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
447: MUL TEMP[18], TEMP[18], TEMP[27].xxxx
448: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18]
449: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18]
450: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18]
451: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18]
452: MOV TEMP[19].xy, IN[3].yxyy
453: MOV TEMP[20].x, IMM[2].xxxx
454: FSNE TEMP[21].x, CONST[13].xxxx, TEMP[8].xxxx
455: UIF TEMP[21].xxxx :0
456: MOV TEMP[20].x, IMM[2].yyyy
457: RCP TEMP[21].x, CONST[16].xxxx
458: MUL TEMP[19].xy, IN[3].yxxx, TEMP[21].xxxx
459: ELSE :0
460: RCP TEMP[21].x, CONST[15].xxxx
461: MUL TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx
462: ENDIF
463: FRC TEMP[19].xy, TEMP[19].xyyy
464: MUL TEMP[21].x, CONST[17].xxxx, IMM[2].wwww
465: MUL TEMP[21].x, TEMP[21].xxxx, TEMP[20].xxxx
466: ADD TEMP[21].x, IMM[2].zzzz, -TEMP[21].xxxx
467: MUL TEMP[20].x, TEMP[20].xxxx, CONST[17].xxxx
468: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx, TEMP[20].xxxx
469: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[8].xxxx, TEMP[5].xyyy
470: MOV TEMP[20].xy, TEMP[19].xyyy
471: MOV TEMP[20].w, TEMP[10].xxxx
472: TXL TEMP[20], TEMP[20], SAMP[8], 2D
473: FSEQ TEMP[21].x, TEMP[5].zzzz, IMM[1].zzzz
474: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
475: MOV TEMP[22].xy, TEMP[19].xyyy
476: MOV TEMP[22].w, TEMP[10].xxxx
477: TXL TEMP[22], TEMP[22], SAMP[6], 2D
478: FSEQ TEMP[23].x, TEMP[5].zzzz, IMM[3].xxxx
479: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
480: MOV TEMP[24].xy, TEMP[19].xyyy
481: MOV TEMP[24].w, TEMP[10].xxxx
482: TXL TEMP[24], TEMP[24], SAMP[4], 2D
483: FSEQ TEMP[25].x, TEMP[5].zzzz, IMM[2].wwww
484: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
485: MOV TEMP[26].xy, TEMP[19].xyyy
486: MOV TEMP[26].w, TEMP[10].xxxx
487: TXL TEMP[26], TEMP[26], SAMP[2], 2D
488: FSEQ TEMP[27].x, TEMP[5].zzzz, IMM[2].zzzz
489: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
490: MOV TEMP[19].xy, TEMP[19].xyyy
491: MOV TEMP[19].w, TEMP[10].xxxx
492: TXL TEMP[19], TEMP[19], SAMP[0], 2D
493: FSEQ TEMP[28].x, TEMP[5].zzzz, IMM[3].yyyy
494: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz
495: MUL TEMP[19], TEMP[19], TEMP[28].xxxx
496: MAD TEMP[19], TEMP[26], TEMP[27].xxxx, TEMP[19]
497: MAD TEMP[19], TEMP[24], TEMP[25].xxxx, TEMP[19]
498: MAD TEMP[19], TEMP[22], TEMP[23].xxxx, TEMP[19]
499: MAD TEMP[19], TEMP[20], TEMP[21].xxxx, TEMP[19]
500: MOV TEMP[20].xy, TEMP[0].zxzz
501: MOV TEMP[21].x, IMM[2].xxxx
502: FSNE TEMP[22].x, CONST[13].xxxx, TEMP[8].xxxx
503: UIF TEMP[22].xxxx :0
504: MOV TEMP[21].x, IMM[2].yyyy
505: RCP TEMP[22].x, CONST[16].xxxx
506: MUL TEMP[20].xy, TEMP[0].zxxx, TEMP[22].xxxx
507: ELSE :0
508: RCP TEMP[22].x, CONST[15].xxxx
509: MUL TEMP[20].xy, TEMP[20].xyyy, TEMP[22].xxxx
510: ENDIF
511: FRC TEMP[20].xy, TEMP[20].xyyy
512: MUL TEMP[22].x, CONST[17].xxxx, IMM[2].wwww
513: MUL TEMP[22].x, TEMP[22].xxxx, TEMP[21].xxxx
514: ADD TEMP[22].x, IMM[2].zzzz, -TEMP[22].xxxx
515: MUL TEMP[21].x, TEMP[21].xxxx, CONST[17].xxxx
516: MAD TEMP[20].xy, TEMP[20].xyyy, TEMP[22].xxxx, TEMP[21].xxxx
517: MAD TEMP[20].xy, TEMP[20].xyyy, TEMP[8].xxxx, TEMP[5].xyyy
518: MOV TEMP[21].xy, TEMP[20].xyyy
519: MOV TEMP[21].w, TEMP[10].xxxx
520: TXL TEMP[21], TEMP[21], SAMP[8], 2D
521: FSEQ TEMP[22].x, TEMP[5].zzzz, IMM[1].zzzz
522: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
523: MOV TEMP[23].xy, TEMP[20].xyyy
524: MOV TEMP[23].w, TEMP[10].xxxx
525: TXL TEMP[23], TEMP[23], SAMP[6], 2D
526: FSEQ TEMP[24].x, TEMP[5].zzzz, IMM[3].xxxx
527: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
528: MOV TEMP[25].xy, TEMP[20].xyyy
529: MOV TEMP[25].w, TEMP[10].xxxx
530: TXL TEMP[25], TEMP[25], SAMP[4], 2D
531: FSEQ TEMP[26].x, TEMP[5].zzzz, IMM[2].wwww
532: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
533: MOV TEMP[27].xy, TEMP[20].xyyy
534: MOV TEMP[27].w, TEMP[10].xxxx
535: TXL TEMP[27], TEMP[27], SAMP[2], 2D
536: FSEQ TEMP[28].x, TEMP[5].zzzz, IMM[2].zzzz
537: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz
538: MOV TEMP[20].xy, TEMP[20].xyyy
539: MOV TEMP[20].w, TEMP[10].xxxx
540: TXL TEMP[20], TEMP[20], SAMP[0], 2D
541: FSEQ TEMP[29].x, TEMP[5].zzzz, IMM[3].yyyy
542: AND TEMP[29].x, TEMP[29].xxxx, IMM[2].zzzz
543: MUL TEMP[20], TEMP[20], TEMP[29].xxxx
544: MAD TEMP[20], TEMP[27], TEMP[28].xxxx, TEMP[20]
545: MAD TEMP[20], TEMP[25], TEMP[26].xxxx, TEMP[20]
546: MAD TEMP[20], TEMP[23], TEMP[24].xxxx, TEMP[20]
547: MAD TEMP[20], TEMP[21], TEMP[22].xxxx, TEMP[20]
548: MUL TEMP[18], TEMP[18], TEMP[4].zzzz
549: MAD TEMP[18], TEMP[19], TEMP[4].xxxx, TEMP[18]
550: MAD TEMP[18], TEMP[20], TEMP[4].yyyy, TEMP[18]
551: MUL TEMP[15], TEMP[15], TEMP[4].zzzz
552: MAD TEMP[15], TEMP[16], TEMP[4].xxxx, TEMP[15]
553: MAD TEMP[15], TEMP[17], TEMP[4].yyyy, TEMP[15]
554: MUL TEMP[12], TEMP[12], TEMP[4].zzzz
555: MAD TEMP[12], TEMP[13], TEMP[4].xxxx, TEMP[12]
556: MAD TEMP[12], TEMP[14], TEMP[4].yyyy, TEMP[12]
557: MUL TEMP[12], IN[1].xxxx, TEMP[12]
558: MAD TEMP[12], IN[1].yyyy, TEMP[15], TEMP[12]
559: MAD TEMP[12].xyz, IN[1].zzzz, TEMP[18], TEMP[12]
560: MOV TEMP[13].xy, IN[3].yxyy
561: MOV TEMP[14].x, IMM[2].xxxx
562: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[6].xxxx
563: UIF TEMP[15].xxxx :0
564: MOV TEMP[14].x, IMM[2].yyyy
565: RCP TEMP[15].x, CONST[16].xxxx
566: MUL TEMP[13].xy, IN[3].yxxx, TEMP[15].xxxx
567: ELSE :0
568: RCP TEMP[15].x, CONST[15].xxxx
569: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
570: ENDIF
571: FRC TEMP[13].xy, TEMP[13].xyyy
572: MUL TEMP[15].x, CONST[17].xxxx, IMM[2].wwww
573: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
574: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
575: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx
576: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
577: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[9].xyyy
578: MOV TEMP[14].xy, TEMP[13].xyyy
579: MOV TEMP[14].w, TEMP[10].xxxx
580: TXL TEMP[14], TEMP[14], SAMP[9], 2D
581: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz
582: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
583: MOV TEMP[16].xy, TEMP[13].xyyy
584: MOV TEMP[16].w, TEMP[10].xxxx
585: TXL TEMP[16], TEMP[16], SAMP[7], 2D
586: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx
587: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
588: MOV TEMP[18].xy, TEMP[13].xyyy
589: MOV TEMP[18].w, TEMP[10].xxxx
590: TXL TEMP[18], TEMP[18], SAMP[5], 2D
591: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww
592: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
593: MOV TEMP[20].xy, TEMP[13].xyyy
594: MOV TEMP[20].w, TEMP[10].xxxx
595: TXL TEMP[20], TEMP[20], SAMP[3], 2D
596: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz
597: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
598: MOV TEMP[13].xy, TEMP[13].xyyy
599: MOV TEMP[13].w, TEMP[10].xxxx
600: TXL TEMP[13], TEMP[13], SAMP[1], 2D
601: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy
602: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
603: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
604: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
605: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
606: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
607: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13]
608: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz
609: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy
610: MOV_SAT TEMP[30].x, TEMP[14].xxxx
611: MOV TEMP[14].xy, TEMP[0].zxzz
612: MOV TEMP[15].x, IMM[2].xxxx
613: FSNE TEMP[16].x, CONST[13].xxxx, TEMP[6].xxxx
614: UIF TEMP[16].xxxx :0
615: MOV TEMP[15].x, IMM[2].yyyy
616: RCP TEMP[16].x, CONST[16].xxxx
617: MUL TEMP[14].xy, TEMP[0].zxxx, TEMP[16].xxxx
618: ELSE :0
619: RCP TEMP[16].x, CONST[15].xxxx
620: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
621: ENDIF
622: FRC TEMP[14].xy, TEMP[14].xyyy
623: MUL TEMP[16].x, CONST[17].xxxx, IMM[2].wwww
624: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
625: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
626: MUL TEMP[15].x, TEMP[15].xxxx, CONST[17].xxxx
627: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
628: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[9].xyyy
629: MOV TEMP[15].xy, TEMP[14].xyyy
630: MOV TEMP[15].w, TEMP[10].xxxx
631: TXL TEMP[15], TEMP[15], SAMP[9], 2D
632: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz
633: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
634: MOV TEMP[17].xy, TEMP[14].xyyy
635: MOV TEMP[17].w, TEMP[10].xxxx
636: TXL TEMP[17], TEMP[17], SAMP[7], 2D
637: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx
638: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
639: MOV TEMP[19].xy, TEMP[14].xyyy
640: MOV TEMP[19].w, TEMP[10].xxxx
641: TXL TEMP[19], TEMP[19], SAMP[5], 2D
642: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww
643: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
644: MOV TEMP[21].xy, TEMP[14].xyyy
645: MOV TEMP[21].w, TEMP[10].xxxx
646: TXL TEMP[21], TEMP[21], SAMP[3], 2D
647: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz
648: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
649: MOV TEMP[14].xy, TEMP[14].xyyy
650: MOV TEMP[14].w, TEMP[10].xxxx
651: TXL TEMP[14], TEMP[14], SAMP[1], 2D
652: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy
653: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
654: MUL TEMP[14], TEMP[14], TEMP[23].xxxx
655: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14]
656: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14]
657: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14]
658: MAD TEMP[14].yw, TEMP[15], TEMP[16].xxxx, TEMP[14]
659: MAD TEMP[14].xy, TEMP[14].wyyy, IMM[2].wwww, IMM[3].zzzz
660: DP2 TEMP[15].x, TEMP[14].xyyy, TEMP[14].xyyy
661: MOV_SAT TEMP[31].x, TEMP[15].xxxx
662: MOV TEMP[15].xy, TEMP[0].xyxx
663: MOV TEMP[16].x, IMM[2].xxxx
664: FSNE TEMP[17].x, CONST[13].xxxx, TEMP[6].xxxx
665: UIF TEMP[17].xxxx :0
666: MOV TEMP[16].x, IMM[2].yyyy
667: RCP TEMP[17].x, CONST[16].xxxx
668: MUL TEMP[15].xy, TEMP[0].xyyy, TEMP[17].xxxx
669: ELSE :0
670: RCP TEMP[17].x, CONST[15].xxxx
671: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx
672: ENDIF
673: FRC TEMP[15].xy, TEMP[15].xyyy
674: MUL TEMP[17].x, CONST[17].xxxx, IMM[2].wwww
675: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx
676: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx
677: MUL TEMP[16].x, TEMP[16].xxxx, CONST[17].xxxx
678: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx
679: MAD TEMP[6].xy, TEMP[15].xyyy, TEMP[6].xxxx, TEMP[9].xyyy
680: MOV TEMP[15].xy, TEMP[6].xyyy
681: MOV TEMP[15].w, TEMP[10].xxxx
682: TXL TEMP[15], TEMP[15], SAMP[9], 2D
683: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz
684: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
685: MOV TEMP[17].xy, TEMP[6].xyyy
686: MOV TEMP[17].w, TEMP[10].xxxx
687: TXL TEMP[17], TEMP[17], SAMP[7], 2D
688: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx
689: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
690: MOV TEMP[19].xy, TEMP[6].xyyy
691: MOV TEMP[19].w, TEMP[10].xxxx
692: TXL TEMP[19], TEMP[19], SAMP[5], 2D
693: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww
694: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
695: MOV TEMP[21].xy, TEMP[6].xyyy
696: MOV TEMP[21].w, TEMP[10].xxxx
697: TXL TEMP[21], TEMP[21], SAMP[3], 2D
698: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz
699: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
700: MOV TEMP[6].xy, TEMP[6].xyyy
701: MOV TEMP[6].w, TEMP[10].xxxx
702: TXL TEMP[6], TEMP[6], SAMP[1], 2D
703: FSEQ TEMP[9].x, TEMP[9].zzzz, IMM[3].yyyy
704: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz
705: MUL TEMP[6], TEMP[6], TEMP[9].xxxx
706: MAD TEMP[6], TEMP[21], TEMP[22].xxxx, TEMP[6]
707: MAD TEMP[6], TEMP[19], TEMP[20].xxxx, TEMP[6]
708: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6]
709: MAD TEMP[6].yw, TEMP[15], TEMP[16].xxxx, TEMP[6]
710: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz
711: DP2 TEMP[9].x, TEMP[6].xyyy, TEMP[6].xyyy
712: MOV_SAT TEMP[32].x, TEMP[9].xxxx
713: MOV TEMP[9].x, IMM[3].yyyy
714: MOV TEMP[9].y, TEMP[13].xxxx
715: MOV TEMP[9].z, TEMP[13].yyyy
716: MOV TEMP[13].y, IMM[3].yyyy
717: MOV TEMP[13].x, TEMP[14].yyyy
718: MOV TEMP[13].z, TEMP[14].xxxx
719: MOV TEMP[14].z, IMM[3].yyyy
720: MOV TEMP[14].xy, TEMP[6].xyxx
721: MUL TEMP[6].xyz, TEMP[9].xyzz, TEMP[4].xxxx
722: MAD TEMP[6].xyz, TEMP[13].xyzz, TEMP[4].yyyy, TEMP[6].xyzz
723: MAD TEMP[6].xyz, TEMP[14].xyzz, TEMP[4].zzzz, TEMP[6].xyzz
724: MOV TEMP[9].xy, IN[3].yxyy
725: MOV TEMP[13].x, IMM[2].xxxx
726: FSNE TEMP[14].x, CONST[13].xxxx, TEMP[7].xxxx
727: UIF TEMP[14].xxxx :0
728: MOV TEMP[13].x, IMM[2].yyyy
729: RCP TEMP[14].x, CONST[16].xxxx
730: MUL TEMP[9].xy, IN[3].yxxx, TEMP[14].xxxx
731: ELSE :0
732: RCP TEMP[14].x, CONST[15].xxxx
733: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[14].xxxx
734: ENDIF
735: FRC TEMP[9].xy, TEMP[9].xyyy
736: MUL TEMP[14].x, CONST[17].xxxx, IMM[2].wwww
737: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
738: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
739: MUL TEMP[13].x, TEMP[13].xxxx, CONST[17].xxxx
740: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
741: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[7].xxxx, TEMP[11].xyyy
742: MOV TEMP[13].xy, TEMP[9].xyyy
743: MOV TEMP[13].w, TEMP[10].xxxx
744: TXL TEMP[13], TEMP[13], SAMP[9], 2D
745: FSEQ TEMP[14].x, TEMP[11].zzzz, IMM[1].zzzz
746: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
747: MOV TEMP[15].xy, TEMP[9].xyyy
748: MOV TEMP[15].w, TEMP[10].xxxx
749: TXL TEMP[15], TEMP[15], SAMP[7], 2D
750: FSEQ TEMP[16].x, TEMP[11].zzzz, IMM[3].xxxx
751: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
752: MOV TEMP[17].xy, TEMP[9].xyyy
753: MOV TEMP[17].w, TEMP[10].xxxx
754: TXL TEMP[17], TEMP[17], SAMP[5], 2D
755: FSEQ TEMP[18].x, TEMP[11].zzzz, IMM[2].wwww
756: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
757: MOV TEMP[19].xy, TEMP[9].xyyy
758: MOV TEMP[19].w, TEMP[10].xxxx
759: TXL TEMP[19], TEMP[19], SAMP[3], 2D
760: FSEQ TEMP[20].x, TEMP[11].zzzz, IMM[2].zzzz
761: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
762: MOV TEMP[9].xy, TEMP[9].xyyy
763: MOV TEMP[9].w, TEMP[10].xxxx
764: TXL TEMP[9], TEMP[9], SAMP[1], 2D
765: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[3].yyyy
766: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
767: MUL TEMP[9], TEMP[9], TEMP[21].xxxx
768: MAD TEMP[9], TEMP[19], TEMP[20].xxxx, TEMP[9]
769: MAD TEMP[9], TEMP[17], TEMP[18].xxxx, TEMP[9]
770: MAD TEMP[9], TEMP[15], TEMP[16].xxxx, TEMP[9]
771: MAD TEMP[9].yw, TEMP[13], TEMP[14].xxxx, TEMP[9]
772: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz
773: DP2 TEMP[13].x, TEMP[9].xyyy, TEMP[9].xyyy
774: MOV_SAT TEMP[33].x, TEMP[13].xxxx
775: MOV TEMP[13].xy, TEMP[0].zxzz
776: MOV TEMP[14].x, IMM[2].xxxx
777: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[7].xxxx
778: UIF TEMP[15].xxxx :0
779: MOV TEMP[14].x, IMM[2].yyyy
780: RCP TEMP[15].x, CONST[16].xxxx
781: MUL TEMP[13].xy, TEMP[0].zxxx, TEMP[15].xxxx
782: ELSE :0
783: RCP TEMP[15].x, CONST[15].xxxx
784: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
785: ENDIF
786: FRC TEMP[13].xy, TEMP[13].xyyy
787: MUL TEMP[15].x, CONST[17].xxxx, IMM[2].wwww
788: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
789: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
790: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx
791: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
792: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[7].xxxx, TEMP[11].xyyy
793: MOV TEMP[14].xy, TEMP[13].xyyy
794: MOV TEMP[14].w, TEMP[10].xxxx
795: TXL TEMP[14], TEMP[14], SAMP[9], 2D
796: FSEQ TEMP[15].x, TEMP[11].zzzz, IMM[1].zzzz
797: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
798: MOV TEMP[16].xy, TEMP[13].xyyy
799: MOV TEMP[16].w, TEMP[10].xxxx
800: TXL TEMP[16], TEMP[16], SAMP[7], 2D
801: FSEQ TEMP[17].x, TEMP[11].zzzz, IMM[3].xxxx
802: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
803: MOV TEMP[18].xy, TEMP[13].xyyy
804: MOV TEMP[18].w, TEMP[10].xxxx
805: TXL TEMP[18], TEMP[18], SAMP[5], 2D
806: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[2].wwww
807: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
808: MOV TEMP[20].xy, TEMP[13].xyyy
809: MOV TEMP[20].w, TEMP[10].xxxx
810: TXL TEMP[20], TEMP[20], SAMP[3], 2D
811: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[2].zzzz
812: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
813: MOV TEMP[13].xy, TEMP[13].xyyy
814: MOV TEMP[13].w, TEMP[10].xxxx
815: TXL TEMP[13], TEMP[13], SAMP[1], 2D
816: FSEQ TEMP[22].x, TEMP[11].zzzz, IMM[3].yyyy
817: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
818: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
819: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
820: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
821: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
822: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13]
823: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz
824: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy
825: MOV_SAT TEMP[34].x, TEMP[14].xxxx
826: MOV TEMP[14].xy, TEMP[0].xyxx
827: MOV TEMP[15].x, IMM[2].xxxx
828: FSNE TEMP[16].x, CONST[13].xxxx, TEMP[7].xxxx
829: UIF TEMP[16].xxxx :0
830: MOV TEMP[15].x, IMM[2].yyyy
831: RCP TEMP[16].x, CONST[16].xxxx
832: MUL TEMP[14].xy, TEMP[0].xyyy, TEMP[16].xxxx
833: ELSE :0
834: RCP TEMP[16].x, CONST[15].xxxx
835: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
836: ENDIF
837: FRC TEMP[14].xy, TEMP[14].xyyy
838: MUL TEMP[16].x, CONST[17].xxxx, IMM[2].wwww
839: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
840: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
841: MUL TEMP[15].x, TEMP[15].xxxx, CONST[17].xxxx
842: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
843: MAD TEMP[7].xy, TEMP[14].xyyy, TEMP[7].xxxx, TEMP[11].xyyy
844: MOV TEMP[14].xy, TEMP[7].xyyy
845: MOV TEMP[14].w, TEMP[10].xxxx
846: TXL TEMP[14], TEMP[14], SAMP[9], 2D
847: FSEQ TEMP[15].x, TEMP[11].zzzz, IMM[1].zzzz
848: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
849: MOV TEMP[16].xy, TEMP[7].xyyy
850: MOV TEMP[16].w, TEMP[10].xxxx
851: TXL TEMP[16], TEMP[16], SAMP[7], 2D
852: FSEQ TEMP[17].x, TEMP[11].zzzz, IMM[3].xxxx
853: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
854: MOV TEMP[18].xy, TEMP[7].xyyy
855: MOV TEMP[18].w, TEMP[10].xxxx
856: TXL TEMP[18], TEMP[18], SAMP[5], 2D
857: FSEQ TEMP[19].x, TEMP[11].zzzz, IMM[2].wwww
858: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
859: MOV TEMP[20].xy, TEMP[7].xyyy
860: MOV TEMP[20].w, TEMP[10].xxxx
861: TXL TEMP[20], TEMP[20], SAMP[3], 2D
862: FSEQ TEMP[21].x, TEMP[11].zzzz, IMM[2].zzzz
863: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
864: MOV TEMP[7].xy, TEMP[7].xyyy
865: MOV TEMP[7].w, TEMP[10].xxxx
866: TXL TEMP[7], TEMP[7], SAMP[1], 2D
867: FSEQ TEMP[11].x, TEMP[11].zzzz, IMM[3].yyyy
868: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
869: MUL TEMP[7], TEMP[7], TEMP[11].xxxx
870: MAD TEMP[7], TEMP[20], TEMP[21].xxxx, TEMP[7]
871: MAD TEMP[7], TEMP[18], TEMP[19].xxxx, TEMP[7]
872: MAD TEMP[7], TEMP[16], TEMP[17].xxxx, TEMP[7]
873: MAD TEMP[7].yw, TEMP[14], TEMP[15].xxxx, TEMP[7]
874: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz
875: DP2 TEMP[11].x, TEMP[7].xyyy, TEMP[7].xyyy
876: MOV_SAT TEMP[35].x, TEMP[11].xxxx
877: MOV TEMP[11].x, IMM[3].yyyy
878: MOV TEMP[11].y, TEMP[9].xxxx
879: MOV TEMP[11].z, TEMP[9].yyyy
880: MOV TEMP[9].y, IMM[3].yyyy
881: MOV TEMP[9].x, TEMP[13].yyyy
882: MOV TEMP[9].z, TEMP[13].xxxx
883: MOV TEMP[13].z, IMM[3].yyyy
884: MOV TEMP[13].xy, TEMP[7].xyxx
885: MUL TEMP[7].xyz, TEMP[11].xyzz, TEMP[4].xxxx
886: MAD TEMP[7].xyz, TEMP[9].xyzz, TEMP[4].yyyy, TEMP[7].xyzz
887: MAD TEMP[7].xyz, TEMP[13].xyzz, TEMP[4].zzzz, TEMP[7].xyzz
888: MOV TEMP[9].xy, IN[3].yxyy
889: MOV TEMP[11].x, IMM[2].xxxx
890: FSNE TEMP[13].x, CONST[13].xxxx, TEMP[8].xxxx
891: UIF TEMP[13].xxxx :0
892: MOV TEMP[11].x, IMM[2].yyyy
893: RCP TEMP[13].x, CONST[16].xxxx
894: MUL TEMP[9].xy, IN[3].yxxx, TEMP[13].xxxx
895: ELSE :0
896: RCP TEMP[13].x, CONST[15].xxxx
897: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[13].xxxx
898: ENDIF
899: FRC TEMP[9].xy, TEMP[9].xyyy
900: MUL TEMP[13].x, CONST[17].xxxx, IMM[2].wwww
901: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[11].xxxx
902: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
903: MUL TEMP[11].x, TEMP[11].xxxx, CONST[17].xxxx
904: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[13].xxxx, TEMP[11].xxxx
905: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[8].xxxx, TEMP[5].xyyy
906: MOV TEMP[11].xy, TEMP[9].xyyy
907: MOV TEMP[11].w, TEMP[10].xxxx
908: TXL TEMP[11], TEMP[11], SAMP[9], 2D
909: FSEQ TEMP[13].x, TEMP[5].zzzz, IMM[1].zzzz
910: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
911: MOV TEMP[14].xy, TEMP[9].xyyy
912: MOV TEMP[14].w, TEMP[10].xxxx
913: TXL TEMP[14], TEMP[14], SAMP[7], 2D
914: FSEQ TEMP[15].x, TEMP[5].zzzz, IMM[3].xxxx
915: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
916: MOV TEMP[16].xy, TEMP[9].xyyy
917: MOV TEMP[16].w, TEMP[10].xxxx
918: TXL TEMP[16], TEMP[16], SAMP[5], 2D
919: FSEQ TEMP[17].x, TEMP[5].zzzz, IMM[2].wwww
920: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
921: MOV TEMP[18].xy, TEMP[9].xyyy
922: MOV TEMP[18].w, TEMP[10].xxxx
923: TXL TEMP[18], TEMP[18], SAMP[3], 2D
924: FSEQ TEMP[19].x, TEMP[5].zzzz, IMM[2].zzzz
925: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
926: MOV TEMP[9].xy, TEMP[9].xyyy
927: MOV TEMP[9].w, TEMP[10].xxxx
928: TXL TEMP[9], TEMP[9], SAMP[1], 2D
929: FSEQ TEMP[20].x, TEMP[5].zzzz, IMM[3].yyyy
930: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
931: MUL TEMP[9], TEMP[9], TEMP[20].xxxx
932: MAD TEMP[9], TEMP[18], TEMP[19].xxxx, TEMP[9]
933: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9]
934: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9]
935: MAD TEMP[9].yw, TEMP[11], TEMP[13].xxxx, TEMP[9]
936: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz
937: DP2 TEMP[11].x, TEMP[9].xyyy, TEMP[9].xyyy
938: MOV_SAT TEMP[36].x, TEMP[11].xxxx
939: MOV TEMP[11].xy, TEMP[0].zxzz
940: MOV TEMP[13].x, IMM[2].xxxx
941: FSNE TEMP[14].x, CONST[13].xxxx, TEMP[8].xxxx
942: UIF TEMP[14].xxxx :0
943: MOV TEMP[13].x, IMM[2].yyyy
944: RCP TEMP[14].x, CONST[16].xxxx
945: MUL TEMP[11].xy, TEMP[0].zxxx, TEMP[14].xxxx
946: ELSE :0
947: RCP TEMP[14].x, CONST[15].xxxx
948: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[14].xxxx
949: ENDIF
950: FRC TEMP[11].xy, TEMP[11].xyyy
951: MUL TEMP[14].x, CONST[17].xxxx, IMM[2].wwww
952: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
953: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
954: MUL TEMP[13].x, TEMP[13].xxxx, CONST[17].xxxx
955: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
956: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[8].xxxx, TEMP[5].xyyy
957: MOV TEMP[13].xy, TEMP[11].xyyy
958: MOV TEMP[13].w, TEMP[10].xxxx
959: TXL TEMP[13], TEMP[13], SAMP[9], 2D
960: FSEQ TEMP[14].x, TEMP[5].zzzz, IMM[1].zzzz
961: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
962: MOV TEMP[15].xy, TEMP[11].xyyy
963: MOV TEMP[15].w, TEMP[10].xxxx
964: TXL TEMP[15], TEMP[15], SAMP[7], 2D
965: FSEQ TEMP[16].x, TEMP[5].zzzz, IMM[3].xxxx
966: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
967: MOV TEMP[17].xy, TEMP[11].xyyy
968: MOV TEMP[17].w, TEMP[10].xxxx
969: TXL TEMP[17], TEMP[17], SAMP[5], 2D
970: FSEQ TEMP[18].x, TEMP[5].zzzz, IMM[2].wwww
971: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
972: MOV TEMP[19].xy, TEMP[11].xyyy
973: MOV TEMP[19].w, TEMP[10].xxxx
974: TXL TEMP[19], TEMP[19], SAMP[3], 2D
975: FSEQ TEMP[20].x, TEMP[5].zzzz, IMM[2].zzzz
976: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
977: MOV TEMP[11].xy, TEMP[11].xyyy
978: MOV TEMP[11].w, TEMP[10].xxxx
979: TXL TEMP[11], TEMP[11], SAMP[1], 2D
980: FSEQ TEMP[21].x, TEMP[5].zzzz, IMM[3].yyyy
981: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
982: MUL TEMP[11], TEMP[11], TEMP[21].xxxx
983: MAD TEMP[11], TEMP[19], TEMP[20].xxxx, TEMP[11]
984: MAD TEMP[11], TEMP[17], TEMP[18].xxxx, TEMP[11]
985: MAD TEMP[11], TEMP[15], TEMP[16].xxxx, TEMP[11]
986: MAD TEMP[11].yw, TEMP[13], TEMP[14].xxxx, TEMP[11]
987: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz
988: DP2 TEMP[13].x, TEMP[11].xyyy, TEMP[11].xyyy
989: MOV_SAT TEMP[37].x, TEMP[13].xxxx
990: MOV TEMP[13].xy, TEMP[0].xyxx
991: MOV TEMP[14].x, IMM[2].xxxx
992: FSNE TEMP[15].x, CONST[13].xxxx, TEMP[8].xxxx
993: UIF TEMP[15].xxxx :0
994: MOV TEMP[14].x, IMM[2].yyyy
995: RCP TEMP[15].x, CONST[16].xxxx
996: MUL TEMP[13].xy, TEMP[0].xyyy, TEMP[15].xxxx
997: ELSE :0
998: RCP TEMP[0].x, CONST[15].xxxx
999: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[0].xxxx
1000: ENDIF
1001: FRC TEMP[0].xy, TEMP[13].xyyy
1002: MUL TEMP[13].x, CONST[17].xxxx, IMM[2].wwww
1003: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[14].xxxx
1004: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
1005: MUL TEMP[14].x, TEMP[14].xxxx, CONST[17].xxxx
1006: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[13].xxxx, TEMP[14].xxxx
1007: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[8].xxxx, TEMP[5].xyyy
1008: MOV TEMP[8].xy, TEMP[0].xyyy
1009: MOV TEMP[8].w, TEMP[10].xxxx
1010: TXL TEMP[8], TEMP[8], SAMP[9], 2D
1011: FSEQ TEMP[13].x, TEMP[5].zzzz, IMM[1].zzzz
1012: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
1013: MOV TEMP[14].xy, TEMP[0].xyyy
1014: MOV TEMP[14].w, TEMP[10].xxxx
1015: TXL TEMP[14], TEMP[14], SAMP[7], 2D
1016: FSEQ TEMP[15].x, TEMP[5].zzzz, IMM[3].xxxx
1017: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
1018: MOV TEMP[16].xy, TEMP[0].xyyy
1019: MOV TEMP[16].w, TEMP[10].xxxx
1020: TXL TEMP[16], TEMP[16], SAMP[5], 2D
1021: FSEQ TEMP[17].x, TEMP[5].zzzz, IMM[2].wwww
1022: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
1023: MOV TEMP[18].xy, TEMP[0].xyyy
1024: MOV TEMP[18].w, TEMP[10].xxxx
1025: TXL TEMP[18], TEMP[18], SAMP[3], 2D
1026: FSEQ TEMP[19].x, TEMP[5].zzzz, IMM[2].zzzz
1027: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
1028: MOV TEMP[0].xy, TEMP[0].xyyy
1029: MOV TEMP[0].w, TEMP[10].xxxx
1030: TXL TEMP[0], TEMP[0], SAMP[1], 2D
1031: FSEQ TEMP[5].x, TEMP[5].zzzz, IMM[3].yyyy
1032: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].zzzz
1033: MUL TEMP[0], TEMP[0], TEMP[5].xxxx
1034: MAD TEMP[0], TEMP[18], TEMP[19].xxxx, TEMP[0]
1035: MAD TEMP[0], TEMP[16], TEMP[17].xxxx, TEMP[0]
1036: MAD TEMP[0], TEMP[14], TEMP[15].xxxx, TEMP[0]
1037: MAD TEMP[0].yw, TEMP[8], TEMP[13].xxxx, TEMP[0]
1038: MAD TEMP[0].xy, TEMP[0].wyyy, IMM[2].wwww, IMM[3].zzzz
1039: DP2 TEMP[5].x, TEMP[0].xyyy, TEMP[0].xyyy
1040: MOV_SAT TEMP[38].x, TEMP[5].xxxx
1041: MOV TEMP[5].x, IMM[3].yyyy
1042: MOV TEMP[5].y, TEMP[9].xxxx
1043: MOV TEMP[5].z, TEMP[9].yyyy
1044: MOV TEMP[8].y, IMM[3].yyyy
1045: MOV TEMP[8].x, TEMP[11].yyyy
1046: MOV TEMP[8].z, TEMP[11].xxxx
1047: MOV TEMP[9].z, IMM[3].yyyy
1048: MOV TEMP[9].xy, TEMP[0].xyxx
1049: MOV TEMP[0].w, IMM[2].zzzz
1050: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[4].xxxx
1051: MAD TEMP[5].xyz, TEMP[8].xyzz, TEMP[4].yyyy, TEMP[5].xyzz
1052: MAD TEMP[4].xyz, TEMP[9].xyzz, TEMP[4].zzzz, TEMP[5].xyzz
1053: MUL TEMP[5].xyz, IN[1].xxxx, TEMP[6].xyzz
1054: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[7].xyzz, TEMP[5].xyzz
1055: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[4].xyzz, TEMP[5].xyzz
1056: DP4 TEMP[4].x, TEMP[0], TEMP[0]
1057: RSQ TEMP[4].x, TEMP[4].xxxx
1058: MUL TEMP[0].xyz, TEMP[0], TEMP[4].xxxx
1059: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[0].wwww
1060: ADD TEMP[0].xyz, IN[2].xyzz, -TEMP[0].xyzz
1061: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
1062: RSQ TEMP[4].x, TEMP[4].xxxx
1063: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
1064: MOV TEMP[4].w, IMM[3].yyyy
1065: MUL TEMP[4].xyz, TEMP[12].xyzz, TEMP[1].xyzz
1066: ADD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xyzz
1067: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
1068: RSQ TEMP[3].x, TEMP[3].xxxx
1069: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
1070: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
1071: MAX TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx
1072: MUL TEMP[3].x, IMM[4].xxxx, IN[1].wwww
1073: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
1074: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1075: MOV TEMP[3].w, IMM[3].yyyy
1076: MOV TEMP[3].xyz, CONST[20].xyzx
1077: MOV TEMP[5].w, IMM[2].zzzz
1078: MUL TEMP[6].x, IMM[2].wwww, TEMP[1].xxxx
1079: ADD TEMP[6].x, IMM[3].xxxx, -TEMP[6].xxxx
1080: MUL TEMP[6].x, TEMP[1].xxxx, TEMP[6].xxxx
1081: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
1082: MUL TEMP[1].x, TEMP[1].xxxx, IN[1].wwww
1083: MUL TEMP[6].xyz, TEMP[12].xyzz, CONST[2].xyzz
1084: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz
1085: MOV_SAT TEMP[0].x, TEMP[0].xxxx
1086: MUL TEMP[2], CONST[21], IMM[2].wwww
1087: MAX TEMP[2], TEMP[2], TEMP[3]
1088: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz
1089: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[12].xyzz
1090: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[2].xyzz
1091: MAD TEMP[0].xyz, CONST[2].xyzz, TEMP[1].xxxx, TEMP[0].xyzz
1092: MUL TEMP[5].xyz, TEMP[0].xyzz, IMM[0].wwww
1093: ADD TEMP[0].xyz, TEMP[4], TEMP[5]
1094: MOV TEMP[4].xyz, TEMP[0].xyzx
1095: MOV TEMP[4].w, IMM[2].zzzz
1096: MOV OUT[0], TEMP[4]
1097: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344)
%46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0
%48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0
%50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0
%52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0
%54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%55 = load <8 x i32>, <8 x i32> addrspace(2)* %54, align 32, !tbaa !0
%56 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%57 = load <4 x i32>, <4 x i32> addrspace(2)* %56, align 16, !tbaa !0
%58 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%59 = load <8 x i32>, <8 x i32> addrspace(2)* %58, align 32, !tbaa !0
%60 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%61 = load <4 x i32>, <4 x i32> addrspace(2)* %60, align 16, !tbaa !0
%62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%63 = load <8 x i32>, <8 x i32> addrspace(2)* %62, align 32, !tbaa !0
%64 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%65 = load <4 x i32>, <4 x i32> addrspace(2)* %64, align 16, !tbaa !0
%66 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%67 = load <8 x i32>, <8 x i32> addrspace(2)* %66, align 32, !tbaa !0
%68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%69 = load <4 x i32>, <4 x i32> addrspace(2)* %68, align 16, !tbaa !0
%70 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6
%71 = load <8 x i32>, <8 x i32> addrspace(2)* %70, align 32, !tbaa !0
%72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6
%73 = load <4 x i32>, <4 x i32> addrspace(2)* %72, align 16, !tbaa !0
%74 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7
%75 = load <8 x i32>, <8 x i32> addrspace(2)* %74, align 32, !tbaa !0
%76 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7
%77 = load <4 x i32>, <4 x i32> addrspace(2)* %76, align 16, !tbaa !0
%78 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8
%79 = load <8 x i32>, <8 x i32> addrspace(2)* %78, align 32, !tbaa !0
%80 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8
%81 = load <4 x i32>, <4 x i32> addrspace(2)* %80, align 16, !tbaa !0
%82 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9
%83 = load <8 x i32>, <8 x i32> addrspace(2)* %82, align 32, !tbaa !0
%84 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9
%85 = load <4 x i32>, <4 x i32> addrspace(2)* %84, align 16, !tbaa !0
%86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%88 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%89 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%92 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%93 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%95 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%97 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%100 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%101 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%102 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%103 = fmul float %27, %27
%104 = fmul float %28, %28
%105 = fadd float %104, %103
%106 = fmul float %29, %29
%107 = fadd float %105, %106
%108 = call float @llvm.AMDGPU.rsq.clamped.f32(float %107)
%109 = fmul float %27, %108
%110 = fmul float %28, %108
%111 = fmul float %29, %108
%112 = fsub float %24, %97
%113 = fsub float %25, %98
%114 = fsub float %26, %99
%115 = fmul float %112, %112
%116 = fmul float %113, %113
%117 = fadd float %116, %115
%118 = fmul float %114, %114
%119 = fadd float %117, %118
%120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119)
%121 = fmul float %112, %120
%122 = fmul float %113, %120
%123 = fmul float %114, %120
%124 = call float @llvm.fabs.f32(float %94)
%125 = call float @llvm.fabs.f32(float %95)
%126 = call float @llvm.fabs.f32(float %96)
%127 = fmul float %124, %124
%128 = fmul float %125, %125
%129 = fadd float %128, %127
%130 = fmul float %126, %126
%131 = fadd float %129, %130
%132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131)
%133 = fmul float %124, %132
%134 = fadd float %133, 0xBFC99999A0000000
%135 = fmul float %125, %132
%136 = fadd float %135, 0xBFC99999A0000000
%137 = fmul float %126, %132
%138 = fadd float %137, 0xBFC99999A0000000
%139 = fmul float %134, 7.000000e+00
%140 = fmul float %136, 7.000000e+00
%141 = fmul float %138, 7.000000e+00
%142 = call float @llvm.maxnum.f32(float %139, float 0x3F847AE140000000)
%143 = call float @llvm.maxnum.f32(float %140, float 0x3F847AE140000000)
%144 = call float @llvm.maxnum.f32(float %141, float 0x3F847AE140000000)
%145 = fadd float %142, %143
%146 = fadd float %145, %144
%147 = fdiv float 1.000000e+00, %146
%148 = fmul float %142, %147
%149 = fmul float %143, %147
%150 = fmul float %144, %147
%151 = fadd float %86, 5.000000e-01
%152 = fadd float %87, 5.000000e-01
%153 = fadd float %88, 5.000000e-01
%154 = call float @llvm.floor.f32(float %151)
%155 = call float @llvm.floor.f32(float %152)
%156 = call float @llvm.floor.f32(float %153)
%157 = fmul float %154, %33
%158 = call float @llvm.floor.f32(float %157)
%159 = fmul float %158, %33
%160 = fcmp ult float %154, 6.400000e+01
br i1 %160, label %ELSE, label %IF
IF: ; preds = %main_body
%161 = fadd float %154, -6.400000e+01
%162 = fmul float %161, %34
%163 = call float @llvm.floor.f32(float %162)
%164 = fmul float %163, %34
%165 = call float @llvm.floor.f32(float %162)
%166 = fsub float %162, %165
%167 = call float @llvm.floor.f32(float %164)
%168 = fsub float %164, %167
%169 = call float @llvm.floor.f32(float %164)
%170 = fadd float %169, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%171 = call float @llvm.floor.f32(float %157)
%172 = fsub float %157, %171
%173 = call float @llvm.floor.f32(float %159)
%174 = fsub float %159, %173
%175 = call float @llvm.floor.f32(float %159)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp36.0 = phi float [ %166, %IF ], [ %172, %ELSE ]
%temp37.0 = phi float [ %168, %IF ], [ %174, %ELSE ]
%temp38.0 = phi float [ %170, %IF ], [ %175, %ELSE ]
%temp24.0 = phi float [ %34, %IF ], [ %33, %ELSE ]
%176 = fmul float %155, %33
%177 = call float @llvm.floor.f32(float %176)
%178 = fmul float %177, %33
%179 = fcmp ult float %155, 6.400000e+01
br i1 %179, label %ELSE158, label %IF157
IF157: ; preds = %ENDIF
%180 = fadd float %155, -6.400000e+01
%181 = fmul float %180, %34
%182 = call float @llvm.floor.f32(float %181)
%183 = fmul float %182, %34
%184 = call float @llvm.floor.f32(float %181)
%185 = fsub float %181, %184
%186 = call float @llvm.floor.f32(float %183)
%187 = fsub float %183, %186
%188 = call float @llvm.floor.f32(float %183)
%189 = fadd float %188, 4.000000e+00
br label %ENDIF156
ELSE158: ; preds = %ENDIF
%190 = call float @llvm.floor.f32(float %176)
%191 = fsub float %176, %190
%192 = call float @llvm.floor.f32(float %178)
%193 = fsub float %178, %192
%194 = call float @llvm.floor.f32(float %178)
br label %ENDIF156
ENDIF156: ; preds = %ELSE158, %IF157
%temp44.0 = phi float [ %185, %IF157 ], [ %191, %ELSE158 ]
%temp45.0 = phi float [ %187, %IF157 ], [ %193, %ELSE158 ]
%temp46.0 = phi float [ %189, %IF157 ], [ %194, %ELSE158 ]
%temp28.0 = phi float [ %34, %IF157 ], [ %33, %ELSE158 ]
%195 = fmul float %156, %33
%196 = call float @llvm.floor.f32(float %195)
%197 = fmul float %196, %33
%198 = fcmp ult float %156, 6.400000e+01
br i1 %198, label %ELSE161, label %IF160
IF160: ; preds = %ENDIF156
%199 = fadd float %156, -6.400000e+01
%200 = fmul float %199, %34
%201 = call float @llvm.floor.f32(float %200)
%202 = fmul float %201, %34
%203 = call float @llvm.floor.f32(float %200)
%204 = fsub float %200, %203
%205 = call float @llvm.floor.f32(float %202)
%206 = fsub float %202, %205
%207 = call float @llvm.floor.f32(float %202)
%208 = fadd float %207, 4.000000e+00
br label %ENDIF159
ELSE161: ; preds = %ENDIF156
%209 = call float @llvm.floor.f32(float %195)
%210 = fsub float %195, %209
%211 = call float @llvm.floor.f32(float %197)
%212 = fsub float %197, %211
%213 = call float @llvm.floor.f32(float %197)
br label %ENDIF159
ENDIF159: ; preds = %ELSE161, %IF160
%temp32.0 = phi float [ %34, %IF160 ], [ %33, %ELSE161 ]
%temp22.0 = phi float [ %208, %IF160 ], [ %213, %ELSE161 ]
%temp21.0 = phi float [ %206, %IF160 ], [ %212, %ELSE161 ]
%temp20.0 = phi float [ %204, %IF160 ], [ %210, %ELSE161 ]
%214 = fsub float %97, %24
%215 = fsub float %98, %25
%216 = fsub float %99, %26
%217 = fmul float %214, %214
%218 = fmul float %215, %215
%219 = fadd float %218, %217
%220 = fmul float %216, %216
%221 = fadd float %219, %220
%222 = fmul float %39, %221
%223 = call float @llvm.log2.f32(float %222)
%224 = fmul float %223, 0x3FE62E4300000000
%225 = fmul float %224, %38
%226 = fcmp une float %33, %temp24.0
%.sink216 = select i1 %226, float %36, float %35
%temp52.0 = select i1 %226, float 1.953125e-03, float 3.906250e-03
%227 = fdiv float 1.000000e+00, %.sink216
%228 = fmul float %97, %227
%229 = fmul float %98, %227
%230 = call float @llvm.floor.f32(float %228)
%231 = fsub float %228, %230
%232 = call float @llvm.floor.f32(float %229)
%233 = fsub float %229, %232
%234 = fmul float %37, 2.000000e+00
%235 = fmul float %234, %temp52.0
%236 = fsub float 1.000000e+00, %235
%237 = fmul float %temp52.0, %37
%238 = fmul float %231, %236
%239 = fadd float %238, %237
%240 = fmul float %233, %236
%241 = fadd float %240, %237
%242 = fmul float %239, %temp24.0
%243 = fadd float %242, %temp36.0
%244 = fmul float %241, %temp24.0
%245 = fadd float %244, %temp37.0
%246 = bitcast float %243 to i32
%247 = bitcast float %245 to i32
%248 = bitcast float %225 to i32
%249 = insertelement <4 x i32> undef, i32 %246, i32 0
%250 = insertelement <4 x i32> %249, i32 %247, i32 1
%251 = insertelement <4 x i32> %250, i32 %248, i32 2
%252 = bitcast <8 x i32> %79 to <32 x i8>
%253 = bitcast <4 x i32> %81 to <16 x i8>
%254 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %251, <32 x i8> %252, <16 x i8> %253, i32 2)
%255 = extractelement <4 x float> %254, i32 0
%256 = extractelement <4 x float> %254, i32 1
%257 = extractelement <4 x float> %254, i32 2
%258 = fcmp oeq float %temp38.0, 4.000000e+00
%259 = select i1 %258, float 1.000000e+00, float 0.000000e+00
%260 = bitcast float %243 to i32
%261 = bitcast float %245 to i32
%262 = bitcast float %225 to i32
%263 = insertelement <4 x i32> undef, i32 %260, i32 0
%264 = insertelement <4 x i32> %263, i32 %261, i32 1
%265 = insertelement <4 x i32> %264, i32 %262, i32 2
%266 = bitcast <8 x i32> %71 to <32 x i8>
%267 = bitcast <4 x i32> %73 to <16 x i8>
%268 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2)
%269 = extractelement <4 x float> %268, i32 0
%270 = extractelement <4 x float> %268, i32 1
%271 = extractelement <4 x float> %268, i32 2
%272 = fcmp oeq float %temp38.0, 3.000000e+00
%273 = select i1 %272, float 1.000000e+00, float 0.000000e+00
%274 = bitcast float %243 to i32
%275 = bitcast float %245 to i32
%276 = bitcast float %225 to i32
%277 = insertelement <4 x i32> undef, i32 %274, i32 0
%278 = insertelement <4 x i32> %277, i32 %275, i32 1
%279 = insertelement <4 x i32> %278, i32 %276, i32 2
%280 = bitcast <8 x i32> %63 to <32 x i8>
%281 = bitcast <4 x i32> %65 to <16 x i8>
%282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 2)
%283 = extractelement <4 x float> %282, i32 0
%284 = extractelement <4 x float> %282, i32 1
%285 = extractelement <4 x float> %282, i32 2
%286 = fcmp oeq float %temp38.0, 2.000000e+00
%287 = select i1 %286, float 1.000000e+00, float 0.000000e+00
%288 = bitcast float %243 to i32
%289 = bitcast float %245 to i32
%290 = bitcast float %225 to i32
%291 = insertelement <4 x i32> undef, i32 %288, i32 0
%292 = insertelement <4 x i32> %291, i32 %289, i32 1
%293 = insertelement <4 x i32> %292, i32 %290, i32 2
%294 = bitcast <8 x i32> %55 to <32 x i8>
%295 = bitcast <4 x i32> %57 to <16 x i8>
%296 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 2)
%297 = extractelement <4 x float> %296, i32 0
%298 = extractelement <4 x float> %296, i32 1
%299 = extractelement <4 x float> %296, i32 2
%300 = fcmp oeq float %temp38.0, 1.000000e+00
%301 = select i1 %300, float 1.000000e+00, float 0.000000e+00
%302 = bitcast float %243 to i32
%303 = bitcast float %245 to i32
%304 = bitcast float %225 to i32
%305 = insertelement <4 x i32> undef, i32 %302, i32 0
%306 = insertelement <4 x i32> %305, i32 %303, i32 1
%307 = insertelement <4 x i32> %306, i32 %304, i32 2
%308 = bitcast <8 x i32> %47 to <32 x i8>
%309 = bitcast <4 x i32> %49 to <16 x i8>
%310 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %307, <32 x i8> %308, <16 x i8> %309, i32 2)
%311 = extractelement <4 x float> %310, i32 0
%312 = extractelement <4 x float> %310, i32 1
%313 = extractelement <4 x float> %310, i32 2
%314 = fcmp oeq float %temp38.0, 0.000000e+00
%315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
%316 = fmul float %311, %315
%317 = fmul float %312, %315
%318 = fmul float %313, %315
%319 = fmul float %297, %301
%320 = fadd float %319, %316
%321 = fmul float %298, %301
%322 = fadd float %321, %317
%323 = fmul float %299, %301
%324 = fadd float %323, %318
%325 = fmul float %283, %287
%326 = fadd float %325, %320
%327 = fmul float %284, %287
%328 = fadd float %327, %322
%329 = fmul float %285, %287
%330 = fadd float %329, %324
%331 = fmul float %269, %273
%332 = fadd float %331, %326
%333 = fmul float %270, %273
%334 = fadd float %333, %328
%335 = fmul float %271, %273
%336 = fadd float %335, %330
%337 = fmul float %255, %259
%338 = fadd float %337, %332
%339 = fmul float %256, %259
%340 = fadd float %339, %334
%341 = fmul float %257, %259
%342 = fadd float %341, %336
%343 = fcmp une float %33, %temp24.0
%.sink217 = select i1 %343, float %36, float %35
%temp56.0 = select i1 %343, float 1.953125e-03, float 3.906250e-03
%344 = fdiv float 1.000000e+00, %.sink217
%345 = fmul float %99, %344
%346 = fmul float %98, %344
%347 = call float @llvm.floor.f32(float %345)
%348 = fsub float %345, %347
%349 = call float @llvm.floor.f32(float %346)
%350 = fsub float %346, %349
%351 = fmul float %37, 2.000000e+00
%352 = fmul float %351, %temp56.0
%353 = fsub float 1.000000e+00, %352
%354 = fmul float %temp56.0, %37
%355 = fmul float %348, %353
%356 = fadd float %355, %354
%357 = fmul float %350, %353
%358 = fadd float %357, %354
%359 = fmul float %356, %temp24.0
%360 = fadd float %359, %temp36.0
%361 = fmul float %358, %temp24.0
%362 = fadd float %361, %temp37.0
%363 = bitcast float %360 to i32
%364 = bitcast float %362 to i32
%365 = bitcast float %225 to i32
%366 = insertelement <4 x i32> undef, i32 %363, i32 0
%367 = insertelement <4 x i32> %366, i32 %364, i32 1
%368 = insertelement <4 x i32> %367, i32 %365, i32 2
%369 = bitcast <8 x i32> %79 to <32 x i8>
%370 = bitcast <4 x i32> %81 to <16 x i8>
%371 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %368, <32 x i8> %369, <16 x i8> %370, i32 2)
%372 = extractelement <4 x float> %371, i32 0
%373 = extractelement <4 x float> %371, i32 1
%374 = extractelement <4 x float> %371, i32 2
%375 = fcmp oeq float %temp38.0, 4.000000e+00
%376 = select i1 %375, float 1.000000e+00, float 0.000000e+00
%377 = bitcast float %360 to i32
%378 = bitcast float %362 to i32
%379 = bitcast float %225 to i32
%380 = insertelement <4 x i32> undef, i32 %377, i32 0
%381 = insertelement <4 x i32> %380, i32 %378, i32 1
%382 = insertelement <4 x i32> %381, i32 %379, i32 2
%383 = bitcast <8 x i32> %71 to <32 x i8>
%384 = bitcast <4 x i32> %73 to <16 x i8>
%385 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %382, <32 x i8> %383, <16 x i8> %384, i32 2)
%386 = extractelement <4 x float> %385, i32 0
%387 = extractelement <4 x float> %385, i32 1
%388 = extractelement <4 x float> %385, i32 2
%389 = fcmp oeq float %temp38.0, 3.000000e+00
%390 = select i1 %389, float 1.000000e+00, float 0.000000e+00
%391 = bitcast float %360 to i32
%392 = bitcast float %362 to i32
%393 = bitcast float %225 to i32
%394 = insertelement <4 x i32> undef, i32 %391, i32 0
%395 = insertelement <4 x i32> %394, i32 %392, i32 1
%396 = insertelement <4 x i32> %395, i32 %393, i32 2
%397 = bitcast <8 x i32> %63 to <32 x i8>
%398 = bitcast <4 x i32> %65 to <16 x i8>
%399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2)
%400 = extractelement <4 x float> %399, i32 0
%401 = extractelement <4 x float> %399, i32 1
%402 = extractelement <4 x float> %399, i32 2
%403 = fcmp oeq float %temp38.0, 2.000000e+00
%404 = select i1 %403, float 1.000000e+00, float 0.000000e+00
%405 = bitcast float %360 to i32
%406 = bitcast float %362 to i32
%407 = bitcast float %225 to i32
%408 = insertelement <4 x i32> undef, i32 %405, i32 0
%409 = insertelement <4 x i32> %408, i32 %406, i32 1
%410 = insertelement <4 x i32> %409, i32 %407, i32 2
%411 = bitcast <8 x i32> %55 to <32 x i8>
%412 = bitcast <4 x i32> %57 to <16 x i8>
%413 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %410, <32 x i8> %411, <16 x i8> %412, i32 2)
%414 = extractelement <4 x float> %413, i32 0
%415 = extractelement <4 x float> %413, i32 1
%416 = extractelement <4 x float> %413, i32 2
%417 = fcmp oeq float %temp38.0, 1.000000e+00
%418 = select i1 %417, float 1.000000e+00, float 0.000000e+00
%419 = bitcast float %360 to i32
%420 = bitcast float %362 to i32
%421 = bitcast float %225 to i32
%422 = insertelement <4 x i32> undef, i32 %419, i32 0
%423 = insertelement <4 x i32> %422, i32 %420, i32 1
%424 = insertelement <4 x i32> %423, i32 %421, i32 2
%425 = bitcast <8 x i32> %47 to <32 x i8>
%426 = bitcast <4 x i32> %49 to <16 x i8>
%427 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %424, <32 x i8> %425, <16 x i8> %426, i32 2)
%428 = extractelement <4 x float> %427, i32 0
%429 = extractelement <4 x float> %427, i32 1
%430 = extractelement <4 x float> %427, i32 2
%431 = fcmp oeq float %temp38.0, 0.000000e+00
%432 = select i1 %431, float 1.000000e+00, float 0.000000e+00
%433 = fmul float %428, %432
%434 = fmul float %429, %432
%435 = fmul float %430, %432
%436 = fmul float %414, %418
%437 = fadd float %436, %433
%438 = fmul float %415, %418
%439 = fadd float %438, %434
%440 = fmul float %416, %418
%441 = fadd float %440, %435
%442 = fmul float %400, %404
%443 = fadd float %442, %437
%444 = fmul float %401, %404
%445 = fadd float %444, %439
%446 = fmul float %402, %404
%447 = fadd float %446, %441
%448 = fmul float %386, %390
%449 = fadd float %448, %443
%450 = fmul float %387, %390
%451 = fadd float %450, %445
%452 = fmul float %388, %390
%453 = fadd float %452, %447
%454 = fmul float %372, %376
%455 = fadd float %454, %449
%456 = fmul float %373, %376
%457 = fadd float %456, %451
%458 = fmul float %374, %376
%459 = fadd float %458, %453
%460 = fcmp une float %33, %temp24.0
%.sink218 = select i1 %460, float %36, float %35
%temp60.0 = select i1 %460, float 1.953125e-03, float 3.906250e-03
%461 = fdiv float 1.000000e+00, %.sink218
%462 = fmul float %99, %461
%463 = fmul float %97, %461
%464 = call float @llvm.floor.f32(float %462)
%465 = fsub float %462, %464
%466 = call float @llvm.floor.f32(float %463)
%467 = fsub float %463, %466
%468 = fmul float %37, 2.000000e+00
%469 = fmul float %468, %temp60.0
%470 = fsub float 1.000000e+00, %469
%471 = fmul float %temp60.0, %37
%472 = fmul float %465, %470
%473 = fadd float %472, %471
%474 = fmul float %467, %470
%475 = fadd float %474, %471
%476 = fmul float %473, %temp24.0
%477 = fadd float %476, %temp36.0
%478 = fmul float %475, %temp24.0
%479 = fadd float %478, %temp37.0
%480 = bitcast float %477 to i32
%481 = bitcast float %479 to i32
%482 = bitcast float %225 to i32
%483 = insertelement <4 x i32> undef, i32 %480, i32 0
%484 = insertelement <4 x i32> %483, i32 %481, i32 1
%485 = insertelement <4 x i32> %484, i32 %482, i32 2
%486 = bitcast <8 x i32> %79 to <32 x i8>
%487 = bitcast <4 x i32> %81 to <16 x i8>
%488 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %485, <32 x i8> %486, <16 x i8> %487, i32 2)
%489 = extractelement <4 x float> %488, i32 0
%490 = extractelement <4 x float> %488, i32 1
%491 = extractelement <4 x float> %488, i32 2
%492 = fcmp oeq float %temp38.0, 4.000000e+00
%493 = select i1 %492, float 1.000000e+00, float 0.000000e+00
%494 = bitcast float %477 to i32
%495 = bitcast float %479 to i32
%496 = bitcast float %225 to i32
%497 = insertelement <4 x i32> undef, i32 %494, i32 0
%498 = insertelement <4 x i32> %497, i32 %495, i32 1
%499 = insertelement <4 x i32> %498, i32 %496, i32 2
%500 = bitcast <8 x i32> %71 to <32 x i8>
%501 = bitcast <4 x i32> %73 to <16 x i8>
%502 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %499, <32 x i8> %500, <16 x i8> %501, i32 2)
%503 = extractelement <4 x float> %502, i32 0
%504 = extractelement <4 x float> %502, i32 1
%505 = extractelement <4 x float> %502, i32 2
%506 = fcmp oeq float %temp38.0, 3.000000e+00
%507 = select i1 %506, float 1.000000e+00, float 0.000000e+00
%508 = bitcast float %477 to i32
%509 = bitcast float %479 to i32
%510 = bitcast float %225 to i32
%511 = insertelement <4 x i32> undef, i32 %508, i32 0
%512 = insertelement <4 x i32> %511, i32 %509, i32 1
%513 = insertelement <4 x i32> %512, i32 %510, i32 2
%514 = bitcast <8 x i32> %63 to <32 x i8>
%515 = bitcast <4 x i32> %65 to <16 x i8>
%516 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %513, <32 x i8> %514, <16 x i8> %515, i32 2)
%517 = extractelement <4 x float> %516, i32 0
%518 = extractelement <4 x float> %516, i32 1
%519 = extractelement <4 x float> %516, i32 2
%520 = fcmp oeq float %temp38.0, 2.000000e+00
%521 = select i1 %520, float 1.000000e+00, float 0.000000e+00
%522 = bitcast float %477 to i32
%523 = bitcast float %479 to i32
%524 = bitcast float %225 to i32
%525 = insertelement <4 x i32> undef, i32 %522, i32 0
%526 = insertelement <4 x i32> %525, i32 %523, i32 1
%527 = insertelement <4 x i32> %526, i32 %524, i32 2
%528 = bitcast <8 x i32> %55 to <32 x i8>
%529 = bitcast <4 x i32> %57 to <16 x i8>
%530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2)
%531 = extractelement <4 x float> %530, i32 0
%532 = extractelement <4 x float> %530, i32 1
%533 = extractelement <4 x float> %530, i32 2
%534 = fcmp oeq float %temp38.0, 1.000000e+00
%535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
%536 = bitcast float %477 to i32
%537 = bitcast float %479 to i32
%538 = bitcast float %225 to i32
%539 = insertelement <4 x i32> undef, i32 %536, i32 0
%540 = insertelement <4 x i32> %539, i32 %537, i32 1
%541 = insertelement <4 x i32> %540, i32 %538, i32 2
%542 = bitcast <8 x i32> %47 to <32 x i8>
%543 = bitcast <4 x i32> %49 to <16 x i8>
%544 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %541, <32 x i8> %542, <16 x i8> %543, i32 2)
%545 = extractelement <4 x float> %544, i32 0
%546 = extractelement <4 x float> %544, i32 1
%547 = extractelement <4 x float> %544, i32 2
%548 = fcmp oeq float %temp38.0, 0.000000e+00
%549 = select i1 %548, float 1.000000e+00, float 0.000000e+00
%550 = fmul float %545, %549
%551 = fmul float %546, %549
%552 = fmul float %547, %549
%553 = fmul float %531, %535
%554 = fadd float %553, %550
%555 = fmul float %532, %535
%556 = fadd float %555, %551
%557 = fmul float %533, %535
%558 = fadd float %557, %552
%559 = fmul float %517, %521
%560 = fadd float %559, %554
%561 = fmul float %518, %521
%562 = fadd float %561, %556
%563 = fmul float %519, %521
%564 = fadd float %563, %558
%565 = fmul float %503, %507
%566 = fadd float %565, %560
%567 = fmul float %504, %507
%568 = fadd float %567, %562
%569 = fmul float %505, %507
%570 = fadd float %569, %564
%571 = fmul float %489, %493
%572 = fadd float %571, %566
%573 = fmul float %490, %493
%574 = fadd float %573, %568
%575 = fmul float %491, %493
%576 = fadd float %575, %570
%577 = fcmp une float %33, %temp28.0
%.sink219 = select i1 %577, float %36, float %35
%temp64.0 = select i1 %577, float 1.953125e-03, float 3.906250e-03
%578 = fdiv float 1.000000e+00, %.sink219
%579 = fmul float %97, %578
%580 = fmul float %98, %578
%581 = call float @llvm.floor.f32(float %579)
%582 = fsub float %579, %581
%583 = call float @llvm.floor.f32(float %580)
%584 = fsub float %580, %583
%585 = fmul float %37, 2.000000e+00
%586 = fmul float %585, %temp64.0
%587 = fsub float 1.000000e+00, %586
%588 = fmul float %temp64.0, %37
%589 = fmul float %582, %587
%590 = fadd float %589, %588
%591 = fmul float %584, %587
%592 = fadd float %591, %588
%593 = fmul float %590, %temp28.0
%594 = fadd float %593, %temp44.0
%595 = fmul float %592, %temp28.0
%596 = fadd float %595, %temp45.0
%597 = bitcast float %594 to i32
%598 = bitcast float %596 to i32
%599 = bitcast float %225 to i32
%600 = insertelement <4 x i32> undef, i32 %597, i32 0
%601 = insertelement <4 x i32> %600, i32 %598, i32 1
%602 = insertelement <4 x i32> %601, i32 %599, i32 2
%603 = bitcast <8 x i32> %79 to <32 x i8>
%604 = bitcast <4 x i32> %81 to <16 x i8>
%605 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %602, <32 x i8> %603, <16 x i8> %604, i32 2)
%606 = extractelement <4 x float> %605, i32 0
%607 = extractelement <4 x float> %605, i32 1
%608 = extractelement <4 x float> %605, i32 2
%609 = fcmp oeq float %temp46.0, 4.000000e+00
%610 = select i1 %609, float 1.000000e+00, float 0.000000e+00
%611 = bitcast float %594 to i32
%612 = bitcast float %596 to i32
%613 = bitcast float %225 to i32
%614 = insertelement <4 x i32> undef, i32 %611, i32 0
%615 = insertelement <4 x i32> %614, i32 %612, i32 1
%616 = insertelement <4 x i32> %615, i32 %613, i32 2
%617 = bitcast <8 x i32> %71 to <32 x i8>
%618 = bitcast <4 x i32> %73 to <16 x i8>
%619 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %616, <32 x i8> %617, <16 x i8> %618, i32 2)
%620 = extractelement <4 x float> %619, i32 0
%621 = extractelement <4 x float> %619, i32 1
%622 = extractelement <4 x float> %619, i32 2
%623 = fcmp oeq float %temp46.0, 3.000000e+00
%624 = select i1 %623, float 1.000000e+00, float 0.000000e+00
%625 = bitcast float %594 to i32
%626 = bitcast float %596 to i32
%627 = bitcast float %225 to i32
%628 = insertelement <4 x i32> undef, i32 %625, i32 0
%629 = insertelement <4 x i32> %628, i32 %626, i32 1
%630 = insertelement <4 x i32> %629, i32 %627, i32 2
%631 = bitcast <8 x i32> %63 to <32 x i8>
%632 = bitcast <4 x i32> %65 to <16 x i8>
%633 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %630, <32 x i8> %631, <16 x i8> %632, i32 2)
%634 = extractelement <4 x float> %633, i32 0
%635 = extractelement <4 x float> %633, i32 1
%636 = extractelement <4 x float> %633, i32 2
%637 = fcmp oeq float %temp46.0, 2.000000e+00
%638 = select i1 %637, float 1.000000e+00, float 0.000000e+00
%639 = bitcast float %594 to i32
%640 = bitcast float %596 to i32
%641 = bitcast float %225 to i32
%642 = insertelement <4 x i32> undef, i32 %639, i32 0
%643 = insertelement <4 x i32> %642, i32 %640, i32 1
%644 = insertelement <4 x i32> %643, i32 %641, i32 2
%645 = bitcast <8 x i32> %55 to <32 x i8>
%646 = bitcast <4 x i32> %57 to <16 x i8>
%647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %644, <32 x i8> %645, <16 x i8> %646, i32 2)
%648 = extractelement <4 x float> %647, i32 0
%649 = extractelement <4 x float> %647, i32 1
%650 = extractelement <4 x float> %647, i32 2
%651 = fcmp oeq float %temp46.0, 1.000000e+00
%652 = select i1 %651, float 1.000000e+00, float 0.000000e+00
%653 = bitcast float %594 to i32
%654 = bitcast float %596 to i32
%655 = bitcast float %225 to i32
%656 = insertelement <4 x i32> undef, i32 %653, i32 0
%657 = insertelement <4 x i32> %656, i32 %654, i32 1
%658 = insertelement <4 x i32> %657, i32 %655, i32 2
%659 = bitcast <8 x i32> %47 to <32 x i8>
%660 = bitcast <4 x i32> %49 to <16 x i8>
%661 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %658, <32 x i8> %659, <16 x i8> %660, i32 2)
%662 = extractelement <4 x float> %661, i32 0
%663 = extractelement <4 x float> %661, i32 1
%664 = extractelement <4 x float> %661, i32 2
%665 = fcmp oeq float %temp46.0, 0.000000e+00
%666 = select i1 %665, float 1.000000e+00, float 0.000000e+00
%667 = fmul float %662, %666
%668 = fmul float %663, %666
%669 = fmul float %664, %666
%670 = fmul float %648, %652
%671 = fadd float %670, %667
%672 = fmul float %649, %652
%673 = fadd float %672, %668
%674 = fmul float %650, %652
%675 = fadd float %674, %669
%676 = fmul float %634, %638
%677 = fadd float %676, %671
%678 = fmul float %635, %638
%679 = fadd float %678, %673
%680 = fmul float %636, %638
%681 = fadd float %680, %675
%682 = fmul float %620, %624
%683 = fadd float %682, %677
%684 = fmul float %621, %624
%685 = fadd float %684, %679
%686 = fmul float %622, %624
%687 = fadd float %686, %681
%688 = fmul float %606, %610
%689 = fadd float %688, %683
%690 = fmul float %607, %610
%691 = fadd float %690, %685
%692 = fmul float %608, %610
%693 = fadd float %692, %687
%694 = fcmp une float %33, %temp28.0
%.sink220 = select i1 %694, float %36, float %35
%temp68.0 = select i1 %694, float 1.953125e-03, float 3.906250e-03
%695 = fdiv float 1.000000e+00, %.sink220
%696 = fmul float %99, %695
%697 = fmul float %98, %695
%698 = call float @llvm.floor.f32(float %696)
%699 = fsub float %696, %698
%700 = call float @llvm.floor.f32(float %697)
%701 = fsub float %697, %700
%702 = fmul float %37, 2.000000e+00
%703 = fmul float %702, %temp68.0
%704 = fsub float 1.000000e+00, %703
%705 = fmul float %temp68.0, %37
%706 = fmul float %699, %704
%707 = fadd float %706, %705
%708 = fmul float %701, %704
%709 = fadd float %708, %705
%710 = fmul float %707, %temp28.0
%711 = fadd float %710, %temp44.0
%712 = fmul float %709, %temp28.0
%713 = fadd float %712, %temp45.0
%714 = bitcast float %711 to i32
%715 = bitcast float %713 to i32
%716 = bitcast float %225 to i32
%717 = insertelement <4 x i32> undef, i32 %714, i32 0
%718 = insertelement <4 x i32> %717, i32 %715, i32 1
%719 = insertelement <4 x i32> %718, i32 %716, i32 2
%720 = bitcast <8 x i32> %79 to <32 x i8>
%721 = bitcast <4 x i32> %81 to <16 x i8>
%722 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %719, <32 x i8> %720, <16 x i8> %721, i32 2)
%723 = extractelement <4 x float> %722, i32 0
%724 = extractelement <4 x float> %722, i32 1
%725 = extractelement <4 x float> %722, i32 2
%726 = fcmp oeq float %temp46.0, 4.000000e+00
%727 = select i1 %726, float 1.000000e+00, float 0.000000e+00
%728 = bitcast float %711 to i32
%729 = bitcast float %713 to i32
%730 = bitcast float %225 to i32
%731 = insertelement <4 x i32> undef, i32 %728, i32 0
%732 = insertelement <4 x i32> %731, i32 %729, i32 1
%733 = insertelement <4 x i32> %732, i32 %730, i32 2
%734 = bitcast <8 x i32> %71 to <32 x i8>
%735 = bitcast <4 x i32> %73 to <16 x i8>
%736 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %733, <32 x i8> %734, <16 x i8> %735, i32 2)
%737 = extractelement <4 x float> %736, i32 0
%738 = extractelement <4 x float> %736, i32 1
%739 = extractelement <4 x float> %736, i32 2
%740 = fcmp oeq float %temp46.0, 3.000000e+00
%741 = select i1 %740, float 1.000000e+00, float 0.000000e+00
%742 = bitcast float %711 to i32
%743 = bitcast float %713 to i32
%744 = bitcast float %225 to i32
%745 = insertelement <4 x i32> undef, i32 %742, i32 0
%746 = insertelement <4 x i32> %745, i32 %743, i32 1
%747 = insertelement <4 x i32> %746, i32 %744, i32 2
%748 = bitcast <8 x i32> %63 to <32 x i8>
%749 = bitcast <4 x i32> %65 to <16 x i8>
%750 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %747, <32 x i8> %748, <16 x i8> %749, i32 2)
%751 = extractelement <4 x float> %750, i32 0
%752 = extractelement <4 x float> %750, i32 1
%753 = extractelement <4 x float> %750, i32 2
%754 = fcmp oeq float %temp46.0, 2.000000e+00
%755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
%756 = bitcast float %711 to i32
%757 = bitcast float %713 to i32
%758 = bitcast float %225 to i32
%759 = insertelement <4 x i32> undef, i32 %756, i32 0
%760 = insertelement <4 x i32> %759, i32 %757, i32 1
%761 = insertelement <4 x i32> %760, i32 %758, i32 2
%762 = bitcast <8 x i32> %55 to <32 x i8>
%763 = bitcast <4 x i32> %57 to <16 x i8>
%764 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %761, <32 x i8> %762, <16 x i8> %763, i32 2)
%765 = extractelement <4 x float> %764, i32 0
%766 = extractelement <4 x float> %764, i32 1
%767 = extractelement <4 x float> %764, i32 2
%768 = fcmp oeq float %temp46.0, 1.000000e+00
%769 = select i1 %768, float 1.000000e+00, float 0.000000e+00
%770 = bitcast float %711 to i32
%771 = bitcast float %713 to i32
%772 = bitcast float %225 to i32
%773 = insertelement <4 x i32> undef, i32 %770, i32 0
%774 = insertelement <4 x i32> %773, i32 %771, i32 1
%775 = insertelement <4 x i32> %774, i32 %772, i32 2
%776 = bitcast <8 x i32> %47 to <32 x i8>
%777 = bitcast <4 x i32> %49 to <16 x i8>
%778 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %775, <32 x i8> %776, <16 x i8> %777, i32 2)
%779 = extractelement <4 x float> %778, i32 0
%780 = extractelement <4 x float> %778, i32 1
%781 = extractelement <4 x float> %778, i32 2
%782 = fcmp oeq float %temp46.0, 0.000000e+00
%783 = select i1 %782, float 1.000000e+00, float 0.000000e+00
%784 = fmul float %779, %783
%785 = fmul float %780, %783
%786 = fmul float %781, %783
%787 = fmul float %765, %769
%788 = fadd float %787, %784
%789 = fmul float %766, %769
%790 = fadd float %789, %785
%791 = fmul float %767, %769
%792 = fadd float %791, %786
%793 = fmul float %751, %755
%794 = fadd float %793, %788
%795 = fmul float %752, %755
%796 = fadd float %795, %790
%797 = fmul float %753, %755
%798 = fadd float %797, %792
%799 = fmul float %737, %741
%800 = fadd float %799, %794
%801 = fmul float %738, %741
%802 = fadd float %801, %796
%803 = fmul float %739, %741
%804 = fadd float %803, %798
%805 = fmul float %723, %727
%806 = fadd float %805, %800
%807 = fmul float %724, %727
%808 = fadd float %807, %802
%809 = fmul float %725, %727
%810 = fadd float %809, %804
%811 = fcmp une float %33, %temp28.0
%.sink221 = select i1 %811, float %36, float %35
%temp72.0 = select i1 %811, float 1.953125e-03, float 3.906250e-03
%812 = fdiv float 1.000000e+00, %.sink221
%813 = fmul float %99, %812
%814 = fmul float %97, %812
%815 = call float @llvm.floor.f32(float %813)
%816 = fsub float %813, %815
%817 = call float @llvm.floor.f32(float %814)
%818 = fsub float %814, %817
%819 = fmul float %37, 2.000000e+00
%820 = fmul float %819, %temp72.0
%821 = fsub float 1.000000e+00, %820
%822 = fmul float %temp72.0, %37
%823 = fmul float %816, %821
%824 = fadd float %823, %822
%825 = fmul float %818, %821
%826 = fadd float %825, %822
%827 = fmul float %824, %temp28.0
%828 = fadd float %827, %temp44.0
%829 = fmul float %826, %temp28.0
%830 = fadd float %829, %temp45.0
%831 = bitcast float %828 to i32
%832 = bitcast float %830 to i32
%833 = bitcast float %225 to i32
%834 = insertelement <4 x i32> undef, i32 %831, i32 0
%835 = insertelement <4 x i32> %834, i32 %832, i32 1
%836 = insertelement <4 x i32> %835, i32 %833, i32 2
%837 = bitcast <8 x i32> %79 to <32 x i8>
%838 = bitcast <4 x i32> %81 to <16 x i8>
%839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %836, <32 x i8> %837, <16 x i8> %838, i32 2)
%840 = extractelement <4 x float> %839, i32 0
%841 = extractelement <4 x float> %839, i32 1
%842 = extractelement <4 x float> %839, i32 2
%843 = fcmp oeq float %temp46.0, 4.000000e+00
%844 = select i1 %843, float 1.000000e+00, float 0.000000e+00
%845 = bitcast float %828 to i32
%846 = bitcast float %830 to i32
%847 = bitcast float %225 to i32
%848 = insertelement <4 x i32> undef, i32 %845, i32 0
%849 = insertelement <4 x i32> %848, i32 %846, i32 1
%850 = insertelement <4 x i32> %849, i32 %847, i32 2
%851 = bitcast <8 x i32> %71 to <32 x i8>
%852 = bitcast <4 x i32> %73 to <16 x i8>
%853 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %850, <32 x i8> %851, <16 x i8> %852, i32 2)
%854 = extractelement <4 x float> %853, i32 0
%855 = extractelement <4 x float> %853, i32 1
%856 = extractelement <4 x float> %853, i32 2
%857 = fcmp oeq float %temp46.0, 3.000000e+00
%858 = select i1 %857, float 1.000000e+00, float 0.000000e+00
%859 = bitcast float %828 to i32
%860 = bitcast float %830 to i32
%861 = bitcast float %225 to i32
%862 = insertelement <4 x i32> undef, i32 %859, i32 0
%863 = insertelement <4 x i32> %862, i32 %860, i32 1
%864 = insertelement <4 x i32> %863, i32 %861, i32 2
%865 = bitcast <8 x i32> %63 to <32 x i8>
%866 = bitcast <4 x i32> %65 to <16 x i8>
%867 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %864, <32 x i8> %865, <16 x i8> %866, i32 2)
%868 = extractelement <4 x float> %867, i32 0
%869 = extractelement <4 x float> %867, i32 1
%870 = extractelement <4 x float> %867, i32 2
%871 = fcmp oeq float %temp46.0, 2.000000e+00
%872 = select i1 %871, float 1.000000e+00, float 0.000000e+00
%873 = bitcast float %828 to i32
%874 = bitcast float %830 to i32
%875 = bitcast float %225 to i32
%876 = insertelement <4 x i32> undef, i32 %873, i32 0
%877 = insertelement <4 x i32> %876, i32 %874, i32 1
%878 = insertelement <4 x i32> %877, i32 %875, i32 2
%879 = bitcast <8 x i32> %55 to <32 x i8>
%880 = bitcast <4 x i32> %57 to <16 x i8>
%881 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %878, <32 x i8> %879, <16 x i8> %880, i32 2)
%882 = extractelement <4 x float> %881, i32 0
%883 = extractelement <4 x float> %881, i32 1
%884 = extractelement <4 x float> %881, i32 2
%885 = fcmp oeq float %temp46.0, 1.000000e+00
%886 = select i1 %885, float 1.000000e+00, float 0.000000e+00
%887 = bitcast float %828 to i32
%888 = bitcast float %830 to i32
%889 = bitcast float %225 to i32
%890 = insertelement <4 x i32> undef, i32 %887, i32 0
%891 = insertelement <4 x i32> %890, i32 %888, i32 1
%892 = insertelement <4 x i32> %891, i32 %889, i32 2
%893 = bitcast <8 x i32> %47 to <32 x i8>
%894 = bitcast <4 x i32> %49 to <16 x i8>
%895 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %892, <32 x i8> %893, <16 x i8> %894, i32 2)
%896 = extractelement <4 x float> %895, i32 0
%897 = extractelement <4 x float> %895, i32 1
%898 = extractelement <4 x float> %895, i32 2
%899 = fcmp oeq float %temp46.0, 0.000000e+00
%900 = select i1 %899, float 1.000000e+00, float 0.000000e+00
%901 = fmul float %896, %900
%902 = fmul float %897, %900
%903 = fmul float %898, %900
%904 = fmul float %882, %886
%905 = fadd float %904, %901
%906 = fmul float %883, %886
%907 = fadd float %906, %902
%908 = fmul float %884, %886
%909 = fadd float %908, %903
%910 = fmul float %868, %872
%911 = fadd float %910, %905
%912 = fmul float %869, %872
%913 = fadd float %912, %907
%914 = fmul float %870, %872
%915 = fadd float %914, %909
%916 = fmul float %854, %858
%917 = fadd float %916, %911
%918 = fmul float %855, %858
%919 = fadd float %918, %913
%920 = fmul float %856, %858
%921 = fadd float %920, %915
%922 = fmul float %840, %844
%923 = fadd float %922, %917
%924 = fmul float %841, %844
%925 = fadd float %924, %919
%926 = fmul float %842, %844
%927 = fadd float %926, %921
%928 = fcmp une float %33, %temp32.0
%.sink222 = select i1 %928, float %36, float %35
%temp76.0 = select i1 %928, float 1.953125e-03, float 3.906250e-03
%929 = fdiv float 1.000000e+00, %.sink222
%930 = fmul float %97, %929
%931 = fmul float %98, %929
%932 = call float @llvm.floor.f32(float %930)
%933 = fsub float %930, %932
%934 = call float @llvm.floor.f32(float %931)
%935 = fsub float %931, %934
%936 = fmul float %37, 2.000000e+00
%937 = fmul float %936, %temp76.0
%938 = fsub float 1.000000e+00, %937
%939 = fmul float %temp76.0, %37
%940 = fmul float %933, %938
%941 = fadd float %940, %939
%942 = fmul float %935, %938
%943 = fadd float %942, %939
%944 = fmul float %941, %temp32.0
%945 = fadd float %944, %temp20.0
%946 = fmul float %943, %temp32.0
%947 = fadd float %946, %temp21.0
%948 = bitcast float %945 to i32
%949 = bitcast float %947 to i32
%950 = bitcast float %225 to i32
%951 = insertelement <4 x i32> undef, i32 %948, i32 0
%952 = insertelement <4 x i32> %951, i32 %949, i32 1
%953 = insertelement <4 x i32> %952, i32 %950, i32 2
%954 = bitcast <8 x i32> %79 to <32 x i8>
%955 = bitcast <4 x i32> %81 to <16 x i8>
%956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %953, <32 x i8> %954, <16 x i8> %955, i32 2)
%957 = extractelement <4 x float> %956, i32 0
%958 = extractelement <4 x float> %956, i32 1
%959 = extractelement <4 x float> %956, i32 2
%960 = fcmp oeq float %temp22.0, 4.000000e+00
%961 = select i1 %960, float 1.000000e+00, float 0.000000e+00
%962 = bitcast float %945 to i32
%963 = bitcast float %947 to i32
%964 = bitcast float %225 to i32
%965 = insertelement <4 x i32> undef, i32 %962, i32 0
%966 = insertelement <4 x i32> %965, i32 %963, i32 1
%967 = insertelement <4 x i32> %966, i32 %964, i32 2
%968 = bitcast <8 x i32> %71 to <32 x i8>
%969 = bitcast <4 x i32> %73 to <16 x i8>
%970 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %967, <32 x i8> %968, <16 x i8> %969, i32 2)
%971 = extractelement <4 x float> %970, i32 0
%972 = extractelement <4 x float> %970, i32 1
%973 = extractelement <4 x float> %970, i32 2
%974 = fcmp oeq float %temp22.0, 3.000000e+00
%975 = select i1 %974, float 1.000000e+00, float 0.000000e+00
%976 = bitcast float %945 to i32
%977 = bitcast float %947 to i32
%978 = bitcast float %225 to i32
%979 = insertelement <4 x i32> undef, i32 %976, i32 0
%980 = insertelement <4 x i32> %979, i32 %977, i32 1
%981 = insertelement <4 x i32> %980, i32 %978, i32 2
%982 = bitcast <8 x i32> %63 to <32 x i8>
%983 = bitcast <4 x i32> %65 to <16 x i8>
%984 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %981, <32 x i8> %982, <16 x i8> %983, i32 2)
%985 = extractelement <4 x float> %984, i32 0
%986 = extractelement <4 x float> %984, i32 1
%987 = extractelement <4 x float> %984, i32 2
%988 = fcmp oeq float %temp22.0, 2.000000e+00
%989 = select i1 %988, float 1.000000e+00, float 0.000000e+00
%990 = bitcast float %945 to i32
%991 = bitcast float %947 to i32
%992 = bitcast float %225 to i32
%993 = insertelement <4 x i32> undef, i32 %990, i32 0
%994 = insertelement <4 x i32> %993, i32 %991, i32 1
%995 = insertelement <4 x i32> %994, i32 %992, i32 2
%996 = bitcast <8 x i32> %55 to <32 x i8>
%997 = bitcast <4 x i32> %57 to <16 x i8>
%998 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %995, <32 x i8> %996, <16 x i8> %997, i32 2)
%999 = extractelement <4 x float> %998, i32 0
%1000 = extractelement <4 x float> %998, i32 1
%1001 = extractelement <4 x float> %998, i32 2
%1002 = fcmp oeq float %temp22.0, 1.000000e+00
%1003 = select i1 %1002, float 1.000000e+00, float 0.000000e+00
%1004 = bitcast float %945 to i32
%1005 = bitcast float %947 to i32
%1006 = bitcast float %225 to i32
%1007 = insertelement <4 x i32> undef, i32 %1004, i32 0
%1008 = insertelement <4 x i32> %1007, i32 %1005, i32 1
%1009 = insertelement <4 x i32> %1008, i32 %1006, i32 2
%1010 = bitcast <8 x i32> %47 to <32 x i8>
%1011 = bitcast <4 x i32> %49 to <16 x i8>
%1012 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1009, <32 x i8> %1010, <16 x i8> %1011, i32 2)
%1013 = extractelement <4 x float> %1012, i32 0
%1014 = extractelement <4 x float> %1012, i32 1
%1015 = extractelement <4 x float> %1012, i32 2
%1016 = fcmp oeq float %temp22.0, 0.000000e+00
%1017 = select i1 %1016, float 1.000000e+00, float 0.000000e+00
%1018 = fmul float %1013, %1017
%1019 = fmul float %1014, %1017
%1020 = fmul float %1015, %1017
%1021 = fmul float %999, %1003
%1022 = fadd float %1021, %1018
%1023 = fmul float %1000, %1003
%1024 = fadd float %1023, %1019
%1025 = fmul float %1001, %1003
%1026 = fadd float %1025, %1020
%1027 = fmul float %985, %989
%1028 = fadd float %1027, %1022
%1029 = fmul float %986, %989
%1030 = fadd float %1029, %1024
%1031 = fmul float %987, %989
%1032 = fadd float %1031, %1026
%1033 = fmul float %971, %975
%1034 = fadd float %1033, %1028
%1035 = fmul float %972, %975
%1036 = fadd float %1035, %1030
%1037 = fmul float %973, %975
%1038 = fadd float %1037, %1032
%1039 = fmul float %957, %961
%1040 = fadd float %1039, %1034
%1041 = fmul float %958, %961
%1042 = fadd float %1041, %1036
%1043 = fmul float %959, %961
%1044 = fadd float %1043, %1038
%1045 = fcmp une float %33, %temp32.0
%.sink223 = select i1 %1045, float %36, float %35
%temp80.0 = select i1 %1045, float 1.953125e-03, float 3.906250e-03
%1046 = fdiv float 1.000000e+00, %.sink223
%1047 = fmul float %99, %1046
%1048 = fmul float %98, %1046
%1049 = call float @llvm.floor.f32(float %1047)
%1050 = fsub float %1047, %1049
%1051 = call float @llvm.floor.f32(float %1048)
%1052 = fsub float %1048, %1051
%1053 = fmul float %37, 2.000000e+00
%1054 = fmul float %1053, %temp80.0
%1055 = fsub float 1.000000e+00, %1054
%1056 = fmul float %temp80.0, %37
%1057 = fmul float %1050, %1055
%1058 = fadd float %1057, %1056
%1059 = fmul float %1052, %1055
%1060 = fadd float %1059, %1056
%1061 = fmul float %1058, %temp32.0
%1062 = fadd float %1061, %temp20.0
%1063 = fmul float %1060, %temp32.0
%1064 = fadd float %1063, %temp21.0
%1065 = bitcast float %1062 to i32
%1066 = bitcast float %1064 to i32
%1067 = bitcast float %225 to i32
%1068 = insertelement <4 x i32> undef, i32 %1065, i32 0
%1069 = insertelement <4 x i32> %1068, i32 %1066, i32 1
%1070 = insertelement <4 x i32> %1069, i32 %1067, i32 2
%1071 = bitcast <8 x i32> %79 to <32 x i8>
%1072 = bitcast <4 x i32> %81 to <16 x i8>
%1073 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1070, <32 x i8> %1071, <16 x i8> %1072, i32 2)
%1074 = extractelement <4 x float> %1073, i32 0
%1075 = extractelement <4 x float> %1073, i32 1
%1076 = extractelement <4 x float> %1073, i32 2
%1077 = fcmp oeq float %temp22.0, 4.000000e+00
%1078 = select i1 %1077, float 1.000000e+00, float 0.000000e+00
%1079 = bitcast float %1062 to i32
%1080 = bitcast float %1064 to i32
%1081 = bitcast float %225 to i32
%1082 = insertelement <4 x i32> undef, i32 %1079, i32 0
%1083 = insertelement <4 x i32> %1082, i32 %1080, i32 1
%1084 = insertelement <4 x i32> %1083, i32 %1081, i32 2
%1085 = bitcast <8 x i32> %71 to <32 x i8>
%1086 = bitcast <4 x i32> %73 to <16 x i8>
%1087 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1084, <32 x i8> %1085, <16 x i8> %1086, i32 2)
%1088 = extractelement <4 x float> %1087, i32 0
%1089 = extractelement <4 x float> %1087, i32 1
%1090 = extractelement <4 x float> %1087, i32 2
%1091 = fcmp oeq float %temp22.0, 3.000000e+00
%1092 = select i1 %1091, float 1.000000e+00, float 0.000000e+00
%1093 = bitcast float %1062 to i32
%1094 = bitcast float %1064 to i32
%1095 = bitcast float %225 to i32
%1096 = insertelement <4 x i32> undef, i32 %1093, i32 0
%1097 = insertelement <4 x i32> %1096, i32 %1094, i32 1
%1098 = insertelement <4 x i32> %1097, i32 %1095, i32 2
%1099 = bitcast <8 x i32> %63 to <32 x i8>
%1100 = bitcast <4 x i32> %65 to <16 x i8>
%1101 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1098, <32 x i8> %1099, <16 x i8> %1100, i32 2)
%1102 = extractelement <4 x float> %1101, i32 0
%1103 = extractelement <4 x float> %1101, i32 1
%1104 = extractelement <4 x float> %1101, i32 2
%1105 = fcmp oeq float %temp22.0, 2.000000e+00
%1106 = select i1 %1105, float 1.000000e+00, float 0.000000e+00
%1107 = bitcast float %1062 to i32
%1108 = bitcast float %1064 to i32
%1109 = bitcast float %225 to i32
%1110 = insertelement <4 x i32> undef, i32 %1107, i32 0
%1111 = insertelement <4 x i32> %1110, i32 %1108, i32 1
%1112 = insertelement <4 x i32> %1111, i32 %1109, i32 2
%1113 = bitcast <8 x i32> %55 to <32 x i8>
%1114 = bitcast <4 x i32> %57 to <16 x i8>
%1115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1112, <32 x i8> %1113, <16 x i8> %1114, i32 2)
%1116 = extractelement <4 x float> %1115, i32 0
%1117 = extractelement <4 x float> %1115, i32 1
%1118 = extractelement <4 x float> %1115, i32 2
%1119 = fcmp oeq float %temp22.0, 1.000000e+00
%1120 = select i1 %1119, float 1.000000e+00, float 0.000000e+00
%1121 = bitcast float %1062 to i32
%1122 = bitcast float %1064 to i32
%1123 = bitcast float %225 to i32
%1124 = insertelement <4 x i32> undef, i32 %1121, i32 0
%1125 = insertelement <4 x i32> %1124, i32 %1122, i32 1
%1126 = insertelement <4 x i32> %1125, i32 %1123, i32 2
%1127 = bitcast <8 x i32> %47 to <32 x i8>
%1128 = bitcast <4 x i32> %49 to <16 x i8>
%1129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2)
%1130 = extractelement <4 x float> %1129, i32 0
%1131 = extractelement <4 x float> %1129, i32 1
%1132 = extractelement <4 x float> %1129, i32 2
%1133 = fcmp oeq float %temp22.0, 0.000000e+00
%1134 = select i1 %1133, float 1.000000e+00, float 0.000000e+00
%1135 = fmul float %1130, %1134
%1136 = fmul float %1131, %1134
%1137 = fmul float %1132, %1134
%1138 = fmul float %1116, %1120
%1139 = fadd float %1138, %1135
%1140 = fmul float %1117, %1120
%1141 = fadd float %1140, %1136
%1142 = fmul float %1118, %1120
%1143 = fadd float %1142, %1137
%1144 = fmul float %1102, %1106
%1145 = fadd float %1144, %1139
%1146 = fmul float %1103, %1106
%1147 = fadd float %1146, %1141
%1148 = fmul float %1104, %1106
%1149 = fadd float %1148, %1143
%1150 = fmul float %1088, %1092
%1151 = fadd float %1150, %1145
%1152 = fmul float %1089, %1092
%1153 = fadd float %1152, %1147
%1154 = fmul float %1090, %1092
%1155 = fadd float %1154, %1149
%1156 = fmul float %1074, %1078
%1157 = fadd float %1156, %1151
%1158 = fmul float %1075, %1078
%1159 = fadd float %1158, %1153
%1160 = fmul float %1076, %1078
%1161 = fadd float %1160, %1155
%1162 = fcmp une float %33, %temp32.0
%.sink224 = select i1 %1162, float %36, float %35
%temp84.0 = select i1 %1162, float 1.953125e-03, float 3.906250e-03
%1163 = fdiv float 1.000000e+00, %.sink224
%1164 = fmul float %99, %1163
%1165 = fmul float %97, %1163
%1166 = call float @llvm.floor.f32(float %1164)
%1167 = fsub float %1164, %1166
%1168 = call float @llvm.floor.f32(float %1165)
%1169 = fsub float %1165, %1168
%1170 = fmul float %37, 2.000000e+00
%1171 = fmul float %1170, %temp84.0
%1172 = fsub float 1.000000e+00, %1171
%1173 = fmul float %temp84.0, %37
%1174 = fmul float %1167, %1172
%1175 = fadd float %1174, %1173
%1176 = fmul float %1169, %1172
%1177 = fadd float %1176, %1173
%1178 = fmul float %1175, %temp32.0
%1179 = fadd float %1178, %temp20.0
%1180 = fmul float %1177, %temp32.0
%1181 = fadd float %1180, %temp21.0
%1182 = bitcast float %1179 to i32
%1183 = bitcast float %1181 to i32
%1184 = bitcast float %225 to i32
%1185 = insertelement <4 x i32> undef, i32 %1182, i32 0
%1186 = insertelement <4 x i32> %1185, i32 %1183, i32 1
%1187 = insertelement <4 x i32> %1186, i32 %1184, i32 2
%1188 = bitcast <8 x i32> %79 to <32 x i8>
%1189 = bitcast <4 x i32> %81 to <16 x i8>
%1190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1187, <32 x i8> %1188, <16 x i8> %1189, i32 2)
%1191 = extractelement <4 x float> %1190, i32 0
%1192 = extractelement <4 x float> %1190, i32 1
%1193 = extractelement <4 x float> %1190, i32 2
%1194 = fcmp oeq float %temp22.0, 4.000000e+00
%1195 = select i1 %1194, float 1.000000e+00, float 0.000000e+00
%1196 = bitcast float %1179 to i32
%1197 = bitcast float %1181 to i32
%1198 = bitcast float %225 to i32
%1199 = insertelement <4 x i32> undef, i32 %1196, i32 0
%1200 = insertelement <4 x i32> %1199, i32 %1197, i32 1
%1201 = insertelement <4 x i32> %1200, i32 %1198, i32 2
%1202 = bitcast <8 x i32> %71 to <32 x i8>
%1203 = bitcast <4 x i32> %73 to <16 x i8>
%1204 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1201, <32 x i8> %1202, <16 x i8> %1203, i32 2)
%1205 = extractelement <4 x float> %1204, i32 0
%1206 = extractelement <4 x float> %1204, i32 1
%1207 = extractelement <4 x float> %1204, i32 2
%1208 = fcmp oeq float %temp22.0, 3.000000e+00
%1209 = select i1 %1208, float 1.000000e+00, float 0.000000e+00
%1210 = bitcast float %1179 to i32
%1211 = bitcast float %1181 to i32
%1212 = bitcast float %225 to i32
%1213 = insertelement <4 x i32> undef, i32 %1210, i32 0
%1214 = insertelement <4 x i32> %1213, i32 %1211, i32 1
%1215 = insertelement <4 x i32> %1214, i32 %1212, i32 2
%1216 = bitcast <8 x i32> %63 to <32 x i8>
%1217 = bitcast <4 x i32> %65 to <16 x i8>
%1218 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1215, <32 x i8> %1216, <16 x i8> %1217, i32 2)
%1219 = extractelement <4 x float> %1218, i32 0
%1220 = extractelement <4 x float> %1218, i32 1
%1221 = extractelement <4 x float> %1218, i32 2
%1222 = fcmp oeq float %temp22.0, 2.000000e+00
%1223 = select i1 %1222, float 1.000000e+00, float 0.000000e+00
%1224 = bitcast float %1179 to i32
%1225 = bitcast float %1181 to i32
%1226 = bitcast float %225 to i32
%1227 = insertelement <4 x i32> undef, i32 %1224, i32 0
%1228 = insertelement <4 x i32> %1227, i32 %1225, i32 1
%1229 = insertelement <4 x i32> %1228, i32 %1226, i32 2
%1230 = bitcast <8 x i32> %55 to <32 x i8>
%1231 = bitcast <4 x i32> %57 to <16 x i8>
%1232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1229, <32 x i8> %1230, <16 x i8> %1231, i32 2)
%1233 = extractelement <4 x float> %1232, i32 0
%1234 = extractelement <4 x float> %1232, i32 1
%1235 = extractelement <4 x float> %1232, i32 2
%1236 = fcmp oeq float %temp22.0, 1.000000e+00
%1237 = select i1 %1236, float 1.000000e+00, float 0.000000e+00
%1238 = bitcast float %1179 to i32
%1239 = bitcast float %1181 to i32
%1240 = bitcast float %225 to i32
%1241 = insertelement <4 x i32> undef, i32 %1238, i32 0
%1242 = insertelement <4 x i32> %1241, i32 %1239, i32 1
%1243 = insertelement <4 x i32> %1242, i32 %1240, i32 2
%1244 = bitcast <8 x i32> %47 to <32 x i8>
%1245 = bitcast <4 x i32> %49 to <16 x i8>
%1246 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1243, <32 x i8> %1244, <16 x i8> %1245, i32 2)
%1247 = extractelement <4 x float> %1246, i32 0
%1248 = extractelement <4 x float> %1246, i32 1
%1249 = extractelement <4 x float> %1246, i32 2
%1250 = fcmp oeq float %temp22.0, 0.000000e+00
%1251 = select i1 %1250, float 1.000000e+00, float 0.000000e+00
%1252 = fmul float %1247, %1251
%1253 = fmul float %1248, %1251
%1254 = fmul float %1249, %1251
%1255 = fmul float %1233, %1237
%1256 = fadd float %1255, %1252
%1257 = fmul float %1234, %1237
%1258 = fadd float %1257, %1253
%1259 = fmul float %1235, %1237
%1260 = fadd float %1259, %1254
%1261 = fmul float %1219, %1223
%1262 = fadd float %1261, %1256
%1263 = fmul float %1220, %1223
%1264 = fadd float %1263, %1258
%1265 = fmul float %1221, %1223
%1266 = fadd float %1265, %1260
%1267 = fmul float %1205, %1209
%1268 = fadd float %1267, %1262
%1269 = fmul float %1206, %1209
%1270 = fadd float %1269, %1264
%1271 = fmul float %1207, %1209
%1272 = fadd float %1271, %1266
%1273 = fmul float %1191, %1195
%1274 = fadd float %1273, %1268
%1275 = fmul float %1192, %1195
%1276 = fadd float %1275, %1270
%1277 = fmul float %1193, %1195
%1278 = fadd float %1277, %1272
%1279 = fmul float %1040, %150
%1280 = fmul float %1042, %150
%1281 = fmul float %1044, %150
%1282 = fmul float %1157, %148
%1283 = fadd float %1282, %1279
%1284 = fmul float %1159, %148
%1285 = fadd float %1284, %1280
%1286 = fmul float %1161, %148
%1287 = fadd float %1286, %1281
%1288 = fmul float %1274, %149
%1289 = fadd float %1288, %1283
%1290 = fmul float %1276, %149
%1291 = fadd float %1290, %1285
%1292 = fmul float %1278, %149
%1293 = fadd float %1292, %1287
%1294 = fmul float %689, %150
%1295 = fmul float %691, %150
%1296 = fmul float %693, %150
%1297 = fmul float %806, %148
%1298 = fadd float %1297, %1294
%1299 = fmul float %808, %148
%1300 = fadd float %1299, %1295
%1301 = fmul float %810, %148
%1302 = fadd float %1301, %1296
%1303 = fmul float %923, %149
%1304 = fadd float %1303, %1298
%1305 = fmul float %925, %149
%1306 = fadd float %1305, %1300
%1307 = fmul float %927, %149
%1308 = fadd float %1307, %1302
%1309 = fmul float %338, %150
%1310 = fmul float %340, %150
%1311 = fmul float %342, %150
%1312 = fmul float %455, %148
%1313 = fadd float %1312, %1309
%1314 = fmul float %457, %148
%1315 = fadd float %1314, %1310
%1316 = fmul float %459, %148
%1317 = fadd float %1316, %1311
%1318 = fmul float %572, %149
%1319 = fadd float %1318, %1313
%1320 = fmul float %574, %149
%1321 = fadd float %1320, %1315
%1322 = fmul float %576, %149
%1323 = fadd float %1322, %1317
%1324 = fmul float %90, %1319
%1325 = fmul float %90, %1321
%1326 = fmul float %90, %1323
%1327 = fmul float %91, %1304
%1328 = fadd float %1327, %1324
%1329 = fmul float %91, %1306
%1330 = fadd float %1329, %1325
%1331 = fmul float %91, %1308
%1332 = fadd float %1331, %1326
%1333 = fmul float %92, %1289
%1334 = fadd float %1333, %1328
%1335 = fmul float %92, %1291
%1336 = fadd float %1335, %1330
%1337 = fmul float %92, %1293
%1338 = fadd float %1337, %1332
%1339 = fcmp une float %33, %temp24.0
%.sink225 = select i1 %1339, float %36, float %35
%temp56.2 = select i1 %1339, float 1.953125e-03, float 3.906250e-03
%1340 = fdiv float 1.000000e+00, %.sink225
%1341 = fmul float %99, %1340
%1342 = fmul float %98, %1340
%1343 = call float @llvm.floor.f32(float %1341)
%1344 = fsub float %1341, %1343
%1345 = call float @llvm.floor.f32(float %1342)
%1346 = fsub float %1342, %1345
%1347 = fmul float %37, 2.000000e+00
%1348 = fmul float %1347, %temp56.2
%1349 = fsub float 1.000000e+00, %1348
%1350 = fmul float %temp56.2, %37
%1351 = fmul float %1344, %1349
%1352 = fadd float %1351, %1350
%1353 = fmul float %1346, %1349
%1354 = fadd float %1353, %1350
%1355 = fmul float %1352, %temp24.0
%1356 = fadd float %1355, %temp36.0
%1357 = fmul float %1354, %temp24.0
%1358 = fadd float %1357, %temp37.0
%1359 = bitcast float %1356 to i32
%1360 = bitcast float %1358 to i32
%1361 = bitcast float %225 to i32
%1362 = insertelement <4 x i32> undef, i32 %1359, i32 0
%1363 = insertelement <4 x i32> %1362, i32 %1360, i32 1
%1364 = insertelement <4 x i32> %1363, i32 %1361, i32 2
%1365 = bitcast <8 x i32> %83 to <32 x i8>
%1366 = bitcast <4 x i32> %85 to <16 x i8>
%1367 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1364, <32 x i8> %1365, <16 x i8> %1366, i32 2)
%1368 = extractelement <4 x float> %1367, i32 1
%1369 = extractelement <4 x float> %1367, i32 3
%1370 = fcmp oeq float %temp38.0, 4.000000e+00
%1371 = select i1 %1370, float 1.000000e+00, float 0.000000e+00
%1372 = bitcast float %1356 to i32
%1373 = bitcast float %1358 to i32
%1374 = bitcast float %225 to i32
%1375 = insertelement <4 x i32> undef, i32 %1372, i32 0
%1376 = insertelement <4 x i32> %1375, i32 %1373, i32 1
%1377 = insertelement <4 x i32> %1376, i32 %1374, i32 2
%1378 = bitcast <8 x i32> %75 to <32 x i8>
%1379 = bitcast <4 x i32> %77 to <16 x i8>
%1380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1377, <32 x i8> %1378, <16 x i8> %1379, i32 2)
%1381 = extractelement <4 x float> %1380, i32 1
%1382 = extractelement <4 x float> %1380, i32 3
%1383 = fcmp oeq float %temp38.0, 3.000000e+00
%1384 = select i1 %1383, float 1.000000e+00, float 0.000000e+00
%1385 = bitcast float %1356 to i32
%1386 = bitcast float %1358 to i32
%1387 = bitcast float %225 to i32
%1388 = insertelement <4 x i32> undef, i32 %1385, i32 0
%1389 = insertelement <4 x i32> %1388, i32 %1386, i32 1
%1390 = insertelement <4 x i32> %1389, i32 %1387, i32 2
%1391 = bitcast <8 x i32> %67 to <32 x i8>
%1392 = bitcast <4 x i32> %69 to <16 x i8>
%1393 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1390, <32 x i8> %1391, <16 x i8> %1392, i32 2)
%1394 = extractelement <4 x float> %1393, i32 1
%1395 = extractelement <4 x float> %1393, i32 3
%1396 = fcmp oeq float %temp38.0, 2.000000e+00
%1397 = select i1 %1396, float 1.000000e+00, float 0.000000e+00
%1398 = bitcast float %1356 to i32
%1399 = bitcast float %1358 to i32
%1400 = bitcast float %225 to i32
%1401 = insertelement <4 x i32> undef, i32 %1398, i32 0
%1402 = insertelement <4 x i32> %1401, i32 %1399, i32 1
%1403 = insertelement <4 x i32> %1402, i32 %1400, i32 2
%1404 = bitcast <8 x i32> %59 to <32 x i8>
%1405 = bitcast <4 x i32> %61 to <16 x i8>
%1406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2)
%1407 = extractelement <4 x float> %1406, i32 1
%1408 = extractelement <4 x float> %1406, i32 3
%1409 = fcmp oeq float %temp38.0, 1.000000e+00
%1410 = select i1 %1409, float 1.000000e+00, float 0.000000e+00
%1411 = bitcast float %1356 to i32
%1412 = bitcast float %1358 to i32
%1413 = bitcast float %225 to i32
%1414 = insertelement <4 x i32> undef, i32 %1411, i32 0
%1415 = insertelement <4 x i32> %1414, i32 %1412, i32 1
%1416 = insertelement <4 x i32> %1415, i32 %1413, i32 2
%1417 = bitcast <8 x i32> %51 to <32 x i8>
%1418 = bitcast <4 x i32> %53 to <16 x i8>
%1419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1416, <32 x i8> %1417, <16 x i8> %1418, i32 2)
%1420 = extractelement <4 x float> %1419, i32 1
%1421 = extractelement <4 x float> %1419, i32 3
%1422 = fcmp oeq float %temp38.0, 0.000000e+00
%1423 = select i1 %1422, float 1.000000e+00, float 0.000000e+00
%1424 = fmul float %1420, %1423
%1425 = fmul float %1421, %1423
%1426 = fmul float %1407, %1410
%1427 = fadd float %1426, %1424
%1428 = fmul float %1408, %1410
%1429 = fadd float %1428, %1425
%1430 = fmul float %1394, %1397
%1431 = fadd float %1430, %1427
%1432 = fmul float %1395, %1397
%1433 = fadd float %1432, %1429
%1434 = fmul float %1381, %1384
%1435 = fadd float %1434, %1431
%1436 = fmul float %1382, %1384
%1437 = fadd float %1436, %1433
%1438 = fmul float %1368, %1371
%1439 = fadd float %1438, %1435
%1440 = fmul float %1369, %1371
%1441 = fadd float %1440, %1437
%1442 = fmul float %1441, 2.000000e+00
%1443 = fadd float %1442, -1.000000e+00
%1444 = fmul float %1439, 2.000000e+00
%1445 = fadd float %1444, -1.000000e+00
%1446 = fmul float %1443, %1443
%1447 = fmul float %1445, %1445
%1448 = fadd float %1446, %1447
%1449 = call float @llvm.AMDIL.clamp.(float %1448, float 0.000000e+00, float 1.000000e+00)
%1450 = fcmp une float %33, %temp24.0
%.sink226 = select i1 %1450, float %36, float %35
%temp60.2 = select i1 %1450, float 1.953125e-03, float 3.906250e-03
%1451 = fdiv float 1.000000e+00, %.sink226
%1452 = fmul float %99, %1451
%1453 = fmul float %97, %1451
%1454 = call float @llvm.floor.f32(float %1452)
%1455 = fsub float %1452, %1454
%1456 = call float @llvm.floor.f32(float %1453)
%1457 = fsub float %1453, %1456
%1458 = fmul float %37, 2.000000e+00
%1459 = fmul float %1458, %temp60.2
%1460 = fsub float 1.000000e+00, %1459
%1461 = fmul float %temp60.2, %37
%1462 = fmul float %1455, %1460
%1463 = fadd float %1462, %1461
%1464 = fmul float %1457, %1460
%1465 = fadd float %1464, %1461
%1466 = fmul float %1463, %temp24.0
%1467 = fadd float %1466, %temp36.0
%1468 = fmul float %1465, %temp24.0
%1469 = fadd float %1468, %temp37.0
%1470 = bitcast float %1467 to i32
%1471 = bitcast float %1469 to i32
%1472 = bitcast float %225 to i32
%1473 = insertelement <4 x i32> undef, i32 %1470, i32 0
%1474 = insertelement <4 x i32> %1473, i32 %1471, i32 1
%1475 = insertelement <4 x i32> %1474, i32 %1472, i32 2
%1476 = bitcast <8 x i32> %83 to <32 x i8>
%1477 = bitcast <4 x i32> %85 to <16 x i8>
%1478 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1475, <32 x i8> %1476, <16 x i8> %1477, i32 2)
%1479 = extractelement <4 x float> %1478, i32 1
%1480 = extractelement <4 x float> %1478, i32 3
%1481 = fcmp oeq float %temp38.0, 4.000000e+00
%1482 = select i1 %1481, float 1.000000e+00, float 0.000000e+00
%1483 = bitcast float %1467 to i32
%1484 = bitcast float %1469 to i32
%1485 = bitcast float %225 to i32
%1486 = insertelement <4 x i32> undef, i32 %1483, i32 0
%1487 = insertelement <4 x i32> %1486, i32 %1484, i32 1
%1488 = insertelement <4 x i32> %1487, i32 %1485, i32 2
%1489 = bitcast <8 x i32> %75 to <32 x i8>
%1490 = bitcast <4 x i32> %77 to <16 x i8>
%1491 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1488, <32 x i8> %1489, <16 x i8> %1490, i32 2)
%1492 = extractelement <4 x float> %1491, i32 1
%1493 = extractelement <4 x float> %1491, i32 3
%1494 = fcmp oeq float %temp38.0, 3.000000e+00
%1495 = select i1 %1494, float 1.000000e+00, float 0.000000e+00
%1496 = bitcast float %1467 to i32
%1497 = bitcast float %1469 to i32
%1498 = bitcast float %225 to i32
%1499 = insertelement <4 x i32> undef, i32 %1496, i32 0
%1500 = insertelement <4 x i32> %1499, i32 %1497, i32 1
%1501 = insertelement <4 x i32> %1500, i32 %1498, i32 2
%1502 = bitcast <8 x i32> %67 to <32 x i8>
%1503 = bitcast <4 x i32> %69 to <16 x i8>
%1504 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1501, <32 x i8> %1502, <16 x i8> %1503, i32 2)
%1505 = extractelement <4 x float> %1504, i32 1
%1506 = extractelement <4 x float> %1504, i32 3
%1507 = fcmp oeq float %temp38.0, 2.000000e+00
%1508 = select i1 %1507, float 1.000000e+00, float 0.000000e+00
%1509 = bitcast float %1467 to i32
%1510 = bitcast float %1469 to i32
%1511 = bitcast float %225 to i32
%1512 = insertelement <4 x i32> undef, i32 %1509, i32 0
%1513 = insertelement <4 x i32> %1512, i32 %1510, i32 1
%1514 = insertelement <4 x i32> %1513, i32 %1511, i32 2
%1515 = bitcast <8 x i32> %59 to <32 x i8>
%1516 = bitcast <4 x i32> %61 to <16 x i8>
%1517 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1514, <32 x i8> %1515, <16 x i8> %1516, i32 2)
%1518 = extractelement <4 x float> %1517, i32 1
%1519 = extractelement <4 x float> %1517, i32 3
%1520 = fcmp oeq float %temp38.0, 1.000000e+00
%1521 = select i1 %1520, float 1.000000e+00, float 0.000000e+00
%1522 = bitcast float %1467 to i32
%1523 = bitcast float %1469 to i32
%1524 = bitcast float %225 to i32
%1525 = insertelement <4 x i32> undef, i32 %1522, i32 0
%1526 = insertelement <4 x i32> %1525, i32 %1523, i32 1
%1527 = insertelement <4 x i32> %1526, i32 %1524, i32 2
%1528 = bitcast <8 x i32> %51 to <32 x i8>
%1529 = bitcast <4 x i32> %53 to <16 x i8>
%1530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1527, <32 x i8> %1528, <16 x i8> %1529, i32 2)
%1531 = extractelement <4 x float> %1530, i32 1
%1532 = extractelement <4 x float> %1530, i32 3
%1533 = fcmp oeq float %temp38.0, 0.000000e+00
%1534 = select i1 %1533, float 1.000000e+00, float 0.000000e+00
%1535 = fmul float %1531, %1534
%1536 = fmul float %1532, %1534
%1537 = fmul float %1518, %1521
%1538 = fadd float %1537, %1535
%1539 = fmul float %1519, %1521
%1540 = fadd float %1539, %1536
%1541 = fmul float %1505, %1508
%1542 = fadd float %1541, %1538
%1543 = fmul float %1506, %1508
%1544 = fadd float %1543, %1540
%1545 = fmul float %1492, %1495
%1546 = fadd float %1545, %1542
%1547 = fmul float %1493, %1495
%1548 = fadd float %1547, %1544
%1549 = fmul float %1479, %1482
%1550 = fadd float %1549, %1546
%1551 = fmul float %1480, %1482
%1552 = fadd float %1551, %1548
%1553 = fmul float %1552, 2.000000e+00
%1554 = fadd float %1553, -1.000000e+00
%1555 = fmul float %1550, 2.000000e+00
%1556 = fadd float %1555, -1.000000e+00
%1557 = fmul float %1554, %1554
%1558 = fmul float %1556, %1556
%1559 = fadd float %1557, %1558
%1560 = call float @llvm.AMDIL.clamp.(float %1559, float 0.000000e+00, float 1.000000e+00)
%1561 = fcmp une float %33, %temp24.0
%.sink227 = select i1 %1561, float %36, float %35
%temp64.2 = select i1 %1561, float 1.953125e-03, float 3.906250e-03
%1562 = fdiv float 1.000000e+00, %.sink227
%1563 = fmul float %97, %1562
%1564 = fmul float %98, %1562
%1565 = call float @llvm.floor.f32(float %1563)
%1566 = fsub float %1563, %1565
%1567 = call float @llvm.floor.f32(float %1564)
%1568 = fsub float %1564, %1567
%1569 = fmul float %37, 2.000000e+00
%1570 = fmul float %1569, %temp64.2
%1571 = fsub float 1.000000e+00, %1570
%1572 = fmul float %temp64.2, %37
%1573 = fmul float %1566, %1571
%1574 = fadd float %1573, %1572
%1575 = fmul float %1568, %1571
%1576 = fadd float %1575, %1572
%1577 = fmul float %1574, %temp24.0
%1578 = fadd float %1577, %temp36.0
%1579 = fmul float %1576, %temp24.0
%1580 = fadd float %1579, %temp37.0
%1581 = bitcast float %1578 to i32
%1582 = bitcast float %1580 to i32
%1583 = bitcast float %225 to i32
%1584 = insertelement <4 x i32> undef, i32 %1581, i32 0
%1585 = insertelement <4 x i32> %1584, i32 %1582, i32 1
%1586 = insertelement <4 x i32> %1585, i32 %1583, i32 2
%1587 = bitcast <8 x i32> %83 to <32 x i8>
%1588 = bitcast <4 x i32> %85 to <16 x i8>
%1589 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1586, <32 x i8> %1587, <16 x i8> %1588, i32 2)
%1590 = extractelement <4 x float> %1589, i32 1
%1591 = extractelement <4 x float> %1589, i32 3
%1592 = fcmp oeq float %temp38.0, 4.000000e+00
%1593 = select i1 %1592, float 1.000000e+00, float 0.000000e+00
%1594 = bitcast float %1578 to i32
%1595 = bitcast float %1580 to i32
%1596 = bitcast float %225 to i32
%1597 = insertelement <4 x i32> undef, i32 %1594, i32 0
%1598 = insertelement <4 x i32> %1597, i32 %1595, i32 1
%1599 = insertelement <4 x i32> %1598, i32 %1596, i32 2
%1600 = bitcast <8 x i32> %75 to <32 x i8>
%1601 = bitcast <4 x i32> %77 to <16 x i8>
%1602 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1599, <32 x i8> %1600, <16 x i8> %1601, i32 2)
%1603 = extractelement <4 x float> %1602, i32 1
%1604 = extractelement <4 x float> %1602, i32 3
%1605 = fcmp oeq float %temp38.0, 3.000000e+00
%1606 = select i1 %1605, float 1.000000e+00, float 0.000000e+00
%1607 = bitcast float %1578 to i32
%1608 = bitcast float %1580 to i32
%1609 = bitcast float %225 to i32
%1610 = insertelement <4 x i32> undef, i32 %1607, i32 0
%1611 = insertelement <4 x i32> %1610, i32 %1608, i32 1
%1612 = insertelement <4 x i32> %1611, i32 %1609, i32 2
%1613 = bitcast <8 x i32> %67 to <32 x i8>
%1614 = bitcast <4 x i32> %69 to <16 x i8>
%1615 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1612, <32 x i8> %1613, <16 x i8> %1614, i32 2)
%1616 = extractelement <4 x float> %1615, i32 1
%1617 = extractelement <4 x float> %1615, i32 3
%1618 = fcmp oeq float %temp38.0, 2.000000e+00
%1619 = select i1 %1618, float 1.000000e+00, float 0.000000e+00
%1620 = bitcast float %1578 to i32
%1621 = bitcast float %1580 to i32
%1622 = bitcast float %225 to i32
%1623 = insertelement <4 x i32> undef, i32 %1620, i32 0
%1624 = insertelement <4 x i32> %1623, i32 %1621, i32 1
%1625 = insertelement <4 x i32> %1624, i32 %1622, i32 2
%1626 = bitcast <8 x i32> %59 to <32 x i8>
%1627 = bitcast <4 x i32> %61 to <16 x i8>
%1628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1625, <32 x i8> %1626, <16 x i8> %1627, i32 2)
%1629 = extractelement <4 x float> %1628, i32 1
%1630 = extractelement <4 x float> %1628, i32 3
%1631 = fcmp oeq float %temp38.0, 1.000000e+00
%1632 = select i1 %1631, float 1.000000e+00, float 0.000000e+00
%1633 = bitcast float %1578 to i32
%1634 = bitcast float %1580 to i32
%1635 = bitcast float %225 to i32
%1636 = insertelement <4 x i32> undef, i32 %1633, i32 0
%1637 = insertelement <4 x i32> %1636, i32 %1634, i32 1
%1638 = insertelement <4 x i32> %1637, i32 %1635, i32 2
%1639 = bitcast <8 x i32> %51 to <32 x i8>
%1640 = bitcast <4 x i32> %53 to <16 x i8>
%1641 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1638, <32 x i8> %1639, <16 x i8> %1640, i32 2)
%1642 = extractelement <4 x float> %1641, i32 1
%1643 = extractelement <4 x float> %1641, i32 3
%1644 = fcmp oeq float %temp38.0, 0.000000e+00
%1645 = select i1 %1644, float 1.000000e+00, float 0.000000e+00
%1646 = fmul float %1642, %1645
%1647 = fmul float %1643, %1645
%1648 = fmul float %1629, %1632
%1649 = fadd float %1648, %1646
%1650 = fmul float %1630, %1632
%1651 = fadd float %1650, %1647
%1652 = fmul float %1616, %1619
%1653 = fadd float %1652, %1649
%1654 = fmul float %1617, %1619
%1655 = fadd float %1654, %1651
%1656 = fmul float %1603, %1606
%1657 = fadd float %1656, %1653
%1658 = fmul float %1604, %1606
%1659 = fadd float %1658, %1655
%1660 = fmul float %1590, %1593
%1661 = fadd float %1660, %1657
%1662 = fmul float %1591, %1593
%1663 = fadd float %1662, %1659
%1664 = fmul float %1663, 2.000000e+00
%1665 = fadd float %1664, -1.000000e+00
%1666 = fmul float %1661, 2.000000e+00
%1667 = fadd float %1666, -1.000000e+00
%1668 = fmul float %1665, %1665
%1669 = fmul float %1667, %1667
%1670 = fadd float %1668, %1669
%1671 = call float @llvm.AMDIL.clamp.(float %1670, float 0.000000e+00, float 1.000000e+00)
%1672 = fmul float %148, 0.000000e+00
%1673 = fmul float %1443, %148
%1674 = fmul float %1445, %148
%1675 = fmul float %1556, %149
%1676 = fadd float %1675, %1672
%1677 = fmul float %149, 0.000000e+00
%1678 = fadd float %1677, %1673
%1679 = fmul float %1554, %149
%1680 = fadd float %1679, %1674
%1681 = fmul float %1665, %150
%1682 = fadd float %1681, %1676
%1683 = fmul float %1667, %150
%1684 = fadd float %1683, %1678
%1685 = fmul float %150, 0.000000e+00
%1686 = fadd float %1685, %1680
%1687 = fcmp une float %33, %temp28.0
%.sink228 = select i1 %1687, float %36, float %35
%temp52.3 = select i1 %1687, float 1.953125e-03, float 3.906250e-03
%1688 = fdiv float 1.000000e+00, %.sink228
%1689 = fmul float %99, %1688
%1690 = fmul float %98, %1688
%1691 = call float @llvm.floor.f32(float %1689)
%1692 = fsub float %1689, %1691
%1693 = call float @llvm.floor.f32(float %1690)
%1694 = fsub float %1690, %1693
%1695 = fmul float %37, 2.000000e+00
%1696 = fmul float %1695, %temp52.3
%1697 = fsub float 1.000000e+00, %1696
%1698 = fmul float %temp52.3, %37
%1699 = fmul float %1692, %1697
%1700 = fadd float %1699, %1698
%1701 = fmul float %1694, %1697
%1702 = fadd float %1701, %1698
%1703 = fmul float %1700, %temp28.0
%1704 = fadd float %1703, %temp44.0
%1705 = fmul float %1702, %temp28.0
%1706 = fadd float %1705, %temp45.0
%1707 = bitcast float %1704 to i32
%1708 = bitcast float %1706 to i32
%1709 = bitcast float %225 to i32
%1710 = insertelement <4 x i32> undef, i32 %1707, i32 0
%1711 = insertelement <4 x i32> %1710, i32 %1708, i32 1
%1712 = insertelement <4 x i32> %1711, i32 %1709, i32 2
%1713 = bitcast <8 x i32> %83 to <32 x i8>
%1714 = bitcast <4 x i32> %85 to <16 x i8>
%1715 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1712, <32 x i8> %1713, <16 x i8> %1714, i32 2)
%1716 = extractelement <4 x float> %1715, i32 1
%1717 = extractelement <4 x float> %1715, i32 3
%1718 = fcmp oeq float %temp46.0, 4.000000e+00
%1719 = select i1 %1718, float 1.000000e+00, float 0.000000e+00
%1720 = bitcast float %1704 to i32
%1721 = bitcast float %1706 to i32
%1722 = bitcast float %225 to i32
%1723 = insertelement <4 x i32> undef, i32 %1720, i32 0
%1724 = insertelement <4 x i32> %1723, i32 %1721, i32 1
%1725 = insertelement <4 x i32> %1724, i32 %1722, i32 2
%1726 = bitcast <8 x i32> %75 to <32 x i8>
%1727 = bitcast <4 x i32> %77 to <16 x i8>
%1728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2)
%1729 = extractelement <4 x float> %1728, i32 1
%1730 = extractelement <4 x float> %1728, i32 3
%1731 = fcmp oeq float %temp46.0, 3.000000e+00
%1732 = select i1 %1731, float 1.000000e+00, float 0.000000e+00
%1733 = bitcast float %1704 to i32
%1734 = bitcast float %1706 to i32
%1735 = bitcast float %225 to i32
%1736 = insertelement <4 x i32> undef, i32 %1733, i32 0
%1737 = insertelement <4 x i32> %1736, i32 %1734, i32 1
%1738 = insertelement <4 x i32> %1737, i32 %1735, i32 2
%1739 = bitcast <8 x i32> %67 to <32 x i8>
%1740 = bitcast <4 x i32> %69 to <16 x i8>
%1741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1738, <32 x i8> %1739, <16 x i8> %1740, i32 2)
%1742 = extractelement <4 x float> %1741, i32 1
%1743 = extractelement <4 x float> %1741, i32 3
%1744 = fcmp oeq float %temp46.0, 2.000000e+00
%1745 = select i1 %1744, float 1.000000e+00, float 0.000000e+00
%1746 = bitcast float %1704 to i32
%1747 = bitcast float %1706 to i32
%1748 = bitcast float %225 to i32
%1749 = insertelement <4 x i32> undef, i32 %1746, i32 0
%1750 = insertelement <4 x i32> %1749, i32 %1747, i32 1
%1751 = insertelement <4 x i32> %1750, i32 %1748, i32 2
%1752 = bitcast <8 x i32> %59 to <32 x i8>
%1753 = bitcast <4 x i32> %61 to <16 x i8>
%1754 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1751, <32 x i8> %1752, <16 x i8> %1753, i32 2)
%1755 = extractelement <4 x float> %1754, i32 1
%1756 = extractelement <4 x float> %1754, i32 3
%1757 = fcmp oeq float %temp46.0, 1.000000e+00
%1758 = select i1 %1757, float 1.000000e+00, float 0.000000e+00
%1759 = bitcast float %1704 to i32
%1760 = bitcast float %1706 to i32
%1761 = bitcast float %225 to i32
%1762 = insertelement <4 x i32> undef, i32 %1759, i32 0
%1763 = insertelement <4 x i32> %1762, i32 %1760, i32 1
%1764 = insertelement <4 x i32> %1763, i32 %1761, i32 2
%1765 = bitcast <8 x i32> %51 to <32 x i8>
%1766 = bitcast <4 x i32> %53 to <16 x i8>
%1767 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1764, <32 x i8> %1765, <16 x i8> %1766, i32 2)
%1768 = extractelement <4 x float> %1767, i32 1
%1769 = extractelement <4 x float> %1767, i32 3
%1770 = fcmp oeq float %temp46.0, 0.000000e+00
%1771 = select i1 %1770, float 1.000000e+00, float 0.000000e+00
%1772 = fmul float %1768, %1771
%1773 = fmul float %1769, %1771
%1774 = fmul float %1755, %1758
%1775 = fadd float %1774, %1772
%1776 = fmul float %1756, %1758
%1777 = fadd float %1776, %1773
%1778 = fmul float %1742, %1745
%1779 = fadd float %1778, %1775
%1780 = fmul float %1743, %1745
%1781 = fadd float %1780, %1777
%1782 = fmul float %1729, %1732
%1783 = fadd float %1782, %1779
%1784 = fmul float %1730, %1732
%1785 = fadd float %1784, %1781
%1786 = fmul float %1716, %1719
%1787 = fadd float %1786, %1783
%1788 = fmul float %1717, %1719
%1789 = fadd float %1788, %1785
%1790 = fmul float %1789, 2.000000e+00
%1791 = fadd float %1790, -1.000000e+00
%1792 = fmul float %1787, 2.000000e+00
%1793 = fadd float %1792, -1.000000e+00
%1794 = fmul float %1791, %1791
%1795 = fmul float %1793, %1793
%1796 = fadd float %1794, %1795
%1797 = call float @llvm.AMDIL.clamp.(float %1796, float 0.000000e+00, float 1.000000e+00)
%1798 = fcmp une float %33, %temp28.0
%.sink229 = select i1 %1798, float %36, float %35
%temp56.4 = select i1 %1798, float 1.953125e-03, float 3.906250e-03
%1799 = fdiv float 1.000000e+00, %.sink229
%1800 = fmul float %99, %1799
%1801 = fmul float %97, %1799
%1802 = call float @llvm.floor.f32(float %1800)
%1803 = fsub float %1800, %1802
%1804 = call float @llvm.floor.f32(float %1801)
%1805 = fsub float %1801, %1804
%1806 = fmul float %37, 2.000000e+00
%1807 = fmul float %1806, %temp56.4
%1808 = fsub float 1.000000e+00, %1807
%1809 = fmul float %temp56.4, %37
%1810 = fmul float %1803, %1808
%1811 = fadd float %1810, %1809
%1812 = fmul float %1805, %1808
%1813 = fadd float %1812, %1809
%1814 = fmul float %1811, %temp28.0
%1815 = fadd float %1814, %temp44.0
%1816 = fmul float %1813, %temp28.0
%1817 = fadd float %1816, %temp45.0
%1818 = bitcast float %1815 to i32
%1819 = bitcast float %1817 to i32
%1820 = bitcast float %225 to i32
%1821 = insertelement <4 x i32> undef, i32 %1818, i32 0
%1822 = insertelement <4 x i32> %1821, i32 %1819, i32 1
%1823 = insertelement <4 x i32> %1822, i32 %1820, i32 2
%1824 = bitcast <8 x i32> %83 to <32 x i8>
%1825 = bitcast <4 x i32> %85 to <16 x i8>
%1826 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1823, <32 x i8> %1824, <16 x i8> %1825, i32 2)
%1827 = extractelement <4 x float> %1826, i32 1
%1828 = extractelement <4 x float> %1826, i32 3
%1829 = fcmp oeq float %temp46.0, 4.000000e+00
%1830 = select i1 %1829, float 1.000000e+00, float 0.000000e+00
%1831 = bitcast float %1815 to i32
%1832 = bitcast float %1817 to i32
%1833 = bitcast float %225 to i32
%1834 = insertelement <4 x i32> undef, i32 %1831, i32 0
%1835 = insertelement <4 x i32> %1834, i32 %1832, i32 1
%1836 = insertelement <4 x i32> %1835, i32 %1833, i32 2
%1837 = bitcast <8 x i32> %75 to <32 x i8>
%1838 = bitcast <4 x i32> %77 to <16 x i8>
%1839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1836, <32 x i8> %1837, <16 x i8> %1838, i32 2)
%1840 = extractelement <4 x float> %1839, i32 1
%1841 = extractelement <4 x float> %1839, i32 3
%1842 = fcmp oeq float %temp46.0, 3.000000e+00
%1843 = select i1 %1842, float 1.000000e+00, float 0.000000e+00
%1844 = bitcast float %1815 to i32
%1845 = bitcast float %1817 to i32
%1846 = bitcast float %225 to i32
%1847 = insertelement <4 x i32> undef, i32 %1844, i32 0
%1848 = insertelement <4 x i32> %1847, i32 %1845, i32 1
%1849 = insertelement <4 x i32> %1848, i32 %1846, i32 2
%1850 = bitcast <8 x i32> %67 to <32 x i8>
%1851 = bitcast <4 x i32> %69 to <16 x i8>
%1852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1849, <32 x i8> %1850, <16 x i8> %1851, i32 2)
%1853 = extractelement <4 x float> %1852, i32 1
%1854 = extractelement <4 x float> %1852, i32 3
%1855 = fcmp oeq float %temp46.0, 2.000000e+00
%1856 = select i1 %1855, float 1.000000e+00, float 0.000000e+00
%1857 = bitcast float %1815 to i32
%1858 = bitcast float %1817 to i32
%1859 = bitcast float %225 to i32
%1860 = insertelement <4 x i32> undef, i32 %1857, i32 0
%1861 = insertelement <4 x i32> %1860, i32 %1858, i32 1
%1862 = insertelement <4 x i32> %1861, i32 %1859, i32 2
%1863 = bitcast <8 x i32> %59 to <32 x i8>
%1864 = bitcast <4 x i32> %61 to <16 x i8>
%1865 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1862, <32 x i8> %1863, <16 x i8> %1864, i32 2)
%1866 = extractelement <4 x float> %1865, i32 1
%1867 = extractelement <4 x float> %1865, i32 3
%1868 = fcmp oeq float %temp46.0, 1.000000e+00
%1869 = select i1 %1868, float 1.000000e+00, float 0.000000e+00
%1870 = bitcast float %1815 to i32
%1871 = bitcast float %1817 to i32
%1872 = bitcast float %225 to i32
%1873 = insertelement <4 x i32> undef, i32 %1870, i32 0
%1874 = insertelement <4 x i32> %1873, i32 %1871, i32 1
%1875 = insertelement <4 x i32> %1874, i32 %1872, i32 2
%1876 = bitcast <8 x i32> %51 to <32 x i8>
%1877 = bitcast <4 x i32> %53 to <16 x i8>
%1878 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1875, <32 x i8> %1876, <16 x i8> %1877, i32 2)
%1879 = extractelement <4 x float> %1878, i32 1
%1880 = extractelement <4 x float> %1878, i32 3
%1881 = fcmp oeq float %temp46.0, 0.000000e+00
%1882 = select i1 %1881, float 1.000000e+00, float 0.000000e+00
%1883 = fmul float %1879, %1882
%1884 = fmul float %1880, %1882
%1885 = fmul float %1866, %1869
%1886 = fadd float %1885, %1883
%1887 = fmul float %1867, %1869
%1888 = fadd float %1887, %1884
%1889 = fmul float %1853, %1856
%1890 = fadd float %1889, %1886
%1891 = fmul float %1854, %1856
%1892 = fadd float %1891, %1888
%1893 = fmul float %1840, %1843
%1894 = fadd float %1893, %1890
%1895 = fmul float %1841, %1843
%1896 = fadd float %1895, %1892
%1897 = fmul float %1827, %1830
%1898 = fadd float %1897, %1894
%1899 = fmul float %1828, %1830
%1900 = fadd float %1899, %1896
%1901 = fmul float %1900, 2.000000e+00
%1902 = fadd float %1901, -1.000000e+00
%1903 = fmul float %1898, 2.000000e+00
%1904 = fadd float %1903, -1.000000e+00
%1905 = fmul float %1902, %1902
%1906 = fmul float %1904, %1904
%1907 = fadd float %1905, %1906
%1908 = call float @llvm.AMDIL.clamp.(float %1907, float 0.000000e+00, float 1.000000e+00)
%1909 = fcmp une float %33, %temp28.0
%.sink230 = select i1 %1909, float %36, float %35
%temp60.4 = select i1 %1909, float 1.953125e-03, float 3.906250e-03
%1910 = fdiv float 1.000000e+00, %.sink230
%1911 = fmul float %97, %1910
%1912 = fmul float %98, %1910
%1913 = call float @llvm.floor.f32(float %1911)
%1914 = fsub float %1911, %1913
%1915 = call float @llvm.floor.f32(float %1912)
%1916 = fsub float %1912, %1915
%1917 = fmul float %37, 2.000000e+00
%1918 = fmul float %1917, %temp60.4
%1919 = fsub float 1.000000e+00, %1918
%1920 = fmul float %temp60.4, %37
%1921 = fmul float %1914, %1919
%1922 = fadd float %1921, %1920
%1923 = fmul float %1916, %1919
%1924 = fadd float %1923, %1920
%1925 = fmul float %1922, %temp28.0
%1926 = fadd float %1925, %temp44.0
%1927 = fmul float %1924, %temp28.0
%1928 = fadd float %1927, %temp45.0
%1929 = bitcast float %1926 to i32
%1930 = bitcast float %1928 to i32
%1931 = bitcast float %225 to i32
%1932 = insertelement <4 x i32> undef, i32 %1929, i32 0
%1933 = insertelement <4 x i32> %1932, i32 %1930, i32 1
%1934 = insertelement <4 x i32> %1933, i32 %1931, i32 2
%1935 = bitcast <8 x i32> %83 to <32 x i8>
%1936 = bitcast <4 x i32> %85 to <16 x i8>
%1937 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1934, <32 x i8> %1935, <16 x i8> %1936, i32 2)
%1938 = extractelement <4 x float> %1937, i32 1
%1939 = extractelement <4 x float> %1937, i32 3
%1940 = fcmp oeq float %temp46.0, 4.000000e+00
%1941 = select i1 %1940, float 1.000000e+00, float 0.000000e+00
%1942 = bitcast float %1926 to i32
%1943 = bitcast float %1928 to i32
%1944 = bitcast float %225 to i32
%1945 = insertelement <4 x i32> undef, i32 %1942, i32 0
%1946 = insertelement <4 x i32> %1945, i32 %1943, i32 1
%1947 = insertelement <4 x i32> %1946, i32 %1944, i32 2
%1948 = bitcast <8 x i32> %75 to <32 x i8>
%1949 = bitcast <4 x i32> %77 to <16 x i8>
%1950 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1947, <32 x i8> %1948, <16 x i8> %1949, i32 2)
%1951 = extractelement <4 x float> %1950, i32 1
%1952 = extractelement <4 x float> %1950, i32 3
%1953 = fcmp oeq float %temp46.0, 3.000000e+00
%1954 = select i1 %1953, float 1.000000e+00, float 0.000000e+00
%1955 = bitcast float %1926 to i32
%1956 = bitcast float %1928 to i32
%1957 = bitcast float %225 to i32
%1958 = insertelement <4 x i32> undef, i32 %1955, i32 0
%1959 = insertelement <4 x i32> %1958, i32 %1956, i32 1
%1960 = insertelement <4 x i32> %1959, i32 %1957, i32 2
%1961 = bitcast <8 x i32> %67 to <32 x i8>
%1962 = bitcast <4 x i32> %69 to <16 x i8>
%1963 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1960, <32 x i8> %1961, <16 x i8> %1962, i32 2)
%1964 = extractelement <4 x float> %1963, i32 1
%1965 = extractelement <4 x float> %1963, i32 3
%1966 = fcmp oeq float %temp46.0, 2.000000e+00
%1967 = select i1 %1966, float 1.000000e+00, float 0.000000e+00
%1968 = bitcast float %1926 to i32
%1969 = bitcast float %1928 to i32
%1970 = bitcast float %225 to i32
%1971 = insertelement <4 x i32> undef, i32 %1968, i32 0
%1972 = insertelement <4 x i32> %1971, i32 %1969, i32 1
%1973 = insertelement <4 x i32> %1972, i32 %1970, i32 2
%1974 = bitcast <8 x i32> %59 to <32 x i8>
%1975 = bitcast <4 x i32> %61 to <16 x i8>
%1976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1973, <32 x i8> %1974, <16 x i8> %1975, i32 2)
%1977 = extractelement <4 x float> %1976, i32 1
%1978 = extractelement <4 x float> %1976, i32 3
%1979 = fcmp oeq float %temp46.0, 1.000000e+00
%1980 = select i1 %1979, float 1.000000e+00, float 0.000000e+00
%1981 = bitcast float %1926 to i32
%1982 = bitcast float %1928 to i32
%1983 = bitcast float %225 to i32
%1984 = insertelement <4 x i32> undef, i32 %1981, i32 0
%1985 = insertelement <4 x i32> %1984, i32 %1982, i32 1
%1986 = insertelement <4 x i32> %1985, i32 %1983, i32 2
%1987 = bitcast <8 x i32> %51 to <32 x i8>
%1988 = bitcast <4 x i32> %53 to <16 x i8>
%1989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1986, <32 x i8> %1987, <16 x i8> %1988, i32 2)
%1990 = extractelement <4 x float> %1989, i32 1
%1991 = extractelement <4 x float> %1989, i32 3
%1992 = fcmp oeq float %temp46.0, 0.000000e+00
%1993 = select i1 %1992, float 1.000000e+00, float 0.000000e+00
%1994 = fmul float %1990, %1993
%1995 = fmul float %1991, %1993
%1996 = fmul float %1977, %1980
%1997 = fadd float %1996, %1994
%1998 = fmul float %1978, %1980
%1999 = fadd float %1998, %1995
%2000 = fmul float %1964, %1967
%2001 = fadd float %2000, %1997
%2002 = fmul float %1965, %1967
%2003 = fadd float %2002, %1999
%2004 = fmul float %1951, %1954
%2005 = fadd float %2004, %2001
%2006 = fmul float %1952, %1954
%2007 = fadd float %2006, %2003
%2008 = fmul float %1938, %1941
%2009 = fadd float %2008, %2005
%2010 = fmul float %1939, %1941
%2011 = fadd float %2010, %2007
%2012 = fmul float %2011, 2.000000e+00
%2013 = fadd float %2012, -1.000000e+00
%2014 = fmul float %2009, 2.000000e+00
%2015 = fadd float %2014, -1.000000e+00
%2016 = fmul float %2013, %2013
%2017 = fmul float %2015, %2015
%2018 = fadd float %2016, %2017
%2019 = call float @llvm.AMDIL.clamp.(float %2018, float 0.000000e+00, float 1.000000e+00)
%2020 = fmul float %148, 0.000000e+00
%2021 = fmul float %1791, %148
%2022 = fmul float %1793, %148
%2023 = fmul float %1904, %149
%2024 = fadd float %2023, %2020
%2025 = fmul float %149, 0.000000e+00
%2026 = fadd float %2025, %2021
%2027 = fmul float %1902, %149
%2028 = fadd float %2027, %2022
%2029 = fmul float %2013, %150
%2030 = fadd float %2029, %2024
%2031 = fmul float %2015, %150
%2032 = fadd float %2031, %2026
%2033 = fmul float %150, 0.000000e+00
%2034 = fadd float %2033, %2028
%2035 = fcmp une float %33, %temp32.0
%.sink231 = select i1 %2035, float %36, float %35
%temp44.1 = select i1 %2035, float 1.953125e-03, float 3.906250e-03
%2036 = fdiv float 1.000000e+00, %.sink231
%2037 = fmul float %99, %2036
%2038 = fmul float %98, %2036
%2039 = call float @llvm.floor.f32(float %2037)
%2040 = fsub float %2037, %2039
%2041 = call float @llvm.floor.f32(float %2038)
%2042 = fsub float %2038, %2041
%2043 = fmul float %37, 2.000000e+00
%2044 = fmul float %2043, %temp44.1
%2045 = fsub float 1.000000e+00, %2044
%2046 = fmul float %temp44.1, %37
%2047 = fmul float %2040, %2045
%2048 = fadd float %2047, %2046
%2049 = fmul float %2042, %2045
%2050 = fadd float %2049, %2046
%2051 = fmul float %2048, %temp32.0
%2052 = fadd float %2051, %temp20.0
%2053 = fmul float %2050, %temp32.0
%2054 = fadd float %2053, %temp21.0
%2055 = bitcast float %2052 to i32
%2056 = bitcast float %2054 to i32
%2057 = bitcast float %225 to i32
%2058 = insertelement <4 x i32> undef, i32 %2055, i32 0
%2059 = insertelement <4 x i32> %2058, i32 %2056, i32 1
%2060 = insertelement <4 x i32> %2059, i32 %2057, i32 2
%2061 = bitcast <8 x i32> %83 to <32 x i8>
%2062 = bitcast <4 x i32> %85 to <16 x i8>
%2063 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2060, <32 x i8> %2061, <16 x i8> %2062, i32 2)
%2064 = extractelement <4 x float> %2063, i32 1
%2065 = extractelement <4 x float> %2063, i32 3
%2066 = fcmp oeq float %temp22.0, 4.000000e+00
%2067 = select i1 %2066, float 1.000000e+00, float 0.000000e+00
%2068 = bitcast float %2052 to i32
%2069 = bitcast float %2054 to i32
%2070 = bitcast float %225 to i32
%2071 = insertelement <4 x i32> undef, i32 %2068, i32 0
%2072 = insertelement <4 x i32> %2071, i32 %2069, i32 1
%2073 = insertelement <4 x i32> %2072, i32 %2070, i32 2
%2074 = bitcast <8 x i32> %75 to <32 x i8>
%2075 = bitcast <4 x i32> %77 to <16 x i8>
%2076 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2073, <32 x i8> %2074, <16 x i8> %2075, i32 2)
%2077 = extractelement <4 x float> %2076, i32 1
%2078 = extractelement <4 x float> %2076, i32 3
%2079 = fcmp oeq float %temp22.0, 3.000000e+00
%2080 = select i1 %2079, float 1.000000e+00, float 0.000000e+00
%2081 = bitcast float %2052 to i32
%2082 = bitcast float %2054 to i32
%2083 = bitcast float %225 to i32
%2084 = insertelement <4 x i32> undef, i32 %2081, i32 0
%2085 = insertelement <4 x i32> %2084, i32 %2082, i32 1
%2086 = insertelement <4 x i32> %2085, i32 %2083, i32 2
%2087 = bitcast <8 x i32> %67 to <32 x i8>
%2088 = bitcast <4 x i32> %69 to <16 x i8>
%2089 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2086, <32 x i8> %2087, <16 x i8> %2088, i32 2)
%2090 = extractelement <4 x float> %2089, i32 1
%2091 = extractelement <4 x float> %2089, i32 3
%2092 = fcmp oeq float %temp22.0, 2.000000e+00
%2093 = select i1 %2092, float 1.000000e+00, float 0.000000e+00
%2094 = bitcast float %2052 to i32
%2095 = bitcast float %2054 to i32
%2096 = bitcast float %225 to i32
%2097 = insertelement <4 x i32> undef, i32 %2094, i32 0
%2098 = insertelement <4 x i32> %2097, i32 %2095, i32 1
%2099 = insertelement <4 x i32> %2098, i32 %2096, i32 2
%2100 = bitcast <8 x i32> %59 to <32 x i8>
%2101 = bitcast <4 x i32> %61 to <16 x i8>
%2102 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2099, <32 x i8> %2100, <16 x i8> %2101, i32 2)
%2103 = extractelement <4 x float> %2102, i32 1
%2104 = extractelement <4 x float> %2102, i32 3
%2105 = fcmp oeq float %temp22.0, 1.000000e+00
%2106 = select i1 %2105, float 1.000000e+00, float 0.000000e+00
%2107 = bitcast float %2052 to i32
%2108 = bitcast float %2054 to i32
%2109 = bitcast float %225 to i32
%2110 = insertelement <4 x i32> undef, i32 %2107, i32 0
%2111 = insertelement <4 x i32> %2110, i32 %2108, i32 1
%2112 = insertelement <4 x i32> %2111, i32 %2109, i32 2
%2113 = bitcast <8 x i32> %51 to <32 x i8>
%2114 = bitcast <4 x i32> %53 to <16 x i8>
%2115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2112, <32 x i8> %2113, <16 x i8> %2114, i32 2)
%2116 = extractelement <4 x float> %2115, i32 1
%2117 = extractelement <4 x float> %2115, i32 3
%2118 = fcmp oeq float %temp22.0, 0.000000e+00
%2119 = select i1 %2118, float 1.000000e+00, float 0.000000e+00
%2120 = fmul float %2116, %2119
%2121 = fmul float %2117, %2119
%2122 = fmul float %2103, %2106
%2123 = fadd float %2122, %2120
%2124 = fmul float %2104, %2106
%2125 = fadd float %2124, %2121
%2126 = fmul float %2090, %2093
%2127 = fadd float %2126, %2123
%2128 = fmul float %2091, %2093
%2129 = fadd float %2128, %2125
%2130 = fmul float %2077, %2080
%2131 = fadd float %2130, %2127
%2132 = fmul float %2078, %2080
%2133 = fadd float %2132, %2129
%2134 = fmul float %2064, %2067
%2135 = fadd float %2134, %2131
%2136 = fmul float %2065, %2067
%2137 = fadd float %2136, %2133
%2138 = fmul float %2137, 2.000000e+00
%2139 = fadd float %2138, -1.000000e+00
%2140 = fmul float %2135, 2.000000e+00
%2141 = fadd float %2140, -1.000000e+00
%2142 = fmul float %2139, %2139
%2143 = fmul float %2141, %2141
%2144 = fadd float %2142, %2143
%2145 = call float @llvm.AMDIL.clamp.(float %2144, float 0.000000e+00, float 1.000000e+00)
%2146 = fcmp une float %33, %temp32.0
%.sink232 = select i1 %2146, float %36, float %35
%temp52.5 = select i1 %2146, float 1.953125e-03, float 3.906250e-03
%2147 = fdiv float 1.000000e+00, %.sink232
%2148 = fmul float %99, %2147
%2149 = fmul float %97, %2147
%2150 = call float @llvm.floor.f32(float %2148)
%2151 = fsub float %2148, %2150
%2152 = call float @llvm.floor.f32(float %2149)
%2153 = fsub float %2149, %2152
%2154 = fmul float %37, 2.000000e+00
%2155 = fmul float %2154, %temp52.5
%2156 = fsub float 1.000000e+00, %2155
%2157 = fmul float %temp52.5, %37
%2158 = fmul float %2151, %2156
%2159 = fadd float %2158, %2157
%2160 = fmul float %2153, %2156
%2161 = fadd float %2160, %2157
%2162 = fmul float %2159, %temp32.0
%2163 = fadd float %2162, %temp20.0
%2164 = fmul float %2161, %temp32.0
%2165 = fadd float %2164, %temp21.0
%2166 = bitcast float %2163 to i32
%2167 = bitcast float %2165 to i32
%2168 = bitcast float %225 to i32
%2169 = insertelement <4 x i32> undef, i32 %2166, i32 0
%2170 = insertelement <4 x i32> %2169, i32 %2167, i32 1
%2171 = insertelement <4 x i32> %2170, i32 %2168, i32 2
%2172 = bitcast <8 x i32> %83 to <32 x i8>
%2173 = bitcast <4 x i32> %85 to <16 x i8>
%2174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2171, <32 x i8> %2172, <16 x i8> %2173, i32 2)
%2175 = extractelement <4 x float> %2174, i32 1
%2176 = extractelement <4 x float> %2174, i32 3
%2177 = fcmp oeq float %temp22.0, 4.000000e+00
%2178 = select i1 %2177, float 1.000000e+00, float 0.000000e+00
%2179 = bitcast float %2163 to i32
%2180 = bitcast float %2165 to i32
%2181 = bitcast float %225 to i32
%2182 = insertelement <4 x i32> undef, i32 %2179, i32 0
%2183 = insertelement <4 x i32> %2182, i32 %2180, i32 1
%2184 = insertelement <4 x i32> %2183, i32 %2181, i32 2
%2185 = bitcast <8 x i32> %75 to <32 x i8>
%2186 = bitcast <4 x i32> %77 to <16 x i8>
%2187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2184, <32 x i8> %2185, <16 x i8> %2186, i32 2)
%2188 = extractelement <4 x float> %2187, i32 1
%2189 = extractelement <4 x float> %2187, i32 3
%2190 = fcmp oeq float %temp22.0, 3.000000e+00
%2191 = select i1 %2190, float 1.000000e+00, float 0.000000e+00
%2192 = bitcast float %2163 to i32
%2193 = bitcast float %2165 to i32
%2194 = bitcast float %225 to i32
%2195 = insertelement <4 x i32> undef, i32 %2192, i32 0
%2196 = insertelement <4 x i32> %2195, i32 %2193, i32 1
%2197 = insertelement <4 x i32> %2196, i32 %2194, i32 2
%2198 = bitcast <8 x i32> %67 to <32 x i8>
%2199 = bitcast <4 x i32> %69 to <16 x i8>
%2200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2197, <32 x i8> %2198, <16 x i8> %2199, i32 2)
%2201 = extractelement <4 x float> %2200, i32 1
%2202 = extractelement <4 x float> %2200, i32 3
%2203 = fcmp oeq float %temp22.0, 2.000000e+00
%2204 = select i1 %2203, float 1.000000e+00, float 0.000000e+00
%2205 = bitcast float %2163 to i32
%2206 = bitcast float %2165 to i32
%2207 = bitcast float %225 to i32
%2208 = insertelement <4 x i32> undef, i32 %2205, i32 0
%2209 = insertelement <4 x i32> %2208, i32 %2206, i32 1
%2210 = insertelement <4 x i32> %2209, i32 %2207, i32 2
%2211 = bitcast <8 x i32> %59 to <32 x i8>
%2212 = bitcast <4 x i32> %61 to <16 x i8>
%2213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2210, <32 x i8> %2211, <16 x i8> %2212, i32 2)
%2214 = extractelement <4 x float> %2213, i32 1
%2215 = extractelement <4 x float> %2213, i32 3
%2216 = fcmp oeq float %temp22.0, 1.000000e+00
%2217 = select i1 %2216, float 1.000000e+00, float 0.000000e+00
%2218 = bitcast float %2163 to i32
%2219 = bitcast float %2165 to i32
%2220 = bitcast float %225 to i32
%2221 = insertelement <4 x i32> undef, i32 %2218, i32 0
%2222 = insertelement <4 x i32> %2221, i32 %2219, i32 1
%2223 = insertelement <4 x i32> %2222, i32 %2220, i32 2
%2224 = bitcast <8 x i32> %51 to <32 x i8>
%2225 = bitcast <4 x i32> %53 to <16 x i8>
%2226 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2223, <32 x i8> %2224, <16 x i8> %2225, i32 2)
%2227 = extractelement <4 x float> %2226, i32 1
%2228 = extractelement <4 x float> %2226, i32 3
%2229 = fcmp oeq float %temp22.0, 0.000000e+00
%2230 = select i1 %2229, float 1.000000e+00, float 0.000000e+00
%2231 = fmul float %2227, %2230
%2232 = fmul float %2228, %2230
%2233 = fmul float %2214, %2217
%2234 = fadd float %2233, %2231
%2235 = fmul float %2215, %2217
%2236 = fadd float %2235, %2232
%2237 = fmul float %2201, %2204
%2238 = fadd float %2237, %2234
%2239 = fmul float %2202, %2204
%2240 = fadd float %2239, %2236
%2241 = fmul float %2188, %2191
%2242 = fadd float %2241, %2238
%2243 = fmul float %2189, %2191
%2244 = fadd float %2243, %2240
%2245 = fmul float %2175, %2178
%2246 = fadd float %2245, %2242
%2247 = fmul float %2176, %2178
%2248 = fadd float %2247, %2244
%2249 = fmul float %2248, 2.000000e+00
%2250 = fadd float %2249, -1.000000e+00
%2251 = fmul float %2246, 2.000000e+00
%2252 = fadd float %2251, -1.000000e+00
%2253 = fmul float %2250, %2250
%2254 = fmul float %2252, %2252
%2255 = fadd float %2253, %2254
%2256 = call float @llvm.AMDIL.clamp.(float %2255, float 0.000000e+00, float 1.000000e+00)
%2257 = fcmp une float %33, %temp32.0
%.sink233 = select i1 %2257, float %36, float %35
%temp56.6 = select i1 %2257, float 1.953125e-03, float 3.906250e-03
%2258 = fdiv float 1.000000e+00, %.sink233
%2259 = fmul float %97, %2258
%2260 = fmul float %98, %2258
%2261 = call float @llvm.floor.f32(float %2259)
%2262 = fsub float %2259, %2261
%2263 = call float @llvm.floor.f32(float %2260)
%2264 = fsub float %2260, %2263
%2265 = fmul float %37, 2.000000e+00
%2266 = fmul float %2265, %temp56.6
%2267 = fsub float 1.000000e+00, %2266
%2268 = fmul float %temp56.6, %37
%2269 = fmul float %2262, %2267
%2270 = fadd float %2269, %2268
%2271 = fmul float %2264, %2267
%2272 = fadd float %2271, %2268
%2273 = fmul float %2270, %temp32.0
%2274 = fadd float %2273, %temp20.0
%2275 = fmul float %2272, %temp32.0
%2276 = fadd float %2275, %temp21.0
%2277 = bitcast float %2274 to i32
%2278 = bitcast float %2276 to i32
%2279 = bitcast float %225 to i32
%2280 = insertelement <4 x i32> undef, i32 %2277, i32 0
%2281 = insertelement <4 x i32> %2280, i32 %2278, i32 1
%2282 = insertelement <4 x i32> %2281, i32 %2279, i32 2
%2283 = bitcast <8 x i32> %83 to <32 x i8>
%2284 = bitcast <4 x i32> %85 to <16 x i8>
%2285 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2282, <32 x i8> %2283, <16 x i8> %2284, i32 2)
%2286 = extractelement <4 x float> %2285, i32 1
%2287 = extractelement <4 x float> %2285, i32 3
%2288 = fcmp oeq float %temp22.0, 4.000000e+00
%2289 = select i1 %2288, float 1.000000e+00, float 0.000000e+00
%2290 = bitcast float %2274 to i32
%2291 = bitcast float %2276 to i32
%2292 = bitcast float %225 to i32
%2293 = insertelement <4 x i32> undef, i32 %2290, i32 0
%2294 = insertelement <4 x i32> %2293, i32 %2291, i32 1
%2295 = insertelement <4 x i32> %2294, i32 %2292, i32 2
%2296 = bitcast <8 x i32> %75 to <32 x i8>
%2297 = bitcast <4 x i32> %77 to <16 x i8>
%2298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2295, <32 x i8> %2296, <16 x i8> %2297, i32 2)
%2299 = extractelement <4 x float> %2298, i32 1
%2300 = extractelement <4 x float> %2298, i32 3
%2301 = fcmp oeq float %temp22.0, 3.000000e+00
%2302 = select i1 %2301, float 1.000000e+00, float 0.000000e+00
%2303 = bitcast float %2274 to i32
%2304 = bitcast float %2276 to i32
%2305 = bitcast float %225 to i32
%2306 = insertelement <4 x i32> undef, i32 %2303, i32 0
%2307 = insertelement <4 x i32> %2306, i32 %2304, i32 1
%2308 = insertelement <4 x i32> %2307, i32 %2305, i32 2
%2309 = bitcast <8 x i32> %67 to <32 x i8>
%2310 = bitcast <4 x i32> %69 to <16 x i8>
%2311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2308, <32 x i8> %2309, <16 x i8> %2310, i32 2)
%2312 = extractelement <4 x float> %2311, i32 1
%2313 = extractelement <4 x float> %2311, i32 3
%2314 = fcmp oeq float %temp22.0, 2.000000e+00
%2315 = select i1 %2314, float 1.000000e+00, float 0.000000e+00
%2316 = bitcast float %2274 to i32
%2317 = bitcast float %2276 to i32
%2318 = bitcast float %225 to i32
%2319 = insertelement <4 x i32> undef, i32 %2316, i32 0
%2320 = insertelement <4 x i32> %2319, i32 %2317, i32 1
%2321 = insertelement <4 x i32> %2320, i32 %2318, i32 2
%2322 = bitcast <8 x i32> %59 to <32 x i8>
%2323 = bitcast <4 x i32> %61 to <16 x i8>
%2324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2321, <32 x i8> %2322, <16 x i8> %2323, i32 2)
%2325 = extractelement <4 x float> %2324, i32 1
%2326 = extractelement <4 x float> %2324, i32 3
%2327 = fcmp oeq float %temp22.0, 1.000000e+00
%2328 = select i1 %2327, float 1.000000e+00, float 0.000000e+00
%2329 = bitcast float %2274 to i32
%2330 = bitcast float %2276 to i32
%2331 = bitcast float %225 to i32
%2332 = insertelement <4 x i32> undef, i32 %2329, i32 0
%2333 = insertelement <4 x i32> %2332, i32 %2330, i32 1
%2334 = insertelement <4 x i32> %2333, i32 %2331, i32 2
%2335 = bitcast <8 x i32> %51 to <32 x i8>
%2336 = bitcast <4 x i32> %53 to <16 x i8>
%2337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2334, <32 x i8> %2335, <16 x i8> %2336, i32 2)
%2338 = extractelement <4 x float> %2337, i32 1
%2339 = extractelement <4 x float> %2337, i32 3
%2340 = fcmp oeq float %temp22.0, 0.000000e+00
%2341 = select i1 %2340, float 1.000000e+00, float 0.000000e+00
%2342 = fmul float %2338, %2341
%2343 = fmul float %2339, %2341
%2344 = fmul float %2325, %2328
%2345 = fadd float %2344, %2342
%2346 = fmul float %2326, %2328
%2347 = fadd float %2346, %2343
%2348 = fmul float %2312, %2315
%2349 = fadd float %2348, %2345
%2350 = fmul float %2313, %2315
%2351 = fadd float %2350, %2347
%2352 = fmul float %2299, %2302
%2353 = fadd float %2352, %2349
%2354 = fmul float %2300, %2302
%2355 = fadd float %2354, %2351
%2356 = fmul float %2286, %2289
%2357 = fadd float %2356, %2353
%2358 = fmul float %2287, %2289
%2359 = fadd float %2358, %2355
%2360 = fmul float %2359, 2.000000e+00
%2361 = fadd float %2360, -1.000000e+00
%2362 = fmul float %2357, 2.000000e+00
%2363 = fadd float %2362, -1.000000e+00
%2364 = fmul float %2361, %2361
%2365 = fmul float %2363, %2363
%2366 = fadd float %2364, %2365
%2367 = call float @llvm.AMDIL.clamp.(float %2366, float 0.000000e+00, float 1.000000e+00)
%2368 = fmul float %148, 0.000000e+00
%2369 = fmul float %2139, %148
%2370 = fmul float %2141, %148
%2371 = fmul float %2252, %149
%2372 = fadd float %2371, %2368
%2373 = fmul float %149, 0.000000e+00
%2374 = fadd float %2373, %2369
%2375 = fmul float %2250, %149
%2376 = fadd float %2375, %2370
%2377 = fmul float %2361, %150
%2378 = fadd float %2377, %2372
%2379 = fmul float %2363, %150
%2380 = fadd float %2379, %2374
%2381 = fmul float %150, 0.000000e+00
%2382 = fadd float %2381, %2376
%2383 = fmul float %90, %1682
%2384 = fmul float %90, %1684
%2385 = fmul float %90, %1686
%2386 = fmul float %91, %2030
%2387 = fadd float %2386, %2383
%2388 = fmul float %91, %2032
%2389 = fadd float %2388, %2384
%2390 = fmul float %91, %2034
%2391 = fadd float %2390, %2385
%2392 = fmul float %92, %2378
%2393 = fadd float %2392, %2387
%2394 = fmul float %92, %2380
%2395 = fadd float %2394, %2389
%2396 = fmul float %92, %2382
%2397 = fadd float %2396, %2391
%2398 = fmul float %2393, %2393
%2399 = fmul float %2395, %2395
%2400 = fadd float %2398, %2399
%2401 = fmul float %2397, %2397
%2402 = fadd float %2400, %2401
%2403 = fadd float %2402, 1.000000e+00
%2404 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2403)
%2405 = fmul float %2393, %2404
%2406 = fmul float %2395, %2404
%2407 = fmul float %2397, %2404
%2408 = fmul float %2405, %89
%2409 = fmul float %2406, %89
%2410 = fmul float %2407, %89
%2411 = fsub float %94, %2408
%2412 = fsub float %95, %2409
%2413 = fsub float %96, %2410
%2414 = fmul float %2411, %2411
%2415 = fmul float %2412, %2412
%2416 = fadd float %2415, %2414
%2417 = fmul float %2413, %2413
%2418 = fadd float %2416, %2417
%2419 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2418)
%2420 = fmul float %2411, %2419
%2421 = fmul float %2412, %2419
%2422 = fmul float %2413, %2419
%2423 = fmul float %1334, %100
%2424 = fmul float %1336, %101
%2425 = fmul float %1338, %102
%2426 = fadd float %109, %121
%2427 = fadd float %110, %122
%2428 = fadd float %111, %123
%2429 = fmul float %2426, %2426
%2430 = fmul float %2427, %2427
%2431 = fadd float %2430, %2429
%2432 = fmul float %2428, %2428
%2433 = fadd float %2431, %2432
%2434 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2433)
%2435 = fmul float %2426, %2434
%2436 = fmul float %2427, %2434
%2437 = fmul float %2428, %2434
%2438 = fmul float %2420, %2435
%2439 = fmul float %2421, %2436
%2440 = fadd float %2439, %2438
%2441 = fmul float %2422, %2437
%2442 = fadd float %2440, %2441
%2443 = call float @llvm.maxnum.f32(float %2442, float 0x3F1A36E2E0000000)
%2444 = fmul float %93, 3.200000e+01
%2445 = call float @llvm.pow.f32(float %2443, float %2444)
%2446 = call float @llvm.AMDIL.clamp.(float %2445, float 0.000000e+00, float 1.000000e+00)
%2447 = fmul float %2446, 2.000000e+00
%2448 = fsub float 3.000000e+00, %2447
%2449 = fmul float %2446, %2448
%2450 = fmul float %2446, %2449
%2451 = fmul float %2450, %93
%2452 = fmul float %1334, %30
%2453 = fmul float %1336, %31
%2454 = fmul float %1338, %32
%2455 = fmul float %2420, %109
%2456 = fmul float %2421, %110
%2457 = fadd float %2456, %2455
%2458 = fmul float %2422, %111
%2459 = fadd float %2457, %2458
%2460 = call float @llvm.AMDIL.clamp.(float %2459, float 0.000000e+00, float 1.000000e+00)
%2461 = fmul float %43, 2.000000e+00
%2462 = fmul float %44, 2.000000e+00
%2463 = fmul float %45, 2.000000e+00
%2464 = call float @llvm.maxnum.f32(float %2461, float %40)
%2465 = call float @llvm.maxnum.f32(float %2462, float %41)
%2466 = call float @llvm.maxnum.f32(float %2463, float %42)
%2467 = call float @llvm.minnum.f32(float %2464, float 1.000000e+00)
%2468 = call float @llvm.minnum.f32(float %2465, float 1.000000e+00)
%2469 = call float @llvm.minnum.f32(float %2466, float 1.000000e+00)
%2470 = fmul float %2467, %1334
%2471 = fmul float %2468, %1336
%2472 = fmul float %2469, %1338
%2473 = fmul float %2452, %2460
%2474 = fadd float %2473, %2470
%2475 = fmul float %2453, %2460
%2476 = fadd float %2475, %2471
%2477 = fmul float %2454, %2460
%2478 = fadd float %2477, %2472
%2479 = fmul float %30, %2451
%2480 = fadd float %2479, %2474
%2481 = fmul float %31, %2451
%2482 = fadd float %2481, %2476
%2483 = fmul float %32, %2451
%2484 = fadd float %2483, %2478
%2485 = fmul float %2480, 5.000000e-01
%2486 = fmul float %2482, 5.000000e-01
%2487 = fmul float %2484, 5.000000e-01
%2488 = fadd float %2423, %2485
%2489 = fadd float %2424, %2486
%2490 = fadd float %2425, %2487
%2491 = call i32 @llvm.SI.packf16(float %2488, float %2489)
%2492 = bitcast i32 %2491 to float
%2493 = call i32 @llvm.SI.packf16(float %2490, float 1.000000e+00)
%2494 = bitcast i32 %2493 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2492, float %2494, float %2492, float %2494)
ret void
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata for the IR module whose
; function body closes just above. Everything tagged #1 is `nounwind readnone`
; and everything tagged #2 is `readnone` only (see the attribute groups at the
; bottom of this block).
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Reciprocal square root; the body above multiplies each component of a
; 3-vector by its result after summing the squared components.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Returns a <4 x float>; the body above passes a <4 x i32> built from bitcast
; float coordinates, a <32 x i8> bitcast from <8 x i32>, a <16 x i8> bitcast
; from <4 x i32>, and the constant i32 2.
; NOTE(review): the two byte-vector operands are presumably the image resource
; and sampler descriptors -- confirm against the code generator.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; The trailing '.' is part of the intrinsic name (call sites above spell it the
; same way). This is the only declaration in attribute group #2, i.e. it is
; not marked nounwind. Call sites above pass bounds 0.0 and 1.0.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Takes two floats and returns one i32; the body above bitcasts that i32 back
; to float before handing it to the export call.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Declared without any attribute group, unlike the declarations above.
; Five i32 operands followed by four float operands; it is the last call
; before `ret void` in the body above.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Group #0 is not referenced by any declaration in this block.
; NOTE(review): presumably attached to the function defined above, whose
; `define` line is outside this excerpt -- confirm. The meaning of
; "ShaderType"="0" is not established here -- confirm against the generator.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node !0. NOTE(review): presumably the `!tbaa !0` tag used on
; constant-address-space loads elsewhere in this dump -- confirm.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_writelane_b32 v254, s10, 52 ; 05FD680A
s_mov_b64 s[100:101], s[4:5] ; BEE40404
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000
v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001
v_interp_p1_f32 v20, v0, 1, 0, [m0] ; C8500100
v_interp_p2_f32 v20, [v20], v1, 1, 0, [m0] ; C8510101
v_interp_p1_f32 v21, v0, 2, 0, [m0] ; C8540200
v_interp_p2_f32 v21, [v21], v1, 2, 0, [m0] ; C8550201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v10, v0, 0, 1, [m0] ; C8280400
v_interp_p2_f32 v10, [v10], v1, 0, 1, [m0] ; C8290401
v_interp_p1_f32 v11, v0, 1, 1, [m0] ; C82C0500
v_interp_p2_f32 v11, [v11], v1, 1, 1, [m0] ; C82D0501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700
v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701
v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800
v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801
v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900
v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00
v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01
v_interp_p1_f32 v33, v0, 3, 2, [m0] ; C8840B00
v_interp_p2_f32 v33, [v33], v1, 3, 2, [m0] ; C8850B01
v_interp_p1_f32 v37, v0, 0, 3, [m0] ; C8940C00
v_interp_p2_f32 v37, [v37], v1, 0, 3, [m0] ; C8950C01
v_interp_p1_f32 v27, v0, 1, 3, [m0] ; C86C0D00
v_interp_p2_f32 v27, [v27], v1, 1, 3, [m0] ; C86D0D01
v_interp_p1_f32 v3, v0, 2, 3, [m0] ; C80C0E00
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s1, s[8:11], 0x4 ; C2008904
s_buffer_load_dword s0, s[8:11], 0x5 ; C2000905
v_interp_p2_f32 v3, [v3], v1, 2, 3, [m0] ; C80D0E01
v_interp_p1_f32 v4, v0, 3, 3, [m0] ; C8100F00
s_buffer_load_dword s38, s[8:11], 0x6 ; C2130906
v_interp_p2_f32 v4, [v4], v1, 3, 3, [m0] ; C8110F01
v_interp_p1_f32 v0, v0, 0, 4, [m0] ; C8001000
s_buffer_load_dword s40, s[8:11], 0x0 ; C2140900
s_buffer_load_dword s39, s[8:11], 0x1 ; C2138901
v_interp_p2_f32 v0, [v0], v1, 0, 4, [m0] ; C8011001
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e64 v1, s1, s1 ; D2100001 00000201
v_mac_f32_e64 v1, s0, s0 ; D23E0001 00000000
s_buffer_load_dword s2, s[8:11], 0x34 ; C2010934
s_buffer_load_dword s3, s[8:11], 0x38 ; C2018938
v_mac_f32_e64 v1, s38, s38 ; D23E0001 00004C26
s_buffer_load_dword s41, s[8:11], 0x2 ; C2148902
v_rsq_clamp_f32_e32 v15, v1 ; 7E1E5901
v_sub_f32_e32 v16, s40, v33 ; 08204228
v_sub_f32_e32 v17, s39, v37 ; 08224A27
v_mul_f32_e32 v1, v16, v16 ; 10022110
v_mac_f32_e32 v1, v17, v17 ; 3E022311
v_add_f32_e32 v12, 0.5, v12 ; 061818F0
v_floor_f32_e32 v18, v12 ; 7E24490C
v_mov_b32_e32 v12, 0x42800000 ; 7E1802FF 42800000
v_cmp_le_f32_e32 vcc, v12, v18 ; 7C06250C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v19, s41, v27 ; 08263629
v_mac_f32_e32 v1, v19, v19 ; 3E022713
v_rsq_clamp_f32_e32 v23, v1 ; 7E2E5901
v_mov_b32_e32 v13, 0x7fffffff ; 7E1A02FF 7FFFFFFF
v_and_b32_e32 v1, v7, v13 ; 36021B07
s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000
s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000
s_mov_b32 s14, -1 ; BE8E03C1
s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000
v_readlane_b32 s12, v254, 52 ; 021969FE
s_nop 2 ; BF800002
buffer_store_dword v1, s[12:15], s12 ; E0700000 0C030100
v_and_b32_e32 v12, v8, v13 ; 36181B08
v_and_b32_e32 v13, v9, v13 ; 361A1B09
v_mul_f32_e64 v14, |v7|, |v7| ; D210030E 00020F07
v_mad_f32 v14, |v8|, |v8|, v14 ; D282030E 043A1108
v_mad_f32 v14, |v9|, |v9|, v14 ; D282030E 043A1309
v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E
v_add_f32_e32 v20, 0.5, v20 ; 062828F0
v_add_f32_e32 v21, 0.5, v21 ; 062A2AF0
v_floor_f32_e32 v24, v20 ; 7E304914
v_floor_f32_e32 v20, v21 ; 7E284915
v_mov_b32_e32 v30, s3 ; 7E3C0203
v_mul_f32_e32 v25, s2, v18 ; 10322402
v_floor_f32_e32 v22, v25 ; 7E2C4919
s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700
s_and_saveexec_b64 s[12:13], vcc ; BE8C246A
s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E
v_mov_b32_e32 v21, 0xc2800000 ; 7E2A02FF C2800000
v_add_f32_e32 v18, v18, v21 ; 06242B12
v_mul_f32_e32 v21, s3, v18 ; 102A2403
v_floor_f32_e32 v21, v21 ; 7E2A4915
v_mul_f32_e32 v26, s3, v21 ; 10342A03
v_mad_f32 v31, v18, s3, -v21 ; D282001F 84540712
v_floor_f32_e32 v18, v26 ; 7E24491A
v_mad_f32 v32, v21, s3, -v18 ; D2820020 84480715
v_add_f32_e32 v21, 4.0, v18 ; 062A24F6
s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C
v_mov_b32_e32 v18, s2 ; 7E240202
v_mov_b32_e32 v41, v30 ; 7E52031E
s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E
v_mul_f32_e32 v21, s2, v22 ; 102A2C02
v_floor_f32_e32 v26, v25 ; 7E344919
v_subrev_f32_e32 v31, v26, v25 ; 0A3E331A
v_floor_f32_e32 v21, v21 ; 7E2A4915
v_mad_f32 v32, v22, s2, -v21 ; D2820020 84540516
v_mov_b32_e32 v41, v18 ; 7E520312
s_or_b64 exec, exec, s[12:13] ; 88FE0C7E
v_mul_f32_e32 v26, s2, v24 ; 10343002
v_floor_f32_e32 v25, v26 ; 7E32491A
v_mov_b32_e32 v22, 0x42800000 ; 7E2C02FF 42800000
v_cmp_le_f32_e32 vcc, v22, v24 ; 7C063116
s_and_saveexec_b64 s[12:13], vcc ; BE8C246A
s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E
v_mov_b32_e32 v22, 0xc2800000 ; 7E2C02FF C2800000
v_add_f32_e32 v22, v24, v22 ; 062C2D18
v_mul_f32_e32 v24, s3, v22 ; 10302C03
v_floor_f32_e32 v24, v24 ; 7E304918
v_mul_f32_e32 v29, s3, v24 ; 103A3003
v_mad_f32 v28, v22, s3, -v24 ; D282001C 84600716
v_floor_f32_e32 v22, v29 ; 7E2C491D
v_mad_f32 v29, v24, s3, -v22 ; D282001D 84580718
v_add_f32_e32 v22, 4.0, v22 ; 062C2CF6
s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C
v_mov_b32_e32 v38, v30 ; 7E4C031E
s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E
v_mul_f32_e32 v22, s2, v25 ; 102C3202
v_floor_f32_e32 v24, v26 ; 7E30491A
v_subrev_f32_e32 v28, v24, v26 ; 0A383518
v_floor_f32_e32 v22, v22 ; 7E2C4916
v_mad_f32 v29, v25, s2, -v22 ; D282001D 84580519
v_mov_b32_e32 v38, v18 ; 7E4C0312
s_or_b64 exec, exec, s[12:13] ; 88FE0C7E
s_buffer_load_dword s15, s[8:11], 0x3c ; C207893C
s_buffer_load_dword s14, s[8:11], 0x40 ; C2070940
v_mul_f32_e32 v35, s2, v20 ; 10462802
v_floor_f32_e32 v34, v35 ; 7E444923
v_mov_b32_e32 v24, 0x42800000 ; 7E3002FF 42800000
v_cmp_le_f32_e32 vcc, v24, v20 ; 7C062918
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[12:13], vcc ; BE8C246A
s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E
v_mov_b32_e32 v24, 0xc2800000 ; 7E3002FF C2800000
v_add_f32_e32 v20, v20, v24 ; 06283114
v_mul_f32_e32 v24, s3, v20 ; 10302803
v_floor_f32_e32 v24, v24 ; 7E304918
v_mul_f32_e32 v26, s3, v24 ; 10343003
v_mad_f32 v25, v20, s3, -v24 ; D2820019 84600714
v_floor_f32_e32 v20, v26 ; 7E28491A
v_mad_f32 v26, v24, s3, -v20 ; D282001A 84500718
v_add_f32_e32 v24, 4.0, v20 ; 063028F6
s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C
v_mov_b32_e32 v39, s15 ; 7E4E020F
s_buffer_load_dword s4, s[8:11], 0x8 ; C2020908
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 38 ; 05FD4C04
s_buffer_load_dword s4, s[8:11], 0x9 ; C2020909
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 39 ; 05FD4E04
s_buffer_load_dword s4, s[8:11], 0xa ; C202090A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 1 ; 05FD0204
s_buffer_load_dword s37, s[8:11], 0x44 ; C2128944
s_buffer_load_dword s44, s[8:11], 0x48 ; C2160948
s_buffer_load_dword s42, s[8:11], 0x4c ; C215094C
s_buffer_load_dword s4, s[8:11], 0x50 ; C2020950
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 4 ; 05FD0804
s_buffer_load_dword s4, s[8:11], 0x51 ; C2020951
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 3 ; 05FD0604
s_buffer_load_dword s4, s[8:11], 0x52 ; C2020952
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 2 ; 05FD0404
s_buffer_load_dword s4, s[8:11], 0x54 ; C2020954
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 7 ; 05FD0E04
s_buffer_load_dword s4, s[8:11], 0x55 ; C2020955
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 6 ; 05FD0C04
s_buffer_load_dword s4, s[8:11], 0x56 ; C2020956
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 5 ; 05FD0A04
v_mov_b32_e32 v40, s14 ; 7E50020E
s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E
v_mul_f32_e32 v20, s2, v34 ; 10284402
v_floor_f32_e32 v24, v35 ; 7E304923
v_subrev_f32_e32 v25, v24, v35 ; 0A324718
v_floor_f32_e32 v24, v20 ; 7E304914
v_mad_f32 v26, v34, s2, -v24 ; D282001A 84600522
v_mov_b32_e32 v30, v18 ; 7E3C0312
s_or_b64 exec, exec, s[12:13] ; 88FE0C7E
v_mul_f32_e32 v20, v23, v16 ; 10282117
v_mul_f32_e32 v18, v23, v17 ; 10242317
v_mul_f32_e32 v19, v23, v19 ; 10262717
v_mac_f32_e32 v20, s1, v15 ; 3E281E01
v_mac_f32_e32 v18, s0, v15 ; 3E241E00
v_mac_f32_e32 v19, s38, v15 ; 3E261E26
v_cmp_neq_f32_e64 s[26:27], s2, v41 ; D01A001A 00025202
v_cmp_eq_f32_e32 vcc, 4.0, v21 ; 7C042AF6
v_cmp_eq_f32_e64 s[4:5], 2.0, v21 ; D0040004 00022AF4
v_writelane_b32 v254, s4, 22 ; 05FD2C04
v_writelane_b32 v254, s5, 23 ; 05FD2E05
v_cmp_eq_f32_e64 s[4:5], 1.0, v21 ; D0040004 00022AF2
v_writelane_b32 v254, s4, 18 ; 05FD2404
v_writelane_b32 v254, s5, 19 ; 05FD2605
v_cmp_eq_f32_e64 s[4:5], 0, v21 ; D0040004 00022A80
v_writelane_b32 v254, s4, 20 ; 05FD2804
v_writelane_b32 v254, s5, 21 ; 05FD2A05
v_cmp_neq_f32_e64 s[24:25], s2, v38 ; D01A0018 00024C02
v_cmp_neq_f32_e64 s[22:23], s2, v30 ; D01A0016 00023C02
v_cmp_eq_f32_e64 s[2:3], 4.0, v22 ; D0040002 00022CF6
v_writelane_b32 v254, s2, 12 ; 05FD1802
v_writelane_b32 v254, s3, 13 ; 05FD1A03
v_cmp_eq_f32_e64 s[2:3], 2.0, v22 ; D0040002 00022CF4
v_writelane_b32 v254, s2, 14 ; 05FD1C02
v_writelane_b32 v254, s3, 15 ; 05FD1E03
v_cmp_eq_f32_e64 s[2:3], 1.0, v22 ; D0040002 00022CF2
v_writelane_b32 v254, s2, 16 ; 05FD2002
v_writelane_b32 v254, s3, 17 ; 05FD2203
v_cmp_eq_f32_e64 s[2:3], 0, v22 ; D0040002 00022C80
v_writelane_b32 v254, s2, 24 ; 05FD3002
v_writelane_b32 v254, s3, 25 ; 05FD3203
v_mul_f32_e32 v17, s1, v15 ; 10221E01
v_mul_f32_e32 v16, s0, v15 ; 10201E00
v_mul_f32_e32 v15, s38, v15 ; 101E1E26
v_cmp_eq_f32_e64 s[0:1], 4.0, v24 ; D0040000 000230F6
v_writelane_b32 v254, s0, 8 ; 05FD1000
v_writelane_b32 v254, s1, 9 ; 05FD1201
v_subrev_f32_e32 v23, s40, v33 ; 0A2E4228
v_mul_f32_e32 v34, v23, v23 ; 10442F17
v_cmp_eq_f32_e64 s[0:1], 2.0, v24 ; D0040000 000230F4
v_writelane_b32 v254, s0, 10 ; 05FD1400
v_writelane_b32 v254, s1, 11 ; 05FD1601
v_subrev_f32_e32 v23, s39, v37 ; 0A2E4A27
v_mac_f32_e32 v34, v23, v23 ; 3E442F17
v_mov_b32_e32 v23, 0xbe4ccccd ; 7E2E02FF BE4CCCCD
v_subrev_f32_e32 v35, s41, v27 ; 0A463629
v_mac_f32_e32 v34, v35, v35 ; 3E444723
v_mul_f32_e32 v34, s42, v34 ; 1044442A
v_log_f32_e32 v34, v34 ; 7E444F22
v_cndmask_b32_e64 v35, v39, v40, s[26:27] ; D2000023 006A5127
v_rcp_f32_e32 v35, v35 ; 7E465523
v_mov_b32_e32 v46, 0x3b000000 ; 7E5C02FF 3B000000
v_mul_f32_e32 v34, 0x3f317218, v34 ; 104444FF 3F317218
v_mov_b32_e32 v47, 0x3b800000 ; 7E5E02FF 3B800000
v_cndmask_b32_e64 v36, v47, v46, s[26:27] ; D2000024 006A5D2F
s_load_dwordx4 s[96:99], s[100:101], 0x20 ; C0B06520
v_mul_f32_e32 v42, v35, v33 ; 10544323
v_floor_f32_e32 v42, v42 ; 7E54492A
v_mad_f32 v42, v33, v35, -v42 ; D282002A 84AA4721
v_mul_f32_e32 v43, v35, v37 ; 10564B23
v_floor_f32_e32 v43, v43 ; 7E56492B
v_mad_f32 v43, v37, v35, -v43 ; D282002B 84AE4725
v_add_f32_e64 v48, s37, s37 ; D2060030 00004A25
v_mad_f32 v44, -v48, v36, 1.0 ; D282002C 23CA4930
v_mul_f32_e32 v45, s37, v36 ; 105A4825
v_mad_f32 v49, v44, v42, v45 ; D2820031 04B6552C
v_mad_f32 v42, v44, v43, v45 ; D282002A 04B6572C
v_mul_f32_e32 v36, v35, v27 ; 10483723
v_floor_f32_e32 v36, v36 ; 7E484924
v_mad_f32 v35, v27, v35, -v36 ; D2820023 8492471B
v_mac_f32_e32 v45, v44, v35 ; 3E5A472C
v_cndmask_b32_e64 v35, v39, v40, s[24:25] ; D2000023 00625127
v_rcp_f32_e32 v50, v35 ; 7E645523
v_mul_f32_e32 v36, s44, v34 ; 1048442C
v_mad_f32 v34, v41, v49, v31 ; D2820022 047E6329
v_mad_f32 v35, v41, v42, v32 ; D2820023 04825529
v_mac_f32_e32 v31, v41, v45 ; 3E3E5B29
v_mul_f32_e32 v42, v50, v33 ; 10544332
v_floor_f32_e32 v42, v42 ; 7E54492A
v_mad_f32 v51, v33, v50, -v42 ; D2820033 84AA6521
v_mov_b32_e32 v42, v31 ; 7E54031F
v_mov_b32_e32 v43, v32 ; 7E560320
v_mov_b32_e32 v44, v33 ; 7E580321
v_mov_b32_e32 v45, v34 ; 7E5A0322
v_mac_f32_e32 v32, v41, v49 ; 3E406329
v_mul_f32_e32 v41, v50, v37 ; 10524B32
v_floor_f32_e32 v41, v41 ; 7E524929
v_mad_f32 v41, v37, v50, -v41 ; D2820029 84A66525
v_mul_f32_e32 v43, v50, v27 ; 10563732
v_floor_f32_e32 v43, v43 ; 7E56492B
v_mad_f32 v43, v27, v50, -v43 ; D282002B 84AE651B
v_cndmask_b32_e64 v39, v39, v40, s[22:23] ; D2000027 005A5127
v_cndmask_b32_e64 v40, v47, v46, s[24:25] ; D2000028 00625D2F
v_mad_f32 v44, -v48, v40, 1.0 ; D282002C 23CA5130
v_mul_f32_e32 v40, s37, v40 ; 10505025
v_rcp_f32_e32 v39, v39 ; 7E4E5527
v_mad_f32 v49, v44, v51, v40 ; D2820031 04A2672C
v_mad_f32 v41, v44, v41, v40 ; D2820029 04A2532C
v_mac_f32_e32 v40, v44, v43 ; 3E50572C
v_mul_f32_e32 v43, v39, v33 ; 10564327
v_floor_f32_e32 v43, v43 ; 7E56492B
v_mad_f32 v33, v33, v39, -v43 ; D2820021 84AE4F21
v_mul_f32_e32 v43, v39, v37 ; 10564B27
v_floor_f32_e32 v43, v43 ; 7E56492B
v_mad_f32 v37, v37, v39, -v43 ; D2820025 84AE4F25
v_mul_f32_e32 v43, v39, v27 ; 10563727
v_floor_f32_e32 v43, v43 ; 7E56492B
v_mad_f32 v27, v27, v39, -v43 ; D282001B 84AE4F1B
v_cndmask_b32_e64 v39, v47, v46, s[22:23] ; D2000027 005A5D2F
v_mad_f32 v43, -v48, v39, 1.0 ; D282002B 23CA4F30
v_mul_f32_e32 v44, s37, v39 ; 10584E25
v_mad_f32 v33, v43, v33, v44 ; D2820021 04B2432B
v_mad_f32 v37, v43, v37, v44 ; D2820025 04B24B2B
v_mac_f32_e32 v44, v43, v27 ; 3E58372B
v_mad_f32 v50, v38, v49, v28 ; D2820032 04726326
v_mad_f32 v51, v38, v41, v29 ; D2820033 04765326
v_mac_f32_e32 v28, v38, v40 ; 3E385126
v_mad_f32 v39, v30, v33, v25 ; D2820027 0466431E
v_mad_f32 v40, v30, v37, v26 ; D2820028 046A4B1E
v_mac_f32_e32 v25, v30, v44 ; 3E32591E
v_mov_b32_e32 v53, v28 ; 7E6A031C
v_mov_b32_e32 v54, v29 ; 7E6C031D
v_mov_b32_e32 v55, v30 ; 7E6E031E
v_mov_b32_e32 v56, v31 ; 7E70031F
v_mac_f32_e32 v29, v38, v49 ; 3E3A6326
v_mov_b32_e32 v46, v25 ; 7E5C0319
v_mov_b32_e32 v47, v26 ; 7E5E031A
v_mov_b32_e32 v48, v27 ; 7E60031B
v_mov_b32_e32 v49, v28 ; 7E62031C
v_mac_f32_e32 v26, v30, v33 ; 3E34431E
v_mov_b32_e32 v43, v35 ; 7E560323
v_mov_b32_e32 v52, v36 ; 7E680324
v_mov_b32_e32 v54, v51 ; 7E6C0333
v_mov_b32_e32 v41, v36 ; 7E520324
v_mov_b32_e32 v47, v40 ; 7E5E0328
s_load_dwordx8 s[64:71], s[6:7], 0x40 ; C0E00740
v_mov_b32_e32 v44, v36 ; 7E580324
v_mov_b32_e32 v33, v36 ; 7E420324
v_mov_b32_e32 v55, v36 ; 7E6E0324
v_mov_b32_e32 v30, v36 ; 7E3C0324
v_mov_b32_e32 v48, v36 ; 7E600324
v_mov_b32_e32 v27, v36 ; 7E360324
s_load_dwordx4 s[44:47], s[100:101], 0x8 ; C0966508
s_load_dwordx4 s[28:31], s[100:101], 0x18 ; C08E6518
s_load_dwordx8 s[12:19], s[6:7], 0x20 ; C0C60720
s_load_dwordx8 s[56:63], s[6:7], 0x30 ; C0DC0730
s_load_dwordx4 s[8:11], s[100:101], 0x10 ; C0846510
s_load_dwordx4 s[48:51], s[100:101], 0x24 ; C0986524
s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710
s_load_dwordx8 s[80:87], s[6:7], 0x48 ; C0E80748
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[57:59], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[64:71], s[96:99] ; F0900700 03103922
s_load_dwordx4 s[52:55], s[100:101], 0x0 ; C09A6500
s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700
s_load_dwordx4 s[40:43], s[100:101], 0x1c ; C094651C
s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738
image_sample_l v[60:62], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[56:63], s[28:31] ; F0900700 00EE3C22
image_sample_l v[63:65], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[12:19], s[8:11] ; F0900700 00433F22
s_load_dwordx4 s[0:3], s[100:101], 0xc ; C080650C
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v254, s0, 40 ; 05FD5000
v_writelane_b32 v254, s1, 41 ; 05FD5201
v_writelane_b32 v254, s2, 42 ; 05FD5402
v_writelane_b32 v254, s3, 43 ; 05FD5603
s_load_dwordx8 s[88:95], s[6:7], 0x18 ; C0EC0718
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s88, 44 ; 05FD5858
v_writelane_b32 v254, s89, 45 ; 05FD5A59
v_writelane_b32 v254, s90, 46 ; 05FD5C5A
v_writelane_b32 v254, s91, 47 ; 05FD5E5B
v_writelane_b32 v254, s92, 48 ; 05FD605C
v_writelane_b32 v254, s93, 49 ; 05FD625D
v_writelane_b32 v254, s94, 50 ; 05FD645E
v_writelane_b32 v254, s95, 51 ; 05FD665F
image_sample_l v[66:68], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[20:27], s[44:47] ; F0900700 01654222
s_load_dwordx4 s[0:3], s[100:101], 0x4 ; C0806504
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v254, s0, 26 ; 05FD3400
v_writelane_b32 v254, s1, 27 ; 05FD3601
v_writelane_b32 v254, s2, 28 ; 05FD3802
v_writelane_b32 v254, s3, 29 ; 05FD3A03
s_load_dwordx8 s[88:95], s[6:7], 0x8 ; C0EC0708
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s88, 30 ; 05FD3C58
v_writelane_b32 v254, s89, 31 ; 05FD3E59
v_writelane_b32 v254, s90, 32 ; 05FD405A
v_writelane_b32 v254, s91, 33 ; 05FD425B
v_writelane_b32 v254, s92, 34 ; 05FD445C
v_writelane_b32 v254, s93, 35 ; 05FD465D
v_writelane_b32 v254, s94, 36 ; 05FD485E
v_writelane_b32 v254, s95, 37 ; 05FD4A5F
image_sample_l v[69:71], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[32:39], s[52:55] ; F0900700 01A84522
image_sample_l v[72:74], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[64:71], s[96:99] ; F0900700 0310482A
image_sample_l v[75:77], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[56:63], s[28:31] ; F0900700 00EE4B2A
image_sample_l v[78:80], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[12:19], s[8:11] ; F0900700 00434E2A
image_sample_l v[81:83], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[20:27], s[44:47] ; F0900700 0165512A
image_sample_l v[84:86], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[32:39], s[52:55] ; F0900700 01A8542A
image_sample_l v[87:89], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[64:71], s[96:99] ; F0900700 0310571F
image_sample_l v[90:92], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[56:63], s[28:31] ; F0900700 00EE5A1F
image_sample_l v[93:95], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[8:11] ; F0900700 00435D1F
image_sample_l v[96:98], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[20:27], s[44:47] ; F0900700 0165601F
image_sample_l v[99:101], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[32:39], s[52:55] ; F0900700 01A8631F
image_sample_l v[102:104], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[64:71], s[96:99] ; F0900700 03106632
image_sample_l v[105:107], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[56:63], s[28:31] ; F0900700 00EE6932
image_sample_l v[108:110], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[12:19], s[8:11] ; F0900700 00436C32
image_sample_l v[111:113], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[20:27], s[44:47] ; F0900700 01656F32
image_sample_l v[114:116], 7, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[32:39], s[52:55] ; F0900700 01A87232
image_sample_l v[117:119], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[64:71], s[96:99] ; F0900700 03107535
image_sample_l v[120:122], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[56:63], s[28:31] ; F0900700 00EE7835
image_sample_l v[123:125], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[12:19], s[8:11] ; F0900700 00437B35
image_sample_l v[126:128], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[20:27], s[44:47] ; F0900700 01657E35
image_sample_l v[129:131], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[32:39], s[52:55] ; F0900700 01A88135
image_sample_l v[132:134], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[64:71], s[96:99] ; F0900700 0310841C
image_sample_l v[135:137], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[56:63], s[28:31] ; F0900700 00EE871C
image_sample_l v[138:140], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[8:11] ; F0900700 00438A1C
image_sample_l v[141:143], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[20:27], s[44:47] ; F0900700 01658D1C
image_sample_l v[144:146], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[32:39], s[52:55] ; F0900700 01A8901C
image_sample_l v[147:149], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[96:99] ; F0900700 03109327
image_sample_l v[150:152], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[56:63], s[28:31] ; F0900700 00EE9627
image_sample_l v[153:155], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[12:19], s[8:11] ; F0900700 00439927
image_sample_l v[156:158], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[20:27], s[44:47] ; F0900700 01659C27
image_sample_l v[159:161], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[32:39], s[52:55] ; F0900700 01A89F27
image_sample_l v[162:164], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[64:71], s[96:99] ; F0900700 0310A22E
image_sample_l v[165:167], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[56:63], s[28:31] ; F0900700 00EEA52E
image_sample_l v[168:170], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[12:19], s[8:11] ; F0900700 0043A82E
s_load_dwordx4 s[88:91], s[100:101], 0x14 ; C0AC6514
s_load_dwordx8 s[0:7], s[6:7], 0x28 ; C0C00728
image_sample_l v[171:173], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[20:27], s[44:47] ; F0900700 0165AB2E
image_sample_l v[174:176], 7, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[32:39], s[52:55] ; F0900700 01A8AE2E
image_sample_l v[177:179], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[64:71], s[96:99] ; F0900700 0310B119
image_sample_l v[180:182], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[56:63], s[28:31] ; F0900700 00EEB419
image_sample_l v[183:185], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[8:11] ; F0900700 0043B719
image_sample_l v[186:188], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[20:27], s[44:47] ; F0900700 0165BA19
image_sample_l v[189:191], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[32:39], s[52:55] ; F0900700 01A8BD19
image_sample_l v[37:38], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[80:87], s[48:51] ; F0900A00 0194252A
image_sample_l v[192:193], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[72:79], s[40:43] ; F0900A00 0152C02A
s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079
image_sample_l v[194:195], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[0:7], s[88:91] ; F0900A00 02C0C22A
v_readlane_b32 s8, v254, 40 ; 021151FE
v_readlane_b32 s9, v254, 41 ; 021353FE
v_readlane_b32 s10, v254, 42 ; 021555FE
v_readlane_b32 s11, v254, 43 ; 021757FE
s_nop 2 ; BF800002
v_readlane_b32 s24, v254, 44 ; 023159FE
v_readlane_b32 s25, v254, 45 ; 02335BFE
v_readlane_b32 s26, v254, 46 ; 02355DFE
v_readlane_b32 s27, v254, 47 ; 02375FFE
v_readlane_b32 s28, v254, 48 ; 023961FE
v_readlane_b32 s29, v254, 49 ; 023B63FE
v_readlane_b32 s30, v254, 50 ; 023D65FE
v_readlane_b32 s31, v254, 51 ; 023F67FE
s_nop 2 ; BF800002
image_sample_l v[196:197], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[24:31], s[8:11] ; F0900A00 0046C42A
v_readlane_b32 s92, v254, 26 ; 02B935FE
v_readlane_b32 s93, v254, 27 ; 02BB37FE
v_readlane_b32 s94, v254, 28 ; 02BD39FE
v_readlane_b32 s95, v254, 29 ; 02BF3BFE
s_nop 2 ; BF800002
v_readlane_b32 s16, v254, 30 ; 02213DFE
v_readlane_b32 s17, v254, 31 ; 02233FFE
v_readlane_b32 s18, v254, 32 ; 022541FE
v_readlane_b32 s19, v254, 33 ; 022743FE
v_readlane_b32 s20, v254, 34 ; 022945FE
v_readlane_b32 s21, v254, 35 ; 022B47FE
v_readlane_b32 s22, v254, 36 ; 022D49FE
v_readlane_b32 s23, v254, 37 ; 022F4BFE
s_nop 2 ; BF800002
image_sample_l v[42:43], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[16:23], s[92:95] ; F0900A00 02E42A2A
image_sample_l v[44:45], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[80:87], s[48:51] ; F0900A00 01942C1F
image_sample_l v[198:199], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[72:79], s[40:43] ; F0900A00 0152C61F
image_sample_l v[200:201], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[0:7], s[88:91] ; F0900A00 02C0C81F
image_sample_l v[202:203], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[24:31], s[8:11] ; F0900A00 0046CA1F
s_mov_b32 s12, s8 ; BE8C0308
s_mov_b32 s13, s9 ; BE8D0309
s_mov_b32 s14, s10 ; BE8E030A
s_mov_b32 s15, s11 ; BE8F030B
image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[16:23], s[92:95] ; F0900A00 02E41F1F
s_mov_b32 s8, s92 ; BE88035C
s_mov_b32 s9, s93 ; BE89035D
s_mov_b32 s10, s94 ; BE8A035E
s_mov_b32 s11, s95 ; BE8B035F
s_waitcnt vmcnt(9) ; BF8C0779
image_sample_l v[204:205], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[80:87], s[48:51] ; F0900A00 0194CC22
image_sample_l v[206:207], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[72:79], s[40:43] ; F0900A00 0152CE22
image_sample_l v[208:209], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[0:7], s[88:91] ; F0900A00 02C0D022
image_sample_l v[210:211], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[24:31], s[12:15] ; F0900A00 0066D222
image_sample_l v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[16:23], s[8:11] ; F0900A00 00442122
image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[80:87], s[48:51] ; F0900A00 01942335
image_sample_l v[212:213], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[72:79], s[40:43] ; F0900A00 0152D435
image_sample_l v[214:215], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[0:7], s[88:91] ; F0900A00 02C0D635
image_sample_l v[216:217], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[24:31], s[12:15] ; F0900A00 0066D835
image_sample_l v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[16:23], s[8:11] ; F0900A00 00443535
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[55:56], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[80:87], s[48:51] ; F0900A00 0194371C
image_sample_l v[218:219], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[72:79], s[40:43] ; F0900A00 0152DA1C
image_sample_l v[220:221], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[0:7], s[88:91] ; F0900A00 02C0DC1C
image_sample_l v[222:223], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[24:31], s[12:15] ; F0900A00 0066DE1C
image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[8:11] ; F0900A00 00441C1C
s_waitcnt vmcnt(5) ; BF8C0775
image_sample_l v[224:225], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[80:87], s[48:51] ; F0900A00 0194E032
image_sample_l v[226:227], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[72:79], s[40:43] ; F0900A00 0152E232
image_sample_l v[228:229], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[0:7], s[88:91] ; F0900A00 02C0E432
image_sample_l v[230:231], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[24:31], s[12:15] ; F0900A00 0066E632
image_sample_l v[50:51], 10, 0, 0, 0, 0, 0, 0, 0, v[50:53], s[16:23], s[8:11] ; F0900A00 00443232
image_sample_l v[232:233], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[80:87], s[48:51] ; F0900A00 0194E82E
image_sample_l v[234:235], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[72:79], s[40:43] ; F0900A00 0152EA2E
image_sample_l v[236:237], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[0:7], s[88:91] ; F0900A00 02C0EC2E
image_sample_l v[238:239], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[24:31], s[12:15] ; F0900A00 0066EE2E
image_sample_l v[46:47], 10, 0, 0, 0, 0, 0, 0, 0, v[46:49], s[16:23], s[8:11] ; F0900A00 00442E2E
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[48:49], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[80:87], s[48:51] ; F0900A00 01943019
image_sample_l v[240:241], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[72:79], s[40:43] ; F0900A00 0152F019
image_sample_l v[242:243], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[0:7], s[88:91] ; F0900A00 02C0F219
image_sample_l v[244:245], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[24:31], s[12:15] ; F0900A00 0066F419
image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[16:23], s[8:11] ; F0900A00 00441919
image_sample_l v[246:247], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[48:51] ; F0900A00 0194F627
image_sample_l v[248:249], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[40:43] ; F0900A00 0152F827
image_sample_l v[250:251], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[0:7], s[88:91] ; F0900A00 02C0FA27
v_cndmask_b32_e64 v27, 0, 1.0, vcc ; D200001B 01A9E480
v_mov_b32_e32 v30, 0x40400000 ; 7E3C02FF 40400000
v_cmp_eq_f32_e64 s[4:5], v21, v30 ; D0040004 00023D15
v_cndmask_b32_e64 v21, 0, 1.0, s[4:5] ; D2000015 0011E480
v_readlane_b32 s0, v254, 22 ; 02012DFE
v_readlane_b32 s1, v254, 23 ; 02032FFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v52, 0, 1.0, s[0:1] ; D2000034 0001E480
v_readlane_b32 s0, v254, 18 ; 020125FE
v_readlane_b32 s1, v254, 19 ; 020327FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v252, 0, 1.0, s[0:1] ; D20000FC 0001E480
v_readlane_b32 s0, v254, 20 ; 020129FE
v_readlane_b32 s1, v254, 21 ; 02032BFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v253, 0, 1.0, s[0:1] ; D20000FD 0001E480
v_readlane_b32 s0, v254, 24 ; 020131FE
v_readlane_b32 s1, v254, 25 ; 020333FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v1, 0, 1.0, s[0:1] ; D2000001 0001E480
v_mul_f32_e32 v69, v253, v69 ; 108A8BFD
v_mul_f32_e32 v70, v253, v70 ; 108C8DFD
v_mul_f32_e32 v71, v253, v71 ; 108E8FFD
v_mac_f32_e32 v69, v252, v66 ; 3E8A85FC
v_mac_f32_e32 v70, v252, v67 ; 3E8C87FC
v_mac_f32_e32 v71, v252, v68 ; 3E8E89FC
v_mac_f32_e32 v69, v52, v63 ; 3E8A7F34
v_mac_f32_e32 v70, v52, v64 ; 3E8C8134
v_mac_f32_e32 v71, v52, v65 ; 3E8E8334
v_mac_f32_e32 v69, v21, v60 ; 3E8A7915
v_mac_f32_e32 v70, v21, v61 ; 3E8C7B15
v_mac_f32_e32 v71, v21, v62 ; 3E8E7D15
v_mac_f32_e32 v69, v27, v57 ; 3E8A731B
v_mac_f32_e32 v70, v27, v58 ; 3E8C751B
v_mac_f32_e32 v71, v27, v59 ; 3E8E771B
v_mul_f32_e32 v57, v253, v84 ; 1072A9FD
v_mul_f32_e32 v58, v253, v85 ; 1074ABFD
v_mul_f32_e32 v59, v253, v86 ; 1076ADFD
v_mac_f32_e32 v57, v252, v81 ; 3E72A3FC
v_mac_f32_e32 v58, v252, v82 ; 3E74A5FC
v_mac_f32_e32 v59, v252, v83 ; 3E76A7FC
v_mac_f32_e32 v57, v52, v78 ; 3E729D34
v_mac_f32_e32 v58, v52, v79 ; 3E749F34
v_mac_f32_e32 v59, v52, v80 ; 3E76A134
v_mac_f32_e32 v57, v21, v75 ; 3E729715
v_mac_f32_e32 v58, v21, v76 ; 3E749915
v_mac_f32_e32 v59, v21, v77 ; 3E769B15
v_mac_f32_e32 v57, v27, v72 ; 3E72911B
v_mac_f32_e32 v58, v27, v73 ; 3E74931B
v_mac_f32_e32 v59, v27, v74 ; 3E76951B
v_mul_f32_e32 v60, v253, v99 ; 1078C7FD
v_mul_f32_e32 v61, v253, v100 ; 107AC9FD
v_mul_f32_e32 v62, v253, v101 ; 107CCBFD
v_mac_f32_e32 v60, v252, v96 ; 3E78C1FC
v_mac_f32_e32 v61, v252, v97 ; 3E7AC3FC
v_mac_f32_e32 v62, v252, v98 ; 3E7CC5FC
v_mac_f32_e32 v60, v52, v93 ; 3E78BB34
v_mac_f32_e32 v61, v52, v94 ; 3E7ABD34
v_mac_f32_e32 v62, v52, v95 ; 3E7CBF34
v_mac_f32_e32 v60, v21, v90 ; 3E78B515
v_mac_f32_e32 v61, v21, v91 ; 3E7AB715
v_mac_f32_e32 v62, v21, v92 ; 3E7CB915
v_mac_f32_e32 v60, v27, v87 ; 3E78AF1B
v_mac_f32_e32 v61, v27, v88 ; 3E7AB11B
v_mac_f32_e32 v62, v27, v89 ; 3E7CB31B
v_mul_f32_e32 v63, v1, v114 ; 107EE501
v_mul_f32_e32 v64, v1, v115 ; 1080E701
v_mul_f32_e32 v65, v1, v116 ; 1082E901
v_readlane_b32 s0, v254, 16 ; 020121FE
v_readlane_b32 s1, v254, 17 ; 020323FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v66, 0, 1.0, s[0:1] ; D2000042 0001E480
v_mac_f32_e32 v63, v66, v111 ; 3E7EDF42
v_mac_f32_e32 v64, v66, v112 ; 3E80E142
v_mac_f32_e32 v65, v66, v113 ; 3E82E342
v_readlane_b32 s0, v254, 14 ; 02011DFE
v_readlane_b32 s1, v254, 15 ; 02031FFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v67, 0, 1.0, s[0:1] ; D2000043 0001E480
v_mac_f32_e32 v63, v67, v108 ; 3E7ED943
v_mac_f32_e32 v64, v67, v109 ; 3E80DB43
v_mac_f32_e32 v65, v67, v110 ; 3E82DD43
v_cmp_eq_f32_e64 s[4:5], v22, v30 ; D0040004 00023D16
v_cndmask_b32_e64 v22, 0, 1.0, s[4:5] ; D2000016 0011E480
v_mac_f32_e32 v63, v22, v105 ; 3E7ED316
v_mac_f32_e32 v64, v22, v106 ; 3E80D516
v_mac_f32_e32 v65, v22, v107 ; 3E82D716
v_readlane_b32 s0, v254, 12 ; 020119FE
v_readlane_b32 s1, v254, 13 ; 02031BFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v68, 0, 1.0, s[0:1] ; D2000044 0001E480
v_mac_f32_e32 v63, v68, v102 ; 3E7ECD44
v_mac_f32_e32 v64, v68, v103 ; 3E80CF44
v_mac_f32_e32 v65, v68, v104 ; 3E82D144
v_mul_f32_e32 v72, v1, v129 ; 10910301
v_mul_f32_e32 v73, v1, v130 ; 10930501
v_mul_f32_e32 v74, v1, v131 ; 10950701
v_mac_f32_e32 v72, v66, v126 ; 3E90FD42
v_mac_f32_e32 v73, v66, v127 ; 3E92FF42
v_mac_f32_e32 v74, v66, v128 ; 3E950142
v_mac_f32_e32 v72, v67, v123 ; 3E90F743
v_mac_f32_e32 v73, v67, v124 ; 3E92F943
v_mac_f32_e32 v74, v67, v125 ; 3E94FB43
v_mac_f32_e32 v72, v22, v120 ; 3E90F116
v_mac_f32_e32 v73, v22, v121 ; 3E92F316
v_mac_f32_e32 v74, v22, v122 ; 3E94F516
v_mac_f32_e32 v72, v68, v117 ; 3E90EB44
v_mac_f32_e32 v73, v68, v118 ; 3E92ED44
v_mac_f32_e32 v74, v68, v119 ; 3E94EF44
v_mul_f32_e32 v75, v1, v144 ; 10972101
v_mul_f32_e32 v76, v1, v145 ; 10992301
v_mul_f32_e32 v77, v1, v146 ; 109B2501
v_mac_f32_e32 v75, v66, v141 ; 3E971B42
v_mac_f32_e32 v76, v66, v142 ; 3E991D42
v_mac_f32_e32 v77, v66, v143 ; 3E9B1F42
v_mac_f32_e32 v75, v67, v138 ; 3E971543
v_mac_f32_e32 v76, v67, v139 ; 3E991743
v_mac_f32_e32 v77, v67, v140 ; 3E9B1943
v_mac_f32_e32 v75, v22, v135 ; 3E970F16
v_mac_f32_e32 v76, v22, v136 ; 3E991116
v_mac_f32_e32 v77, v22, v137 ; 3E9B1316
v_mac_f32_e32 v75, v68, v132 ; 3E970944
v_mac_f32_e32 v76, v68, v133 ; 3E990B44
v_mac_f32_e32 v77, v68, v134 ; 3E9B0D44
v_cmp_eq_f32_e64 s[2:3], 0, v24 ; D0040002 00023080
v_cndmask_b32_e64 v78, 0, 1.0, s[2:3] ; D200004E 0009E480
v_mul_f32_e32 v79, v78, v159 ; 109F3F4E
v_mul_f32_e32 v80, v78, v160 ; 10A1414E
v_mul_f32_e32 v81, v78, v161 ; 10A3434E
v_cmp_eq_f32_e64 s[2:3], 1.0, v24 ; D0040002 000230F2
v_cndmask_b32_e64 v82, 0, 1.0, s[2:3] ; D2000052 0009E480
v_mac_f32_e32 v79, v82, v156 ; 3E9F3952
v_mac_f32_e32 v80, v82, v157 ; 3EA13B52
v_mac_f32_e32 v81, v82, v158 ; 3EA33D52
v_readlane_b32 s0, v254, 10 ; 020115FE
v_readlane_b32 s1, v254, 11 ; 020317FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v83, 0, 1.0, s[0:1] ; D2000053 0001E480
v_mac_f32_e32 v79, v83, v153 ; 3E9F3353
v_mac_f32_e32 v80, v83, v154 ; 3EA13553
v_mac_f32_e32 v81, v83, v155 ; 3EA33753
v_cmp_eq_f32_e64 s[0:1], v24, v30 ; D0040000 00023D18
v_cndmask_b32_e64 v24, 0, 1.0, s[0:1] ; D2000018 0001E480
v_mac_f32_e32 v79, v24, v150 ; 3E9F2D18
v_mac_f32_e32 v80, v24, v151 ; 3EA12F18
v_mac_f32_e32 v81, v24, v152 ; 3EA33118
v_readlane_b32 s0, v254, 8 ; 020111FE
v_readlane_b32 s1, v254, 9 ; 020313FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v84, 0, 1.0, s[0:1] ; D2000054 0001E480
v_mac_f32_e32 v79, v84, v147 ; 3E9F2754
v_mac_f32_e32 v80, v84, v148 ; 3EA12954
v_mac_f32_e32 v81, v84, v149 ; 3EA32B54
v_mul_f32_e32 v85, v78, v174 ; 10AB5D4E
v_mul_f32_e32 v86, v78, v175 ; 10AD5F4E
v_mul_f32_e32 v87, v78, v176 ; 10AF614E
v_mac_f32_e32 v85, v82, v171 ; 3EAB5752
v_mac_f32_e32 v86, v82, v172 ; 3EAD5952
v_mac_f32_e32 v87, v82, v173 ; 3EAF5B52
v_mac_f32_e32 v85, v83, v168 ; 3EAB5153
v_mac_f32_e32 v86, v83, v169 ; 3EAD5353
v_mac_f32_e32 v87, v83, v170 ; 3EAF5553
v_mac_f32_e32 v85, v24, v165 ; 3EAB4B18
v_mac_f32_e32 v86, v24, v166 ; 3EAD4D18
v_mac_f32_e32 v87, v24, v167 ; 3EAF4F18
v_mac_f32_e32 v85, v84, v162 ; 3EAB4554
v_mac_f32_e32 v86, v84, v163 ; 3EAD4754
v_mac_f32_e32 v87, v84, v164 ; 3EAF4954
v_mul_f32_e32 v88, v78, v189 ; 10B17B4E
v_mul_f32_e32 v89, v78, v190 ; 10B37D4E
v_mul_f32_e32 v90, v78, v191 ; 10B57F4E
v_mac_f32_e32 v88, v82, v186 ; 3EB17552
v_mac_f32_e32 v89, v82, v187 ; 3EB37752
v_mac_f32_e32 v90, v82, v188 ; 3EB57952
v_mac_f32_e32 v88, v83, v183 ; 3EB16F53
v_mac_f32_e32 v89, v83, v184 ; 3EB37153
v_mac_f32_e32 v90, v83, v185 ; 3EB57353
v_mac_f32_e32 v88, v24, v180 ; 3EB16918
v_mac_f32_e32 v89, v24, v181 ; 3EB36B18
v_mac_f32_e32 v90, v24, v182 ; 3EB56D18
v_mac_f32_e32 v88, v84, v177 ; 3EB16354
v_mac_f32_e32 v89, v84, v178 ; 3EB36554
v_mac_f32_e32 v90, v84, v179 ; 3EB56754
v_mul_f32_e32 v42, v253, v42 ; 105455FD
v_mul_f32_e32 v43, v253, v43 ; 105657FD
v_mac_f32_e32 v42, v252, v196 ; 3E5589FC
v_mac_f32_e32 v43, v252, v197 ; 3E578BFC
v_mul_f32_e32 v31, v253, v31 ; 103E3FFD
v_mul_f32_e32 v32, v253, v32 ; 104041FD
v_mac_f32_e32 v31, v252, v202 ; 3E3F95FC
v_mac_f32_e32 v32, v252, v203 ; 3E4197FC
v_mul_f32_e32 v33, v253, v33 ; 104243FD
v_mul_f32_e32 v34, v253, v34 ; 104445FD
v_mac_f32_e32 v33, v252, v210 ; 3E43A5FC
v_mac_f32_e32 v34, v252, v211 ; 3E45A7FC
v_mac_f32_e32 v42, v52, v194 ; 3E558534
v_mac_f32_e32 v43, v52, v195 ; 3E578734
v_mac_f32_e32 v31, v52, v200 ; 3E3F9134
v_mac_f32_e32 v32, v52, v201 ; 3E419334
v_mac_f32_e32 v33, v52, v208 ; 3E43A134
v_mac_f32_e32 v34, v52, v209 ; 3E45A334
v_mac_f32_e32 v42, v21, v192 ; 3E558115
v_mac_f32_e32 v43, v21, v193 ; 3E578315
v_mac_f32_e32 v31, v21, v198 ; 3E3F8D15
v_mac_f32_e32 v32, v21, v199 ; 3E418F15
v_mac_f32_e32 v33, v21, v206 ; 3E439D15
v_mac_f32_e32 v34, v21, v207 ; 3E459F15
v_mac_f32_e32 v42, v27, v37 ; 3E544B1B
v_mac_f32_e32 v43, v27, v38 ; 3E564D1B
v_mac_f32_e32 v31, v27, v44 ; 3E3E591B
v_mac_f32_e32 v32, v27, v45 ; 3E405B1B
v_mac_f32_e32 v33, v27, v204 ; 3E43991B
v_mac_f32_e32 v34, v27, v205 ; 3E459B1B
v_mul_f32_e32 v21, v1, v53 ; 102A6B01
v_mul_f32_e32 v27, v1, v54 ; 10366D01
v_mac_f32_e32 v21, v66, v216 ; 3E2BB142
v_mac_f32_e32 v27, v66, v217 ; 3E37B342
v_mul_f32_e32 v28, v1, v28 ; 10383901
v_mul_f32_e32 v29, v1, v29 ; 103A3B01
v_mac_f32_e32 v28, v66, v222 ; 3E39BD42
v_mac_f32_e32 v29, v66, v223 ; 3E3BBF42
s_waitcnt vmcnt(13) ; BF8C077D
v_mul_f32_e32 v37, v1, v50 ; 104A6501
v_mul_f32_e32 v1, v1, v51 ; 10026701
v_mac_f32_e32 v37, v66, v230 ; 3E4BCD42
v_mac_f32_e32 v1, v66, v231 ; 3E03CF42
v_mac_f32_e32 v21, v67, v214 ; 3E2BAD43
v_mac_f32_e32 v27, v67, v215 ; 3E37AF43
v_mac_f32_e32 v28, v67, v220 ; 3E39B943
v_mac_f32_e32 v29, v67, v221 ; 3E3BBB43
v_mac_f32_e32 v37, v67, v228 ; 3E4BC943
v_mac_f32_e32 v1, v67, v229 ; 3E03CB43
v_mac_f32_e32 v21, v22, v212 ; 3E2BA916
v_mac_f32_e32 v27, v22, v213 ; 3E37AB16
v_mac_f32_e32 v28, v22, v218 ; 3E39B516
v_mac_f32_e32 v29, v22, v219 ; 3E3BB716
v_mac_f32_e32 v37, v22, v226 ; 3E4BC516
v_mac_f32_e32 v1, v22, v227 ; 3E03C716
v_mac_f32_e32 v21, v68, v35 ; 3E2A4744
v_mac_f32_e32 v27, v68, v36 ; 3E364944
v_mac_f32_e32 v28, v68, v55 ; 3E386F44
v_mac_f32_e32 v29, v68, v56 ; 3E3A7144
v_mac_f32_e32 v37, v68, v224 ; 3E4BC144
v_mac_f32_e32 v1, v68, v225 ; 3E03C344
image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[24:31], s[12:15] ; F0900A00 00662327
image_sample_l v[38:39], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[16:23], s[8:11] ; F0900A00 00442627
s_waitcnt vmcnt(10) ; BF8C077A
v_mul_f32_e32 v22, v78, v46 ; 102C5D4E
v_mul_f32_e32 v40, v78, v47 ; 10505F4E
v_mac_f32_e32 v22, v82, v238 ; 3E2DDD52
v_mac_f32_e32 v40, v82, v239 ; 3E51DF52
s_waitcnt vmcnt(5) ; BF8C0775
v_mul_f32_e32 v25, v78, v25 ; 1032334E
v_mul_f32_e32 v26, v78, v26 ; 1034354E
v_mac_f32_e32 v25, v82, v244 ; 3E33E952
v_mac_f32_e32 v26, v82, v245 ; 3E35EB52
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v38, v78, v38 ; 104C4D4E
v_mul_f32_e32 v39, v78, v39 ; 104E4F4E
v_mac_f32_e32 v38, v82, v35 ; 3E4C4752
v_mac_f32_e32 v39, v82, v36 ; 3E4E4952
v_mac_f32_e32 v22, v83, v236 ; 3E2DD953
v_mac_f32_e32 v40, v83, v237 ; 3E51DB53
v_mac_f32_e32 v25, v83, v242 ; 3E33E553
v_mac_f32_e32 v26, v83, v243 ; 3E35E753
v_mac_f32_e32 v38, v83, v250 ; 3E4DF553
v_mac_f32_e32 v39, v83, v251 ; 3E4FF753
v_mac_f32_e32 v22, v24, v234 ; 3E2DD518
v_mac_f32_e32 v40, v24, v235 ; 3E51D718
v_mac_f32_e32 v25, v24, v240 ; 3E33E118
v_mac_f32_e32 v26, v24, v241 ; 3E35E318
v_mac_f32_e32 v38, v24, v248 ; 3E4DF118
v_mac_f32_e32 v39, v24, v249 ; 3E4FF318
s_mov_b32 s0, SCRATCH_RSRC_DWORD0 ; BE8003FF 00000000
s_mov_b32 s1, SCRATCH_RSRC_DWORD1 ; BE8103FF 00000000
s_mov_b32 s2, -1 ; BE8203C1
s_mov_b32 s3, 0x80f000 ; BE8303FF 0080F000
buffer_load_dword v24, s[0:3], s12 ; E0300000 0C001800
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v24, v14, v24, v23 ; D2820018 045E310E
v_mad_f32 v12, v14, v12, v23 ; D282000C 045E190E
v_mac_f32_e32 v23, v14, v13 ; 3E2E1B0E
v_mac_f32_e32 v22, v84, v232 ; 3E2DD154
v_mac_f32_e32 v40, v84, v233 ; 3E51D354
v_mov_b32_e32 v13, 0x40e00000 ; 7E1A02FF 40E00000
v_mul_f32_e32 v14, v13, v24 ; 101C310D
v_mul_f32_e32 v12, v13, v12 ; 1018190D
v_mul_f32_e32 v13, v13, v23 ; 101A2F0D
v_mov_b32_e32 v23, 0x3c23d70a ; 7E2E02FF 3C23D70A
v_max_f32_e32 v14, v23, v14 ; 201C1D17
v_max_f32_e32 v12, v23, v12 ; 20181917
v_max_f32_e32 v13, v23, v13 ; 201A1B17
v_add_f32_e32 v23, v12, v14 ; 062E1D0C
v_add_f32_e32 v23, v13, v23 ; 062E2F0D
v_rcp_f32_e32 v23, v23 ; 7E2E5517
v_mac_f32_e32 v25, v84, v48 ; 3E326154
v_mac_f32_e32 v26, v84, v49 ; 3E346354
v_mac_f32_e32 v38, v84, v246 ; 3E4DED54
v_mac_f32_e32 v39, v84, v247 ; 3E4FEF54
v_mul_f32_e32 v14, v23, v14 ; 101C1D17
v_mul_f32_e32 v12, v23, v12 ; 10181917
v_mul_f32_e32 v13, v23, v13 ; 101A1B17
v_mul_f32_e32 v23, v13, v63 ; 102E7F0D
v_mac_f32_e32 v23, v14, v72 ; 3E2E910E
v_mul_f32_e32 v24, v13, v64 ; 1030810D
v_mac_f32_e32 v24, v14, v73 ; 3E30930E
v_mul_f32_e32 v35, v13, v65 ; 1046830D
v_mac_f32_e32 v35, v14, v74 ; 3E46950E
v_mac_f32_e32 v23, v12, v75 ; 3E2E970C
v_mac_f32_e32 v24, v12, v76 ; 3E30990C
v_mac_f32_e32 v35, v12, v77 ; 3E469B0C
v_mul_f32_e32 v36, v13, v69 ; 10488B0D
v_mac_f32_e32 v36, v14, v57 ; 3E48730E
v_mul_f32_e32 v41, v13, v70 ; 10528D0D
v_mac_f32_e32 v41, v14, v58 ; 3E52750E
v_mul_f32_e32 v44, v13, v71 ; 10588F0D
v_mac_f32_e32 v44, v14, v59 ; 3E58770E
v_mac_f32_e32 v36, v12, v60 ; 3E48790C
v_mac_f32_e32 v41, v12, v61 ; 3E527B0C
v_mac_f32_e32 v44, v12, v62 ; 3E587D0C
v_mul_f32_e32 v36, v36, v10 ; 10481524
v_mac_f32_e32 v36, v23, v11 ; 3E481717
v_mul_f32_e32 v23, v41, v10 ; 102E1529
v_mac_f32_e32 v23, v24, v11 ; 3E2E1718
v_mul_f32_e32 v24, v44, v10 ; 1030152C
v_mac_f32_e32 v24, v35, v11 ; 3E301723
v_mad_f32 v35, 2.0, v42, -1.0 ; D2820023 03CE54F4
v_mad_f32 v32, 2.0, v32, -1.0 ; D2820020 03CE40F4
v_mul_f32_e32 v35, v14, v35 ; 1046470E
v_mac_f32_e32 v35, v12, v32 ; 3E46410C
v_mad_f32 v31, 2.0, v31, -1.0 ; D282001F 03CE3EF4
v_mad_f32 v32, 2.0, v34, -1.0 ; D2820020 03CE44F4
v_mul_f32_e32 v34, 0, v14 ; 10441C80
v_mad_f32 v31, v12, v31, v34 ; D282001F 048A3F0C
v_mac_f32_e32 v31, v13, v32 ; 3E3E410D
v_mad_f32 v32, 2.0, v43, -1.0 ; D2820020 03CE56F4
v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4
v_mul_f32_e32 v32, v14, v32 ; 1040410E
v_mac_f32_e32 v32, 0, v12 ; 3E401880
v_mac_f32_e32 v32, v13, v33 ; 3E40430D
v_mad_f32 v21, 2.0, v21, -1.0 ; D2820015 03CE2AF4
v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4
v_mul_f32_e32 v21, v14, v21 ; 102A2B0E
v_mac_f32_e32 v21, v12, v29 ; 3E2A3B0C
v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v28, v12, v28, v34 ; D282001C 048A390C
v_mac_f32_e32 v28, v13, v1 ; 3E38030D
v_mad_f32 v1, 2.0, v27, -1.0 ; D2820001 03CE36F4
v_mad_f32 v27, 2.0, v37, -1.0 ; D282001B 03CE4AF4
v_mul_f32_e32 v1, v14, v1 ; 1002030E
v_mac_f32_e32 v1, 0, v12 ; 3E021880
v_mac_f32_e32 v1, v13, v27 ; 3E02370D
v_mac_f32_e32 v35, 0, v13 ; 3E461A80
v_mul_f32_e32 v27, v31, v10 ; 1036151F
v_mul_f32_e32 v29, v32, v10 ; 103A1520
v_mul_f32_e32 v10, v35, v10 ; 10141523
v_mac_f32_e32 v27, v28, v11 ; 3E36171C
v_mac_f32_e32 v29, v1, v11 ; 3E3A1701
v_mac_f32_e32 v21, 0, v13 ; 3E2A1A80
v_mac_f32_e32 v10, v21, v11 ; 3E141715
v_mul_f32_e32 v1, v13, v79 ; 10029F0D
v_mac_f32_e32 v1, v14, v85 ; 3E02AB0E
v_mul_f32_e32 v11, v13, v80 ; 1016A10D
v_mac_f32_e32 v11, v14, v86 ; 3E16AD0E
v_mul_f32_e32 v21, v13, v81 ; 102AA30D
v_mac_f32_e32 v21, v14, v87 ; 3E2AAF0E
v_mac_f32_e32 v1, v12, v88 ; 3E02B10C
v_mac_f32_e32 v11, v12, v89 ; 3E16B30C
v_mac_f32_e32 v21, v12, v90 ; 3E2AB50C
v_mad_f32 v28, 2.0, v40, -1.0 ; D282001C 03CE50F4
v_mad_f32 v22, 2.0, v22, -1.0 ; D2820016 03CE2CF4
v_mul_f32_e32 v28, v14, v28 ; 1038390E
v_mul_f32_e32 v14, v14, v22 ; 101C2D0E
v_mad_f32 v22, 2.0, v25, -1.0 ; D2820016 03CE32F4
v_mac_f32_e32 v34, v12, v22 ; 3E442D0C
v_mad_f32 v22, 2.0, v26, -1.0 ; D2820016 03CE34F4
v_mac_f32_e32 v14, v12, v22 ; 3E1C2D0C
v_mac_f32_e32 v28, 0, v12 ; 3E381880
v_mad_f32 v12, 2.0, v39, -1.0 ; D282000C 03CE4EF4
v_mac_f32_e32 v34, v13, v12 ; 3E44190D
v_mad_f32 v12, 2.0, v38, -1.0 ; D282000C 03CE4CF4
v_mac_f32_e32 v28, v13, v12 ; 3E38190D
v_mac_f32_e32 v14, 0, v13 ; 3E1C1A80
v_mac_f32_e32 v27, v34, v6 ; 3E360D22
v_mac_f32_e32 v29, v28, v6 ; 3E3A0D1C
v_mac_f32_e32 v10, v14, v6 ; 3E140D0E
v_mul_f32_e32 v12, v29, v29 ; 10183B1D
v_mac_f32_e32 v12, v27, v27 ; 3E18371B
v_mac_f32_e32 v12, v10, v10 ; 3E18150A
v_add_f32_e32 v12, 1.0, v12 ; 061818F2
v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C
v_mac_f32_e32 v36, v1, v6 ; 3E480D01
v_mac_f32_e32 v23, v11, v6 ; 3E2E0D0B
v_mac_f32_e32 v24, v21, v6 ; 3E300D15
v_mul_f32_e32 v1, v12, v27 ; 1002370C
v_mul_f32_e32 v6, v12, v29 ; 100C3B0C
v_mul_f32_e32 v10, v12, v10 ; 1014150C
v_mad_f32 v1, -v1, v5, v7 ; D2820001 241E0B01
v_mad_f32 v6, -v6, v5, v8 ; D2820006 24220B06
v_mad_f32 v5, -v10, v5, v9 ; D2820005 24260B0A
v_mul_f32_e32 v7, v1, v1 ; 100E0301
v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06
v_mac_f32_e32 v7, v5, v5 ; 3E0E0B05
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mul_f32_e32 v8, v20, v20 ; 10102914
v_mac_f32_e32 v8, v18, v18 ; 3E102512
v_mac_f32_e32 v8, v19, v19 ; 3E102713
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v1, v7, v1 ; 10020307
v_mul_f32_e32 v6, v7, v6 ; 100C0D07
v_mul_f32_e32 v5, v7, v5 ; 100A0B07
v_mul_f32_e32 v7, v8, v20 ; 100E2908
v_mul_f32_e32 v9, v8, v18 ; 10122508
v_mul_f32_e32 v8, v8, v19 ; 10102708
v_mul_f32_e32 v7, v7, v1 ; 100E0307
v_mac_f32_e32 v7, v9, v6 ; 3E0E0D09
v_mac_f32_e32 v7, v8, v5 ; 3E0E0B08
v_mul_f32_e32 v1, v17, v1 ; 10020311
v_max_f32_e32 v7, 0x38d1b717, v7 ; 200E0EFF 38D1B717
v_log_f32_e32 v7, v7 ; 7E0E4F07
v_mac_f32_e32 v1, v16, v6 ; 3E020D10
v_mac_f32_e32 v1, v15, v5 ; 3E020B0F
v_mul_f32_e32 v5, 0x42000000, v2 ; 100A04FF 42000000
v_mul_legacy_f32_e32 v5, v5, v7 ; 0E0A0F05
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mac_f32_e32 v30, -2.0, v5 ; 3E3C0AF5
v_mul_f32_e32 v6, v30, v5 ; 100C0B1E
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_readlane_b32 s2, v254, 38 ; 02054DFE
s_nop 2 ; BF800002
v_mul_f32_e32 v5, s2, v36 ; 100A4802
v_readlane_b32 s0, v254, 7 ; 02010FFE
s_nop 2 ; BF800002
v_add_f32_e64 v6, s0, s0 ; D2060006 00000000
v_readlane_b32 s0, v254, 4 ; 020109FE
s_nop 2 ; BF800002
v_max_f32_e32 v6, s0, v6 ; 200C0C00
v_readlane_b32 s3, v254, 39 ; 02074FFE
s_nop 2 ; BF800002
v_mul_f32_e32 v7, s3, v23 ; 100E2E03
v_readlane_b32 s0, v254, 6 ; 02010DFE
s_nop 2 ; BF800002
v_add_f32_e64 v8, s0, s0 ; D2060008 00000000
v_readlane_b32 s0, v254, 3 ; 020107FE
s_nop 2 ; BF800002
v_max_f32_e32 v8, s0, v8 ; 20101000
v_readlane_b32 s0, v254, 1 ; 020103FE
s_nop 2 ; BF800002
v_mul_f32_e32 v9, s0, v24 ; 10123000
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_readlane_b32 s1, v254, 5 ; 02030BFE
s_nop 2 ; BF800002
v_add_f32_e64 v10, s1, s1 ; D206000A 00000201
v_readlane_b32 s1, v254, 2 ; 020305FE
s_nop 2 ; BF800002
v_max_f32_e32 v10, s1, v10 ; 20141401
v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2
v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2
v_min_f32_e32 v10, 1.0, v10 ; 1E1414F2
v_mul_f32_e32 v6, v36, v6 ; 100C0D24
v_mul_f32_e32 v8, v23, v8 ; 10101117
v_mul_f32_e32 v10, v24, v10 ; 10141518
v_mac_f32_e32 v6, v1, v5 ; 3E0C0B01
v_mac_f32_e32 v8, v1, v7 ; 3E100F01
v_mac_f32_e32 v10, v1, v9 ; 3E141301
v_mac_f32_e32 v6, s2, v2 ; 3E0C0402
v_mac_f32_e32 v8, s3, v2 ; 3E100403
v_mac_f32_e32 v10, s0, v2 ; 3E140400
v_mul_f32_e32 v1, 0.5, v6 ; 10020CF0
v_mul_f32_e32 v2, 0.5, v8 ; 100410F0
v_mul_f32_e32 v5, 0.5, v10 ; 100A14F0
v_mac_f32_e32 v1, v3, v36 ; 3E024903
v_mac_f32_e32 v2, v4, v23 ; 3E042F04
v_mac_f32_e32 v5, v0, v24 ; 3E0A3100
v_cvt_pkrtz_f16_f32_e32 v0, v1, v2 ; 5E000501
v_cvt_pkrtz_f16_f32_e64 v1, v5, 1.0 ; D25E0001 0001E505
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 256
Code Size: 4864 bytes
LDS: 0 blocks
Scratch: 16384 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..3], LOCAL
IMM[0] INT32 {0, 1, 2, 3}
IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000}
0: F2I TEMP[0].x, IN[0].zzzz
1: USEQ TEMP[1], TEMP[0].xxxx, IMM[0]
2: UCMP TEMP[2].x, TEMP[1].xxxx, CONST[0].xxxx, TEMP[2].xxxx
3: UCMP TEMP[2].x, TEMP[1].yyyy, CONST[0].yyyy, TEMP[2].xxxx
4: UCMP TEMP[2].x, TEMP[1].zzzz, CONST[0].zzzz, TEMP[2].xxxx
5: UCMP TEMP[2].x, TEMP[1].wwww, CONST[0].wwww, TEMP[2].xxxx
6: MOV TEMP[1].x, TEMP[2].xxxx
7: USEQ TEMP[2], TEMP[0].xxxx, IMM[0]
8: UCMP TEMP[3].x, TEMP[2].xxxx, CONST[1].xxxx, TEMP[3].xxxx
9: UCMP TEMP[3].x, TEMP[2].yyyy, CONST[1].yyyy, TEMP[3].xxxx
10: UCMP TEMP[3].x, TEMP[2].zzzz, CONST[1].zzzz, TEMP[3].xxxx
11: UCMP TEMP[3].x, TEMP[2].wwww, CONST[1].wwww, TEMP[3].xxxx
12: MOV TEMP[1].y, TEMP[3].xxxx
13: USEQ TEMP[0], TEMP[0].xxxx, IMM[0]
14: UCMP TEMP[2].x, TEMP[0].xxxx, CONST[2].xxxx, TEMP[2].xxxx
15: UCMP TEMP[2].x, TEMP[0].yyyy, CONST[2].yyyy, TEMP[2].xxxx
16: UCMP TEMP[2].x, TEMP[0].zzzz, CONST[2].zzzz, TEMP[2].xxxx
17: UCMP TEMP[2].x, TEMP[0].wwww, CONST[2].wwww, TEMP[2].xxxx
18: MOV TEMP[1].z, TEMP[2].xxxx
19: MOV TEMP[0].xyz, TEMP[1].xyzx
20: MOV TEMP[0].w, IN[0].zzzz
21: MUL TEMP[1], CONST[4], IN[0].xxxx
22: MAD TEMP[1], CONST[5], IN[0].yyyy, TEMP[1]
23: MAD TEMP[1], CONST[6], IMM[1].xxxx, TEMP[1]
24: MAD TEMP[1], CONST[7], IN[0].wwww, TEMP[1]
25: MOV OUT[2], IN[1].xyxy
26: MOV OUT[1], TEMP[0]
27: MOV OUT[0], TEMP[1]
28: END
; ModuleID = 'tgsi'
; Entry point of the vertex shader translated from the TGSI listing above.
; Only arguments %1, %4, %5 and %7 are referenced in the body: %1 and %4 are
; indexed for buffer descriptors, %5 + %7 forms the vertex fetch index.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of %1 is the descriptor passed to every llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0..44: twelve floats consumed by the three select chains further down
; (they line up with CONST[0..2].xyzw in the TGSI listing).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
; Offsets 64..124: sixteen floats used by the position transform
; (CONST[4..7].xyzw in the TGSI listing). Offsets 48..60 are never loaded.
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
; Vertex input 0 (IN[0]): fetched through descriptor 0 of %4, all four components used.
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
; Vertex input 1 (IN[1]): descriptor 1 of %4; only x and y are extracted.
%49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
%51 = add i32 %5, %7
%52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
; TGSI instruction 0 (F2I): integer selector taken from IN[0].z.
%55 = fptosi float %47 to i32
; First chain (TGSI 1-5): %63 = one of %13..%16 for selector 0..3, else 0.0.
%56 = icmp eq i32 %55, 0
%57 = icmp eq i32 %55, 1
%58 = icmp eq i32 %55, 2
%59 = icmp eq i32 %55, 3
%60 = select i1 %56, float %13, float 0.000000e+00
%61 = select i1 %57, float %14, float %60
%62 = select i1 %58, float %15, float %61
%63 = select i1 %59, float %16, float %62
; Second chain (TGSI 7-11): %73 = one of %17..%20 for selector 0..3, else 0.0.
; %65/%69 keep the (selector == 0) result as an all-ones/all-zeros word because
; TGSI reuses that register (TEMP[2].x) as the fall-through value of the third chain.
%64 = icmp eq i32 %55, 0
%65 = sext i1 %64 to i32
%66 = icmp eq i32 %55, 1
%67 = icmp eq i32 %55, 2
%68 = icmp eq i32 %55, 3
%69 = bitcast i32 %65 to float
%70 = select i1 %64, float %17, float 0.000000e+00
%71 = select i1 %66, float %18, float %70
%72 = select i1 %67, float %19, float %71
%73 = select i1 %68, float %20, float %72
; Third chain (TGSI 13-17): %81 = one of %21..%24 for selector 0..3.
; The fall-through %69 is only picked when the selector is not 0, and in that
; case the mask it was built from is zero, so the fall-through is again 0.0.
%74 = icmp eq i32 %55, 0
%75 = icmp eq i32 %55, 1
%76 = icmp eq i32 %55, 2
%77 = icmp eq i32 %55, 3
%78 = select i1 %74, float %21, float %69
%79 = select i1 %75, float %22, float %78
%80 = select i1 %76, float %23, float %79
%81 = select i1 %77, float %24, float %80
; Position (TGSI 21-24), computed per component:
;   %25..%28 * IN[0].x + %29..%32 * IN[0].y + %33..%36 * 0.1 + %37..%40 * IN[0].w
%82 = fmul float %25, %45
%83 = fmul float %26, %45
%84 = fmul float %27, %45
%85 = fmul float %28, %45
%86 = fmul float %29, %46
%87 = fadd float %86, %82
%88 = fmul float %30, %46
%89 = fadd float %88, %83
%90 = fmul float %31, %46
%91 = fadd float %90, %84
%92 = fmul float %32, %46
%93 = fadd float %92, %85
; 0x3FB99999A0000000 is the single-precision value 0.1 (IMM[1].x in the TGSI listing).
%94 = fmul float %33, 0x3FB99999A0000000
%95 = fadd float %94, %87
%96 = fmul float %34, 0x3FB99999A0000000
%97 = fadd float %96, %89
%98 = fmul float %35, 0x3FB99999A0000000
%99 = fadd float %98, %91
%100 = fmul float %36, 0x3FB99999A0000000
%101 = fadd float %100, %93
%102 = fmul float %37, %48
%103 = fadd float %102, %95
%104 = fmul float %38, %48
%105 = fadd float %104, %97
%106 = fmul float %39, %48
%107 = fadd float %106, %99
%108 = fmul float %40, %48
%109 = fadd float %108, %101
; Exports. The fourth operand (32, 33, 12) reappears as the target operand of the
; matching `exp` instructions in the disassembly below.
;   32: the three selected constants plus the raw IN[0].z   (OUT[1] in TGSI)
;   33: IN[1].xyxy                                          (OUT[2] in TGSI)
;   12: the transformed position                            (OUT[0] in TGSI)
; NOTE(review): the third operand is 1 only on the last export; presumably the
; "done" flag - confirm against the intrinsic definition.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %63, float %73, float %81, float %47)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %53, float %54)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %105, float %107, float %109)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the @main function above.
; s[0:3] becomes the resource operand of every s_buffer_load_dword (constants);
; s[4:7] and s[12:15] are the resources of the two vertex fetches.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
; v0 = s10 + v0: the fetch index (the `add i32 %5, %7` of the IR).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant dwords 0x0..0x4. Offsets here are the IR offsets divided by 4.
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103
s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104
; v[1:4] = vertex input 0 (x, y, z, w); v[5:8] = vertex input 1.
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105
s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106
s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107
s_waitcnt lgkmcnt(0) ; BF8C007F
; From here on the loaded scalars are copied into VGPRs so they can serve as
; v_cndmask operands, while s8..s11 are reloaded with later constant dwords.
v_mov_b32_e32 v0, s8 ; 7E000208
s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108
; v7 and v8, written below, are part of the v[5:8] fetch destination; only v5/v6
; of that fetch are read later (by the `exp 15, 33` export).
s_waitcnt vmcnt(0) ; BF8C0770
v_mov_b32_e32 v7, s9 ; 7E0E0209
s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109
v_mov_b32_e32 v8, s10 ; 7E10020A
s_buffer_load_dword s9, s[0:3], 0xa ; C204810A
v_mov_b32_e32 v9, s11 ; 7E12020B
s_buffer_load_dword s10, s[0:3], 0xb ; C205010B
v_mov_b32_e32 v10, s16 ; 7E140210
; Constant dwords 0x10..0x1f: the sixteen coefficients of the position transform.
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x11 ; C2060111
s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112
v_mov_b32_e32 v11, s4 ; 7E160204
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114
s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115
s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116
s_buffer_load_dword s17, s[0:3], 0x17 ; C2088117
s_buffer_load_dword s18, s[0:3], 0x18 ; C2090118
s_buffer_load_dword s19, s[0:3], 0x19 ; C2098119
s_buffer_load_dword s20, s[0:3], 0x1a ; C20A011A
s_buffer_load_dword s21, s[0:3], 0x1b ; C20A811B
s_buffer_load_dword s22, s[0:3], 0x1c ; C20B011C
; v12 = integer selector converted from input 0's z component (v3).
v_cvt_i32_f32_e32 v12, v3 ; 7E181103
s_buffer_load_dword s23, s[0:3], 0x1d ; C20B811D
s_buffer_load_dword s24, s[0:3], 0x1e ; C20C011E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s7 ; 7E1A0207
; Selector == 0: seed the three chains (v0, v10, v13). v14 holds the -1/0 compare
; mask that the IR uses as the fall-through value of the third chain.
v_cmp_eq_i32_e32 vcc, 0, v12 ; 7D041880
v_cndmask_b32_e64 v14, 0, -1, vcc ; D200000E 01A98280
v_cndmask_b32_e32 v13, v14, v13 ; 001A1B0E
v_cndmask_b32_e32 v0, 0, v0 ; 00000080
v_cndmask_b32_e32 v10, 0, v10 ; 00141480
; Selector == 1: results move to v0 / v7 / v10.
v_cmp_eq_i32_e32 vcc, 1, v12 ; 7D041881
v_cndmask_b32_e32 v0, v0, v7 ; 00000F00
v_cndmask_b32_e32 v7, v10, v11 ; 000E170A
v_mov_b32_e32 v10, s8 ; 7E140208
v_cndmask_b32_e32 v10, v13, v10 ; 0014150D
; Selector == 2: results in v0 / v7 / v8.
v_cmp_eq_i32_e32 vcc, 2, v12 ; 7D041882
v_cndmask_b32_e32 v0, v0, v8 ; 00001100
v_mov_b32_e32 v8, s5 ; 7E100205
v_cndmask_b32_e32 v7, v7, v8 ; 000E1107
v_mov_b32_e32 v8, s9 ; 7E100209
v_cndmask_b32_e32 v8, v10, v8 ; 0010110A
; Selector == 3: final results in v0 / v7 / v8.
v_cmp_eq_i32_e32 vcc, 3, v12 ; 7D041883
v_cndmask_b32_e32 v0, v0, v9 ; 00001300
v_mov_b32_e32 v9, s6 ; 7E120206
v_cndmask_b32_e32 v7, v7, v9 ; 000E1307
v_mov_b32_e32 v9, s10 ; 7E12020A
v_cndmask_b32_e32 v8, v8, v9 ; 00101308
; Target 32: the three selected constants plus the unconverted z input (v3).
exp 15, 32, 0, 0, 0, v0, v7, v8, v3 ; F800020F 03080700
; v0, v3 and v7 were sources of the export above and are overwritten next.
s_waitcnt expcnt(0) ; BF8C070F
; Position accumulated in v0 (x), v3 (y), v7 (z), v1 (w):
; coefficient * input.x, then + coefficient * input.y ...
v_mul_f32_e32 v0, s11, v1 ; 1000020B
v_mac_f32_e32 v0, s14, v2 ; 3E00040E
v_mul_f32_e32 v3, s12, v1 ; 1006020C
v_mac_f32_e32 v3, s15, v2 ; 3E06040F
v_mul_f32_e32 v7, s13, v1 ; 100E020D
v_mac_f32_e32 v7, s16, v2 ; 3E0E0410
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, s17, v2 ; 3E020411
; ... + coefficient * 0.1 (0x3dcccccd), then + coefficient * input.w (v4).
v_mov_b32_e32 v2, 0x3dcccccd ; 7E0402FF 3DCCCCCD
v_mac_f32_e32 v0, s18, v2 ; 3E000412
v_mac_f32_e32 v3, s19, v2 ; 3E060413
v_mac_f32_e32 v7, s20, v2 ; 3E0E0414
v_mac_f32_e32 v1, s21, v2 ; 3E020415
v_mac_f32_e32 v0, s22, v4 ; 3E000816
v_mac_f32_e32 v3, s23, v4 ; 3E060817
v_mac_f32_e32 v7, s24, v4 ; 3E0E0818
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; Target 33: input 1 as (x, y, x, y). Target 12: the position computed above.
exp 15, 33, 0, 0, 0, v5, v6, v5, v6 ; F800021F 06050605
exp 15, 12, 0, 1, 0, v0, v3, v7, v1 ; F80008CF 01070300
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 16
Code Size: 388 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..1]
DCL CONST[4..8]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, 0.0000, 0.0000}
IMM[1] INT32 {1, 2, 3, 0}
IMM[2] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[1].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[1].zwww
3: TEX TEMP[1].x, TEMP[1], SAMP[1], 2D
4: MAD TEMP[2].x, CONST[0].xxxx, TEMP[1].xxxx, CONST[0].yyyy
5: RCP TEMP[2].x, TEMP[2].xxxx
6: MUL TEMP[2].xyz, TEMP[2].xxxx, IN[0]
7: MUL TEMP[3].xyz, CONST[4].wwww, TEMP[2].xyzz
8: ADD TEMP[4].y, CONST[8].xyzz, TEMP[2].xyzz
9: ADD TEMP[4].x, TEMP[4].yyyy, -CONST[4].xxxx
10: MUL TEMP[5].x, IMM[0].yyyy, CONST[4].zzzz
11: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx
12: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[4].xxxx
13: MIN TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz
14: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz
15: SQRT TEMP[3].x, TEMP[3].xxxx
16: ADD TEMP[4].x, TEMP[4].xxxx, CONST[4].yyyy
17: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx
18: ADD TEMP[2].x, TEMP[2].yyyy, IMM[0].wwww
19: ABS TEMP[2].x, TEMP[2].xxxx
20: RCP TEMP[2].x, TEMP[2].xxxx
21: MUL TEMP[2].x, TEMP[5].xxxx, TEMP[2].xxxx
22: MAD TEMP[2].x, CONST[4].zzzz, TEMP[4].xxxx, -TEMP[2].xxxx
23: MAD TEMP[2].x, -TEMP[3].xxxx, TEMP[2].xxxx, CONST[5].xxxx
24: MAX TEMP[2].x, IMM[0].zzzz, TEMP[2].xxxx
25: MOV TEMP[3].x, IMM[0].zzzz
26: USEQ TEMP[4].x, CONST[6].xxxx, IMM[1].xxxx
27: UIF TEMP[4].xxxx :0
28: MAD TEMP[3].x, TEMP[2].xxxx, CONST[7].zzzz, CONST[7].wwww
29: ENDIF
30: USEQ TEMP[4].x, CONST[6].xxxx, IMM[1].yyyy
31: UIF TEMP[4].xxxx :0
32: MUL TEMP[4].x, CONST[7].yyyy, TEMP[2].xxxx
33: EX2 TEMP[3].x, -TEMP[4].xxxx
34: ENDIF
35: USEQ TEMP[4].x, CONST[6].xxxx, IMM[1].zzzz
36: UIF TEMP[4].xxxx :0
37: MUL TEMP[2].x, CONST[7].xxxx, TEMP[2].xxxx
38: MUL TEMP[2].x, -TEMP[2].xxxx, TEMP[2].xxxx
39: EX2 TEMP[3].x, TEMP[2].xxxx
40: ENDIF
41: MOV_SAT TEMP[2].x, TEMP[3].xxxx
42: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
43: UIF TEMP[1].xxxx :0
44: MOV TEMP[2].x, IMM[0].xxxx
45: ENDIF
46: LRP TEMP[0], TEMP[2].xxxx, TEMP[0], CONST[1]
47: MOV OUT[0], TEMP[0]
48: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%40 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%41 = load <32 x i8>, <32 x i8> addrspace(2)* %40, align 32, !tbaa !0
%42 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%43 = load <16 x i8>, <16 x i8> addrspace(2)* %42, align 16, !tbaa !0
%44 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%45 = bitcast <8 x i32> addrspace(2)* %44 to <32 x i8> addrspace(2)*
%46 = load <32 x i8>, <32 x i8> addrspace(2)* %45, align 32, !tbaa !0
%47 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%48 = bitcast <4 x i32> addrspace(2)* %47 to <16 x i8> addrspace(2)*
%49 = load <16 x i8>, <16 x i8> addrspace(2)* %48, align 16, !tbaa !0
%50 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%51 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%52 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%53 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%54 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%55 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%56 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%57 = bitcast float %53 to i32
%58 = bitcast float %54 to i32
%59 = insertelement <2 x i32> undef, i32 %57, i32 0
%60 = insertelement <2 x i32> %59, i32 %58, i32 1
%61 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %60, <32 x i8> %41, <16 x i8> %43, i32 2)
%62 = extractelement <4 x float> %61, i32 0
%63 = extractelement <4 x float> %61, i32 1
%64 = extractelement <4 x float> %61, i32 2
%65 = extractelement <4 x float> %61, i32 3
%66 = bitcast float %55 to i32
%67 = bitcast float %56 to i32
%68 = insertelement <2 x i32> undef, i32 %66, i32 0
%69 = insertelement <2 x i32> %68, i32 %67, i32 1
%70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %46, <16 x i8> %49, i32 2)
%71 = extractelement <4 x float> %70, i32 0
%72 = fmul float %24, %71
%73 = fadd float %72, %25
%74 = fdiv float 1.000000e+00, %73
%75 = fmul float %74, %50
%76 = fmul float %74, %51
%77 = fmul float %74, %52
%78 = fmul float %33, %75
%79 = fmul float %33, %76
%80 = fmul float %33, %77
%81 = fadd float %39, %76
%82 = fsub float %81, %30
%83 = fmul float %32, 2.000000e+00
%84 = fsub float 1.000000e+00, %83
%85 = fmul float %84, %82
%86 = call float @llvm.minnum.f32(float %85, float 0.000000e+00)
%87 = fmul float %78, %78
%88 = fmul float %79, %79
%89 = fadd float %88, %87
%90 = fmul float %80, %80
%91 = fadd float %89, %90
%92 = call float @llvm.sqrt.f32(float %91)
%93 = fadd float %82, %31
%94 = fmul float %86, %86
%95 = fadd float %76, 0x3EE4F8B580000000
%96 = call float @llvm.fabs.f32(float %95)
%97 = fdiv float 1.000000e+00, %96
%98 = fmul float %94, %97
%99 = fmul float %32, %93
%100 = fsub float %99, %98
%101 = fmul float %92, %100
%102 = fsub float %34, %101
%103 = call float @llvm.maxnum.f32(float %102, float 0.000000e+00)
%104 = bitcast float %35 to i32
%105 = icmp eq i32 %104, 1
%106 = fmul float %103, %37
%107 = fadd float %106, %38
%temp12.0 = select i1 %105, float %107, float 0.000000e+00
%108 = bitcast float %35 to i32
%109 = icmp eq i32 %108, 2
br i1 %109, label %IF25, label %ENDIF24
IF25: ; preds = %main_body
%110 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%111 = fmul float %110, %103
%112 = fsub float -0.000000e+00, %111
%113 = call float @llvm.AMDIL.exp.(float %112)
br label %ENDIF24
ENDIF24: ; preds = %main_body, %IF25
%temp12.1 = phi float [ %113, %IF25 ], [ %temp12.0, %main_body ]
%114 = bitcast float %35 to i32
%115 = icmp eq i32 %114, 3
br i1 %115, label %IF28, label %ENDIF27
IF28: ; preds = %ENDIF24
%116 = fmul float %36, %103
%117 = fmul float %116, %116
%118 = fsub float -0.000000e+00, %117
%119 = call float @llvm.AMDIL.exp.(float %118)
br label %ENDIF27
ENDIF27: ; preds = %ENDIF24, %IF28
%temp12.2 = phi float [ %119, %IF28 ], [ %temp12.1, %ENDIF24 ]
%120 = call float @llvm.AMDIL.clamp.(float %temp12.2, float 0.000000e+00, float 1.000000e+00)
%121 = fcmp oge float %71, 0x3FEFFFFDE0000000
%. = select i1 %121, float 1.000000e+00, float %120
%122 = call float @llvm.AMDGPU.lrp(float %., float %62, float %26)
%123 = call float @llvm.AMDGPU.lrp(float %., float %63, float %27)
%124 = call float @llvm.AMDGPU.lrp(float %., float %64, float %28)
%125 = call float @llvm.AMDGPU.lrp(float %., float %65, float %29)
%126 = call i32 @llvm.SI.packf16(float %122, float %123)
%127 = bitcast i32 %126 to float
%128 = call i32 @llvm.SI.packf16(float %124, float %125)
%129 = bitcast i32 %128 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %127, float %129, float %127, float %129)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000
v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001
v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100
v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101
v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200
v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500
v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440002
image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[24:31], s[12:15] ; F0800100 00660408
s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121
s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110
s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112
s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v8, s5 ; 7E100205
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v8, s4, v4 ; 3E100804
v_rcp_f32_e32 v8, v8 ; 7E105508
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
v_mul_f32_e32 v5, v5, v8 ; 100A1105
v_mad_f32 v9, v8, v6, s6 ; D2820009 001A0D08
v_subrev_f32_e32 v9, s7, v9 ; 0A121207
v_mad_f32 v10, -2.0, s8, 1.0 ; D282000A 03C810F5
v_mul_f32_e32 v10, v9, v10 ; 10141509
v_add_f32_e32 v9, s9, v9 ; 06121209
v_mul_f32_e32 v11, v6, v8 ; 10161106
v_mul_f32_e32 v7, v7, v8 ; 100E1107
v_madak_f32_e32 v6, v8, v6, 0x3727c5ac ; 420C0D08 3727C5AC
v_rcp_f32_e64 v6, |v6| ; D3540106 00000106
s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F
v_min_f32_e32 v8, 0, v10 ; 1E101480
v_mul_f32_e32 v8, v8, v8 ; 10101108
v_mul_f32_e32 v6, v6, v8 ; 100C1106
v_mad_f32 v6, s8, v9, -v6 ; D2820006 841A1208
s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114
s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118
s_buffer_load_dword s7, s[0:3], 0x1e ; C203811E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s4, v5 ; 100A0A04
v_mul_f32_e32 v8, s4, v11 ; 10101604
v_mul_f32_e32 v7, s4, v7 ; 100E0E04
v_mul_f32_e32 v5, v5, v5 ; 100A0B05
v_mac_f32_e32 v5, v8, v8 ; 3E0A1108
v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_mad_f32 v5, -v5, v6, s6 ; D2820005 201A0D05
v_max_f32_e32 v6, 0, v5 ; 200C0A80
v_mov_b32_e32 v5, s5 ; 7E0A0205
v_mac_f32_e32 v5, s7, v6 ; 3E0A0C07
v_cmp_eq_i32_e64 vcc, 1, s8 ; D104006A 00001081
v_cndmask_b32_e32 v5, 0, v5 ; 000A0A80
v_cmp_eq_i32_e64 s[4:5], 2, s8 ; D1040004 00001082
s_and_saveexec_b64 s[10:11], s[4:5] ; BE8A2404
s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E
s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s4, v6 ; 100A0C04
v_exp_f32_e64 v5, -v5 ; D34A0005 20000105
s_or_b64 exec, exec, s[10:11] ; 88FE0A7E
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
v_cmp_eq_i32_e64 s[8:9], 3, s8 ; D1040008 00001083
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[8:9], s[8:9] ; BE882408
s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E
s_buffer_load_dword s10, s[0:3], 0x1c ; C205011C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s10, v6 ; 100A0C0A
v_mul_f32_e32 v5, v5, v5 ; 100A0B05
v_exp_f32_e64 v5, -v5 ; D34A0005 20000105
s_or_b64 exec, exec, s[8:9] ; 88FE087E
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mov_b32_e32 v6, 0x3f7fffef ; 7E0C02FF 3F7FFFEF
v_cmp_le_f32_e32 vcc, v6, v4 ; 7C060906
v_cndmask_b32_e64 v4, v5, 1.0, vcc ; D2000004 01A9E505
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v6, s7, v5 ; 100C0A07
v_mac_f32_e32 v6, v0, v4 ; 3E0C0900
v_mul_f32_e32 v0, s6, v5 ; 10000A06
v_mac_f32_e32 v0, v1, v4 ; 3E000901
v_mul_f32_e32 v1, s5, v5 ; 10020A05
v_mac_f32_e32 v1, v2, v4 ; 3E020902
v_mul_f32_e32 v2, s4, v5 ; 10040A04
v_mac_f32_e32 v2, v3, v4 ; 3E040903
v_cvt_pkrtz_f16_f32_e32 v0, v6, v0 ; 5E000106
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 496 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..4]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { -0.5000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[1], IN[0].xxxx
1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0]
4: ADD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy
5: MAD TEMP[2].xy, CONST[0].xyyy, IMM[0].xxxx, IN[1].xyyy
6: MOV TEMP[1].zw, TEMP[2].yyxy
7: MAD TEMP[2].xy, CONST[0].xyyy, IMM[0].yxxx, IN[1].xyyy
8: MAD TEMP[3].xy, CONST[0].xyyy, IMM[0].xyyy, IN[1].xyyy
9: MOV TEMP[2].zw, TEMP[3].yyxy
10: MOV OUT[1], TEMP[1]
11: MOV OUT[2], TEMP[2]
12: MOV OUT[0], TEMP[0]
13: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%31 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = add i32 %5, %7
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %32, i32 0, i32 %33)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
%39 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = add i32 %5, %7
%42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41)
%43 = extractelement <4 x float> %42, i32 0
%44 = extractelement <4 x float> %42, i32 1
%45 = fmul float %15, %35
%46 = fmul float %16, %35
%47 = fmul float %17, %35
%48 = fmul float %18, %35
%49 = fmul float %19, %36
%50 = fadd float %49, %45
%51 = fmul float %20, %36
%52 = fadd float %51, %46
%53 = fmul float %21, %36
%54 = fadd float %53, %47
%55 = fmul float %22, %36
%56 = fadd float %55, %48
%57 = fmul float %23, %37
%58 = fadd float %57, %50
%59 = fmul float %24, %37
%60 = fadd float %59, %52
%61 = fmul float %25, %37
%62 = fadd float %61, %54
%63 = fmul float %26, %37
%64 = fadd float %63, %56
%65 = fmul float %27, %38
%66 = fadd float %65, %58
%67 = fmul float %28, %38
%68 = fadd float %67, %60
%69 = fmul float %29, %38
%70 = fadd float %69, %62
%71 = fmul float %30, %38
%72 = fadd float %71, %64
%73 = fadd float %43, %13
%74 = fadd float %44, %14
%75 = fmul float %13, -5.000000e-01
%76 = fadd float %75, %43
%77 = fmul float %14, -5.000000e-01
%78 = fadd float %77, %44
%79 = fmul float %13, 5.000000e-01
%80 = fadd float %79, %43
%81 = fmul float %14, -5.000000e-01
%82 = fadd float %81, %44
%83 = fmul float %13, -5.000000e-01
%84 = fadd float %83, %43
%85 = fmul float %14, 5.000000e-01
%86 = fadd float %85, %44
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %76, float %78)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %80, float %82, float %84, float %86)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %66, float %68, float %70, float %72)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_waitcnt lgkmcnt(0) ; BF8C007F
v_add_f32_e32 v0, s4, v5 ; 06000A04
v_add_f32_e32 v7, s5, v6 ; 060E0C05
v_mad_f32 v8, -0.5, s4, v5 ; D2820008 041408F1
v_mad_f32 v9, -0.5, s5, v6 ; D2820009 04180AF1
exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700
s_waitcnt expcnt(0) ; BF8C070F
v_mad_f32 v0, 0.5, s4, v5 ; D2820000 041408F0
v_mac_f32_e64 v6, 0.5, s5 ; D23E0006 00000AF0
exp 15, 33, 0, 0, 0, v0, v9, v8, v6 ; F800021F 06080900
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xb ; C205810B
s_buffer_load_dword s12, s[0:3], 0xc ; C206010C
s_buffer_load_dword s13, s[0:3], 0xd ; C206810D
s_buffer_load_dword s14, s[0:3], 0xe ; C207010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s6, v1 ; 10000206
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v5, s7, v1 ; 100A0207
v_mac_f32_e32 v5, s9, v2 ; 3E0A0409
v_mul_f32_e32 v6, s8, v1 ; 100C0208
v_mac_f32_e32 v6, s10, v2 ; 3E0C040A
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 260 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.2500, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].xy, IN[0].zwww
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: ADD TEMP[0], TEMP[0], TEMP[1]
5: MOV TEMP[1].xy, IN[1].xyyy
6: TEX TEMP[1], TEMP[1], SAMP[0], 2D
7: MOV TEMP[2].xy, IN[1].zwww
8: TEX TEMP[2], TEMP[2], SAMP[0], 2D
9: ADD TEMP[1], TEMP[1], TEMP[2]
10: ADD TEMP[0], TEMP[0], TEMP[1]
11: MUL TEMP[0], TEMP[0], IMM[0].xxxx
12: MOV OUT[0], TEMP[0]
13: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%34 = bitcast float %26 to i32
%35 = bitcast float %27 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
%45 = bitcast float %28 to i32
%46 = bitcast float %29 to i32
%47 = insertelement <2 x i32> undef, i32 %45, i32 0
%48 = insertelement <2 x i32> %47, i32 %46, i32 1
%49 = bitcast <8 x i32> %23 to <32 x i8>
%50 = bitcast <4 x i32> %25 to <16 x i8>
%51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %48, <32 x i8> %49, <16 x i8> %50, i32 2)
%52 = extractelement <4 x float> %51, i32 0
%53 = extractelement <4 x float> %51, i32 1
%54 = extractelement <4 x float> %51, i32 2
%55 = extractelement <4 x float> %51, i32 3
%56 = fadd float %41, %52
%57 = fadd float %42, %53
%58 = fadd float %43, %54
%59 = fadd float %44, %55
%60 = bitcast float %30 to i32
%61 = bitcast float %31 to i32
%62 = insertelement <2 x i32> undef, i32 %60, i32 0
%63 = insertelement <2 x i32> %62, i32 %61, i32 1
%64 = bitcast <8 x i32> %23 to <32 x i8>
%65 = bitcast <4 x i32> %25 to <16 x i8>
%66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %63, <32 x i8> %64, <16 x i8> %65, i32 2)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = extractelement <4 x float> %66, i32 2
%70 = extractelement <4 x float> %66, i32 3
%71 = bitcast float %32 to i32
%72 = bitcast float %33 to i32
%73 = insertelement <2 x i32> undef, i32 %71, i32 0
%74 = insertelement <2 x i32> %73, i32 %72, i32 1
%75 = bitcast <8 x i32> %23 to <32 x i8>
%76 = bitcast <4 x i32> %25 to <16 x i8>
%77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2)
%78 = extractelement <4 x float> %77, i32 0
%79 = extractelement <4 x float> %77, i32 1
%80 = extractelement <4 x float> %77, i32 2
%81 = extractelement <4 x float> %77, i32 3
%82 = fadd float %67, %78
%83 = fadd float %68, %79
%84 = fadd float %69, %80
%85 = fadd float %70, %81
%86 = fadd float %56, %82
%87 = fadd float %57, %83
%88 = fadd float %58, %84
%89 = fadd float %59, %85
%90 = fmul float %86, 2.500000e-01
%91 = fmul float %87, 2.500000e-01
%92 = fmul float %88, 2.500000e-01
%93 = fmul float %89, 2.500000e-01
%94 = call i32 @llvm.SI.packf16(float %90, float %91)
%95 = bitcast i32 %94 to float
%96 = call i32 @llvm.SI.packf16(float %92, float %93)
%97 = bitcast i32 %96 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %95, float %97, float %95, float %97)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020002
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[0:3] ; F0800F00 00020A04
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800F00 00020406
image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[8:15], s[0:3] ; F0800F00 00020E08
s_waitcnt vmcnt(2) ; BF8C0772
v_add_f32_e32 v0, v10, v0 ; 0600010A
v_add_f32_e32 v1, v11, v1 ; 0602030B
v_add_f32_e32 v2, v12, v2 ; 0604050C
v_add_f32_e32 v3, v13, v3 ; 0606070D
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v4, v14, v4 ; 0608090E
v_add_f32_e32 v5, v15, v5 ; 060A0B0F
v_add_f32_e32 v6, v16, v6 ; 060C0D10
v_add_f32_e32 v7, v17, v7 ; 060E0F11
v_add_f32_e32 v0, v4, v0 ; 06000104
v_add_f32_e32 v1, v5, v1 ; 06020305
v_add_f32_e32 v2, v6, v2 ; 06040506
v_add_f32_e32 v3, v7, v3 ; 06060707
v_mov_b32_e32 v4, 0x3e800000 ; 7E0802FF 3E800000
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 20
Code Size: 216 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].zw, IMM[0].xxxx
1: MOV TEMP[0].xy, IN[1].xyxx
2: MUL TEMP[1], CONST[2], IN[0].xxxx
3: MAD TEMP[1], CONST[3], IN[0].yyyy, TEMP[1]
4: MAD TEMP[1], CONST[4], IN[0].zzzz, TEMP[1]
5: MAD TEMP[1], CONST[5], IN[0].wwww, TEMP[1]
6: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].yxxx
7: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[1].xxxx
8: MOV OUT[1], TEMP[0]
9: MOV OUT[2], TEMP[2]
10: MOV OUT[0], TEMP[1]
11: END
; ModuleID = 'tgsi'
; Vertex shader entry point for the TGSI VERT listing printed directly above.
; It fetches two vertex attributes, builds the position as
; CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z + CONST[5]*IN[0].w,
; and issues three exports. Arguments used in the body: %1 (array holding the
; constant-buffer descriptor), %4 (array holding the vertex-buffer
; descriptors), %5 and %7 (summed to form the fetch index).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of %1 is the descriptor handed to every llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Byte offsets 0 and 4 line up with CONST[0].x / CONST[0].y, 16 with CONST[1].x
; (16 bytes per vec4 slot, as implied by the TGSI CONST[0..5] usage).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; Offsets 32..92 cover the sixteen floats of CONST[2]..CONST[5].
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; First vertex attribute (IN[0]): descriptor %4[0], fetch index %5 + %7.
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
%34 = add i32 %5, %7
%35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34)
%36 = extractelement <4 x float> %35, i32 0
%37 = extractelement <4 x float> %35, i32 1
%38 = extractelement <4 x float> %35, i32 2
%39 = extractelement <4 x float> %35, i32 3
; Second vertex attribute (IN[1]): descriptor %4[1]; only x and y are used.
%40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = add i32 %5, %7
%43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
; TGSI instruction 2: CONST[2] * IN[0].x
%46 = fmul float %16, %36
%47 = fmul float %17, %36
%48 = fmul float %18, %36
%49 = fmul float %19, %36
; TGSI instruction 3: += CONST[3] * IN[0].y
%50 = fmul float %20, %37
%51 = fadd float %50, %46
%52 = fmul float %21, %37
%53 = fadd float %52, %47
%54 = fmul float %22, %37
%55 = fadd float %54, %48
%56 = fmul float %23, %37
%57 = fadd float %56, %49
; TGSI instruction 4: += CONST[4] * IN[0].z
%58 = fmul float %24, %38
%59 = fadd float %58, %51
%60 = fmul float %25, %38
%61 = fadd float %60, %53
%62 = fmul float %26, %38
%63 = fadd float %62, %55
%64 = fmul float %27, %38
%65 = fadd float %64, %57
; TGSI instruction 5: += CONST[5] * IN[0].w -> %67, %69, %71, %73 hold the result.
%66 = fmul float %28, %39
%67 = fadd float %66, %59
%68 = fmul float %29, %39
%69 = fadd float %68, %61
%70 = fmul float %30, %39
%71 = fadd float %70, %63
%72 = fmul float %31, %39
%73 = fadd float %72, %65
; TGSI instructions 6-7: x = (CONST[0].x * 0.0) * CONST[1].x,
;                        y =  CONST[0].y        * CONST[1].x.
; The multiply by 0.0 is kept as-is rather than folded to a constant.
%74 = fmul float %13, 0.000000e+00
%75 = fmul float %74, %15
%76 = fmul float %14, %15
; Export with target 32: (IN[1].x, IN[1].y, 1.0, 1.0), i.e. TGSI OUT[1].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00)
; Export with target 33: (%75, %76, 0.0, 0.0), i.e. TGSI OUT[2].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %76, float 0.000000e+00, float 0.000000e+00)
; Export with target 12 carries the transformed position (TGSI OUT[0]); it is
; the only export here whose third i32 argument is 1 (presumably the "done"
; flag - confirm against the intrinsic definition).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %69, float %71, float %73)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR is printed above.
; v0 = s10 + v0; matches the IR "add i32 %5, %7" and is used as the idxen fetch index below.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Three 4-dword descriptor loads: one via s[2:3] (feeds the s_buffer_load
; constant reads) and two via s[8:9] at offsets 0x0 / 0x4 (feed the two
; buffer_load_format_xyzw vertex fetches).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 1.0, later exported as the z/w components of export 32.
v_mov_b32_e32 v1, 1.0 ; 7E0202F2
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant reads. The dword offsets 0x0, 0x1, 0x4, 0x8..0x17 equal the IR byte
; offsets 0, 4, 16, 32..92 divided by four.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
; v[2:5] = first vertex attribute, v[6:9] = second vertex attribute.
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xa ; C203810A
s_buffer_load_dword s8, s[0:3], 0xb ; C204010B
s_buffer_load_dword s9, s[0:3], 0xc ; C204810C
s_buffer_load_dword s10, s[0:3], 0xd ; C205010D
s_buffer_load_dword s11, s[0:3], 0xe ; C205810E
s_buffer_load_dword s14, s[0:3], 0xf ; C207010F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Export 32: (attr1.x, attr1.y, 1.0, 1.0).
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111
s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112
s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113
s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114
s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115
s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116
; Note: this last read overwrites s0, part of the s[0:3] descriptor it reads from.
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
; Position accumulation: v0, v1, v6, v2 end up as x, y, z, w.
v_mul_f32_e32 v0, s5, v2 ; 10000405
v_mac_f32_e32 v0, s9, v3 ; 3E000609
; v1 and v6 were sources of the export above and are overwritten next, which
; is presumably why the expcnt wait sits here.
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v1, s6, v2 ; 10020406
v_mac_f32_e32 v1, s10, v3 ; 3E02060A
v_mul_f32_e32 v6, s7, v2 ; 100C0407
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
v_mul_f32_e32 v2, s8, v2 ; 10040408
v_mac_f32_e32 v2, s14, v3 ; 3E04060E
; The second batch of constant reads (s15..s21, s0) is needed from here on.
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s15, v4 ; 3E00080F
v_mac_f32_e32 v1, s16, v4 ; 3E020810
v_mac_f32_e32 v6, s17, v4 ; 3E0C0811
v_mac_f32_e32 v2, s18, v4 ; 3E040812
v_mac_f32_e32 v0, s19, v5 ; 3E000A13
v_mac_f32_e32 v1, s20, v5 ; 3E020A14
v_mac_f32_e32 v6, s21, v5 ; 3E0C0A15
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; v3 = (0 * s12) * s4 and v4 = s13 * s4: the IR values %75 and %76.
v_mul_f32_e64 v3, 0, s12 ; D2100003 00001880
v_mul_f32_e32 v3, s4, v3 ; 10060604
v_mov_b32_e32 v4, s4 ; 7E080204
v_mul_f32_e32 v4, s13, v4 ; 1008080D
v_mov_b32_e32 v5, 0 ; 7E0A0280
; Export 33: (v3, v4, 0, 0); export 12: the accumulated position.
exp 15, 33, 0, 0, 0, v3, v4, v5, v5 ; F800021F 05050403
exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 244 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855}
IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000}
0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx
1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy
2: MOV TEMP[1].xy, TEMP[0].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MUL TEMP[1], TEMP[1], IMM[0].yyyz
5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
6: MOV TEMP[2].xy, TEMP[0].xyyy
7: TEX TEMP[2], TEMP[2], SAMP[0], 2D
8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
10: MOV TEMP[2].xy, TEMP[0].xyyy
11: TEX TEMP[2], TEMP[2], SAMP[0], 2D
12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
14: MOV TEMP[2].xy, TEMP[0].xyyy
15: TEX TEMP[2], TEMP[2], SAMP[0], 2D
16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1]
17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
18: MOV TEMP[2].xy, TEMP[0].xyyy
19: TEX TEMP[2], TEMP[2], SAMP[0], 2D
20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
22: MOV TEMP[2].xy, TEMP[0].xyyy
23: TEX TEMP[2], TEMP[2], SAMP[0], 2D
24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
26: MOV TEMP[0].xy, TEMP[0].xyyy
27: TEX TEMP[0], TEMP[0], SAMP[0], 2D
28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1]
29: MOV OUT[0], TEMP[1]
30: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI FRAG listing printed directly above.
; It takes seven texture samples along a line: the first coordinate is
; IN[0].xy - 3 * IN[1].xy and each following tap adds IN[1].xy once more.
; The RGB channels are summed with weights 0.0205, 0.0855, 0.232, 0.324,
; 0.232, 0.0855, 0.0205 (the TGSI immediates); the alpha channel uses weight
; 0.0 for every tap except the fourth, which uses 1.0. The result is packed to
; two f16 pairs and exported. Arguments used in the body: %2 and %3
; (descriptor arrays), %5 and %7 (passed to every llvm.SI.fs.interp call).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; %23 (8 x i32, from %3[0]) and %25 (4 x i32, from %2[0]) are the two
; descriptors passed to every sample call; presumably the image resource and
; the sampler state respectively.
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
; Interpolated inputs: %26/%27 = IN[0].x/.y, %28/%29 = IN[1].x/.y
; (first i32 selects the channel, second the attribute).
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
; Tap 1 coordinate: IN[0] - 3.0 * IN[1].
%30 = fmul float %28, 3.000000e+00
%31 = fmul float %29, 3.000000e+00
%32 = fsub float %26, %30
%33 = fsub float %27, %31
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
; Tap 1 weights: RGB * 0.0205 (TGSI IMM[0].y), alpha * 0.0. These four values
; seed the running sum.
%45 = fmul float %41, 0x3F94FDF3C0000000
%46 = fmul float %42, 0x3F94FDF3C0000000
%47 = fmul float %43, 0x3F94FDF3C0000000
%48 = fmul float %44, 0.000000e+00
; Tap 2: step the coordinate by IN[1], weight RGB by 0.0855 (IMM[0].w).
%49 = fadd float %32, %28
%50 = fadd float %33, %29
%51 = bitcast float %49 to i32
%52 = bitcast float %50 to i32
%53 = insertelement <2 x i32> undef, i32 %51, i32 0
%54 = insertelement <2 x i32> %53, i32 %52, i32 1
%55 = bitcast <8 x i32> %23 to <32 x i8>
%56 = bitcast <4 x i32> %25 to <16 x i8>
%57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = extractelement <4 x float> %57, i32 3
%62 = fmul float %58, 0x3FB5E35400000000
%63 = fadd float %62, %45
%64 = fmul float %59, 0x3FB5E35400000000
%65 = fadd float %64, %46
%66 = fmul float %60, 0x3FB5E35400000000
%67 = fadd float %66, %47
%68 = fmul float %61, 0.000000e+00
%69 = fadd float %68, %48
; Tap 3: step again, weight RGB by 0.232 (IMM[1].x).
%70 = fadd float %49, %28
%71 = fadd float %50, %29
%72 = bitcast float %70 to i32
%73 = bitcast float %71 to i32
%74 = insertelement <2 x i32> undef, i32 %72, i32 0
%75 = insertelement <2 x i32> %74, i32 %73, i32 1
%76 = bitcast <8 x i32> %23 to <32 x i8>
%77 = bitcast <4 x i32> %25 to <16 x i8>
%78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
%83 = fmul float %79, 0x3FCDB22D00000000
%84 = fadd float %83, %63
%85 = fmul float %80, 0x3FCDB22D00000000
%86 = fadd float %85, %65
%87 = fmul float %81, 0x3FCDB22D00000000
%88 = fadd float %87, %67
%89 = fmul float %82, 0.000000e+00
%90 = fadd float %89, %69
; Tap 4 (centre): weight RGB by 0.324 (IMM[1].z). Alpha weight is 1.0
; (IMM[1].w), so alpha is added directly without a multiply (%110).
%91 = fadd float %70, %28
%92 = fadd float %71, %29
%93 = bitcast float %91 to i32
%94 = bitcast float %92 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = bitcast <8 x i32> %23 to <32 x i8>
%98 = bitcast <4 x i32> %25 to <16 x i8>
%99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = extractelement <4 x float> %99, i32 2
%103 = extractelement <4 x float> %99, i32 3
%104 = fmul float %100, 0x3FD4BC6A80000000
%105 = fadd float %104, %84
%106 = fmul float %101, 0x3FD4BC6A80000000
%107 = fadd float %106, %86
%108 = fmul float %102, 0x3FD4BC6A80000000
%109 = fadd float %108, %88
%110 = fadd float %103, %90
; Tap 5: mirrors tap 3 (RGB weight 0.232).
%111 = fadd float %91, %28
%112 = fadd float %92, %29
%113 = bitcast float %111 to i32
%114 = bitcast float %112 to i32
%115 = insertelement <2 x i32> undef, i32 %113, i32 0
%116 = insertelement <2 x i32> %115, i32 %114, i32 1
%117 = bitcast <8 x i32> %23 to <32 x i8>
%118 = bitcast <4 x i32> %25 to <16 x i8>
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = extractelement <4 x float> %119, i32 3
%124 = fmul float %120, 0x3FCDB22D00000000
%125 = fadd float %124, %105
%126 = fmul float %121, 0x3FCDB22D00000000
%127 = fadd float %126, %107
%128 = fmul float %122, 0x3FCDB22D00000000
%129 = fadd float %128, %109
%130 = fmul float %123, 0.000000e+00
%131 = fadd float %130, %110
; Tap 6: mirrors tap 2 (RGB weight 0.0855).
%132 = fadd float %111, %28
%133 = fadd float %112, %29
%134 = bitcast float %132 to i32
%135 = bitcast float %133 to i32
%136 = insertelement <2 x i32> undef, i32 %134, i32 0
%137 = insertelement <2 x i32> %136, i32 %135, i32 1
%138 = bitcast <8 x i32> %23 to <32 x i8>
%139 = bitcast <4 x i32> %25 to <16 x i8>
%140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2)
%141 = extractelement <4 x float> %140, i32 0
%142 = extractelement <4 x float> %140, i32 1
%143 = extractelement <4 x float> %140, i32 2
%144 = extractelement <4 x float> %140, i32 3
%145 = fmul float %141, 0x3FB5E35400000000
%146 = fadd float %145, %125
%147 = fmul float %142, 0x3FB5E35400000000
%148 = fadd float %147, %127
%149 = fmul float %143, 0x3FB5E35400000000
%150 = fadd float %149, %129
%151 = fmul float %144, 0.000000e+00
%152 = fadd float %151, %131
; Tap 7: mirrors tap 1 (RGB weight 0.0205).
%153 = fadd float %132, %28
%154 = fadd float %133, %29
%155 = bitcast float %153 to i32
%156 = bitcast float %154 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = bitcast <8 x i32> %23 to <32 x i8>
%160 = bitcast <4 x i32> %25 to <16 x i8>
%161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2)
%162 = extractelement <4 x float> %161, i32 0
%163 = extractelement <4 x float> %161, i32 1
%164 = extractelement <4 x float> %161, i32 2
%165 = extractelement <4 x float> %161, i32 3
%166 = fmul float %162, 0x3F94FDF3C0000000
%167 = fadd float %166, %146
%168 = fmul float %163, 0x3F94FDF3C0000000
%169 = fadd float %168, %148
%170 = fmul float %164, 0x3F94FDF3C0000000
%171 = fadd float %170, %150
%172 = fmul float %165, 0.000000e+00
%173 = fadd float %172, %152
; Pack (R, G) and (B, A) into one 32-bit value each and export them; the same
; two packed values are passed twice (x/z and y/w operands). The packed form
; goes with the export_16bpc entry in the SHADER KEY printed above.
%174 = call i32 @llvm.SI.packf16(float %167, float %169)
%175 = bitcast i32 %174 to float
%176 = call i32 @llvm.SI.packf16(float %171, float %173)
%177 = bitcast i32 %176 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the seven-tap fragment shader whose LLVM IR is printed above.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 is the bracketed operand of every v_interp instruction below.
s_mov_b32 m0, s9 ; BEFC0309
; 8-dword descriptor -> s[8:15]; the 4-dword descriptor is loaded into s[0:3]
; a few lines down. Both are operands of every image_sample.
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
; Interpolation: v2/v3 = attribute 0 x/y, v4/v0 = attribute 1 x/y
; (the IR values %26, %27, %28, %29).
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
; 0x40400000 is 3.0f; v5/v6 = attr0 - 3.0 * attr1, the first tap coordinate.
v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000
v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304
v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300
; v1 = weight 0x3ca7ef9e (printed as 0.0205 in the TGSI immediates); reused for tap 7.
v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E
s_waitcnt lgkmcnt(0) ; BF8C007F
; Tap 1. Accumulators from here on: v2, v3, v7 (RGB) and v8 (alpha).
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, v1, v7 ; 10040F01
v_mul_f32_e32 v3, v1, v8 ; 10061101
v_mul_f32_e32 v7, v1, v9 ; 100E1301
v_mul_f32_e32 v8, 0, v10 ; 10101480
; Tap 2: coordinate += attr1; v5 = weight 0x3daf1aa0 (0.0855), reused for tap 6.
v_add_f32_e32 v9, v4, v5 ; 06120B04
v_add_f32_e32 v10, v0, v6 ; 06140D00
v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v5, v11 ; 3E041705
v_mac_f32_e32 v3, v5, v12 ; 3E061905
v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05
v_mac_f32_e32 v8, 0, v14 ; 3E101C80
; Tap 3: v6 = weight 0x3e6d9168 (0.232), reused for tap 5.
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v6, v13 ; 3E041B06
v_mac_f32_e32 v3, v6, v14 ; 3E061D06
v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06
v_mac_f32_e32 v8, 0, v16 ; 3E102080
; Tap 4 (centre): v11 = weight 0x3ea5e354 (0.324); alpha is added unscaled.
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354
image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v11, v12 ; 3E04190B
v_mac_f32_e32 v3, v11, v13 ; 3E061B0B
v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B
v_add_f32_e32 v8, v8, v15 ; 06101F08
; Taps 5-7: all three coordinates are computed first, then the three samples
; are issued back to back and consumed one at a time via vmcnt(2)/(1)/(0).
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_add_f32_e32 v13, v4, v9 ; 061A1304
v_add_f32_e32 v14, v0, v10 ; 061C1500
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909
image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D
s_waitcnt vmcnt(2) ; BF8C0772
v_mac_f32_e32 v2, v6, v15 ; 3E041F06
v_mac_f32_e32 v3, v6, v16 ; 3E062106
v_mac_f32_e32 v7, v6, v17 ; 3E0E2306
v_mac_f32_e32 v8, 0, v18 ; 3E102480
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v2, v5, v9 ; 3E041305
v_mac_f32_e32 v3, v5, v10 ; 3E061505
v_mac_f32_e32 v7, v5, v11 ; 3E0E1705
v_mac_f32_e32 v8, 0, v12 ; 3E101880
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v1, v19 ; 3E042701
v_mac_f32_e32 v3, v1, v20 ; 3E062901
v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01
v_mac_f32_e32 v8, 0, v22 ; 3E102C80
; Pack (R,G) -> v0 and (B,A) -> v1, then export the pair twice.
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 24
Code Size: 372 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].zw, IMM[0].xxxx
1: MOV TEMP[0].xy, IN[1].xyxx
2: MUL TEMP[1], CONST[2], IN[0].xxxx
3: MAD TEMP[1], CONST[3], IN[0].yyyy, TEMP[1]
4: MAD TEMP[1], CONST[4], IN[0].zzzz, TEMP[1]
5: MAD TEMP[1], CONST[5], IN[0].wwww, TEMP[1]
6: MUL TEMP[2].xy, CONST[0].xyyy, IMM[0].xyyy
7: MUL TEMP[2].xy, TEMP[2].xyyy, CONST[1].xxxx
8: MOV OUT[1], TEMP[0]
9: MOV OUT[2], TEMP[2]
10: MOV OUT[0], TEMP[1]
11: END
; ModuleID = 'tgsi'
; Vertex shader entry point for the TGSI VERT listing printed directly above.
; It fetches two vertex attributes, builds the position as
; CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z + CONST[5]*IN[0].w,
; and issues three exports. In this variant the second generic output is
; (CONST[0].x, 0 * CONST[0].y) * CONST[1].x, per the IMM[0].xyyy swizzle in
; TGSI instruction 6. Arguments used in the body: %1 (array holding the
; constant-buffer descriptor), %4 (array holding the vertex-buffer
; descriptors), %5 and %7 (summed to form the fetch index).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of %1 is the descriptor handed to every llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Byte offsets 0 and 4 line up with CONST[0].x / CONST[0].y, 16 with CONST[1].x
; (16 bytes per vec4 slot, as implied by the TGSI CONST[0..5] usage).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; Offsets 32..92 cover the sixteen floats of CONST[2]..CONST[5].
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; First vertex attribute (IN[0]): descriptor %4[0], fetch index %5 + %7.
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
%34 = add i32 %5, %7
%35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34)
%36 = extractelement <4 x float> %35, i32 0
%37 = extractelement <4 x float> %35, i32 1
%38 = extractelement <4 x float> %35, i32 2
%39 = extractelement <4 x float> %35, i32 3
; Second vertex attribute (IN[1]): descriptor %4[1]; only x and y are used.
%40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = add i32 %5, %7
%43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
; TGSI instruction 2: CONST[2] * IN[0].x
%46 = fmul float %16, %36
%47 = fmul float %17, %36
%48 = fmul float %18, %36
%49 = fmul float %19, %36
; TGSI instruction 3: += CONST[3] * IN[0].y
%50 = fmul float %20, %37
%51 = fadd float %50, %46
%52 = fmul float %21, %37
%53 = fadd float %52, %47
%54 = fmul float %22, %37
%55 = fadd float %54, %48
%56 = fmul float %23, %37
%57 = fadd float %56, %49
; TGSI instruction 4: += CONST[4] * IN[0].z
%58 = fmul float %24, %38
%59 = fadd float %58, %51
%60 = fmul float %25, %38
%61 = fadd float %60, %53
%62 = fmul float %26, %38
%63 = fadd float %62, %55
%64 = fmul float %27, %38
%65 = fadd float %64, %57
; TGSI instruction 5: += CONST[5] * IN[0].w -> %67, %69, %71, %73 hold the result.
%66 = fmul float %28, %39
%67 = fadd float %66, %59
%68 = fmul float %29, %39
%69 = fadd float %68, %61
%70 = fmul float %30, %39
%71 = fadd float %70, %63
%72 = fmul float %31, %39
%73 = fadd float %72, %65
; TGSI instructions 6-7: x =  CONST[0].x        * CONST[1].x  (%75),
;                        y = (CONST[0].y * 0.0) * CONST[1].x  (%76).
; The multiply by 0.0 is kept as-is rather than folded to a constant.
%74 = fmul float %14, 0.000000e+00
%75 = fmul float %13, %15
%76 = fmul float %74, %15
; Export with target 32: (IN[1].x, IN[1].y, 1.0, 1.0), i.e. TGSI OUT[1].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %44, float %45, float 1.000000e+00, float 1.000000e+00)
; Export with target 33: (%75, %76, 0.0, 0.0), i.e. TGSI OUT[2].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %75, float %76, float 0.000000e+00, float 0.000000e+00)
; Export with target 12 carries the transformed position (TGSI OUT[0]); it is
; the only export here whose third i32 argument is 1 (presumably the "done"
; flag - confirm against the intrinsic definition).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %67, float %69, float %71, float %73)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR is printed above.
; v0 = s10 + v0; matches the IR "add i32 %5, %7" and is used as the idxen fetch index below.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Three 4-dword descriptor loads: one via s[2:3] (feeds the s_buffer_load
; constant reads) and two via s[8:9] at offsets 0x0 / 0x4 (feed the two
; buffer_load_format_xyzw vertex fetches).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 1.0, later exported as the z/w components of export 32.
v_mov_b32_e32 v1, 1.0 ; 7E0202F2
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant reads. The dword offsets 0x0, 0x1, 0x4, 0x8..0x17 equal the IR byte
; offsets 0, 4, 16, 32..92 divided by four.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
; v[2:5] = first vertex attribute, v[6:9] = second vertex attribute.
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xa ; C203810A
s_buffer_load_dword s8, s[0:3], 0xb ; C204010B
s_buffer_load_dword s9, s[0:3], 0xc ; C204810C
s_buffer_load_dword s10, s[0:3], 0xd ; C205010D
s_buffer_load_dword s11, s[0:3], 0xe ; C205810E
s_buffer_load_dword s14, s[0:3], 0xf ; C207010F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Export 32: (attr1.x, attr1.y, 1.0, 1.0).
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111
s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112
s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113
s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114
s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115
s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116
; Note: this last read overwrites s0, part of the s[0:3] descriptor it reads from.
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
; Position accumulation: v0, v1, v6, v2 end up as x, y, z, w.
v_mul_f32_e32 v0, s5, v2 ; 10000405
v_mac_f32_e32 v0, s9, v3 ; 3E000609
; v1 and v6 were sources of the export above and are overwritten next, which
; is presumably why the expcnt wait sits here.
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v1, s6, v2 ; 10020406
v_mac_f32_e32 v1, s10, v3 ; 3E02060A
v_mul_f32_e32 v6, s7, v2 ; 100C0407
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
v_mul_f32_e32 v2, s8, v2 ; 10040408
v_mac_f32_e32 v2, s14, v3 ; 3E04060E
; The second batch of constant reads (s15..s21, s0) is needed from here on.
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s15, v4 ; 3E00080F
v_mac_f32_e32 v1, s16, v4 ; 3E020810
v_mac_f32_e32 v6, s17, v4 ; 3E0C0811
v_mac_f32_e32 v2, s18, v4 ; 3E040812
v_mac_f32_e32 v0, s19, v5 ; 3E000A13
v_mac_f32_e32 v1, s20, v5 ; 3E020A14
v_mac_f32_e32 v6, s21, v5 ; 3E0C0A15
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; v3 = (0 * s13) * s4 and v4 = s12 * s4: the IR values %76 and %75.
v_mul_f32_e64 v3, 0, s13 ; D2100003 00001A80
v_mul_f32_e32 v3, s4, v3 ; 10060604
v_mov_b32_e32 v4, s4 ; 7E080204
v_mul_f32_e32 v4, s12, v4 ; 1008080C
v_mov_b32_e32 v5, 0 ; 7E0A0280
; Export 33: (v4, v3, 0, 0) - note the operand order; export 12: the position.
exp 15, 33, 0, 0, 0, v4, v3, v5, v5 ; F800021F 05050304
exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 244 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 3.0000, 0.0205, 0.0000, 0.0855}
IMM[1] FLT32 { 0.2320, 0.0000, 0.3240, 1.0000}
0: MUL TEMP[0].xy, IN[1].xyyy, IMM[0].xxxx
1: ADD TEMP[0].xy, IN[0].xyyy, -TEMP[0].xyyy
2: MOV TEMP[1].xy, TEMP[0].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MUL TEMP[1], TEMP[1], IMM[0].yyyz
5: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
6: MOV TEMP[2].xy, TEMP[0].xyyy
7: TEX TEMP[2], TEMP[2], SAMP[0], 2D
8: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
9: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
10: MOV TEMP[2].xy, TEMP[0].xyyy
11: TEX TEMP[2], TEMP[2], SAMP[0], 2D
12: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
13: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
14: MOV TEMP[2].xy, TEMP[0].xyyy
15: TEX TEMP[2], TEMP[2], SAMP[0], 2D
16: MAD TEMP[1], TEMP[2], IMM[1].zzzw, TEMP[1]
17: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
18: MOV TEMP[2].xy, TEMP[0].xyyy
19: TEX TEMP[2], TEMP[2], SAMP[0], 2D
20: MAD TEMP[1], TEMP[2], IMM[1].xxxy, TEMP[1]
21: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
22: MOV TEMP[2].xy, TEMP[0].xyyy
23: TEX TEMP[2], TEMP[2], SAMP[0], 2D
24: MAD TEMP[1], TEMP[2], IMM[0].wwwz, TEMP[1]
25: ADD TEMP[0].xy, TEMP[0].xyyy, IN[1].xyyy
26: MOV TEMP[0].xy, TEMP[0].xyyy
27: TEX TEMP[0], TEMP[0], SAMP[0], 2D
28: MAD TEMP[1], TEMP[0], IMM[0].yyyz, TEMP[1]
29: MOV OUT[0], TEMP[1]
30: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%23 = load <8 x i32>, <8 x i32> addrspace(2)* %22, align 32, !tbaa !0
%24 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%25 = load <4 x i32>, <4 x i32> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%30 = fmul float %28, 3.000000e+00
%31 = fmul float %29, 3.000000e+00
%32 = fsub float %26, %30
%33 = fsub float %27, %31
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = bitcast <8 x i32> %23 to <32 x i8>
%39 = bitcast <4 x i32> %25 to <16 x i8>
%40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = extractelement <4 x float> %40, i32 3
%45 = fmul float %41, 0x3F94FDF3C0000000
%46 = fmul float %42, 0x3F94FDF3C0000000
%47 = fmul float %43, 0x3F94FDF3C0000000
%48 = fmul float %44, 0.000000e+00
%49 = fadd float %32, %28
%50 = fadd float %33, %29
%51 = bitcast float %49 to i32
%52 = bitcast float %50 to i32
%53 = insertelement <2 x i32> undef, i32 %51, i32 0
%54 = insertelement <2 x i32> %53, i32 %52, i32 1
%55 = bitcast <8 x i32> %23 to <32 x i8>
%56 = bitcast <4 x i32> %25 to <16 x i8>
%57 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %54, <32 x i8> %55, <16 x i8> %56, i32 2)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = extractelement <4 x float> %57, i32 3
%62 = fmul float %58, 0x3FB5E35400000000
%63 = fadd float %62, %45
%64 = fmul float %59, 0x3FB5E35400000000
%65 = fadd float %64, %46
%66 = fmul float %60, 0x3FB5E35400000000
%67 = fadd float %66, %47
%68 = fmul float %61, 0.000000e+00
%69 = fadd float %68, %48
%70 = fadd float %49, %28
%71 = fadd float %50, %29
%72 = bitcast float %70 to i32
%73 = bitcast float %71 to i32
%74 = insertelement <2 x i32> undef, i32 %72, i32 0
%75 = insertelement <2 x i32> %74, i32 %73, i32 1
%76 = bitcast <8 x i32> %23 to <32 x i8>
%77 = bitcast <4 x i32> %25 to <16 x i8>
%78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
%83 = fmul float %79, 0x3FCDB22D00000000
%84 = fadd float %83, %63
%85 = fmul float %80, 0x3FCDB22D00000000
%86 = fadd float %85, %65
%87 = fmul float %81, 0x3FCDB22D00000000
%88 = fadd float %87, %67
%89 = fmul float %82, 0.000000e+00
%90 = fadd float %89, %69
%91 = fadd float %70, %28
%92 = fadd float %71, %29
%93 = bitcast float %91 to i32
%94 = bitcast float %92 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = bitcast <8 x i32> %23 to <32 x i8>
%98 = bitcast <4 x i32> %25 to <16 x i8>
%99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = extractelement <4 x float> %99, i32 2
%103 = extractelement <4 x float> %99, i32 3
%104 = fmul float %100, 0x3FD4BC6A80000000
%105 = fadd float %104, %84
%106 = fmul float %101, 0x3FD4BC6A80000000
%107 = fadd float %106, %86
%108 = fmul float %102, 0x3FD4BC6A80000000
%109 = fadd float %108, %88
%110 = fadd float %103, %90
%111 = fadd float %91, %28
%112 = fadd float %92, %29
%113 = bitcast float %111 to i32
%114 = bitcast float %112 to i32
%115 = insertelement <2 x i32> undef, i32 %113, i32 0
%116 = insertelement <2 x i32> %115, i32 %114, i32 1
%117 = bitcast <8 x i32> %23 to <32 x i8>
%118 = bitcast <4 x i32> %25 to <16 x i8>
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %116, <32 x i8> %117, <16 x i8> %118, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = extractelement <4 x float> %119, i32 3
%124 = fmul float %120, 0x3FCDB22D00000000
%125 = fadd float %124, %105
%126 = fmul float %121, 0x3FCDB22D00000000
%127 = fadd float %126, %107
%128 = fmul float %122, 0x3FCDB22D00000000
%129 = fadd float %128, %109
%130 = fmul float %123, 0.000000e+00
%131 = fadd float %130, %110
%132 = fadd float %111, %28
%133 = fadd float %112, %29
%134 = bitcast float %132 to i32
%135 = bitcast float %133 to i32
%136 = insertelement <2 x i32> undef, i32 %134, i32 0
%137 = insertelement <2 x i32> %136, i32 %135, i32 1
%138 = bitcast <8 x i32> %23 to <32 x i8>
%139 = bitcast <4 x i32> %25 to <16 x i8>
%140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %138, <16 x i8> %139, i32 2)
%141 = extractelement <4 x float> %140, i32 0
%142 = extractelement <4 x float> %140, i32 1
%143 = extractelement <4 x float> %140, i32 2
%144 = extractelement <4 x float> %140, i32 3
%145 = fmul float %141, 0x3FB5E35400000000
%146 = fadd float %145, %125
%147 = fmul float %142, 0x3FB5E35400000000
%148 = fadd float %147, %127
%149 = fmul float %143, 0x3FB5E35400000000
%150 = fadd float %149, %129
%151 = fmul float %144, 0.000000e+00
%152 = fadd float %151, %131
%153 = fadd float %132, %28
%154 = fadd float %133, %29
%155 = bitcast float %153 to i32
%156 = bitcast float %154 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = bitcast <8 x i32> %23 to <32 x i8>
%160 = bitcast <4 x i32> %25 to <16 x i8>
%161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2)
%162 = extractelement <4 x float> %161, i32 0
%163 = extractelement <4 x float> %161, i32 1
%164 = extractelement <4 x float> %161, i32 2
%165 = extractelement <4 x float> %161, i32 3
%166 = fmul float %162, 0x3F94FDF3C0000000
%167 = fadd float %166, %146
%168 = fmul float %163, 0x3F94FDF3C0000000
%169 = fadd float %168, %148
%170 = fmul float %164, 0x3F94FDF3C0000000
%171 = fadd float %170, %150
%172 = fmul float %165, 0.000000e+00
%173 = fadd float %172, %152
%174 = call i32 @llvm.SI.packf16(float %167, float %169)
%175 = bitcast i32 %174 to float
%176 = call i32 @llvm.SI.packf16(float %171, float %173)
%177 = bitcast i32 %176 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %175, float %177, float %175, float %177)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Fragment shader machine code: seven image_sample fetches taken at evenly
; spaced coordinates, each scaled by a fixed weight, accumulated, packed to
; half floats and exported.
; The weights are symmetric around the fourth (centre) tap:
;   v1 (0x3ca7ef9e), v5 (0x3daf1aa0), v6 (0x3e6d9168), v11 (0x3ea5e354), v6, v5, v1
; NOTE(review): this looks like a separable blur kernel - confirm against the
; application shader source.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; s[8:15] and s[0:3] (loaded here and at the second s_load below) are the 8-dword
; and 4-dword descriptors passed to every image_sample.
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
; Interpolate four inputs: (v2, v3) are used as the base coordinate and
; (v4, v0) as the per-tap step. The two integer operands differ per pair;
; presumably they are channel and attribute index - TODO confirm.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
; First tap coordinate = base - 3.0 * step (0x40400000 is 3.0f).
v_mov_b32_e32 v1, 0x40400000 ; 7E0202FF 40400000
v_mad_f32 v5, -v4, v1, v2 ; D2820005 240A0304
v_mad_f32 v6, -v0, v1, v3 ; D2820006 240E0300
v_mov_b32_e32 v1, 0x3ca7ef9e ; 7E0202FF 3CA7EF9E
s_waitcnt lgkmcnt(0) ; BF8C007F
; Tap 1: accumulators v2/v3/v7 (first three channels) start as weight * sample;
; the fourth-channel accumulator v8 is multiplied by 0.
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[0:3] ; F0800F00 00020705
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, v1, v7 ; 10040F01
v_mul_f32_e32 v3, v1, v8 ; 10061101
v_mul_f32_e32 v7, v1, v9 ; 100E1301
v_mul_f32_e32 v8, 0, v10 ; 10101480
; Tap 2: advance the coordinate by one step, weight v5.
v_add_f32_e32 v9, v4, v5 ; 06120B04
v_add_f32_e32 v10, v0, v6 ; 06140D00
v_mov_b32_e32 v5, 0x3daf1aa0 ; 7E0A02FF 3DAF1AA0
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020B09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v5, v11 ; 3E041705
v_mac_f32_e32 v3, v5, v12 ; 3E061905
v_mac_f32_e32 v7, v5, v13 ; 3E0E1B05
v_mac_f32_e32 v8, 0, v14 ; 3E101C80
; Tap 3: weight v6.
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_mov_b32_e32 v6, 0x3e6d9168 ; 7E0C02FF 3E6D9168
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020D0B
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v6, v13 ; 3E041B06
v_mac_f32_e32 v3, v6, v14 ; 3E061D06
v_mac_f32_e32 v7, v6, v15 ; 3E0E1F06
v_mac_f32_e32 v8, 0, v16 ; 3E102080
; Tap 4 (centre): weight v11. This is the only tap whose fourth channel is
; added unscaled (v_add instead of a multiply by 0).
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_mov_b32_e32 v11, 0x3ea5e354 ; 7E1602FF 3EA5E354
image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020C09
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v11, v12 ; 3E04190B
v_mac_f32_e32 v3, v11, v13 ; 3E061B0B
v_mac_f32_e32 v7, v11, v14 ; 3E0E1D0B
v_add_f32_e32 v8, v8, v15 ; 06101F08
; Taps 5-7: all three coordinates are computed first and the three samples are
; issued back to back; the results are consumed in issue order as vmcnt drops
; from 2 to 0.
v_add_f32_e32 v11, v4, v9 ; 06161304
v_add_f32_e32 v12, v0, v10 ; 06181500
v_add_f32_e32 v9, v4, v11 ; 06121704
v_add_f32_e32 v10, v0, v12 ; 06141900
v_add_f32_e32 v13, v4, v9 ; 061A1304
v_add_f32_e32 v14, v0, v10 ; 061C1500
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[8:15], s[0:3] ; F0800F00 00020F0B
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[8:15], s[0:3] ; F0800F00 00020909
image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[0:3] ; F0800F00 0002130D
s_waitcnt vmcnt(2) ; BF8C0772
; Tap 5: weight v6 (same as tap 3).
v_mac_f32_e32 v2, v6, v15 ; 3E041F06
v_mac_f32_e32 v3, v6, v16 ; 3E062106
v_mac_f32_e32 v7, v6, v17 ; 3E0E2306
v_mac_f32_e32 v8, 0, v18 ; 3E102480
s_waitcnt vmcnt(1) ; BF8C0771
; Tap 6: weight v5 (same as tap 2).
v_mac_f32_e32 v2, v5, v9 ; 3E041305
v_mac_f32_e32 v3, v5, v10 ; 3E061505
v_mac_f32_e32 v7, v5, v11 ; 3E0E1705
v_mac_f32_e32 v8, 0, v12 ; 3E101880
s_waitcnt vmcnt(0) ; BF8C0770
; Tap 7: weight v1 (same as tap 1).
v_mac_f32_e32 v2, v1, v19 ; 3E042701
v_mac_f32_e32 v3, v1, v20 ; 3E062901
v_mac_f32_e32 v7, v1, v21 ; 3E0E2B01
v_mac_f32_e32 v8, 0, v22 ; 3E102C80
; Pack the four accumulators into two registers of half floats and export
; them as (v0, v1, v0, v1).
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e32 v1, v7, v8 ; 5E021107
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 24
Code Size: 372 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL CONST[0..17]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx
1: UIF TEMP[0].xxxx :0
2: MUL TEMP[0], CONST[2], IN[0].xxxx
3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
5: ADD TEMP[0].xyz, TEMP[0], CONST[5]
6: MOV TEMP[1].x, CONST[6].xxxx
7: MOV TEMP[1].y, CONST[7].xxxx
8: MOV TEMP[1].z, CONST[8].xxxx
9: MOV TEMP[2].x, CONST[6].yyyy
10: MOV TEMP[2].y, CONST[7].yyyy
11: MOV TEMP[2].z, CONST[8].yyyy
12: MOV TEMP[3].x, CONST[6].zzzz
13: MOV TEMP[3].y, CONST[7].zzzz
14: MOV TEMP[3].z, CONST[8].zzzz
15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
19: RSQ TEMP[2].x, TEMP[2].xxxx
20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww
22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz
23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
24: RSQ TEMP[3].x, TEMP[3].xxxx
25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz
27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx
31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
33: MUL TEMP[1], CONST[10], TEMP[0].xxxx
34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1]
35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1]
36: ADD TEMP[0], TEMP[0], CONST[13]
37: ELSE :0
38: MUL TEMP[1], CONST[14], IN[0].xxxx
39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1]
40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1]
41: ADD TEMP[0], TEMP[1], CONST[17]
42: ENDIF
43: MOV TEMP[1].xyw, TEMP[0].xyxw
44: RCP TEMP[2].x, TEMP[0].wwww
45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx
46: MOV_SAT TEMP[2].x, TEMP[2].xxxx
47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx
48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww
49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx
50: MOV TEMP[1].z, TEMP[0].xxxx
51: MOV OUT[0], TEMP[1]
52: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI listing printed just above
; this module in the dump. It selects between two position transforms
; depending on whether CONST[1].z is non-zero, applies a depth adjustment to
; the result and exports a single POSITION output.
; Constant offsets passed to llvm.SI.load.const are in bytes; CONST[n].x sits
; at byte 16*n (e.g. offset 24 is CONST[1].z, matching the FSNE in the TGSI).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %12 is element 0 of argument %1; every constant load below reads from it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; %13 = CONST[0].w, %14 = CONST[1].x, %15 = CONST[1].y, %16 = CONST[1].z
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
; IN[0]: fetched through element 0 of argument %4 at index %5 + %7.
; Only x, y, z are extracted.
%17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0
%19 = add i32 %5, %7
%20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19)
%21 = extractelement <4 x float> %20, i32 0
%22 = extractelement <4 x float> %20, i32 1
%23 = extractelement <4 x float> %20, i32 2
; IN[1]: fetched through element 1 of argument %4 at the same index.
%24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = add i32 %5, %7
%27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26)
%28 = extractelement <4 x float> %27, i32 0
%29 = extractelement <4 x float> %27, i32 1
%30 = extractelement <4 x float> %27, i32 2
; TGSI 0-1: take the IF branch when CONST[1].z != 0.
%31 = fcmp une float %16, 0.000000e+00
br i1 %31, label %IF, label %ELSE
IF: ; preds = %main_body
; Constants for this branch, loaded from the highest offset down:
;   %32..%47  CONST[13].w .. CONST[10].x
;   %48..%56  .z/.y/.x of CONST[8], CONST[7], CONST[6]
;   %57..%68  .z/.y/.x of CONST[5], CONST[4], CONST[3], CONST[2]
;   %69..%71  CONST[0].z/.y/.x
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
; TGSI 2-5: P = CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z + CONST[5]
; (xyz only); result in %87, %88, %89.
%72 = fmul float %68, %21
%73 = fmul float %67, %21
%74 = fmul float %66, %21
%75 = fmul float %65, %22
%76 = fadd float %75, %72
%77 = fmul float %64, %22
%78 = fadd float %77, %73
%79 = fmul float %63, %22
%80 = fadd float %79, %74
%81 = fmul float %62, %23
%82 = fadd float %81, %76
%83 = fmul float %61, %23
%84 = fadd float %83, %78
%85 = fmul float %60, %23
%86 = fadd float %85, %80
%87 = fadd float %82, %59
%88 = fadd float %84, %58
%89 = fadd float %86, %57
; TGSI 6-17: N = IN[1].xyz multiplied by the transpose of the 3x3 block
; CONST[6..8].xyz; result in %100, %102, %104.
%90 = fmul float %56, %28
%91 = fmul float %53, %28
%92 = fmul float %50, %28
%93 = fmul float %55, %29
%94 = fadd float %93, %90
%95 = fmul float %52, %29
%96 = fadd float %95, %91
%97 = fmul float %49, %29
%98 = fadd float %97, %92
%99 = fmul float %54, %30
%100 = fadd float %99, %94
%101 = fmul float %51, %30
%102 = fadd float %101, %96
%103 = fmul float %48, %30
%104 = fadd float %103, %98
; TGSI 18-20: scale N by rsq(dot(N, N)); result in %111, %112, %113.
%105 = fmul float %100, %100
%106 = fmul float %102, %102
%107 = fadd float %106, %105
%108 = fmul float %104, %104
%109 = fadd float %107, %108
%110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109)
%111 = fmul float %100, %110
%112 = fmul float %102, %110
%113 = fmul float %104, %110
; TGSI 21-25: D = CONST[0].xyz - P * CONST[0].w, then scaled by rsq(dot(D, D));
; result in %126, %127, %128.
%114 = fmul float %87, %13
%115 = fmul float %88, %13
%116 = fmul float %89, %13
%117 = fsub float %71, %114
%118 = fsub float %70, %115
%119 = fsub float %69, %116
%120 = fmul float %117, %117
%121 = fmul float %118, %118
%122 = fadd float %121, %120
%123 = fmul float %119, %119
%124 = fadd float %122, %123
%125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124)
%126 = fmul float %117, %125
%127 = fmul float %118, %125
%128 = fmul float %119, %125
; TGSI 26-30: %137 = CONST[1].z * sqrt(1 - dot(N, D)^2)
%129 = fmul float %111, %126
%130 = fmul float %112, %127
%131 = fadd float %130, %129
%132 = fmul float %113, %128
%133 = fadd float %131, %132
%134 = fmul float %133, %133
%135 = fsub float 1.000000e+00, %134
%136 = call float @llvm.sqrt.f32(float %135)
%137 = fmul float %16, %136
; TGSI 31-32: P = P - N * %137; result in %141, %142, %143.
%138 = fmul float %111, %137
%139 = fmul float %112, %137
%140 = fmul float %113, %137
%141 = fsub float %87, %138
%142 = fsub float %88, %139
%143 = fsub float %89, %140
; TGSI 33-36: result = CONST[10]*P.x + CONST[11]*P.y + CONST[12]*P.z + CONST[13]
; (all four components); result in %164..%167.
%144 = fmul float %47, %141
%145 = fmul float %46, %141
%146 = fmul float %45, %141
%147 = fmul float %44, %141
%148 = fmul float %43, %142
%149 = fadd float %148, %144
%150 = fmul float %42, %142
%151 = fadd float %150, %145
%152 = fmul float %41, %142
%153 = fadd float %152, %146
%154 = fmul float %40, %142
%155 = fadd float %154, %147
%156 = fmul float %39, %143
%157 = fadd float %156, %149
%158 = fmul float %38, %143
%159 = fadd float %158, %151
%160 = fmul float %37, %143
%161 = fadd float %160, %153
%162 = fmul float %36, %143
%163 = fadd float %162, %155
%164 = fadd float %157, %35
%165 = fadd float %159, %34
%166 = fadd float %161, %33
%167 = fadd float %163, %32
br label %ENDIF
ELSE: ; preds = %main_body
; TGSI 38-41: result = CONST[14]*IN[0].x + CONST[15]*IN[0].y + CONST[16]*IN[0].z
; + CONST[17]. %168..%183 are CONST[17].w down to CONST[14].x; result in
; %204..%207.
%168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%184 = fmul float %183, %21
%185 = fmul float %182, %21
%186 = fmul float %181, %21
%187 = fmul float %180, %21
%188 = fmul float %179, %22
%189 = fadd float %188, %184
%190 = fmul float %178, %22
%191 = fadd float %190, %185
%192 = fmul float %177, %22
%193 = fadd float %192, %186
%194 = fmul float %176, %22
%195 = fadd float %194, %187
%196 = fmul float %175, %23
%197 = fadd float %196, %189
%198 = fmul float %174, %23
%199 = fadd float %198, %191
%200 = fmul float %173, %23
%201 = fadd float %200, %193
%202 = fmul float %172, %23
%203 = fadd float %202, %195
%204 = fadd float %197, %171
%205 = fadd float %199, %170
%206 = fadd float %201, %169
%207 = fadd float %203, %168
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
; Merge the x/y/z/w result of whichever branch ran.
%temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ]
%temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ]
%temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ]
%temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ]
; TGSI 44-49: z' = z + clamp(CONST[1].x / w, 0, 1), then
; z'' = lrp(CONST[1].y, max(z', -w), z').
%208 = fdiv float 1.000000e+00, %temp3.0
%209 = fmul float %14, %208
%210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00)
%211 = fadd float %temp2.0, %210
%212 = fsub float -0.000000e+00, %temp3.0
%213 = call float @llvm.maxnum.f32(float %211, float %212)
%214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211)
; The only export: OUT[0] (POSITION) = (x, y, z'', w).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Vertex shader machine code for the LLVM module printed just above this
; listing. s_buffer_load_dword offsets are in dwords, so offset 0xN is byte 4*N
; of the constant buffer in s[0:3] (e.g. 0x6 is byte 24, CONST[1].z).
; v4 = fetch index (the "add i32 %5, %7" in the IR).
v_add_i32_e32 v4, s10, v0 ; 4A08000A
; s[0:3]: constant buffer descriptor; s[4:7] and s[8:11]: descriptors for the
; two vertex inputs.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; IN[0] -> v[0:3]. The second fetch (IN[1] -> v[3:6]) reuses v3, i.e. it
; overwrites IN[0].w, which this shader never reads.
buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304
; s4 = CONST[1].z, the branch selector.
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; The compare is "== 0", so the region that follows is the IR's ELSE block
; (CONST[1].z == 0). s[6:7] keeps the lanes that still need the other region.
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
; ELSE region: dwords 0x38..0x47 are CONST[14..17].
s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147
s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142
s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143
s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144
s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145
s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146
s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D
s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E
s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F
s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140
s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141
s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138
s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139
s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A
s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B
s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C
s_waitcnt lgkmcnt(0) ; BF8C007F
; (v6, v7, v9, v10) = CONST[14]*IN[0].x + CONST[15]*IN[0].y + CONST[16]*IN[0].z
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mul_f32_e32 v9, s20, v0 ; 10120014
v_mul_f32_e32 v10, s21, v0 ; 10140015
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mac_f32_e32 v7, s13, v1 ; 3E0E020D
v_mac_f32_e32 v9, s14, v1 ; 3E12020E
v_mac_f32_e32 v10, s15, v1 ; 3E14020F
v_mac_f32_e32 v6, s16, v2 ; 3E0C0410
v_mac_f32_e32 v7, s17, v2 ; 3E0E0411
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mac_f32_e32 v10, s9, v2 ; 3E140409
; Add CONST[17]; the branch result lives in v6 (x), v8 (y), v9 (z), v7 (w).
v_add_f32_e32 v6, s10, v6 ; 060C0C0A
v_add_f32_e32 v8, s11, v7 ; 06100E0B
v_add_f32_e32 v9, s12, v9 ; 0612120C
v_add_f32_e32 v7, s5, v10 ; 060E1405
; Switch to the lanes that skipped the region above. s8 = CONST[1].x and
; s5 = CONST[1].y are loaded here and used after the two regions rejoin.
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
; IF region (CONST[1].z != 0): dwords 0x28..0x37 are CONST[10..13]; the
; remaining loads fetch CONST[0] and the xyz parts of CONST[2..8].
s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137
s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132
s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133
s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134
s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135
s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136
s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D
s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E
s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F
s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130
s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131
s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128
s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129
s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A
s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B
s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C
s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D
s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E
s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120
s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121
s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122
s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116
s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118
s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119
s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A
s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C
s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110
s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111
s_waitcnt lgkmcnt(0) ; BF8C007F
; N = IN[1].xyz (v3, v4, v5) times the transpose of CONST[6..8].xyz:
; v7 = dot(CONST[6].xyz, IN[1]), v3 = dot(CONST[7].xyz, IN[1]),
; v6 = dot(CONST[8].xyz, IN[1]).
v_mul_f32_e32 v6, s27, v3 ; 100C061B
v_mac_f32_e32 v6, s28, v4 ; 3E0C081C
v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D
s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112
v_mul_f32_e32 v7, s31, v3 ; 100E061F
v_mac_f32_e32 v7, s32, v4 ; 3E0E0820
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v3, s34, v3 ; 10060622
v_mac_f32_e32 v3, s25, v4 ; 3E060819
v_mac_f32_e32 v3, s26, v5 ; 3E060A1A
; Scalar registers whose values have been consumed are reloaded with further
; constants.
s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114
s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115
s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109
s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A
s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C
s_buffer_load_dword s32, s[0:3], 0xd ; C210010D
s_buffer_load_dword s33, s[0:3], 0xe ; C210810E
s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100
; v4 = rsq(dot(N, N))
v_mul_f32_e32 v4, v7, v7 ; 10080F07
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_waitcnt lgkmcnt(0) ; BF8C007F
; P = CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z + CONST[5] (xyz):
; v5 = P.y, v8 = P.z, v0 = P.x (computed further down).
v_mul_f32_e32 v5, s28, v0 ; 100A001C
v_mul_f32_e32 v8, s29, v0 ; 1010001D
s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101
v_mac_f32_e32 v5, s32, v1 ; 3E0A0220
v_mac_f32_e32 v8, s33, v1 ; 3E100221
v_mac_f32_e32 v5, s36, v2 ; 3E0A0424
v_mac_f32_e32 v8, s27, v2 ; 3E10041B
v_add_f32_e32 v5, s26, v5 ; 060A0A1A
v_add_f32_e32 v8, s30, v8 ; 0610101E
s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102
s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103
s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108
s_waitcnt lgkmcnt(0) ; BF8C007F
; D = CONST[0].xyz - P * CONST[0].w  (v9 holds CONST[0].w);
; D.x = v1, D.y = v10, D.z = v2.
v_mov_b32_e32 v9, s27 ; 7E12021B
v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305
v_mul_f32_e32 v0, s29, v0 ; 1000001D
v_mac_f32_e32 v0, s31, v1 ; 3E00021F
v_mac_f32_e32 v0, s35, v2 ; 3E000423
v_add_f32_e32 v0, s25, v0 ; 06000019
v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300
v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308
; v9 = rsq(dot(D, D))
v_mul_f32_e32 v9, v1, v1 ; 10120301
v_mac_f32_e32 v9, v10, v10 ; 3E12150A
v_mac_f32_e32 v9, v2, v2 ; 3E120502
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Scale N (-> v7, v3, v4) and D (-> v1, v6, v2) by their rsq factors.
v_mul_f32_e32 v7, v4, v7 ; 100E0F04
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v1, v9, v1 ; 10020309
v_mul_f32_e32 v6, v9, v10 ; 100C1509
v_mul_f32_e32 v2, v9, v2 ; 10040509
; v1 = CONST[1].z * sqrt(1 - dot(N, D)^2)
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, v6, v3 ; 3E020706
v_mac_f32_e32 v1, v2, v4 ; 3E020902
v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, s4, v1 ; 10020204
; P = P - N * v1  ->  v0 (x), v2 (y), v1 (z)
v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307
v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303
v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304
; result = CONST[10]*P.x + CONST[11]*P.y + CONST[12]*P.z + CONST[13]
v_mul_f32_e32 v3, s20, v0 ; 10060014
v_mul_f32_e32 v4, s21, v0 ; 10080015
v_mul_f32_e32 v5, s22, v0 ; 100A0016
v_mul_f32_e32 v0, s23, v0 ; 10000017
v_mac_f32_e32 v3, s24, v2 ; 3E060418
v_mac_f32_e32 v4, s15, v2 ; 3E08040F
v_mac_f32_e32 v5, s16, v2 ; 3E0A0410
v_mac_f32_e32 v0, s17, v2 ; 3E000411
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v4, s19, v1 ; 3E080213
v_mac_f32_e32 v5, s10, v1 ; 3E0A020A
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
; Same destination registers as the ELSE region: v6 (x), v8 (y), v9 (z), v7 (w).
v_add_f32_e32 v6, s12, v3 ; 060C060C
v_add_f32_e32 v8, s13, v4 ; 0610080D
v_add_f32_e32 v9, s14, v5 ; 06120A0E
v_add_f32_e32 v7, s9, v0 ; 060E0009
; Rejoin: re-enable the lanes saved in s[6:7].
s_or_b64 exec, exec, s[6:7] ; 88FE067E
; z' = z + clamp(CONST[1].x * rcp(w));
; z'' = z' * (1 - CONST[1].y) + CONST[1].y * max(z', -w)
v_rcp_f32_e32 v0, v7 ; 7E005507
v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_add_f32_e32 v0, v0, v9 ; 06001300
v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00
v_mul_f32_e32 v0, v0, v1 ; 10000300
v_mac_f32_e32 v0, s5, v2 ; 3E000405
; Export (x, y, z'', w); matches the single llvm.SI.export in the IR.
exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 744 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxx
1: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI program "MOV OUT[0], IMM[0].xxxx":
; it reads none of its arguments and exports an all-zero colour.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Pack (0.0, 0.0) into one i32 of two half floats; done once per channel pair.
%22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
%24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%25 = bitcast i32 %24 to float
; Export the two packed values as (%23, %25, %23, %25).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
; Machine code for the constant-zero fragment shader: pack (0, 0) to half
; floats once and export that register in all four slots. The two packf16
; calls in the IR have been merged into this single v_cvt_pkrtz.
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 20 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[4], IN[0].xxxx
1: MAD TEMP[0], CONST[5], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[7], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[0], IN[0].xxxx
5: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1]
8: MOV TEMP[1].xyz, TEMP[1].xyzx
9: MOV OUT[1], TEMP[0]
10: MOV OUT[2], TEMP[1]
11: MOV OUT[0], TEMP[0]
12: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI listing printed just above
; this module in the dump. It multiplies IN[0] by two 4x4 constant blocks:
;   A = CONST[4..7] * IN[0]  -> exported as GENERIC[0] and as POSITION
;   B = CONST[0..3] * IN[0]  -> exported as GENERIC[1]
; Constant offsets passed to llvm.SI.load.const are in bytes; CONST[n].x sits
; at byte 16*n.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %12 is element 0 of argument %1; every constant load below reads from it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; %13..%27 = CONST[0].x .. CONST[3].z. Offset 60 (CONST[3].w) is not loaded
; because TGSI instruction 7 writes only .xyz.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
; %28..%43 = CONST[4].x .. CONST[7].w
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
; IN[0]: fetched through element 0 of argument %4 at index %5 + %7;
; all four components are used.
%44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = add i32 %5, %7
%47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46)
%48 = extractelement <4 x float> %47, i32 0
%49 = extractelement <4 x float> %47, i32 1
%50 = extractelement <4 x float> %47, i32 2
%51 = extractelement <4 x float> %47, i32 3
; TGSI 0-3: A = CONST[4]*x + CONST[5]*y + CONST[6]*z + CONST[7]*w;
; result in %73, %75, %77, %79.
%52 = fmul float %28, %48
%53 = fmul float %29, %48
%54 = fmul float %30, %48
%55 = fmul float %31, %48
%56 = fmul float %32, %49
%57 = fadd float %56, %52
%58 = fmul float %33, %49
%59 = fadd float %58, %53
%60 = fmul float %34, %49
%61 = fadd float %60, %54
%62 = fmul float %35, %49
%63 = fadd float %62, %55
%64 = fmul float %36, %50
%65 = fadd float %64, %57
%66 = fmul float %37, %50
%67 = fadd float %66, %59
%68 = fmul float %38, %50
%69 = fadd float %68, %61
%70 = fmul float %39, %50
%71 = fadd float %70, %63
%72 = fmul float %40, %51
%73 = fadd float %72, %65
%74 = fmul float %41, %51
%75 = fadd float %74, %67
%76 = fmul float %42, %51
%77 = fadd float %76, %69
%78 = fmul float %43, %51
%79 = fadd float %78, %71
; TGSI 4-7: B = CONST[0]*x + CONST[1]*y + CONST[2]*z, then + CONST[3]*w for xyz
; only. B.xyz end up in %101, %103, %105; B.w stays at %99 (no CONST[3].w term).
%80 = fmul float %13, %48
%81 = fmul float %14, %48
%82 = fmul float %15, %48
%83 = fmul float %16, %48
%84 = fmul float %17, %49
%85 = fadd float %84, %80
%86 = fmul float %18, %49
%87 = fadd float %86, %81
%88 = fmul float %19, %49
%89 = fadd float %88, %82
%90 = fmul float %20, %49
%91 = fadd float %90, %83
%92 = fmul float %21, %50
%93 = fadd float %92, %85
%94 = fmul float %22, %50
%95 = fadd float %94, %87
%96 = fmul float %23, %50
%97 = fadd float %96, %89
%98 = fmul float %24, %50
%99 = fadd float %98, %91
%100 = fmul float %25, %51
%101 = fadd float %100, %93
%102 = fmul float %26, %51
%103 = fadd float %102, %95
%104 = fmul float %27, %51
%105 = fadd float %104, %97
; Exports, in order: A to target 32 (OUT[1], GENERIC[0]), B to target 33
; (OUT[2], GENERIC[1]), and A again to target 12 (OUT[0], POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %75, float %77, float %79)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %101, float %103, float %105, float %99)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %73, float %75, float %77, float %79)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s19, s[0:3], 0x10 ; C2098110
s_buffer_load_dword s20, s[0:3], 0x11 ; C20A0111
s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112
s_buffer_load_dword s22, s[0:3], 0x13 ; C20B0113
s_buffer_load_dword s23, s[0:3], 0x14 ; C20B8114
s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115
s_buffer_load_dword s25, s[0:3], 0x16 ; C20C8116
s_buffer_load_dword s26, s[0:3], 0x17 ; C20D0117
s_buffer_load_dword s27, s[0:3], 0x18 ; C20D8118
s_buffer_load_dword s28, s[0:3], 0x19 ; C20E0119
s_buffer_load_dword s29, s[0:3], 0x1a ; C20E811A
s_buffer_load_dword s30, s[0:3], 0x1b ; C20F011B
s_buffer_load_dword s31, s[0:3], 0x1c ; C20F811C
s_buffer_load_dword s32, s[0:3], 0x1d ; C210011D
s_buffer_load_dword s33, s[0:3], 0x1e ; C210811E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s19, v0 ; 10080013
v_mac_f32_e32 v4, s23, v1 ; 3E080217
v_mul_f32_e32 v5, s20, v0 ; 100A0014
v_mac_f32_e32 v5, s24, v1 ; 3E0A0218
v_mul_f32_e32 v6, s21, v0 ; 100C0015
v_mac_f32_e32 v6, s25, v1 ; 3E0C0219
v_mul_f32_e32 v7, s22, v0 ; 100E0016
v_mac_f32_e32 v7, s26, v1 ; 3E0E021A
v_mul_f32_e32 v8, s4, v0 ; 10100004
v_mac_f32_e32 v8, s8, v1 ; 3E100208
v_mul_f32_e32 v9, s5, v0 ; 10120005
v_mac_f32_e32 v9, s9, v1 ; 3E120209
v_mul_f32_e32 v10, s6, v0 ; 10140006
v_mac_f32_e32 v10, s10, v1 ; 3E14020A
v_mul_f32_e32 v0, s7, v0 ; 10000007
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
v_mac_f32_e32 v4, s27, v2 ; 3E08041B
v_mac_f32_e32 v5, s28, v2 ; 3E0A041C
v_mac_f32_e32 v6, s29, v2 ; 3E0C041D
v_mac_f32_e32 v7, s30, v2 ; 3E0E041E
v_mac_f32_e32 v8, s12, v2 ; 3E10040C
v_mac_f32_e32 v9, s13, v2 ; 3E12040D
v_mac_f32_e32 v10, s14, v2 ; 3E14040E
v_mac_f32_e32 v0, s15, v2 ; 3E00040F
v_mac_f32_e32 v4, s31, v3 ; 3E08061F
v_mac_f32_e32 v5, s32, v3 ; 3E0A0620
v_mac_f32_e32 v6, s33, v3 ; 3E0C0621
v_mac_f32_e32 v7, s0, v3 ; 3E0E0600
v_mac_f32_e32 v8, s16, v3 ; 3E100610
v_mac_f32_e32 v9, s17, v3 ; 3E120611
v_mac_f32_e32 v10, s18, v3 ; 3E140612
exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504
exp 15, 33, 0, 0, 0, v8, v9, v10, v0 ; F800021F 000A0908
exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 308 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..2]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, 0.0000, 1.0000, 0.3000}
IMM[1] FLT32 { 0.8000, 0.0500, 0.0000, 0.0000}
0: MUL TEMP[0].xyw, IN[0], IMM[0].xxxx
1: MOV TEMP[1].x, TEMP[0].xxxx
2: MUL TEMP[2].x, TEMP[0].yyyy, CONST[1].xxxx
3: MOV TEMP[1].y, TEMP[2].xxxx
4: ADD TEMP[0].xy, TEMP[1].xyyy, TEMP[0].wwww
5: MOV TEMP[1].xyz, IMM[0].yyyy
6: ADD TEMP[2].xyz, IN[1].xyzz, -CONST[0].xyzz
7: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
8: RSQ TEMP[3].x, TEMP[3].xxxx
9: MUL TEMP[2].y, TEMP[2].xyzz, TEMP[3].xxxx
10: MAD TEMP[2].x, TEMP[2].yyyy, IMM[0].wwww, IMM[1].xxxx
11: MOV_SAT TEMP[2].x, TEMP[2].xxxx
12: ADD TEMP[2].x, IMM[0].zzzz, -TEMP[2].xxxx
13: MOV_SAT TEMP[2].x, TEMP[2].xxxx
14: MOV TEMP[0].xy, TEMP[0].xyyy
15: MOV TEMP[0].w, IN[0].wwww
16: TXP TEMP[0].x, TEMP[0], SAMP[0], 2D
17: MAD TEMP[0].x, CONST[2].zzzz, TEMP[0].xxxx, CONST[2].wwww
18: RCP TEMP[0].x, TEMP[0].xxxx
19: ADD TEMP[0].x, TEMP[0].xxxx, -IN[0].wwww
20: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].yyyy
21: MOV_SAT TEMP[0].x, TEMP[0].xxxx
22: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
23: MIN TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
24: MOV_SAT TEMP[0].x, TEMP[0].xxxx
25: MOV TEMP[1].w, TEMP[0].xxxx
26: MOV OUT[0], TEMP[1]
27: END
; ModuleID = 'tgsi'
; Fragment shader entry point: the LLVM IR form of the 28-instruction TGSI
; FRAG program listed directly above. It produces a colour whose xyz is 0.0
; and whose w is a clamped product of two saturated factors: one derived from
; IN[1] and CONST[0], one derived from a projective texture fetch and IN[0].w.
; The "TGSI n" notes below refer to the instruction numbers in that listing.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of the [17 x <16 x i8>] array in %1: the descriptor handed to every
; llvm.SI.load.const call below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Scalar constant loads by byte offset. With 16 bytes per CONST[] vec4 these
; line up with the TGSI operands: 0/4/8 = CONST[0].xyz, 16 = CONST[1].x,
; 40/44 = CONST[2].zw.
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
; First <32 x i8> of %3 and first <16 x i8> of %2; both are only used as the
; second and third operands of the llvm.SI.sample call below.
; NOTE(review): presumably the image and sampler descriptors for SVIEW[0] /
; SAMP[0] - confirm against the driver's descriptor layout.
%30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
; Interpolated inputs. The first operand selects the channel and the second
; the attribute, matching the TGSI reads: IN[0].x/.y/.w, then IN[1].x/.y/.z.
%34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
; TGSI 0: IN[0].xyw * 0.5
%40 = fmul float %34, 5.000000e-01
%41 = fmul float %35, 5.000000e-01
%42 = fmul float %36, 5.000000e-01
; TGSI 2: scale the halved y by CONST[1].x
%43 = fmul float %41, %27
; TGSI 4: add the halved w to both coordinates
%44 = fadd float %40, %42
%45 = fadd float %43, %42
; TGSI 6: IN[1].xyz - CONST[0].xyz
%46 = fsub float %37, %24
%47 = fsub float %38, %25
%48 = fsub float %39, %26
; TGSI 7-8: squared length of that difference, then reciprocal square root
%49 = fmul float %46, %46
%50 = fmul float %47, %47
%51 = fadd float %50, %49
%52 = fmul float %48, %48
%53 = fadd float %51, %52
%54 = call float @llvm.AMDGPU.rsq.clamped.f32(float %53)
; TGSI 9: only the y component of the normalised vector is kept
%55 = fmul float %47, %54
; TGSI 10: y * 0.3 + 0.8 (the hex literals are IMM[0].w and IMM[1].x)
%56 = fmul float %55, 0x3FD3333340000000
%57 = fadd float %56, 0x3FE99999A0000000
; TGSI 11-13: saturate, subtract from 1.0, saturate again
%58 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00)
%59 = fsub float 1.000000e+00, %58
%60 = call float @llvm.AMDIL.clamp.(float %59, float 0.000000e+00, float 1.000000e+00)
; TGSI 15-16 (TXP): divide both coordinates by IN[0].w before sampling
%61 = fdiv float %44, %36
%62 = fdiv float %45, %36
; The coordinates are passed to the sampler as an i32 pair, so bitcast and pack
%63 = bitcast float %61 to i32
%64 = bitcast float %62 to i32
%65 = insertelement <2 x i32> undef, i32 %63, i32 0
%66 = insertelement <2 x i32> %65, i32 %64, i32 1
%67 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %66, <32 x i8> %31, <16 x i8> %33, i32 2)
; Only the x channel of the fetched texel is used (TGSI 16 writes TEMP[0].x)
%68 = extractelement <4 x float> %67, i32 0
; TGSI 17-18: 1.0 / (CONST[2].z * texel + CONST[2].w)
%69 = fmul float %28, %68
%70 = fadd float %69, %29
%71 = fdiv float 1.000000e+00, %70
; TGSI 19-21: subtract IN[0].w, scale by 0.05 (IMM[1].y), saturate
%72 = fsub float %71, %36
%73 = fmul float %72, 0x3FA99999A0000000
%74 = call float @llvm.AMDIL.clamp.(float %73, float 0.000000e+00, float 1.000000e+00)
; TGSI 22-24: multiply the two factors, cap at 0.5, saturate
%75 = fmul float %60, %74
%76 = call float @llvm.minnum.f32(float %75, float 5.000000e-01)
%77 = call float @llvm.AMDIL.clamp.(float %76, float 0.000000e+00, float 1.000000e+00)
; Pack the colour as two half-float pairs: (0.0, 0.0) and (0.0, %77), so xyz
; are zero and w is %77 (TGSI 5, 25, 26).
%78 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%79 = bitcast i32 %78 to float
%80 = call i32 @llvm.SI.packf16(float 0.000000e+00, float %77)
%81 = bitcast i32 %80 to float
; Single colour export; the two packed words are passed twice.
; NOTE(review): the fifth operand (1) presumably flags 16-bit packed data,
; which would agree with export_16bpc in the shader key - confirm.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %79, float %81, float %79, float %81)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the fragment shader IR above. The hex words after
; each ';' are the instruction encodings printed by the dumper. The %N notes
; refer to values in that IR.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Attribute 0, channel 0 (%34)
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
; Load the four-dword descriptor used by the s_buffer_load_dword group below
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
; Six constants at dword offsets 0x0, 0x1, 0x2, 0x4, 0xa, 0xb, i.e. the IR byte
; offsets 0, 4, 8, 16, 40, 44 (%24..%29). The last load overwrites s0.
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s0, s[0:3], 0xb ; C200010B
; Remaining interpolants: attribute 0 channels 1 and 3 (v3, v4), attribute 1
; channels 0, 1, 2 (v5, v6, v0)
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
; v1 = 0.5 * y (%41), v3 = 0.5 * w (%42), v2 = 0.5 * x + v3 (%44)
v_mul_f32_e32 v1, 0.5, v3 ; 100206F0
v_mul_f32_e32 v3, 0.5, v4 ; 100608F0
v_mad_f32 v2, 0.5, v2, v3 ; D2820002 040E04F0
s_waitcnt lgkmcnt(0) ; BF8C007F
; v3 += s11 * v1 (%45)
v_mac_f32_e32 v3, s11, v1 ; 3E06020B
; s[16:19] becomes the last operand of image_sample below
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500
; Attribute 1 xyz minus the constants in s8..s10 (%46..%48)
v_subrev_f32_e32 v1, s8, v5 ; 0A020A08
v_subrev_f32_e32 v5, s9, v6 ; 0A0A0C09
v_subrev_f32_e32 v0, s10, v0 ; 0A00000A
; NOTE(review): from here to the image_sample, the instructions touching v6/v7/v8
; look like the expansion of the two fdivs by v4 (%61, %62): v6 is set to
; 0x2f800000 when |v4| > 0x6f800000 and to 1.0 otherwise, the denominator is
; multiplied by v6 before v_rcp, and both quotients are multiplied by v6 again.
; Confirm against the backend's f32 division lowering.
v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v4|, v6 ; D008016A 00020D04
v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000
v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2
v_mul_f32_e32 v7, v6, v4 ; 100E0906
; s[4:11] becomes the eight-dword operand of image_sample below
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_rcp_f32_e32 v7, v7 ; 7E0E5507
; v1 = squared length of the difference vector (%53)
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v5, v5 ; 3E020B05
v_mac_f32_e32 v1, v0, v0 ; 3E020100
v_mul_f32_e32 v0, v7, v2 ; 10000507
v_mul_f32_e32 v2, v7, v3 ; 10040707
; v1 = reciprocal square root of that length (%54)
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v7, v0, v6 ; 100E0D00
v_mul_f32_e32 v8, v2, v6 ; 10100D02
s_waitcnt lgkmcnt(0) ; BF8C007F
; Texture fetch with coordinates v[7:8]; one result dword lands in v0 (%68)
image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[4:11], s[16:19] ; F0800100 00810007
; v1 = rsq * y-difference (%55), then v1 * 0x3e99999a + 0x3f4ccccd, the float
; encodings of the 0.3 and 0.8 literals in the IR (%57)
v_mul_f32_e32 v1, v1, v5 ; 10020B01
v_mov_b32_e32 v2, 0x3f4ccccd ; 7E0402FF 3F4CCCCD
v_madmk_f32_e32 v1, v1, v2, 0x3e99999a ; 40020501 3E99999A
; v2 = s0 + s12 * texel (%70), v0 = 1 / v2 (%71)
v_mov_b32_e32 v2, s0 ; 7E040200
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, s12, v0 ; 3E04000C
v_rcp_f32_e32 v0, v2 ; 7E005502
; Each "v_add 0, x clamp" implements one llvm.AMDIL.clamp(x, 0, 1) call:
; %58, then 1.0 - v1 (%59), then %60
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_sub_f32_e32 v1, 1.0, v1 ; 080202F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
; v0 = v0 - v4 (%72), scaled by 0x3d4ccccd (%73), clamped (%74)
v_subrev_f32_e32 v0, v4, v0 ; 0A000104
v_mul_f32_e32 v0, 0x3d4ccccd, v0 ; 100000FF 3D4CCCCD
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
; Product of the two factors (%75), min with 0.5 (%76), clamp (%77)
v_mul_f32_e32 v0, v0, v1 ; 10000300
v_min_f32_e32 v0, 0.5, v0 ; 1E0000F0
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
; Pack (0, 0) and (0, v0) as half-float pairs (%78, %80) and export them
v_cvt_pkrtz_f16_f32_e64 v1, 0, 0 ; D25E0001 00010080
v_cvt_pkrtz_f16_f32_e32 v0, 0, v0 ; 5E000080
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 328 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xyz, IN[1].xyzx
5: MOV OUT[1], TEMP[0]
6: MOV OUT[2], TEMP[1]
7: MOV OUT[0], TEMP[0]
8: END
; ModuleID = 'tgsi'
; Vertex shader entry point: the LLVM IR form of the 9-instruction TGSI VERT
; program listed directly above. It multiplies IN[0] by the four constant
; vectors CONST[0..3] (one per input component), exports the result twice
; (once as OUT[0] and once as OUT[1]), and passes IN[1].xyz through as OUT[2].
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of the [17 x <16 x i8>] array in %1: the descriptor handed to every
; llvm.SI.load.const call below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen consecutive floats at byte offsets 0..60. With 16 bytes per CONST[]
; vec4: %13..%16 = CONST[0], %17..%20 = CONST[1], %21..%24 = CONST[2],
; %25..%28 = CONST[3].
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; IN[0]: fetched through element 0 of the descriptor array in %4, indexed by
; the sum of arguments %5 and %7.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; IN[1]: same index, element 1 of the descriptor array. Only x, y, z are
; extracted because TGSI 4 reads IN[1].xyz only.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
; TGSI 0: TEMP[0] = CONST[0] * IN[0].x
%44 = fmul float %13, %33
%45 = fmul float %14, %33
%46 = fmul float %15, %33
%47 = fmul float %16, %33
; TGSI 1: TEMP[0] += CONST[1] * IN[0].y
%48 = fmul float %17, %34
%49 = fadd float %48, %44
%50 = fmul float %18, %34
%51 = fadd float %50, %45
%52 = fmul float %19, %34
%53 = fadd float %52, %46
%54 = fmul float %20, %34
%55 = fadd float %54, %47
; TGSI 2: TEMP[0] += CONST[2] * IN[0].z
%56 = fmul float %21, %35
%57 = fadd float %56, %49
%58 = fmul float %22, %35
%59 = fadd float %58, %51
%60 = fmul float %23, %35
%61 = fadd float %60, %53
%62 = fmul float %24, %35
%63 = fadd float %62, %55
; TGSI 3: TEMP[0] += CONST[3] * IN[0].w; the final vector is %65, %67, %69, %71
%64 = fmul float %25, %36
%65 = fadd float %64, %57
%66 = fmul float %26, %36
%67 = fadd float %66, %59
%68 = fmul float %27, %36
%69 = fadd float %68, %61
%70 = fmul float %28, %36
%71 = fadd float %70, %63
; Three exports, told apart by the fourth operand:
;   32 - the transformed vector (TGSI 5, OUT[1])
;   33 - IN[1].xyz with 0.0 in w, since TEMP[1].w is never written (TGSI 6, OUT[2])
;   12 - the transformed vector again (TGSI 7, OUT[0] / POSITION)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %67, float %69, float %71)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %41, float %42, float %43, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the vertex shader IR above. The hex words after
; each ';' are the instruction encodings printed by the dumper. The %N notes
; refer to values in that IR.
; s[0:3]: descriptor used by the s_buffer_load_dword group below
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v0 = s10 + v0: the index used by both vertex fetches (%31 / %39)
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Two consecutive four-dword descriptors from s[8:9], one per vertex fetch.
; The second load overwrites s[8:11], including the base pointer.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
; IN[0] -> v[1:4] (%32), IN[1] -> v[5:8] (%40). Both are issued before the
; remaining constant loads reuse s4..s11 as destinations.
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
; Constant dwords 0x2..0xf; together with s12/s13 these are the sixteen floats
; %13..%28. The last load overwrites s0.
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; Result components accumulate in v0, v8, v9, v1 (x, y, z, w).
; x and y input terms first (%44..%55):
v_mul_f32_e32 v0, s12, v1 ; 1000020C
; Wait for the second fetch as well; v8 (its unused fourth component) is
; about to be reused as an accumulator.
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v2 ; 3E000406
v_mul_f32_e32 v8, s13, v1 ; 1010020D
v_mac_f32_e32 v8, s7, v2 ; 3E100407
v_mul_f32_e32 v9, s4, v1 ; 10120204
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mul_f32_e32 v1, s5, v1 ; 10020205
v_mac_f32_e32 v1, s9, v2 ; 3E020409
; z input terms (%56..%63)
v_mac_f32_e32 v0, s10, v3 ; 3E00060A
v_mac_f32_e32 v8, s11, v3 ; 3E10060B
v_mac_f32_e32 v9, s14, v3 ; 3E12060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
; w input terms (%64..%71)
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v8, s17, v4 ; 3E100811
v_mac_f32_e32 v9, s18, v4 ; 3E120812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; Exports in the same order as the IR: target 32, target 33 (IN[1].xyz with a
; zero in v2 for w), then target 12
exp 15, 32, 0, 0, 0, v0, v8, v9, v1 ; F800020F 01090800
v_mov_b32_e32 v2, 0 ; 7E040280
exp 15, 33, 0, 0, 0, v5, v6, v7, v2 ; F800021F 02070605
exp 15, 12, 0, 1, 0, v0, v8, v9, v1 ; F80008CF 01090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 204 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[2]
DCL CONST[4]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.1000, 0.5000, 0.0150, 2.0000}
IMM[1] FLT32 { -0.9961, 1.0000, 0.0000, 0.0000}
0: MUL TEMP[0].x, CONST[0].xxxx, IMM[0].xxxx
1: MOV TEMP[1].x, TEMP[0].xxxx
2: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy
3: MOV TEMP[1].y, TEMP[0].xxxx
4: MAD TEMP[0].xy, IN[1].xyyy, IMM[0].zzzz, TEMP[1].xyyy
5: MOV TEMP[0].xy, TEMP[0].xyyy
6: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D
7: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[1].xxxx
8: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
9: RSQ TEMP[1].x, TEMP[1].xxxx
10: MUL TEMP[0].xy, TEMP[0].xyzz, TEMP[1].xxxx
11: MUL TEMP[1].xyw, IN[0], IMM[0].yyyy
12: ADD TEMP[1].xy, TEMP[1].xyyy, TEMP[1].wwww
13: MOV TEMP[2].x, TEMP[0].xxxx
14: RCP TEMP[3].x, CONST[4].wwww
15: MUL TEMP[3].x, CONST[4].zzzz, TEMP[3].xxxx
16: MUL TEMP[3].x, TEMP[0].yyyy, TEMP[3].xxxx
17: MOV TEMP[2].y, TEMP[3].xxxx
18: RCP TEMP[3].xy, IN[0].wwww
19: MUL TEMP[0].xy, TEMP[1].xyyy, TEMP[3].xyyy
20: MUL TEMP[1].xy, TEMP[2].xyyy, CONST[2].xxxx
21: MAD TEMP[0].xy, TEMP[1].xyyy, TEMP[3].xyyy, TEMP[0].xyyy
22: MUL TEMP[0].xy, TEMP[0].xyyy, IN[0].wwww
23: MOV TEMP[1].w, IMM[1].yyyy
24: MOV TEMP[0].xy, TEMP[0].xyyy
25: MOV TEMP[0].w, IN[0].wwww
26: TXP TEMP[0].xyz, TEMP[0], SAMP[1], 2D
27: MOV TEMP[1].xyz, TEMP[0].xyzx
28: MOV OUT[0], TEMP[1]
29: END
; ModuleID = 'tgsi'
; Fragment shader entry point: the LLVM IR form of the 30-instruction TGSI
; FRAG program listed directly above. A first texture fetch (SAMP[0]) yields
; a vector that is rescaled and normalised; its xy components offset the
; projective coordinates of a second fetch (SAMP[1]), whose xyz becomes the
; output colour with w = 1.0.
; The "TGSI n" notes below refer to the instruction numbers in that listing.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of the [17 x <16 x i8>] array in %1: the descriptor handed to every
; llvm.SI.load.const call below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Scalar constant loads by byte offset. With 16 bytes per CONST[] vec4:
; 0 = CONST[0].x, 32 = CONST[2].x, 72/76 = CONST[4].zw.
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
; Element 0 of %3 and of %2: the <32 x i8> / <16 x i8> operands of the first
; llvm.SI.sample call.
; NOTE(review): presumably the image and sampler descriptors for SVIEW[0] /
; SAMP[0] - confirm against the driver's descriptor layout.
%28 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%29 = load <32 x i8>, <32 x i8> addrspace(2)* %28, align 32, !tbaa !0
%30 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !tbaa !0
; Element 1 of the same two arrays: the operands of the second sample call
; (presumably SVIEW[1] / SAMP[1]).
%32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)*
%34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0
%35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)*
%37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0
; Interpolated inputs. The first operand selects the channel and the second
; the attribute, matching the TGSI reads: IN[0].x/.y/.w, then IN[1].x/.y.
%38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
; TGSI 0, 2: CONST[0].x * 0.1, and half of that
%43 = fmul float %24, 0x3FB99999A0000000
%44 = fmul float %43, 5.000000e-01
; TGSI 4: IN[1].xy * 0.015 + (%43, %44)
%45 = fmul float %41, 0x3F8EB851E0000000
%46 = fadd float %45, %43
%47 = fmul float %42, 0x3F8EB851E0000000
%48 = fadd float %47, %44
; TGSI 6 (TEX): pack the coordinates as an i32 pair and take the first fetch
%49 = bitcast float %46 to i32
%50 = bitcast float %48 to i32
%51 = insertelement <2 x i32> undef, i32 %49, i32 0
%52 = insertelement <2 x i32> %51, i32 %50, i32 1
%53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %29, <16 x i8> %31, i32 2)
%54 = extractelement <4 x float> %53, i32 0
%55 = extractelement <4 x float> %53, i32 1
%56 = extractelement <4 x float> %53, i32 2
; TGSI 7: texel.xyz * 2.0 + IMM[1].x (printed as -0.9961 in the TGSI listing)
%57 = fmul float %54, 2.000000e+00
%58 = fadd float %57, 0xBFEFDFDF00000000
%59 = fmul float %55, 2.000000e+00
%60 = fadd float %59, 0xBFEFDFDF00000000
%61 = fmul float %56, 2.000000e+00
%62 = fadd float %61, 0xBFEFDFDF00000000
; TGSI 8-9: squared length of that vector, then reciprocal square root
%63 = fmul float %58, %58
%64 = fmul float %60, %60
%65 = fadd float %64, %63
%66 = fmul float %62, %62
%67 = fadd float %65, %66
%68 = call float @llvm.AMDGPU.rsq.clamped.f32(float %67)
; TGSI 10: only x and y of the normalised vector are kept
%69 = fmul float %58, %68
%70 = fmul float %60, %68
; TGSI 11-12: IN[0].xyw * 0.5, then add the halved w to x and y
%71 = fmul float %38, 5.000000e-01
%72 = fmul float %39, 5.000000e-01
%73 = fmul float %40, 5.000000e-01
%74 = fadd float %71, %73
%75 = fadd float %72, %73
; TGSI 14-16: scale the normalised y by CONST[4].z / CONST[4].w
%76 = fdiv float 1.000000e+00, %27
%77 = fmul float %26, %76
%78 = fmul float %70, %77
; TGSI 18-19: multiply the base coordinates by 1 / IN[0].w
%79 = fdiv float 1.000000e+00, %40
%80 = fmul float %74, %79
%81 = fmul float %75, %79
; TGSI 20: scale the xy offset by CONST[2].x
%82 = fmul float %69, %25
%83 = fmul float %78, %25
; TGSI 21: offset * (1 / w) + base coordinates
%84 = fmul float %82, %79
%85 = fadd float %84, %80
%86 = fmul float %83, %79
%87 = fadd float %86, %81
; TGSI 22: multiply back by IN[0].w ...
%88 = fmul float %85, %40
%89 = fmul float %87, %40
; ... and TGSI 25-26 (TXP) divides by IN[0].w again for the projective fetch
%90 = fdiv float %88, %40
%91 = fdiv float %89, %40
%92 = bitcast float %90 to i32
%93 = bitcast float %91 to i32
%94 = insertelement <2 x i32> undef, i32 %92, i32 0
%95 = insertelement <2 x i32> %94, i32 %93, i32 1
%96 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %95, <32 x i8> %34, <16 x i8> %37, i32 2)
%97 = extractelement <4 x float> %96, i32 0
%98 = extractelement <4 x float> %96, i32 1
%99 = extractelement <4 x float> %96, i32 2
; Pack the colour as two half-float pairs: (x, y) and (z, 1.0) (TGSI 23, 27)
%100 = call i32 @llvm.SI.packf16(float %97, float %98)
%101 = bitcast i32 %100 to float
%102 = call i32 @llvm.SI.packf16(float %99, float 1.000000e+00)
%103 = bitcast i32 %102 to float
; Single colour export; the two packed words are passed twice.
; NOTE(review): the fifth operand (1) presumably flags 16-bit packed data,
; which would agree with export_16bpc in the shader key - confirm.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %101, float %103, float %101, float %103)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the fragment shader IR above. The hex words after
; each ';' are the instruction encodings printed by the dumper. The %N notes
; refer to values in that IR.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; s[0:3]: descriptor for the constant loads; s[12:15]: last operand of the
; first image_sample
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
; Interpolants: attribute 0 channels 0, 1, 3 -> v2, v3, v4 (%38..%40);
; attribute 1 channel 0 -> v5 (%41)
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constants at dword offsets 0x0, 0x8, 0x12, 0x13, i.e. the IR byte offsets
; 0, 32, 72, 76 (%24..%27)
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108
s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112
s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113
; Attribute 1 channel 1 -> v0 (%42); s[20:27] is the eight-dword operand of
; the first image_sample
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
; v6 = s8 * 0x3dcccccd (%43), v7 = 0.5 * v6 (%44); then both accumulate
; 0x3c75c28f * attribute 1 x / y (%46, %48). The two literals are the float
; encodings of the IR's 0.1 and 0.015 constants.
v_mov_b32_e32 v1, 0x3dcccccd ; 7E0202FF 3DCCCCCD
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s8, v1 ; 100C0208
v_mul_f32_e32 v7, 0.5, v6 ; 100E0CF0
v_mov_b32_e32 v1, 0x3c75c28f ; 7E0202FF 3C75C28F
v_mac_f32_e32 v6, v1, v5 ; 3E0C0B01
v_mac_f32_e32 v7, v1, v0 ; 3E0E0101
; Descriptors for the second image_sample, loaded early (offsets 0x4 and 0x8
; from the same base pointers as the first pair)
s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504
s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708
; First texture fetch: coordinates v[6:7], three result dwords in v[5:7]
image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[20:27], s[12:15] ; F0800700 00650506
v_mov_b32_e32 v0, 0xbf7efef8 ; 7E0002FF BF7EFEF8
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; texel * 2.0 + 0xbf7efef8 for x, y, z -> v1, v5, v0 (%58, %60, %62)
v_mad_f32 v1, 2.0, v5, v0 ; D2820001 04020AF4
v_mad_f32 v5, 2.0, v6, v0 ; D2820005 04020CF4
v_mad_f32 v0, 2.0, v7, v0 ; D2820000 04020EF4
; Squared length (%67) and its reciprocal square root in v0 (%68)
v_mul_f32_e32 v6, v1, v1 ; 100C0301
v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05
v_mac_f32_e32 v6, v0, v0 ; 3E0C0100
v_rsq_clamp_f32_e32 v0, v6 ; 7E005906
; NOTE(review): v6 is set to 0x2f800000 when |v4| > 0x6f800000 and to 1.0
; otherwise; it scales the denominator before the later v_rcp of v3 and is
; multiplied back into both quotients. This looks like the expansion of the
; two fdivs by v4 (%90, %91) - confirm against the backend's f32 division
; lowering.
v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v4|, v6 ; D008016A 00020D04
v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000
v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2
; Normalised x and y (%69, %70)
v_mul_f32_e32 v1, v0, v1 ; 10020300
v_mul_f32_e32 v0, v0, v5 ; 10000B00
; v5 = 1 / s18 (%76)
v_rcp_f32_e32 v5, s18 ; 7E0A5412
; v7 = 0.5 * w (%73), v2 = 0.5 * x + v7 (%74), v7 += 0.5 * y (%75)
v_mul_f32_e32 v7, 0.5, v4 ; 100E08F0
v_mad_f32 v2, 0.5, v2, v7 ; D2820002 041E04F0
v_mac_f32_e32 v7, 0.5, v3 ; 3E0E06F0
; v3 = s17 * (1 / s18) (%77); v5 is then reused for 1 / w (%79)
v_mul_f32_e32 v3, s17, v5 ; 10060A11
v_rcp_f32_e32 v5, v4 ; 7E0A5504
; Offsets: v0 = v3 * normalised y (%78), then both scaled by s16 (%82, %83)
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v1, s16, v1 ; 10020210
v_mul_f32_e32 v0, s16, v0 ; 10000010
; x coordinate: (1/w) * base + (1/w) * offset (%80, %85)
v_mul_f32_e32 v2, v5, v2 ; 10040505
v_mac_f32_e32 v2, v5, v1 ; 3E040305
; y coordinate, interleaved with the scaled reciprocal of w for the fdivs
v_mul_f32_e32 v1, v5, v7 ; 10020F05
v_mul_f32_e32 v3, v6, v4 ; 10060906
v_rcp_f32_e32 v3, v3 ; 7E065503
v_mac_f32_e32 v1, v5, v0 ; 3E020105
; Multiply by w (%88, %89), then divide by w via v3 and v6 (%90, %91)
v_mul_f32_e32 v0, v4, v2 ; 10000504
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_mul_f32_e32 v2, v0, v6 ; 10040D00
v_mul_f32_e32 v3, v1, v6 ; 10060D01
; Second texture fetch: coordinates v[2:3], three result dwords in v[0:2]
image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800700 00010002
s_waitcnt vmcnt(0) ; BF8C0770
; Pack (x, y) and (z, 1.0) as half-float pairs (%100, %102) and export them
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 8
Code Size: 344 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..14]
DCL CONST[16..19]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MUL TEMP[3], CONST[16], IN[0].xxxx
43: MAD TEMP[3], CONST[17], IN[0].yyyy, TEMP[3]
44: MAD TEMP[3], CONST[18], IN[0].zzzz, TEMP[3]
45: MAD TEMP[3], CONST[19], IN[0].wwww, TEMP[3]
46: MOV TEMP[4].x, CONST[11].xxxx
47: MOV TEMP[4].y, CONST[12].xxxx
48: MOV TEMP[4].z, CONST[13].xxxx
49: MOV TEMP[5].x, CONST[11].yyyy
50: MOV TEMP[5].y, CONST[12].yyyy
51: MOV TEMP[5].z, CONST[13].yyyy
52: MOV TEMP[6].x, CONST[11].zzzz
53: MOV TEMP[6].y, CONST[12].zzzz
54: MOV TEMP[6].z, CONST[13].zzzz
55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz
58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
59: RSQ TEMP[4].x, TEMP[4].xxxx
60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
61: MOV TEMP[4].w, IMM[2].xxxx
62: MOV TEMP[4].xyz, TEMP[0].xyzx
63: DP4 TEMP[5].x, CONST[0], TEMP[4]
64: DP4 TEMP[6].x, CONST[1], TEMP[4]
65: MOV TEMP[5].y, TEMP[6].xxxx
66: DP4 TEMP[4].x, CONST[2], TEMP[4]
67: MOV TEMP[5].z, TEMP[4].xxxx
68: MUL TEMP[4], TEMP[0].xyzz, TEMP[0].yzzx
69: DP4 TEMP[6].x, CONST[3], TEMP[4]
70: DP4 TEMP[7].x, CONST[4], TEMP[4]
71: MOV TEMP[6].y, TEMP[7].xxxx
72: DP4 TEMP[4].x, CONST[5], TEMP[4]
73: MOV TEMP[6].z, TEMP[4].xxxx
74: MUL TEMP[4].x, TEMP[0].yyyy, TEMP[0].yyyy
75: MAD TEMP[4].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[4].xxxx
76: MAD TEMP[4].xyz, CONST[6].xyzz, TEMP[4].xxxx, TEMP[6].xyzz
77: ADD TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xyzz
78: MOV TEMP[0].yzw, TEMP[0].yxyz
79: MUL TEMP[5], CONST[7], IN[0].xxxx
80: MAD TEMP[5], CONST[8], IN[0].yyyy, TEMP[5]
81: MAD TEMP[5], CONST[9], IN[0].zzzz, TEMP[5]
82: MAD TEMP[5].xyz, CONST[10], IN[0].wwww, TEMP[5]
83: MOV TEMP[5].xyz, TEMP[5].xyzx
84: MOV TEMP[5].w, TEMP[4].xxxx
85: MOV TEMP[4].xy, TEMP[4].yzyy
86: MOV TEMP[0].x, TEMP[3].zzzz
87: MOV OUT[4], TEMP[5]
88: MOV OUT[1], TEMP[2]
89: MOV OUT[2], TEMP[1]
90: MOV OUT[3], TEMP[0]
91: MOV OUT[0], TEMP[3]
92: MOV OUT[5], TEMP[4]
93: END
; ModuleID = 'tgsi'
; Vertex shader entry point (attribute group #0 carries "ShaderType"="1"), generated from
; the TGSI VERT listing printed directly above this module. Instruction numbers quoted in
; the comments below ("TGSI n") refer to that listing.
; Arguments actually used in the body:
;   %1      - descriptor array; element 0 is the buffer read by every llvm.SI.load.const
;   %2, %3  - loaded once and handed to llvm.SI.samplel (presumably sampler state and
;             image resource for SAMP[0]/SVIEW[0] - confirm against the intrinsic definition)
;   %4      - descriptor array for the three vertex inputs IN[0..2]
;   %5, %7  - summed to form the index passed to llvm.SI.vs.load.input
; %0, %6, %8, %9 and %10 are not referenced in this function.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor 0 of %1: source buffer for all constant loads below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant loads. The second operand is a byte offset; offset/16 matches the TGSI CONST
; index (e.g. offset 256 feeds the CONST[16] * IN[0].x product of TGSI 42).
; %13-%36: CONST[0..5], all four components.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; %37-%60: CONST[6..13], x/y/z only (the w component of each is never loaded).
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
; %61-%76: CONST[16..19], all four components (used for the position transform).
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
; First element of %3 and of %2: the two descriptor operands of the llvm.SI.samplel call.
%77 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%78 = load <32 x i8>, <32 x i8> addrspace(2)* %77, align 32, !tbaa !0
%79 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0
; IN[0]: fetched through descriptor 0 of %4 at index %5 + %7; all four components are used.
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
; IN[1]: descriptor 1, same index; only x and y are extracted.
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
; IN[2]: descriptor 2, same index; only x and y are extracted.
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
; TGSI 0-11: split IN[2] into integer fields.
;   %102 = int(IN[2].x * 0.25), %103 = int(IN[2].y),
;   %104 = %103 / 256 (signed), %108 = %103 - (%104 << 8), i.e. the matching remainder.
%101 = fmul float %99, 2.500000e-01
%102 = fptosi float %101 to i32
%103 = fptosi float %100 to i32
%104 = sdiv i32 %103, 256
%105 = sitofp i32 %102 to float
%106 = sitofp i32 %104 to float
%107 = shl nsw i32 %104, 8
%108 = sub i32 %103, %107
%109 = sitofp i32 %108 to float
; TGSI 12: subtract 1.0 from each of the three fields; these become OUT[1].xyz.
%110 = fadd float %105, -1.000000e+00
%111 = fadd float %106, -1.000000e+00
%112 = fadd float %109, -1.000000e+00
; TGSI 13-16: %117 = int((IN[2].y - float(int(IN[2].y))) * 10.0 + IMM[0].w), where IMM[0].w
; is the constant the TGSI listing prints as 0.4999.
%113 = sitofp i32 %103 to float
%114 = fsub float %100, %113
%115 = fmul float %114, 1.000000e+01
%116 = fadd float %115, 0x3FDFFE5CA0000000
%117 = fptosi float %116 to i32
; TGSI 17-25: three 1.0/0.0 selectors for %117 == 0, 1, 2; these become OUT[2].xyz.
%118 = icmp eq i32 %117, 0
%119 = select i1 %118, float 1.000000e+00, float 0.000000e+00
%120 = icmp eq i32 %117, 1
%121 = select i1 %120, float 1.000000e+00, float 0.000000e+00
%122 = icmp eq i32 %117, 2
%123 = select i1 %122, float 1.000000e+00, float 0.000000e+00
; TGSI 26-30: DP4 of the selectors with (%105, %106, %109, 0.0) picks one of the three
; fields; the w term shows up only as the "+ 0.0" in %129. The result is scaled by
; IMM[2].w (printed as 0.0039 in the TGSI listing).
%124 = fmul float %119, %105
%125 = fmul float %121, %106
%126 = fadd float %124, %125
%127 = fmul float %123, %109
%128 = fadd float %126, %127
%129 = fadd float %128, 0.000000e+00
%130 = fmul float %129, 0x3F70101020000000
; TGSI 28, 31-33: build the TXL operand as raw bits: x = %130, y = 1036831949
; (0x3DCCCCCD, the float bit pattern of IMM[2].z = 0.1), third element = 0 (TEMP[3].w = 0.0).
%131 = bitcast float %130 to i32
%132 = insertelement <4 x i32> undef, i32 %131, i32 0
%133 = insertelement <4 x i32> %132, i32 1036831949, i32 1
%134 = insertelement <4 x i32> %133, i32 0, i32 2
%135 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %134, <32 x i8> %78, <16 x i8> %80, i32 2)
%136 = extractelement <4 x float> %135, i32 0
%137 = extractelement <4 x float> %135, i32 1
; TGSI 34-37: texel.x * 16 - 8 -> OUT[1].w; texel.y * 4 -> OUT[2].w.
%138 = fmul float %136, 1.600000e+01
%139 = fadd float %138, -8.000000e+00
%140 = fmul float %137, 4.000000e+00
; TGSI 38-41: %144 = IN[2].x - float(4 * %102) - 2.0.
%141 = shl i32 %102, 2
%142 = sitofp i32 %141 to float
%143 = fsub float %99, %142
%144 = fadd float %143, -2.000000e+00
; TGSI 42-45: TEMP[3] = CONST[16]*IN[0].x + CONST[17]*IN[0].y + CONST[18]*IN[0].z
; + CONST[19]*IN[0].w. Final components are %166/%168/%170/%172 (exported as POSITION).
%145 = fmul float %61, %85
%146 = fmul float %62, %85
%147 = fmul float %63, %85
%148 = fmul float %64, %85
%149 = fmul float %65, %86
%150 = fadd float %149, %145
%151 = fmul float %66, %86
%152 = fadd float %151, %146
%153 = fmul float %67, %86
%154 = fadd float %153, %147
%155 = fmul float %68, %86
%156 = fadd float %155, %148
%157 = fmul float %69, %87
%158 = fadd float %157, %150
%159 = fmul float %70, %87
%160 = fadd float %159, %152
%161 = fmul float %71, %87
%162 = fadd float %161, %154
%163 = fmul float %72, %87
%164 = fadd float %163, %156
%165 = fmul float %73, %88
%166 = fadd float %165, %158
%167 = fmul float %74, %88
%168 = fadd float %167, %160
%169 = fmul float %75, %88
%170 = fadd float %169, %162
%171 = fmul float %76, %88
%172 = fadd float %171, %164
; TGSI 46-57: three-component vector built from the x/y/z components of CONST[11..13]
; weighted by (IN[1].x, IN[1].y, %144). Results: %183, %185, %187.
%173 = fmul float %52, %93
%174 = fmul float %55, %93
%175 = fmul float %58, %93
%176 = fmul float %53, %94
%177 = fadd float %176, %173
%178 = fmul float %56, %94
%179 = fadd float %178, %174
%180 = fmul float %59, %94
%181 = fadd float %180, %175
%182 = fmul float %54, %144
%183 = fadd float %182, %177
%184 = fmul float %57, %144
%185 = fadd float %184, %179
%186 = fmul float %60, %144
%187 = fadd float %186, %181
; TGSI 58-60: scale that vector by rsq of its squared length. Call the result
; n = (%194, %195, %196); it is exported as OUT[3].yzw.
%188 = fmul float %183, %183
%189 = fmul float %185, %185
%190 = fadd float %189, %188
%191 = fmul float %187, %187
%192 = fadd float %190, %191
%193 = call float @llvm.AMDGPU.rsq.clamped.f32(float %192)
%194 = fmul float %183, %193
%195 = fmul float %185, %193
%196 = fmul float %187, %193
; TGSI 61-67: DP4 of CONST[0], CONST[1], CONST[2] with (n, 1.0) -> %202, %208, %214.
%197 = fmul float %13, %194
%198 = fmul float %14, %195
%199 = fadd float %197, %198
%200 = fmul float %15, %196
%201 = fadd float %199, %200
%202 = fadd float %201, %16
%203 = fmul float %17, %194
%204 = fmul float %18, %195
%205 = fadd float %203, %204
%206 = fmul float %19, %196
%207 = fadd float %205, %206
%208 = fadd float %207, %20
%209 = fmul float %21, %194
%210 = fmul float %22, %195
%211 = fadd float %209, %210
%212 = fmul float %23, %196
%213 = fadd float %211, %212
%214 = fadd float %213, %24
; TGSI 68: component products n.xyzz * n.yzzx = (x*y, y*z, z*z, z*x).
%215 = fmul float %194, %195
%216 = fmul float %195, %196
%217 = fmul float %196, %196
%218 = fmul float %196, %194
; TGSI 69-73: DP4 of CONST[3], CONST[4], CONST[5] with those products -> %225, %232, %239.
%219 = fmul float %25, %215
%220 = fmul float %26, %216
%221 = fadd float %219, %220
%222 = fmul float %27, %217
%223 = fadd float %221, %222
%224 = fmul float %28, %218
%225 = fadd float %223, %224
%226 = fmul float %29, %215
%227 = fmul float %30, %216
%228 = fadd float %226, %227
%229 = fmul float %31, %217
%230 = fadd float %228, %229
%231 = fmul float %32, %218
%232 = fadd float %230, %231
%233 = fmul float %33, %215
%234 = fmul float %34, %216
%235 = fadd float %233, %234
%236 = fmul float %35, %217
%237 = fadd float %235, %236
%238 = fmul float %36, %218
%239 = fadd float %237, %238
; TGSI 74-77: %242 = n.x*n.x - n.y*n.y; CONST[6].xyz * %242 is added to the two DP4
; groups above, giving %249, %250, %251.
%240 = fmul float %195, %195
%241 = fmul float %194, %194
%242 = fsub float %241, %240
%243 = fmul float %37, %242
%244 = fadd float %243, %225
%245 = fmul float %38, %242
%246 = fadd float %245, %232
%247 = fmul float %39, %242
%248 = fadd float %247, %239
%249 = fadd float %244, %202
%250 = fadd float %246, %208
%251 = fadd float %248, %214
; TGSI 79-83: xyz of CONST[7]*IN[0].x + CONST[8]*IN[0].y + CONST[9]*IN[0].z
; + CONST[10]*IN[0].w -> %268, %270, %272.
%252 = fmul float %40, %85
%253 = fmul float %41, %85
%254 = fmul float %42, %85
%255 = fmul float %43, %86
%256 = fadd float %255, %252
%257 = fmul float %44, %86
%258 = fadd float %257, %253
%259 = fmul float %45, %86
%260 = fadd float %259, %254
%261 = fmul float %46, %87
%262 = fadd float %261, %256
%263 = fmul float %47, %87
%264 = fadd float %263, %258
%265 = fmul float %48, %87
%266 = fadd float %265, %260
%267 = fmul float %49, %88
%268 = fadd float %267, %262
%269 = fmul float %50, %88
%270 = fadd float %269, %264
%271 = fmul float %51, %88
%272 = fadd float %271, %266
; Exports, in the order emitted. The fourth operand (32..36, then 12) distinguishes the
; outputs; matched against TGSI 87-92 by their operands:
;   32 <- OUT[1] = TEMP[2]    33 <- OUT[2] = TEMP[1]    34 <- OUT[3] = TEMP[0]
;   35 <- OUT[4] = TEMP[5]    36 <- OUT[5] = TEMP[4]    12 <- OUT[0] = TEMP[3] (POSITION)
; In export 36 the repeated %251 and the trailing %218 are the z and w components left in
; TEMP[4] by TGSI 77 and TGSI 68; TGSI 85 only rewrites x and y.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %110, float %111, float %112, float %139)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %119, float %121, float %123, float %140)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %170, float %194, float %195, float %196)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %268, float %270, float %272, float %249)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %250, float %251, float %251, float %218)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %166, float %168, float %170, float %172)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are printed above.
; The trailing "; XXXXXXXX" on each instruction is the instruction encoding as emitted
; by the disassembler.
;
; Literal constants kept in VGPRs for the v_madmk instructions further down:
;   v1 = 0x3efff2e5 (the 0.4999 bias of TGSI IMM[0].w), v2 = 0xc1000000 (-8.0).
; v5 = 0 sits inside the v[3:6] address operand of image_sample_l (presumably the
; zero third coordinate element the IR inserts - confirm).
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000
v_mov_b32_e32 v5, 0 ; 7E0A0280
; v0 = s10 + v0: index used by all three buffer_load_format_xyzw fetches (presumably the
; "add i32 %5, %7" of the IR).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Descriptor loads: s[60:63] is the base of every s_buffer_load_dword below, s[20:23] and
; s[28:35] are the two descriptor operands of image_sample_l, and the three loads from
; s[8:9] (offsets 0x0, 0x4, 0x8) feed the three vertex fetches.
s_load_dwordx4 s[60:63], s[2:3], 0x0 ; C09E0300
s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700
s_waitcnt lgkmcnt(0) ; BF8C007F
; Scalar constant loads. Offsets here appear to be in dwords: 0x40 corresponds to byte
; offset 256 (CONST[16].x) in the IR, since both feed the first multiply by IN[0].x.
s_buffer_load_dword s45, s[60:63], 0x26 ; C216BD26
s_buffer_load_dword s44, s[60:63], 0x28 ; C2163D28
; Vertex fetches: v[6:9] = first input, v[10:13] = second input (only v10/v11 are read
; afterwards), v[12:15] = third input (only v12/v13 are read afterwards).
; The third fetch writes v12/v13 again, which is presumably why a wait on the earlier
; fetches precedes it.
buffer_load_format_xyzw v[6:9], v0, s[0:3], 0 idxen ; E00C2000 80000600
buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
s_buffer_load_dword s43, s[60:63], 0x29 ; C215BD29
s_buffer_load_dword s42, s[60:63], 0x2a ; C2153D2A
s_buffer_load_dword s64, s[60:63], 0x2c ; C2203D2C
s_buffer_load_dword s46, s[60:63], 0x2d ; C2173D2D
s_buffer_load_dword s39, s[60:63], 0x2e ; C213BD2E
s_buffer_load_dword s65, s[60:63], 0x30 ; C220BD30
s_buffer_load_dword s47, s[60:63], 0x31 ; C217BD31
s_buffer_load_dword s40, s[60:63], 0x32 ; C2143D32
s_buffer_load_dword s66, s[60:63], 0x34 ; C2213D34
s_buffer_load_dword s48, s[60:63], 0x35 ; C2183D35
s_buffer_load_dword s41, s[60:63], 0x36 ; C214BD36
s_buffer_load_dword s67, s[60:63], 0x40 ; C221BD40
s_buffer_load_dword s68, s[60:63], 0x41 ; C2223D41
s_buffer_load_dword s1, s[60:63], 0xa ; C200BD0A
s_buffer_load_dword s0, s[60:63], 0xb ; C2003D0B
s_buffer_load_dword s13, s[60:63], 0xc ; C206BD0C
s_buffer_load_dword s16, s[60:63], 0xd ; C2083D0D
s_buffer_load_dword s12, s[60:63], 0xe ; C2063D0E
s_buffer_load_dword s9, s[60:63], 0x0 ; C204BD00
s_buffer_load_dword s10, s[60:63], 0x1 ; C2053D01
s_buffer_load_dword s8, s[60:63], 0x2 ; C2043D02
s_buffer_load_dword s2, s[60:63], 0x3 ; C2013D03
s_buffer_load_dword s11, s[60:63], 0x4 ; C205BD04
s_buffer_load_dword s3, s[60:63], 0xf ; C201BD0F
s_buffer_load_dword s18, s[60:63], 0x10 ; C2093D10
s_buffer_load_dword s27, s[60:63], 0x11 ; C20DBD11
s_buffer_load_dword s14, s[60:63], 0x12 ; C2073D12
s_buffer_load_dword s4, s[60:63], 0x13 ; C2023D13
s_buffer_load_dword s25, s[60:63], 0x14 ; C20CBD14
s_buffer_load_dword s38, s[60:63], 0x15 ; C2133D15
s_buffer_load_dword s17, s[60:63], 0x16 ; C208BD16
s_buffer_load_dword s5, s[60:63], 0x17 ; C202BD17
s_buffer_load_dword s6, s[60:63], 0x18 ; C2033D18
s_buffer_load_dword s69, s[60:63], 0x42 ; C222BD42
s_buffer_load_dword s70, s[60:63], 0x43 ; C2233D43
s_buffer_load_dword s50, s[60:63], 0x44 ; C2193D44
s_buffer_load_dword s51, s[60:63], 0x45 ; C219BD45
s_buffer_load_dword s49, s[60:63], 0x46 ; C218BD46
s_buffer_load_dword s19, s[60:63], 0x5 ; C209BD05
s_buffer_load_dword s15, s[60:63], 0x6 ; C207BD06
s_buffer_load_dword s7, s[60:63], 0x7 ; C203BD07
s_buffer_load_dword s24, s[60:63], 0x8 ; C20C3D08
s_buffer_load_dword s26, s[60:63], 0x9 ; C20D3D09
s_buffer_load_dword s36, s[60:63], 0x19 ; C2123D19
s_buffer_load_dword s37, s[60:63], 0x1a ; C212BD1A
s_buffer_load_dword s59, s[60:63], 0x1c ; C21DBD1C
s_buffer_load_dword s58, s[60:63], 0x1d ; C21D3D1D
s_buffer_load_dword s57, s[60:63], 0x1e ; C21CBD1E
s_buffer_load_dword s56, s[60:63], 0x20 ; C21C3D20
s_buffer_load_dword s54, s[60:63], 0x21 ; C21B3D21
s_buffer_load_dword s55, s[60:63], 0x22 ; C21BBD22
s_buffer_load_dword s52, s[60:63], 0x24 ; C21A3D24
s_buffer_load_dword s53, s[60:63], 0x25 ; C21ABD25
s_buffer_load_dword s71, s[60:63], 0x47 ; C223BD47
s_buffer_load_dword s72, s[60:63], 0x48 ; C2243D48
s_buffer_load_dword s73, s[60:63], 0x49 ; C224BD49
s_buffer_load_dword s74, s[60:63], 0x4a ; C2253D4A
s_buffer_load_dword s75, s[60:63], 0x4b ; C225BD4B
s_buffer_load_dword s76, s[60:63], 0x4c ; C2263D4C
s_buffer_load_dword s77, s[60:63], 0x4d ; C226BD4D
; Last constant load overwrites s60, the first dword of the descriptor it reads through;
; no s_buffer_load_dword follows it.
s_buffer_load_dword s60, s[60:63], 0x4f ; C21E3D4F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Three interleaved multiply-accumulate chains over the fetched inputs:
;   v0, v14, v15, v16 - position (constants at 0x40..0x4f times v6..v9); TGSI 42-45
;   v17, v18, v10     - unnormalized 3-vector (0x2c/0x30/0x34 columns times v10, v11); TGSI 55-56
;   v19, v20, v6      - constants at 0x1c..0x2a times v6..v9; TGSI 79-82
v_mul_f32_e32 v0, s67, v6 ; 10000C43
v_mul_f32_e32 v14, s68, v6 ; 101C0C44
v_mul_f32_e32 v15, s69, v6 ; 101E0C45
v_mul_f32_e32 v16, s70, v6 ; 10200C46
v_mul_f32_e32 v17, s64, v10 ; 10221440
v_mul_f32_e32 v18, s65, v10 ; 10241441
v_mul_f32_e32 v10, s66, v10 ; 10141442
v_mul_f32_e32 v19, s59, v6 ; 10260C3B
v_mul_f32_e32 v20, s58, v6 ; 10280C3A
v_mul_f32_e32 v6, s57, v6 ; 100C0C39
v_mac_f32_e32 v0, s50, v7 ; 3E000E32
v_mac_f32_e32 v14, s51, v7 ; 3E1C0E33
v_mac_f32_e32 v15, s49, v7 ; 3E1E0E31
v_mac_f32_e32 v16, s71, v7 ; 3E200E47
v_mac_f32_e32 v17, s46, v11 ; 3E22162E
v_mac_f32_e32 v18, s47, v11 ; 3E24162F
v_mac_f32_e32 v10, s48, v11 ; 3E141630
v_mac_f32_e32 v19, s56, v7 ; 3E260E38
v_mac_f32_e32 v20, s54, v7 ; 3E280E36
v_mac_f32_e32 v6, s55, v7 ; 3E0C0E37
v_mac_f32_e32 v0, s72, v8 ; 3E001048
v_mac_f32_e32 v14, s73, v8 ; 3E1C1049
v_mac_f32_e32 v15, s74, v8 ; 3E1E104A
v_mac_f32_e32 v16, s75, v8 ; 3E20104B
v_mac_f32_e32 v19, s52, v8 ; 3E261034
v_mac_f32_e32 v20, s53, v8 ; 3E281035
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
v_mac_f32_e32 v0, s76, v9 ; 3E00124C
v_mac_f32_e32 v14, s77, v9 ; 3E1C124D
v_mac_f32_e32 v15, s78, v9 ; 3E1E124E
; Decode of the third input (v12 = x, v13 = y), TGSI 0-16:
;   v3 = int(x * 0.25), v4 = int(y), v8 = x - float(v3 << 2), v7 = y - float(int(y)).
v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000
v_cvt_i32_f32_e32 v4, v13 ; 7E08110D
v_cvt_i32_f32_e32 v3, v3 ; 7E061103
v_mac_f32_e32 v16, s60, v9 ; 3E20123C
v_mac_f32_e32 v19, s44, v9 ; 3E26122C
v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04
v_lshlrev_b32_e32 v8, 2, v3 ; 34100682
v_cvt_f32_i32_e32 v8, v8 ; 7E100B08
v_mac_f32_e32 v20, s43, v9 ; 3E28122B
v_mac_f32_e32 v6, s42, v9 ; 3E0C122A
v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07
v_subrev_f32_e32 v8, v8, v12 ; 0A101908
; v1 = int(v7 * 10.0 (0x41200000) + 0.4999 (v1)); selects one of three fields below.
v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000
; Shift/mask form of the IR's "sdiv %103, 256" and its remainder: a bias taken from the
; sign bits is added to v4, the low 8 bits are masked off and subtracted to leave the
; remainder in v4, and the arithmetic shift by 8 further down leaves the quotient in v7.
v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F
v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_add_i32_e32 v7, v4, v7 ; 4A0E0F04
v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00
v_sub_i32_e32 v4, v4, v9 ; 4C081304
; 1.0/0.0 selectors: v9 for v1 == 0, v11 for v1 == 1, v21 for v1 == 2.
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cndmask_b32_e64 v11, 0, 1.0, vcc ; D200000B 01A9E480
v_ashrrev_i32_e32 v7, 8, v7 ; 300E0E88
v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07
v_cvt_f32_i32_e32 v12, v3 ; 7E180B03
v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04
v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282
; Selected field = v7*v11 + v12*v9 + v13*v21 (+ 0), scaled by 0x3b808081 into the
; x coordinate v3; v4 = 0x3dcccccd (0.1) is the y coordinate.
v_mul_f32_e32 v1, v7, v11 ; 10021707
v_mac_f32_e32 v1, v12, v9 ; 3E02130C
v_cndmask_b32_e64 v21, 0, 1.0, vcc ; D2000015 01A9E480
v_mac_f32_e32 v1, v13, v21 ; 3E022B0D
v_add_f32_e32 v1, 0, v1 ; 06020280
v_mul_f32_e32 v3, 0x3b808081, v1 ; 100602FF 3B808081
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
; Texture fetch for TGSI 33 (TXL); two result components land in v3 and v4.
image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[20:23] ; F0900300 00A70303
; Work that does not depend on the fetch: finish the 3-vector with the (v8 - 2.0) term,
; form the three "field - 1.0" values, and compute rsq of the squared length.
v_add_f32_e32 v1, -2.0, v8 ; 060210F5
v_mac_f32_e32 v17, s39, v1 ; 3E220227
v_mac_f32_e32 v18, s40, v1 ; 3E240228
v_mac_f32_e32 v10, s41, v1 ; 3E140229
v_add_f32_e32 v1, -1.0, v12 ; 060218F3
v_add_f32_e32 v5, -1.0, v7 ; 060A0EF3
v_add_f32_e32 v7, -1.0, v13 ; 060E1AF3
v_mul_f32_e32 v8, v17, v17 ; 10102311
v_mac_f32_e32 v8, v18, v18 ; 3E102512
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
s_waitcnt vmcnt(0) ; BF8C0770
; v2 = texel.x * 16.0 (0x41800000) + (-8.0); then the export with target 32.
v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000
exp 15, 32, 0, 0, 0, v1, v5, v7, v2 ; F800020F 02070501
; Each "s_waitcnt expcnt(0)" below is followed by a write to a register the preceding
; export read (presumably the reason for the wait).
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v1, 4.0, v4 ; 100208F6
exp 15, 33, 0, 0, 0, v9, v11, v21, v1 ; F800021F 01150B09
s_waitcnt expcnt(0) ; BF8C070F
; Normalized vector n = (v1, v2, v3), followed by the terms of TGSI 63-77 built from
; constants 0x0..0x1a: products z*y (v4), y*x (v8), z*z (v8) and, after the export,
; z*x (v3) and x*x - y*y (v1).
v_mul_f32_e32 v1, v8, v17 ; 10022308
v_mul_f32_e32 v2, v8, v18 ; 10042508
v_mul_f32_e32 v3, v8, v10 ; 10061508
v_mul_f32_e32 v4, v3, v2 ; 10080503
v_mul_f32_e32 v5, s16, v4 ; 100A0810
v_mul_f32_e32 v7, s27, v4 ; 100E081B
v_mul_f32_e32 v4, s38, v4 ; 10080826
v_mul_f32_e32 v8, v2, v1 ; 10100302
v_mac_f32_e32 v5, s13, v8 ; 3E0A100D
v_mac_f32_e32 v7, s18, v8 ; 3E0E1012
v_mac_f32_e32 v4, s25, v8 ; 3E081019
v_mul_f32_e32 v8, v3, v3 ; 10100703
v_mac_f32_e32 v5, s12, v8 ; 3E0A100C
v_mac_f32_e32 v7, s14, v8 ; 3E0E100E
v_mac_f32_e32 v4, s17, v8 ; 3E081011
v_mul_f32_e32 v8, s10, v2 ; 1010040A
v_mac_f32_e32 v8, s9, v1 ; 3E100209
v_mul_f32_e32 v9, s19, v2 ; 10120413
v_mac_f32_e32 v9, s11, v1 ; 3E12020B
v_mul_f32_e32 v10, s26, v2 ; 1014041A
v_mac_f32_e32 v10, s24, v1 ; 3E140218
v_mac_f32_e32 v8, s8, v3 ; 3E100608
v_mac_f32_e32 v9, s15, v3 ; 3E12060F
v_mac_f32_e32 v10, s1, v3 ; 3E140601
; Export with target 34: position z accumulator (v15) followed by n.
exp 15, 34, 0, 0, 0, v15, v1, v2, v3 ; F800022F 0302010F
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, v1, v3 ; 10060701
v_mul_f32_e32 v2, v2, v2 ; 10040502
v_mad_f32 v1, v1, v1, -v2 ; D2820001 840A0301
v_add_f32_e32 v2, s2, v8 ; 06041002
v_mac_f32_e32 v5, s3, v3 ; 3E0A0603
v_mac_f32_e32 v7, s4, v3 ; 3E0E0604
v_mac_f32_e32 v4, s5, v3 ; 3E080605
v_mac_f32_e32 v5, s6, v1 ; 3E0A0206
v_mac_f32_e32 v7, s36, v1 ; 3E0E0224
v_mac_f32_e32 v4, s37, v1 ; 3E080225
v_add_f32_e32 v1, v2, v5 ; 06020B02
; Export with target 35: the v19/v20/v6 chain plus the first summed term.
exp 15, 35, 0, 0, 0, v19, v20, v6, v1 ; F800023F 01061413
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v1, s7, v9 ; 06021207
v_add_f32_e32 v1, v1, v7 ; 06020F01
v_add_f32_e32 v2, s0, v10 ; 06041400
v_add_f32_e32 v2, v2, v4 ; 06040902
; Export with target 36 (v2 appears twice, as %251 does in the IR), then the export with
; target 12 carrying the position accumulators v0, v14, v15, v16.
exp 15, 36, 0, 0, 0, v1, v2, v2, v3 ; F800024F 03020201
exp 15, 12, 0, 1, 0, v0, v14, v15, v16 ; F80008CF 100F0E00
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 88
VGPRS: 24
Code Size: 928 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SAMP[6]
DCL SAMP[7]
DCL SAMP[8]
DCL SAMP[9]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL SVIEW[6], 2D, FLOAT
DCL SVIEW[7], 2D, FLOAT
DCL SVIEW[8], 2D, FLOAT
DCL SVIEW[9], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[15..23]
DCL TEMP[0..37], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001}
IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[3].wwww
1: MOV TEMP[0].yz, IN[4].yxyy
2: DP3 TEMP[1].x, CONST[1].xyzz, CONST[1].xyzz
3: RSQ TEMP[1].x, TEMP[1].xxxx
4: MUL TEMP[1].xyz, CONST[1].xyzz, TEMP[1].xxxx
5: ADD TEMP[2].xyz, CONST[0].xyzz, -IN[3].xyzz
6: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
7: RSQ TEMP[3].x, TEMP[3].xxxx
8: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
9: ABS TEMP[3].xyz, IN[2].yzww
10: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
11: RSQ TEMP[4].x, TEMP[4].xxxx
12: MAD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx, IMM[0].xxxx
13: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[0].yyyy
14: MAX TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz
15: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].yyyy
16: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[3].zzzz
17: RCP TEMP[4].xyz, TEMP[4].xxxx
18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz
19: ADD TEMP[4], IN[0], IMM[0].wwww
20: FLR TEMP[4].xyz, TEMP[4]
21: MOV TEMP[5].x, CONST[15].xxxx
22: MUL TEMP[6].x, TEMP[4].xxxx, CONST[15].xxxx
23: MOV TEMP[7].x, TEMP[6].xxxx
24: FLR TEMP[6].x, TEMP[6].xxxx
25: MUL TEMP[6].x, TEMP[6].xxxx, CONST[15].xxxx
26: FSGE TEMP[8].x, TEMP[4].xxxx, IMM[1].xxxx
27: UIF TEMP[8].xxxx :0
28: MOV TEMP[5].x, CONST[16].xxxx
29: ADD TEMP[8].x, TEMP[4].xxxx, IMM[1].yyyy
30: MUL TEMP[8].x, TEMP[8].xxxx, CONST[16].xxxx
31: MOV TEMP[7].x, TEMP[8].xxxx
32: FLR TEMP[9].x, TEMP[8].xxxx
33: MUL TEMP[9].x, TEMP[9].xxxx, CONST[16].xxxx
34: MOV TEMP[6].x, TEMP[9].xxxx
35: FRC TEMP[8].x, TEMP[8].xxxx
36: FRC TEMP[10].x, TEMP[9].xxxx
37: MOV TEMP[8].y, TEMP[10].xxxx
38: FLR TEMP[9].x, TEMP[9].xxxx
39: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
40: MOV TEMP[8].z, TEMP[9].xxxx
41: MOV TEMP[8].xyz, TEMP[8].xyzx
42: ELSE :0
43: FRC TEMP[7].x, TEMP[7].xxxx
44: FRC TEMP[9].x, TEMP[6].xxxx
45: MOV TEMP[7].y, TEMP[9].xxxx
46: FLR TEMP[6].x, TEMP[6].xxxx
47: MOV TEMP[7].z, TEMP[6].xxxx
48: MOV TEMP[8].xyz, TEMP[7].xyzx
49: ENDIF
50: MOV TEMP[6].x, CONST[15].xxxx
51: MUL TEMP[7].x, TEMP[4].yyyy, CONST[15].xxxx
52: MOV TEMP[9].x, TEMP[7].xxxx
53: FLR TEMP[7].x, TEMP[7].xxxx
54: MUL TEMP[7].x, TEMP[7].xxxx, CONST[15].xxxx
55: FSGE TEMP[10].x, TEMP[4].yyyy, IMM[1].xxxx
56: UIF TEMP[10].xxxx :0
57: MOV TEMP[6].x, CONST[16].xxxx
58: ADD TEMP[10].x, TEMP[4].yyyy, IMM[1].yyyy
59: MUL TEMP[10].x, TEMP[10].xxxx, CONST[16].xxxx
60: MOV TEMP[9].x, TEMP[10].xxxx
61: FLR TEMP[11].x, TEMP[10].xxxx
62: MUL TEMP[11].x, TEMP[11].xxxx, CONST[16].xxxx
63: MOV TEMP[7].x, TEMP[11].xxxx
64: FRC TEMP[10].x, TEMP[10].xxxx
65: FRC TEMP[12].x, TEMP[11].xxxx
66: MOV TEMP[10].y, TEMP[12].xxxx
67: FLR TEMP[11].x, TEMP[11].xxxx
68: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
69: MOV TEMP[10].z, TEMP[11].xxxx
70: MOV TEMP[10].xyz, TEMP[10].xyzx
71: ELSE :0
72: FRC TEMP[9].x, TEMP[9].xxxx
73: FRC TEMP[11].x, TEMP[7].xxxx
74: MOV TEMP[9].y, TEMP[11].xxxx
75: FLR TEMP[7].x, TEMP[7].xxxx
76: MOV TEMP[9].z, TEMP[7].xxxx
77: MOV TEMP[10].xyz, TEMP[9].xyzx
78: ENDIF
79: MOV TEMP[7].x, CONST[15].xxxx
80: MUL TEMP[9].x, TEMP[4].zzzz, CONST[15].xxxx
81: MOV TEMP[11].x, TEMP[9].xxxx
82: FLR TEMP[9].x, TEMP[9].xxxx
83: MUL TEMP[9].x, TEMP[9].xxxx, CONST[15].xxxx
84: FSGE TEMP[12].x, TEMP[4].zzzz, IMM[1].xxxx
85: UIF TEMP[12].xxxx :0
86: MOV TEMP[7].x, CONST[16].xxxx
87: ADD TEMP[4].x, TEMP[4].zzzz, IMM[1].yyyy
88: MUL TEMP[4].x, TEMP[4].xxxx, CONST[16].xxxx
89: MOV TEMP[11].x, TEMP[4].xxxx
90: FLR TEMP[12].x, TEMP[4].xxxx
91: MUL TEMP[12].x, TEMP[12].xxxx, CONST[16].xxxx
92: MOV TEMP[9].x, TEMP[12].xxxx
93: FRC TEMP[4].x, TEMP[4].xxxx
94: FRC TEMP[13].x, TEMP[12].xxxx
95: MOV TEMP[4].y, TEMP[13].xxxx
96: FLR TEMP[12].x, TEMP[12].xxxx
97: ADD TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
98: MOV TEMP[4].z, TEMP[12].xxxx
99: MOV TEMP[4].xyz, TEMP[4].xyzx
100: ELSE :0
101: FRC TEMP[11].x, TEMP[11].xxxx
102: FRC TEMP[12].x, TEMP[9].xxxx
103: MOV TEMP[11].y, TEMP[12].xxxx
104: FLR TEMP[9].x, TEMP[9].xxxx
105: MOV TEMP[11].z, TEMP[9].xxxx
106: MOV TEMP[4].xyz, TEMP[11].xyzx
107: ENDIF
108: ADD TEMP[9].xyz, IN[3].xyzz, -CONST[0].xyzz
109: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[9].xyzz
110: MUL TEMP[9].x, CONST[21].xxxx, TEMP[9].xxxx
111: LG2 TEMP[9].x, TEMP[9].xxxx
112: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].wwww
113: MUL TEMP[9].x, TEMP[9].xxxx, CONST[20].xxxx
114: MOV TEMP[11].xy, IN[3].xyxx
115: MOV TEMP[12].x, IMM[2].xxxx
116: FSNE TEMP[13].x, CONST[15].xxxx, TEMP[5].xxxx
117: UIF TEMP[13].xxxx :0
118: MOV TEMP[12].x, IMM[2].yyyy
119: RCP TEMP[13].x, CONST[18].xxxx
120: MUL TEMP[11].xy, IN[3].xyyy, TEMP[13].xxxx
121: ELSE :0
122: RCP TEMP[13].x, CONST[17].xxxx
123: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
124: ENDIF
125: FRC TEMP[11].xy, TEMP[11].xyyy
126: MUL TEMP[13].x, CONST[19].xxxx, IMM[2].wwww
127: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
128: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
129: MUL TEMP[12].x, TEMP[12].xxxx, CONST[19].xxxx
130: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
131: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
132: MOV TEMP[12].xy, TEMP[11].xyyy
133: MOV TEMP[12].w, TEMP[9].xxxx
134: TXL TEMP[12], TEMP[12], SAMP[8], 2D
135: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz
136: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
137: MOV TEMP[14].xy, TEMP[11].xyyy
138: MOV TEMP[14].w, TEMP[9].xxxx
139: TXL TEMP[14], TEMP[14], SAMP[6], 2D
140: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx
141: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
142: MOV TEMP[16].xy, TEMP[11].xyyy
143: MOV TEMP[16].w, TEMP[9].xxxx
144: TXL TEMP[16], TEMP[16], SAMP[4], 2D
145: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww
146: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
147: MOV TEMP[18].xy, TEMP[11].xyyy
148: MOV TEMP[18].w, TEMP[9].xxxx
149: TXL TEMP[18], TEMP[18], SAMP[2], 2D
150: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz
151: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
152: MOV TEMP[11].xy, TEMP[11].xyyy
153: MOV TEMP[11].w, TEMP[9].xxxx
154: TXL TEMP[11], TEMP[11], SAMP[0], 2D
155: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[3].yyyy
156: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
157: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
158: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
159: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
160: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
161: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11]
162: MOV TEMP[12].xy, IN[3].zyzz
163: MOV TEMP[13].x, IMM[2].xxxx
164: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[5].xxxx
165: UIF TEMP[14].xxxx :0
166: MOV TEMP[13].x, IMM[2].yyyy
167: RCP TEMP[14].x, CONST[18].xxxx
168: MUL TEMP[12].xy, IN[3].zyyy, TEMP[14].xxxx
169: ELSE :0
170: RCP TEMP[14].x, CONST[17].xxxx
171: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
172: ENDIF
173: FRC TEMP[12].xy, TEMP[12].xyyy
174: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww
175: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
176: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
177: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx
178: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
179: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
180: MOV TEMP[13].xy, TEMP[12].xyyy
181: MOV TEMP[13].w, TEMP[9].xxxx
182: TXL TEMP[13], TEMP[13], SAMP[8], 2D
183: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz
184: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
185: MOV TEMP[15].xy, TEMP[12].xyyy
186: MOV TEMP[15].w, TEMP[9].xxxx
187: TXL TEMP[15], TEMP[15], SAMP[6], 2D
188: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx
189: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
190: MOV TEMP[17].xy, TEMP[12].xyyy
191: MOV TEMP[17].w, TEMP[9].xxxx
192: TXL TEMP[17], TEMP[17], SAMP[4], 2D
193: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww
194: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
195: MOV TEMP[19].xy, TEMP[12].xyyy
196: MOV TEMP[19].w, TEMP[9].xxxx
197: TXL TEMP[19], TEMP[19], SAMP[2], 2D
198: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz
199: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
200: MOV TEMP[12].xy, TEMP[12].xyyy
201: MOV TEMP[12].w, TEMP[9].xxxx
202: TXL TEMP[12], TEMP[12], SAMP[0], 2D
203: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy
204: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
205: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
206: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
207: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
208: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
209: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12]
210: MOV TEMP[13].xy, IN[3].zxzz
211: MOV TEMP[14].x, IMM[2].xxxx
212: FSNE TEMP[15].x, CONST[15].xxxx, TEMP[5].xxxx
213: UIF TEMP[15].xxxx :0
214: MOV TEMP[14].x, IMM[2].yyyy
215: RCP TEMP[15].x, CONST[18].xxxx
216: MUL TEMP[13].xy, IN[3].zxxx, TEMP[15].xxxx
217: ELSE :0
218: RCP TEMP[15].x, CONST[17].xxxx
219: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
220: ENDIF
221: FRC TEMP[13].xy, TEMP[13].xyyy
222: MUL TEMP[15].x, CONST[19].xxxx, IMM[2].wwww
223: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
224: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
225: MUL TEMP[14].x, TEMP[14].xxxx, CONST[19].xxxx
226: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
227: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
228: MOV TEMP[14].xy, TEMP[13].xyyy
229: MOV TEMP[14].w, TEMP[9].xxxx
230: TXL TEMP[14], TEMP[14], SAMP[8], 2D
231: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz
232: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
233: MOV TEMP[16].xy, TEMP[13].xyyy
234: MOV TEMP[16].w, TEMP[9].xxxx
235: TXL TEMP[16], TEMP[16], SAMP[6], 2D
236: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx
237: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
238: MOV TEMP[18].xy, TEMP[13].xyyy
239: MOV TEMP[18].w, TEMP[9].xxxx
240: TXL TEMP[18], TEMP[18], SAMP[4], 2D
241: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww
242: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
243: MOV TEMP[20].xy, TEMP[13].xyyy
244: MOV TEMP[20].w, TEMP[9].xxxx
245: TXL TEMP[20], TEMP[20], SAMP[2], 2D
246: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz
247: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
248: MOV TEMP[13].xy, TEMP[13].xyyy
249: MOV TEMP[13].w, TEMP[9].xxxx
250: TXL TEMP[13], TEMP[13], SAMP[0], 2D
251: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy
252: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
253: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
254: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
255: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
256: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
257: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13]
258: MOV TEMP[14].xy, IN[3].xyxx
259: MOV TEMP[15].x, IMM[2].xxxx
260: FSNE TEMP[16].x, CONST[15].xxxx, TEMP[6].xxxx
261: UIF TEMP[16].xxxx :0
262: MOV TEMP[15].x, IMM[2].yyyy
263: RCP TEMP[16].x, CONST[18].xxxx
264: MUL TEMP[14].xy, IN[3].xyyy, TEMP[16].xxxx
265: ELSE :0
266: RCP TEMP[16].x, CONST[17].xxxx
267: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
268: ENDIF
269: FRC TEMP[14].xy, TEMP[14].xyyy
270: MUL TEMP[16].x, CONST[19].xxxx, IMM[2].wwww
271: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
272: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
273: MUL TEMP[15].x, TEMP[15].xxxx, CONST[19].xxxx
274: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
275: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
276: MOV TEMP[15].xy, TEMP[14].xyyy
277: MOV TEMP[15].w, TEMP[9].xxxx
278: TXL TEMP[15], TEMP[15], SAMP[8], 2D
279: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[1].zzzz
280: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
281: MOV TEMP[17].xy, TEMP[14].xyyy
282: MOV TEMP[17].w, TEMP[9].xxxx
283: TXL TEMP[17], TEMP[17], SAMP[6], 2D
284: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[3].xxxx
285: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
286: MOV TEMP[19].xy, TEMP[14].xyyy
287: MOV TEMP[19].w, TEMP[9].xxxx
288: TXL TEMP[19], TEMP[19], SAMP[4], 2D
289: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].wwww
290: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
291: MOV TEMP[21].xy, TEMP[14].xyyy
292: MOV TEMP[21].w, TEMP[9].xxxx
293: TXL TEMP[21], TEMP[21], SAMP[2], 2D
294: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].zzzz
295: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
296: MOV TEMP[14].xy, TEMP[14].xyyy
297: MOV TEMP[14].w, TEMP[9].xxxx
298: TXL TEMP[14], TEMP[14], SAMP[0], 2D
299: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[3].yyyy
300: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
301: MUL TEMP[14], TEMP[14], TEMP[23].xxxx
302: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14]
303: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14]
304: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14]
305: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14]
306: MOV TEMP[15].xy, IN[3].zyzz
307: MOV TEMP[16].x, IMM[2].xxxx
308: FSNE TEMP[17].x, CONST[15].xxxx, TEMP[6].xxxx
309: UIF TEMP[17].xxxx :0
310: MOV TEMP[16].x, IMM[2].yyyy
311: RCP TEMP[17].x, CONST[18].xxxx
312: MUL TEMP[15].xy, IN[3].zyyy, TEMP[17].xxxx
313: ELSE :0
314: RCP TEMP[17].x, CONST[17].xxxx
315: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx
316: ENDIF
317: FRC TEMP[15].xy, TEMP[15].xyyy
318: MUL TEMP[17].x, CONST[19].xxxx, IMM[2].wwww
319: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx
320: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx
321: MUL TEMP[16].x, TEMP[16].xxxx, CONST[19].xxxx
322: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx
323: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
324: MOV TEMP[16].xy, TEMP[15].xyyy
325: MOV TEMP[16].w, TEMP[9].xxxx
326: TXL TEMP[16], TEMP[16], SAMP[8], 2D
327: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[1].zzzz
328: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
329: MOV TEMP[18].xy, TEMP[15].xyyy
330: MOV TEMP[18].w, TEMP[9].xxxx
331: TXL TEMP[18], TEMP[18], SAMP[6], 2D
332: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[3].xxxx
333: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
334: MOV TEMP[20].xy, TEMP[15].xyyy
335: MOV TEMP[20].w, TEMP[9].xxxx
336: TXL TEMP[20], TEMP[20], SAMP[4], 2D
337: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[2].wwww
338: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
339: MOV TEMP[22].xy, TEMP[15].xyyy
340: MOV TEMP[22].w, TEMP[9].xxxx
341: TXL TEMP[22], TEMP[22], SAMP[2], 2D
342: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[2].zzzz
343: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
344: MOV TEMP[15].xy, TEMP[15].xyyy
345: MOV TEMP[15].w, TEMP[9].xxxx
346: TXL TEMP[15], TEMP[15], SAMP[0], 2D
347: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[3].yyyy
348: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
349: MUL TEMP[15], TEMP[15], TEMP[24].xxxx
350: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15]
351: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15]
352: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15]
353: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15]
354: MOV TEMP[16].xy, IN[3].zxzz
355: MOV TEMP[17].x, IMM[2].xxxx
356: FSNE TEMP[18].x, CONST[15].xxxx, TEMP[6].xxxx
357: UIF TEMP[18].xxxx :0
358: MOV TEMP[17].x, IMM[2].yyyy
359: RCP TEMP[18].x, CONST[18].xxxx
360: MUL TEMP[16].xy, IN[3].zxxx, TEMP[18].xxxx
361: ELSE :0
362: RCP TEMP[18].x, CONST[17].xxxx
363: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx
364: ENDIF
365: FRC TEMP[16].xy, TEMP[16].xyyy
366: MUL TEMP[18].x, CONST[19].xxxx, IMM[2].wwww
367: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx
368: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx
369: MUL TEMP[17].x, TEMP[17].xxxx, CONST[19].xxxx
370: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx
371: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
372: MOV TEMP[17].xy, TEMP[16].xyyy
373: MOV TEMP[17].w, TEMP[9].xxxx
374: TXL TEMP[17], TEMP[17], SAMP[8], 2D
375: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[1].zzzz
376: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
377: MOV TEMP[19].xy, TEMP[16].xyyy
378: MOV TEMP[19].w, TEMP[9].xxxx
379: TXL TEMP[19], TEMP[19], SAMP[6], 2D
380: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].xxxx
381: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
382: MOV TEMP[21].xy, TEMP[16].xyyy
383: MOV TEMP[21].w, TEMP[9].xxxx
384: TXL TEMP[21], TEMP[21], SAMP[4], 2D
385: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].wwww
386: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
387: MOV TEMP[23].xy, TEMP[16].xyyy
388: MOV TEMP[23].w, TEMP[9].xxxx
389: TXL TEMP[23], TEMP[23], SAMP[2], 2D
390: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[2].zzzz
391: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
392: MOV TEMP[16].xy, TEMP[16].xyyy
393: MOV TEMP[16].w, TEMP[9].xxxx
394: TXL TEMP[16], TEMP[16], SAMP[0], 2D
395: FSEQ TEMP[25].x, TEMP[10].zzzz, IMM[3].yyyy
396: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
397: MUL TEMP[16], TEMP[16], TEMP[25].xxxx
398: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16]
399: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16]
400: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16]
401: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16]
402: MOV TEMP[17].xy, IN[3].xyxx
403: MOV TEMP[18].x, IMM[2].xxxx
404: FSNE TEMP[19].x, CONST[15].xxxx, TEMP[7].xxxx
405: UIF TEMP[19].xxxx :0
406: MOV TEMP[18].x, IMM[2].yyyy
407: RCP TEMP[19].x, CONST[18].xxxx
408: MUL TEMP[17].xy, IN[3].xyyy, TEMP[19].xxxx
409: ELSE :0
410: RCP TEMP[19].x, CONST[17].xxxx
411: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx
412: ENDIF
413: FRC TEMP[17].xy, TEMP[17].xyyy
414: MUL TEMP[19].x, CONST[19].xxxx, IMM[2].wwww
415: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx
416: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx
417: MUL TEMP[18].x, TEMP[18].xxxx, CONST[19].xxxx
418: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx
419: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
420: MOV TEMP[18].xy, TEMP[17].xyyy
421: MOV TEMP[18].w, TEMP[9].xxxx
422: TXL TEMP[18], TEMP[18], SAMP[8], 2D
423: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[1].zzzz
424: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
425: MOV TEMP[20].xy, TEMP[17].xyyy
426: MOV TEMP[20].w, TEMP[9].xxxx
427: TXL TEMP[20], TEMP[20], SAMP[6], 2D
428: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[3].xxxx
429: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
430: MOV TEMP[22].xy, TEMP[17].xyyy
431: MOV TEMP[22].w, TEMP[9].xxxx
432: TXL TEMP[22], TEMP[22], SAMP[4], 2D
433: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[2].wwww
434: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
435: MOV TEMP[24].xy, TEMP[17].xyyy
436: MOV TEMP[24].w, TEMP[9].xxxx
437: TXL TEMP[24], TEMP[24], SAMP[2], 2D
438: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].zzzz
439: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
440: MOV TEMP[17].xy, TEMP[17].xyyy
441: MOV TEMP[17].w, TEMP[9].xxxx
442: TXL TEMP[17], TEMP[17], SAMP[0], 2D
443: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[3].yyyy
444: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
445: MUL TEMP[17], TEMP[17], TEMP[26].xxxx
446: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17]
447: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17]
448: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17]
449: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17]
450: MOV TEMP[18].xy, IN[3].zyzz
451: MOV TEMP[19].x, IMM[2].xxxx
452: FSNE TEMP[20].x, CONST[15].xxxx, TEMP[7].xxxx
453: UIF TEMP[20].xxxx :0
454: MOV TEMP[19].x, IMM[2].yyyy
455: RCP TEMP[20].x, CONST[18].xxxx
456: MUL TEMP[18].xy, IN[3].zyyy, TEMP[20].xxxx
457: ELSE :0
458: RCP TEMP[20].x, CONST[17].xxxx
459: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx
460: ENDIF
461: FRC TEMP[18].xy, TEMP[18].xyyy
462: MUL TEMP[20].x, CONST[19].xxxx, IMM[2].wwww
463: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx
464: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx
465: MUL TEMP[19].x, TEMP[19].xxxx, CONST[19].xxxx
466: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx
467: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
468: MOV TEMP[19].xy, TEMP[18].xyyy
469: MOV TEMP[19].w, TEMP[9].xxxx
470: TXL TEMP[19], TEMP[19], SAMP[8], 2D
471: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[1].zzzz
472: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
473: MOV TEMP[21].xy, TEMP[18].xyyy
474: MOV TEMP[21].w, TEMP[9].xxxx
475: TXL TEMP[21], TEMP[21], SAMP[6], 2D
476: FSEQ TEMP[22].x, TEMP[4].zzzz, IMM[3].xxxx
477: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
478: MOV TEMP[23].xy, TEMP[18].xyyy
479: MOV TEMP[23].w, TEMP[9].xxxx
480: TXL TEMP[23], TEMP[23], SAMP[4], 2D
481: FSEQ TEMP[24].x, TEMP[4].zzzz, IMM[2].wwww
482: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
483: MOV TEMP[25].xy, TEMP[18].xyyy
484: MOV TEMP[25].w, TEMP[9].xxxx
485: TXL TEMP[25], TEMP[25], SAMP[2], 2D
486: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[2].zzzz
487: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
488: MOV TEMP[18].xy, TEMP[18].xyyy
489: MOV TEMP[18].w, TEMP[9].xxxx
490: TXL TEMP[18], TEMP[18], SAMP[0], 2D
491: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[3].yyyy
492: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
493: MUL TEMP[18], TEMP[18], TEMP[27].xxxx
494: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18]
495: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18]
496: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18]
497: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18]
498: MOV TEMP[19].xy, IN[3].zxzz
499: MOV TEMP[20].x, IMM[2].xxxx
500: FSNE TEMP[21].x, CONST[15].xxxx, TEMP[7].xxxx
501: UIF TEMP[21].xxxx :0
502: MOV TEMP[20].x, IMM[2].yyyy
503: RCP TEMP[21].x, CONST[18].xxxx
504: MUL TEMP[19].xy, IN[3].zxxx, TEMP[21].xxxx
505: ELSE :0
506: RCP TEMP[21].x, CONST[17].xxxx
507: MUL TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx
508: ENDIF
509: FRC TEMP[19].xy, TEMP[19].xyyy
510: MUL TEMP[21].x, CONST[19].xxxx, IMM[2].wwww
511: MUL TEMP[21].x, TEMP[21].xxxx, TEMP[20].xxxx
512: ADD TEMP[21].x, IMM[2].zzzz, -TEMP[21].xxxx
513: MUL TEMP[20].x, TEMP[20].xxxx, CONST[19].xxxx
514: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx, TEMP[20].xxxx
515: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
516: MOV TEMP[20].xy, TEMP[19].xyyy
517: MOV TEMP[20].w, TEMP[9].xxxx
518: TXL TEMP[20], TEMP[20], SAMP[8], 2D
519: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[1].zzzz
520: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
521: MOV TEMP[22].xy, TEMP[19].xyyy
522: MOV TEMP[22].w, TEMP[9].xxxx
523: TXL TEMP[22], TEMP[22], SAMP[6], 2D
524: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[3].xxxx
525: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
526: MOV TEMP[24].xy, TEMP[19].xyyy
527: MOV TEMP[24].w, TEMP[9].xxxx
528: TXL TEMP[24], TEMP[24], SAMP[4], 2D
529: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].wwww
530: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
531: MOV TEMP[26].xy, TEMP[19].xyyy
532: MOV TEMP[26].w, TEMP[9].xxxx
533: TXL TEMP[26], TEMP[26], SAMP[2], 2D
534: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[2].zzzz
535: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
536: MOV TEMP[19].xy, TEMP[19].xyyy
537: MOV TEMP[19].w, TEMP[9].xxxx
538: TXL TEMP[19], TEMP[19], SAMP[0], 2D
539: FSEQ TEMP[28].x, TEMP[4].zzzz, IMM[3].yyyy
540: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz
541: MUL TEMP[19], TEMP[19], TEMP[28].xxxx
542: MAD TEMP[19], TEMP[26], TEMP[27].xxxx, TEMP[19]
543: MAD TEMP[19], TEMP[24], TEMP[25].xxxx, TEMP[19]
544: MAD TEMP[19], TEMP[22], TEMP[23].xxxx, TEMP[19]
545: MAD TEMP[19], TEMP[20], TEMP[21].xxxx, TEMP[19]
546: MUL TEMP[17], TEMP[17], TEMP[3].zzzz
547: MAD TEMP[17], TEMP[18], TEMP[3].xxxx, TEMP[17]
548: MAD TEMP[17], TEMP[19], TEMP[3].yyyy, TEMP[17]
549: MUL TEMP[14], TEMP[14], TEMP[3].zzzz
550: MAD TEMP[14], TEMP[15], TEMP[3].xxxx, TEMP[14]
551: MAD TEMP[14], TEMP[16], TEMP[3].yyyy, TEMP[14]
552: MUL TEMP[11], TEMP[11], TEMP[3].zzzz
553: MAD TEMP[11], TEMP[12], TEMP[3].xxxx, TEMP[11]
554: MAD TEMP[11], TEMP[13], TEMP[3].yyyy, TEMP[11]
555: MUL TEMP[11], IN[1].xxxx, TEMP[11]
556: MAD TEMP[11], IN[1].yyyy, TEMP[14], TEMP[11]
557: MAD TEMP[11].xyz, IN[1].zzzz, TEMP[17], TEMP[11]
558: MOV TEMP[12].xy, IN[3].zyzz
559: MOV TEMP[13].x, IMM[2].xxxx
560: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[5].xxxx
561: UIF TEMP[14].xxxx :0
562: MOV TEMP[13].x, IMM[2].yyyy
563: RCP TEMP[14].x, CONST[18].xxxx
564: MUL TEMP[12].xy, IN[3].zyyy, TEMP[14].xxxx
565: ELSE :0
566: RCP TEMP[14].x, CONST[17].xxxx
567: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
568: ENDIF
569: FRC TEMP[12].xy, TEMP[12].xyyy
570: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww
571: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
572: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
573: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx
574: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
575: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
576: MOV TEMP[13].xy, TEMP[12].xyyy
577: MOV TEMP[13].w, TEMP[9].xxxx
578: TXL TEMP[13], TEMP[13], SAMP[9], 2D
579: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz
580: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
581: MOV TEMP[15].xy, TEMP[12].xyyy
582: MOV TEMP[15].w, TEMP[9].xxxx
583: TXL TEMP[15], TEMP[15], SAMP[7], 2D
584: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx
585: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
586: MOV TEMP[17].xy, TEMP[12].xyyy
587: MOV TEMP[17].w, TEMP[9].xxxx
588: TXL TEMP[17], TEMP[17], SAMP[5], 2D
589: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww
590: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
591: MOV TEMP[19].xy, TEMP[12].xyyy
592: MOV TEMP[19].w, TEMP[9].xxxx
593: TXL TEMP[19], TEMP[19], SAMP[3], 2D
594: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz
595: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
596: MOV TEMP[12].xy, TEMP[12].xyyy
597: MOV TEMP[12].w, TEMP[9].xxxx
598: TXL TEMP[12], TEMP[12], SAMP[1], 2D
599: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy
600: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
601: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
602: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
603: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
604: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
605: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12]
606: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz
607: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy
608: MOV_SAT TEMP[29].x, TEMP[13].xxxx
609: MOV TEMP[13].xy, IN[3].zxzz
610: MOV TEMP[14].x, IMM[2].xxxx
611: FSNE TEMP[15].x, CONST[15].xxxx, TEMP[5].xxxx
612: UIF TEMP[15].xxxx :0
613: MOV TEMP[14].x, IMM[2].yyyy
614: RCP TEMP[15].x, CONST[18].xxxx
615: MUL TEMP[13].xy, IN[3].zxxx, TEMP[15].xxxx
616: ELSE :0
617: RCP TEMP[15].x, CONST[17].xxxx
618: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
619: ENDIF
620: FRC TEMP[13].xy, TEMP[13].xyyy
621: MUL TEMP[15].x, CONST[19].xxxx, IMM[2].wwww
622: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
623: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
624: MUL TEMP[14].x, TEMP[14].xxxx, CONST[19].xxxx
625: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
626: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
627: MOV TEMP[14].xy, TEMP[13].xyyy
628: MOV TEMP[14].w, TEMP[9].xxxx
629: TXL TEMP[14], TEMP[14], SAMP[9], 2D
630: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz
631: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
632: MOV TEMP[16].xy, TEMP[13].xyyy
633: MOV TEMP[16].w, TEMP[9].xxxx
634: TXL TEMP[16], TEMP[16], SAMP[7], 2D
635: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx
636: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
637: MOV TEMP[18].xy, TEMP[13].xyyy
638: MOV TEMP[18].w, TEMP[9].xxxx
639: TXL TEMP[18], TEMP[18], SAMP[5], 2D
640: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww
641: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
642: MOV TEMP[20].xy, TEMP[13].xyyy
643: MOV TEMP[20].w, TEMP[9].xxxx
644: TXL TEMP[20], TEMP[20], SAMP[3], 2D
645: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz
646: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
647: MOV TEMP[13].xy, TEMP[13].xyyy
648: MOV TEMP[13].w, TEMP[9].xxxx
649: TXL TEMP[13], TEMP[13], SAMP[1], 2D
650: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy
651: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
652: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
653: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
654: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
655: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
656: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13]
657: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz
658: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy
659: MOV_SAT TEMP[30].x, TEMP[14].xxxx
660: MOV TEMP[14].xy, IN[3].xyxx
661: MOV TEMP[15].x, IMM[2].xxxx
662: FSNE TEMP[16].x, CONST[15].xxxx, TEMP[5].xxxx
663: UIF TEMP[16].xxxx :0
664: MOV TEMP[15].x, IMM[2].yyyy
665: RCP TEMP[16].x, CONST[18].xxxx
666: MUL TEMP[14].xy, IN[3].xyyy, TEMP[16].xxxx
667: ELSE :0
668: RCP TEMP[16].x, CONST[17].xxxx
669: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
670: ENDIF
671: FRC TEMP[14].xy, TEMP[14].xyyy
672: MUL TEMP[16].x, CONST[19].xxxx, IMM[2].wwww
673: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
674: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
675: MUL TEMP[15].x, TEMP[15].xxxx, CONST[19].xxxx
676: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
677: MAD TEMP[5].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
678: MOV TEMP[14].xy, TEMP[5].xyyy
679: MOV TEMP[14].w, TEMP[9].xxxx
680: TXL TEMP[14], TEMP[14], SAMP[9], 2D
681: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz
682: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
683: MOV TEMP[16].xy, TEMP[5].xyyy
684: MOV TEMP[16].w, TEMP[9].xxxx
685: TXL TEMP[16], TEMP[16], SAMP[7], 2D
686: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx
687: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
688: MOV TEMP[18].xy, TEMP[5].xyyy
689: MOV TEMP[18].w, TEMP[9].xxxx
690: TXL TEMP[18], TEMP[18], SAMP[5], 2D
691: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww
692: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
693: MOV TEMP[20].xy, TEMP[5].xyyy
694: MOV TEMP[20].w, TEMP[9].xxxx
695: TXL TEMP[20], TEMP[20], SAMP[3], 2D
696: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz
697: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
698: MOV TEMP[5].xy, TEMP[5].xyyy
699: MOV TEMP[5].w, TEMP[9].xxxx
700: TXL TEMP[5], TEMP[5], SAMP[1], 2D
701: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[3].yyyy
702: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz
703: MUL TEMP[5], TEMP[5], TEMP[8].xxxx
704: MAD TEMP[5], TEMP[20], TEMP[21].xxxx, TEMP[5]
705: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5]
706: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5]
707: MAD TEMP[5].yw, TEMP[14], TEMP[15].xxxx, TEMP[5]
708: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz
709: DP2 TEMP[8].x, TEMP[5].xyyy, TEMP[5].xyyy
710: MOV_SAT TEMP[31].x, TEMP[8].xxxx
711: MOV TEMP[8].x, IMM[3].yyyy
712: MOV TEMP[8].y, TEMP[12].xxxx
713: MOV TEMP[8].z, TEMP[12].yyyy
714: MOV TEMP[12].y, IMM[3].yyyy
715: MOV TEMP[12].x, TEMP[13].yyyy
716: MOV TEMP[12].z, TEMP[13].xxxx
717: MOV TEMP[13].z, IMM[3].yyyy
718: MOV TEMP[13].xy, TEMP[5].xyxx
719: MUL TEMP[5].xyz, TEMP[8].xyzz, TEMP[3].xxxx
720: MAD TEMP[5].xyz, TEMP[12].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
721: MAD TEMP[5].xyz, TEMP[13].xyzz, TEMP[3].zzzz, TEMP[5].xyzz
722: MOV TEMP[8].xy, IN[3].zyzz
723: MOV TEMP[12].x, IMM[2].xxxx
724: FSNE TEMP[13].x, CONST[15].xxxx, TEMP[6].xxxx
725: UIF TEMP[13].xxxx :0
726: MOV TEMP[12].x, IMM[2].yyyy
727: RCP TEMP[13].x, CONST[18].xxxx
728: MUL TEMP[8].xy, IN[3].zyyy, TEMP[13].xxxx
729: ELSE :0
730: RCP TEMP[13].x, CONST[17].xxxx
731: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx
732: ENDIF
733: FRC TEMP[8].xy, TEMP[8].xyyy
734: MUL TEMP[13].x, CONST[19].xxxx, IMM[2].wwww
735: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
736: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
737: MUL TEMP[12].x, TEMP[12].xxxx, CONST[19].xxxx
738: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
739: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
740: MOV TEMP[12].xy, TEMP[8].xyyy
741: MOV TEMP[12].w, TEMP[9].xxxx
742: TXL TEMP[12], TEMP[12], SAMP[9], 2D
743: FSEQ TEMP[13].x, TEMP[10].zzzz, IMM[1].zzzz
744: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
745: MOV TEMP[14].xy, TEMP[8].xyyy
746: MOV TEMP[14].w, TEMP[9].xxxx
747: TXL TEMP[14], TEMP[14], SAMP[7], 2D
748: FSEQ TEMP[15].x, TEMP[10].zzzz, IMM[3].xxxx
749: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
750: MOV TEMP[16].xy, TEMP[8].xyyy
751: MOV TEMP[16].w, TEMP[9].xxxx
752: TXL TEMP[16], TEMP[16], SAMP[5], 2D
753: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[2].wwww
754: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
755: MOV TEMP[18].xy, TEMP[8].xyyy
756: MOV TEMP[18].w, TEMP[9].xxxx
757: TXL TEMP[18], TEMP[18], SAMP[3], 2D
758: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[2].zzzz
759: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
760: MOV TEMP[8].xy, TEMP[8].xyyy
761: MOV TEMP[8].w, TEMP[9].xxxx
762: TXL TEMP[8], TEMP[8], SAMP[1], 2D
763: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].yyyy
764: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
765: MUL TEMP[8], TEMP[8], TEMP[20].xxxx
766: MAD TEMP[8], TEMP[18], TEMP[19].xxxx, TEMP[8]
767: MAD TEMP[8], TEMP[16], TEMP[17].xxxx, TEMP[8]
768: MAD TEMP[8], TEMP[14], TEMP[15].xxxx, TEMP[8]
769: MAD TEMP[8].yw, TEMP[12], TEMP[13].xxxx, TEMP[8]
770: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz
771: DP2 TEMP[12].x, TEMP[8].xyyy, TEMP[8].xyyy
772: MOV_SAT TEMP[32].x, TEMP[12].xxxx
773: MOV TEMP[12].xy, IN[3].zxzz
774: MOV TEMP[13].x, IMM[2].xxxx
775: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[6].xxxx
776: UIF TEMP[14].xxxx :0
777: MOV TEMP[13].x, IMM[2].yyyy
778: RCP TEMP[14].x, CONST[18].xxxx
779: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx
780: ELSE :0
781: RCP TEMP[14].x, CONST[17].xxxx
782: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
783: ENDIF
784: FRC TEMP[12].xy, TEMP[12].xyyy
785: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww
786: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
787: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
788: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx
789: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
790: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
791: MOV TEMP[13].xy, TEMP[12].xyyy
792: MOV TEMP[13].w, TEMP[9].xxxx
793: TXL TEMP[13], TEMP[13], SAMP[9], 2D
794: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz
795: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
796: MOV TEMP[15].xy, TEMP[12].xyyy
797: MOV TEMP[15].w, TEMP[9].xxxx
798: TXL TEMP[15], TEMP[15], SAMP[7], 2D
799: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx
800: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
801: MOV TEMP[17].xy, TEMP[12].xyyy
802: MOV TEMP[17].w, TEMP[9].xxxx
803: TXL TEMP[17], TEMP[17], SAMP[5], 2D
804: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww
805: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
806: MOV TEMP[19].xy, TEMP[12].xyyy
807: MOV TEMP[19].w, TEMP[9].xxxx
808: TXL TEMP[19], TEMP[19], SAMP[3], 2D
809: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz
810: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
811: MOV TEMP[12].xy, TEMP[12].xyyy
812: MOV TEMP[12].w, TEMP[9].xxxx
813: TXL TEMP[12], TEMP[12], SAMP[1], 2D
814: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[3].yyyy
815: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
816: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
817: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
818: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
819: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
820: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12]
821: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz
822: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy
823: MOV_SAT TEMP[33].x, TEMP[13].xxxx
824: MOV TEMP[13].xy, IN[3].xyxx
825: MOV TEMP[14].x, IMM[2].xxxx
826: FSNE TEMP[15].x, CONST[15].xxxx, TEMP[6].xxxx
827: UIF TEMP[15].xxxx :0
828: MOV TEMP[14].x, IMM[2].yyyy
829: RCP TEMP[15].x, CONST[18].xxxx
830: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx
831: ELSE :0
832: RCP TEMP[15].x, CONST[17].xxxx
833: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
834: ENDIF
835: FRC TEMP[13].xy, TEMP[13].xyyy
836: MUL TEMP[15].x, CONST[19].xxxx, IMM[2].wwww
837: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
838: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
839: MUL TEMP[14].x, TEMP[14].xxxx, CONST[19].xxxx
840: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
841: MAD TEMP[6].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
842: MOV TEMP[13].xy, TEMP[6].xyyy
843: MOV TEMP[13].w, TEMP[9].xxxx
844: TXL TEMP[13], TEMP[13], SAMP[9], 2D
845: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz
846: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
847: MOV TEMP[15].xy, TEMP[6].xyyy
848: MOV TEMP[15].w, TEMP[9].xxxx
849: TXL TEMP[15], TEMP[15], SAMP[7], 2D
850: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx
851: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
852: MOV TEMP[17].xy, TEMP[6].xyyy
853: MOV TEMP[17].w, TEMP[9].xxxx
854: TXL TEMP[17], TEMP[17], SAMP[5], 2D
855: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww
856: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
857: MOV TEMP[19].xy, TEMP[6].xyyy
858: MOV TEMP[19].w, TEMP[9].xxxx
859: TXL TEMP[19], TEMP[19], SAMP[3], 2D
860: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz
861: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
862: MOV TEMP[6].xy, TEMP[6].xyyy
863: MOV TEMP[6].w, TEMP[9].xxxx
864: TXL TEMP[6], TEMP[6], SAMP[1], 2D
865: FSEQ TEMP[10].x, TEMP[10].zzzz, IMM[3].yyyy
866: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz
867: MUL TEMP[6], TEMP[6], TEMP[10].xxxx
868: MAD TEMP[6], TEMP[19], TEMP[20].xxxx, TEMP[6]
869: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6]
870: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6]
871: MAD TEMP[6].yw, TEMP[13], TEMP[14].xxxx, TEMP[6]
872: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz
873: DP2 TEMP[10].x, TEMP[6].xyyy, TEMP[6].xyyy
874: MOV_SAT TEMP[34].x, TEMP[10].xxxx
875: MOV TEMP[10].x, IMM[3].yyyy
876: MOV TEMP[10].y, TEMP[8].xxxx
877: MOV TEMP[10].z, TEMP[8].yyyy
878: MOV TEMP[8].y, IMM[3].yyyy
879: MOV TEMP[8].x, TEMP[12].yyyy
880: MOV TEMP[8].z, TEMP[12].xxxx
881: MOV TEMP[12].z, IMM[3].yyyy
882: MOV TEMP[12].xy, TEMP[6].xyxx
883: MUL TEMP[6].xyz, TEMP[10].xyzz, TEMP[3].xxxx
884: MAD TEMP[6].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[6].xyzz
885: MAD TEMP[6].xyz, TEMP[12].xyzz, TEMP[3].zzzz, TEMP[6].xyzz
886: MOV TEMP[8].xy, IN[3].zyzz
887: MOV TEMP[10].x, IMM[2].xxxx
888: FSNE TEMP[12].x, CONST[15].xxxx, TEMP[7].xxxx
889: UIF TEMP[12].xxxx :0
890: MOV TEMP[10].x, IMM[2].yyyy
891: RCP TEMP[12].x, CONST[18].xxxx
892: MUL TEMP[8].xy, IN[3].zyyy, TEMP[12].xxxx
893: ELSE :0
894: RCP TEMP[12].x, CONST[17].xxxx
895: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx
896: ENDIF
897: FRC TEMP[8].xy, TEMP[8].xyyy
898: MUL TEMP[12].x, CONST[19].xxxx, IMM[2].wwww
899: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[10].xxxx
900: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
901: MUL TEMP[10].x, TEMP[10].xxxx, CONST[19].xxxx
902: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx, TEMP[10].xxxx
903: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
904: MOV TEMP[10].xy, TEMP[8].xyyy
905: MOV TEMP[10].w, TEMP[9].xxxx
906: TXL TEMP[10], TEMP[10], SAMP[9], 2D
907: FSEQ TEMP[12].x, TEMP[4].zzzz, IMM[1].zzzz
908: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
909: MOV TEMP[13].xy, TEMP[8].xyyy
910: MOV TEMP[13].w, TEMP[9].xxxx
911: TXL TEMP[13], TEMP[13], SAMP[7], 2D
912: FSEQ TEMP[14].x, TEMP[4].zzzz, IMM[3].xxxx
913: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
914: MOV TEMP[15].xy, TEMP[8].xyyy
915: MOV TEMP[15].w, TEMP[9].xxxx
916: TXL TEMP[15], TEMP[15], SAMP[5], 2D
917: FSEQ TEMP[16].x, TEMP[4].zzzz, IMM[2].wwww
918: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
919: MOV TEMP[17].xy, TEMP[8].xyyy
920: MOV TEMP[17].w, TEMP[9].xxxx
921: TXL TEMP[17], TEMP[17], SAMP[3], 2D
922: FSEQ TEMP[18].x, TEMP[4].zzzz, IMM[2].zzzz
923: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
924: MOV TEMP[8].xy, TEMP[8].xyyy
925: MOV TEMP[8].w, TEMP[9].xxxx
926: TXL TEMP[8], TEMP[8], SAMP[1], 2D
927: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[3].yyyy
928: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
929: MUL TEMP[8], TEMP[8], TEMP[19].xxxx
930: MAD TEMP[8], TEMP[17], TEMP[18].xxxx, TEMP[8]
931: MAD TEMP[8], TEMP[15], TEMP[16].xxxx, TEMP[8]
932: MAD TEMP[8], TEMP[13], TEMP[14].xxxx, TEMP[8]
933: MAD TEMP[8].yw, TEMP[10], TEMP[12].xxxx, TEMP[8]
934: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz
935: DP2 TEMP[10].x, TEMP[8].xyyy, TEMP[8].xyyy
936: MOV_SAT TEMP[35].x, TEMP[10].xxxx
937: MOV TEMP[10].xy, IN[3].zxzz
938: MOV TEMP[12].x, IMM[2].xxxx
939: FSNE TEMP[13].x, CONST[15].xxxx, TEMP[7].xxxx
940: UIF TEMP[13].xxxx :0
941: MOV TEMP[12].x, IMM[2].yyyy
942: RCP TEMP[13].x, CONST[18].xxxx
943: MUL TEMP[10].xy, IN[3].zxxx, TEMP[13].xxxx
944: ELSE :0
945: RCP TEMP[13].x, CONST[17].xxxx
946: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx
947: ENDIF
948: FRC TEMP[10].xy, TEMP[10].xyyy
949: MUL TEMP[13].x, CONST[19].xxxx, IMM[2].wwww
950: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
951: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
952: MUL TEMP[12].x, TEMP[12].xxxx, CONST[19].xxxx
953: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
954: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
955: MOV TEMP[12].xy, TEMP[10].xyyy
956: MOV TEMP[12].w, TEMP[9].xxxx
957: TXL TEMP[12], TEMP[12], SAMP[9], 2D
958: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz
959: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
960: MOV TEMP[14].xy, TEMP[10].xyyy
961: MOV TEMP[14].w, TEMP[9].xxxx
962: TXL TEMP[14], TEMP[14], SAMP[7], 2D
963: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx
964: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
965: MOV TEMP[16].xy, TEMP[10].xyyy
966: MOV TEMP[16].w, TEMP[9].xxxx
967: TXL TEMP[16], TEMP[16], SAMP[5], 2D
968: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww
969: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
970: MOV TEMP[18].xy, TEMP[10].xyyy
971: MOV TEMP[18].w, TEMP[9].xxxx
972: TXL TEMP[18], TEMP[18], SAMP[3], 2D
973: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz
974: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
975: MOV TEMP[10].xy, TEMP[10].xyyy
976: MOV TEMP[10].w, TEMP[9].xxxx
977: TXL TEMP[10], TEMP[10], SAMP[1], 2D
978: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[3].yyyy
979: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
980: MUL TEMP[10], TEMP[10], TEMP[20].xxxx
981: MAD TEMP[10], TEMP[18], TEMP[19].xxxx, TEMP[10]
982: MAD TEMP[10], TEMP[16], TEMP[17].xxxx, TEMP[10]
983: MAD TEMP[10], TEMP[14], TEMP[15].xxxx, TEMP[10]
984: MAD TEMP[10].yw, TEMP[12], TEMP[13].xxxx, TEMP[10]
985: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[2].wwww, IMM[3].zzzz
986: DP2 TEMP[12].x, TEMP[10].xyyy, TEMP[10].xyyy
987: MOV_SAT TEMP[36].x, TEMP[12].xxxx
988: MOV TEMP[12].xy, IN[3].xyxx
989: MOV TEMP[13].x, IMM[2].xxxx
990: FSNE TEMP[14].x, CONST[15].xxxx, TEMP[7].xxxx
991: UIF TEMP[14].xxxx :0
992: MOV TEMP[13].x, IMM[2].yyyy
993: RCP TEMP[14].x, CONST[18].xxxx
994: MUL TEMP[12].xy, IN[3].xyyy, TEMP[14].xxxx
995: ELSE :0
996: RCP TEMP[14].x, CONST[17].xxxx
997: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
998: ENDIF
999: FRC TEMP[12].xy, TEMP[12].xyyy
1000: MUL TEMP[14].x, CONST[19].xxxx, IMM[2].wwww
1001: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
1002: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
1003: MUL TEMP[13].x, TEMP[13].xxxx, CONST[19].xxxx
1004: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
1005: MAD TEMP[7].xy, TEMP[12].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
1006: MOV TEMP[12].xy, TEMP[7].xyyy
1007: MOV TEMP[12].w, TEMP[9].xxxx
1008: TXL TEMP[12], TEMP[12], SAMP[9], 2D
1009: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz
1010: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
1011: MOV TEMP[14].xy, TEMP[7].xyyy
1012: MOV TEMP[14].w, TEMP[9].xxxx
1013: TXL TEMP[14], TEMP[14], SAMP[7], 2D
1014: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx
1015: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
1016: MOV TEMP[16].xy, TEMP[7].xyyy
1017: MOV TEMP[16].w, TEMP[9].xxxx
1018: TXL TEMP[16], TEMP[16], SAMP[5], 2D
1019: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww
1020: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
1021: MOV TEMP[18].xy, TEMP[7].xyyy
1022: MOV TEMP[18].w, TEMP[9].xxxx
1023: TXL TEMP[18], TEMP[18], SAMP[3], 2D
1024: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz
1025: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
1026: MOV TEMP[7].xy, TEMP[7].xyyy
1027: MOV TEMP[7].w, TEMP[9].xxxx
1028: TXL TEMP[7], TEMP[7], SAMP[1], 2D
1029: FSEQ TEMP[4].x, TEMP[4].zzzz, IMM[3].yyyy
1030: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz
1031: MUL TEMP[4], TEMP[7], TEMP[4].xxxx
1032: MAD TEMP[4], TEMP[18], TEMP[19].xxxx, TEMP[4]
1033: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4]
1034: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4]
1035: MAD TEMP[4].yw, TEMP[12], TEMP[13].xxxx, TEMP[4]
1036: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz
1037: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy
1038: MOV_SAT TEMP[37].x, TEMP[7].xxxx
1039: MOV TEMP[7].x, IMM[3].yyyy
1040: MOV TEMP[7].y, TEMP[8].xxxx
1041: MOV TEMP[7].z, TEMP[8].yyyy
1042: MOV TEMP[8].y, IMM[3].yyyy
1043: MOV TEMP[8].x, TEMP[10].yyyy
1044: MOV TEMP[8].z, TEMP[10].xxxx
1045: MOV TEMP[9].z, IMM[3].yyyy
1046: MOV TEMP[9].xy, TEMP[4].xyxx
1047: MOV TEMP[4].w, IMM[2].zzzz
1048: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[3].xxxx
1049: MAD TEMP[7].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[7].xyzz
1050: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[3].zzzz, TEMP[7].xyzz
1051: MUL TEMP[5].xyz, IN[1].xxxx, TEMP[5].xyzz
1052: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[6].xyzz, TEMP[5].xyzz
1053: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[3].xyzz, TEMP[5].xyzz
1054: DP4 TEMP[3].x, TEMP[4], TEMP[4]
1055: RSQ TEMP[3].x, TEMP[3].xxxx
1056: MUL TEMP[3].xyz, TEMP[4], TEMP[3].xxxx
1057: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[0].wwww
1058: ADD TEMP[3].xyz, IN[2].yzww, -TEMP[3].xyzz
1059: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
1060: RSQ TEMP[4].x, TEMP[4].xxxx
1061: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
1062: MOV TEMP[4].w, IMM[3].yyyy
1063: MUL TEMP[4].xyz, TEMP[11].xyzz, TEMP[0].xyzz
1064: ADD TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xyzz
1065: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz
1066: RSQ TEMP[2].x, TEMP[2].xxxx
1067: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx
1068: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
1069: MAX TEMP[0].x, IMM[3].wwww, TEMP[0].xxxx
1070: MUL TEMP[2].x, IMM[4].xxxx, IN[1].wwww
1071: POW TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
1072: MOV_SAT TEMP[0].x, TEMP[0].xxxx
1073: MOV TEMP[2].w, IMM[3].yyyy
1074: MOV TEMP[2].xyz, CONST[22].xyzx
1075: MOV TEMP[5].w, IMM[2].zzzz
1076: MUL TEMP[6].x, IMM[2].wwww, TEMP[0].xxxx
1077: ADD TEMP[6].x, IMM[3].xxxx, -TEMP[6].xxxx
1078: MUL TEMP[6].x, TEMP[0].xxxx, TEMP[6].xxxx
1079: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[6].xxxx
1080: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].wwww
1081: MUL TEMP[6].xyz, TEMP[11].xyzz, CONST[4].xyzz
1082: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
1083: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1084: MUL TEMP[3], CONST[23], IMM[2].wwww
1085: MAX TEMP[2], TEMP[3], TEMP[2]
1086: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz
1087: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[11].xyzz
1088: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx, TEMP[2].xyzz
1089: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz
1090: MUL TEMP[5].xyz, TEMP[0].xyzz, IMM[0].wwww
1091: ADD TEMP[0].xyz, TEMP[4], TEMP[5]
1092: MAD TEMP[1].x, IN[2].xxxx, CONST[3].zzzz, CONST[3].wwww
1093: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1094: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz
1095: MOV TEMP[4].w, IMM[2].zzzz
1096: MOV OUT[0], TEMP[4]
1097: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376)
%51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0
%53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0
%55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0
%57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0
%59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0
%61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0
%63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0
%65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0
%67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0
%69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0
%71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0
%73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0
%75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6
%76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0
%77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6
%78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0
%79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7
%80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0
%81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7
%82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0
%83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8
%84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0
%85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8
%86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0
%87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9
%88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0
%89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9
%90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0
%91 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%92 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%93 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%94 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%95 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%96 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%97 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%98 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%99 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%100 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%101 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%102 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%103 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%104 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%105 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%106 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%107 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%108 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%109 = fmul float %27, %27
%110 = fmul float %28, %28
%111 = fadd float %110, %109
%112 = fmul float %29, %29
%113 = fadd float %111, %112
%114 = call float @llvm.AMDGPU.rsq.clamped.f32(float %113)
%115 = fmul float %27, %114
%116 = fmul float %28, %114
%117 = fmul float %29, %114
%118 = fsub float %24, %103
%119 = fsub float %25, %104
%120 = fsub float %26, %105
%121 = fmul float %118, %118
%122 = fmul float %119, %119
%123 = fadd float %122, %121
%124 = fmul float %120, %120
%125 = fadd float %123, %124
%126 = call float @llvm.AMDGPU.rsq.clamped.f32(float %125)
%127 = fmul float %118, %126
%128 = fmul float %119, %126
%129 = fmul float %120, %126
%130 = call float @llvm.fabs.f32(float %100)
%131 = call float @llvm.fabs.f32(float %101)
%132 = call float @llvm.fabs.f32(float %102)
%133 = fmul float %130, %130
%134 = fmul float %131, %131
%135 = fadd float %134, %133
%136 = fmul float %132, %132
%137 = fadd float %135, %136
%138 = call float @llvm.AMDGPU.rsq.clamped.f32(float %137)
%139 = fmul float %130, %138
%140 = fadd float %139, 0xBFC99999A0000000
%141 = fmul float %131, %138
%142 = fadd float %141, 0xBFC99999A0000000
%143 = fmul float %132, %138
%144 = fadd float %143, 0xBFC99999A0000000
%145 = fmul float %140, 7.000000e+00
%146 = fmul float %142, 7.000000e+00
%147 = fmul float %144, 7.000000e+00
%148 = call float @llvm.maxnum.f32(float %145, float 0x3F847AE140000000)
%149 = call float @llvm.maxnum.f32(float %146, float 0x3F847AE140000000)
%150 = call float @llvm.maxnum.f32(float %147, float 0x3F847AE140000000)
%151 = fadd float %148, %149
%152 = fadd float %151, %150
%153 = fdiv float 1.000000e+00, %152
%154 = fmul float %148, %153
%155 = fmul float %149, %153
%156 = fmul float %150, %153
%157 = fadd float %91, 5.000000e-01
%158 = fadd float %92, 5.000000e-01
%159 = fadd float %93, 5.000000e-01
%160 = call float @llvm.floor.f32(float %157)
%161 = call float @llvm.floor.f32(float %158)
%162 = call float @llvm.floor.f32(float %159)
%163 = fmul float %160, %38
%164 = call float @llvm.floor.f32(float %163)
%165 = fmul float %164, %38
%166 = fcmp ult float %160, 6.400000e+01
br i1 %166, label %ELSE, label %IF
IF: ; preds = %main_body
%167 = fadd float %160, -6.400000e+01
%168 = fmul float %167, %39
%169 = call float @llvm.floor.f32(float %168)
%170 = fmul float %169, %39
%171 = call float @llvm.floor.f32(float %168)
%172 = fsub float %168, %171
%173 = call float @llvm.floor.f32(float %170)
%174 = fsub float %170, %173
%175 = call float @llvm.floor.f32(float %170)
%176 = fadd float %175, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%177 = call float @llvm.floor.f32(float %163)
%178 = fsub float %163, %177
%179 = call float @llvm.floor.f32(float %165)
%180 = fsub float %165, %179
%181 = call float @llvm.floor.f32(float %165)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp32.0 = phi float [ %172, %IF ], [ %178, %ELSE ]
%temp33.0 = phi float [ %174, %IF ], [ %180, %ELSE ]
%temp34.0 = phi float [ %176, %IF ], [ %181, %ELSE ]
%temp20.0 = phi float [ %39, %IF ], [ %38, %ELSE ]
%182 = fmul float %161, %38
%183 = call float @llvm.floor.f32(float %182)
%184 = fmul float %183, %38
%185 = fcmp ult float %161, 6.400000e+01
br i1 %185, label %ELSE154, label %IF153
IF153: ; preds = %ENDIF
%186 = fadd float %161, -6.400000e+01
%187 = fmul float %186, %39
%188 = call float @llvm.floor.f32(float %187)
%189 = fmul float %188, %39
%190 = call float @llvm.floor.f32(float %187)
%191 = fsub float %187, %190
%192 = call float @llvm.floor.f32(float %189)
%193 = fsub float %189, %192
%194 = call float @llvm.floor.f32(float %189)
%195 = fadd float %194, 4.000000e+00
br label %ENDIF152
ELSE154: ; preds = %ENDIF
%196 = call float @llvm.floor.f32(float %182)
%197 = fsub float %182, %196
%198 = call float @llvm.floor.f32(float %184)
%199 = fsub float %184, %198
%200 = call float @llvm.floor.f32(float %184)
br label %ENDIF152
ENDIF152: ; preds = %ELSE154, %IF153
%temp40.0 = phi float [ %191, %IF153 ], [ %197, %ELSE154 ]
%temp41.0 = phi float [ %193, %IF153 ], [ %199, %ELSE154 ]
%temp42.0 = phi float [ %195, %IF153 ], [ %200, %ELSE154 ]
%temp24.0 = phi float [ %39, %IF153 ], [ %38, %ELSE154 ]
%201 = fmul float %162, %38
%202 = call float @llvm.floor.f32(float %201)
%203 = fmul float %202, %38
%204 = fcmp ult float %162, 6.400000e+01
br i1 %204, label %ELSE157, label %IF156
IF156: ; preds = %ENDIF152
%205 = fadd float %162, -6.400000e+01
%206 = fmul float %205, %39
%207 = call float @llvm.floor.f32(float %206)
%208 = fmul float %207, %39
%209 = call float @llvm.floor.f32(float %206)
%210 = fsub float %206, %209
%211 = call float @llvm.floor.f32(float %208)
%212 = fsub float %208, %211
%213 = call float @llvm.floor.f32(float %208)
%214 = fadd float %213, 4.000000e+00
br label %ENDIF155
ELSE157: ; preds = %ENDIF152
%215 = call float @llvm.floor.f32(float %201)
%216 = fsub float %201, %215
%217 = call float @llvm.floor.f32(float %203)
%218 = fsub float %203, %217
%219 = call float @llvm.floor.f32(float %203)
br label %ENDIF155
ENDIF155: ; preds = %ELSE157, %IF156
%temp28.0 = phi float [ %39, %IF156 ], [ %38, %ELSE157 ]
%temp18.0 = phi float [ %214, %IF156 ], [ %219, %ELSE157 ]
%temp17.0 = phi float [ %212, %IF156 ], [ %218, %ELSE157 ]
%temp16.0 = phi float [ %210, %IF156 ], [ %216, %ELSE157 ]
%220 = fsub float %103, %24
%221 = fsub float %104, %25
%222 = fsub float %105, %26
%223 = fmul float %220, %220
%224 = fmul float %221, %221
%225 = fadd float %224, %223
%226 = fmul float %222, %222
%227 = fadd float %225, %226
%228 = fmul float %44, %227
%229 = call float @llvm.log2.f32(float %228)
%230 = fmul float %229, 0x3FE62E4300000000
%231 = fmul float %230, %43
%232 = fcmp une float %38, %temp20.0
%.sink212 = select i1 %232, float %41, float %40
%temp48.0 = select i1 %232, float 1.953125e-03, float 3.906250e-03
%233 = fdiv float 1.000000e+00, %.sink212
%234 = fmul float %103, %233
%235 = fmul float %104, %233
%236 = call float @llvm.floor.f32(float %234)
%237 = fsub float %234, %236
%238 = call float @llvm.floor.f32(float %235)
%239 = fsub float %235, %238
%240 = fmul float %42, 2.000000e+00
%241 = fmul float %240, %temp48.0
%242 = fsub float 1.000000e+00, %241
%243 = fmul float %temp48.0, %42
%244 = fmul float %237, %242
%245 = fadd float %244, %243
%246 = fmul float %239, %242
%247 = fadd float %246, %243
%248 = fmul float %245, %temp20.0
%249 = fadd float %248, %temp32.0
%250 = fmul float %247, %temp20.0
%251 = fadd float %250, %temp33.0
%252 = bitcast float %249 to i32
%253 = bitcast float %251 to i32
%254 = bitcast float %231 to i32
%255 = insertelement <4 x i32> undef, i32 %252, i32 0
%256 = insertelement <4 x i32> %255, i32 %253, i32 1
%257 = insertelement <4 x i32> %256, i32 %254, i32 2
%258 = bitcast <8 x i32> %84 to <32 x i8>
%259 = bitcast <4 x i32> %86 to <16 x i8>
%260 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %257, <32 x i8> %258, <16 x i8> %259, i32 2)
%261 = extractelement <4 x float> %260, i32 0
%262 = extractelement <4 x float> %260, i32 1
%263 = extractelement <4 x float> %260, i32 2
%264 = fcmp oeq float %temp34.0, 4.000000e+00
%265 = select i1 %264, float 1.000000e+00, float 0.000000e+00
%266 = bitcast float %249 to i32
%267 = bitcast float %251 to i32
%268 = bitcast float %231 to i32
%269 = insertelement <4 x i32> undef, i32 %266, i32 0
%270 = insertelement <4 x i32> %269, i32 %267, i32 1
%271 = insertelement <4 x i32> %270, i32 %268, i32 2
%272 = bitcast <8 x i32> %76 to <32 x i8>
%273 = bitcast <4 x i32> %78 to <16 x i8>
%274 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %271, <32 x i8> %272, <16 x i8> %273, i32 2)
%275 = extractelement <4 x float> %274, i32 0
%276 = extractelement <4 x float> %274, i32 1
%277 = extractelement <4 x float> %274, i32 2
%278 = fcmp oeq float %temp34.0, 3.000000e+00
%279 = select i1 %278, float 1.000000e+00, float 0.000000e+00
%280 = bitcast float %249 to i32
%281 = bitcast float %251 to i32
%282 = bitcast float %231 to i32
%283 = insertelement <4 x i32> undef, i32 %280, i32 0
%284 = insertelement <4 x i32> %283, i32 %281, i32 1
%285 = insertelement <4 x i32> %284, i32 %282, i32 2
%286 = bitcast <8 x i32> %68 to <32 x i8>
%287 = bitcast <4 x i32> %70 to <16 x i8>
%288 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %285, <32 x i8> %286, <16 x i8> %287, i32 2)
%289 = extractelement <4 x float> %288, i32 0
%290 = extractelement <4 x float> %288, i32 1
%291 = extractelement <4 x float> %288, i32 2
%292 = fcmp oeq float %temp34.0, 2.000000e+00
%293 = select i1 %292, float 1.000000e+00, float 0.000000e+00
%294 = bitcast float %249 to i32
%295 = bitcast float %251 to i32
%296 = bitcast float %231 to i32
%297 = insertelement <4 x i32> undef, i32 %294, i32 0
%298 = insertelement <4 x i32> %297, i32 %295, i32 1
%299 = insertelement <4 x i32> %298, i32 %296, i32 2
%300 = bitcast <8 x i32> %60 to <32 x i8>
%301 = bitcast <4 x i32> %62 to <16 x i8>
%302 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %299, <32 x i8> %300, <16 x i8> %301, i32 2)
%303 = extractelement <4 x float> %302, i32 0
%304 = extractelement <4 x float> %302, i32 1
%305 = extractelement <4 x float> %302, i32 2
%306 = fcmp oeq float %temp34.0, 1.000000e+00
%307 = select i1 %306, float 1.000000e+00, float 0.000000e+00
%308 = bitcast float %249 to i32
%309 = bitcast float %251 to i32
%310 = bitcast float %231 to i32
%311 = insertelement <4 x i32> undef, i32 %308, i32 0
%312 = insertelement <4 x i32> %311, i32 %309, i32 1
%313 = insertelement <4 x i32> %312, i32 %310, i32 2
%314 = bitcast <8 x i32> %52 to <32 x i8>
%315 = bitcast <4 x i32> %54 to <16 x i8>
%316 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %313, <32 x i8> %314, <16 x i8> %315, i32 2)
%317 = extractelement <4 x float> %316, i32 0
%318 = extractelement <4 x float> %316, i32 1
%319 = extractelement <4 x float> %316, i32 2
%320 = fcmp oeq float %temp34.0, 0.000000e+00
%321 = select i1 %320, float 1.000000e+00, float 0.000000e+00
%322 = fmul float %317, %321
%323 = fmul float %318, %321
%324 = fmul float %319, %321
%325 = fmul float %303, %307
%326 = fadd float %325, %322
%327 = fmul float %304, %307
%328 = fadd float %327, %323
%329 = fmul float %305, %307
%330 = fadd float %329, %324
%331 = fmul float %289, %293
%332 = fadd float %331, %326
%333 = fmul float %290, %293
%334 = fadd float %333, %328
%335 = fmul float %291, %293
%336 = fadd float %335, %330
%337 = fmul float %275, %279
%338 = fadd float %337, %332
%339 = fmul float %276, %279
%340 = fadd float %339, %334
%341 = fmul float %277, %279
%342 = fadd float %341, %336
%343 = fmul float %261, %265
%344 = fadd float %343, %338
%345 = fmul float %262, %265
%346 = fadd float %345, %340
%347 = fmul float %263, %265
%348 = fadd float %347, %342
%349 = fcmp une float %38, %temp20.0
%.sink213 = select i1 %349, float %41, float %40
%temp52.0 = select i1 %349, float 1.953125e-03, float 3.906250e-03
%350 = fdiv float 1.000000e+00, %.sink213
%351 = fmul float %105, %350
%352 = fmul float %104, %350
%353 = call float @llvm.floor.f32(float %351)
%354 = fsub float %351, %353
%355 = call float @llvm.floor.f32(float %352)
%356 = fsub float %352, %355
%357 = fmul float %42, 2.000000e+00
%358 = fmul float %357, %temp52.0
%359 = fsub float 1.000000e+00, %358
%360 = fmul float %temp52.0, %42
%361 = fmul float %354, %359
%362 = fadd float %361, %360
%363 = fmul float %356, %359
%364 = fadd float %363, %360
%365 = fmul float %362, %temp20.0
%366 = fadd float %365, %temp32.0
%367 = fmul float %364, %temp20.0
%368 = fadd float %367, %temp33.0
%369 = bitcast float %366 to i32
%370 = bitcast float %368 to i32
%371 = bitcast float %231 to i32
%372 = insertelement <4 x i32> undef, i32 %369, i32 0
%373 = insertelement <4 x i32> %372, i32 %370, i32 1
%374 = insertelement <4 x i32> %373, i32 %371, i32 2
%375 = bitcast <8 x i32> %84 to <32 x i8>
%376 = bitcast <4 x i32> %86 to <16 x i8>
%377 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %374, <32 x i8> %375, <16 x i8> %376, i32 2)
%378 = extractelement <4 x float> %377, i32 0
%379 = extractelement <4 x float> %377, i32 1
%380 = extractelement <4 x float> %377, i32 2
%381 = fcmp oeq float %temp34.0, 4.000000e+00
%382 = select i1 %381, float 1.000000e+00, float 0.000000e+00
%383 = bitcast float %366 to i32
%384 = bitcast float %368 to i32
%385 = bitcast float %231 to i32
%386 = insertelement <4 x i32> undef, i32 %383, i32 0
%387 = insertelement <4 x i32> %386, i32 %384, i32 1
%388 = insertelement <4 x i32> %387, i32 %385, i32 2
%389 = bitcast <8 x i32> %76 to <32 x i8>
%390 = bitcast <4 x i32> %78 to <16 x i8>
%391 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %388, <32 x i8> %389, <16 x i8> %390, i32 2)
%392 = extractelement <4 x float> %391, i32 0
%393 = extractelement <4 x float> %391, i32 1
%394 = extractelement <4 x float> %391, i32 2
%395 = fcmp oeq float %temp34.0, 3.000000e+00
%396 = select i1 %395, float 1.000000e+00, float 0.000000e+00
%397 = bitcast float %366 to i32
%398 = bitcast float %368 to i32
%399 = bitcast float %231 to i32
%400 = insertelement <4 x i32> undef, i32 %397, i32 0
%401 = insertelement <4 x i32> %400, i32 %398, i32 1
%402 = insertelement <4 x i32> %401, i32 %399, i32 2
%403 = bitcast <8 x i32> %68 to <32 x i8>
%404 = bitcast <4 x i32> %70 to <16 x i8>
%405 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %402, <32 x i8> %403, <16 x i8> %404, i32 2)
%406 = extractelement <4 x float> %405, i32 0
%407 = extractelement <4 x float> %405, i32 1
%408 = extractelement <4 x float> %405, i32 2
%409 = fcmp oeq float %temp34.0, 2.000000e+00
%410 = select i1 %409, float 1.000000e+00, float 0.000000e+00
%411 = bitcast float %366 to i32
%412 = bitcast float %368 to i32
%413 = bitcast float %231 to i32
%414 = insertelement <4 x i32> undef, i32 %411, i32 0
%415 = insertelement <4 x i32> %414, i32 %412, i32 1
%416 = insertelement <4 x i32> %415, i32 %413, i32 2
%417 = bitcast <8 x i32> %60 to <32 x i8>
%418 = bitcast <4 x i32> %62 to <16 x i8>
%419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %416, <32 x i8> %417, <16 x i8> %418, i32 2)
%420 = extractelement <4 x float> %419, i32 0
%421 = extractelement <4 x float> %419, i32 1
%422 = extractelement <4 x float> %419, i32 2
%423 = fcmp oeq float %temp34.0, 1.000000e+00
%424 = select i1 %423, float 1.000000e+00, float 0.000000e+00
%425 = bitcast float %366 to i32
%426 = bitcast float %368 to i32
%427 = bitcast float %231 to i32
%428 = insertelement <4 x i32> undef, i32 %425, i32 0
%429 = insertelement <4 x i32> %428, i32 %426, i32 1
%430 = insertelement <4 x i32> %429, i32 %427, i32 2
%431 = bitcast <8 x i32> %52 to <32 x i8>
%432 = bitcast <4 x i32> %54 to <16 x i8>
%433 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2)
%434 = extractelement <4 x float> %433, i32 0
%435 = extractelement <4 x float> %433, i32 1
%436 = extractelement <4 x float> %433, i32 2
%437 = fcmp oeq float %temp34.0, 0.000000e+00
%438 = select i1 %437, float 1.000000e+00, float 0.000000e+00
%439 = fmul float %434, %438
%440 = fmul float %435, %438
%441 = fmul float %436, %438
%442 = fmul float %420, %424
%443 = fadd float %442, %439
%444 = fmul float %421, %424
%445 = fadd float %444, %440
%446 = fmul float %422, %424
%447 = fadd float %446, %441
%448 = fmul float %406, %410
%449 = fadd float %448, %443
%450 = fmul float %407, %410
%451 = fadd float %450, %445
%452 = fmul float %408, %410
%453 = fadd float %452, %447
%454 = fmul float %392, %396
%455 = fadd float %454, %449
%456 = fmul float %393, %396
%457 = fadd float %456, %451
%458 = fmul float %394, %396
%459 = fadd float %458, %453
%460 = fmul float %378, %382
%461 = fadd float %460, %455
%462 = fmul float %379, %382
%463 = fadd float %462, %457
%464 = fmul float %380, %382
%465 = fadd float %464, %459
%466 = fcmp une float %38, %temp20.0
%.sink214 = select i1 %466, float %41, float %40
%temp56.0 = select i1 %466, float 1.953125e-03, float 3.906250e-03
%467 = fdiv float 1.000000e+00, %.sink214
%468 = fmul float %105, %467
%469 = fmul float %103, %467
%470 = call float @llvm.floor.f32(float %468)
%471 = fsub float %468, %470
%472 = call float @llvm.floor.f32(float %469)
%473 = fsub float %469, %472
%474 = fmul float %42, 2.000000e+00
%475 = fmul float %474, %temp56.0
%476 = fsub float 1.000000e+00, %475
%477 = fmul float %temp56.0, %42
%478 = fmul float %471, %476
%479 = fadd float %478, %477
%480 = fmul float %473, %476
%481 = fadd float %480, %477
%482 = fmul float %479, %temp20.0
%483 = fadd float %482, %temp32.0
%484 = fmul float %481, %temp20.0
%485 = fadd float %484, %temp33.0
%486 = bitcast float %483 to i32
%487 = bitcast float %485 to i32
%488 = bitcast float %231 to i32
%489 = insertelement <4 x i32> undef, i32 %486, i32 0
%490 = insertelement <4 x i32> %489, i32 %487, i32 1
%491 = insertelement <4 x i32> %490, i32 %488, i32 2
%492 = bitcast <8 x i32> %84 to <32 x i8>
%493 = bitcast <4 x i32> %86 to <16 x i8>
%494 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %491, <32 x i8> %492, <16 x i8> %493, i32 2)
%495 = extractelement <4 x float> %494, i32 0
%496 = extractelement <4 x float> %494, i32 1
%497 = extractelement <4 x float> %494, i32 2
%498 = fcmp oeq float %temp34.0, 4.000000e+00
%499 = select i1 %498, float 1.000000e+00, float 0.000000e+00
%500 = bitcast float %483 to i32
%501 = bitcast float %485 to i32
%502 = bitcast float %231 to i32
%503 = insertelement <4 x i32> undef, i32 %500, i32 0
%504 = insertelement <4 x i32> %503, i32 %501, i32 1
%505 = insertelement <4 x i32> %504, i32 %502, i32 2
%506 = bitcast <8 x i32> %76 to <32 x i8>
%507 = bitcast <4 x i32> %78 to <16 x i8>
%508 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %505, <32 x i8> %506, <16 x i8> %507, i32 2)
%509 = extractelement <4 x float> %508, i32 0
%510 = extractelement <4 x float> %508, i32 1
%511 = extractelement <4 x float> %508, i32 2
%512 = fcmp oeq float %temp34.0, 3.000000e+00
%513 = select i1 %512, float 1.000000e+00, float 0.000000e+00
%514 = bitcast float %483 to i32
%515 = bitcast float %485 to i32
%516 = bitcast float %231 to i32
%517 = insertelement <4 x i32> undef, i32 %514, i32 0
%518 = insertelement <4 x i32> %517, i32 %515, i32 1
%519 = insertelement <4 x i32> %518, i32 %516, i32 2
%520 = bitcast <8 x i32> %68 to <32 x i8>
%521 = bitcast <4 x i32> %70 to <16 x i8>
%522 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %519, <32 x i8> %520, <16 x i8> %521, i32 2)
%523 = extractelement <4 x float> %522, i32 0
%524 = extractelement <4 x float> %522, i32 1
%525 = extractelement <4 x float> %522, i32 2
%526 = fcmp oeq float %temp34.0, 2.000000e+00
%527 = select i1 %526, float 1.000000e+00, float 0.000000e+00
%528 = bitcast float %483 to i32
%529 = bitcast float %485 to i32
%530 = bitcast float %231 to i32
%531 = insertelement <4 x i32> undef, i32 %528, i32 0
%532 = insertelement <4 x i32> %531, i32 %529, i32 1
%533 = insertelement <4 x i32> %532, i32 %530, i32 2
%534 = bitcast <8 x i32> %60 to <32 x i8>
%535 = bitcast <4 x i32> %62 to <16 x i8>
%536 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %533, <32 x i8> %534, <16 x i8> %535, i32 2)
%537 = extractelement <4 x float> %536, i32 0
%538 = extractelement <4 x float> %536, i32 1
%539 = extractelement <4 x float> %536, i32 2
%540 = fcmp oeq float %temp34.0, 1.000000e+00
%541 = select i1 %540, float 1.000000e+00, float 0.000000e+00
%542 = bitcast float %483 to i32
%543 = bitcast float %485 to i32
%544 = bitcast float %231 to i32
%545 = insertelement <4 x i32> undef, i32 %542, i32 0
%546 = insertelement <4 x i32> %545, i32 %543, i32 1
%547 = insertelement <4 x i32> %546, i32 %544, i32 2
%548 = bitcast <8 x i32> %52 to <32 x i8>
%549 = bitcast <4 x i32> %54 to <16 x i8>
%550 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %547, <32 x i8> %548, <16 x i8> %549, i32 2)
%551 = extractelement <4 x float> %550, i32 0
%552 = extractelement <4 x float> %550, i32 1
%553 = extractelement <4 x float> %550, i32 2
%554 = fcmp oeq float %temp34.0, 0.000000e+00
%555 = select i1 %554, float 1.000000e+00, float 0.000000e+00
%556 = fmul float %551, %555
%557 = fmul float %552, %555
%558 = fmul float %553, %555
%559 = fmul float %537, %541
%560 = fadd float %559, %556
%561 = fmul float %538, %541
%562 = fadd float %561, %557
%563 = fmul float %539, %541
%564 = fadd float %563, %558
%565 = fmul float %523, %527
%566 = fadd float %565, %560
%567 = fmul float %524, %527
%568 = fadd float %567, %562
%569 = fmul float %525, %527
%570 = fadd float %569, %564
%571 = fmul float %509, %513
%572 = fadd float %571, %566
%573 = fmul float %510, %513
%574 = fadd float %573, %568
%575 = fmul float %511, %513
%576 = fadd float %575, %570
%577 = fmul float %495, %499
%578 = fadd float %577, %572
%579 = fmul float %496, %499
%580 = fadd float %579, %574
%581 = fmul float %497, %499
%582 = fadd float %581, %576
%583 = fcmp une float %38, %temp24.0
%.sink215 = select i1 %583, float %41, float %40
%temp60.0 = select i1 %583, float 1.953125e-03, float 3.906250e-03
%584 = fdiv float 1.000000e+00, %.sink215
%585 = fmul float %103, %584
%586 = fmul float %104, %584
%587 = call float @llvm.floor.f32(float %585)
%588 = fsub float %585, %587
%589 = call float @llvm.floor.f32(float %586)
%590 = fsub float %586, %589
%591 = fmul float %42, 2.000000e+00
%592 = fmul float %591, %temp60.0
%593 = fsub float 1.000000e+00, %592
%594 = fmul float %temp60.0, %42
%595 = fmul float %588, %593
%596 = fadd float %595, %594
%597 = fmul float %590, %593
%598 = fadd float %597, %594
%599 = fmul float %596, %temp24.0
%600 = fadd float %599, %temp40.0
%601 = fmul float %598, %temp24.0
%602 = fadd float %601, %temp41.0
%603 = bitcast float %600 to i32
%604 = bitcast float %602 to i32
%605 = bitcast float %231 to i32
%606 = insertelement <4 x i32> undef, i32 %603, i32 0
%607 = insertelement <4 x i32> %606, i32 %604, i32 1
%608 = insertelement <4 x i32> %607, i32 %605, i32 2
%609 = bitcast <8 x i32> %84 to <32 x i8>
%610 = bitcast <4 x i32> %86 to <16 x i8>
%611 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %608, <32 x i8> %609, <16 x i8> %610, i32 2)
%612 = extractelement <4 x float> %611, i32 0
%613 = extractelement <4 x float> %611, i32 1
%614 = extractelement <4 x float> %611, i32 2
%615 = fcmp oeq float %temp42.0, 4.000000e+00
%616 = select i1 %615, float 1.000000e+00, float 0.000000e+00
%617 = bitcast float %600 to i32
%618 = bitcast float %602 to i32
%619 = bitcast float %231 to i32
%620 = insertelement <4 x i32> undef, i32 %617, i32 0
%621 = insertelement <4 x i32> %620, i32 %618, i32 1
%622 = insertelement <4 x i32> %621, i32 %619, i32 2
%623 = bitcast <8 x i32> %76 to <32 x i8>
%624 = bitcast <4 x i32> %78 to <16 x i8>
%625 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %622, <32 x i8> %623, <16 x i8> %624, i32 2)
%626 = extractelement <4 x float> %625, i32 0
%627 = extractelement <4 x float> %625, i32 1
%628 = extractelement <4 x float> %625, i32 2
%629 = fcmp oeq float %temp42.0, 3.000000e+00
%630 = select i1 %629, float 1.000000e+00, float 0.000000e+00
%631 = bitcast float %600 to i32
%632 = bitcast float %602 to i32
%633 = bitcast float %231 to i32
%634 = insertelement <4 x i32> undef, i32 %631, i32 0
%635 = insertelement <4 x i32> %634, i32 %632, i32 1
%636 = insertelement <4 x i32> %635, i32 %633, i32 2
%637 = bitcast <8 x i32> %68 to <32 x i8>
%638 = bitcast <4 x i32> %70 to <16 x i8>
%639 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %636, <32 x i8> %637, <16 x i8> %638, i32 2)
%640 = extractelement <4 x float> %639, i32 0
%641 = extractelement <4 x float> %639, i32 1
%642 = extractelement <4 x float> %639, i32 2
%643 = fcmp oeq float %temp42.0, 2.000000e+00
%644 = select i1 %643, float 1.000000e+00, float 0.000000e+00
%645 = bitcast float %600 to i32
%646 = bitcast float %602 to i32
%647 = bitcast float %231 to i32
%648 = insertelement <4 x i32> undef, i32 %645, i32 0
%649 = insertelement <4 x i32> %648, i32 %646, i32 1
%650 = insertelement <4 x i32> %649, i32 %647, i32 2
%651 = bitcast <8 x i32> %60 to <32 x i8>
%652 = bitcast <4 x i32> %62 to <16 x i8>
%653 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %650, <32 x i8> %651, <16 x i8> %652, i32 2)
%654 = extractelement <4 x float> %653, i32 0
%655 = extractelement <4 x float> %653, i32 1
%656 = extractelement <4 x float> %653, i32 2
%657 = fcmp oeq float %temp42.0, 1.000000e+00
%658 = select i1 %657, float 1.000000e+00, float 0.000000e+00
%659 = bitcast float %600 to i32
%660 = bitcast float %602 to i32
%661 = bitcast float %231 to i32
%662 = insertelement <4 x i32> undef, i32 %659, i32 0
%663 = insertelement <4 x i32> %662, i32 %660, i32 1
%664 = insertelement <4 x i32> %663, i32 %661, i32 2
%665 = bitcast <8 x i32> %52 to <32 x i8>
%666 = bitcast <4 x i32> %54 to <16 x i8>
%667 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %664, <32 x i8> %665, <16 x i8> %666, i32 2)
%668 = extractelement <4 x float> %667, i32 0
%669 = extractelement <4 x float> %667, i32 1
%670 = extractelement <4 x float> %667, i32 2
%671 = fcmp oeq float %temp42.0, 0.000000e+00
%672 = select i1 %671, float 1.000000e+00, float 0.000000e+00
%673 = fmul float %668, %672
%674 = fmul float %669, %672
%675 = fmul float %670, %672
%676 = fmul float %654, %658
%677 = fadd float %676, %673
%678 = fmul float %655, %658
%679 = fadd float %678, %674
%680 = fmul float %656, %658
%681 = fadd float %680, %675
%682 = fmul float %640, %644
%683 = fadd float %682, %677
%684 = fmul float %641, %644
%685 = fadd float %684, %679
%686 = fmul float %642, %644
%687 = fadd float %686, %681
%688 = fmul float %626, %630
%689 = fadd float %688, %683
%690 = fmul float %627, %630
%691 = fadd float %690, %685
%692 = fmul float %628, %630
%693 = fadd float %692, %687
%694 = fmul float %612, %616
%695 = fadd float %694, %689
%696 = fmul float %613, %616
%697 = fadd float %696, %691
%698 = fmul float %614, %616
%699 = fadd float %698, %693
%700 = fcmp une float %38, %temp24.0
%.sink216 = select i1 %700, float %41, float %40
%temp64.0 = select i1 %700, float 1.953125e-03, float 3.906250e-03
%701 = fdiv float 1.000000e+00, %.sink216
%702 = fmul float %105, %701
%703 = fmul float %104, %701
%704 = call float @llvm.floor.f32(float %702)
%705 = fsub float %702, %704
%706 = call float @llvm.floor.f32(float %703)
%707 = fsub float %703, %706
%708 = fmul float %42, 2.000000e+00
%709 = fmul float %708, %temp64.0
%710 = fsub float 1.000000e+00, %709
%711 = fmul float %temp64.0, %42
%712 = fmul float %705, %710
%713 = fadd float %712, %711
%714 = fmul float %707, %710
%715 = fadd float %714, %711
%716 = fmul float %713, %temp24.0
%717 = fadd float %716, %temp40.0
%718 = fmul float %715, %temp24.0
%719 = fadd float %718, %temp41.0
%720 = bitcast float %717 to i32
%721 = bitcast float %719 to i32
%722 = bitcast float %231 to i32
%723 = insertelement <4 x i32> undef, i32 %720, i32 0
%724 = insertelement <4 x i32> %723, i32 %721, i32 1
%725 = insertelement <4 x i32> %724, i32 %722, i32 2
%726 = bitcast <8 x i32> %84 to <32 x i8>
%727 = bitcast <4 x i32> %86 to <16 x i8>
%728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %725, <32 x i8> %726, <16 x i8> %727, i32 2)
%729 = extractelement <4 x float> %728, i32 0
%730 = extractelement <4 x float> %728, i32 1
%731 = extractelement <4 x float> %728, i32 2
%732 = fcmp oeq float %temp42.0, 4.000000e+00
%733 = select i1 %732, float 1.000000e+00, float 0.000000e+00
%734 = bitcast float %717 to i32
%735 = bitcast float %719 to i32
%736 = bitcast float %231 to i32
%737 = insertelement <4 x i32> undef, i32 %734, i32 0
%738 = insertelement <4 x i32> %737, i32 %735, i32 1
%739 = insertelement <4 x i32> %738, i32 %736, i32 2
%740 = bitcast <8 x i32> %76 to <32 x i8>
%741 = bitcast <4 x i32> %78 to <16 x i8>
%742 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %739, <32 x i8> %740, <16 x i8> %741, i32 2)
%743 = extractelement <4 x float> %742, i32 0
%744 = extractelement <4 x float> %742, i32 1
%745 = extractelement <4 x float> %742, i32 2
%746 = fcmp oeq float %temp42.0, 3.000000e+00
%747 = select i1 %746, float 1.000000e+00, float 0.000000e+00
%748 = bitcast float %717 to i32
%749 = bitcast float %719 to i32
%750 = bitcast float %231 to i32
%751 = insertelement <4 x i32> undef, i32 %748, i32 0
%752 = insertelement <4 x i32> %751, i32 %749, i32 1
%753 = insertelement <4 x i32> %752, i32 %750, i32 2
%754 = bitcast <8 x i32> %68 to <32 x i8>
%755 = bitcast <4 x i32> %70 to <16 x i8>
%756 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %753, <32 x i8> %754, <16 x i8> %755, i32 2)
%757 = extractelement <4 x float> %756, i32 0
%758 = extractelement <4 x float> %756, i32 1
%759 = extractelement <4 x float> %756, i32 2
%760 = fcmp oeq float %temp42.0, 2.000000e+00
%761 = select i1 %760, float 1.000000e+00, float 0.000000e+00
%762 = bitcast float %717 to i32
%763 = bitcast float %719 to i32
%764 = bitcast float %231 to i32
%765 = insertelement <4 x i32> undef, i32 %762, i32 0
%766 = insertelement <4 x i32> %765, i32 %763, i32 1
%767 = insertelement <4 x i32> %766, i32 %764, i32 2
%768 = bitcast <8 x i32> %60 to <32 x i8>
%769 = bitcast <4 x i32> %62 to <16 x i8>
%770 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %767, <32 x i8> %768, <16 x i8> %769, i32 2)
%771 = extractelement <4 x float> %770, i32 0
%772 = extractelement <4 x float> %770, i32 1
%773 = extractelement <4 x float> %770, i32 2
%774 = fcmp oeq float %temp42.0, 1.000000e+00
%775 = select i1 %774, float 1.000000e+00, float 0.000000e+00
%776 = bitcast float %717 to i32
%777 = bitcast float %719 to i32
%778 = bitcast float %231 to i32
%779 = insertelement <4 x i32> undef, i32 %776, i32 0
%780 = insertelement <4 x i32> %779, i32 %777, i32 1
%781 = insertelement <4 x i32> %780, i32 %778, i32 2
%782 = bitcast <8 x i32> %52 to <32 x i8>
%783 = bitcast <4 x i32> %54 to <16 x i8>
%784 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %781, <32 x i8> %782, <16 x i8> %783, i32 2)
%785 = extractelement <4 x float> %784, i32 0
%786 = extractelement <4 x float> %784, i32 1
%787 = extractelement <4 x float> %784, i32 2
%788 = fcmp oeq float %temp42.0, 0.000000e+00
%789 = select i1 %788, float 1.000000e+00, float 0.000000e+00
%790 = fmul float %785, %789
%791 = fmul float %786, %789
%792 = fmul float %787, %789
%793 = fmul float %771, %775
%794 = fadd float %793, %790
%795 = fmul float %772, %775
%796 = fadd float %795, %791
%797 = fmul float %773, %775
%798 = fadd float %797, %792
%799 = fmul float %757, %761
%800 = fadd float %799, %794
%801 = fmul float %758, %761
%802 = fadd float %801, %796
%803 = fmul float %759, %761
%804 = fadd float %803, %798
%805 = fmul float %743, %747
%806 = fadd float %805, %800
%807 = fmul float %744, %747
%808 = fadd float %807, %802
%809 = fmul float %745, %747
%810 = fadd float %809, %804
%811 = fmul float %729, %733
%812 = fadd float %811, %806
%813 = fmul float %730, %733
%814 = fadd float %813, %808
%815 = fmul float %731, %733
%816 = fadd float %815, %810
%817 = fcmp une float %38, %temp24.0
%.sink217 = select i1 %817, float %41, float %40
%temp68.0 = select i1 %817, float 1.953125e-03, float 3.906250e-03
%818 = fdiv float 1.000000e+00, %.sink217
%819 = fmul float %105, %818
%820 = fmul float %103, %818
%821 = call float @llvm.floor.f32(float %819)
%822 = fsub float %819, %821
%823 = call float @llvm.floor.f32(float %820)
%824 = fsub float %820, %823
%825 = fmul float %42, 2.000000e+00
%826 = fmul float %825, %temp68.0
%827 = fsub float 1.000000e+00, %826
%828 = fmul float %temp68.0, %42
%829 = fmul float %822, %827
%830 = fadd float %829, %828
%831 = fmul float %824, %827
%832 = fadd float %831, %828
%833 = fmul float %830, %temp24.0
%834 = fadd float %833, %temp40.0
%835 = fmul float %832, %temp24.0
%836 = fadd float %835, %temp41.0
%837 = bitcast float %834 to i32
%838 = bitcast float %836 to i32
%839 = bitcast float %231 to i32
%840 = insertelement <4 x i32> undef, i32 %837, i32 0
%841 = insertelement <4 x i32> %840, i32 %838, i32 1
%842 = insertelement <4 x i32> %841, i32 %839, i32 2
%843 = bitcast <8 x i32> %84 to <32 x i8>
%844 = bitcast <4 x i32> %86 to <16 x i8>
%845 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %842, <32 x i8> %843, <16 x i8> %844, i32 2)
%846 = extractelement <4 x float> %845, i32 0
%847 = extractelement <4 x float> %845, i32 1
%848 = extractelement <4 x float> %845, i32 2
%849 = fcmp oeq float %temp42.0, 4.000000e+00
%850 = select i1 %849, float 1.000000e+00, float 0.000000e+00
%851 = bitcast float %834 to i32
%852 = bitcast float %836 to i32
%853 = bitcast float %231 to i32
%854 = insertelement <4 x i32> undef, i32 %851, i32 0
%855 = insertelement <4 x i32> %854, i32 %852, i32 1
%856 = insertelement <4 x i32> %855, i32 %853, i32 2
%857 = bitcast <8 x i32> %76 to <32 x i8>
%858 = bitcast <4 x i32> %78 to <16 x i8>
%859 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %856, <32 x i8> %857, <16 x i8> %858, i32 2)
%860 = extractelement <4 x float> %859, i32 0
%861 = extractelement <4 x float> %859, i32 1
%862 = extractelement <4 x float> %859, i32 2
%863 = fcmp oeq float %temp42.0, 3.000000e+00
%864 = select i1 %863, float 1.000000e+00, float 0.000000e+00
%865 = bitcast float %834 to i32
%866 = bitcast float %836 to i32
%867 = bitcast float %231 to i32
%868 = insertelement <4 x i32> undef, i32 %865, i32 0
%869 = insertelement <4 x i32> %868, i32 %866, i32 1
%870 = insertelement <4 x i32> %869, i32 %867, i32 2
%871 = bitcast <8 x i32> %68 to <32 x i8>
%872 = bitcast <4 x i32> %70 to <16 x i8>
%873 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %870, <32 x i8> %871, <16 x i8> %872, i32 2)
%874 = extractelement <4 x float> %873, i32 0
%875 = extractelement <4 x float> %873, i32 1
%876 = extractelement <4 x float> %873, i32 2
%877 = fcmp oeq float %temp42.0, 2.000000e+00
%878 = select i1 %877, float 1.000000e+00, float 0.000000e+00
%879 = bitcast float %834 to i32
%880 = bitcast float %836 to i32
%881 = bitcast float %231 to i32
%882 = insertelement <4 x i32> undef, i32 %879, i32 0
%883 = insertelement <4 x i32> %882, i32 %880, i32 1
%884 = insertelement <4 x i32> %883, i32 %881, i32 2
%885 = bitcast <8 x i32> %60 to <32 x i8>
%886 = bitcast <4 x i32> %62 to <16 x i8>
%887 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %884, <32 x i8> %885, <16 x i8> %886, i32 2)
%888 = extractelement <4 x float> %887, i32 0
%889 = extractelement <4 x float> %887, i32 1
%890 = extractelement <4 x float> %887, i32 2
%891 = fcmp oeq float %temp42.0, 1.000000e+00
%892 = select i1 %891, float 1.000000e+00, float 0.000000e+00
%893 = bitcast float %834 to i32
%894 = bitcast float %836 to i32
%895 = bitcast float %231 to i32
%896 = insertelement <4 x i32> undef, i32 %893, i32 0
%897 = insertelement <4 x i32> %896, i32 %894, i32 1
%898 = insertelement <4 x i32> %897, i32 %895, i32 2
%899 = bitcast <8 x i32> %52 to <32 x i8>
%900 = bitcast <4 x i32> %54 to <16 x i8>
%901 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %898, <32 x i8> %899, <16 x i8> %900, i32 2)
%902 = extractelement <4 x float> %901, i32 0
%903 = extractelement <4 x float> %901, i32 1
%904 = extractelement <4 x float> %901, i32 2
%905 = fcmp oeq float %temp42.0, 0.000000e+00
%906 = select i1 %905, float 1.000000e+00, float 0.000000e+00
%907 = fmul float %902, %906
%908 = fmul float %903, %906
%909 = fmul float %904, %906
%910 = fmul float %888, %892
%911 = fadd float %910, %907
%912 = fmul float %889, %892
%913 = fadd float %912, %908
%914 = fmul float %890, %892
%915 = fadd float %914, %909
%916 = fmul float %874, %878
%917 = fadd float %916, %911
%918 = fmul float %875, %878
%919 = fadd float %918, %913
%920 = fmul float %876, %878
%921 = fadd float %920, %915
%922 = fmul float %860, %864
%923 = fadd float %922, %917
%924 = fmul float %861, %864
%925 = fadd float %924, %919
%926 = fmul float %862, %864
%927 = fadd float %926, %921
%928 = fmul float %846, %850
%929 = fadd float %928, %923
%930 = fmul float %847, %850
%931 = fadd float %930, %925
%932 = fmul float %848, %850
%933 = fadd float %932, %927
%934 = fcmp une float %38, %temp28.0
%.sink218 = select i1 %934, float %41, float %40
%temp72.0 = select i1 %934, float 1.953125e-03, float 3.906250e-03
%935 = fdiv float 1.000000e+00, %.sink218
%936 = fmul float %103, %935
%937 = fmul float %104, %935
%938 = call float @llvm.floor.f32(float %936)
%939 = fsub float %936, %938
%940 = call float @llvm.floor.f32(float %937)
%941 = fsub float %937, %940
%942 = fmul float %42, 2.000000e+00
%943 = fmul float %942, %temp72.0
%944 = fsub float 1.000000e+00, %943
%945 = fmul float %temp72.0, %42
%946 = fmul float %939, %944
%947 = fadd float %946, %945
%948 = fmul float %941, %944
%949 = fadd float %948, %945
%950 = fmul float %947, %temp28.0
%951 = fadd float %950, %temp16.0
%952 = fmul float %949, %temp28.0
%953 = fadd float %952, %temp17.0
%954 = bitcast float %951 to i32
%955 = bitcast float %953 to i32
%956 = bitcast float %231 to i32
%957 = insertelement <4 x i32> undef, i32 %954, i32 0
%958 = insertelement <4 x i32> %957, i32 %955, i32 1
%959 = insertelement <4 x i32> %958, i32 %956, i32 2
%960 = bitcast <8 x i32> %84 to <32 x i8>
%961 = bitcast <4 x i32> %86 to <16 x i8>
%962 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %959, <32 x i8> %960, <16 x i8> %961, i32 2)
%963 = extractelement <4 x float> %962, i32 0
%964 = extractelement <4 x float> %962, i32 1
%965 = extractelement <4 x float> %962, i32 2
%966 = fcmp oeq float %temp18.0, 4.000000e+00
%967 = select i1 %966, float 1.000000e+00, float 0.000000e+00
%968 = bitcast float %951 to i32
%969 = bitcast float %953 to i32
%970 = bitcast float %231 to i32
%971 = insertelement <4 x i32> undef, i32 %968, i32 0
%972 = insertelement <4 x i32> %971, i32 %969, i32 1
%973 = insertelement <4 x i32> %972, i32 %970, i32 2
%974 = bitcast <8 x i32> %76 to <32 x i8>
%975 = bitcast <4 x i32> %78 to <16 x i8>
%976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %973, <32 x i8> %974, <16 x i8> %975, i32 2)
%977 = extractelement <4 x float> %976, i32 0
%978 = extractelement <4 x float> %976, i32 1
%979 = extractelement <4 x float> %976, i32 2
%980 = fcmp oeq float %temp18.0, 3.000000e+00
%981 = select i1 %980, float 1.000000e+00, float 0.000000e+00
%982 = bitcast float %951 to i32
%983 = bitcast float %953 to i32
%984 = bitcast float %231 to i32
%985 = insertelement <4 x i32> undef, i32 %982, i32 0
%986 = insertelement <4 x i32> %985, i32 %983, i32 1
%987 = insertelement <4 x i32> %986, i32 %984, i32 2
%988 = bitcast <8 x i32> %68 to <32 x i8>
%989 = bitcast <4 x i32> %70 to <16 x i8>
%990 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %987, <32 x i8> %988, <16 x i8> %989, i32 2)
%991 = extractelement <4 x float> %990, i32 0
%992 = extractelement <4 x float> %990, i32 1
%993 = extractelement <4 x float> %990, i32 2
%994 = fcmp oeq float %temp18.0, 2.000000e+00
%995 = select i1 %994, float 1.000000e+00, float 0.000000e+00
%996 = bitcast float %951 to i32
%997 = bitcast float %953 to i32
%998 = bitcast float %231 to i32
%999 = insertelement <4 x i32> undef, i32 %996, i32 0
%1000 = insertelement <4 x i32> %999, i32 %997, i32 1
%1001 = insertelement <4 x i32> %1000, i32 %998, i32 2
%1002 = bitcast <8 x i32> %60 to <32 x i8>
%1003 = bitcast <4 x i32> %62 to <16 x i8>
%1004 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1001, <32 x i8> %1002, <16 x i8> %1003, i32 2)
%1005 = extractelement <4 x float> %1004, i32 0
%1006 = extractelement <4 x float> %1004, i32 1
%1007 = extractelement <4 x float> %1004, i32 2
%1008 = fcmp oeq float %temp18.0, 1.000000e+00
%1009 = select i1 %1008, float 1.000000e+00, float 0.000000e+00
%1010 = bitcast float %951 to i32
%1011 = bitcast float %953 to i32
%1012 = bitcast float %231 to i32
%1013 = insertelement <4 x i32> undef, i32 %1010, i32 0
%1014 = insertelement <4 x i32> %1013, i32 %1011, i32 1
%1015 = insertelement <4 x i32> %1014, i32 %1012, i32 2
%1016 = bitcast <8 x i32> %52 to <32 x i8>
%1017 = bitcast <4 x i32> %54 to <16 x i8>
%1018 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1015, <32 x i8> %1016, <16 x i8> %1017, i32 2)
%1019 = extractelement <4 x float> %1018, i32 0
%1020 = extractelement <4 x float> %1018, i32 1
%1021 = extractelement <4 x float> %1018, i32 2
%1022 = fcmp oeq float %temp18.0, 0.000000e+00
%1023 = select i1 %1022, float 1.000000e+00, float 0.000000e+00
%1024 = fmul float %1019, %1023
%1025 = fmul float %1020, %1023
%1026 = fmul float %1021, %1023
%1027 = fmul float %1005, %1009
%1028 = fadd float %1027, %1024
%1029 = fmul float %1006, %1009
%1030 = fadd float %1029, %1025
%1031 = fmul float %1007, %1009
%1032 = fadd float %1031, %1026
%1033 = fmul float %991, %995
%1034 = fadd float %1033, %1028
%1035 = fmul float %992, %995
%1036 = fadd float %1035, %1030
%1037 = fmul float %993, %995
%1038 = fadd float %1037, %1032
%1039 = fmul float %977, %981
%1040 = fadd float %1039, %1034
%1041 = fmul float %978, %981
%1042 = fadd float %1041, %1036
%1043 = fmul float %979, %981
%1044 = fadd float %1043, %1038
%1045 = fmul float %963, %967
%1046 = fadd float %1045, %1040
%1047 = fmul float %964, %967
%1048 = fadd float %1047, %1042
%1049 = fmul float %965, %967
%1050 = fadd float %1049, %1044
%1051 = fcmp une float %38, %temp28.0
%.sink219 = select i1 %1051, float %41, float %40
%temp76.0 = select i1 %1051, float 1.953125e-03, float 3.906250e-03
%1052 = fdiv float 1.000000e+00, %.sink219
%1053 = fmul float %105, %1052
%1054 = fmul float %104, %1052
%1055 = call float @llvm.floor.f32(float %1053)
%1056 = fsub float %1053, %1055
%1057 = call float @llvm.floor.f32(float %1054)
%1058 = fsub float %1054, %1057
%1059 = fmul float %42, 2.000000e+00
%1060 = fmul float %1059, %temp76.0
%1061 = fsub float 1.000000e+00, %1060
%1062 = fmul float %temp76.0, %42
%1063 = fmul float %1056, %1061
%1064 = fadd float %1063, %1062
%1065 = fmul float %1058, %1061
%1066 = fadd float %1065, %1062
%1067 = fmul float %1064, %temp28.0
%1068 = fadd float %1067, %temp16.0
%1069 = fmul float %1066, %temp28.0
%1070 = fadd float %1069, %temp17.0
%1071 = bitcast float %1068 to i32
%1072 = bitcast float %1070 to i32
%1073 = bitcast float %231 to i32
%1074 = insertelement <4 x i32> undef, i32 %1071, i32 0
%1075 = insertelement <4 x i32> %1074, i32 %1072, i32 1
%1076 = insertelement <4 x i32> %1075, i32 %1073, i32 2
%1077 = bitcast <8 x i32> %84 to <32 x i8>
%1078 = bitcast <4 x i32> %86 to <16 x i8>
%1079 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1076, <32 x i8> %1077, <16 x i8> %1078, i32 2)
%1080 = extractelement <4 x float> %1079, i32 0
%1081 = extractelement <4 x float> %1079, i32 1
%1082 = extractelement <4 x float> %1079, i32 2
%1083 = fcmp oeq float %temp18.0, 4.000000e+00
%1084 = select i1 %1083, float 1.000000e+00, float 0.000000e+00
%1085 = bitcast float %1068 to i32
%1086 = bitcast float %1070 to i32
%1087 = bitcast float %231 to i32
%1088 = insertelement <4 x i32> undef, i32 %1085, i32 0
%1089 = insertelement <4 x i32> %1088, i32 %1086, i32 1
%1090 = insertelement <4 x i32> %1089, i32 %1087, i32 2
%1091 = bitcast <8 x i32> %76 to <32 x i8>
%1092 = bitcast <4 x i32> %78 to <16 x i8>
%1093 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1090, <32 x i8> %1091, <16 x i8> %1092, i32 2)
%1094 = extractelement <4 x float> %1093, i32 0
%1095 = extractelement <4 x float> %1093, i32 1
%1096 = extractelement <4 x float> %1093, i32 2
%1097 = fcmp oeq float %temp18.0, 3.000000e+00
%1098 = select i1 %1097, float 1.000000e+00, float 0.000000e+00
%1099 = bitcast float %1068 to i32
%1100 = bitcast float %1070 to i32
%1101 = bitcast float %231 to i32
%1102 = insertelement <4 x i32> undef, i32 %1099, i32 0
%1103 = insertelement <4 x i32> %1102, i32 %1100, i32 1
%1104 = insertelement <4 x i32> %1103, i32 %1101, i32 2
%1105 = bitcast <8 x i32> %68 to <32 x i8>
%1106 = bitcast <4 x i32> %70 to <16 x i8>
%1107 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1104, <32 x i8> %1105, <16 x i8> %1106, i32 2)
%1108 = extractelement <4 x float> %1107, i32 0
%1109 = extractelement <4 x float> %1107, i32 1
%1110 = extractelement <4 x float> %1107, i32 2
%1111 = fcmp oeq float %temp18.0, 2.000000e+00
%1112 = select i1 %1111, float 1.000000e+00, float 0.000000e+00
%1113 = bitcast float %1068 to i32
%1114 = bitcast float %1070 to i32
%1115 = bitcast float %231 to i32
%1116 = insertelement <4 x i32> undef, i32 %1113, i32 0
%1117 = insertelement <4 x i32> %1116, i32 %1114, i32 1
%1118 = insertelement <4 x i32> %1117, i32 %1115, i32 2
%1119 = bitcast <8 x i32> %60 to <32 x i8>
%1120 = bitcast <4 x i32> %62 to <16 x i8>
%1121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1118, <32 x i8> %1119, <16 x i8> %1120, i32 2)
%1122 = extractelement <4 x float> %1121, i32 0
%1123 = extractelement <4 x float> %1121, i32 1
%1124 = extractelement <4 x float> %1121, i32 2
%1125 = fcmp oeq float %temp18.0, 1.000000e+00
%1126 = select i1 %1125, float 1.000000e+00, float 0.000000e+00
%1127 = bitcast float %1068 to i32
%1128 = bitcast float %1070 to i32
%1129 = bitcast float %231 to i32
%1130 = insertelement <4 x i32> undef, i32 %1127, i32 0
%1131 = insertelement <4 x i32> %1130, i32 %1128, i32 1
%1132 = insertelement <4 x i32> %1131, i32 %1129, i32 2
%1133 = bitcast <8 x i32> %52 to <32 x i8>
%1134 = bitcast <4 x i32> %54 to <16 x i8>
%1135 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1132, <32 x i8> %1133, <16 x i8> %1134, i32 2)
%1136 = extractelement <4 x float> %1135, i32 0
%1137 = extractelement <4 x float> %1135, i32 1
%1138 = extractelement <4 x float> %1135, i32 2
%1139 = fcmp oeq float %temp18.0, 0.000000e+00
%1140 = select i1 %1139, float 1.000000e+00, float 0.000000e+00
%1141 = fmul float %1136, %1140
%1142 = fmul float %1137, %1140
%1143 = fmul float %1138, %1140
%1144 = fmul float %1122, %1126
%1145 = fadd float %1144, %1141
%1146 = fmul float %1123, %1126
%1147 = fadd float %1146, %1142
%1148 = fmul float %1124, %1126
%1149 = fadd float %1148, %1143
%1150 = fmul float %1108, %1112
%1151 = fadd float %1150, %1145
%1152 = fmul float %1109, %1112
%1153 = fadd float %1152, %1147
%1154 = fmul float %1110, %1112
%1155 = fadd float %1154, %1149
%1156 = fmul float %1094, %1098
%1157 = fadd float %1156, %1151
%1158 = fmul float %1095, %1098
%1159 = fadd float %1158, %1153
%1160 = fmul float %1096, %1098
%1161 = fadd float %1160, %1155
%1162 = fmul float %1080, %1084
%1163 = fadd float %1162, %1157
%1164 = fmul float %1081, %1084
%1165 = fadd float %1164, %1159
%1166 = fmul float %1082, %1084
%1167 = fadd float %1166, %1161
%1168 = fcmp une float %38, %temp28.0
%.sink220 = select i1 %1168, float %41, float %40
%temp80.0 = select i1 %1168, float 1.953125e-03, float 3.906250e-03
%1169 = fdiv float 1.000000e+00, %.sink220
%1170 = fmul float %105, %1169
%1171 = fmul float %103, %1169
%1172 = call float @llvm.floor.f32(float %1170)
%1173 = fsub float %1170, %1172
%1174 = call float @llvm.floor.f32(float %1171)
%1175 = fsub float %1171, %1174
%1176 = fmul float %42, 2.000000e+00
%1177 = fmul float %1176, %temp80.0
%1178 = fsub float 1.000000e+00, %1177
%1179 = fmul float %temp80.0, %42
%1180 = fmul float %1173, %1178
%1181 = fadd float %1180, %1179
%1182 = fmul float %1175, %1178
%1183 = fadd float %1182, %1179
%1184 = fmul float %1181, %temp28.0
%1185 = fadd float %1184, %temp16.0
%1186 = fmul float %1183, %temp28.0
%1187 = fadd float %1186, %temp17.0
%1188 = bitcast float %1185 to i32
%1189 = bitcast float %1187 to i32
%1190 = bitcast float %231 to i32
%1191 = insertelement <4 x i32> undef, i32 %1188, i32 0
%1192 = insertelement <4 x i32> %1191, i32 %1189, i32 1
%1193 = insertelement <4 x i32> %1192, i32 %1190, i32 2
%1194 = bitcast <8 x i32> %84 to <32 x i8>
%1195 = bitcast <4 x i32> %86 to <16 x i8>
%1196 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1193, <32 x i8> %1194, <16 x i8> %1195, i32 2)
%1197 = extractelement <4 x float> %1196, i32 0
%1198 = extractelement <4 x float> %1196, i32 1
%1199 = extractelement <4 x float> %1196, i32 2
%1200 = fcmp oeq float %temp18.0, 4.000000e+00
%1201 = select i1 %1200, float 1.000000e+00, float 0.000000e+00
%1202 = bitcast float %1185 to i32
%1203 = bitcast float %1187 to i32
%1204 = bitcast float %231 to i32
%1205 = insertelement <4 x i32> undef, i32 %1202, i32 0
%1206 = insertelement <4 x i32> %1205, i32 %1203, i32 1
%1207 = insertelement <4 x i32> %1206, i32 %1204, i32 2
%1208 = bitcast <8 x i32> %76 to <32 x i8>
%1209 = bitcast <4 x i32> %78 to <16 x i8>
%1210 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1207, <32 x i8> %1208, <16 x i8> %1209, i32 2)
%1211 = extractelement <4 x float> %1210, i32 0
%1212 = extractelement <4 x float> %1210, i32 1
%1213 = extractelement <4 x float> %1210, i32 2
%1214 = fcmp oeq float %temp18.0, 3.000000e+00
%1215 = select i1 %1214, float 1.000000e+00, float 0.000000e+00
%1216 = bitcast float %1185 to i32
%1217 = bitcast float %1187 to i32
%1218 = bitcast float %231 to i32
%1219 = insertelement <4 x i32> undef, i32 %1216, i32 0
%1220 = insertelement <4 x i32> %1219, i32 %1217, i32 1
%1221 = insertelement <4 x i32> %1220, i32 %1218, i32 2
%1222 = bitcast <8 x i32> %68 to <32 x i8>
%1223 = bitcast <4 x i32> %70 to <16 x i8>
%1224 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1221, <32 x i8> %1222, <16 x i8> %1223, i32 2)
%1225 = extractelement <4 x float> %1224, i32 0
%1226 = extractelement <4 x float> %1224, i32 1
%1227 = extractelement <4 x float> %1224, i32 2
%1228 = fcmp oeq float %temp18.0, 2.000000e+00
%1229 = select i1 %1228, float 1.000000e+00, float 0.000000e+00
%1230 = bitcast float %1185 to i32
%1231 = bitcast float %1187 to i32
%1232 = bitcast float %231 to i32
%1233 = insertelement <4 x i32> undef, i32 %1230, i32 0
%1234 = insertelement <4 x i32> %1233, i32 %1231, i32 1
%1235 = insertelement <4 x i32> %1234, i32 %1232, i32 2
%1236 = bitcast <8 x i32> %60 to <32 x i8>
%1237 = bitcast <4 x i32> %62 to <16 x i8>
%1238 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1235, <32 x i8> %1236, <16 x i8> %1237, i32 2)
%1239 = extractelement <4 x float> %1238, i32 0
%1240 = extractelement <4 x float> %1238, i32 1
%1241 = extractelement <4 x float> %1238, i32 2
%1242 = fcmp oeq float %temp18.0, 1.000000e+00
%1243 = select i1 %1242, float 1.000000e+00, float 0.000000e+00
%1244 = bitcast float %1185 to i32
%1245 = bitcast float %1187 to i32
%1246 = bitcast float %231 to i32
%1247 = insertelement <4 x i32> undef, i32 %1244, i32 0
%1248 = insertelement <4 x i32> %1247, i32 %1245, i32 1
%1249 = insertelement <4 x i32> %1248, i32 %1246, i32 2
%1250 = bitcast <8 x i32> %52 to <32 x i8>
%1251 = bitcast <4 x i32> %54 to <16 x i8>
%1252 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1249, <32 x i8> %1250, <16 x i8> %1251, i32 2)
%1253 = extractelement <4 x float> %1252, i32 0
%1254 = extractelement <4 x float> %1252, i32 1
%1255 = extractelement <4 x float> %1252, i32 2
%1256 = fcmp oeq float %temp18.0, 0.000000e+00
%1257 = select i1 %1256, float 1.000000e+00, float 0.000000e+00
%1258 = fmul float %1253, %1257
%1259 = fmul float %1254, %1257
%1260 = fmul float %1255, %1257
%1261 = fmul float %1239, %1243
%1262 = fadd float %1261, %1258
%1263 = fmul float %1240, %1243
%1264 = fadd float %1263, %1259
%1265 = fmul float %1241, %1243
%1266 = fadd float %1265, %1260
%1267 = fmul float %1225, %1229
%1268 = fadd float %1267, %1262
%1269 = fmul float %1226, %1229
%1270 = fadd float %1269, %1264
%1271 = fmul float %1227, %1229
%1272 = fadd float %1271, %1266
%1273 = fmul float %1211, %1215
%1274 = fadd float %1273, %1268
%1275 = fmul float %1212, %1215
%1276 = fadd float %1275, %1270
%1277 = fmul float %1213, %1215
%1278 = fadd float %1277, %1272
%1279 = fmul float %1197, %1201
%1280 = fadd float %1279, %1274
%1281 = fmul float %1198, %1201
%1282 = fadd float %1281, %1276
%1283 = fmul float %1199, %1201
%1284 = fadd float %1283, %1278
%1285 = fmul float %1046, %156
%1286 = fmul float %1048, %156
%1287 = fmul float %1050, %156
%1288 = fmul float %1163, %154
%1289 = fadd float %1288, %1285
%1290 = fmul float %1165, %154
%1291 = fadd float %1290, %1286
%1292 = fmul float %1167, %154
%1293 = fadd float %1292, %1287
%1294 = fmul float %1280, %155
%1295 = fadd float %1294, %1289
%1296 = fmul float %1282, %155
%1297 = fadd float %1296, %1291
%1298 = fmul float %1284, %155
%1299 = fadd float %1298, %1293
%1300 = fmul float %695, %156
%1301 = fmul float %697, %156
%1302 = fmul float %699, %156
%1303 = fmul float %812, %154
%1304 = fadd float %1303, %1300
%1305 = fmul float %814, %154
%1306 = fadd float %1305, %1301
%1307 = fmul float %816, %154
%1308 = fadd float %1307, %1302
%1309 = fmul float %929, %155
%1310 = fadd float %1309, %1304
%1311 = fmul float %931, %155
%1312 = fadd float %1311, %1306
%1313 = fmul float %933, %155
%1314 = fadd float %1313, %1308
%1315 = fmul float %344, %156
%1316 = fmul float %346, %156
%1317 = fmul float %348, %156
%1318 = fmul float %461, %154
%1319 = fadd float %1318, %1315
%1320 = fmul float %463, %154
%1321 = fadd float %1320, %1316
%1322 = fmul float %465, %154
%1323 = fadd float %1322, %1317
%1324 = fmul float %578, %155
%1325 = fadd float %1324, %1319
%1326 = fmul float %580, %155
%1327 = fadd float %1326, %1321
%1328 = fmul float %582, %155
%1329 = fadd float %1328, %1323
%1330 = fmul float %95, %1325
%1331 = fmul float %95, %1327
%1332 = fmul float %95, %1329
%1333 = fmul float %96, %1310
%1334 = fadd float %1333, %1330
%1335 = fmul float %96, %1312
%1336 = fadd float %1335, %1331
%1337 = fmul float %96, %1314
%1338 = fadd float %1337, %1332
%1339 = fmul float %97, %1295
%1340 = fadd float %1339, %1334
%1341 = fmul float %97, %1297
%1342 = fadd float %1341, %1336
%1343 = fmul float %97, %1299
%1344 = fadd float %1343, %1338
%1345 = fcmp une float %38, %temp20.0
%.sink221 = select i1 %1345, float %41, float %40
%temp52.2 = select i1 %1345, float 1.953125e-03, float 3.906250e-03
%1346 = fdiv float 1.000000e+00, %.sink221
%1347 = fmul float %105, %1346
%1348 = fmul float %104, %1346
%1349 = call float @llvm.floor.f32(float %1347)
%1350 = fsub float %1347, %1349
%1351 = call float @llvm.floor.f32(float %1348)
%1352 = fsub float %1348, %1351
%1353 = fmul float %42, 2.000000e+00
%1354 = fmul float %1353, %temp52.2
%1355 = fsub float 1.000000e+00, %1354
%1356 = fmul float %temp52.2, %42
%1357 = fmul float %1350, %1355
%1358 = fadd float %1357, %1356
%1359 = fmul float %1352, %1355
%1360 = fadd float %1359, %1356
%1361 = fmul float %1358, %temp20.0
%1362 = fadd float %1361, %temp32.0
%1363 = fmul float %1360, %temp20.0
%1364 = fadd float %1363, %temp33.0
%1365 = bitcast float %1362 to i32
%1366 = bitcast float %1364 to i32
%1367 = bitcast float %231 to i32
%1368 = insertelement <4 x i32> undef, i32 %1365, i32 0
%1369 = insertelement <4 x i32> %1368, i32 %1366, i32 1
%1370 = insertelement <4 x i32> %1369, i32 %1367, i32 2
%1371 = bitcast <8 x i32> %88 to <32 x i8>
%1372 = bitcast <4 x i32> %90 to <16 x i8>
%1373 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1370, <32 x i8> %1371, <16 x i8> %1372, i32 2)
%1374 = extractelement <4 x float> %1373, i32 1
%1375 = extractelement <4 x float> %1373, i32 3
%1376 = fcmp oeq float %temp34.0, 4.000000e+00
%1377 = select i1 %1376, float 1.000000e+00, float 0.000000e+00
%1378 = bitcast float %1362 to i32
%1379 = bitcast float %1364 to i32
%1380 = bitcast float %231 to i32
%1381 = insertelement <4 x i32> undef, i32 %1378, i32 0
%1382 = insertelement <4 x i32> %1381, i32 %1379, i32 1
%1383 = insertelement <4 x i32> %1382, i32 %1380, i32 2
%1384 = bitcast <8 x i32> %80 to <32 x i8>
%1385 = bitcast <4 x i32> %82 to <16 x i8>
%1386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1383, <32 x i8> %1384, <16 x i8> %1385, i32 2)
%1387 = extractelement <4 x float> %1386, i32 1
%1388 = extractelement <4 x float> %1386, i32 3
%1389 = fcmp oeq float %temp34.0, 3.000000e+00
%1390 = select i1 %1389, float 1.000000e+00, float 0.000000e+00
%1391 = bitcast float %1362 to i32
%1392 = bitcast float %1364 to i32
%1393 = bitcast float %231 to i32
%1394 = insertelement <4 x i32> undef, i32 %1391, i32 0
%1395 = insertelement <4 x i32> %1394, i32 %1392, i32 1
%1396 = insertelement <4 x i32> %1395, i32 %1393, i32 2
%1397 = bitcast <8 x i32> %72 to <32 x i8>
%1398 = bitcast <4 x i32> %74 to <16 x i8>
%1399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1396, <32 x i8> %1397, <16 x i8> %1398, i32 2)
%1400 = extractelement <4 x float> %1399, i32 1
%1401 = extractelement <4 x float> %1399, i32 3
%1402 = fcmp oeq float %temp34.0, 2.000000e+00
%1403 = select i1 %1402, float 1.000000e+00, float 0.000000e+00
%1404 = bitcast float %1362 to i32
%1405 = bitcast float %1364 to i32
%1406 = bitcast float %231 to i32
%1407 = insertelement <4 x i32> undef, i32 %1404, i32 0
%1408 = insertelement <4 x i32> %1407, i32 %1405, i32 1
%1409 = insertelement <4 x i32> %1408, i32 %1406, i32 2
%1410 = bitcast <8 x i32> %64 to <32 x i8>
%1411 = bitcast <4 x i32> %66 to <16 x i8>
%1412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1409, <32 x i8> %1410, <16 x i8> %1411, i32 2)
%1413 = extractelement <4 x float> %1412, i32 1
%1414 = extractelement <4 x float> %1412, i32 3
%1415 = fcmp oeq float %temp34.0, 1.000000e+00
%1416 = select i1 %1415, float 1.000000e+00, float 0.000000e+00
%1417 = bitcast float %1362 to i32
%1418 = bitcast float %1364 to i32
%1419 = bitcast float %231 to i32
%1420 = insertelement <4 x i32> undef, i32 %1417, i32 0
%1421 = insertelement <4 x i32> %1420, i32 %1418, i32 1
%1422 = insertelement <4 x i32> %1421, i32 %1419, i32 2
%1423 = bitcast <8 x i32> %56 to <32 x i8>
%1424 = bitcast <4 x i32> %58 to <16 x i8>
%1425 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1422, <32 x i8> %1423, <16 x i8> %1424, i32 2)
%1426 = extractelement <4 x float> %1425, i32 1
%1427 = extractelement <4 x float> %1425, i32 3
%1428 = fcmp oeq float %temp34.0, 0.000000e+00
%1429 = select i1 %1428, float 1.000000e+00, float 0.000000e+00
%1430 = fmul float %1426, %1429
%1431 = fmul float %1427, %1429
%1432 = fmul float %1413, %1416
%1433 = fadd float %1432, %1430
%1434 = fmul float %1414, %1416
%1435 = fadd float %1434, %1431
%1436 = fmul float %1400, %1403
%1437 = fadd float %1436, %1433
%1438 = fmul float %1401, %1403
%1439 = fadd float %1438, %1435
%1440 = fmul float %1387, %1390
%1441 = fadd float %1440, %1437
%1442 = fmul float %1388, %1390
%1443 = fadd float %1442, %1439
%1444 = fmul float %1374, %1377
%1445 = fadd float %1444, %1441
%1446 = fmul float %1375, %1377
%1447 = fadd float %1446, %1443
%1448 = fmul float %1447, 2.000000e+00
%1449 = fadd float %1448, -1.000000e+00
%1450 = fmul float %1445, 2.000000e+00
%1451 = fadd float %1450, -1.000000e+00
%1452 = fmul float %1449, %1449
%1453 = fmul float %1451, %1451
%1454 = fadd float %1452, %1453
%1455 = call float @llvm.AMDIL.clamp.(float %1454, float 0.000000e+00, float 1.000000e+00)
%1456 = fcmp une float %38, %temp20.0
%.sink222 = select i1 %1456, float %41, float %40
%temp56.2 = select i1 %1456, float 1.953125e-03, float 3.906250e-03
%1457 = fdiv float 1.000000e+00, %.sink222
%1458 = fmul float %105, %1457
%1459 = fmul float %103, %1457
%1460 = call float @llvm.floor.f32(float %1458)
%1461 = fsub float %1458, %1460
%1462 = call float @llvm.floor.f32(float %1459)
%1463 = fsub float %1459, %1462
%1464 = fmul float %42, 2.000000e+00
%1465 = fmul float %1464, %temp56.2
%1466 = fsub float 1.000000e+00, %1465
%1467 = fmul float %temp56.2, %42
%1468 = fmul float %1461, %1466
%1469 = fadd float %1468, %1467
%1470 = fmul float %1463, %1466
%1471 = fadd float %1470, %1467
%1472 = fmul float %1469, %temp20.0
%1473 = fadd float %1472, %temp32.0
%1474 = fmul float %1471, %temp20.0
%1475 = fadd float %1474, %temp33.0
%1476 = bitcast float %1473 to i32
%1477 = bitcast float %1475 to i32
%1478 = bitcast float %231 to i32
%1479 = insertelement <4 x i32> undef, i32 %1476, i32 0
%1480 = insertelement <4 x i32> %1479, i32 %1477, i32 1
%1481 = insertelement <4 x i32> %1480, i32 %1478, i32 2
%1482 = bitcast <8 x i32> %88 to <32 x i8>
%1483 = bitcast <4 x i32> %90 to <16 x i8>
%1484 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1481, <32 x i8> %1482, <16 x i8> %1483, i32 2)
%1485 = extractelement <4 x float> %1484, i32 1
%1486 = extractelement <4 x float> %1484, i32 3
%1487 = fcmp oeq float %temp34.0, 4.000000e+00
%1488 = select i1 %1487, float 1.000000e+00, float 0.000000e+00
%1489 = bitcast float %1473 to i32
%1490 = bitcast float %1475 to i32
%1491 = bitcast float %231 to i32
%1492 = insertelement <4 x i32> undef, i32 %1489, i32 0
%1493 = insertelement <4 x i32> %1492, i32 %1490, i32 1
%1494 = insertelement <4 x i32> %1493, i32 %1491, i32 2
%1495 = bitcast <8 x i32> %80 to <32 x i8>
%1496 = bitcast <4 x i32> %82 to <16 x i8>
%1497 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1494, <32 x i8> %1495, <16 x i8> %1496, i32 2)
%1498 = extractelement <4 x float> %1497, i32 1
%1499 = extractelement <4 x float> %1497, i32 3
%1500 = fcmp oeq float %temp34.0, 3.000000e+00
%1501 = select i1 %1500, float 1.000000e+00, float 0.000000e+00
%1502 = bitcast float %1473 to i32
%1503 = bitcast float %1475 to i32
%1504 = bitcast float %231 to i32
%1505 = insertelement <4 x i32> undef, i32 %1502, i32 0
%1506 = insertelement <4 x i32> %1505, i32 %1503, i32 1
%1507 = insertelement <4 x i32> %1506, i32 %1504, i32 2
%1508 = bitcast <8 x i32> %72 to <32 x i8>
%1509 = bitcast <4 x i32> %74 to <16 x i8>
%1510 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1507, <32 x i8> %1508, <16 x i8> %1509, i32 2)
%1511 = extractelement <4 x float> %1510, i32 1
%1512 = extractelement <4 x float> %1510, i32 3
%1513 = fcmp oeq float %temp34.0, 2.000000e+00
%1514 = select i1 %1513, float 1.000000e+00, float 0.000000e+00
%1515 = bitcast float %1473 to i32
%1516 = bitcast float %1475 to i32
%1517 = bitcast float %231 to i32
%1518 = insertelement <4 x i32> undef, i32 %1515, i32 0
%1519 = insertelement <4 x i32> %1518, i32 %1516, i32 1
%1520 = insertelement <4 x i32> %1519, i32 %1517, i32 2
%1521 = bitcast <8 x i32> %64 to <32 x i8>
%1522 = bitcast <4 x i32> %66 to <16 x i8>
%1523 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1520, <32 x i8> %1521, <16 x i8> %1522, i32 2)
%1524 = extractelement <4 x float> %1523, i32 1
%1525 = extractelement <4 x float> %1523, i32 3
%1526 = fcmp oeq float %temp34.0, 1.000000e+00
%1527 = select i1 %1526, float 1.000000e+00, float 0.000000e+00
%1528 = bitcast float %1473 to i32
%1529 = bitcast float %1475 to i32
%1530 = bitcast float %231 to i32
%1531 = insertelement <4 x i32> undef, i32 %1528, i32 0
%1532 = insertelement <4 x i32> %1531, i32 %1529, i32 1
%1533 = insertelement <4 x i32> %1532, i32 %1530, i32 2
%1534 = bitcast <8 x i32> %56 to <32 x i8>
%1535 = bitcast <4 x i32> %58 to <16 x i8>
%1536 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1533, <32 x i8> %1534, <16 x i8> %1535, i32 2)
%1537 = extractelement <4 x float> %1536, i32 1
%1538 = extractelement <4 x float> %1536, i32 3
%1539 = fcmp oeq float %temp34.0, 0.000000e+00
%1540 = select i1 %1539, float 1.000000e+00, float 0.000000e+00
%1541 = fmul float %1537, %1540
%1542 = fmul float %1538, %1540
%1543 = fmul float %1524, %1527
%1544 = fadd float %1543, %1541
%1545 = fmul float %1525, %1527
%1546 = fadd float %1545, %1542
%1547 = fmul float %1511, %1514
%1548 = fadd float %1547, %1544
%1549 = fmul float %1512, %1514
%1550 = fadd float %1549, %1546
%1551 = fmul float %1498, %1501
%1552 = fadd float %1551, %1548
%1553 = fmul float %1499, %1501
%1554 = fadd float %1553, %1550
%1555 = fmul float %1485, %1488
%1556 = fadd float %1555, %1552
%1557 = fmul float %1486, %1488
%1558 = fadd float %1557, %1554
%1559 = fmul float %1558, 2.000000e+00
%1560 = fadd float %1559, -1.000000e+00
%1561 = fmul float %1556, 2.000000e+00
%1562 = fadd float %1561, -1.000000e+00
%1563 = fmul float %1560, %1560
%1564 = fmul float %1562, %1562
%1565 = fadd float %1563, %1564
%1566 = call float @llvm.AMDIL.clamp.(float %1565, float 0.000000e+00, float 1.000000e+00)
%1567 = fcmp une float %38, %temp20.0
%.sink223 = select i1 %1567, float %41, float %40
%temp60.2 = select i1 %1567, float 1.953125e-03, float 3.906250e-03
%1568 = fdiv float 1.000000e+00, %.sink223
%1569 = fmul float %103, %1568
%1570 = fmul float %104, %1568
%1571 = call float @llvm.floor.f32(float %1569)
%1572 = fsub float %1569, %1571
%1573 = call float @llvm.floor.f32(float %1570)
%1574 = fsub float %1570, %1573
%1575 = fmul float %42, 2.000000e+00
%1576 = fmul float %1575, %temp60.2
%1577 = fsub float 1.000000e+00, %1576
%1578 = fmul float %temp60.2, %42
%1579 = fmul float %1572, %1577
%1580 = fadd float %1579, %1578
%1581 = fmul float %1574, %1577
%1582 = fadd float %1581, %1578
%1583 = fmul float %1580, %temp20.0
%1584 = fadd float %1583, %temp32.0
%1585 = fmul float %1582, %temp20.0
%1586 = fadd float %1585, %temp33.0
%1587 = bitcast float %1584 to i32
%1588 = bitcast float %1586 to i32
%1589 = bitcast float %231 to i32
%1590 = insertelement <4 x i32> undef, i32 %1587, i32 0
%1591 = insertelement <4 x i32> %1590, i32 %1588, i32 1
%1592 = insertelement <4 x i32> %1591, i32 %1589, i32 2
%1593 = bitcast <8 x i32> %88 to <32 x i8>
%1594 = bitcast <4 x i32> %90 to <16 x i8>
%1595 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1592, <32 x i8> %1593, <16 x i8> %1594, i32 2)
%1596 = extractelement <4 x float> %1595, i32 1
%1597 = extractelement <4 x float> %1595, i32 3
%1598 = fcmp oeq float %temp34.0, 4.000000e+00
%1599 = select i1 %1598, float 1.000000e+00, float 0.000000e+00
%1600 = bitcast float %1584 to i32
%1601 = bitcast float %1586 to i32
%1602 = bitcast float %231 to i32
%1603 = insertelement <4 x i32> undef, i32 %1600, i32 0
%1604 = insertelement <4 x i32> %1603, i32 %1601, i32 1
%1605 = insertelement <4 x i32> %1604, i32 %1602, i32 2
%1606 = bitcast <8 x i32> %80 to <32 x i8>
%1607 = bitcast <4 x i32> %82 to <16 x i8>
%1608 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1605, <32 x i8> %1606, <16 x i8> %1607, i32 2)
%1609 = extractelement <4 x float> %1608, i32 1
%1610 = extractelement <4 x float> %1608, i32 3
%1611 = fcmp oeq float %temp34.0, 3.000000e+00
%1612 = select i1 %1611, float 1.000000e+00, float 0.000000e+00
%1613 = bitcast float %1584 to i32
%1614 = bitcast float %1586 to i32
%1615 = bitcast float %231 to i32
%1616 = insertelement <4 x i32> undef, i32 %1613, i32 0
%1617 = insertelement <4 x i32> %1616, i32 %1614, i32 1
%1618 = insertelement <4 x i32> %1617, i32 %1615, i32 2
%1619 = bitcast <8 x i32> %72 to <32 x i8>
%1620 = bitcast <4 x i32> %74 to <16 x i8>
%1621 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1618, <32 x i8> %1619, <16 x i8> %1620, i32 2)
%1622 = extractelement <4 x float> %1621, i32 1
%1623 = extractelement <4 x float> %1621, i32 3
%1624 = fcmp oeq float %temp34.0, 2.000000e+00
%1625 = select i1 %1624, float 1.000000e+00, float 0.000000e+00
%1626 = bitcast float %1584 to i32
%1627 = bitcast float %1586 to i32
%1628 = bitcast float %231 to i32
%1629 = insertelement <4 x i32> undef, i32 %1626, i32 0
%1630 = insertelement <4 x i32> %1629, i32 %1627, i32 1
%1631 = insertelement <4 x i32> %1630, i32 %1628, i32 2
%1632 = bitcast <8 x i32> %64 to <32 x i8>
%1633 = bitcast <4 x i32> %66 to <16 x i8>
%1634 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1631, <32 x i8> %1632, <16 x i8> %1633, i32 2)
%1635 = extractelement <4 x float> %1634, i32 1
%1636 = extractelement <4 x float> %1634, i32 3
%1637 = fcmp oeq float %temp34.0, 1.000000e+00
%1638 = select i1 %1637, float 1.000000e+00, float 0.000000e+00
%1639 = bitcast float %1584 to i32
%1640 = bitcast float %1586 to i32
%1641 = bitcast float %231 to i32
%1642 = insertelement <4 x i32> undef, i32 %1639, i32 0
%1643 = insertelement <4 x i32> %1642, i32 %1640, i32 1
%1644 = insertelement <4 x i32> %1643, i32 %1641, i32 2
%1645 = bitcast <8 x i32> %56 to <32 x i8>
%1646 = bitcast <4 x i32> %58 to <16 x i8>
%1647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1644, <32 x i8> %1645, <16 x i8> %1646, i32 2)
%1648 = extractelement <4 x float> %1647, i32 1
%1649 = extractelement <4 x float> %1647, i32 3
%1650 = fcmp oeq float %temp34.0, 0.000000e+00
%1651 = select i1 %1650, float 1.000000e+00, float 0.000000e+00
%1652 = fmul float %1648, %1651
%1653 = fmul float %1649, %1651
%1654 = fmul float %1635, %1638
%1655 = fadd float %1654, %1652
%1656 = fmul float %1636, %1638
%1657 = fadd float %1656, %1653
%1658 = fmul float %1622, %1625
%1659 = fadd float %1658, %1655
%1660 = fmul float %1623, %1625
%1661 = fadd float %1660, %1657
%1662 = fmul float %1609, %1612
%1663 = fadd float %1662, %1659
%1664 = fmul float %1610, %1612
%1665 = fadd float %1664, %1661
%1666 = fmul float %1596, %1599
%1667 = fadd float %1666, %1663
%1668 = fmul float %1597, %1599
%1669 = fadd float %1668, %1665
%1670 = fmul float %1669, 2.000000e+00
%1671 = fadd float %1670, -1.000000e+00
%1672 = fmul float %1667, 2.000000e+00
%1673 = fadd float %1672, -1.000000e+00
%1674 = fmul float %1671, %1671
%1675 = fmul float %1673, %1673
%1676 = fadd float %1674, %1675
%1677 = call float @llvm.AMDIL.clamp.(float %1676, float 0.000000e+00, float 1.000000e+00)
%1678 = fmul float %154, 0.000000e+00
%1679 = fmul float %1449, %154
%1680 = fmul float %1451, %154
%1681 = fmul float %1562, %155
%1682 = fadd float %1681, %1678
%1683 = fmul float %155, 0.000000e+00
%1684 = fadd float %1683, %1679
%1685 = fmul float %1560, %155
%1686 = fadd float %1685, %1680
%1687 = fmul float %1671, %156
%1688 = fadd float %1687, %1682
%1689 = fmul float %1673, %156
%1690 = fadd float %1689, %1684
%1691 = fmul float %156, 0.000000e+00
%1692 = fadd float %1691, %1686
%1693 = fcmp une float %38, %temp24.0
%.sink224 = select i1 %1693, float %41, float %40
%temp48.3 = select i1 %1693, float 1.953125e-03, float 3.906250e-03
%1694 = fdiv float 1.000000e+00, %.sink224
%1695 = fmul float %105, %1694
%1696 = fmul float %104, %1694
%1697 = call float @llvm.floor.f32(float %1695)
%1698 = fsub float %1695, %1697
%1699 = call float @llvm.floor.f32(float %1696)
%1700 = fsub float %1696, %1699
%1701 = fmul float %42, 2.000000e+00
%1702 = fmul float %1701, %temp48.3
%1703 = fsub float 1.000000e+00, %1702
%1704 = fmul float %temp48.3, %42
%1705 = fmul float %1698, %1703
%1706 = fadd float %1705, %1704
%1707 = fmul float %1700, %1703
%1708 = fadd float %1707, %1704
%1709 = fmul float %1706, %temp24.0
%1710 = fadd float %1709, %temp40.0
%1711 = fmul float %1708, %temp24.0
%1712 = fadd float %1711, %temp41.0
%1713 = bitcast float %1710 to i32
%1714 = bitcast float %1712 to i32
%1715 = bitcast float %231 to i32
%1716 = insertelement <4 x i32> undef, i32 %1713, i32 0
%1717 = insertelement <4 x i32> %1716, i32 %1714, i32 1
%1718 = insertelement <4 x i32> %1717, i32 %1715, i32 2
%1719 = bitcast <8 x i32> %88 to <32 x i8>
%1720 = bitcast <4 x i32> %90 to <16 x i8>
%1721 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1718, <32 x i8> %1719, <16 x i8> %1720, i32 2)
%1722 = extractelement <4 x float> %1721, i32 1
%1723 = extractelement <4 x float> %1721, i32 3
%1724 = fcmp oeq float %temp42.0, 4.000000e+00
%1725 = select i1 %1724, float 1.000000e+00, float 0.000000e+00
%1726 = bitcast float %1710 to i32
%1727 = bitcast float %1712 to i32
%1728 = bitcast float %231 to i32
%1729 = insertelement <4 x i32> undef, i32 %1726, i32 0
%1730 = insertelement <4 x i32> %1729, i32 %1727, i32 1
%1731 = insertelement <4 x i32> %1730, i32 %1728, i32 2
%1732 = bitcast <8 x i32> %80 to <32 x i8>
%1733 = bitcast <4 x i32> %82 to <16 x i8>
%1734 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1731, <32 x i8> %1732, <16 x i8> %1733, i32 2)
%1735 = extractelement <4 x float> %1734, i32 1
%1736 = extractelement <4 x float> %1734, i32 3
%1737 = fcmp oeq float %temp42.0, 3.000000e+00
%1738 = select i1 %1737, float 1.000000e+00, float 0.000000e+00
%1739 = bitcast float %1710 to i32
%1740 = bitcast float %1712 to i32
%1741 = bitcast float %231 to i32
%1742 = insertelement <4 x i32> undef, i32 %1739, i32 0
%1743 = insertelement <4 x i32> %1742, i32 %1740, i32 1
%1744 = insertelement <4 x i32> %1743, i32 %1741, i32 2
%1745 = bitcast <8 x i32> %72 to <32 x i8>
%1746 = bitcast <4 x i32> %74 to <16 x i8>
%1747 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1744, <32 x i8> %1745, <16 x i8> %1746, i32 2)
%1748 = extractelement <4 x float> %1747, i32 1
%1749 = extractelement <4 x float> %1747, i32 3
%1750 = fcmp oeq float %temp42.0, 2.000000e+00
%1751 = select i1 %1750, float 1.000000e+00, float 0.000000e+00
%1752 = bitcast float %1710 to i32
%1753 = bitcast float %1712 to i32
%1754 = bitcast float %231 to i32
%1755 = insertelement <4 x i32> undef, i32 %1752, i32 0
%1756 = insertelement <4 x i32> %1755, i32 %1753, i32 1
%1757 = insertelement <4 x i32> %1756, i32 %1754, i32 2
%1758 = bitcast <8 x i32> %64 to <32 x i8>
%1759 = bitcast <4 x i32> %66 to <16 x i8>
%1760 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1757, <32 x i8> %1758, <16 x i8> %1759, i32 2)
%1761 = extractelement <4 x float> %1760, i32 1
%1762 = extractelement <4 x float> %1760, i32 3
%1763 = fcmp oeq float %temp42.0, 1.000000e+00
%1764 = select i1 %1763, float 1.000000e+00, float 0.000000e+00
%1765 = bitcast float %1710 to i32
%1766 = bitcast float %1712 to i32
%1767 = bitcast float %231 to i32
%1768 = insertelement <4 x i32> undef, i32 %1765, i32 0
%1769 = insertelement <4 x i32> %1768, i32 %1766, i32 1
%1770 = insertelement <4 x i32> %1769, i32 %1767, i32 2
%1771 = bitcast <8 x i32> %56 to <32 x i8>
%1772 = bitcast <4 x i32> %58 to <16 x i8>
%1773 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1770, <32 x i8> %1771, <16 x i8> %1772, i32 2)
%1774 = extractelement <4 x float> %1773, i32 1
%1775 = extractelement <4 x float> %1773, i32 3
%1776 = fcmp oeq float %temp42.0, 0.000000e+00
%1777 = select i1 %1776, float 1.000000e+00, float 0.000000e+00
%1778 = fmul float %1774, %1777
%1779 = fmul float %1775, %1777
%1780 = fmul float %1761, %1764
%1781 = fadd float %1780, %1778
%1782 = fmul float %1762, %1764
%1783 = fadd float %1782, %1779
%1784 = fmul float %1748, %1751
%1785 = fadd float %1784, %1781
%1786 = fmul float %1749, %1751
%1787 = fadd float %1786, %1783
%1788 = fmul float %1735, %1738
%1789 = fadd float %1788, %1785
%1790 = fmul float %1736, %1738
%1791 = fadd float %1790, %1787
%1792 = fmul float %1722, %1725
%1793 = fadd float %1792, %1789
%1794 = fmul float %1723, %1725
%1795 = fadd float %1794, %1791
%1796 = fmul float %1795, 2.000000e+00
%1797 = fadd float %1796, -1.000000e+00
%1798 = fmul float %1793, 2.000000e+00
%1799 = fadd float %1798, -1.000000e+00
%1800 = fmul float %1797, %1797
%1801 = fmul float %1799, %1799
%1802 = fadd float %1800, %1801
%1803 = call float @llvm.AMDIL.clamp.(float %1802, float 0.000000e+00, float 1.000000e+00)
%1804 = fcmp une float %38, %temp24.0
%.sink225 = select i1 %1804, float %41, float %40
%temp52.4 = select i1 %1804, float 1.953125e-03, float 3.906250e-03
%1805 = fdiv float 1.000000e+00, %.sink225
%1806 = fmul float %105, %1805
%1807 = fmul float %103, %1805
%1808 = call float @llvm.floor.f32(float %1806)
%1809 = fsub float %1806, %1808
%1810 = call float @llvm.floor.f32(float %1807)
%1811 = fsub float %1807, %1810
%1812 = fmul float %42, 2.000000e+00
%1813 = fmul float %1812, %temp52.4
%1814 = fsub float 1.000000e+00, %1813
%1815 = fmul float %temp52.4, %42
%1816 = fmul float %1809, %1814
%1817 = fadd float %1816, %1815
%1818 = fmul float %1811, %1814
%1819 = fadd float %1818, %1815
%1820 = fmul float %1817, %temp24.0
%1821 = fadd float %1820, %temp40.0
%1822 = fmul float %1819, %temp24.0
%1823 = fadd float %1822, %temp41.0
%1824 = bitcast float %1821 to i32
%1825 = bitcast float %1823 to i32
%1826 = bitcast float %231 to i32
%1827 = insertelement <4 x i32> undef, i32 %1824, i32 0
%1828 = insertelement <4 x i32> %1827, i32 %1825, i32 1
%1829 = insertelement <4 x i32> %1828, i32 %1826, i32 2
%1830 = bitcast <8 x i32> %88 to <32 x i8>
%1831 = bitcast <4 x i32> %90 to <16 x i8>
%1832 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1829, <32 x i8> %1830, <16 x i8> %1831, i32 2)
%1833 = extractelement <4 x float> %1832, i32 1
%1834 = extractelement <4 x float> %1832, i32 3
%1835 = fcmp oeq float %temp42.0, 4.000000e+00
%1836 = select i1 %1835, float 1.000000e+00, float 0.000000e+00
%1837 = bitcast float %1821 to i32
%1838 = bitcast float %1823 to i32
%1839 = bitcast float %231 to i32
%1840 = insertelement <4 x i32> undef, i32 %1837, i32 0
%1841 = insertelement <4 x i32> %1840, i32 %1838, i32 1
%1842 = insertelement <4 x i32> %1841, i32 %1839, i32 2
%1843 = bitcast <8 x i32> %80 to <32 x i8>
%1844 = bitcast <4 x i32> %82 to <16 x i8>
%1845 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1842, <32 x i8> %1843, <16 x i8> %1844, i32 2)
%1846 = extractelement <4 x float> %1845, i32 1
%1847 = extractelement <4 x float> %1845, i32 3
%1848 = fcmp oeq float %temp42.0, 3.000000e+00
%1849 = select i1 %1848, float 1.000000e+00, float 0.000000e+00
%1850 = bitcast float %1821 to i32
%1851 = bitcast float %1823 to i32
%1852 = bitcast float %231 to i32
%1853 = insertelement <4 x i32> undef, i32 %1850, i32 0
%1854 = insertelement <4 x i32> %1853, i32 %1851, i32 1
%1855 = insertelement <4 x i32> %1854, i32 %1852, i32 2
%1856 = bitcast <8 x i32> %72 to <32 x i8>
%1857 = bitcast <4 x i32> %74 to <16 x i8>
%1858 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1855, <32 x i8> %1856, <16 x i8> %1857, i32 2)
%1859 = extractelement <4 x float> %1858, i32 1
%1860 = extractelement <4 x float> %1858, i32 3
%1861 = fcmp oeq float %temp42.0, 2.000000e+00
%1862 = select i1 %1861, float 1.000000e+00, float 0.000000e+00
%1863 = bitcast float %1821 to i32
%1864 = bitcast float %1823 to i32
%1865 = bitcast float %231 to i32
%1866 = insertelement <4 x i32> undef, i32 %1863, i32 0
%1867 = insertelement <4 x i32> %1866, i32 %1864, i32 1
%1868 = insertelement <4 x i32> %1867, i32 %1865, i32 2
%1869 = bitcast <8 x i32> %64 to <32 x i8>
%1870 = bitcast <4 x i32> %66 to <16 x i8>
%1871 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1868, <32 x i8> %1869, <16 x i8> %1870, i32 2)
%1872 = extractelement <4 x float> %1871, i32 1
%1873 = extractelement <4 x float> %1871, i32 3
%1874 = fcmp oeq float %temp42.0, 1.000000e+00
%1875 = select i1 %1874, float 1.000000e+00, float 0.000000e+00
%1876 = bitcast float %1821 to i32
%1877 = bitcast float %1823 to i32
%1878 = bitcast float %231 to i32
%1879 = insertelement <4 x i32> undef, i32 %1876, i32 0
%1880 = insertelement <4 x i32> %1879, i32 %1877, i32 1
%1881 = insertelement <4 x i32> %1880, i32 %1878, i32 2
%1882 = bitcast <8 x i32> %56 to <32 x i8>
%1883 = bitcast <4 x i32> %58 to <16 x i8>
%1884 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1881, <32 x i8> %1882, <16 x i8> %1883, i32 2)
%1885 = extractelement <4 x float> %1884, i32 1
%1886 = extractelement <4 x float> %1884, i32 3
%1887 = fcmp oeq float %temp42.0, 0.000000e+00
%1888 = select i1 %1887, float 1.000000e+00, float 0.000000e+00
%1889 = fmul float %1885, %1888
%1890 = fmul float %1886, %1888
%1891 = fmul float %1872, %1875
%1892 = fadd float %1891, %1889
%1893 = fmul float %1873, %1875
%1894 = fadd float %1893, %1890
%1895 = fmul float %1859, %1862
%1896 = fadd float %1895, %1892
%1897 = fmul float %1860, %1862
%1898 = fadd float %1897, %1894
%1899 = fmul float %1846, %1849
%1900 = fadd float %1899, %1896
%1901 = fmul float %1847, %1849
%1902 = fadd float %1901, %1898
%1903 = fmul float %1833, %1836
%1904 = fadd float %1903, %1900
%1905 = fmul float %1834, %1836
%1906 = fadd float %1905, %1902
%1907 = fmul float %1906, 2.000000e+00
%1908 = fadd float %1907, -1.000000e+00
%1909 = fmul float %1904, 2.000000e+00
%1910 = fadd float %1909, -1.000000e+00
%1911 = fmul float %1908, %1908
%1912 = fmul float %1910, %1910
%1913 = fadd float %1911, %1912
%1914 = call float @llvm.AMDIL.clamp.(float %1913, float 0.000000e+00, float 1.000000e+00)
%1915 = fcmp une float %38, %temp24.0
%.sink226 = select i1 %1915, float %41, float %40
%temp56.4 = select i1 %1915, float 1.953125e-03, float 3.906250e-03
%1916 = fdiv float 1.000000e+00, %.sink226
%1917 = fmul float %103, %1916
%1918 = fmul float %104, %1916
%1919 = call float @llvm.floor.f32(float %1917)
%1920 = fsub float %1917, %1919
%1921 = call float @llvm.floor.f32(float %1918)
%1922 = fsub float %1918, %1921
%1923 = fmul float %42, 2.000000e+00
%1924 = fmul float %1923, %temp56.4
%1925 = fsub float 1.000000e+00, %1924
%1926 = fmul float %temp56.4, %42
%1927 = fmul float %1920, %1925
%1928 = fadd float %1927, %1926
%1929 = fmul float %1922, %1925
%1930 = fadd float %1929, %1926
%1931 = fmul float %1928, %temp24.0
%1932 = fadd float %1931, %temp40.0
%1933 = fmul float %1930, %temp24.0
%1934 = fadd float %1933, %temp41.0
%1935 = bitcast float %1932 to i32
%1936 = bitcast float %1934 to i32
%1937 = bitcast float %231 to i32
%1938 = insertelement <4 x i32> undef, i32 %1935, i32 0
%1939 = insertelement <4 x i32> %1938, i32 %1936, i32 1
%1940 = insertelement <4 x i32> %1939, i32 %1937, i32 2
%1941 = bitcast <8 x i32> %88 to <32 x i8>
%1942 = bitcast <4 x i32> %90 to <16 x i8>
%1943 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1940, <32 x i8> %1941, <16 x i8> %1942, i32 2)
%1944 = extractelement <4 x float> %1943, i32 1
%1945 = extractelement <4 x float> %1943, i32 3
%1946 = fcmp oeq float %temp42.0, 4.000000e+00
%1947 = select i1 %1946, float 1.000000e+00, float 0.000000e+00
%1948 = bitcast float %1932 to i32
%1949 = bitcast float %1934 to i32
%1950 = bitcast float %231 to i32
%1951 = insertelement <4 x i32> undef, i32 %1948, i32 0
%1952 = insertelement <4 x i32> %1951, i32 %1949, i32 1
%1953 = insertelement <4 x i32> %1952, i32 %1950, i32 2
%1954 = bitcast <8 x i32> %80 to <32 x i8>
%1955 = bitcast <4 x i32> %82 to <16 x i8>
%1956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1953, <32 x i8> %1954, <16 x i8> %1955, i32 2)
%1957 = extractelement <4 x float> %1956, i32 1
%1958 = extractelement <4 x float> %1956, i32 3
%1959 = fcmp oeq float %temp42.0, 3.000000e+00
%1960 = select i1 %1959, float 1.000000e+00, float 0.000000e+00
%1961 = bitcast float %1932 to i32
%1962 = bitcast float %1934 to i32
%1963 = bitcast float %231 to i32
%1964 = insertelement <4 x i32> undef, i32 %1961, i32 0
%1965 = insertelement <4 x i32> %1964, i32 %1962, i32 1
%1966 = insertelement <4 x i32> %1965, i32 %1963, i32 2
%1967 = bitcast <8 x i32> %72 to <32 x i8>
%1968 = bitcast <4 x i32> %74 to <16 x i8>
%1969 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1966, <32 x i8> %1967, <16 x i8> %1968, i32 2)
%1970 = extractelement <4 x float> %1969, i32 1
%1971 = extractelement <4 x float> %1969, i32 3
%1972 = fcmp oeq float %temp42.0, 2.000000e+00
%1973 = select i1 %1972, float 1.000000e+00, float 0.000000e+00
%1974 = bitcast float %1932 to i32
%1975 = bitcast float %1934 to i32
%1976 = bitcast float %231 to i32
%1977 = insertelement <4 x i32> undef, i32 %1974, i32 0
%1978 = insertelement <4 x i32> %1977, i32 %1975, i32 1
%1979 = insertelement <4 x i32> %1978, i32 %1976, i32 2
%1980 = bitcast <8 x i32> %64 to <32 x i8>
%1981 = bitcast <4 x i32> %66 to <16 x i8>
%1982 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1979, <32 x i8> %1980, <16 x i8> %1981, i32 2)
%1983 = extractelement <4 x float> %1982, i32 1
%1984 = extractelement <4 x float> %1982, i32 3
%1985 = fcmp oeq float %temp42.0, 1.000000e+00
%1986 = select i1 %1985, float 1.000000e+00, float 0.000000e+00
%1987 = bitcast float %1932 to i32
%1988 = bitcast float %1934 to i32
%1989 = bitcast float %231 to i32
%1990 = insertelement <4 x i32> undef, i32 %1987, i32 0
%1991 = insertelement <4 x i32> %1990, i32 %1988, i32 1
%1992 = insertelement <4 x i32> %1991, i32 %1989, i32 2
%1993 = bitcast <8 x i32> %56 to <32 x i8>
%1994 = bitcast <4 x i32> %58 to <16 x i8>
%1995 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1992, <32 x i8> %1993, <16 x i8> %1994, i32 2)
%1996 = extractelement <4 x float> %1995, i32 1
%1997 = extractelement <4 x float> %1995, i32 3
%1998 = fcmp oeq float %temp42.0, 0.000000e+00
%1999 = select i1 %1998, float 1.000000e+00, float 0.000000e+00
%2000 = fmul float %1996, %1999
%2001 = fmul float %1997, %1999
%2002 = fmul float %1983, %1986
%2003 = fadd float %2002, %2000
%2004 = fmul float %1984, %1986
%2005 = fadd float %2004, %2001
%2006 = fmul float %1970, %1973
%2007 = fadd float %2006, %2003
%2008 = fmul float %1971, %1973
%2009 = fadd float %2008, %2005
%2010 = fmul float %1957, %1960
%2011 = fadd float %2010, %2007
%2012 = fmul float %1958, %1960
%2013 = fadd float %2012, %2009
%2014 = fmul float %1944, %1947
%2015 = fadd float %2014, %2011
%2016 = fmul float %1945, %1947
%2017 = fadd float %2016, %2013
%2018 = fmul float %2017, 2.000000e+00
%2019 = fadd float %2018, -1.000000e+00
%2020 = fmul float %2015, 2.000000e+00
%2021 = fadd float %2020, -1.000000e+00
%2022 = fmul float %2019, %2019
%2023 = fmul float %2021, %2021
%2024 = fadd float %2022, %2023
%2025 = call float @llvm.AMDIL.clamp.(float %2024, float 0.000000e+00, float 1.000000e+00)
%2026 = fmul float %154, 0.000000e+00
%2027 = fmul float %1797, %154
%2028 = fmul float %1799, %154
%2029 = fmul float %1910, %155
%2030 = fadd float %2029, %2026
%2031 = fmul float %155, 0.000000e+00
%2032 = fadd float %2031, %2027
%2033 = fmul float %1908, %155
%2034 = fadd float %2033, %2028
%2035 = fmul float %2019, %156
%2036 = fadd float %2035, %2030
%2037 = fmul float %2021, %156
%2038 = fadd float %2037, %2032
%2039 = fmul float %156, 0.000000e+00
%2040 = fadd float %2039, %2034
%2041 = fcmp une float %38, %temp28.0
%.sink227 = select i1 %2041, float %41, float %40
%temp40.1 = select i1 %2041, float 1.953125e-03, float 3.906250e-03
%2042 = fdiv float 1.000000e+00, %.sink227
%2043 = fmul float %105, %2042
%2044 = fmul float %104, %2042
%2045 = call float @llvm.floor.f32(float %2043)
%2046 = fsub float %2043, %2045
%2047 = call float @llvm.floor.f32(float %2044)
%2048 = fsub float %2044, %2047
%2049 = fmul float %42, 2.000000e+00
%2050 = fmul float %2049, %temp40.1
%2051 = fsub float 1.000000e+00, %2050
%2052 = fmul float %temp40.1, %42
%2053 = fmul float %2046, %2051
%2054 = fadd float %2053, %2052
%2055 = fmul float %2048, %2051
%2056 = fadd float %2055, %2052
%2057 = fmul float %2054, %temp28.0
%2058 = fadd float %2057, %temp16.0
%2059 = fmul float %2056, %temp28.0
%2060 = fadd float %2059, %temp17.0
%2061 = bitcast float %2058 to i32
%2062 = bitcast float %2060 to i32
%2063 = bitcast float %231 to i32
%2064 = insertelement <4 x i32> undef, i32 %2061, i32 0
%2065 = insertelement <4 x i32> %2064, i32 %2062, i32 1
%2066 = insertelement <4 x i32> %2065, i32 %2063, i32 2
%2067 = bitcast <8 x i32> %88 to <32 x i8>
%2068 = bitcast <4 x i32> %90 to <16 x i8>
%2069 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2066, <32 x i8> %2067, <16 x i8> %2068, i32 2)
%2070 = extractelement <4 x float> %2069, i32 1
%2071 = extractelement <4 x float> %2069, i32 3
%2072 = fcmp oeq float %temp18.0, 4.000000e+00
%2073 = select i1 %2072, float 1.000000e+00, float 0.000000e+00
%2074 = bitcast float %2058 to i32
%2075 = bitcast float %2060 to i32
%2076 = bitcast float %231 to i32
%2077 = insertelement <4 x i32> undef, i32 %2074, i32 0
%2078 = insertelement <4 x i32> %2077, i32 %2075, i32 1
%2079 = insertelement <4 x i32> %2078, i32 %2076, i32 2
%2080 = bitcast <8 x i32> %80 to <32 x i8>
%2081 = bitcast <4 x i32> %82 to <16 x i8>
%2082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2079, <32 x i8> %2080, <16 x i8> %2081, i32 2)
%2083 = extractelement <4 x float> %2082, i32 1
%2084 = extractelement <4 x float> %2082, i32 3
%2085 = fcmp oeq float %temp18.0, 3.000000e+00
%2086 = select i1 %2085, float 1.000000e+00, float 0.000000e+00
%2087 = bitcast float %2058 to i32
%2088 = bitcast float %2060 to i32
%2089 = bitcast float %231 to i32
%2090 = insertelement <4 x i32> undef, i32 %2087, i32 0
%2091 = insertelement <4 x i32> %2090, i32 %2088, i32 1
%2092 = insertelement <4 x i32> %2091, i32 %2089, i32 2
%2093 = bitcast <8 x i32> %72 to <32 x i8>
%2094 = bitcast <4 x i32> %74 to <16 x i8>
%2095 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2092, <32 x i8> %2093, <16 x i8> %2094, i32 2)
%2096 = extractelement <4 x float> %2095, i32 1
%2097 = extractelement <4 x float> %2095, i32 3
%2098 = fcmp oeq float %temp18.0, 2.000000e+00
%2099 = select i1 %2098, float 1.000000e+00, float 0.000000e+00
%2100 = bitcast float %2058 to i32
%2101 = bitcast float %2060 to i32
%2102 = bitcast float %231 to i32
%2103 = insertelement <4 x i32> undef, i32 %2100, i32 0
%2104 = insertelement <4 x i32> %2103, i32 %2101, i32 1
%2105 = insertelement <4 x i32> %2104, i32 %2102, i32 2
%2106 = bitcast <8 x i32> %64 to <32 x i8>
%2107 = bitcast <4 x i32> %66 to <16 x i8>
%2108 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2105, <32 x i8> %2106, <16 x i8> %2107, i32 2)
%2109 = extractelement <4 x float> %2108, i32 1
%2110 = extractelement <4 x float> %2108, i32 3
%2111 = fcmp oeq float %temp18.0, 1.000000e+00
%2112 = select i1 %2111, float 1.000000e+00, float 0.000000e+00
%2113 = bitcast float %2058 to i32
%2114 = bitcast float %2060 to i32
%2115 = bitcast float %231 to i32
%2116 = insertelement <4 x i32> undef, i32 %2113, i32 0
%2117 = insertelement <4 x i32> %2116, i32 %2114, i32 1
%2118 = insertelement <4 x i32> %2117, i32 %2115, i32 2
%2119 = bitcast <8 x i32> %56 to <32 x i8>
%2120 = bitcast <4 x i32> %58 to <16 x i8>
%2121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2118, <32 x i8> %2119, <16 x i8> %2120, i32 2)
%2122 = extractelement <4 x float> %2121, i32 1
%2123 = extractelement <4 x float> %2121, i32 3
%2124 = fcmp oeq float %temp18.0, 0.000000e+00
%2125 = select i1 %2124, float 1.000000e+00, float 0.000000e+00
%2126 = fmul float %2122, %2125
%2127 = fmul float %2123, %2125
%2128 = fmul float %2109, %2112
%2129 = fadd float %2128, %2126
%2130 = fmul float %2110, %2112
%2131 = fadd float %2130, %2127
%2132 = fmul float %2096, %2099
%2133 = fadd float %2132, %2129
%2134 = fmul float %2097, %2099
%2135 = fadd float %2134, %2131
%2136 = fmul float %2083, %2086
%2137 = fadd float %2136, %2133
%2138 = fmul float %2084, %2086
%2139 = fadd float %2138, %2135
%2140 = fmul float %2070, %2073
%2141 = fadd float %2140, %2137
%2142 = fmul float %2071, %2073
%2143 = fadd float %2142, %2139
%2144 = fmul float %2143, 2.000000e+00
%2145 = fadd float %2144, -1.000000e+00
%2146 = fmul float %2141, 2.000000e+00
%2147 = fadd float %2146, -1.000000e+00
%2148 = fmul float %2145, %2145
%2149 = fmul float %2147, %2147
%2150 = fadd float %2148, %2149
%2151 = call float @llvm.AMDIL.clamp.(float %2150, float 0.000000e+00, float 1.000000e+00)
%2152 = fcmp une float %38, %temp28.0
%.sink228 = select i1 %2152, float %41, float %40
%temp48.5 = select i1 %2152, float 1.953125e-03, float 3.906250e-03
%2153 = fdiv float 1.000000e+00, %.sink228
%2154 = fmul float %105, %2153
%2155 = fmul float %103, %2153
%2156 = call float @llvm.floor.f32(float %2154)
%2157 = fsub float %2154, %2156
%2158 = call float @llvm.floor.f32(float %2155)
%2159 = fsub float %2155, %2158
%2160 = fmul float %42, 2.000000e+00
%2161 = fmul float %2160, %temp48.5
%2162 = fsub float 1.000000e+00, %2161
%2163 = fmul float %temp48.5, %42
%2164 = fmul float %2157, %2162
%2165 = fadd float %2164, %2163
%2166 = fmul float %2159, %2162
%2167 = fadd float %2166, %2163
%2168 = fmul float %2165, %temp28.0
%2169 = fadd float %2168, %temp16.0
%2170 = fmul float %2167, %temp28.0
%2171 = fadd float %2170, %temp17.0
%2172 = bitcast float %2169 to i32
%2173 = bitcast float %2171 to i32
%2174 = bitcast float %231 to i32
%2175 = insertelement <4 x i32> undef, i32 %2172, i32 0
%2176 = insertelement <4 x i32> %2175, i32 %2173, i32 1
%2177 = insertelement <4 x i32> %2176, i32 %2174, i32 2
%2178 = bitcast <8 x i32> %88 to <32 x i8>
%2179 = bitcast <4 x i32> %90 to <16 x i8>
%2180 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2177, <32 x i8> %2178, <16 x i8> %2179, i32 2)
%2181 = extractelement <4 x float> %2180, i32 1
%2182 = extractelement <4 x float> %2180, i32 3
%2183 = fcmp oeq float %temp18.0, 4.000000e+00
%2184 = select i1 %2183, float 1.000000e+00, float 0.000000e+00
%2185 = bitcast float %2169 to i32
%2186 = bitcast float %2171 to i32
%2187 = bitcast float %231 to i32
%2188 = insertelement <4 x i32> undef, i32 %2185, i32 0
%2189 = insertelement <4 x i32> %2188, i32 %2186, i32 1
%2190 = insertelement <4 x i32> %2189, i32 %2187, i32 2
%2191 = bitcast <8 x i32> %80 to <32 x i8>
%2192 = bitcast <4 x i32> %82 to <16 x i8>
%2193 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2190, <32 x i8> %2191, <16 x i8> %2192, i32 2)
%2194 = extractelement <4 x float> %2193, i32 1
%2195 = extractelement <4 x float> %2193, i32 3
%2196 = fcmp oeq float %temp18.0, 3.000000e+00
%2197 = select i1 %2196, float 1.000000e+00, float 0.000000e+00
%2198 = bitcast float %2169 to i32
%2199 = bitcast float %2171 to i32
%2200 = bitcast float %231 to i32
%2201 = insertelement <4 x i32> undef, i32 %2198, i32 0
%2202 = insertelement <4 x i32> %2201, i32 %2199, i32 1
%2203 = insertelement <4 x i32> %2202, i32 %2200, i32 2
%2204 = bitcast <8 x i32> %72 to <32 x i8>
%2205 = bitcast <4 x i32> %74 to <16 x i8>
%2206 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2203, <32 x i8> %2204, <16 x i8> %2205, i32 2)
%2207 = extractelement <4 x float> %2206, i32 1
%2208 = extractelement <4 x float> %2206, i32 3
%2209 = fcmp oeq float %temp18.0, 2.000000e+00
%2210 = select i1 %2209, float 1.000000e+00, float 0.000000e+00
%2211 = bitcast float %2169 to i32
%2212 = bitcast float %2171 to i32
%2213 = bitcast float %231 to i32
%2214 = insertelement <4 x i32> undef, i32 %2211, i32 0
%2215 = insertelement <4 x i32> %2214, i32 %2212, i32 1
%2216 = insertelement <4 x i32> %2215, i32 %2213, i32 2
%2217 = bitcast <8 x i32> %64 to <32 x i8>
%2218 = bitcast <4 x i32> %66 to <16 x i8>
%2219 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2216, <32 x i8> %2217, <16 x i8> %2218, i32 2)
%2220 = extractelement <4 x float> %2219, i32 1
%2221 = extractelement <4 x float> %2219, i32 3
%2222 = fcmp oeq float %temp18.0, 1.000000e+00
%2223 = select i1 %2222, float 1.000000e+00, float 0.000000e+00
%2224 = bitcast float %2169 to i32
%2225 = bitcast float %2171 to i32
%2226 = bitcast float %231 to i32
%2227 = insertelement <4 x i32> undef, i32 %2224, i32 0
%2228 = insertelement <4 x i32> %2227, i32 %2225, i32 1
%2229 = insertelement <4 x i32> %2228, i32 %2226, i32 2
%2230 = bitcast <8 x i32> %56 to <32 x i8>
%2231 = bitcast <4 x i32> %58 to <16 x i8>
%2232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2229, <32 x i8> %2230, <16 x i8> %2231, i32 2)
%2233 = extractelement <4 x float> %2232, i32 1
%2234 = extractelement <4 x float> %2232, i32 3
%2235 = fcmp oeq float %temp18.0, 0.000000e+00
%2236 = select i1 %2235, float 1.000000e+00, float 0.000000e+00
%2237 = fmul float %2233, %2236
%2238 = fmul float %2234, %2236
%2239 = fmul float %2220, %2223
%2240 = fadd float %2239, %2237
%2241 = fmul float %2221, %2223
%2242 = fadd float %2241, %2238
%2243 = fmul float %2207, %2210
%2244 = fadd float %2243, %2240
%2245 = fmul float %2208, %2210
%2246 = fadd float %2245, %2242
%2247 = fmul float %2194, %2197
%2248 = fadd float %2247, %2244
%2249 = fmul float %2195, %2197
%2250 = fadd float %2249, %2246
%2251 = fmul float %2181, %2184
%2252 = fadd float %2251, %2248
%2253 = fmul float %2182, %2184
%2254 = fadd float %2253, %2250
%2255 = fmul float %2254, 2.000000e+00
%2256 = fadd float %2255, -1.000000e+00
%2257 = fmul float %2252, 2.000000e+00
%2258 = fadd float %2257, -1.000000e+00
%2259 = fmul float %2256, %2256
%2260 = fmul float %2258, %2258
%2261 = fadd float %2259, %2260
%2262 = call float @llvm.AMDIL.clamp.(float %2261, float 0.000000e+00, float 1.000000e+00)
%2263 = fcmp une float %38, %temp28.0
%.sink229 = select i1 %2263, float %41, float %40
%temp52.6 = select i1 %2263, float 1.953125e-03, float 3.906250e-03
%2264 = fdiv float 1.000000e+00, %.sink229
%2265 = fmul float %103, %2264
%2266 = fmul float %104, %2264
%2267 = call float @llvm.floor.f32(float %2265)
%2268 = fsub float %2265, %2267
%2269 = call float @llvm.floor.f32(float %2266)
%2270 = fsub float %2266, %2269
%2271 = fmul float %42, 2.000000e+00
%2272 = fmul float %2271, %temp52.6
%2273 = fsub float 1.000000e+00, %2272
%2274 = fmul float %temp52.6, %42
%2275 = fmul float %2268, %2273
%2276 = fadd float %2275, %2274
%2277 = fmul float %2270, %2273
%2278 = fadd float %2277, %2274
%2279 = fmul float %2276, %temp28.0
%2280 = fadd float %2279, %temp16.0
%2281 = fmul float %2278, %temp28.0
%2282 = fadd float %2281, %temp17.0
%2283 = bitcast float %2280 to i32
%2284 = bitcast float %2282 to i32
%2285 = bitcast float %231 to i32
%2286 = insertelement <4 x i32> undef, i32 %2283, i32 0
%2287 = insertelement <4 x i32> %2286, i32 %2284, i32 1
%2288 = insertelement <4 x i32> %2287, i32 %2285, i32 2
%2289 = bitcast <8 x i32> %88 to <32 x i8>
%2290 = bitcast <4 x i32> %90 to <16 x i8>
%2291 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2288, <32 x i8> %2289, <16 x i8> %2290, i32 2)
%2292 = extractelement <4 x float> %2291, i32 1
%2293 = extractelement <4 x float> %2291, i32 3
%2294 = fcmp oeq float %temp18.0, 4.000000e+00
%2295 = select i1 %2294, float 1.000000e+00, float 0.000000e+00
%2296 = bitcast float %2280 to i32
%2297 = bitcast float %2282 to i32
%2298 = bitcast float %231 to i32
%2299 = insertelement <4 x i32> undef, i32 %2296, i32 0
%2300 = insertelement <4 x i32> %2299, i32 %2297, i32 1
%2301 = insertelement <4 x i32> %2300, i32 %2298, i32 2
%2302 = bitcast <8 x i32> %80 to <32 x i8>
%2303 = bitcast <4 x i32> %82 to <16 x i8>
%2304 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2301, <32 x i8> %2302, <16 x i8> %2303, i32 2)
%2305 = extractelement <4 x float> %2304, i32 1
%2306 = extractelement <4 x float> %2304, i32 3
%2307 = fcmp oeq float %temp18.0, 3.000000e+00
%2308 = select i1 %2307, float 1.000000e+00, float 0.000000e+00
%2309 = bitcast float %2280 to i32
%2310 = bitcast float %2282 to i32
%2311 = bitcast float %231 to i32
%2312 = insertelement <4 x i32> undef, i32 %2309, i32 0
%2313 = insertelement <4 x i32> %2312, i32 %2310, i32 1
%2314 = insertelement <4 x i32> %2313, i32 %2311, i32 2
%2315 = bitcast <8 x i32> %72 to <32 x i8>
%2316 = bitcast <4 x i32> %74 to <16 x i8>
%2317 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2314, <32 x i8> %2315, <16 x i8> %2316, i32 2)
%2318 = extractelement <4 x float> %2317, i32 1
%2319 = extractelement <4 x float> %2317, i32 3
%2320 = fcmp oeq float %temp18.0, 2.000000e+00
%2321 = select i1 %2320, float 1.000000e+00, float 0.000000e+00
%2322 = bitcast float %2280 to i32
%2323 = bitcast float %2282 to i32
%2324 = bitcast float %231 to i32
%2325 = insertelement <4 x i32> undef, i32 %2322, i32 0
%2326 = insertelement <4 x i32> %2325, i32 %2323, i32 1
%2327 = insertelement <4 x i32> %2326, i32 %2324, i32 2
%2328 = bitcast <8 x i32> %64 to <32 x i8>
%2329 = bitcast <4 x i32> %66 to <16 x i8>
%2330 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2327, <32 x i8> %2328, <16 x i8> %2329, i32 2)
%2331 = extractelement <4 x float> %2330, i32 1
%2332 = extractelement <4 x float> %2330, i32 3
%2333 = fcmp oeq float %temp18.0, 1.000000e+00
%2334 = select i1 %2333, float 1.000000e+00, float 0.000000e+00
%2335 = bitcast float %2280 to i32
%2336 = bitcast float %2282 to i32
%2337 = bitcast float %231 to i32
%2338 = insertelement <4 x i32> undef, i32 %2335, i32 0
%2339 = insertelement <4 x i32> %2338, i32 %2336, i32 1
%2340 = insertelement <4 x i32> %2339, i32 %2337, i32 2
%2341 = bitcast <8 x i32> %56 to <32 x i8>
%2342 = bitcast <4 x i32> %58 to <16 x i8>
%2343 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2340, <32 x i8> %2341, <16 x i8> %2342, i32 2)
%2344 = extractelement <4 x float> %2343, i32 1
%2345 = extractelement <4 x float> %2343, i32 3
%2346 = fcmp oeq float %temp18.0, 0.000000e+00
%2347 = select i1 %2346, float 1.000000e+00, float 0.000000e+00
%2348 = fmul float %2344, %2347
%2349 = fmul float %2345, %2347
%2350 = fmul float %2331, %2334
%2351 = fadd float %2350, %2348
%2352 = fmul float %2332, %2334
%2353 = fadd float %2352, %2349
%2354 = fmul float %2318, %2321
%2355 = fadd float %2354, %2351
%2356 = fmul float %2319, %2321
%2357 = fadd float %2356, %2353
%2358 = fmul float %2305, %2308
%2359 = fadd float %2358, %2355
%2360 = fmul float %2306, %2308
%2361 = fadd float %2360, %2357
%2362 = fmul float %2292, %2295
%2363 = fadd float %2362, %2359
%2364 = fmul float %2293, %2295
%2365 = fadd float %2364, %2361
%2366 = fmul float %2365, 2.000000e+00
%2367 = fadd float %2366, -1.000000e+00
%2368 = fmul float %2363, 2.000000e+00
%2369 = fadd float %2368, -1.000000e+00
%2370 = fmul float %2367, %2367
%2371 = fmul float %2369, %2369
%2372 = fadd float %2370, %2371
%2373 = call float @llvm.AMDIL.clamp.(float %2372, float 0.000000e+00, float 1.000000e+00)
%2374 = fmul float %154, 0.000000e+00
%2375 = fmul float %2145, %154
%2376 = fmul float %2147, %154
%2377 = fmul float %2258, %155
%2378 = fadd float %2377, %2374
%2379 = fmul float %155, 0.000000e+00
%2380 = fadd float %2379, %2375
%2381 = fmul float %2256, %155
%2382 = fadd float %2381, %2376
%2383 = fmul float %2367, %156
%2384 = fadd float %2383, %2378
%2385 = fmul float %2369, %156
%2386 = fadd float %2385, %2380
%2387 = fmul float %156, 0.000000e+00
%2388 = fadd float %2387, %2382
%2389 = fmul float %95, %1688
%2390 = fmul float %95, %1690
%2391 = fmul float %95, %1692
%2392 = fmul float %96, %2036
%2393 = fadd float %2392, %2389
%2394 = fmul float %96, %2038
%2395 = fadd float %2394, %2390
%2396 = fmul float %96, %2040
%2397 = fadd float %2396, %2391
%2398 = fmul float %97, %2384
%2399 = fadd float %2398, %2393
%2400 = fmul float %97, %2386
%2401 = fadd float %2400, %2395
%2402 = fmul float %97, %2388
%2403 = fadd float %2402, %2397
%2404 = fmul float %2399, %2399
%2405 = fmul float %2401, %2401
%2406 = fadd float %2404, %2405
%2407 = fmul float %2403, %2403
%2408 = fadd float %2406, %2407
%2409 = fadd float %2408, 1.000000e+00
%2410 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2409)
%2411 = fmul float %2399, %2410
%2412 = fmul float %2401, %2410
%2413 = fmul float %2403, %2410
%2414 = fmul float %2411, %94
%2415 = fmul float %2412, %94
%2416 = fmul float %2413, %94
%2417 = fsub float %100, %2414
%2418 = fsub float %101, %2415
%2419 = fsub float %102, %2416
%2420 = fmul float %2417, %2417
%2421 = fmul float %2418, %2418
%2422 = fadd float %2421, %2420
%2423 = fmul float %2419, %2419
%2424 = fadd float %2422, %2423
%2425 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2424)
%2426 = fmul float %2417, %2425
%2427 = fmul float %2418, %2425
%2428 = fmul float %2419, %2425
%2429 = fmul float %1340, %106
%2430 = fmul float %1342, %107
%2431 = fmul float %1344, %108
%2432 = fadd float %115, %127
%2433 = fadd float %116, %128
%2434 = fadd float %117, %129
%2435 = fmul float %2432, %2432
%2436 = fmul float %2433, %2433
%2437 = fadd float %2436, %2435
%2438 = fmul float %2434, %2434
%2439 = fadd float %2437, %2438
%2440 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2439)
%2441 = fmul float %2432, %2440
%2442 = fmul float %2433, %2440
%2443 = fmul float %2434, %2440
%2444 = fmul float %2426, %2441
%2445 = fmul float %2427, %2442
%2446 = fadd float %2445, %2444
%2447 = fmul float %2428, %2443
%2448 = fadd float %2446, %2447
%2449 = call float @llvm.maxnum.f32(float %2448, float 0x3F1A36E2E0000000)
%2450 = fmul float %98, 3.200000e+01
%2451 = call float @llvm.pow.f32(float %2449, float %2450)
%2452 = call float @llvm.AMDIL.clamp.(float %2451, float 0.000000e+00, float 1.000000e+00)
%2453 = fmul float %2452, 2.000000e+00
%2454 = fsub float 3.000000e+00, %2453
%2455 = fmul float %2452, %2454
%2456 = fmul float %2452, %2455
%2457 = fmul float %2456, %98
%2458 = fmul float %1340, %35
%2459 = fmul float %1342, %36
%2460 = fmul float %1344, %37
%2461 = fmul float %2426, %115
%2462 = fmul float %2427, %116
%2463 = fadd float %2462, %2461
%2464 = fmul float %2428, %117
%2465 = fadd float %2463, %2464
%2466 = call float @llvm.AMDIL.clamp.(float %2465, float 0.000000e+00, float 1.000000e+00)
%2467 = fmul float %48, 2.000000e+00
%2468 = fmul float %49, 2.000000e+00
%2469 = fmul float %50, 2.000000e+00
%2470 = call float @llvm.maxnum.f32(float %2467, float %45)
%2471 = call float @llvm.maxnum.f32(float %2468, float %46)
%2472 = call float @llvm.maxnum.f32(float %2469, float %47)
%2473 = call float @llvm.minnum.f32(float %2470, float 1.000000e+00)
%2474 = call float @llvm.minnum.f32(float %2471, float 1.000000e+00)
%2475 = call float @llvm.minnum.f32(float %2472, float 1.000000e+00)
%2476 = fmul float %2473, %1340
%2477 = fmul float %2474, %1342
%2478 = fmul float %2475, %1344
%2479 = fmul float %2458, %2466
%2480 = fadd float %2479, %2476
%2481 = fmul float %2459, %2466
%2482 = fadd float %2481, %2477
%2483 = fmul float %2460, %2466
%2484 = fadd float %2483, %2478
%2485 = fmul float %35, %2457
%2486 = fadd float %2485, %2480
%2487 = fmul float %36, %2457
%2488 = fadd float %2487, %2482
%2489 = fmul float %37, %2457
%2490 = fadd float %2489, %2484
%2491 = fmul float %2486, 5.000000e-01
%2492 = fmul float %2488, 5.000000e-01
%2493 = fmul float %2490, 5.000000e-01
%2494 = fadd float %2429, %2491
%2495 = fadd float %2430, %2492
%2496 = fadd float %2431, %2493
%2497 = fmul float %99, %33
%2498 = fadd float %2497, %34
%2499 = call float @llvm.AMDIL.clamp.(float %2498, float 0.000000e+00, float 1.000000e+00)
%2500 = call float @llvm.AMDGPU.lrp(float %2499, float %2494, float %30)
%2501 = call float @llvm.AMDGPU.lrp(float %2499, float %2495, float %31)
%2502 = call float @llvm.AMDGPU.lrp(float %2499, float %2496, float %32)
%2503 = call i32 @llvm.SI.packf16(float %2500, float %2501)
%2504 = bitcast i32 %2503 to float
%2505 = call i32 @llvm.SI.packf16(float %2502, float 1.000000e+00)
%2506 = bitcast i32 %2505 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2504, float %2506, float %2504, float %2506)
ret void
}
; ---------------------------------------------------------------------------
; Module trailer: external intrinsic declarations, attribute groups and
; metadata for the function body that ends just above.  The notes below only
; describe how the *visible tail* of that body uses each intrinsic; the start
; of the function is outside this excerpt, so "no visible call" does not mean
; the intrinsic is unused.
; ---------------------------------------------------------------------------
; No visible call in the tail of the function body.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; No visible call in the tail of the function body.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Visible calls take a sum of squared components (once with 1.0 added) and the
; result is multiplied back onto each of those components.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; No visible call in the tail of the function body.
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Visible calls: once against a small positive constant before llvm.pow.f32,
; and three times per channel before the llvm.minnum.f32 calls.
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Visible calls are always followed by an fsub forming x - floor(x).
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; No visible call in the tail of the function body.
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Visible calls pass: a <4 x i32> whose lanes 0..2 are bitcast floats (lane 3
; stays undef), a <32 x i8> bitcast from an <8 x i32>, a <16 x i8> bitcast from
; a <4 x i32>, and the constant i32 2.  Only result elements 1 and 3 are read.
; NOTE(review): operands 2 and 3 are presumably the image and sampler
; descriptors -- confirm against the backend that emitted this dump.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Every visible call passes 0.0 and 1.0 as the second and third operands.
; Uses attribute group #2 (readnone only), unlike most declarations here.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Single visible call: base is the llvm.maxnum.f32 result, exponent is a value
; scaled by 32.0.
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Visible calls bound the three llvm.maxnum.f32 per-channel results by 1.0.
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Three visible calls sharing the same first operand (a value clamped to
; [0, 1]).  NOTE(review): presumably a linear interpolation with operand 1 as
; the weight -- confirm.  Uses attribute group #2 (readnone only).
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Two visible calls; each i32 result is bitcast to float and passed to
; llvm.SI.export.  NOTE(review): presumably packs the two floats as a pair of
; half-precision values -- confirm.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Called once, immediately before `ret void`.  Declared without any attribute
; group, unlike the declarations above.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is not referenced by any declaration in this block; presumably it is
; attached to the function definition whose `define` line is outside this
; excerpt -- confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 is shared by every declaration above except clamp, lrp and export.
attributes #1 = { nounwind readnone }
; #2 is used by llvm.AMDIL.clamp. and llvm.AMDGPU.lrp; it omits nounwind.
attributes #2 = { readnone }
; Metadata node; not referenced in the visible tail of the function body.
; NOTE(review): an identically written node is referenced via `!tbaa !0` on
; loads in the first shader of this file -- presumably the same role here.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_writelane_b32 v254, s10, 60 ; 05FD780A
s_mov_b64 s[100:101], s[4:5] ; BEE40404
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v14, v0, 0, 0, [m0] ; C8380000
v_interp_p2_f32 v14, [v14], v1, 0, 0, [m0] ; C8390001
v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100
v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101
v_interp_p1_f32 v18, v0, 2, 0, [m0] ; C8480200
v_interp_p2_f32 v18, [v18], v1, 2, 0, [m0] ; C8490201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400
v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700
v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701
s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000
s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000
s_mov_b32 s14, -1 ; BE8E03C1
s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000
v_readlane_b32 s12, v254, 60 ; 021979FE
s_nop 2 ; BF800002
buffer_store_dword v2, s[12:15], s12 ; E0700000 0C030200
v_interp_p1_f32 v12, v0, 0, 2, [m0] ; C8300800
v_interp_p2_f32 v12, [v12], v1, 0, 2, [m0] ; C8310801
v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900
v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901
v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00
v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00
v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01
v_interp_p1_f32 v34, v0, 0, 3, [m0] ; C8880C00
v_interp_p2_f32 v34, [v34], v1, 0, 3, [m0] ; C8890C01
v_interp_p1_f32 v38, v0, 1, 3, [m0] ; C8980D00
v_interp_p2_f32 v38, [v38], v1, 1, 3, [m0] ; C8990D01
v_interp_p1_f32 v28, v0, 2, 3, [m0] ; C8700E00
v_interp_p2_f32 v28, [v28], v1, 2, 3, [m0] ; C8710E01
s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700
v_interp_p1_f32 v2, v0, 3, 3, [m0] ; C8080F00
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s1, s[8:11], 0x4 ; C2008904
s_buffer_load_dword s0, s[8:11], 0x5 ; C2000905
v_interp_p2_f32 v2, [v2], v1, 3, 3, [m0] ; C8090F01
s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000
s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000
s_mov_b32 s14, -1 ; BE8E03C1
s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000
buffer_store_dword v2, s[12:15], s12 offset:4 ; E0700004 0C030200
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000
v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000
s_buffer_load_dword s16, s[8:11], 0x6 ; C2080906
v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001
s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000
s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000
s_mov_b32 s14, -1 ; BE8E03C1
s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000
buffer_store_dword v2, s[12:15], s12 offset:8 ; E0700008 0C030200
v_interp_p1_f32 v0, v0, 1, 4, [m0] ; C8001100
v_interp_p2_f32 v0, [v0], v1, 1, 4, [m0] ; C8011101
s_mov_b32 s12, SCRATCH_RSRC_DWORD0 ; BE8C03FF 00000000
s_mov_b32 s13, SCRATCH_RSRC_DWORD1 ; BE8D03FF 00000000
s_mov_b32 s14, -1 ; BE8E03C1
s_mov_b32 s15, 0x80f000 ; BE8F03FF 0080F000
buffer_store_dword v0, s[12:15], s12 offset:12 ; E070000C 0C030000
s_buffer_load_dword s42, s[8:11], 0x0 ; C2150900
s_buffer_load_dword s41, s[8:11], 0x1 ; C2148901
v_mul_f32_e64 v1, s1, s1 ; D2100001 00000201
v_mac_f32_e64 v1, s0, s0 ; D23E0001 00000000
s_buffer_load_dword s17, s[8:11], 0x3c ; C208893C
s_buffer_load_dword s2, s[8:11], 0x40 ; C2010940
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e64 v1, s16, s16 ; D23E0001 00002010
s_buffer_load_dword s43, s[8:11], 0x2 ; C2158902
v_rsq_clamp_f32_e32 v13, v1 ; 7E1A5901
v_add_f32_e32 v1, 0.5, v14 ; 06021CF0
v_floor_f32_e32 v16, v1 ; 7E204901
s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700
v_sub_f32_e32 v14, s42, v34 ; 081C442A
v_sub_f32_e32 v1, s41, v38 ; 08024C29
v_mov_b32_e32 v15, 0x42800000 ; 7E1E02FF 42800000
v_cmp_le_f32_e32 vcc, v15, v16 ; 7C06210F
v_mul_f32_e32 v19, v14, v14 ; 10261D0E
v_mac_f32_e32 v19, v1, v1 ; 3E260301
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v15, s43, v28 ; 081E382B
v_mac_f32_e32 v19, v15, v15 ; 3E261F0F
v_rsq_clamp_f32_e32 v19, v19 ; 7E265913
v_mov_b32_e32 v21, 0x7fffffff ; 7E2A02FF 7FFFFFFF
v_and_b32_e32 v20, v9, v21 ; 36282B09
v_and_b32_e32 v22, v10, v21 ; 362C2B0A
v_and_b32_e32 v35, v11, v21 ; 36462B0B
v_mul_f32_e64 v21, |v9|, |v9| ; D2100315 00021309
v_mad_f32 v21, |v10|, |v10|, v21 ; D2820315 0456150A
v_mad_f32 v21, |v11|, |v11|, v21 ; D2820315 0456170B
v_rsq_clamp_f32_e32 v36, v21 ; 7E485915
v_add_f32_e32 v17, 0.5, v17 ; 062222F0
v_add_f32_e32 v18, 0.5, v18 ; 062424F0
v_floor_f32_e32 v21, v17 ; 7E2A4911
v_floor_f32_e32 v17, v18 ; 7E224912
v_mov_b32_e32 v31, s2 ; 7E3E0202
v_mul_f32_e32 v24, s17, v16 ; 10302011
v_floor_f32_e32 v23, v24 ; 7E2E4918
s_and_saveexec_b64 s[12:13], vcc ; BE8C246A
s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E
v_mov_b32_e32 v18, 0xc2800000 ; 7E2402FF C2800000
v_add_f32_e32 v16, v16, v18 ; 06202510
v_mul_f32_e32 v18, s2, v16 ; 10242002
v_floor_f32_e32 v18, v18 ; 7E244912
v_mul_f32_e32 v25, s2, v18 ; 10322402
v_mad_f32 v32, v16, s2, -v18 ; D2820020 84480510
v_floor_f32_e32 v16, v25 ; 7E204919
v_mad_f32 v33, v18, s2, -v16 ; D2820021 84400512
v_add_f32_e32 v18, 4.0, v16 ; 062420F6
s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C
v_mov_b32_e32 v16, s17 ; 7E200211
v_mov_b32_e32 v40, v31 ; 7E50031F
s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E
v_mul_f32_e32 v18, s17, v23 ; 10242E11
v_floor_f32_e32 v25, v24 ; 7E324918
v_subrev_f32_e32 v32, v25, v24 ; 0A403119
v_floor_f32_e32 v18, v18 ; 7E244912
v_mad_f32 v33, v23, s17, -v18 ; D2820021 84482317
v_mov_b32_e32 v40, v16 ; 7E500310
s_or_b64 exec, exec, s[12:13] ; 88FE0C7E
v_mul_f32_e32 v25, s17, v21 ; 10322A11
v_floor_f32_e32 v24, v25 ; 7E304919
v_mov_b32_e32 v23, 0x42800000 ; 7E2E02FF 42800000
v_cmp_le_f32_e32 vcc, v23, v21 ; 7C062B17
s_and_saveexec_b64 s[12:13], vcc ; BE8C246A
s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E
v_mov_b32_e32 v23, 0xc2800000 ; 7E2E02FF C2800000
v_add_f32_e32 v21, v21, v23 ; 062A2F15
v_mul_f32_e32 v23, s2, v21 ; 102E2A02
v_floor_f32_e32 v23, v23 ; 7E2E4917
v_mul_f32_e32 v26, s2, v23 ; 10342E02
v_mad_f32 v29, v21, s2, -v23 ; D282001D 845C0515
v_floor_f32_e32 v21, v26 ; 7E2A491A
v_mad_f32 v30, v23, s2, -v21 ; D282001E 84540517
v_add_f32_e32 v23, 4.0, v21 ; 062E2AF6
s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C
v_mov_b32_e32 v39, v31 ; 7E4E031F
s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E
v_mul_f32_e32 v21, s17, v24 ; 102A3011
v_floor_f32_e32 v23, v25 ; 7E2E4919
v_subrev_f32_e32 v29, v23, v25 ; 0A3A3317
v_floor_f32_e32 v23, v21 ; 7E2E4915
v_mad_f32 v30, v24, s17, -v23 ; D282001E 845C2318
v_mov_b32_e32 v39, v16 ; 7E4E0310
s_or_b64 exec, exec, s[12:13] ; 88FE0C7E
; ---------------------------------------------------------------------------
; Per-lane if/else on (v17 >= 64.0), implemented by masking EXEC.
; Both arms produce v26, v27 and v24; the "else" arm additionally sets
; v31 = v16. v17 is reused as scratch inside both arms.
; The section executed between the two arms (lanes of both arms enabled) also
; fetches a run of dwords through the buffer descriptor in s[8:11] and parks
; many of them in individual lanes of v254 via v_writelane_b32.
; ---------------------------------------------------------------------------
; Scalar buffer loads issued ahead of the branch (dword offsets 0xf, 0x44, 0x48).
s_buffer_load_dword s14, s[8:11], 0xf ; C207090F
s_buffer_load_dword s15, s[8:11], 0x44 ; C2078944
s_buffer_load_dword s3, s[8:11], 0x48 ; C2018948
; Common prologue: v25 = s17 * v17, v21 = floor(v25) (consumed by the else arm).
v_mul_f32_e32 v25, s17, v17 ; 10322211
v_floor_f32_e32 v21, v25 ; 7E2A4919
; 0x42800000 is 64.0f; vcc = (64.0 <= v17).
v_mov_b32_e32 v24, 0x42800000 ; 7E3002FF 42800000
v_cmp_le_f32_e32 vcc, v24, v17 ; 7C062318
; Wait for the three scalar loads above.
s_waitcnt lgkmcnt(0) ; BF8C007F
; s[12:13] = old EXEC, EXEC &= vcc; the xor then leaves the "else" lanes in s[12:13].
s_and_saveexec_b64 s[12:13], vcc ; BE8C246A
s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E
; "then" arm: v17 += -64.0 (0xc2800000), then two floor/fraction steps scaled by s2:
;   t = floor(s2 * v17); v26 = v17 * s2 - t
;   u = floor(s2 * t);   v27 = t * s2 - u;  v24 = 4.0 + u
v_mov_b32_e32 v24, 0xc2800000 ; 7E3002FF C2800000
v_add_f32_e32 v17, v17, v24 ; 06223111
v_mul_f32_e32 v24, s2, v17 ; 10302202
v_floor_f32_e32 v24, v24 ; 7E304918
v_mul_f32_e32 v27, s2, v24 ; 10363002
v_mad_f32 v26, v17, s2, -v24 ; D282001A 84600511
v_floor_f32_e32 v17, v27 ; 7E22491B
v_mad_f32 v27, v24, s2, -v17 ; D282001B 84440518
v_add_f32_e32 v24, 4.0, v17 ; 063022F6
; EXEC |= s[12:13] (both arms enabled) while s[12:13] receives the "then" mask.
s_or_saveexec_b64 s[12:13], s[12:13] ; BE8C250C
; Copy two of the constants loaded above into VGPRs.
v_mov_b32_e32 v41, s14 ; 7E52020E
v_mov_b32_e32 v37, s15 ; 7E4A020F
; Load/park sequence: each dword is loaded into s4, waited for, then written
; to one lane of v254 (lanes 6, 5, 4 for offsets 0x8, 0x9, 0xa).
s_buffer_load_dword s4, s[8:11], 0x8 ; C2020908
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 6 ; 05FD0C04
s_buffer_load_dword s4, s[8:11], 0x9 ; C2020909
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 5 ; 05FD0A04
s_buffer_load_dword s4, s[8:11], 0xa ; C202090A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 4 ; 05FD0804
; s14 is reloaded from offset 0xe (its previous value was already copied to v41).
s_buffer_load_dword s14, s[8:11], 0xe ; C207090E
; Offsets 0x10, 0x11, 0x12 -> lanes 46, 47, 7.
s_buffer_load_dword s4, s[8:11], 0x10 ; C2020910
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 46 ; 05FD5C04
s_buffer_load_dword s4, s[8:11], 0x11 ; C2020911
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 47 ; 05FD5E04
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 7 ; 05FD0E04
; Offsets 0x4c, 0x50, 0x54 stay in SGPRs (s40, s44, s28).
s_buffer_load_dword s40, s[8:11], 0x4c ; C214094C
s_buffer_load_dword s44, s[8:11], 0x50 ; C2160950
s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954
; Offsets 0x58, 0x59, 0x5a -> lanes 8, 9, 10.
s_buffer_load_dword s4, s[8:11], 0x58 ; C2020958
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 8 ; 05FD1004
s_buffer_load_dword s4, s[8:11], 0x59 ; C2020959
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 9 ; 05FD1204
s_buffer_load_dword s4, s[8:11], 0x5a ; C202095A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 10 ; 05FD1404
; Offsets 0x5c, 0x5d, 0x5e -> lanes 13, 11, 12.
s_buffer_load_dword s4, s[8:11], 0x5c ; C202095C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 13 ; 05FD1A04
s_buffer_load_dword s4, s[8:11], 0x5d ; C202095D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 11 ; 05FD1604
s_buffer_load_dword s4, s[8:11], 0x5e ; C202095E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s4, 12 ; 05FD1804
; s3 still holds the dword loaded from offset 0x48 before the branch.
v_mov_b32_e32 v42, s3 ; 7E540203
; Drop the "then" lanes: EXEC now holds only the "else" lanes.
s_xor_b64 exec, exec, s[12:13] ; 89FE0C7E
; "else" arm, scaled by s17 and without the 64.0 offset:
;   v26 = v25 - floor(v25); v24 = floor(s17 * v21); v27 = v21 * s17 - v24
; (v_subrev_f32 computes src1 - src0.)
v_mul_f32_e32 v17, s17, v21 ; 10222A11
v_floor_f32_e32 v24, v25 ; 7E304919
v_subrev_f32_e32 v26, v24, v25 ; 0A343318
v_floor_f32_e32 v24, v17 ; 7E304911
v_mad_f32 v27, v21, s17, -v24 ; D282001B 84602315
v_mov_b32_e32 v31, v16 ; 7E3E0310
; Rejoin: restore the lanes of both arms.
s_or_b64 exec, exec, s[12:13] ; 88FE0C7E
; ---------------------------------------------------------------------------
; Straight-line setup; the instruction streams below are interleaved:
;  * multiply / multiply-accumulate updates of v41, v16, v17, v12, v15, v14,
;    v13, and a clamped copy of v41 into v1;
;  * equality tests of v18, v23 and v24 against 0, 1.0, 2.0 and 4.0. Each
;    64-bit lane mask either stays in an SGPR pair / vcc or is parked as two
;    dwords in lanes of v254 (v_writelane_b32) for later v_readlane_b32;
;  * "not equal to s17" masks for v40, v39, v31 in s[22:23], s[24:25], s[26:27];
;  * v41 = ln2 * log2(s28 * ((v34-s42)^2 + (v38-s41)^2 + (v28-s43)^2));
;  * three values offset by -0.2 (v21, v20, v19) and the constant 7.0 in v22;
;  * per-mask selection between v37 and v42, reciprocals of the selections,
;    and fractional parts of v34, v38, v28 multiplied by those reciprocals.
; ---------------------------------------------------------------------------
; v_mac_f32 is dst += src0 * src1.
v_mac_f32_e32 v41, s14, v12 ; 3E52180E
v_mul_f32_e32 v16, v19, v14 ; 10201D13
v_mul_f32_e32 v17, v19, v1 ; 10220313
v_mul_f32_e32 v12, v19, v15 ; 10181F13
v_mac_f32_e32 v16, s1, v13 ; 3E201A01
v_mac_f32_e32 v17, s0, v13 ; 3E221A00
v_mac_f32_e32 v12, s16, v13 ; 3E181A10
; v1 = v41 + 0 with the clamp output modifier applied.
v_add_f32_e64 v1, 0, v41 clamp ; D2060801 00025280
; s[22:23] = (s17 != v40).
v_cmp_neq_f32_e64 s[22:23], s17, v40 ; D01A0016 00025011
; v18 == 4.0 stays in s[2:3]; v18 == 2.0 -> v254 lanes 28/29; v18 == 1.0 -> lanes 26/27;
; v18 == 0 stays in vcc.
v_cmp_eq_f32_e64 s[2:3], 4.0, v18 ; D0040002 000224F6
v_cmp_eq_f32_e64 s[4:5], 2.0, v18 ; D0040004 000224F4
v_writelane_b32 v254, s4, 28 ; 05FD3804
v_writelane_b32 v254, s5, 29 ; 05FD3A05
v_cmp_eq_f32_e64 s[4:5], 1.0, v18 ; D0040004 000224F2
v_writelane_b32 v254, s4, 26 ; 05FD3404
v_writelane_b32 v254, s5, 27 ; 05FD3605
v_cmp_eq_f32_e32 vcc, 0, v18 ; 7C042480
; s[24:25] = (s17 != v39), s[26:27] = (s17 != v31).
v_cmp_neq_f32_e64 s[24:25], s17, v39 ; D01A0018 00024E11
v_cmp_neq_f32_e64 s[26:27], s17, v31 ; D01A001A 00023E11
; v23 == 4.0 -> lanes 32/33; v23 == 2.0 -> lanes 22/23; v23 == 1.0 -> lanes 24/25;
; v23 == 0 -> lanes 30/31.
v_cmp_eq_f32_e64 s[4:5], 4.0, v23 ; D0040004 00022EF6
v_writelane_b32 v254, s4, 32 ; 05FD4004
v_writelane_b32 v254, s5, 33 ; 05FD4205
v_cmp_eq_f32_e64 s[4:5], 2.0, v23 ; D0040004 00022EF4
v_writelane_b32 v254, s4, 22 ; 05FD2C04
v_writelane_b32 v254, s5, 23 ; 05FD2E05
v_mul_f32_e32 v15, s1, v13 ; 101E1A01
v_cmp_eq_f32_e64 s[4:5], 1.0, v23 ; D0040004 00022EF2
v_writelane_b32 v254, s4, 24 ; 05FD3004
v_writelane_b32 v254, s5, 25 ; 05FD3205
v_mul_f32_e32 v14, s0, v13 ; 101C1A00
v_mul_f32_e32 v13, s16, v13 ; 101A1A10
; s[0:1] is overwritten from here on; the s0/s1 multiplies above precede this.
v_cmp_eq_f32_e64 s[0:1], 0, v23 ; D0040000 00022E80
v_writelane_b32 v254, s0, 30 ; 05FD3C00
v_writelane_b32 v254, s1, 31 ; 05FD3E01
; Sum of squared differences (v_subrev_f32 computes src1 - src0):
;   v19 = (v34 - s42)^2 + (v38 - s41)^2 + (v28 - s43)^2
v_subrev_f32_e32 v19, s42, v34 ; 0A26442A
v_mul_f32_e32 v19, v19, v19 ; 10262713
v_subrev_f32_e32 v21, s41, v38 ; 0A2A4C29
v_mac_f32_e32 v19, v21, v21 ; 3E262B15
; v24 == 4.0 -> lanes 14/15.
v_cmp_eq_f32_e64 s[0:1], 4.0, v24 ; D0040000 000230F6
v_writelane_b32 v254, s0, 14 ; 05FD1C00
v_writelane_b32 v254, s1, 15 ; 05FD1E01
v_subrev_f32_e32 v21, s43, v28 ; 0A2A382B
v_mac_f32_e32 v19, v21, v21 ; 3E262B15
; Scale by s28 and take the base-2 logarithm.
v_mul_f32_e32 v19, s28, v19 ; 1026261C
v_log_f32_e32 v19, v19 ; 7E264F13
; v24 == 2.0 -> lanes 16/17.
v_cmp_eq_f32_e64 s[0:1], 2.0, v24 ; D0040000 000230F4
v_writelane_b32 v254, s0, 16 ; 05FD2000
v_writelane_b32 v254, s1, 17 ; 05FD2201
; v25 = (v18 == 4.0) ? 1.0 : 0, using the mask still held in s[2:3].
v_cndmask_b32_e64 v25, 0, 1.0, s[2:3] ; D2000019 0009E480
; v24 == 1.0 -> lanes 18/19.
v_cmp_eq_f32_e64 s[0:1], 1.0, v24 ; D0040000 000230F2
v_writelane_b32 v254, s0, 18 ; 05FD2400
v_writelane_b32 v254, s1, 19 ; 05FD2601
; 0x3f317218 is ln(2) as a float, so v41 becomes the natural log of the scaled sum.
v_mul_f32_e32 v41, 0x3f317218, v19 ; 105226FF 3F317218
; 0xbe4ccccd is -0.2f: v21 = v36*v20 - 0.2, v20 = v36*v22 - 0.2, v19 = v36*v35 - 0.2.
v_mov_b32_e32 v19, 0xbe4ccccd ; 7E2602FF BE4CCCCD
v_mad_f32 v21, v36, v20, v19 ; D2820015 044E2924
v_mad_f32 v20, v36, v22, v19 ; D2820014 044E2D24
v_mac_f32_e32 v19, v36, v35 ; 3E264724
; 0x40e00000 is 7.0f.
v_mov_b32_e32 v22, 0x40e00000 ; 7E2C02FF 40E00000
; v35 = s[22:23] ? v42 : v37 (v_cndmask picks src1 where the mask bit is set), then 1/v35.
v_cndmask_b32_e64 v35, v37, v42, s[22:23] ; D2000023 005A5525
v_rcp_f32_e32 v35, v35 ; 7E465523
; 0x3b000000 is 2^-9 and 0x3b800000 is 2^-8; v44 = s[22:23] ? 2^-9 : 2^-8.
v_mov_b32_e32 v36, 0x3b000000 ; 7E4802FF 3B000000
v_mov_b32_e32 v43, 0x3b800000 ; 7E5602FF 3B800000
v_cndmask_b32_e64 v44, v43, v36, s[22:23] ; D200002C 005A492B
; v24 == 0 -> lanes 20/21.
v_cmp_eq_f32_e64 s[0:1], 0, v24 ; D0040000 00023080
v_writelane_b32 v254, s0, 20 ; 05FD2800
v_writelane_b32 v254, s1, 21 ; 05FD2A01
; Fractional parts: v45 = v34*v35 - floor(v34*v35), v46 likewise for v38.
v_mul_f32_e32 v45, v35, v34 ; 105A4523
v_floor_f32_e32 v45, v45 ; 7E5A492D
v_mad_f32 v45, v34, v35, -v45 ; D282002D 84B64722
v_mul_f32_e32 v46, v35, v38 ; 105C4D23
v_floor_f32_e32 v46, v46 ; 7E5C492E
v_mad_f32 v46, v38, v35, -v46 ; D282002E 84BA4726
; v47 = 2*s40; v48 = 1 - v47*v44; v44 = s40*v44.
; Each fraction f is then remapped as v48*f + v44 (into v49, v45 and v44).
v_add_f32_e64 v47, s40, s40 ; D206002F 00005028
v_mad_f32 v48, -v47, v44, 1.0 ; D2820030 23CA592F
v_mul_f32_e32 v44, s40, v44 ; 10585828
v_mad_f32 v49, v48, v45, v44 ; D2820031 04B25B30
v_mad_f32 v45, v48, v46, v44 ; D282002D 04B25D30
v_mul_f32_e32 v46, v35, v28 ; 105C3923
v_floor_f32_e32 v46, v46 ; 7E5C492E
v_mad_f32 v35, v28, v35, -v46 ; D2820023 84BA471C
v_mac_f32_e32 v44, v48, v35 ; 3E584730
; Same selection for the other two masks; note v37 is overwritten by its own selection.
v_cndmask_b32_e64 v35, v37, v42, s[24:25] ; D2000023 00625525
v_cndmask_b32_e64 v37, v37, v42, s[26:27] ; D2000025 006A5525
v_rcp_f32_e32 v42, v35 ; 7E545523
v_rcp_f32_e32 v48, v37 ; 7E605525
v_cndmask_b32_e64 v50, v43, v36, s[24:25] ; D2000032 0062492B
v_cndmask_b32_e64 v51, v43, v36, s[26:27] ; D2000033 006A492B
; ---------------------------------------------------------------------------
; Finishes the wrapped/remapped coordinates for the reciprocals in v42 and v48,
; combines them with v40 / v39 / v31 and the offsets in v32/v33, v29/v30,
; v26/v27, and fills the VGPR ranges v[35:38], v[43:46], v[32:35], v[48:51],
; v[51:54], v[29:32], v[40:43], v[55:58] and v[26:29].
; NOTE(review): those ranges match the address operands of the image_sample_l
; instructions that follow this section in the shader.
; Interleaved with the VALU work are s_load_dwordx4 (base s[100:101]) and
; s_load_dwordx8 (base s[6:7]) fetches of 4- and 8-dword descriptors.
; ---------------------------------------------------------------------------
s_load_dwordx4 s[96:99], s[100:101], 0x8 ; C0B06508
; v52 = v34*v42 - floor(v34*v42); v53 likewise for v38.
v_mul_f32_e32 v35, v42, v34 ; 1046452A
v_floor_f32_e32 v35, v35 ; 7E464923
v_mad_f32 v52, v34, v42, -v35 ; D2820034 848E5522
v_mul_f32_e32 v35, v42, v38 ; 10464D2A
v_floor_f32_e32 v35, v35 ; 7E464923
v_mad_f32 v53, v38, v42, -v35 ; D2820035 848E5526
; v37 = s44 * v41; this value is later copied into the third dword of every tuple.
v_mul_f32_e32 v37, s44, v41 ; 104A522C
; First group (scaled by v40, offsets v32/v33).
v_mad_f32 v35, v40, v49, v32 ; D2820023 04826328
v_mad_f32 v36, v40, v45, v33 ; D2820024 04865B28
v_mac_f32_e32 v32, v40, v44 ; 3E405928
; Snapshot v[32:35] into v[43:46]; v44 and v45 are overwritten again further down.
v_mov_b32_e32 v43, v32 ; 7E560320
v_mov_b32_e32 v44, v33 ; 7E580321
v_mov_b32_e32 v45, v34 ; 7E5A0322
v_mov_b32_e32 v46, v35 ; 7E5C0323
v_mac_f32_e32 v33, v40, v49 ; 3E426328
; Fraction of v28*v42, then the remap constants for the s[24:25] selection:
;   v41 = 1 - v47*v50, v42 = s40*v50; v44/v45/v42 = v41*fraction + v42.
v_mul_f32_e32 v40, v42, v28 ; 1050392A
v_floor_f32_e32 v40, v40 ; 7E504928
v_mad_f32 v40, v28, v42, -v40 ; D2820028 84A2551C
v_mad_f32 v41, -v47, v50, 1.0 ; D2820029 23CA652F
v_mul_f32_e32 v42, s40, v50 ; 10546428
v_mad_f32 v44, v41, v52, v42 ; D282002C 04AA6929
v_mad_f32 v45, v41, v53, v42 ; D282002D 04AA6B29
v_mac_f32_e32 v42, v41, v40 ; 3E545129
; Same steps with the reciprocal in v48 (s[26:27] selection): fractions of
; v34, v38, v28 are written back in place, then remapped with v40 / v47.
v_mul_f32_e32 v40, v48, v34 ; 10504530
v_floor_f32_e32 v40, v40 ; 7E504928
v_mad_f32 v34, v34, v48, -v40 ; D2820022 84A26122
v_mul_f32_e32 v40, v48, v38 ; 10504D30
v_floor_f32_e32 v40, v40 ; 7E504928
v_mad_f32 v38, v38, v48, -v40 ; D2820026 84A26126
v_mul_f32_e32 v40, v48, v28 ; 10503930
v_floor_f32_e32 v40, v40 ; 7E504928
v_mad_f32 v28, v28, v48, -v40 ; D282001C 84A2611C
v_mad_f32 v40, -v47, v51, 1.0 ; D2820028 23CA672F
v_mul_f32_e32 v47, s40, v51 ; 105E6628
v_mad_f32 v34, v40, v34, v47 ; D2820022 04BE4528
v_mad_f32 v38, v40, v38, v47 ; D2820026 04BE4D28
v_mac_f32_e32 v47, v40, v28 ; 3E5E3928
; Second group (scaled by v39, offsets v29/v30) and third group (v31, v26/v27).
v_mad_f32 v48, v39, v44, v29 ; D2820030 04765927
v_mad_f32 v49, v39, v45, v30 ; D2820031 047A5B27
v_mac_f32_e32 v29, v39, v42 ; 3E3A5527
v_mad_f32 v40, v31, v34, v26 ; D2820028 046A451F
v_mad_f32 v41, v31, v38, v27 ; D2820029 046E4D1F
v_mac_f32_e32 v26, v31, v47 ; 3E345F1F
; Snapshot v[29:32] into v[51:54].
v_mov_b32_e32 v51, v29 ; 7E66031D
v_mov_b32_e32 v52, v30 ; 7E68031E
v_mov_b32_e32 v53, v31 ; 7E6A031F
v_mov_b32_e32 v54, v32 ; 7E6C0320
v_mac_f32_e32 v30, v39, v44 ; 3E3C5927
; Snapshot v[26:29] into v[55:58].
v_mov_b32_e32 v55, v26 ; 7E6E031A
v_mov_b32_e32 v56, v27 ; 7E70031B
v_mov_b32_e32 v57, v28 ; 7E72031C
v_mov_b32_e32 v58, v29 ; 7E74031D
s_load_dwordx4 s[44:47], s[100:101], 0x20 ; C0966520
s_load_dwordx8 s[64:71], s[6:7], 0x40 ; C0E00740
v_mac_f32_e32 v27, v31, v34 ; 3E36451F
; Patch the second dword of three tuples (v44, v52, v56) and broadcast v37
; into the third dword of every tuple (v37 itself is the third dword of v[35:38]).
; NOTE(review): presumably the third dword is the LOD operand of image_sample_l
; for a 2D image; confirm against the ISA manual.
v_mov_b32_e32 v44, v36 ; 7E580324
v_mov_b32_e32 v50, v37 ; 7E640325
v_mov_b32_e32 v52, v49 ; 7E680331
v_mov_b32_e32 v42, v37 ; 7E540325
v_mov_b32_e32 v56, v41 ; 7E700329
v_mov_b32_e32 v45, v37 ; 7E5A0325
v_mov_b32_e32 v34, v37 ; 7E440325
v_mov_b32_e32 v53, v37 ; 7E6A0325
v_mov_b32_e32 v31, v37 ; 7E3E0325
v_mov_b32_e32 v57, v37 ; 7E720325
v_mov_b32_e32 v28, v37 ; 7E380325
; Remaining descriptor fetches; note s[8:11] (the buffer descriptor used by the
; earlier s_buffer_load_dword instructions) is overwritten here.
s_load_dwordx4 s[8:11], s[100:101], 0x18 ; C0846518
s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720
s_load_dwordx8 s[56:63], s[6:7], 0x30 ; C0DC0730
s_load_dwordx4 s[12:15], s[100:101], 0x10 ; C0866510
s_load_dwordx4 s[48:51], s[100:101], 0x24 ; C0986524
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
s_load_dwordx8 s[80:87], s[6:7], 0x48 ; C0E80748
; ---------------------------------------------------------------------------
; Three-channel sample batch: 45 image_sample_l instructions whose first
; immediate is 7 and whose destination is three VGPRs each (v59..v193).
; Nine address tuples, in the order v[35:38], v[43:46], v[32:35], v[48:51],
; v[51:54], v[29:32], v[40:43], v[55:58], v[26:29], are each sampled with the
; same five 8-dword / 4-dword operand pairs:
;   (s[64:71], s[44:47]), (s[56:63], s[8:11]), (s[16:23], s[12:15]),
;   (s[24:31], s[96:99]), (s[32:39], s[52:55]).
; Interleaved with the samples, further descriptors are loaded; four of them
; are parked in lanes 34..59 of v254 with v_writelane_b32.
; ---------------------------------------------------------------------------
; Wait for the scalar descriptor loads issued before this point.
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[64:71], s[44:47] ; F0900700 01703B23
s_load_dwordx4 s[52:55], s[100:101], 0x0 ; C09A6500
s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700
s_load_dwordx4 s[40:43], s[100:101], 0x1c ; C094651C
s_load_dwordx8 s[72:79], s[6:7], 0x38 ; C0E40738
image_sample_l v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[56:63], s[8:11] ; F0900700 004E3E23
image_sample_l v[65:67], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[16:23], s[12:15] ; F0900700 00644123
; 4 dwords from s[100:101]+0xc -> v254 lanes 48..51.
; This wait covers the vector-memory counter as well as the scalar one.
s_load_dwordx4 s[0:3], s[100:101], 0xc ; C080650C
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v254, s0, 48 ; 05FD6000
v_writelane_b32 v254, s1, 49 ; 05FD6201
v_writelane_b32 v254, s2, 50 ; 05FD6402
v_writelane_b32 v254, s3, 51 ; 05FD6603
; 8 dwords from s[6:7]+0x18 -> v254 lanes 52..59.
s_load_dwordx8 s[88:95], s[6:7], 0x18 ; C0EC0718
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s88, 52 ; 05FD6858
v_writelane_b32 v254, s89, 53 ; 05FD6A59
v_writelane_b32 v254, s90, 54 ; 05FD6C5A
v_writelane_b32 v254, s91, 55 ; 05FD6E5B
v_writelane_b32 v254, s92, 56 ; 05FD705C
v_writelane_b32 v254, s93, 57 ; 05FD725D
v_writelane_b32 v254, s94, 58 ; 05FD745E
v_writelane_b32 v254, s95, 59 ; 05FD765F
image_sample_l v[68:70], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[24:31], s[96:99] ; F0900700 03064423
; 4 dwords from s[100:101]+0x4 -> v254 lanes 34..37.
s_load_dwordx4 s[0:3], s[100:101], 0x4 ; C0806504
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v254, s0, 34 ; 05FD4400
v_writelane_b32 v254, s1, 35 ; 05FD4601
v_writelane_b32 v254, s2, 36 ; 05FD4802
v_writelane_b32 v254, s3, 37 ; 05FD4A03
; 8 dwords from s[6:7]+0x8 -> v254 lanes 38..45.
s_load_dwordx8 s[88:95], s[6:7], 0x8 ; C0EC0708
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v254, s88, 38 ; 05FD4C58
v_writelane_b32 v254, s89, 39 ; 05FD4E59
v_writelane_b32 v254, s90, 40 ; 05FD505A
v_writelane_b32 v254, s91, 41 ; 05FD525B
v_writelane_b32 v254, s92, 42 ; 05FD545C
v_writelane_b32 v254, s93, 43 ; 05FD565D
v_writelane_b32 v254, s94, 44 ; 05FD585E
v_writelane_b32 v254, s95, 45 ; 05FD5A5F
; Last sample for tuple v[35:38].
image_sample_l v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[32:39], s[52:55] ; F0900700 01A84723
; Tuple v[43:46].
image_sample_l v[74:76], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[64:71], s[44:47] ; F0900700 01704A2B
image_sample_l v[77:79], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[56:63], s[8:11] ; F0900700 004E4D2B
image_sample_l v[80:82], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[16:23], s[12:15] ; F0900700 0064502B
image_sample_l v[83:85], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[24:31], s[96:99] ; F0900700 0306532B
image_sample_l v[86:88], 7, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[32:39], s[52:55] ; F0900700 01A8562B
; Tuple v[32:35].
image_sample_l v[89:91], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[64:71], s[44:47] ; F0900700 01705920
image_sample_l v[92:94], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[56:63], s[8:11] ; F0900700 004E5C20
image_sample_l v[95:97], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[16:23], s[12:15] ; F0900700 00645F20
image_sample_l v[98:100], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[24:31], s[96:99] ; F0900700 03066220
image_sample_l v[101:103], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[32:39], s[52:55] ; F0900700 01A86520
; Tuple v[48:51].
image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[64:71], s[44:47] ; F0900700 01706830
image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[56:63], s[8:11] ; F0900700 004E6B30
image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[12:15] ; F0900700 00646E30
image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[24:31], s[96:99] ; F0900700 03067130
image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[32:39], s[52:55] ; F0900700 01A87430
; Tuple v[51:54].
image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[64:71], s[44:47] ; F0900700 01707733
image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[56:63], s[8:11] ; F0900700 004E7A33
image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[12:15] ; F0900700 00647D33
image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[24:31], s[96:99] ; F0900700 03068033
image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[32:39], s[52:55] ; F0900700 01A88333
; Tuple v[29:32].
image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[64:71], s[44:47] ; F0900700 0170861D
image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[56:63], s[8:11] ; F0900700 004E891D
image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[16:23], s[12:15] ; F0900700 00648C1D
image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[24:31], s[96:99] ; F0900700 03068F1D
image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[32:39], s[52:55] ; F0900700 01A8921D
; Tuple v[40:43].
image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[64:71], s[44:47] ; F0900700 01709528
image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[56:63], s[8:11] ; F0900700 004E9828
image_sample_l v[155:157], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[16:23], s[12:15] ; F0900700 00649B28
image_sample_l v[158:160], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[24:31], s[96:99] ; F0900700 03069E28
image_sample_l v[161:163], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[32:39], s[52:55] ; F0900700 01A8A128
; Tuple v[55:58].
image_sample_l v[164:166], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[64:71], s[44:47] ; F0900700 0170A437
image_sample_l v[167:169], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[56:63], s[8:11] ; F0900700 004EA737
image_sample_l v[170:172], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[12:15] ; F0900700 0064AA37
; Two more descriptor fetches; the second overwrites its own base pair s[6:7].
s_load_dwordx4 s[88:91], s[100:101], 0x14 ; C0AC6514
s_load_dwordx8 s[0:7], s[6:7], 0x28 ; C0C00728
image_sample_l v[173:175], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[24:31], s[96:99] ; F0900700 0306AD37
image_sample_l v[176:178], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[32:39], s[52:55] ; F0900700 01A8B037
; Tuple v[26:29].
image_sample_l v[179:181], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[64:71], s[44:47] ; F0900700 0170B31A
image_sample_l v[182:184], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[56:63], s[8:11] ; F0900700 004EB61A
image_sample_l v[185:187], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[16:23], s[12:15] ; F0900700 0064B91A
image_sample_l v[188:190], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[24:31], s[96:99] ; F0900700 0306BC1A
image_sample_l v[191:193], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[32:39], s[52:55] ; F0900700 01A8BF1A
; ---------------------------------------------------------------------------
; Two-channel sample batch: image_sample_l instructions whose first immediate
; is 10 and whose destination is two VGPRs each. The same nine address tuples
; are sampled with five further operand pairs:
;   (s[80:87], s[48:51]), (s[72:79], s[40:43]), (s[0:7], s[88:91]),
;   (s[24:31], s[8:11] -- later s[12:15]), (s[16:23], s[92:95] -- later s[8:11]).
; The last two pairs are first restored from the lanes of v254 where they were
; parked (v_readlane_b32), overwriting the previous contents of s[8:11],
; s[24:31] and s[16:23].
; Several destinations reuse VGPRs that are still address or result registers
; of earlier samples; the s_waitcnt vmcnt(N) instructions sit in front of those.
; Only three of the five samples for tuple v[40:43] are issued here.
; ---------------------------------------------------------------------------
; Tuple v[43:46], first two pairs.
image_sample_l v[38:39], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[80:87], s[48:51] ; F0900A00 0194262B
image_sample_l v[194:195], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[72:79], s[40:43] ; F0900A00 0152C22B
; Wait until at most 9 vector-memory ops are outstanding and all scalar loads are done.
s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079
image_sample_l v[196:197], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[0:7], s[88:91] ; F0900A00 02C0C42B
; Restore s[8:11] from v254 lanes 48..51 and s[24:31] from lanes 52..59.
; NOTE(review): each v_readlane_b32 group is followed by s_nop 2, presumably
; hazard padding before the SGPRs are consumed -- confirm against the ISA manual.
v_readlane_b32 s8, v254, 48 ; 021161FE
v_readlane_b32 s9, v254, 49 ; 021363FE
v_readlane_b32 s10, v254, 50 ; 021565FE
v_readlane_b32 s11, v254, 51 ; 021767FE
s_nop 2 ; BF800002
v_readlane_b32 s24, v254, 52 ; 023169FE
v_readlane_b32 s25, v254, 53 ; 02336BFE
v_readlane_b32 s26, v254, 54 ; 02356DFE
v_readlane_b32 s27, v254, 55 ; 02376FFE
v_readlane_b32 s28, v254, 56 ; 023971FE
v_readlane_b32 s29, v254, 57 ; 023B73FE
v_readlane_b32 s30, v254, 58 ; 023D75FE
v_readlane_b32 s31, v254, 59 ; 023F77FE
s_nop 2 ; BF800002
image_sample_l v[198:199], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[24:31], s[8:11] ; F0900A00 0046C62B
; Restore s[92:95] from v254 lanes 34..37 and s[16:23] from lanes 38..45.
v_readlane_b32 s92, v254, 34 ; 02B945FE
v_readlane_b32 s93, v254, 35 ; 02BB47FE
v_readlane_b32 s94, v254, 36 ; 02BD49FE
v_readlane_b32 s95, v254, 37 ; 02BF4BFE
s_nop 2 ; BF800002
v_readlane_b32 s16, v254, 38 ; 02214DFE
v_readlane_b32 s17, v254, 39 ; 02234FFE
v_readlane_b32 s18, v254, 40 ; 022551FE
v_readlane_b32 s19, v254, 41 ; 022753FE
v_readlane_b32 s20, v254, 42 ; 022955FE
v_readlane_b32 s21, v254, 43 ; 022B57FE
v_readlane_b32 s22, v254, 44 ; 022D59FE
v_readlane_b32 s23, v254, 45 ; 022F5BFE
s_nop 2 ; BF800002
; Last sample for tuple v[43:46]; the result overwrites the first two address dwords.
image_sample_l v[43:44], 10, 0, 0, 0, 0, 0, 0, 0, v[43:46], s[16:23], s[92:95] ; F0900A00 02E42B2B
; Tuple v[32:35].
image_sample_l v[45:46], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[80:87], s[48:51] ; F0900A00 01942D20
image_sample_l v[200:201], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[72:79], s[40:43] ; F0900A00 0152C820
image_sample_l v[202:203], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[0:7], s[88:91] ; F0900A00 02C0CA20
image_sample_l v[204:205], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[24:31], s[8:11] ; F0900A00 0046CC20
; Move the 4-dword operand from s[8:11] to s[12:15] ...
s_mov_b32 s12, s8 ; BE8C0308
s_mov_b32 s13, s9 ; BE8D0309
s_mov_b32 s14, s10 ; BE8E030A
s_mov_b32 s15, s11 ; BE8F030B
image_sample_l v[32:33], 10, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[16:23], s[92:95] ; F0900A00 02E42020
; ... and the one in s[92:95] to s[8:11]; the remaining samples use the new locations.
s_mov_b32 s8, s92 ; BE88035C
s_mov_b32 s9, s93 ; BE89035D
s_mov_b32 s10, s94 ; BE8A035E
s_mov_b32 s11, s95 ; BE8B035F
s_waitcnt vmcnt(9) ; BF8C0779
; Tuple v[35:38].
image_sample_l v[206:207], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[80:87], s[48:51] ; F0900A00 0194CE23
image_sample_l v[208:209], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[72:79], s[40:43] ; F0900A00 0152D023
image_sample_l v[210:211], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[0:7], s[88:91] ; F0900A00 02C0D223
image_sample_l v[212:213], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[24:31], s[12:15] ; F0900A00 0066D423
image_sample_l v[34:35], 10, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[16:23], s[8:11] ; F0900A00 00442223
; Tuple v[51:54].
image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[80:87], s[48:51] ; F0900A00 01942433
image_sample_l v[214:215], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[72:79], s[40:43] ; F0900A00 0152D633
image_sample_l v[216:217], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[0:7], s[88:91] ; F0900A00 02C0D833
image_sample_l v[218:219], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[24:31], s[12:15] ; F0900A00 0066DA33
image_sample_l v[51:52], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[8:11] ; F0900A00 00443333
s_waitcnt vmcnt(10) ; BF8C077A
; Tuple v[29:32].
image_sample_l v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[80:87], s[48:51] ; F0900A00 0194351D
image_sample_l v[220:221], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[72:79], s[40:43] ; F0900A00 0152DC1D
image_sample_l v[222:223], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[0:7], s[88:91] ; F0900A00 02C0DE1D
image_sample_l v[224:225], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[24:31], s[12:15] ; F0900A00 0066E01D
image_sample_l v[29:30], 10, 0, 0, 0, 0, 0, 0, 0, v[29:32], s[16:23], s[8:11] ; F0900A00 00441D1D
s_waitcnt vmcnt(5) ; BF8C0775
; Tuple v[48:51].
image_sample_l v[226:227], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[80:87], s[48:51] ; F0900A00 0194E230
image_sample_l v[228:229], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[72:79], s[40:43] ; F0900A00 0152E430
image_sample_l v[230:231], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[0:7], s[88:91] ; F0900A00 02C0E630
image_sample_l v[232:233], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[24:31], s[12:15] ; F0900A00 0066E830
image_sample_l v[47:48], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[8:11] ; F0900A00 00442F30
; Tuple v[55:58].
image_sample_l v[49:50], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[80:87], s[48:51] ; F0900A00 01943137
image_sample_l v[234:235], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[72:79], s[40:43] ; F0900A00 0152EA37
image_sample_l v[236:237], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[0:7], s[88:91] ; F0900A00 02C0EC37
image_sample_l v[238:239], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[24:31], s[12:15] ; F0900A00 0066EE37
image_sample_l v[55:56], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[8:11] ; F0900A00 00443737
s_waitcnt vmcnt(10) ; BF8C077A
; Tuple v[26:29].
image_sample_l v[57:58], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[80:87], s[48:51] ; F0900A00 0194391A
image_sample_l v[240:241], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[72:79], s[40:43] ; F0900A00 0152F01A
image_sample_l v[242:243], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[0:7], s[88:91] ; F0900A00 02C0F21A
image_sample_l v[244:245], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[24:31], s[12:15] ; F0900A00 0066F41A
image_sample_l v[26:27], 10, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[16:23], s[8:11] ; F0900A00 00441A1A
; Tuple v[40:43], first three pairs only.
image_sample_l v[246:247], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[80:87], s[48:51] ; F0900A00 0194F628
image_sample_l v[248:249], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[72:79], s[40:43] ; F0900A00 0152F828
image_sample_l v[250:251], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[0:7], s[88:91] ; F0900A00 02C0FA28
; ---------------------------------------------------------------------------
; Turns the equality masks for v18, v23 and v24 into 0.0 / 1.0 weights and
; forms, for each of the nine address tuples, a weighted sum of its five
; three-channel samples (v59..v193). Each weight group tests one register
; against the distinct constants 0, 1, 2, 3 and 4, so at most one weight per
; group is 1.0 in any lane.
;   weights from v18: v253 (==0), v252 (==1), v31 (==2), v18 (==3), v25 (==4)
;   weights from v23: v2   (==0), v0   (==1), v4  (==2), v23 (==3), v3  (==4)
;   weights from v24: v77  (==0), v81  (==1), v82 (==2), v24 (==3), v83 (==4)
; v25 is not written in this section; it is read here as the (==4) weight.
; The "== 3.0" tests are done here against v28; the other masks come back from
; lanes of v254 via v_readlane_b32 (each group followed by s_nop 2).
; Results: v[71:73], v[59:61], v[62:64] (v18 weights); v[65:67], v[68:70],
; v[74:76] (v23 weights); v[78:80], v[84:86], v[87:89] (v24 weights).
; ---------------------------------------------------------------------------
; 0x40400000 is 3.0f. v18 is replaced by its own (v18 == 3.0) weight.
v_mov_b32_e32 v28, 0x40400000 ; 7E3802FF 40400000
v_cmp_eq_f32_e64 s[4:5], v18, v28 ; D0040004 00023912
v_cndmask_b32_e64 v18, 0, 1.0, s[4:5] ; D2000012 0011E480
; Lanes 28/29 -> v31; lanes 26/27 -> v252.
v_readlane_b32 s0, v254, 28 ; 020139FE
v_readlane_b32 s1, v254, 29 ; 02033BFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v31, 0, 1.0, s[0:1] ; D200001F 0001E480
v_readlane_b32 s0, v254, 26 ; 020135FE
v_readlane_b32 s1, v254, 27 ; 020337FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v252, 0, 1.0, s[0:1] ; D20000FC 0001E480
; NOTE(review): vcc is not written in this section; presumably it still holds
; the earlier v_cmp_eq_f32 vcc, 0, v18 result -- confirm nothing in between clobbers it.
v_cndmask_b32_e64 v253, 0, 1.0, vcc ; D20000FD 01A9E480
; Lanes 32/33 -> v3.
v_readlane_b32 s0, v254, 32 ; 020141FE
v_readlane_b32 s1, v254, 33 ; 020343FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v3, 0, 1.0, s[0:1] ; D2000003 0001E480
; v23 is replaced by its own (v23 == 3.0) weight.
v_cmp_eq_f32_e64 s[4:5], v23, v28 ; D0040004 00023917
v_cndmask_b32_e64 v23, 0, 1.0, s[4:5] ; D2000017 0011E480
; Lanes 22/23 -> v4; lanes 24/25 -> v0; lanes 30/31 -> v2.
v_readlane_b32 s0, v254, 22 ; 02012DFE
v_readlane_b32 s1, v254, 23 ; 02032FFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v4, 0, 1.0, s[0:1] ; D2000004 0001E480
v_readlane_b32 s0, v254, 24 ; 020131FE
v_readlane_b32 s1, v254, 25 ; 020333FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v0, 0, 1.0, s[0:1] ; D2000000 0001E480
v_readlane_b32 s0, v254, 30 ; 02013DFE
v_readlane_b32 s1, v254, 31 ; 02033FFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v2, 0, 1.0, s[0:1] ; D2000002 0001E480
; v[71:73] = v253*v[71:73] + v252*v[68:70] + v31*v[65:67] + v18*v[62:64] + v25*v[59:61]
v_mul_f32_e32 v71, v253, v71 ; 108E8FFD
v_mul_f32_e32 v72, v253, v72 ; 109091FD
v_mul_f32_e32 v73, v253, v73 ; 109293FD
v_mac_f32_e32 v71, v252, v68 ; 3E8E89FC
v_mac_f32_e32 v72, v252, v69 ; 3E908BFC
v_mac_f32_e32 v73, v252, v70 ; 3E928DFC
v_mac_f32_e32 v71, v31, v65 ; 3E8E831F
v_mac_f32_e32 v72, v31, v66 ; 3E90851F
v_mac_f32_e32 v73, v31, v67 ; 3E92871F
v_mac_f32_e32 v71, v18, v62 ; 3E8E7D12
v_mac_f32_e32 v72, v18, v63 ; 3E907F12
v_mac_f32_e32 v73, v18, v64 ; 3E928112
v_mac_f32_e32 v71, v25, v59 ; 3E8E7719
v_mac_f32_e32 v72, v25, v60 ; 3E907919
v_mac_f32_e32 v73, v25, v61 ; 3E927B19
; v[59:61] = same weights applied to v[86:88], v[83:85], v[80:82], v[77:79], v[74:76]
v_mul_f32_e32 v59, v253, v86 ; 1076ADFD
v_mul_f32_e32 v60, v253, v87 ; 1078AFFD
v_mul_f32_e32 v61, v253, v88 ; 107AB1FD
v_mac_f32_e32 v59, v252, v83 ; 3E76A7FC
v_mac_f32_e32 v60, v252, v84 ; 3E78A9FC
v_mac_f32_e32 v61, v252, v85 ; 3E7AABFC
v_mac_f32_e32 v59, v31, v80 ; 3E76A11F
v_mac_f32_e32 v60, v31, v81 ; 3E78A31F
v_mac_f32_e32 v61, v31, v82 ; 3E7AA51F
v_mac_f32_e32 v59, v18, v77 ; 3E769B12
v_mac_f32_e32 v60, v18, v78 ; 3E789D12
v_mac_f32_e32 v61, v18, v79 ; 3E7A9F12
v_mac_f32_e32 v59, v25, v74 ; 3E769519
v_mac_f32_e32 v60, v25, v75 ; 3E789719
v_mac_f32_e32 v61, v25, v76 ; 3E7A9919
; v[62:64] = same weights applied to v[101:103], v[98:100], v[95:97], v[92:94], v[89:91]
v_mul_f32_e32 v62, v253, v101 ; 107CCBFD
v_mul_f32_e32 v63, v253, v102 ; 107ECDFD
v_mul_f32_e32 v64, v253, v103 ; 1080CFFD
v_mac_f32_e32 v62, v252, v98 ; 3E7CC5FC
v_mac_f32_e32 v63, v252, v99 ; 3E7EC7FC
v_mac_f32_e32 v64, v252, v100 ; 3E80C9FC
v_mac_f32_e32 v62, v31, v95 ; 3E7CBF1F
v_mac_f32_e32 v63, v31, v96 ; 3E7EC11F
v_mac_f32_e32 v64, v31, v97 ; 3E80C31F
v_mac_f32_e32 v62, v18, v92 ; 3E7CB912
v_mac_f32_e32 v63, v18, v93 ; 3E7EBB12
v_mac_f32_e32 v64, v18, v94 ; 3E80BD12
v_mac_f32_e32 v62, v25, v89 ; 3E7CB319
v_mac_f32_e32 v63, v25, v90 ; 3E7EB519
v_mac_f32_e32 v64, v25, v91 ; 3E80B719
; v[65:67] = v2*v[116:118] + v0*v[113:115] + v4*v[110:112] + v23*v[107:109] + v3*v[104:106]
v_mul_f32_e32 v65, v2, v116 ; 1082E902
v_mul_f32_e32 v66, v2, v117 ; 1084EB02
v_mul_f32_e32 v67, v2, v118 ; 1086ED02
v_mac_f32_e32 v65, v0, v113 ; 3E82E300
v_mac_f32_e32 v66, v0, v114 ; 3E84E500
v_mac_f32_e32 v67, v0, v115 ; 3E86E700
v_mac_f32_e32 v65, v4, v110 ; 3E82DD04
v_mac_f32_e32 v66, v4, v111 ; 3E84DF04
v_mac_f32_e32 v67, v4, v112 ; 3E86E104
v_mac_f32_e32 v65, v23, v107 ; 3E82D717
v_mac_f32_e32 v66, v23, v108 ; 3E84D917
v_mac_f32_e32 v67, v23, v109 ; 3E86DB17
v_mac_f32_e32 v65, v3, v104 ; 3E82D103
v_mac_f32_e32 v66, v3, v105 ; 3E84D303
v_mac_f32_e32 v67, v3, v106 ; 3E86D503
; v[68:70] = same weights applied to v[131:133], v[128:130], v[125:127], v[122:124], v[119:121]
v_mul_f32_e32 v68, v2, v131 ; 10890702
v_mul_f32_e32 v69, v2, v132 ; 108B0902
v_mul_f32_e32 v70, v2, v133 ; 108D0B02
v_mac_f32_e32 v68, v0, v128 ; 3E890100
v_mac_f32_e32 v69, v0, v129 ; 3E8B0300
v_mac_f32_e32 v70, v0, v130 ; 3E8D0500
v_mac_f32_e32 v68, v4, v125 ; 3E88FB04
v_mac_f32_e32 v69, v4, v126 ; 3E8AFD04
v_mac_f32_e32 v70, v4, v127 ; 3E8CFF04
v_mac_f32_e32 v68, v23, v122 ; 3E88F517
v_mac_f32_e32 v69, v23, v123 ; 3E8AF717
v_mac_f32_e32 v70, v23, v124 ; 3E8CF917
v_mac_f32_e32 v68, v3, v119 ; 3E88EF03
v_mac_f32_e32 v69, v3, v120 ; 3E8AF103
v_mac_f32_e32 v70, v3, v121 ; 3E8CF303
; v[74:76] = same weights applied to v[146:148], v[143:145], v[140:142], v[137:139], v[134:136]
v_mul_f32_e32 v74, v2, v146 ; 10952502
v_mul_f32_e32 v75, v2, v147 ; 10972702
v_mul_f32_e32 v76, v2, v148 ; 10992902
v_mac_f32_e32 v74, v0, v143 ; 3E951F00
v_mac_f32_e32 v75, v0, v144 ; 3E972100
v_mac_f32_e32 v76, v0, v145 ; 3E992300
v_mac_f32_e32 v74, v4, v140 ; 3E951904
v_mac_f32_e32 v75, v4, v141 ; 3E971B04
v_mac_f32_e32 v76, v4, v142 ; 3E991D04
v_mac_f32_e32 v74, v23, v137 ; 3E951317
v_mac_f32_e32 v75, v23, v138 ; 3E971517
v_mac_f32_e32 v76, v23, v139 ; 3E991717
v_mac_f32_e32 v74, v3, v134 ; 3E950D03
v_mac_f32_e32 v75, v3, v135 ; 3E970F03
v_mac_f32_e32 v76, v3, v136 ; 3E991103
; v24 weights are materialised one at a time while v[78:80] is accumulated:
; lanes 20/21 -> v77, lanes 18/19 -> v81, lanes 16/17 -> v82, (v24 == 3.0) -> v24,
; lanes 14/15 -> v83.
v_readlane_b32 s0, v254, 20 ; 020129FE
v_readlane_b32 s1, v254, 21 ; 02032BFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v77, 0, 1.0, s[0:1] ; D200004D 0001E480
v_mul_f32_e32 v78, v77, v161 ; 109D434D
v_mul_f32_e32 v79, v77, v162 ; 109F454D
v_mul_f32_e32 v80, v77, v163 ; 10A1474D
v_readlane_b32 s0, v254, 18 ; 020125FE
v_readlane_b32 s1, v254, 19 ; 020327FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v81, 0, 1.0, s[0:1] ; D2000051 0001E480
v_mac_f32_e32 v78, v81, v158 ; 3E9D3D51
v_mac_f32_e32 v79, v81, v159 ; 3E9F3F51
v_mac_f32_e32 v80, v81, v160 ; 3EA14151
v_readlane_b32 s0, v254, 16 ; 020121FE
v_readlane_b32 s1, v254, 17 ; 020323FE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v82, 0, 1.0, s[0:1] ; D2000052 0001E480
v_mac_f32_e32 v78, v82, v155 ; 3E9D3752
v_mac_f32_e32 v79, v82, v156 ; 3E9F3952
v_mac_f32_e32 v80, v82, v157 ; 3EA13B52
v_cmp_eq_f32_e64 s[0:1], v24, v28 ; D0040000 00023918
v_cndmask_b32_e64 v24, 0, 1.0, s[0:1] ; D2000018 0001E480
v_mac_f32_e32 v78, v24, v152 ; 3E9D3118
v_mac_f32_e32 v79, v24, v153 ; 3E9F3318
v_mac_f32_e32 v80, v24, v154 ; 3EA13518
v_readlane_b32 s0, v254, 14 ; 02011DFE
v_readlane_b32 s1, v254, 15 ; 02031FFE
s_nop 2 ; BF800002
v_cndmask_b32_e64 v83, 0, 1.0, s[0:1] ; D2000053 0001E480
v_mac_f32_e32 v78, v83, v149 ; 3E9D2B53
v_mac_f32_e32 v79, v83, v150 ; 3E9F2D53
v_mac_f32_e32 v80, v83, v151 ; 3EA12F53
; v[84:86] = v77*v[176:178] + v81*v[173:175] + v82*v[170:172] + v24*v[167:169] + v83*v[164:166]
v_mul_f32_e32 v84, v77, v176 ; 10A9614D
v_mul_f32_e32 v85, v77, v177 ; 10AB634D
v_mul_f32_e32 v86, v77, v178 ; 10AD654D
v_mac_f32_e32 v84, v81, v173 ; 3EA95B51
v_mac_f32_e32 v85, v81, v174 ; 3EAB5D51
v_mac_f32_e32 v86, v81, v175 ; 3EAD5F51
v_mac_f32_e32 v84, v82, v170 ; 3EA95552
v_mac_f32_e32 v85, v82, v171 ; 3EAB5752
v_mac_f32_e32 v86, v82, v172 ; 3EAD5952
v_mac_f32_e32 v84, v24, v167 ; 3EA94F18
v_mac_f32_e32 v85, v24, v168 ; 3EAB5118
v_mac_f32_e32 v86, v24, v169 ; 3EAD5318
v_mac_f32_e32 v84, v83, v164 ; 3EA94953
v_mac_f32_e32 v85, v83, v165 ; 3EAB4B53
v_mac_f32_e32 v86, v83, v166 ; 3EAD4D53
; v[87:89] = same weights applied to v[191:193], v[188:190], v[185:187], v[182:184], v[179:181]
v_mul_f32_e32 v87, v77, v191 ; 10AF7F4D
v_mul_f32_e32 v88, v77, v192 ; 10B1814D
v_mul_f32_e32 v89, v77, v193 ; 10B3834D
v_mac_f32_e32 v87, v81, v188 ; 3EAF7951
v_mac_f32_e32 v88, v81, v189 ; 3EB17B51
v_mac_f32_e32 v89, v81, v190 ; 3EB37D51
v_mac_f32_e32 v87, v82, v185 ; 3EAF7352
v_mac_f32_e32 v88, v82, v186 ; 3EB17552
v_mac_f32_e32 v89, v82, v187 ; 3EB37752
v_mac_f32_e32 v87, v24, v182 ; 3EAF6D18
v_mac_f32_e32 v88, v24, v183 ; 3EB16F18
v_mac_f32_e32 v89, v24, v184 ; 3EB37118
v_mac_f32_e32 v87, v83, v179 ; 3EAF6753
v_mac_f32_e32 v88, v83, v180 ; 3EB16953
v_mac_f32_e32 v89, v83, v181 ; 3EB36B53
; ---------------------------------------------------------------------------
; Three interleaved pieces of work:
;  1. Weighted sums of the two-channel samples, using the 0.0/1.0 weights that
;     are live on entry:
;       v253, v252, v31, v18, v25 -> results in v[43:44], v[32:33], v[34:35]
;       v2,   v0,   v4,  v23, v3  -> results in (v18,v25), v[29:30], (v31,v2)
;       v77,  v81,  v82, v24, v83 -> results in (v0,v3), (v4,v23), v[36:37]
;     Weight registers are reused as accumulators once their last read as a
;     weight has been issued (v18, v25, v31, v2, v0, v3, v4, v23).
;     The last two samples for address tuple v[40:43] are issued inside this
;     section and waited for with s_waitcnt vmcnt(0).
;  2. Normalisation of v21, v20, v19: each is multiplied by v22, clamped from
;     below at 0.01 (0x3c23d70a), and divided by their sum.
;     v22 is not written before its first read here; the last visible write
;     earlier in the shader is v_mov_b32 v22, 0x40e00000 (7.0f).
;  3. Three-way mixes of the three-channel results with weights v19, v21, v20:
;       v22/v24/v26 = v19*v[78:80] + v21*v[84:86] + v20*v[87:89]
;       v27/v38/v39 = v19*v[65:67] + v21*v[68:70] + v20*v[74:76]
;       v[40:42]    = v19*v[71:73] + v21*v[59:61] + v20*v[62:64]
; ---------------------------------------------------------------------------
; (== 0) and (== 1) terms for the first weight group.
v_mul_f32_e32 v43, v253, v43 ; 105657FD
v_mul_f32_e32 v44, v253, v44 ; 105859FD
v_mac_f32_e32 v43, v252, v198 ; 3E578DFC
v_mac_f32_e32 v44, v252, v199 ; 3E598FFC
v_mul_f32_e32 v32, v253, v32 ; 104041FD
v_mul_f32_e32 v33, v253, v33 ; 104243FD
v_mac_f32_e32 v32, v252, v204 ; 3E4199FC
v_mac_f32_e32 v33, v252, v205 ; 3E439BFC
v_mul_f32_e32 v34, v253, v34 ; 104445FD
v_mul_f32_e32 v35, v253, v35 ; 104647FD
v_mac_f32_e32 v34, v252, v212 ; 3E45A9FC
v_mac_f32_e32 v35, v252, v213 ; 3E47ABFC
; (== 2) terms.
v_mac_f32_e32 v43, v31, v196 ; 3E57891F
v_mac_f32_e32 v44, v31, v197 ; 3E598B1F
v_mac_f32_e32 v32, v31, v202 ; 3E41951F
v_mac_f32_e32 v33, v31, v203 ; 3E43971F
v_mac_f32_e32 v34, v31, v210 ; 3E45A51F
v_mac_f32_e32 v35, v31, v211 ; 3E47A71F
; (== 3) terms.
v_mac_f32_e32 v43, v18, v194 ; 3E578512
v_mac_f32_e32 v44, v18, v195 ; 3E598712
v_mac_f32_e32 v32, v18, v200 ; 3E419112
v_mac_f32_e32 v33, v18, v201 ; 3E439312
v_mac_f32_e32 v34, v18, v208 ; 3E45A112
v_mac_f32_e32 v35, v18, v209 ; 3E47A312
; (== 4) terms.
v_mac_f32_e32 v43, v25, v38 ; 3E564D19
v_mac_f32_e32 v44, v25, v39 ; 3E584F19
v_mac_f32_e32 v32, v25, v45 ; 3E405B19
v_mac_f32_e32 v33, v25, v46 ; 3E425D19
v_mac_f32_e32 v34, v25, v206 ; 3E459D19
v_mac_f32_e32 v35, v25, v207 ; 3E479F19
; Second weight group; v18 and v25 now become accumulators.
v_mul_f32_e32 v18, v2, v51 ; 10246702
v_mul_f32_e32 v25, v2, v52 ; 10326902
v_mac_f32_e32 v18, v0, v218 ; 3E25B500
v_mac_f32_e32 v25, v0, v219 ; 3E33B700
v_mul_f32_e32 v29, v2, v29 ; 103A3B02
v_mul_f32_e32 v30, v2, v30 ; 103C3D02
v_mac_f32_e32 v29, v0, v224 ; 3E3BC100
v_mac_f32_e32 v30, v0, v225 ; 3E3DC300
; Wait until at most 13 vector-memory operations are outstanding before
; reading v47/v48 and v232/v233.
s_waitcnt vmcnt(13) ; BF8C077D
; v31 and v2 become accumulators; v2 is read as a weight for the last time here.
v_mul_f32_e32 v31, v2, v47 ; 103E5F02
v_mul_f32_e32 v2, v2, v48 ; 10046102
v_mac_f32_e32 v31, v0, v232 ; 3E3FD100
v_mac_f32_e32 v2, v0, v233 ; 3E05D300
v_mac_f32_e32 v18, v4, v216 ; 3E25B104
v_mac_f32_e32 v25, v4, v217 ; 3E33B304
v_mac_f32_e32 v29, v4, v222 ; 3E3BBD04
v_mac_f32_e32 v30, v4, v223 ; 3E3DBF04
v_mac_f32_e32 v31, v4, v230 ; 3E3FCD04
v_mac_f32_e32 v2, v4, v231 ; 3E05CF04
v_mac_f32_e32 v18, v23, v214 ; 3E25AD17
v_mac_f32_e32 v25, v23, v215 ; 3E33AF17
v_mac_f32_e32 v29, v23, v220 ; 3E3BB917
v_mac_f32_e32 v30, v23, v221 ; 3E3DBB17
v_mac_f32_e32 v31, v23, v228 ; 3E3FC917
v_mac_f32_e32 v2, v23, v229 ; 3E05CB17
v_mac_f32_e32 v18, v3, v36 ; 3E244903
v_mac_f32_e32 v25, v3, v37 ; 3E324B03
v_mac_f32_e32 v29, v3, v53 ; 3E3A6B03
v_mac_f32_e32 v30, v3, v54 ; 3E3C6D03
v_mac_f32_e32 v31, v3, v226 ; 3E3FC503
v_mac_f32_e32 v2, v3, v227 ; 3E05C703
; Third weight group; v0 and v3 become accumulators.
s_waitcnt vmcnt(8) ; BF8C0778
v_mul_f32_e32 v0, v77, v55 ; 10006F4D
v_mul_f32_e32 v3, v77, v56 ; 1006714D
v_mac_f32_e32 v0, v81, v238 ; 3E01DD51
v_mac_f32_e32 v3, v81, v239 ; 3E07DF51
s_waitcnt vmcnt(3) ; BF8C0773
; v4 and v23 become accumulators.
v_mul_f32_e32 v4, v77, v26 ; 1008354D
v_mul_f32_e32 v23, v77, v27 ; 102E374D
v_mac_f32_e32 v4, v81, v244 ; 3E09E951
v_mac_f32_e32 v23, v81, v245 ; 3E2FEB51
; Remaining two samples for tuple v[40:43]; destinations reuse v[26:27] and v[36:37],
; whose previous contents were consumed just above.
image_sample_l v[26:27], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[24:31], s[12:15] ; F0900A00 00661A28
image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[16:23], s[8:11] ; F0900A00 00442428
; Wait for every outstanding vector-memory operation.
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v36, v77, v36 ; 1048494D
v_mul_f32_e32 v37, v77, v37 ; 104A4B4D
v_mac_f32_e32 v36, v81, v26 ; 3E483551
v_mac_f32_e32 v37, v81, v27 ; 3E4A3751
v_mac_f32_e32 v0, v82, v236 ; 3E01D952
v_mac_f32_e32 v3, v82, v237 ; 3E07DB52
v_mac_f32_e32 v4, v82, v242 ; 3E09E552
v_mac_f32_e32 v23, v82, v243 ; 3E2FE752
v_mac_f32_e32 v36, v82, v250 ; 3E49F552
v_mac_f32_e32 v37, v82, v251 ; 3E4BF752
v_mac_f32_e32 v0, v24, v234 ; 3E01D518
v_mac_f32_e32 v3, v24, v235 ; 3E07D718
v_mac_f32_e32 v4, v24, v240 ; 3E09E118
v_mac_f32_e32 v23, v24, v241 ; 3E2FE318
v_mac_f32_e32 v36, v24, v248 ; 3E49F118
v_mac_f32_e32 v37, v24, v249 ; 3E4BF318
v_mac_f32_e32 v0, v83, v49 ; 3E006353
v_mac_f32_e32 v3, v83, v50 ; 3E066553
; Normalisation: scale by v22, clamp from below at 0.01, then divide by the sum.
v_mul_f32_e32 v21, v22, v21 ; 102A2B16
v_mul_f32_e32 v20, v22, v20 ; 10282916
v_mul_f32_e32 v19, v22, v19 ; 10262716
v_mov_b32_e32 v22, 0x3c23d70a ; 7E2C02FF 3C23D70A
v_max_f32_e32 v21, v22, v21 ; 202A2B16
v_max_f32_e32 v20, v22, v20 ; 20282916
v_max_f32_e32 v19, v22, v19 ; 20262716
v_add_f32_e32 v22, v20, v21 ; 062C2B14
v_add_f32_e32 v22, v19, v22 ; 062C2D13
v_rcp_f32_e32 v22, v22 ; 7E2C5516
; (== 4) terms of the third weight group, interleaved with the normalisation.
v_mac_f32_e32 v4, v83, v57 ; 3E087353
v_mac_f32_e32 v23, v83, v58 ; 3E2E7553
v_mac_f32_e32 v36, v83, v246 ; 3E49ED53
v_mac_f32_e32 v37, v83, v247 ; 3E4BEF53
; v22 holds 1 / (v19 + v20 + v21) here.
v_mul_f32_e32 v21, v22, v21 ; 102A2B16
v_mul_f32_e32 v20, v22, v20 ; 10282916
v_mul_f32_e32 v19, v22, v19 ; 10262716
; Mix of v[78:80], v[84:86], v[87:89] into v22, v24, v26.
v_mul_f32_e32 v22, v19, v78 ; 102C9D13
v_mac_f32_e32 v22, v21, v84 ; 3E2CA915
v_mul_f32_e32 v24, v19, v79 ; 10309F13
v_mac_f32_e32 v24, v21, v85 ; 3E30AB15
v_mul_f32_e32 v26, v19, v80 ; 1034A113
v_mac_f32_e32 v26, v21, v86 ; 3E34AD15
v_mac_f32_e32 v22, v20, v87 ; 3E2CAF14
v_mac_f32_e32 v24, v20, v88 ; 3E30B114
v_mac_f32_e32 v26, v20, v89 ; 3E34B314
; Mix of v[65:67], v[68:70], v[74:76] into v27, v38, v39.
v_mul_f32_e32 v27, v19, v65 ; 10368313
v_mac_f32_e32 v27, v21, v68 ; 3E368915
v_mul_f32_e32 v38, v19, v66 ; 104C8513
v_mac_f32_e32 v38, v21, v69 ; 3E4C8B15
v_mul_f32_e32 v39, v19, v67 ; 104E8713
v_mac_f32_e32 v39, v21, v70 ; 3E4E8D15
v_mac_f32_e32 v27, v20, v74 ; 3E369514
v_mac_f32_e32 v38, v20, v75 ; 3E4C9714
v_mac_f32_e32 v39, v20, v76 ; 3E4E9914
; Mix of v[71:73], v[59:61], v[62:64] into v[40:42].
v_mul_f32_e32 v40, v19, v71 ; 10508F13
v_mac_f32_e32 v40, v21, v59 ; 3E507715
v_mul_f32_e32 v41, v19, v72 ; 10529113
v_mac_f32_e32 v41, v21, v60 ; 3E527915
v_mul_f32_e32 v42, v19, v73 ; 10549313
v_mac_f32_e32 v42, v21, v61 ; 3E547B15
v_mac_f32_e32 v40, v20, v62 ; 3E507D14
v_mac_f32_e32 v41, v20, v63 ; 3E527F14
v_mac_f32_e32 v42, v20, v64 ; 3E548114
v_mad_f32 v43, 2.0, v43, -1.0 ; D282002B 03CE56F4
v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4
v_mul_f32_e32 v43, v21, v43 ; 10565715
v_mac_f32_e32 v43, v20, v33 ; 3E564314
v_mad_f32 v32, 2.0, v32, -1.0 ; D2820020 03CE40F4
v_mad_f32 v33, 2.0, v35, -1.0 ; D2820021 03CE46F4
v_mul_f32_e32 v35, 0, v21 ; 10462A80
v_mad_f32 v32, v20, v32, v35 ; D2820020 048E4114
v_mac_f32_e32 v32, v19, v33 ; 3E404313
v_mad_f32 v33, 2.0, v44, -1.0 ; D2820021 03CE58F4
v_mad_f32 v34, 2.0, v34, -1.0 ; D2820022 03CE44F4
v_mul_f32_e32 v33, v21, v33 ; 10424315
v_mac_f32_e32 v33, 0, v20 ; 3E422880
v_mac_f32_e32 v33, v19, v34 ; 3E424513
v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4
v_mad_f32 v30, 2.0, v30, -1.0 ; D282001E 03CE3CF4
v_mul_f32_e32 v18, v21, v18 ; 10242515
v_mac_f32_e32 v18, v20, v30 ; 3E243D14
v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4
v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4
v_mad_f32 v29, v20, v29, v35 ; D282001D 048E3B14
v_mac_f32_e32 v29, v19, v2 ; 3E3A0513
v_mad_f32 v2, 2.0, v25, -1.0 ; D2820002 03CE32F4
v_mad_f32 v25, 2.0, v31, -1.0 ; D2820019 03CE3EF4
v_mul_f32_e32 v2, v21, v2 ; 10040515
v_mac_f32_e32 v2, 0, v20 ; 3E042880
v_mac_f32_e32 v2, v19, v25 ; 3E043313
v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
v_mul_f32_e32 v3, v21, v3 ; 10060715
v_mul_f32_e32 v0, v21, v0 ; 10000115
v_mad_f32 v4, 2.0, v4, -1.0 ; D2820004 03CE08F4
v_mac_f32_e32 v35, v20, v4 ; 3E460914
v_mad_f32 v4, 2.0, v23, -1.0 ; D2820004 03CE2EF4
v_mac_f32_e32 v0, v20, v4 ; 3E000914
v_mac_f32_e32 v3, 0, v20 ; 3E062880
v_mad_f32 v4, 2.0, v37, -1.0 ; D2820004 03CE4AF4
v_mac_f32_e32 v35, v19, v4 ; 3E460913
v_mad_f32 v4, 2.0, v36, -1.0 ; D2820004 03CE48F4
v_mac_f32_e32 v3, v19, v4 ; 3E060913
v_mac_f32_e32 v43, 0, v19 ; 3E562680
v_mac_f32_e32 v18, 0, v19 ; 3E242680
v_mac_f32_e32 v0, 0, v19 ; 3E002680
v_mul_f32_e32 v4, v40, v8 ; 10081128
v_mul_f32_e32 v19, v41, v8 ; 10261129
v_mul_f32_e32 v20, v42, v8 ; 1028112A
v_mul_f32_e32 v21, v32, v8 ; 102A1120
v_mul_f32_e32 v23, v33, v8 ; 102E1121
v_mul_f32_e32 v8, v43, v8 ; 1010112B
v_mac_f32_e32 v21, v29, v7 ; 3E2A0F1D
v_mac_f32_e32 v23, v2, v7 ; 3E2E0F02
v_mac_f32_e32 v8, v18, v7 ; 3E100F12
v_mac_f32_e32 v21, v35, v6 ; 3E2A0D23
v_mac_f32_e32 v23, v3, v6 ; 3E2E0D03
v_mac_f32_e32 v8, v0, v6 ; 3E100D00
v_mul_f32_e32 v0, v23, v23 ; 10002F17
v_mac_f32_e32 v0, v21, v21 ; 3E002B15
v_mac_f32_e32 v0, v8, v8 ; 3E001108
v_add_f32_e32 v0, 1.0, v0 ; 060000F2
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mac_f32_e32 v4, v27, v7 ; 3E080F1B
v_mac_f32_e32 v19, v38, v7 ; 3E260F26
v_mac_f32_e32 v20, v39, v7 ; 3E280F27
v_mul_f32_e32 v2, v0, v21 ; 10042B00
v_mul_f32_e32 v3, v0, v23 ; 10062F00
v_mul_f32_e32 v0, v0, v8 ; 10001100
v_mad_f32 v2, -v2, v5, v9 ; D2820002 24260B02
v_mad_f32 v3, -v3, v5, v10 ; D2820003 242A0B03
v_mad_f32 v0, -v0, v5, v11 ; D2820000 242E0B00
v_mac_f32_e32 v4, v22, v6 ; 3E080D16
v_mac_f32_e32 v19, v24, v6 ; 3E260D18
v_mul_f32_e32 v5, v2, v2 ; 100A0502
v_mac_f32_e32 v5, v3, v3 ; 3E0A0703
v_mac_f32_e32 v5, v0, v0 ; 3E0A0100
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v7, v16, v16 ; 100E2110
v_mac_f32_e32 v7, v17, v17 ; 3E0E2311
v_mac_f32_e32 v7, v12, v12 ; 3E0E190C
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mac_f32_e32 v20, v26, v6 ; 3E280D1A
v_mul_f32_e32 v2, v5, v2 ; 10040505
v_mul_f32_e32 v3, v5, v3 ; 10060705
v_mul_f32_e32 v6, v7, v16 ; 100C2107
v_mul_f32_e32 v8, v7, v17 ; 10102307
v_mul_f32_e32 v6, v6, v2 ; 100C0506
v_mac_f32_e32 v6, v8, v3 ; 3E0C0708
v_mul_f32_e32 v2, v15, v2 ; 1004050F
v_mac_f32_e32 v2, v14, v3 ; 3E04070E
v_mul_f32_e32 v0, v5, v0 ; 10000105
v_mul_f32_e32 v3, v7, v12 ; 10061907
v_mac_f32_e32 v6, v3, v0 ; 3E0C0103
v_mac_f32_e32 v2, v13, v0 ; 3E04010D
v_readlane_b32 s0, v254, 13 ; 02011BFE
s_nop 2 ; BF800002
v_add_f32_e64 v0, s0, s0 ; D2060000 00000000
v_readlane_b32 s0, v254, 8 ; 020111FE
s_nop 2 ; BF800002
v_max_f32_e32 v0, s0, v0 ; 20000000
v_readlane_b32 s0, v254, 11 ; 020117FE
s_nop 2 ; BF800002
v_add_f32_e64 v3, s0, s0 ; D2060003 00000000
v_readlane_b32 s0, v254, 9 ; 020113FE
s_nop 2 ; BF800002
v_max_f32_e32 v3, s0, v3 ; 20060600
v_readlane_b32 s0, v254, 12 ; 020119FE
s_nop 2 ; BF800002
v_add_f32_e64 v5, s0, s0 ; D2060005 00000000
v_readlane_b32 s0, v254, 10 ; 020115FE
s_nop 2 ; BF800002
v_max_f32_e32 v5, s0, v5 ; 200A0A00
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_min_f32_e32 v0, 1.0, v0 ; 1E0000F2
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_readlane_b32 s1, v254, 46 ; 02035DFE
s_nop 2 ; BF800002
v_mul_f32_e32 v7, s1, v4 ; 100E0801
v_mac_f32_e32 v0, v2, v7 ; 3E000F02
v_min_f32_e32 v3, 1.0, v3 ; 1E0606F2
v_mul_f32_e32 v3, v19, v3 ; 10060713
v_readlane_b32 s2, v254, 47 ; 02055FFE
s_nop 2 ; BF800002
v_mul_f32_e32 v7, s2, v19 ; 100E2602
v_mac_f32_e32 v3, v2, v7 ; 3E060F02
v_max_f32_e32 v6, 0x38d1b717, v6 ; 200C0CFF 38D1B717
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_min_f32_e32 v5, 1.0, v5 ; 1E0A0AF2
v_mul_f32_e32 v5, v20, v5 ; 100A0B14
v_readlane_b32 s0, v254, 7 ; 02010FFE
s_nop 2 ; BF800002
v_mul_f32_e32 v7, s0, v20 ; 100E2800
v_mac_f32_e32 v5, v2, v7 ; 3E0A0F02
s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000
s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000
s_mov_b32 s6, -1 ; BE8603C1
s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000
buffer_load_dword v7, s[4:7], s12 ; E0300000 0C010700
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, 0x42000000, v7 ; 10040EFF 42000000
v_mul_legacy_f32_e32 v2, v2, v6 ; 0E040D02
v_exp_f32_e32 v2, v2 ; 7E044B02
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mac_f32_e32 v28, -2.0, v2 ; 3E3804F5
v_mul_f32_e32 v6, v28, v2 ; 100C051C
v_mul_f32_e32 v2, v6, v2 ; 10040506
v_mul_f32_e32 v2, v7, v2 ; 10040507
v_mac_f32_e32 v0, s1, v2 ; 3E000401
v_mac_f32_e32 v3, s2, v2 ; 3E060402
v_mac_f32_e32 v5, s0, v2 ; 3E0A0400
v_mul_f32_e32 v0, 0.5, v0 ; 100000F0
v_mul_f32_e32 v2, 0.5, v3 ; 100406F0
v_mul_f32_e32 v3, 0.5, v5 ; 10060AF0
s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000
s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000
s_mov_b32 s6, -1 ; BE8603C1
s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000
buffer_load_dword v5, s[4:7], s12 offset:4 ; E0300004 0C010500
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, v5, v4 ; 3E000905
s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000
s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000
s_mov_b32 s6, -1 ; BE8603C1
s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000
buffer_load_dword v4, s[4:7], s12 offset:8 ; E0300008 0C010400
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v2, v4, v19 ; 3E042704
s_mov_b32 s4, SCRATCH_RSRC_DWORD0 ; BE8403FF 00000000
s_mov_b32 s5, SCRATCH_RSRC_DWORD1 ; BE8503FF 00000000
s_mov_b32 s6, -1 ; BE8603C1
s_mov_b32 s7, 0x80f000 ; BE8703FF 0080F000
buffer_load_dword v4, s[4:7], s12 offset:12 ; E030000C 0C010400
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v3, v4, v20 ; 3E062904
v_sub_f32_e32 v4, 1.0, v1 ; 080802F2
v_readlane_b32 s0, v254, 6 ; 02010DFE
s_nop 2 ; BF800002
v_mul_f32_e32 v5, s0, v4 ; 100A0800
v_mac_f32_e32 v5, v0, v1 ; 3E0A0300
v_readlane_b32 s0, v254, 5 ; 02010BFE
s_nop 2 ; BF800002
v_mul_f32_e32 v0, s0, v4 ; 10000800
v_mac_f32_e32 v0, v2, v1 ; 3E000302
v_readlane_b32 s0, v254, 4 ; 020109FE
s_nop 2 ; BF800002
v_mul_f32_e32 v2, s0, v4 ; 10040800
v_mac_f32_e32 v2, v3, v1 ; 3E040303
v_cvt_pkrtz_f16_f32_e32 v0, v5, v0 ; 5E000105
v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 256
Code Size: 5240 bytes
LDS: 0 blocks
Scratch: 18432 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MUL TEMP[2], CONST[6], IN[0].xxxx
11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2]
14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz
15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz
16: MOV OUT[1], TEMP[1]
17: MOV OUT[2], TEMP[2]
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI program dumped directly above.
; It reads constants through the first descriptor of argument %1, fetches the two
; vertex inputs through descriptors 0 and 1 of argument %4, evaluates the TGSI
; instructions 0-15 as scalar float math, and issues three llvm.SI.export calls.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor at index 0 of %1; every llvm.SI.load.const below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0 and 16: used below as CONST[0].x (%13, scale in %92) and CONST[1].x (%14, LRP factor).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; Offsets 32..92: sixteen floats used as CONST[2]..CONST[5] (x,y,z,w each) for TGSI instructions 0-3.
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; Offsets 96..152: fifteen floats used as CONST[6]..CONST[8] (x,y,z,w) and CONST[9].xyz for
; TGSI instructions 10-13. CONST[9].w is not loaded because instruction 13 writes only .xyz.
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; Fetch IN[0] through descriptor 0 of %4. The fetch index is %5 + %7
; (NOTE(review): presumably a base offset plus the vertex id -- confirm against the driver ABI).
%46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = add i32 %5, %7
%49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48)
; %50..%53 = IN[0].x, .y, .z, .w
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = extractelement <4 x float> %49, i32 3
; Fetch IN[1] through descriptor 1 of %4 with the same index (%56 recomputes %48).
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
; %58..%60 = IN[1].x, .y, .z (only .xyz is consumed, by the LRP at instruction 15)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
; TGSI 0: TEMP[0] = CONST[2] * IN[0].x
%61 = fmul float %15, %50
%62 = fmul float %16, %50
%63 = fmul float %17, %50
%64 = fmul float %18, %50
; TGSI 1: TEMP[0] += CONST[3] * IN[0].y
%65 = fmul float %19, %51
%66 = fadd float %65, %61
%67 = fmul float %20, %51
%68 = fadd float %67, %62
%69 = fmul float %21, %51
%70 = fadd float %69, %63
%71 = fmul float %22, %51
%72 = fadd float %71, %64
; TGSI 2: TEMP[0] += CONST[4] * IN[0].z
%73 = fmul float %23, %52
%74 = fadd float %73, %66
%75 = fmul float %24, %52
%76 = fadd float %75, %68
%77 = fmul float %25, %52
%78 = fadd float %77, %70
%79 = fmul float %26, %52
%80 = fadd float %79, %72
; TGSI 3: TEMP[0] += CONST[5] * IN[0].w  ->  TEMP[0] = (%82, %84, %86, %88)
%81 = fmul float %27, %53
%82 = fadd float %81, %74
%83 = fmul float %28, %53
%84 = fadd float %83, %76
%85 = fmul float %29, %53
%86 = fadd float %85, %78
%87 = fmul float %30, %53
%88 = fadd float %87, %80
; TGSI 4: TEMP[1].xyw = TEMP[0].xyw * 0.5
%89 = fmul float %82, 5.000000e-01
%90 = fmul float %84, 5.000000e-01
%91 = fmul float %88, 5.000000e-01
; TGSI 6: TEMP[3].x = TEMP[1].y * CONST[0].x
%92 = fmul float %90, %13
; TGSI 8: TEMP[1].xy = (TEMP[1].x, TEMP[3].x) + TEMP[1].w
%93 = fadd float %89, %91
%94 = fadd float %92, %91
; TGSI 10: TEMP[2] = CONST[6] * IN[0].x
%95 = fmul float %31, %50
%96 = fmul float %32, %50
%97 = fmul float %33, %50
%98 = fmul float %34, %50
; TGSI 11: TEMP[2] += CONST[7] * IN[0].y
%99 = fmul float %35, %51
%100 = fadd float %99, %95
%101 = fmul float %36, %51
%102 = fadd float %101, %96
%103 = fmul float %37, %51
%104 = fadd float %103, %97
%105 = fmul float %38, %51
%106 = fadd float %105, %98
; TGSI 12: TEMP[2] += CONST[8] * IN[0].z  (the .w lane ends here as %114)
%107 = fmul float %39, %52
%108 = fadd float %107, %100
%109 = fmul float %40, %52
%110 = fadd float %109, %102
%111 = fmul float %41, %52
%112 = fadd float %111, %104
%113 = fmul float %42, %52
%114 = fadd float %113, %106
; TGSI 13: TEMP[2].xyz += CONST[9].xyz * IN[0].w
%115 = fmul float %43, %53
%116 = fadd float %115, %108
%117 = fmul float %44, %53
%118 = fadd float %117, %110
%119 = fmul float %45, %53
%120 = fadd float %119, %112
; TGSI 14: multiply by IMM[0].yyz = (-1, -1, 1): x and y are negated, z (%120) is unchanged.
%121 = fsub float -0.000000e+00, %116
%122 = fsub float -0.000000e+00, %118
; TGSI 15: TEMP[2].xyz = LRP(CONST[1].x, IN[1].xyz, TEMP[2].xyz)
%123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121)
%124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122)
%125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120)
; TGSI 16: OUT[1] (GENERIC[0]) = TEMP[1]; the fourth i32 argument is 32 for this export.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88)
; TGSI 17: OUT[2] (GENERIC[1]) = TEMP[2]; fourth i32 argument is 33, .w is the partial sum %114.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114)
; TGSI 18: OUT[0] (POSITION) = TEMP[0]; fourth i32 argument is 12 and the third is 1 only here
; (NOTE(review): the third argument looks like a "last export" flag -- confirm against the intrinsic definition).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are dumped directly above.
; The hex words after ';' on each instruction line are the encodings printed by the dump.
; Register roles below are derived by matching operands against that IR/TGSI.
;
; Load the two 4-dword vertex-input descriptors (IR: elements 0 and 1 of argument %4)
; and form the fetch index (IR: add i32 %5, %7).
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; v[1:4] = IN[0].xyzw, v[5:8] = IN[1].xyzw (v8 is later reused as an accumulator).
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
; s[0:3] = descriptor used by every s_buffer_load_dword below (IR: %12).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; The s_buffer_load_dword offsets line up with the IR byte offsets divided by 4
; (e.g. 0x8 <-> byte 32 = CONST[2].x, 0x18 <-> byte 96 = CONST[6].x).
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_waitcnt lgkmcnt(0) ; BF8C007F
; Accumulators for TGSI 0-3: v0 = TEMP[0].x, v8 = TEMP[0].y, v9 = TEMP[0].z, v10 = TEMP[0].w.
v_mul_f32_e32 v0, s4, v1 ; 10000204
s_buffer_load_dword s4, s[0:3], 0xe ; C202010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s7, v2 ; 3E100407
s_buffer_load_dword s5, s[0:3], 0xb ; C202810B
v_mul_f32_e32 v9, s8, v1 ; 10120208
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C
s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119
s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D
s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A
s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v2 ; 3E120404
s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
; Accumulators for TGSI 10-13: v11 = TEMP[2].x, v12 = TEMP[2].y, v13 = TEMP[2].z, v1 = TEMP[2].w.
v_mul_f32_e32 v11, s6, v1 ; 10160206
v_mac_f32_e32 v11, s7, v2 ; 3E160407
v_mul_f32_e32 v12, s8, v1 ; 10180208
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v13, s11, v1 ; 101A020B
v_mac_f32_e32 v13, s12, v2 ; 3E1A040C
s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F
s_waitcnt lgkmcnt(0) ; BF8C007F
; v1 (IN[0].x) is overwritten here; it is the last use of IN[0].x.
v_mul_f32_e32 v1, s4, v1 ; 10020204
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112
s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113
s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120
s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123
v_mac_f32_e32 v1, s5, v2 ; 3E020405
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_waitcnt lgkmcnt(0) ; BF8C007F
; IN[0].z (v3) terms of both accumulator sets.
v_mac_f32_e32 v0, s4, v3 ; 3E000604
v_mac_f32_e32 v8, s6, v3 ; 3E100606
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mac_f32_e32 v11, s9, v3 ; 3E160609
v_mac_f32_e32 v12, s10, v3 ; 3E18060A
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116
s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117
s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124
s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125
s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126
v_mac_f32_e32 v13, s11, v3 ; 3E1A060B
; s11 = offset 0x4 (IR byte offset 16, CONST[1].x): the LRP factor.
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
; Final term of TEMP[2].w (IR %114); no IN[0].w term is added to it.
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
v_mac_f32_e32 v0, s5, v4 ; 3E000805
s_waitcnt lgkmcnt(0) ; BF8C007F
; IN[0].w (v4) terms.
v_mac_f32_e32 v8, s4, v4 ; 3E100804
v_mac_f32_e32 v9, s6, v4 ; 3E120806
v_mac_f32_e32 v10, s7, v4 ; 3E140807
v_mac_f32_e32 v11, s8, v4 ; 3E160808
v_mac_f32_e32 v12, s9, v4 ; 3E180809
v_mac_f32_e32 v13, s10, v4 ; 3E1A080A
; s0 = offset 0x0 (CONST[0].x). This overwrites the first dword of s[0:3]; it is the last load from it.
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
; TGSI 14-15 expanded: result = s11 * IN[1] + (1 - s11) * TEMP[2], with TEMP[2].x/.y negated
; (hence the -v3 / -v4 addends) and TEMP[2].z used as-is.
v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2
v_mul_f32_e32 v3, v11, v2 ; 1006050B
v_mul_f32_e32 v4, v12, v2 ; 1008050C
v_mul_f32_e32 v2, v13, v2 ; 1004050D
v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B
v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B
v_mac_f32_e32 v2, s11, v7 ; 3E040E0B
; TGSI 4-8: v5 = 0.5*TEMP[0].y, v6 = 0.5*TEMP[0].w, v7 = 0.5*TEMP[0].x + v6, then v6 += CONST[0].x * v5.
v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0
v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0
v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00
; Exports, in the same order as the three llvm.SI.export calls in the IR:
; target 32 = OUT[1], target 33 = OUT[2], target 12 = OUT[0] (POSITION).
exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607
exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403
exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Code Size: 408 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[6..12]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000}
0: RCP TEMP[0].x, IN[0].wwww
1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
2: RCP TEMP[1].x, IN[1].zzzz
3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx
4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
5: MOV TEMP[3].xy, TEMP[0].xyyy
6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D
7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy
8: RCP TEMP[3].x, TEMP[3].xxxx
9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx
10: MUL TEMP[2], CONST[9], TEMP[1].xxxx
11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2]
13: ADD TEMP[2].xyz, TEMP[2], CONST[12]
14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz
15: ADD TEMP[4].xyz, TEMP[2].xyzz, -CONST[6].xyzz
16: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
17: RSQ TEMP[5].x, TEMP[5].xxxx
18: MUL TEMP[5].xyz, TEMP[4].xyzz, TEMP[5].xxxx
19: MOV TEMP[5].xyz, -TEMP[5].xyzx
20: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[4].xyzz
21: MUL TEMP[4].x, TEMP[4].xxxx, CONST[6].wwww
22: MOV TEMP[4].xy, TEMP[4].xxxx
23: TEX TEMP[4].w, TEMP[4], SAMP[1], 2D
24: MOV TEMP[0].xy, TEMP[0].xyyy
25: TEX TEMP[0], TEMP[0], SAMP[2], 2D
26: MAD TEMP[6].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz
27: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz
28: RSQ TEMP[7].x, TEMP[7].xxxx
29: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx
30: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[6].xyzz
31: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
32: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[4].wwww
33: MUL TEMP[7].xyz, CONST[7].xyzz, TEMP[7].xxxx
34: MUL TEMP[8].xyz, CONST[7].xyzz, CONST[4].xyzz
35: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz
36: DP3 TEMP[9].x, TEMP[2].xyzz, TEMP[2].xyzz
37: RSQ TEMP[9].x, TEMP[9].xxxx
38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[9].xxxx
39: ADD TEMP[2].xyz, TEMP[5].xyzz, -TEMP[2].xyzz
40: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
41: RSQ TEMP[5].x, TEMP[5].xxxx
42: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
43: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[6].xyzz
44: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx
45: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx
46: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
47: MOV_SAT TEMP[2].x, TEMP[4].wwww
48: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
49: ADD TEMP[2].x, TEMP[8].xxxx, TEMP[8].zzzz
50: MUL TEMP[2].x, TEMP[8].yyyy, TEMP[2].xxxx
51: SQRT TEMP[2].x, TEMP[2].xxxx
52: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
53: ADD TEMP[4].x, TEMP[8].xxxx, TEMP[8].yyyy
54: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[8].zzzz
55: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx
56: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
57: MOV TEMP[7].w, TEMP[0].xxxx
58: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz
59: SQRT TEMP[0].x, TEMP[0].xxxx
60: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz
61: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww
62: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
63: MOV_SAT TEMP[0].x, TEMP[0].xxxx
64: MUL TEMP[0], TEMP[7], TEMP[0].xxxx
65: EX2 TEMP[1].x, -TEMP[0].xxxx
66: EX2 TEMP[1].y, -TEMP[0].yyyy
67: EX2 TEMP[1].z, -TEMP[0].zzzz
68: EX2 TEMP[1].w, -TEMP[0].wwww
69: MOV OUT[0], TEMP[1]
70: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%59 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%60 = load <32 x i8>, <32 x i8> addrspace(2)* %59, align 32, !tbaa !0
%61 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !tbaa !0
%63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%64 = bitcast <8 x i32> addrspace(2)* %63 to <32 x i8> addrspace(2)*
%65 = load <32 x i8>, <32 x i8> addrspace(2)* %64, align 32, !tbaa !0
%66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%67 = bitcast <4 x i32> addrspace(2)* %66 to <16 x i8> addrspace(2)*
%68 = load <16 x i8>, <16 x i8> addrspace(2)* %67, align 16, !tbaa !0
%69 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%70 = bitcast <8 x i32> addrspace(2)* %69 to <32 x i8> addrspace(2)*
%71 = load <32 x i8>, <32 x i8> addrspace(2)* %70, align 32, !tbaa !0
%72 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%73 = bitcast <4 x i32> addrspace(2)* %72 to <16 x i8> addrspace(2)*
%74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0
%75 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%81 = fdiv float 1.000000e+00, %77
%82 = fmul float %75, %81
%83 = fmul float %76, %81
%84 = fdiv float 1.000000e+00, %80
%85 = fmul float %27, %84
%86 = fmul float %78, %85
%87 = fmul float %79, %85
%88 = fmul float %80, %85
%89 = bitcast float %82 to i32
%90 = bitcast float %83 to i32
%91 = insertelement <2 x i32> undef, i32 %89, i32 0
%92 = insertelement <2 x i32> %91, i32 %90, i32 1
%93 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %60, <16 x i8> %62, i32 2)
%94 = extractelement <4 x float> %93, i32 0
%95 = fmul float %28, %94
%96 = fadd float %95, %29
%97 = fdiv float 1.000000e+00, %96
%98 = fmul float %86, %97
%99 = fmul float %87, %97
%100 = fmul float %88, %97
%101 = fmul float %47, %98
%102 = fmul float %48, %98
%103 = fmul float %49, %98
%104 = fmul float %50, %99
%105 = fadd float %104, %101
%106 = fmul float %51, %99
%107 = fadd float %106, %102
%108 = fmul float %52, %99
%109 = fadd float %108, %103
%110 = fmul float %53, %100
%111 = fadd float %110, %105
%112 = fmul float %54, %100
%113 = fadd float %112, %107
%114 = fmul float %55, %100
%115 = fadd float %114, %109
%116 = fadd float %111, %56
%117 = fadd float %113, %57
%118 = fadd float %115, %58
%119 = fsub float %116, %30
%120 = fsub float %117, %31
%121 = fsub float %118, %32
%122 = fsub float %116, %38
%123 = fsub float %117, %39
%124 = fsub float %118, %40
%125 = fmul float %122, %122
%126 = fmul float %123, %123
%127 = fadd float %126, %125
%128 = fmul float %124, %124
%129 = fadd float %127, %128
%130 = call float @llvm.AMDGPU.rsq.clamped.f32(float %129)
%131 = fmul float %122, %130
%132 = fmul float %123, %130
%133 = fmul float %124, %130
%134 = fmul float %122, %122
%135 = fmul float %123, %123
%136 = fadd float %135, %134
%137 = fmul float %124, %124
%138 = fadd float %136, %137
%139 = fmul float %138, %41
%140 = bitcast float %139 to i32
%141 = bitcast float %139 to i32
%142 = insertelement <2 x i32> undef, i32 %140, i32 0
%143 = insertelement <2 x i32> %142, i32 %141, i32 1
%144 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %65, <16 x i8> %68, i32 2)
%145 = extractelement <4 x float> %144, i32 3
%146 = bitcast float %82 to i32
%147 = bitcast float %83 to i32
%148 = insertelement <2 x i32> undef, i32 %146, i32 0
%149 = insertelement <2 x i32> %148, i32 %147, i32 1
%150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %71, <16 x i8> %74, i32 2)
%151 = extractelement <4 x float> %150, i32 0
%152 = extractelement <4 x float> %150, i32 1
%153 = extractelement <4 x float> %150, i32 2
%154 = extractelement <4 x float> %150, i32 3
%155 = fmul float %151, 2.000000e+00
%156 = fadd float %155, -1.000000e+00
%157 = fmul float %152, 2.000000e+00
%158 = fadd float %157, -1.000000e+00
%159 = fmul float %153, 2.000000e+00
%160 = fadd float %159, -1.000000e+00
%161 = fmul float %156, %156
%162 = fmul float %158, %158
%163 = fadd float %162, %161
%164 = fmul float %160, %160
%165 = fadd float %163, %164
%166 = call float @llvm.AMDGPU.rsq.clamped.f32(float %165)
%167 = fmul float %156, %166
%168 = fmul float %158, %166
%169 = fmul float %160, %166
%170 = fmul float %131, %167
%171 = fsub float -0.000000e+00, %170
%172 = fmul float %132, %168
%173 = fsub float %171, %172
%174 = fmul float %133, %169
%175 = fsub float %173, %174
%176 = call float @llvm.maxnum.f32(float %175, float 0.000000e+00)
%177 = fmul float %176, %145
%178 = fmul float %42, %177
%179 = fmul float %43, %177
%180 = fmul float %44, %177
%181 = fmul float %42, %34
%182 = fmul float %43, %35
%183 = fmul float %44, %36
%184 = fsub float %116, %24
%185 = fsub float %117, %25
%186 = fsub float %118, %26
%187 = fmul float %184, %184
%188 = fmul float %185, %185
%189 = fadd float %188, %187
%190 = fmul float %186, %186
%191 = fadd float %189, %190
%192 = call float @llvm.AMDGPU.rsq.clamped.f32(float %191)
%193 = fmul float %184, %192
%194 = fmul float %185, %192
%195 = fmul float %186, %192
%196 = fsub float -0.000000e+00, %193
%197 = fsub float %196, %131
%198 = fsub float -0.000000e+00, %194
%199 = fsub float %198, %132
%200 = fsub float -0.000000e+00, %195
%201 = fsub float %200, %133
%202 = fmul float %197, %197
%203 = fmul float %199, %199
%204 = fadd float %203, %202
%205 = fmul float %201, %201
%206 = fadd float %204, %205
%207 = call float @llvm.AMDGPU.rsq.clamped.f32(float %206)
%208 = fmul float %197, %207
%209 = fmul float %199, %207
%210 = fmul float %201, %207
%211 = fmul float %208, %167
%212 = fmul float %209, %168
%213 = fadd float %212, %211
%214 = fmul float %210, %169
%215 = fadd float %213, %214
%216 = call float @llvm.maxnum.f32(float %215, float 0.000000e+00)
%217 = fmul float %154, 1.280000e+02
%218 = call float @llvm.pow.f32(float %216, float %217)
%219 = call float @llvm.AMDIL.clamp.(float %145, float 0.000000e+00, float 1.000000e+00)
%220 = fmul float %218, %219
%221 = fadd float %181, %183
%222 = fmul float %182, %221
%223 = call float @llvm.sqrt.f32(float %222)
%224 = fmul float %223, 2.000000e+00
%225 = fadd float %181, %182
%226 = fadd float %225, %183
%227 = fmul float %224, %37
%228 = fadd float %227, %226
%229 = fmul float %220, %228
%230 = fmul float %119, %119
%231 = fmul float %120, %120
%232 = fadd float %231, %230
%233 = fmul float %121, %121
%234 = fadd float %232, %233
%235 = call float @llvm.sqrt.f32(float %234)
%236 = call float @llvm.AMDGPU.lrp(float %33, float %235, float %100)
%237 = fmul float %236, %45
%238 = fadd float %237, %46
%239 = fsub float 1.000000e+00, %238
%240 = call float @llvm.AMDIL.clamp.(float %239, float 0.000000e+00, float 1.000000e+00)
%241 = fmul float %178, %240
%242 = fmul float %179, %240
%243 = fmul float %180, %240
%244 = fmul float %229, %240
%245 = fsub float -0.000000e+00, %241
%246 = call float @llvm.AMDIL.exp.(float %245)
%247 = fsub float -0.000000e+00, %242
%248 = call float @llvm.AMDIL.exp.(float %247)
%249 = fsub float -0.000000e+00, %243
%250 = call float @llvm.AMDIL.exp.(float %249)
%251 = fsub float -0.000000e+00, %244
%252 = call float @llvm.AMDIL.exp.(float %251)
%253 = call i32 @llvm.SI.packf16(float %246, float %248)
%254 = bitcast i32 %253 to float
%255 = call i32 @llvm.SI.packf16(float %250, float %252)
%256 = bitcast i32 %255 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %254, float %256, float %254, float %256)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_rcp_f32_e32 v4, v4 ; 7E085504
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500
s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s36, s[0:3], 0x9 ; C2120109
s_buffer_load_dword s37, s[0:3], 0x6 ; C2128106
s_buffer_load_dword s38, s[0:3], 0x8 ; C2130108
s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504
s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508
s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708
s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710
image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[28:35], s[24:27] ; F0800100 00C70301
s_buffer_load_dword s4, s[0:3], 0xc ; C202010C
s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
s_buffer_load_dword s6, s[0:3], 0xe ; C203010E
s_buffer_load_dword s7, s[0:3], 0xf ; C203810F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v4, s36 ; 7E080224
v_rcp_f32_e32 v7, v0 ; 7E0E5500
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v4, s38, v3 ; 3E080626
s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124
s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125
v_mul_f32_e32 v3, s37, v7 ; 10060E25
s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126
v_rcp_f32_e32 v4, v4 ; 7E085504
v_mul_f32_e32 v5, v3, v5 ; 100A0B03
s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128
s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129
v_mul_f32_e32 v5, v4, v5 ; 100A0B04
s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v7, s24, v5 ; 100E0A18
v_mul_f32_e32 v8, s25, v5 ; 10100A19
s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C
v_mul_f32_e32 v5, s26, v5 ; 100A0A1A
v_mul_f32_e32 v6, v3, v6 ; 100C0D03
v_mul_f32_e32 v6, v4, v6 ; 100C0D04
v_mac_f32_e32 v7, s27, v6 ; 3E0E0C1B
v_mac_f32_e32 v8, s28, v6 ; 3E100C1C
s_buffer_load_dword s25, s[0:3], 0x2d ; C20C812D
v_mac_f32_e32 v5, s29, v6 ; 3E0A0C1D
v_mul_f32_e32 v0, v3, v0 ; 10000103
s_buffer_load_dword s26, s[0:3], 0x2e ; C20D012E
s_buffer_load_dword s27, s[0:3], 0x30 ; C20D8130
s_buffer_load_dword s28, s[0:3], 0x31 ; C20E0131
v_mul_f32_e32 v0, v4, v0 ; 10000104
s_buffer_load_dword s29, s[0:3], 0x32 ; C20E8132
s_buffer_load_dword s30, s[0:3], 0x18 ; C20F0118
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v7, s24, v0 ; 3E0E0018
v_mac_f32_e32 v8, s25, v0 ; 3E100019
s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119
s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A
v_mac_f32_e32 v5, s26, v0 ; 3E0A001A
v_add_f32_e32 v3, s27, v7 ; 06060E1B
v_add_f32_e32 v4, s28, v8 ; 0608101C
s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B
v_add_f32_e32 v5, s29, v5 ; 060A0A1D
v_subrev_f32_e32 v6, s30, v3 ; 0A0C061E
s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C
s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v7, s24, v4 ; 0A0E0818
v_subrev_f32_e32 v8, s25, v5 ; 0A100A19
v_mul_f32_e32 v9, v6, v6 ; 10120D06
v_mac_f32_e32 v9, v7, v7 ; 3E120F07
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_mul_f32_e32 v10, s26, v9 ; 1014121A
v_mov_b32_e32 v11, v10 ; 7E16030A
s_buffer_load_dword s24, s[0:3], 0x12 ; C20C0112
s_buffer_load_dword s25, s[0:3], 0x1e ; C20C811E
s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110
s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111
image_sample v10, 8, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[40:43] ; F0800800 01420A0A
image_sample v[11:14], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[44:47] ; F0800F00 01640B01
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x13 ; C2058113
s_buffer_load_dword s12, s[0:3], 0x22 ; C2060122
s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v1, s24 ; 7E020218
v_mul_f32_e32 v1, s25, v1 ; 10020219
v_mov_b32_e32 v2, s26 ; 7E04021A
v_mac_f32_e32 v1, s27, v2 ; 3E02041B
v_mov_b32_e32 v2, s26 ; 7E04021A
v_mov_b32_e32 v15, s29 ; 7E1E021D
v_mul_f32_e32 v15, s28, v15 ; 101E1E1C
v_mul_f32_e32 v1, v1, v15 ; 10021F01
v_mac_f32_e32 v15, s27, v2 ; 3E1E041B
v_mov_b32_e32 v2, s24 ; 7E040218
v_mac_f32_e32 v15, s25, v2 ; 3E1E0419
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v2, s8, v3 ; 0A040608
v_subrev_f32_e32 v16, s9, v4 ; 0A200809
v_subrev_f32_e32 v17, s10, v5 ; 0A220A0A
v_mul_f32_e32 v18, v2, v2 ; 10240502
v_mac_f32_e32 v18, v16, v16 ; 3E242110
v_mac_f32_e32 v18, v17, v17 ; 3E242311
v_rsq_clamp_f32_e32 v18, v18 ; 7E245912
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_add_f32_e32 v1, v1, v1 ; 06020301
v_mac_f32_e32 v15, s11, v1 ; 3E1E020B
v_mul_f32_e32 v1, v18, v2 ; 10020512
v_mul_f32_e32 v2, v18, v16 ; 10042112
v_mul_f32_e32 v16, v18, v17 ; 10202312
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mad_f32 v11, 2.0, v11, -1.0 ; D282000B 03CE16F4
v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4
v_mul_f32_e32 v17, v11, v11 ; 1022170B
v_mac_f32_e32 v17, v12, v12 ; 3E22190C
v_mad_f32 v13, 2.0, v13, -1.0 ; D282000D 03CE1AF4
v_mac_f32_e32 v17, v13, v13 ; 3E221B0D
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
v_mad_f32 v1, -v6, v9, -v1 ; D2820001 A4061306
v_mul_f32_e32 v6, v9, v6 ; 100C0D09
v_mad_f32 v2, -v7, v9, -v2 ; D2820002 A40A1307
v_mul_f32_e32 v7, v9, v7 ; 100E0F09
v_mul_f32_e32 v11, v17, v11 ; 10161711
v_mad_f32 v16, -v8, v9, -v16 ; D2820010 A4421308
v_mul_f32_e32 v18, v1, v1 ; 10240301
v_mac_f32_e32 v18, v2, v2 ; 3E240502
v_mac_f32_e32 v18, v16, v16 ; 3E242110
v_rsq_clamp_f32_e32 v18, v18 ; 7E245912
v_mul_f32_e32 v12, v17, v12 ; 10181911
v_mul_f32_e32 v6, v11, v6 ; 100C0D0B
v_mad_f32 v6, -v7, v12, -v6 ; D2820006 A41A1907
v_mul_f32_e32 v1, v18, v1 ; 10020312
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mul_f32_e32 v2, v18, v2 ; 10040512
v_mac_f32_e32 v1, v12, v2 ; 3E02050C
v_mul_f32_e32 v2, v9, v8 ; 10041109
v_mul_f32_e32 v7, v17, v13 ; 100E1B11
v_mad_f32 v2, -v2, v7, v6 ; D2820002 241A0F02
v_mul_f32_e32 v6, v18, v16 ; 100C2112
v_mac_f32_e32 v1, v7, v6 ; 3E020D07
v_subrev_f32_e32 v3, s4, v3 ; 0A060604
v_subrev_f32_e32 v4, s5, v4 ; 0A080805
v_subrev_f32_e32 v5, s6, v5 ; 0A0A0A06
v_max_f32_e32 v2, 0, v2 ; 20040480
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mul_f32_e32 v6, s27, v2 ; 100C041B
v_mul_f32_e32 v7, s28, v2 ; 100E041C
v_mul_f32_e32 v2, s25, v2 ; 10040419
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mac_f32_e32 v3, v4, v4 ; 3E060904
v_mac_f32_e32 v3, v5, v5 ; 3E060B05
v_sub_f32_e64 v4, 1.0, s7 ; D2080004 00000EF2
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_max_f32_e32 v1, 0, v1 ; 20020280
v_log_f32_e32 v1, v1 ; 7E024F01
v_sqrt_f32_e32 v3, v3 ; 7E066703
v_mac_f32_e32 v0, s7, v3 ; 3E000607
v_mov_b32_e32 v3, s0 ; 7E060200
v_mac_f32_e32 v3, s12, v0 ; 3E06000C
v_mul_f32_e32 v0, 0x43000000, v14 ; 10001CFF 43000000
v_mul_legacy_f32_e32 v0, v0, v1 ; 0E000300
v_exp_f32_e32 v0, v0 ; 7E004B00
v_add_f32_e64 v1, 0, v10 clamp ; D2060801 00021480
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mul_f32_e32 v0, v15, v0 ; 1000010F
v_sub_f32_e32 v1, 1.0, v3 ; 080206F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v3, v1, v6 ; 10060D01
v_mul_f32_e32 v4, v1, v7 ; 10080F01
v_mul_f32_e32 v2, v1, v2 ; 10040501
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_exp_f32_e64 v1, -v3 ; D34A0001 20000103
v_exp_f32_e64 v3, -v4 ; D34A0003 20000104
v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701
v_exp_f32_e64 v2, -v2 ; D34A0002 20000102
v_exp_f32_e64 v0, -v0 ; D34A0000 20000100
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 20
Code Size: 856 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MUL TEMP[2], CONST[6], IN[0].xxxx
11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2]
14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz
15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz
16: MOV OUT[1], TEMP[1]
17: MOV OUT[2], TEMP[2]
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
; Vertex shader entry point ("ShaderType"="1" in attributes #0). This is the
; LLVM IR counterpart of the TGSI VERT program printed just before this module.
; Unnamed arguments referenced by the body:
;   %1      - descriptor array; element 0 is the buffer read by llvm.SI.load.const
;   %4      - descriptor array; elements 0 and 1 feed llvm.SI.vs.load.input
;   %5, %7  - integers summed to form the index passed to llvm.SI.vs.load.input
;             (presumably a base offset and a per-vertex id - TODO confirm)
; Arguments %0, %2, %3, %6, %8, %9 and %10 are not referenced.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Fetch the descriptor (element 0 of %1) used by every constant load below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Scalar constant loads. Matching the offsets against the TGSI listing, the
; last operand is a byte offset equal to 16 * CONST index + 4 * component:
;   %13      = CONST[0].x         %14      = CONST[1].x
;   %15..%30 = CONST[2..5].xyzw   %31..%42 = CONST[6..8].xyzw
;   %43..%45 = CONST[9].xyz
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; Vertex input 0 (IN[0]): descriptor 0 of %4, fetched at index %5 + %7.
; %50..%53 are its x, y, z, w components.
%46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = add i32 %5, %7
%49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48)
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = extractelement <4 x float> %49, i32 3
; Vertex input 1 (IN[1]): descriptor 1 of %4, same index (recomputed as %56).
; Only x, y, z (%58..%60) are extracted; w is never used.
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
; TGSI 0-3: TEMP[0] = CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z
;                   + CONST[5]*IN[0].w, expanded per component.
; Final values: x = %82, y = %84, z = %86, w = %88.
%61 = fmul float %15, %50
%62 = fmul float %16, %50
%63 = fmul float %17, %50
%64 = fmul float %18, %50
%65 = fmul float %19, %51
%66 = fadd float %65, %61
%67 = fmul float %20, %51
%68 = fadd float %67, %62
%69 = fmul float %21, %51
%70 = fadd float %69, %63
%71 = fmul float %22, %51
%72 = fadd float %71, %64
%73 = fmul float %23, %52
%74 = fadd float %73, %66
%75 = fmul float %24, %52
%76 = fadd float %75, %68
%77 = fmul float %25, %52
%78 = fadd float %77, %70
%79 = fmul float %26, %52
%80 = fadd float %79, %72
%81 = fmul float %27, %53
%82 = fadd float %81, %74
%83 = fmul float %28, %53
%84 = fadd float %83, %76
%85 = fmul float %29, %53
%86 = fadd float %85, %78
%87 = fmul float %30, %53
%88 = fadd float %87, %80
; TGSI 4-9: TEMP[1].x = 0.5*x + 0.5*w (%93) and
;           TEMP[1].y = 0.5*y*CONST[0].x + 0.5*w (%94), with x, y, w from TEMP[0].
; TEMP[1].zw reuse TEMP[0].zw (%86, %88) directly in the export below.
%89 = fmul float %82, 5.000000e-01
%90 = fmul float %84, 5.000000e-01
%91 = fmul float %88, 5.000000e-01
%92 = fmul float %90, %13
%93 = fadd float %89, %91
%94 = fadd float %92, %91
; TGSI 10-13: TEMP[2] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z,
; then CONST[9]*IN[0].w is added to x, y, z only (TGSI 13 writes .xyz).
; Final values: x = %116, y = %118, z = %120; w stays at %114.
%95 = fmul float %31, %50
%96 = fmul float %32, %50
%97 = fmul float %33, %50
%98 = fmul float %34, %50
%99 = fmul float %35, %51
%100 = fadd float %99, %95
%101 = fmul float %36, %51
%102 = fadd float %101, %96
%103 = fmul float %37, %51
%104 = fadd float %103, %97
%105 = fmul float %38, %51
%106 = fadd float %105, %98
%107 = fmul float %39, %52
%108 = fadd float %107, %100
%109 = fmul float %40, %52
%110 = fadd float %109, %102
%111 = fmul float %41, %52
%112 = fadd float %111, %104
%113 = fmul float %42, %52
%114 = fadd float %113, %106
%115 = fmul float %43, %53
%116 = fadd float %115, %108
%117 = fmul float %44, %53
%118 = fadd float %117, %110
%119 = fmul float %45, %53
%120 = fadd float %119, %112
; TGSI 14: multiply by IMM[0].yyzz = (-1, -1, 1), i.e. negate x and y and
; leave z (%120) untouched.
%121 = fsub float -0.000000e+00, %116
%122 = fsub float -0.000000e+00, %118
; TGSI 15: LRP weighted by CONST[1].x (%14) between IN[1].xyz and the
; values computed above.
%123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121)
%124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122)
%125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120)
; Exports. The fourth i32 operand selects the target: 32 receives TEMP[1]
; (OUT[1], GENERIC[0]), 33 receives TEMP[2] (OUT[2], GENERIC[1]) and 12
; receives TEMP[0] (OUT[0], POSITION). Only the final export passes 1 as its
; third operand - presumably the "done" flag; TODO confirm.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88)
ret void
}
; Intrinsics referenced by @main follow; the "Function Attrs" lines restate the
; attribute groups defined at the end of the module.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; llvm.SI.export carries no attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main; #1 and #2 are attached to the intrinsic declarations.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; !0 is the !tbaa node attached to the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose IR is printed above. Register
; roles, read off the instructions below:
;   v0 (on entry) + s10  fetch index shared by both buffer_load_format_xyzw
;   v[1:4]               IN[0].xyzw
;   v[5:7]               IN[1].xyz (v8 from the same fetch is overwritten unused)
;   s[0:3]               descriptor used by every s_buffer_load_dword; the
;                        immediates equal the IR byte offsets divided by 4, so
;                        0x8..0xb is CONST[2].xyzw, 0x18..0x1b is CONST[6].xyzw
;   v0, v8, v9, v10      accumulate TEMP[0].x, .y, .z, .w
;   v11, v12, v13, v1    accumulate TEMP[2].x, .y, .z, .w (v1 is reused once
;                        IN[0].x has been consumed)
; Constant loads are interleaved with the multiply-accumulates, and each
; s_waitcnt lgkmcnt(0) precedes the first use of a freshly loaded scalar.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
; Load the descriptor used by all constant reads below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
s_buffer_load_dword s4, s[0:3], 0xe ; C202010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s7, v2 ; 3E100407
s_buffer_load_dword s5, s[0:3], 0xb ; C202810B
v_mul_f32_e32 v9, s8, v1 ; 10120208
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C
s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119
s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D
s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A
s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v2 ; 3E120404
s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_mul_f32_e32 v11, s6, v1 ; 10160206
v_mac_f32_e32 v11, s7, v2 ; 3E160407
v_mul_f32_e32 v12, s8, v1 ; 10180208
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v13, s11, v1 ; 101A020B
v_mac_f32_e32 v13, s12, v2 ; 3E1A040C
s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Last read of IN[0].x: from here on v1 holds the TEMP[2].w accumulator.
v_mul_f32_e32 v1, s4, v1 ; 10020204
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112
s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113
s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120
s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123
v_mac_f32_e32 v1, s5, v2 ; 3E020405
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s4, v3 ; 3E000604
v_mac_f32_e32 v8, s6, v3 ; 3E100606
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mac_f32_e32 v11, s9, v3 ; 3E160609
v_mac_f32_e32 v12, s10, v3 ; 3E18060A
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116
s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117
s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124
s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125
s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126
v_mac_f32_e32 v13, s11, v3 ; 3E1A060B
; s11 becomes the LRP weight (dword 0x4, IR byte offset 16).
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
v_mac_f32_e32 v0, s5, v4 ; 3E000805
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v8, s4, v4 ; 3E100804
v_mac_f32_e32 v9, s6, v4 ; 3E120806
v_mac_f32_e32 v10, s7, v4 ; 3E140807
v_mac_f32_e32 v11, s8, v4 ; 3E160808
v_mac_f32_e32 v12, s9, v4 ; 3E180809
v_mac_f32_e32 v13, s10, v4 ; 3E1A080A
; Final constant read (dword 0x0); it overwrites s0, so the descriptor in
; s[0:3] is no longer intact after this load.
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
; Expanded llvm.AMDGPU.lrp calls (IR %123..%125): v2 = 1 - s11, then
; s11 * IN[1] combined with v2 * TEMP[2]. The x and y products are
; subtracted, which folds in the negation of TEMP[2].xy done in the IR.
v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2
v_mul_f32_e32 v3, v11, v2 ; 1006050B
v_mul_f32_e32 v4, v12, v2 ; 1008050C
v_mul_f32_e32 v2, v13, v2 ; 1004050D
v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B
v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B
v_mac_f32_e32 v2, s11, v7 ; 3E040E0B
; TEMP[1].xy (IR %89..%94): v7 = 0.5*v0 + 0.5*v10 and
; v6 = 0.5*v10 + s0 * (0.5*v8).
v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0
v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0
v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00
; The three exports mirror the llvm.SI.export calls in the IR; the second
; operand printed here is the target (32, 33, then 12 for the position).
exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607
exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403
exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Code Size: 408 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[6..12]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000}
0: RCP TEMP[0].x, IN[0].wwww
1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
2: RCP TEMP[1].x, IN[1].zzzz
3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx
4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
5: MOV TEMP[3].xy, TEMP[0].xyyy
6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D
7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy
8: RCP TEMP[3].x, TEMP[3].xxxx
9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx
10: MUL TEMP[2], CONST[9], TEMP[1].xxxx
11: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[11], TEMP[1].zzzz, TEMP[2]
13: ADD TEMP[2].xyz, TEMP[2], CONST[12]
14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[3].xyzz
15: MOV TEMP[4].xyz, -CONST[6].xyzx
16: MOV TEMP[0].xy, TEMP[0].xyyy
17: TEX TEMP[0], TEMP[0], SAMP[1], 2D
18: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz
19: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz
20: RSQ TEMP[6].x, TEMP[6].xxxx
21: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
22: DP3 TEMP[6].x, TEMP[4].xyzz, TEMP[5].xyzz
23: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
24: MUL TEMP[6].xyz, CONST[7].xyzz, TEMP[6].xxxx
25: MUL TEMP[7].xyz, CONST[7].xyzz, CONST[4].xyzz
26: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz
27: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz
28: RSQ TEMP[8].x, TEMP[8].xxxx
29: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
30: ADD TEMP[2].xyz, TEMP[4].xyzz, -TEMP[2].xyzz
31: DP3 TEMP[4].x, TEMP[2].xyzz, TEMP[2].xyzz
32: RSQ TEMP[4].x, TEMP[4].xxxx
33: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
34: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[5].xyzz
35: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx
36: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx
37: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
38: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].zzzz
39: MUL TEMP[2].x, TEMP[7].yyyy, TEMP[2].xxxx
40: SQRT TEMP[2].x, TEMP[2].xxxx
41: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
42: ADD TEMP[4].x, TEMP[7].xxxx, TEMP[7].yyyy
43: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[7].zzzz
44: MAD TEMP[2].x, TEMP[2].xxxx, CONST[4].wwww, TEMP[4].xxxx
45: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
46: MOV TEMP[6].w, TEMP[0].xxxx
47: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz
48: SQRT TEMP[0].x, TEMP[0].xxxx
49: LRP TEMP[0].x, CONST[3].wwww, TEMP[0].xxxx, TEMP[1].zzzz
50: MAD TEMP[0].x, TEMP[0].xxxx, CONST[8].zzzz, CONST[8].wwww
51: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
52: MOV_SAT TEMP[0].x, TEMP[0].xxxx
53: MUL TEMP[0], TEMP[6], TEMP[0].xxxx
54: EX2 TEMP[1].x, -TEMP[0].xxxx
55: EX2 TEMP[1].y, -TEMP[0].yyyy
56: EX2 TEMP[1].z, -TEMP[0].zzzz
57: EX2 TEMP[1].w, -TEMP[0].wwww
58: MOV OUT[0], TEMP[1]
59: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%58 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%59 = load <32 x i8>, <32 x i8> addrspace(2)* %58, align 32, !tbaa !0
%60 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0
%62 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%63 = bitcast <8 x i32> addrspace(2)* %62 to <32 x i8> addrspace(2)*
%64 = load <32 x i8>, <32 x i8> addrspace(2)* %63, align 32, !tbaa !0
%65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%66 = bitcast <4 x i32> addrspace(2)* %65 to <16 x i8> addrspace(2)*
%67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0
%68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%74 = fdiv float 1.000000e+00, %70
%75 = fmul float %68, %74
%76 = fmul float %69, %74
%77 = fdiv float 1.000000e+00, %73
%78 = fmul float %27, %77
%79 = fmul float %71, %78
%80 = fmul float %72, %78
%81 = fmul float %73, %78
%82 = bitcast float %75 to i32
%83 = bitcast float %76 to i32
%84 = insertelement <2 x i32> undef, i32 %82, i32 0
%85 = insertelement <2 x i32> %84, i32 %83, i32 1
%86 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %85, <32 x i8> %59, <16 x i8> %61, i32 2)
%87 = extractelement <4 x float> %86, i32 0
%88 = fmul float %28, %87
%89 = fadd float %88, %29
%90 = fdiv float 1.000000e+00, %89
%91 = fmul float %79, %90
%92 = fmul float %80, %90
%93 = fmul float %81, %90
%94 = fmul float %46, %91
%95 = fmul float %47, %91
%96 = fmul float %48, %91
%97 = fmul float %49, %92
%98 = fadd float %97, %94
%99 = fmul float %50, %92
%100 = fadd float %99, %95
%101 = fmul float %51, %92
%102 = fadd float %101, %96
%103 = fmul float %52, %93
%104 = fadd float %103, %98
%105 = fmul float %53, %93
%106 = fadd float %105, %100
%107 = fmul float %54, %93
%108 = fadd float %107, %102
%109 = fadd float %104, %55
%110 = fadd float %106, %56
%111 = fadd float %108, %57
%112 = fsub float %109, %30
%113 = fsub float %110, %31
%114 = fsub float %111, %32
%115 = bitcast float %75 to i32
%116 = bitcast float %76 to i32
%117 = insertelement <2 x i32> undef, i32 %115, i32 0
%118 = insertelement <2 x i32> %117, i32 %116, i32 1
%119 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %118, <32 x i8> %64, <16 x i8> %67, i32 2)
%120 = extractelement <4 x float> %119, i32 0
%121 = extractelement <4 x float> %119, i32 1
%122 = extractelement <4 x float> %119, i32 2
%123 = extractelement <4 x float> %119, i32 3
%124 = fmul float %120, 2.000000e+00
%125 = fadd float %124, -1.000000e+00
%126 = fmul float %121, 2.000000e+00
%127 = fadd float %126, -1.000000e+00
%128 = fmul float %122, 2.000000e+00
%129 = fadd float %128, -1.000000e+00
%130 = fmul float %125, %125
%131 = fmul float %127, %127
%132 = fadd float %131, %130
%133 = fmul float %129, %129
%134 = fadd float %132, %133
%135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134)
%136 = fmul float %125, %135
%137 = fmul float %127, %135
%138 = fmul float %129, %135
%139 = fmul float %38, %136
%140 = fsub float -0.000000e+00, %139
%141 = fmul float %39, %137
%142 = fsub float %140, %141
%143 = fmul float %40, %138
%144 = fsub float %142, %143
%145 = call float @llvm.maxnum.f32(float %144, float 0.000000e+00)
%146 = fmul float %41, %145
%147 = fmul float %42, %145
%148 = fmul float %43, %145
%149 = fmul float %41, %34
%150 = fmul float %42, %35
%151 = fmul float %43, %36
%152 = fsub float %109, %24
%153 = fsub float %110, %25
%154 = fsub float %111, %26
%155 = fmul float %152, %152
%156 = fmul float %153, %153
%157 = fadd float %156, %155
%158 = fmul float %154, %154
%159 = fadd float %157, %158
%160 = call float @llvm.AMDGPU.rsq.clamped.f32(float %159)
%161 = fmul float %152, %160
%162 = fmul float %153, %160
%163 = fmul float %154, %160
%164 = fsub float -0.000000e+00, %161
%165 = fsub float %164, %38
%166 = fsub float -0.000000e+00, %162
%167 = fsub float %166, %39
%168 = fsub float -0.000000e+00, %163
%169 = fsub float %168, %40
%170 = fmul float %165, %165
%171 = fmul float %167, %167
%172 = fadd float %171, %170
%173 = fmul float %169, %169
%174 = fadd float %172, %173
%175 = call float @llvm.AMDGPU.rsq.clamped.f32(float %174)
%176 = fmul float %165, %175
%177 = fmul float %167, %175
%178 = fmul float %169, %175
%179 = fmul float %176, %136
%180 = fmul float %177, %137
%181 = fadd float %180, %179
%182 = fmul float %178, %138
%183 = fadd float %181, %182
%184 = call float @llvm.maxnum.f32(float %183, float 0.000000e+00)
%185 = fmul float %123, 1.280000e+02
%186 = call float @llvm.pow.f32(float %184, float %185)
%187 = fadd float %149, %151
%188 = fmul float %150, %187
%189 = call float @llvm.sqrt.f32(float %188)
%190 = fmul float %189, 2.000000e+00
%191 = fadd float %149, %150
%192 = fadd float %191, %151
%193 = fmul float %190, %37
%194 = fadd float %193, %192
%195 = fmul float %186, %194
%196 = fmul float %112, %112
%197 = fmul float %113, %113
%198 = fadd float %197, %196
%199 = fmul float %114, %114
%200 = fadd float %198, %199
%201 = call float @llvm.sqrt.f32(float %200)
%202 = call float @llvm.AMDGPU.lrp(float %33, float %201, float %93)
%203 = fmul float %202, %44
%204 = fadd float %203, %45
%205 = fsub float 1.000000e+00, %204
%206 = call float @llvm.AMDIL.clamp.(float %205, float 0.000000e+00, float 1.000000e+00)
%207 = fmul float %146, %206
%208 = fmul float %147, %206
%209 = fmul float %148, %206
%210 = fmul float %195, %206
%211 = fsub float -0.000000e+00, %207
%212 = call float @llvm.AMDIL.exp.(float %211)
%213 = fsub float -0.000000e+00, %208
%214 = call float @llvm.AMDIL.exp.(float %213)
%215 = fsub float -0.000000e+00, %209
%216 = call float @llvm.AMDIL.exp.(float %215)
%217 = fsub float -0.000000e+00, %210
%218 = call float @llvm.AMDIL.exp.(float %217)
%219 = call i32 @llvm.SI.packf16(float %212, float %214)
%220 = bitcast i32 %219 to float
%221 = call i32 @llvm.SI.packf16(float %216, float %218)
%222 = bitcast i32 %221 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %220, float %222, float %220, float %222)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose IR ends just above. The text after the ';' on
; each instruction line is that instruction's encoding.
; Prologue: s_wqm_b64 on exec, m0 = s9 (the [m0] operand of every v_interp_*), and a
; 4-dword descriptor fetched through s[2:3] that all s_buffer_load_dword reads go through.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Interpolate input channels with p1/p2 pairs: v2, v3, v4 use the operand pairs (0,0),
; (1,0), (3,0); v5, v6, v0 use (0,1), (1,1), (2,1).
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
; Four more descriptor loads (through s[4:5] and s[6:7]); s[16:23]/s[8:11] and
; s[24:31]/s[12:15] are the trailing operands of the two image_sample instructions below.
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
; v4 = 1/v4, then v1 = v4*v2 and v2 = v4*v3 form the coordinate pair v[1:2] used by both
; samples. Constant dwords 0x9 and 0x8 are requested in the meantime.
v_rcp_f32_e32 v4, v4 ; 7E085504
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
; The first sample writes one register (v3), the second writes four (v[7:10]).
image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[16:23], s[8:11] ; F0800100 00440301
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[24:31], s[12:15] ; F0800F00 00660701
; Constant dwords 0xc..0xf go to s6..s9.
s_buffer_load_dword s6, s[0:3], 0xc ; C203010C
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xe ; C204010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
s_waitcnt lgkmcnt(0) ; BF8C007F
; v1 = 1 / (s5*v3 + s4), i.e. the `fdiv float 1.000000e+00, %89` step of the IR, with s5/s4
; holding dwords 0x8/0x9 and v3 the single-channel sample.
v_mov_b32_e32 v1, s4 ; 7E020204
s_waitcnt vmcnt(1) ; BF8C0771
v_mac_f32_e32 v1, s5, v3 ; 3E020605
v_rcp_f32_e32 v1, v1 ; 7E025501
; Load the remaining constant dwords. s4 and s5 are reused for dwords 0x0 and 0x1, and the
; final load overwrites s0, which is part of the descriptor s[0:3] itself.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_buffer_load_dword s13, s[0:3], 0x11 ; C2068111
s_buffer_load_dword s14, s[0:3], 0x12 ; C2070112
s_buffer_load_dword s15, s[0:3], 0x13 ; C2078113
s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118
s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119
s_buffer_load_dword s18, s[0:3], 0x1a ; C209011A
s_buffer_load_dword s19, s[0:3], 0x1c ; C209811C
s_buffer_load_dword s20, s[0:3], 0x1d ; C20A011D
s_buffer_load_dword s21, s[0:3], 0x1e ; C20A811E
s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122
s_buffer_load_dword s23, s[0:3], 0x23 ; C20B8123
s_buffer_load_dword s24, s[0:3], 0x24 ; C20C0124
s_buffer_load_dword s25, s[0:3], 0x25 ; C20C8125
s_buffer_load_dword s26, s[0:3], 0x26 ; C20D0126
s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128
s_buffer_load_dword s28, s[0:3], 0x29 ; C20E0129
s_buffer_load_dword s29, s[0:3], 0x2a ; C20E812A
s_buffer_load_dword s30, s[0:3], 0x2c ; C20F012C
s_buffer_load_dword s31, s[0:3], 0x2d ; C20F812D
v_rcp_f32_e32 v2, v0 ; 7E045500
s_buffer_load_dword s32, s[0:3], 0x2e ; C210012E
s_buffer_load_dword s33, s[0:3], 0x30 ; C2108130
s_buffer_load_dword s34, s[0:3], 0x31 ; C2110131
s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v2 = s11 / v0. v3, v6 and (further down) v0 become v1*v2*{v5, v6, v0}; these three values
; are combined with s24..s32 by mul/mac to build v4, v5, v3.
v_mul_f32_e32 v2, s11, v2 ; 1004040B
v_mul_f32_e32 v3, v2, v5 ; 10060B02
v_mul_f32_e32 v3, v1, v3 ; 10060701
v_mul_f32_e32 v4, s24, v3 ; 10080618
v_mul_f32_e32 v5, s25, v3 ; 100A0619
v_mul_f32_e32 v3, s26, v3 ; 1006061A
v_mul_f32_e32 v6, v2, v6 ; 100C0D02
v_mul_f32_e32 v6, v1, v6 ; 100C0D01
v_mac_f32_e32 v4, s27, v6 ; 3E080C1B
v_mac_f32_e32 v5, s28, v6 ; 3E0A0C1C
v_mac_f32_e32 v3, s29, v6 ; 3E060C1D
; Term built only from scalar constants, kept in v12: with a = s19*s12, b = s20*s13 and
; c = s21*s14 it evaluates a + b + c + s15 * 2*sqrt(b * (a + c)).
v_mov_b32_e32 v6, s14 ; 7E0C020E
v_mul_f32_e32 v6, s21, v6 ; 100C0C15
v_mov_b32_e32 v11, s12 ; 7E16020C
v_mac_f32_e32 v6, s19, v11 ; 3E0C1613
v_mov_b32_e32 v11, s12 ; 7E16020C
v_mov_b32_e32 v12, s13 ; 7E18020D
v_mul_f32_e32 v12, s20, v12 ; 10181814
v_mul_f32_e32 v6, v6, v12 ; 100C1906
v_mac_f32_e32 v12, s19, v11 ; 3E181613
v_mov_b32_e32 v11, s14 ; 7E16020E
v_mac_f32_e32 v12, s21, v11 ; 3E181615
v_sqrt_f32_e32 v6, v6 ; 7E0C6706
v_add_f32_e32 v6, v6, v6 ; 060C0D06
v_mac_f32_e32 v12, s15, v6 ; 3E180C0F
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mac_f32_e32 v4, s30, v0 ; 3E08001E
v_mac_f32_e32 v5, s31, v0 ; 3E0A001F
v_mac_f32_e32 v3, s32, v0 ; 3E060020
; Add s33, s34, s0 to obtain the vector (v1, v2, v3); (v4, v5, v6) = that vector minus
; (s4, s5, s10).
v_add_f32_e32 v1, s33, v4 ; 06020821
v_add_f32_e32 v2, s34, v5 ; 06040A22
v_add_f32_e32 v3, s0, v3 ; 06060600
v_subrev_f32_e32 v4, s4, v1 ; 0A080204
v_subrev_f32_e32 v5, s5, v2 ; 0A0A0405
v_subrev_f32_e32 v6, s10, v3 ; 0A0C060A
; Remap the sampled v7..v9 as 2*x - 1, then v11 = v_rsq_clamp(v7*v7 + v8*v8 + v9*v9) and
; v13 = v_rsq_clamp(v4*v4 + v5*v5 + v6*v6); the two computations are interleaved.
v_mad_f32 v7, 2.0, v7, -1.0 ; D2820007 03CE0EF4
v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4
v_mul_f32_e32 v11, v7, v7 ; 10160F07
v_mac_f32_e32 v11, v8, v8 ; 3E161108
v_mul_f32_e32 v13, v4, v4 ; 101A0904
v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05
v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4
v_mac_f32_e32 v11, v9, v9 ; 3E161309
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
; (v4, v5, v6) = -(v4, v5, v6)*v13 - (s16, s17, s18), followed by v13 = v_rsq_clamp of the
; squared length of that new vector.
v_mad_f32 v4, -v4, v13, -s16 ; D2820004 A0421B04
v_mad_f32 v5, -v5, v13, -s17 ; D2820005 A0461B05
v_mad_f32 v6, -v6, v13, -s18 ; D2820006 A04A1B06
v_mul_f32_e32 v7, v11, v7 ; 100E0F0B
v_mul_f32_e32 v13, v4, v4 ; 101A0904
v_mac_f32_e32 v13, v5, v5 ; 3E1A0B05
v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
; Two dot products are accumulated in interleaved fashion from here on. With
; N = v11 * (v7, v8, v9) and H = v13 * (v4, v5, v6):
;   v14, then v7 : max(0, -(s16*N.x + s17*N.y + s18*N.z)), then scaled by s19/s20/s21
;                  into v8, v9, v7;
;   v4           : N.x*H.x + N.y*H.y + N.z*H.z.
; (v1, v2, v3) additionally has (s6, s7, s8) subtracted.
v_mul_f32_e32 v14, s16, v7 ; 101C0E10
v_mul_f32_e32 v8, v11, v8 ; 1010110B
v_mad_f32 v14, -s17, v8, -v14 ; D282000E A43A1011
v_mul_f32_e32 v4, v13, v4 ; 1008090D
v_mul_f32_e32 v4, v7, v4 ; 10080907
v_mul_f32_e32 v5, v13, v5 ; 100A0B0D
v_mac_f32_e32 v4, v8, v5 ; 3E080B08
v_subrev_f32_e32 v1, s6, v1 ; 0A020206
v_subrev_f32_e32 v2, s7, v2 ; 0A040407
v_subrev_f32_e32 v3, s8, v3 ; 0A060608
v_mul_f32_e32 v5, v11, v9 ; 100A130B
v_mad_f32 v7, -s18, v5, v14 ; D2820007 243A0A12
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_mul_f32_e32 v8, s19, v7 ; 10100E13
v_mul_f32_e32 v9, s20, v7 ; 10120E14
v_mul_f32_e32 v7, s21, v7 ; 100E0E15
v_mul_f32_e32 v6, v13, v6 ; 100C0D0D
v_mac_f32_e32 v4, v5, v6 ; 3E080D05
; v1 = sqrt(v1*v1 + v2*v2 + v3*v3); v0 = s9*v1 + (1 - s9)*v0, the expansion of the
; llvm.AMDGPU.lrp call in the IR; then v1 = s22*v0 + s23.
; Interleaved with that, v_log / v_mul_legacy / v_exp on max(0, v4) and 128.0*v10
; (0x43000000 is 128.0) implement the llvm.pow.f32 call; the result is multiplied by v12.
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v2, v2 ; 3E020502
v_mac_f32_e32 v1, v3, v3 ; 3E020703
v_sub_f32_e64 v2, 1.0, s9 ; D2080002 000012F2
v_mul_f32_e32 v0, v0, v2 ; 10000500
v_max_f32_e32 v2, 0, v4 ; 20040880
v_log_f32_e32 v2, v2 ; 7E044F02
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mac_f32_e32 v0, s9, v1 ; 3E000209
v_mov_b32_e32 v1, s23 ; 7E020217
v_mac_f32_e32 v1, s22, v0 ; 3E020016
v_mul_f32_e32 v0, 0x43000000, v10 ; 100014FF 43000000
v_mul_legacy_f32_e32 v0, v0, v2 ; 0E000500
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mul_f32_e32 v0, v12, v0 ; 1000010C
; v1 = (1 - v1) passed through an add with the clamp modifier (the llvm.AMDIL.clamp. call
; with bounds 0.0 and 1.0 in the IR); it then multiplies v8, v9, v7 and v0.
v_sub_f32_e32 v1, 1.0, v1 ; 080202F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v2, v1, v8 ; 10041101
v_mul_f32_e32 v3, v1, v9 ; 10061301
v_mul_f32_e32 v4, v1, v7 ; 10080F01
v_mul_f32_e32 v0, v1, v0 ; 10000101
; v_exp_f32 of each negated product, packed pairwise into two registers by
; v_cvt_pkrtz_f16_f32, and exported as (v1, v0, v1, v0) before the program ends.
v_exp_f32_e64 v1, -v2 ; D34A0001 20000102
v_exp_f32_e64 v2, -v3 ; D34A0002 20000103
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_exp_f32_e64 v2, -v4 ; D34A0002 20000104
v_exp_f32_e64 v0, -v0 ; D34A0000 20000100
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 744 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI program listed above it.
; It computes CONST[0]*IN[0].x + CONST[1]*IN[0].y + CONST[2]*IN[0].z + CONST[3]*IN[0].w
; for OUT[0] (POSITION) and forwards IN[1].xy as OUT[1] (GENERIC[0]).
; Arguments used here: %1 (array holding the constant-data descriptor), %4 (array holding
; the per-input descriptors), and %5/%7, whose sum is the index passed to both input loads.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load element 0 of %1 and read 16 floats through it at byte offsets 0..60:
; %13..%16 = CONST[0].xyzw, %17..%20 = CONST[1].xyzw, %21..%24 = CONST[2].xyzw,
; %25..%28 = CONST[3].xyzw.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; IN[0]: fetched through element 0 of %4 at index %5 + %7; %33..%36 are its x, y, z, w.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; IN[1]: fetched through element 1 of %4 at the same index; only x (%41) and y (%42) are used.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI instruction 0: TEMP[0] = CONST[0] * IN[0].x (one fmul per component).
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI instruction 1: TEMP[0] += CONST[1] * IN[0].y, each MAD emitted as an fmul/fadd pair.
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI instruction 2: TEMP[0] += CONST[2] * IN[0].z.
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI instruction 3: TEMP[0] += CONST[3] * IN[0].w; %64, %66, %68, %70 are the final x, y, z, w.
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Exports. The call whose fourth argument is 32 carries OUT[1] (GENERIC[0]): IN[1].xy plus
; 0.0 for z and w, which the TGSI program never writes. The call whose fourth argument is 12
; carries OUT[0] (POSITION), the transformed vector.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose IR is listed above. The text after the ';' on
; each instruction line is that instruction's encoding.
; v0 += s10 is the index register of both buffer_load_format_xyzw fetches (the
; `add i32 %5, %7` of the IR); v1 = 0 supplies the two zero components of the first export.
; Descriptors: s[0:3] (through s[2:3]) for the s_buffer_load_dword reads, s[4:7] and
; s[8:11] (through s[8:9], offsets 0x0 and 0x4) for the two input fetches.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Fetch both inputs into v[2:5] and v[6:9], and load constant dwords 0x0..0xf into
; s12, s13, s4..s11, s14..s18 and s0. The scalar loads reuse s4..s11 after the fetches that
; read those descriptors have been issued, and the last one overwrites s0 of s[0:3].
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; v0, v8, v9, v2 accumulate result components 0..3; component k is
; dword[k]*v2 + dword[4+k]*v3 + dword[8+k]*v4 + dword[12+k]*v5 (v2..v5 = first fetch).
; v8 and v9 are reused as accumulators, so only v6 and v7 of the second fetch survive.
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; First export (second operand 32): v6, v7 and two zeros from v1. Second export (second
; operand 12): the four accumulated components. These match the two llvm.SI.export calls.
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0]
DCL CONST[2..7]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: RCP TEMP[0].x, CONST[2].xxxx
1: ADD TEMP[1].x, IN[0].xxxx, -CONST[4].xxxx
2: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx
3: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx
4: ADD TEMP[2].x, IN[0].yyyy, -CONST[5].xxxx
5: MUL TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
6: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
7: MUL TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx
8: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx
9: MOV_SAT TEMP[0].x, TEMP[0].xxxx
10: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].xxxx
11: MOV TEMP[0].w, TEMP[0].xxxx
12: MOV TEMP[1].xy, IN[0].xyyy
13: TEX TEMP[1], TEMP[1], SAMP[0], 2D
14: MUL TEMP[1].xyz, TEMP[1], CONST[0]
15: MUL TEMP[0].xyz, TEMP[1].xyzz, CONST[7].xxxx
16: MOV OUT[0], TEMP[0]
17: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the TGSI program listed above it.
; Colour: the 2D sample at IN[0].xy, with xyz multiplied by CONST[0].xyz and by CONST[7].x.
; Alpha:  clamp((1 - (IN[0].x - CONST[4].x)/CONST[2].x) * (1 - (IN[0].y - CONST[5].x)/CONST[2].x)
;               * CONST[3].x, 0, 1) * CONST[6].x.
; Arguments used here: %1 (constant-data descriptor array), %2 and %3 (the <16 x i8> and
; <32 x i8> operands of the sample call), %5 and %7 (operands of llvm.SI.fs.interp).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constants, 16 bytes per CONST[] slot: %24..%26 = CONST[0].xyz (offsets 0, 4, 8) and
; %27..%32 = CONST[2].x .. CONST[7].x (offsets 32, 48, 64, 80, 96, 112).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
; First element of %3 (as <32 x i8>) and of %2 (as <16 x i8>): the two descriptor operands
; handed to llvm.SI.sample.v2i32 below (SVIEW[0] / SAMP[0] in the TGSI).
%33 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0
%35 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
; %37 = IN[0].x, %38 = IN[0].y (first interp argument selects the channel).
%37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; TGSI instructions 0-10: the alpha term described in the header; %49 is the final alpha.
%39 = fdiv float 1.000000e+00, %27
%40 = fsub float %37, %29
%41 = fmul float %40, %39
%42 = fsub float 1.000000e+00, %41
%43 = fsub float %38, %30
%44 = fmul float %43, %39
%45 = fsub float 1.000000e+00, %44
%46 = fmul float %42, %45
%47 = fmul float %46, %28
%48 = call float @llvm.AMDIL.clamp.(float %47, float 0.000000e+00, float 1.000000e+00)
%49 = fmul float %48, %31
; TGSI instructions 12-13: pack the two coordinates bitwise into a <2 x i32> and sample.
; Only components 0..2 of the result are extracted.
%50 = bitcast float %37 to i32
%51 = bitcast float %38 to i32
%52 = insertelement <2 x i32> undef, i32 %50, i32 0
%53 = insertelement <2 x i32> %52, i32 %51, i32 1
%54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %34, <16 x i8> %36, i32 2)
%55 = extractelement <4 x float> %54, i32 0
%56 = extractelement <4 x float> %54, i32 1
%57 = extractelement <4 x float> %54, i32 2
; TGSI instructions 14-15: rgb *= CONST[0].xyz, then rgb *= CONST[7].x.
%58 = fmul float %55, %24
%59 = fmul float %56, %25
%60 = fmul float %57, %26
%61 = fmul float %58, %32
%62 = fmul float %59, %32
%63 = fmul float %60, %32
; Pack (r, g) and (b, alpha) into two i32 values with llvm.SI.packf16, reinterpret them as
; float and export them as (%65, %67, %65, %67).
%64 = call i32 @llvm.SI.packf16(float %61, float %62)
%65 = bitcast i32 %64 to float
%66 = call i32 @llvm.SI.packf16(float %63, float %49)
%67 = bitcast i32 %66 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %65, float %67, float %65, float %67)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose IR is listed above. The text after the ';' on
; each instruction line is that instruction's encoding.
; Prologue: s_wqm_b64 on exec, m0 = s9 for the v_interp_* instructions, and three descriptor
; loads: s[0:3] (through s[2:3]) for the s_buffer_load_dword reads, s[8:11] (through s[4:5])
; and s[12:19] (through s[6:7]) for the image_sample. v2 and v3 receive the two
; interpolated channels (llvm.SI.fs.interp channels 0 and 1 in the IR).
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant loads by dword index (byte offset in the IR = index * 4): s4 = 0x8, s5..s7 = 0x0..0x2,
; s20 = 0x10, s21 = 0x14, s8 = 0xc, s9 = 0x18, s0 = 0x1c. s8 and s9 are reused only after the
; image_sample reading s[8:11] has been issued; the last load overwrites s0 of s[0:3].
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100
s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101
s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102
s_buffer_load_dword s20, s[0:3], 0x10 ; C20A0110
s_buffer_load_dword s21, s[0:3], 0x14 ; C20A8114
; Sample at v[2:3]; three result registers v[4:6] are written, matching the three
; extractelement instructions in the IR.
image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430402
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118
s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C
s_waitcnt lgkmcnt(0) ; BF8C007F
; v0 = 1/s4 (the `fdiv float 1.000000e+00, %27` of the IR).
v_rcp_f32_e32 v0, s4 ; 7E005404
s_waitcnt vmcnt(0) ; BF8C0770
; Sampled channels times s5, s6, s7 -> v1, v4, v5.
v_mul_f32_e32 v1, s5, v4 ; 10020805
v_mul_f32_e32 v4, s6, v5 ; 10080A06
v_mul_f32_e32 v5, s7, v6 ; 100A0C07
; Alpha term: v2 = 1 - (v2 - s20)*v0, v0 = 1 - (v3 - s21)*v0, then
; v0 = clamp(s8 * v0 * v2) * s9, with the clamp expressed as an add-with-clamp of 0.
v_subrev_f32_e32 v2, s20, v2 ; 0A040414
v_subrev_f32_e32 v3, s21, v3 ; 0A060615
v_mad_f32 v2, -v2, v0, 1.0 ; D2820002 23CA0102
v_mad_f32 v0, -v3, v0, 1.0 ; D2820000 23CA0103
v_mul_f32_e32 v0, v0, v2 ; 10000500
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v0, s9, v0 ; 10000009
; Scale the three colour channels by s0, pack (v1, v2) and (v3, v0) pairwise with
; v_cvt_pkrtz_f16_f32, and export as (v1, v0, v1, v0) before the program ends.
v_mul_f32_e32 v1, s0, v1 ; 10020200
v_mul_f32_e32 v2, s0, v4 ; 10040800
v_mul_f32_e32 v3, s0, v5 ; 10060A00
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 184 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..7]
DCL CONST[9..12]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MUL TEMP[3], CONST[9], IN[0].xxxx
43: MAD TEMP[3], CONST[10], IN[0].yyyy, TEMP[3]
44: MAD TEMP[3], CONST[11], IN[0].zzzz, TEMP[3]
45: MAD TEMP[3], CONST[12], IN[0].wwww, TEMP[3]
46: MOV TEMP[4].x, CONST[4].xxxx
47: MOV TEMP[4].y, CONST[5].xxxx
48: MOV TEMP[4].z, CONST[6].xxxx
49: MOV TEMP[5].x, CONST[4].yyyy
50: MOV TEMP[5].y, CONST[5].yyyy
51: MOV TEMP[5].z, CONST[6].yyyy
52: MOV TEMP[6].x, CONST[4].zzzz
53: MOV TEMP[6].y, CONST[5].zzzz
54: MOV TEMP[6].z, CONST[6].zzzz
55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz
58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
59: RSQ TEMP[4].x, TEMP[4].xxxx
60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
61: MOV TEMP[0].yzw, TEMP[0].yxyz
62: MUL TEMP[4], CONST[0], IN[0].xxxx
63: MAD TEMP[4], CONST[1], IN[0].yyyy, TEMP[4]
64: MAD TEMP[4], CONST[2], IN[0].zzzz, TEMP[4]
65: MAD TEMP[4].xyz, CONST[3], IN[0].wwww, TEMP[4]
66: MOV TEMP[4].xyz, TEMP[4].xyzx
67: MOV TEMP[0].x, TEMP[3].zzzz
68: MOV OUT[1], TEMP[2]
69: MOV OUT[4], TEMP[4]
70: MOV OUT[2], TEMP[1]
71: MOV OUT[3], TEMP[0]
72: MOV OUT[0], TEMP[3]
73: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0
%55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0
%57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0
%59 = add i32 %5, %7
%60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59)
%61 = extractelement <4 x float> %60, i32 0
%62 = extractelement <4 x float> %60, i32 1
%63 = extractelement <4 x float> %60, i32 2
%64 = extractelement <4 x float> %60, i32 3
%65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0
%67 = add i32 %5, %7
%68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67)
%69 = extractelement <4 x float> %68, i32 0
%70 = extractelement <4 x float> %68, i32 1
%71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0
%73 = add i32 %5, %7
%74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73)
%75 = extractelement <4 x float> %74, i32 0
%76 = extractelement <4 x float> %74, i32 1
%77 = fmul float %75, 2.500000e-01
%78 = fptosi float %77 to i32
%79 = fptosi float %76 to i32
%80 = sdiv i32 %79, 256
%81 = sitofp i32 %78 to float
%82 = sitofp i32 %80 to float
%83 = shl nsw i32 %80, 8
%84 = sub i32 %79, %83
%85 = sitofp i32 %84 to float
%86 = fadd float %81, -1.000000e+00
%87 = fadd float %82, -1.000000e+00
%88 = fadd float %85, -1.000000e+00
%89 = sitofp i32 %79 to float
%90 = fsub float %76, %89
%91 = fmul float %90, 1.000000e+01
%92 = fadd float %91, 0x3FDFFE5CA0000000
%93 = fptosi float %92 to i32
%94 = icmp eq i32 %93, 0
%95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
%96 = icmp eq i32 %93, 1
%97 = select i1 %96, float 1.000000e+00, float 0.000000e+00
%98 = icmp eq i32 %93, 2
%99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
%100 = fmul float %95, %81
%101 = fmul float %97, %82
%102 = fadd float %100, %101
%103 = fmul float %99, %85
%104 = fadd float %102, %103
%105 = fadd float %104, 0.000000e+00
%106 = fmul float %105, 0x3F70101020000000
%107 = bitcast float %106 to i32
%108 = insertelement <4 x i32> undef, i32 %107, i32 0
%109 = insertelement <4 x i32> %108, i32 1036831949, i32 1
%110 = insertelement <4 x i32> %109, i32 0, i32 2
%111 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %110, <32 x i8> %54, <16 x i8> %56, i32 2)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
%114 = fmul float %112, 1.600000e+01
%115 = fadd float %114, -8.000000e+00
%116 = fmul float %113, 4.000000e+00
%117 = shl i32 %78, 2
%118 = sitofp i32 %117 to float
%119 = fsub float %75, %118
%120 = fadd float %119, -2.000000e+00
%121 = fmul float %37, %61
%122 = fmul float %38, %61
%123 = fmul float %39, %61
%124 = fmul float %40, %61
%125 = fmul float %41, %62
%126 = fadd float %125, %121
%127 = fmul float %42, %62
%128 = fadd float %127, %122
%129 = fmul float %43, %62
%130 = fadd float %129, %123
%131 = fmul float %44, %62
%132 = fadd float %131, %124
%133 = fmul float %45, %63
%134 = fadd float %133, %126
%135 = fmul float %46, %63
%136 = fadd float %135, %128
%137 = fmul float %47, %63
%138 = fadd float %137, %130
%139 = fmul float %48, %63
%140 = fadd float %139, %132
%141 = fmul float %49, %64
%142 = fadd float %141, %134
%143 = fmul float %50, %64
%144 = fadd float %143, %136
%145 = fmul float %51, %64
%146 = fadd float %145, %138
%147 = fmul float %52, %64
%148 = fadd float %147, %140
%149 = fmul float %28, %69
%150 = fmul float %31, %69
%151 = fmul float %34, %69
%152 = fmul float %29, %70
%153 = fadd float %152, %149
%154 = fmul float %32, %70
%155 = fadd float %154, %150
%156 = fmul float %35, %70
%157 = fadd float %156, %151
%158 = fmul float %30, %120
%159 = fadd float %158, %153
%160 = fmul float %33, %120
%161 = fadd float %160, %155
%162 = fmul float %36, %120
%163 = fadd float %162, %157
%164 = fmul float %159, %159
%165 = fmul float %161, %161
%166 = fadd float %165, %164
%167 = fmul float %163, %163
%168 = fadd float %166, %167
%169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168)
%170 = fmul float %159, %169
%171 = fmul float %161, %169
%172 = fmul float %163, %169
%173 = fmul float %13, %61
%174 = fmul float %14, %61
%175 = fmul float %15, %61
%176 = fmul float %16, %61
%177 = fmul float %17, %62
%178 = fadd float %177, %173
%179 = fmul float %18, %62
%180 = fadd float %179, %174
%181 = fmul float %19, %62
%182 = fadd float %181, %175
%183 = fmul float %20, %62
%184 = fadd float %183, %176
%185 = fmul float %21, %63
%186 = fadd float %185, %178
%187 = fmul float %22, %63
%188 = fadd float %187, %180
%189 = fmul float %23, %63
%190 = fadd float %189, %182
%191 = fmul float %24, %63
%192 = fadd float %191, %184
%193 = fmul float %25, %64
%194 = fadd float %193, %186
%195 = fmul float %26, %64
%196 = fadd float %195, %188
%197 = fmul float %27, %64
%198 = fadd float %197, %190
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %115)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %97, float %99, float %116)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %146, float %170, float %171, float %172)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %194, float %196, float %198, float %192)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %142, float %144, float %146, float %148)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000
v_mov_b32_e32 v5, 0 ; 7E0A0280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[8:9], 0x0 ; C0880900
s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904
s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s28, s[0:3], 0x10 ; C20E0110
s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111
buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00
s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112
s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114
s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115
s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116
s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118
s_buffer_load_dword s21, s[0:3], 0x19 ; C20A8119
s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A
s_buffer_load_dword s23, s[0:3], 0x24 ; C20B8124
s_buffer_load_dword s24, s[0:3], 0x25 ; C20C0125
s_buffer_load_dword s25, s[0:3], 0x26 ; C20C8126
s_buffer_load_dword s26, s[0:3], 0x27 ; C20D0127
s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128
s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129
s_buffer_load_dword s31, s[0:3], 0x0 ; C20F8100
s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101
s_buffer_load_dword s33, s[0:3], 0x2 ; C2108102
s_buffer_load_dword s34, s[0:3], 0x3 ; C2110103
s_buffer_load_dword s35, s[0:3], 0x4 ; C2118104
s_buffer_load_dword s36, s[0:3], 0x2a ; C212012A
s_buffer_load_dword s37, s[0:3], 0x2b ; C212812B
s_buffer_load_dword s38, s[0:3], 0x2c ; C213012C
s_buffer_load_dword s39, s[0:3], 0x2d ; C213812D
s_buffer_load_dword s40, s[0:3], 0x2e ; C214012E
s_buffer_load_dword s41, s[0:3], 0x5 ; C2148105
s_buffer_load_dword s42, s[0:3], 0x6 ; C2150106
s_buffer_load_dword s43, s[0:3], 0x7 ; C2158107
s_buffer_load_dword s44, s[0:3], 0x8 ; C2160108
s_buffer_load_dword s45, s[0:3], 0x9 ; C2168109
s_buffer_load_dword s46, s[0:3], 0xa ; C217010A
s_buffer_load_dword s47, s[0:3], 0xb ; C217810B
s_buffer_load_dword s48, s[0:3], 0xc ; C218010C
s_buffer_load_dword s49, s[0:3], 0xd ; C218810D
s_buffer_load_dword s50, s[0:3], 0xe ; C219010E
s_buffer_load_dword s51, s[0:3], 0x2f ; C219812F
s_buffer_load_dword s52, s[0:3], 0x30 ; C21A0130
s_buffer_load_dword s53, s[0:3], 0x31 ; C21A8131
s_buffer_load_dword s54, s[0:3], 0x32 ; C21B0132
s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133
v_mul_f32_e32 v0, s28, v10 ; 1000141C
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v14, s17, v10 ; 101C1411
v_mul_f32_e32 v10, s20, v10 ; 10141414
v_mac_f32_e32 v0, s29, v11 ; 3E00161D
v_mac_f32_e32 v14, s18, v11 ; 3E1C1612
v_mac_f32_e32 v10, s21, v11 ; 3E141615
v_mul_f32_e32 v11, s23, v6 ; 10160C17
v_mac_f32_e32 v11, s27, v7 ; 3E160E1B
v_mul_f32_e32 v15, s24, v6 ; 101E0C18
v_mac_f32_e32 v15, s30, v7 ; 3E1E0E1E
v_mul_f32_e32 v16, s25, v6 ; 10200C19
v_mac_f32_e32 v16, s36, v7 ; 3E200E24
v_mul_f32_e32 v17, s26, v6 ; 10220C1A
v_mac_f32_e32 v17, s37, v7 ; 3E220E25
v_mul_f32_e32 v18, s31, v6 ; 10240C1F
v_mac_f32_e32 v18, s35, v7 ; 3E240E23
v_mul_f32_e32 v19, s32, v6 ; 10260C20
v_mac_f32_e32 v19, s41, v7 ; 3E260E29
v_mul_f32_e32 v20, s33, v6 ; 10280C21
v_mac_f32_e32 v20, s42, v7 ; 3E280E2A
v_mul_f32_e32 v6, s34, v6 ; 100C0C22
v_mac_f32_e32 v6, s43, v7 ; 3E0C0E2B
v_mac_f32_e32 v11, s38, v8 ; 3E161026
v_mac_f32_e32 v15, s39, v8 ; 3E1E1027
v_mac_f32_e32 v16, s40, v8 ; 3E201028
v_mac_f32_e32 v17, s51, v8 ; 3E221033
v_mac_f32_e32 v18, s44, v8 ; 3E24102C
v_mac_f32_e32 v19, s45, v8 ; 3E26102D
v_mac_f32_e32 v20, s46, v8 ; 3E28102E
v_mac_f32_e32 v6, s47, v8 ; 3E0C102F
v_mac_f32_e32 v11, s52, v9 ; 3E161234
v_mac_f32_e32 v15, s53, v9 ; 3E1E1235
v_mac_f32_e32 v16, s54, v9 ; 3E201236
v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000
v_cvt_i32_f32_e32 v4, v13 ; 7E08110D
v_cvt_i32_f32_e32 v3, v3 ; 7E061103
v_mac_f32_e32 v17, s0, v9 ; 3E221200
v_mac_f32_e32 v18, s48, v9 ; 3E241230
v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04
v_lshlrev_b32_e32 v8, 2, v3 ; 34100682
v_cvt_f32_i32_e32 v8, v8 ; 7E100B08
v_mac_f32_e32 v19, s49, v9 ; 3E261231
v_mac_f32_e32 v20, s50, v9 ; 3E281232
v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07
v_subrev_f32_e32 v8, v8, v12 ; 0A101908
v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000
v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F
v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_add_i32_e32 v7, v4, v7 ; 4A0E0F04
v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00
v_sub_i32_e32 v4, v4, v9 ; 4C081304
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480
v_cvt_f32_i32_e32 v12, v3 ; 7E180B03
v_ashrrev_i32_e32 v3, 8, v7 ; 30060E88
v_cvt_f32_i32_e32 v7, v3 ; 7E0E0B03
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cmp_eq_i32_e64 s[0:1], 2, v1 ; D1040000 00020282
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04
v_mul_f32_e32 v3, v7, v1 ; 10060307
v_mac_f32_e32 v3, v12, v9 ; 3E06130C
v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480
v_mac_f32_e32 v3, v13, v21 ; 3E062B0D
v_add_f32_e32 v3, 0, v3 ; 06060680
v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[12:15] ; F0900300 00610303
v_add_f32_e32 v5, -2.0, v8 ; 060A10F5
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v14, s19, v5 ; 3E1C0A13
v_mac_f32_e32 v10, s22, v5 ; 3E140A16
v_add_f32_e32 v5, -1.0, v12 ; 060A18F3
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v14, v14 ; 3E101D0E
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_add_f32_e32 v12, -1.0, v13 ; 06181AF3
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000
exp 15, 32, 0, 0, 0, v5, v7, v12, v2 ; F800020F 020C0705
v_mul_f32_e32 v0, v8, v0 ; 10000108
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v2, v8, v14 ; 10041D08
v_mul_f32_e32 v3, 4.0, v4 ; 100608F6
v_mul_f32_e32 v4, v8, v10 ; 10081508
exp 15, 33, 0, 0, 0, v9, v1, v21, v3 ; F800021F 03150109
exp 15, 34, 0, 0, 0, v16, v0, v2, v4 ; F800022F 04020010
exp 15, 35, 0, 0, 0, v18, v19, v20, v6 ; F800023F 06141312
exp 15, 12, 0, 1, 0, v11, v15, v16, v17 ; F80008CF 11100F0B
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 24
Code Size: 680 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SAMP[6]
DCL SAMP[7]
DCL SAMP[8]
DCL SAMP[9]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL SVIEW[6], 2D, FLOAT
DCL SVIEW[7], 2D, FLOAT
DCL SVIEW[8], 2D, FLOAT
DCL SVIEW[9], 2D, FLOAT
DCL CONST[0..3]
DCL CONST[14..22]
DCL TEMP[0..36], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001}
IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000}
0: DP3 TEMP[0].x, CONST[1].xyzz, CONST[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, CONST[1].xyzz, TEMP[0].xxxx
3: ADD TEMP[1].xyz, CONST[0].xyzz, -IN[3].xyzz
4: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
5: RSQ TEMP[2].x, TEMP[2].xxxx
6: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
7: ABS TEMP[2].xyz, IN[2].yzww
8: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
9: RSQ TEMP[3].x, TEMP[3].xxxx
10: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, IMM[0].xxxx
11: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy
12: MAX TEMP[2].xyz, TEMP[2].xyzz, IMM[0].zzzz
13: ADD TEMP[3].x, TEMP[2].xxxx, TEMP[2].yyyy
14: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].zzzz
15: RCP TEMP[3].xyz, TEMP[3].xxxx
16: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz
17: ADD TEMP[3], IN[0], IMM[0].wwww
18: FLR TEMP[3].xyz, TEMP[3]
19: MOV TEMP[4].x, CONST[14].xxxx
20: MUL TEMP[5].x, TEMP[3].xxxx, CONST[14].xxxx
21: MOV TEMP[6].x, TEMP[5].xxxx
22: FLR TEMP[5].x, TEMP[5].xxxx
23: MUL TEMP[5].x, TEMP[5].xxxx, CONST[14].xxxx
24: FSGE TEMP[7].x, TEMP[3].xxxx, IMM[1].xxxx
25: UIF TEMP[7].xxxx :0
26: MOV TEMP[4].x, CONST[15].xxxx
27: ADD TEMP[7].x, TEMP[3].xxxx, IMM[1].yyyy
28: MUL TEMP[7].x, TEMP[7].xxxx, CONST[15].xxxx
29: MOV TEMP[6].x, TEMP[7].xxxx
30: FLR TEMP[8].x, TEMP[7].xxxx
31: MUL TEMP[8].x, TEMP[8].xxxx, CONST[15].xxxx
32: MOV TEMP[5].x, TEMP[8].xxxx
33: FRC TEMP[7].x, TEMP[7].xxxx
34: FRC TEMP[9].x, TEMP[8].xxxx
35: MOV TEMP[7].y, TEMP[9].xxxx
36: FLR TEMP[8].x, TEMP[8].xxxx
37: ADD TEMP[8].x, TEMP[8].xxxx, IMM[1].zzzz
38: MOV TEMP[7].z, TEMP[8].xxxx
39: MOV TEMP[7].xyz, TEMP[7].xyzx
40: ELSE :0
41: FRC TEMP[6].x, TEMP[6].xxxx
42: FRC TEMP[8].x, TEMP[5].xxxx
43: MOV TEMP[6].y, TEMP[8].xxxx
44: FLR TEMP[5].x, TEMP[5].xxxx
45: MOV TEMP[6].z, TEMP[5].xxxx
46: MOV TEMP[7].xyz, TEMP[6].xyzx
47: ENDIF
48: MOV TEMP[5].x, CONST[14].xxxx
49: MUL TEMP[6].x, TEMP[3].yyyy, CONST[14].xxxx
50: MOV TEMP[8].x, TEMP[6].xxxx
51: FLR TEMP[6].x, TEMP[6].xxxx
52: MUL TEMP[6].x, TEMP[6].xxxx, CONST[14].xxxx
53: FSGE TEMP[9].x, TEMP[3].yyyy, IMM[1].xxxx
54: UIF TEMP[9].xxxx :0
55: MOV TEMP[5].x, CONST[15].xxxx
56: ADD TEMP[9].x, TEMP[3].yyyy, IMM[1].yyyy
57: MUL TEMP[9].x, TEMP[9].xxxx, CONST[15].xxxx
58: MOV TEMP[8].x, TEMP[9].xxxx
59: FLR TEMP[10].x, TEMP[9].xxxx
60: MUL TEMP[10].x, TEMP[10].xxxx, CONST[15].xxxx
61: MOV TEMP[6].x, TEMP[10].xxxx
62: FRC TEMP[9].x, TEMP[9].xxxx
63: FRC TEMP[11].x, TEMP[10].xxxx
64: MOV TEMP[9].y, TEMP[11].xxxx
65: FLR TEMP[10].x, TEMP[10].xxxx
66: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
67: MOV TEMP[9].z, TEMP[10].xxxx
68: MOV TEMP[9].xyz, TEMP[9].xyzx
69: ELSE :0
70: FRC TEMP[8].x, TEMP[8].xxxx
71: FRC TEMP[10].x, TEMP[6].xxxx
72: MOV TEMP[8].y, TEMP[10].xxxx
73: FLR TEMP[6].x, TEMP[6].xxxx
74: MOV TEMP[8].z, TEMP[6].xxxx
75: MOV TEMP[9].xyz, TEMP[8].xyzx
76: ENDIF
77: MOV TEMP[6].x, CONST[14].xxxx
78: MUL TEMP[8].x, TEMP[3].zzzz, CONST[14].xxxx
79: MOV TEMP[10].x, TEMP[8].xxxx
80: FLR TEMP[8].x, TEMP[8].xxxx
81: MUL TEMP[8].x, TEMP[8].xxxx, CONST[14].xxxx
82: FSGE TEMP[11].x, TEMP[3].zzzz, IMM[1].xxxx
83: UIF TEMP[11].xxxx :0
84: MOV TEMP[6].x, CONST[15].xxxx
85: ADD TEMP[3].x, TEMP[3].zzzz, IMM[1].yyyy
86: MUL TEMP[3].x, TEMP[3].xxxx, CONST[15].xxxx
87: MOV TEMP[10].x, TEMP[3].xxxx
88: FLR TEMP[11].x, TEMP[3].xxxx
89: MUL TEMP[11].x, TEMP[11].xxxx, CONST[15].xxxx
90: MOV TEMP[8].x, TEMP[11].xxxx
91: FRC TEMP[3].x, TEMP[3].xxxx
92: FRC TEMP[12].x, TEMP[11].xxxx
93: MOV TEMP[3].y, TEMP[12].xxxx
94: FLR TEMP[11].x, TEMP[11].xxxx
95: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
96: MOV TEMP[3].z, TEMP[11].xxxx
97: MOV TEMP[3].xyz, TEMP[3].xyzx
98: ELSE :0
99: FRC TEMP[10].x, TEMP[10].xxxx
100: FRC TEMP[11].x, TEMP[8].xxxx
101: MOV TEMP[10].y, TEMP[11].xxxx
102: FLR TEMP[8].x, TEMP[8].xxxx
103: MOV TEMP[10].z, TEMP[8].xxxx
104: MOV TEMP[3].xyz, TEMP[10].xyzx
105: ENDIF
106: ADD TEMP[8].xyz, IN[3].xyzz, -CONST[0].xyzz
107: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[8].xyzz
108: MUL TEMP[8].x, CONST[20].xxxx, TEMP[8].xxxx
109: LG2 TEMP[8].x, TEMP[8].xxxx
110: MUL TEMP[8].x, TEMP[8].xxxx, IMM[1].wwww
111: MUL TEMP[8].x, TEMP[8].xxxx, CONST[19].xxxx
112: MOV TEMP[10].xy, IN[3].xyxx
113: MOV TEMP[11].x, IMM[2].xxxx
114: FSNE TEMP[12].x, CONST[14].xxxx, TEMP[4].xxxx
115: UIF TEMP[12].xxxx :0
116: MOV TEMP[11].x, IMM[2].yyyy
117: RCP TEMP[12].x, CONST[17].xxxx
118: MUL TEMP[10].xy, IN[3].xyyy, TEMP[12].xxxx
119: ELSE :0
120: RCP TEMP[12].x, CONST[16].xxxx
121: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx
122: ENDIF
123: FRC TEMP[10].xy, TEMP[10].xyyy
124: MUL TEMP[12].x, CONST[18].xxxx, IMM[2].wwww
125: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
126: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
127: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].xxxx
128: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
129: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
130: MOV TEMP[11].xy, TEMP[10].xyyy
131: MOV TEMP[11].w, TEMP[8].xxxx
132: TXL TEMP[11], TEMP[11], SAMP[8], 2D
133: FSEQ TEMP[12].x, TEMP[7].zzzz, IMM[1].zzzz
134: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
135: MOV TEMP[13].xy, TEMP[10].xyyy
136: MOV TEMP[13].w, TEMP[8].xxxx
137: TXL TEMP[13], TEMP[13], SAMP[6], 2D
138: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[3].xxxx
139: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
140: MOV TEMP[15].xy, TEMP[10].xyyy
141: MOV TEMP[15].w, TEMP[8].xxxx
142: TXL TEMP[15], TEMP[15], SAMP[4], 2D
143: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[2].wwww
144: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
145: MOV TEMP[17].xy, TEMP[10].xyyy
146: MOV TEMP[17].w, TEMP[8].xxxx
147: TXL TEMP[17], TEMP[17], SAMP[2], 2D
148: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].zzzz
149: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
150: MOV TEMP[10].xy, TEMP[10].xyyy
151: MOV TEMP[10].w, TEMP[8].xxxx
152: TXL TEMP[10], TEMP[10], SAMP[0], 2D
153: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[3].yyyy
154: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
155: MUL TEMP[10], TEMP[10], TEMP[19].xxxx
156: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10]
157: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10]
158: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10]
159: MAD TEMP[10], TEMP[11], TEMP[12].xxxx, TEMP[10]
160: MOV TEMP[11].xy, IN[3].zyzz
161: MOV TEMP[12].x, IMM[2].xxxx
162: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[4].xxxx
163: UIF TEMP[13].xxxx :0
164: MOV TEMP[12].x, IMM[2].yyyy
165: RCP TEMP[13].x, CONST[17].xxxx
166: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx
167: ELSE :0
168: RCP TEMP[13].x, CONST[16].xxxx
169: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
170: ENDIF
171: FRC TEMP[11].xy, TEMP[11].xyyy
172: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww
173: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
174: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
175: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx
176: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
177: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
178: MOV TEMP[12].xy, TEMP[11].xyyy
179: MOV TEMP[12].w, TEMP[8].xxxx
180: TXL TEMP[12], TEMP[12], SAMP[8], 2D
181: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz
182: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
183: MOV TEMP[14].xy, TEMP[11].xyyy
184: MOV TEMP[14].w, TEMP[8].xxxx
185: TXL TEMP[14], TEMP[14], SAMP[6], 2D
186: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx
187: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
188: MOV TEMP[16].xy, TEMP[11].xyyy
189: MOV TEMP[16].w, TEMP[8].xxxx
190: TXL TEMP[16], TEMP[16], SAMP[4], 2D
191: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww
192: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
193: MOV TEMP[18].xy, TEMP[11].xyyy
194: MOV TEMP[18].w, TEMP[8].xxxx
195: TXL TEMP[18], TEMP[18], SAMP[2], 2D
196: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz
197: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
198: MOV TEMP[11].xy, TEMP[11].xyyy
199: MOV TEMP[11].w, TEMP[8].xxxx
200: TXL TEMP[11], TEMP[11], SAMP[0], 2D
201: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy
202: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
203: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
204: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
205: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
206: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
207: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11]
208: MOV TEMP[12].xy, IN[3].zxzz
209: MOV TEMP[13].x, IMM[2].xxxx
210: FSNE TEMP[14].x, CONST[14].xxxx, TEMP[4].xxxx
211: UIF TEMP[14].xxxx :0
212: MOV TEMP[13].x, IMM[2].yyyy
213: RCP TEMP[14].x, CONST[17].xxxx
214: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx
215: ELSE :0
216: RCP TEMP[14].x, CONST[16].xxxx
217: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
218: ENDIF
219: FRC TEMP[12].xy, TEMP[12].xyyy
220: MUL TEMP[14].x, CONST[18].xxxx, IMM[2].wwww
221: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
222: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
223: MUL TEMP[13].x, TEMP[13].xxxx, CONST[18].xxxx
224: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
225: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
226: MOV TEMP[13].xy, TEMP[12].xyyy
227: MOV TEMP[13].w, TEMP[8].xxxx
228: TXL TEMP[13], TEMP[13], SAMP[8], 2D
229: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz
230: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
231: MOV TEMP[15].xy, TEMP[12].xyyy
232: MOV TEMP[15].w, TEMP[8].xxxx
233: TXL TEMP[15], TEMP[15], SAMP[6], 2D
234: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx
235: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
236: MOV TEMP[17].xy, TEMP[12].xyyy
237: MOV TEMP[17].w, TEMP[8].xxxx
238: TXL TEMP[17], TEMP[17], SAMP[4], 2D
239: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww
240: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
241: MOV TEMP[19].xy, TEMP[12].xyyy
242: MOV TEMP[19].w, TEMP[8].xxxx
243: TXL TEMP[19], TEMP[19], SAMP[2], 2D
244: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz
245: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
246: MOV TEMP[12].xy, TEMP[12].xyyy
247: MOV TEMP[12].w, TEMP[8].xxxx
248: TXL TEMP[12], TEMP[12], SAMP[0], 2D
249: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy
250: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
251: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
252: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
253: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
254: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
255: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12]
256: MOV TEMP[13].xy, IN[3].xyxx
257: MOV TEMP[14].x, IMM[2].xxxx
258: FSNE TEMP[15].x, CONST[14].xxxx, TEMP[5].xxxx
259: UIF TEMP[15].xxxx :0
260: MOV TEMP[14].x, IMM[2].yyyy
261: RCP TEMP[15].x, CONST[17].xxxx
262: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx
263: ELSE :0
264: RCP TEMP[15].x, CONST[16].xxxx
265: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
266: ENDIF
267: FRC TEMP[13].xy, TEMP[13].xyyy
268: MUL TEMP[15].x, CONST[18].xxxx, IMM[2].wwww
269: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
270: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
271: MUL TEMP[14].x, TEMP[14].xxxx, CONST[18].xxxx
272: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
273: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
274: MOV TEMP[14].xy, TEMP[13].xyyy
275: MOV TEMP[14].w, TEMP[8].xxxx
276: TXL TEMP[14], TEMP[14], SAMP[8], 2D
277: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz
278: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
279: MOV TEMP[16].xy, TEMP[13].xyyy
280: MOV TEMP[16].w, TEMP[8].xxxx
281: TXL TEMP[16], TEMP[16], SAMP[6], 2D
282: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx
283: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
284: MOV TEMP[18].xy, TEMP[13].xyyy
285: MOV TEMP[18].w, TEMP[8].xxxx
286: TXL TEMP[18], TEMP[18], SAMP[4], 2D
287: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww
288: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
289: MOV TEMP[20].xy, TEMP[13].xyyy
290: MOV TEMP[20].w, TEMP[8].xxxx
291: TXL TEMP[20], TEMP[20], SAMP[2], 2D
292: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz
293: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
294: MOV TEMP[13].xy, TEMP[13].xyyy
295: MOV TEMP[13].w, TEMP[8].xxxx
296: TXL TEMP[13], TEMP[13], SAMP[0], 2D
297: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy
298: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
299: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
300: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
301: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
302: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
303: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13]
304: MOV TEMP[14].xy, IN[3].zyzz
305: MOV TEMP[15].x, IMM[2].xxxx
306: FSNE TEMP[16].x, CONST[14].xxxx, TEMP[5].xxxx
307: UIF TEMP[16].xxxx :0
308: MOV TEMP[15].x, IMM[2].yyyy
309: RCP TEMP[16].x, CONST[17].xxxx
310: MUL TEMP[14].xy, IN[3].zyyy, TEMP[16].xxxx
311: ELSE :0
312: RCP TEMP[16].x, CONST[16].xxxx
313: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
314: ENDIF
315: FRC TEMP[14].xy, TEMP[14].xyyy
316: MUL TEMP[16].x, CONST[18].xxxx, IMM[2].wwww
317: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
318: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
319: MUL TEMP[15].x, TEMP[15].xxxx, CONST[18].xxxx
320: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
321: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
322: MOV TEMP[15].xy, TEMP[14].xyyy
323: MOV TEMP[15].w, TEMP[8].xxxx
324: TXL TEMP[15], TEMP[15], SAMP[8], 2D
325: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz
326: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
327: MOV TEMP[17].xy, TEMP[14].xyyy
328: MOV TEMP[17].w, TEMP[8].xxxx
329: TXL TEMP[17], TEMP[17], SAMP[6], 2D
330: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx
331: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
332: MOV TEMP[19].xy, TEMP[14].xyyy
333: MOV TEMP[19].w, TEMP[8].xxxx
334: TXL TEMP[19], TEMP[19], SAMP[4], 2D
335: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww
336: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
337: MOV TEMP[21].xy, TEMP[14].xyyy
338: MOV TEMP[21].w, TEMP[8].xxxx
339: TXL TEMP[21], TEMP[21], SAMP[2], 2D
340: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz
341: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
342: MOV TEMP[14].xy, TEMP[14].xyyy
343: MOV TEMP[14].w, TEMP[8].xxxx
344: TXL TEMP[14], TEMP[14], SAMP[0], 2D
345: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy
346: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
347: MUL TEMP[14], TEMP[14], TEMP[23].xxxx
348: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14]
349: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14]
350: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14]
351: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14]
352: MOV TEMP[15].xy, IN[3].zxzz
353: MOV TEMP[16].x, IMM[2].xxxx
354: FSNE TEMP[17].x, CONST[14].xxxx, TEMP[5].xxxx
355: UIF TEMP[17].xxxx :0
356: MOV TEMP[16].x, IMM[2].yyyy
357: RCP TEMP[17].x, CONST[17].xxxx
358: MUL TEMP[15].xy, IN[3].zxxx, TEMP[17].xxxx
359: ELSE :0
360: RCP TEMP[17].x, CONST[16].xxxx
361: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx
362: ENDIF
363: FRC TEMP[15].xy, TEMP[15].xyyy
364: MUL TEMP[17].x, CONST[18].xxxx, IMM[2].wwww
365: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx
366: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx
367: MUL TEMP[16].x, TEMP[16].xxxx, CONST[18].xxxx
368: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx
369: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
370: MOV TEMP[16].xy, TEMP[15].xyyy
371: MOV TEMP[16].w, TEMP[8].xxxx
372: TXL TEMP[16], TEMP[16], SAMP[8], 2D
373: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[1].zzzz
374: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
375: MOV TEMP[18].xy, TEMP[15].xyyy
376: MOV TEMP[18].w, TEMP[8].xxxx
377: TXL TEMP[18], TEMP[18], SAMP[6], 2D
378: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].xxxx
379: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
380: MOV TEMP[20].xy, TEMP[15].xyyy
381: MOV TEMP[20].w, TEMP[8].xxxx
382: TXL TEMP[20], TEMP[20], SAMP[4], 2D
383: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].wwww
384: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
385: MOV TEMP[22].xy, TEMP[15].xyyy
386: MOV TEMP[22].w, TEMP[8].xxxx
387: TXL TEMP[22], TEMP[22], SAMP[2], 2D
388: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[2].zzzz
389: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
390: MOV TEMP[15].xy, TEMP[15].xyyy
391: MOV TEMP[15].w, TEMP[8].xxxx
392: TXL TEMP[15], TEMP[15], SAMP[0], 2D
393: FSEQ TEMP[24].x, TEMP[9].zzzz, IMM[3].yyyy
394: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
395: MUL TEMP[15], TEMP[15], TEMP[24].xxxx
396: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15]
397: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15]
398: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15]
399: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15]
400: MOV TEMP[16].xy, IN[3].xyxx
401: MOV TEMP[17].x, IMM[2].xxxx
402: FSNE TEMP[18].x, CONST[14].xxxx, TEMP[6].xxxx
403: UIF TEMP[18].xxxx :0
404: MOV TEMP[17].x, IMM[2].yyyy
405: RCP TEMP[18].x, CONST[17].xxxx
406: MUL TEMP[16].xy, IN[3].xyyy, TEMP[18].xxxx
407: ELSE :0
408: RCP TEMP[18].x, CONST[16].xxxx
409: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx
410: ENDIF
411: FRC TEMP[16].xy, TEMP[16].xyyy
412: MUL TEMP[18].x, CONST[18].xxxx, IMM[2].wwww
413: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx
414: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx
415: MUL TEMP[17].x, TEMP[17].xxxx, CONST[18].xxxx
416: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx
417: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
418: MOV TEMP[17].xy, TEMP[16].xyyy
419: MOV TEMP[17].w, TEMP[8].xxxx
420: TXL TEMP[17], TEMP[17], SAMP[8], 2D
421: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[1].zzzz
422: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
423: MOV TEMP[19].xy, TEMP[16].xyyy
424: MOV TEMP[19].w, TEMP[8].xxxx
425: TXL TEMP[19], TEMP[19], SAMP[6], 2D
426: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[3].xxxx
427: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
428: MOV TEMP[21].xy, TEMP[16].xyyy
429: MOV TEMP[21].w, TEMP[8].xxxx
430: TXL TEMP[21], TEMP[21], SAMP[4], 2D
431: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[2].wwww
432: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
433: MOV TEMP[23].xy, TEMP[16].xyyy
434: MOV TEMP[23].w, TEMP[8].xxxx
435: TXL TEMP[23], TEMP[23], SAMP[2], 2D
436: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].zzzz
437: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
438: MOV TEMP[16].xy, TEMP[16].xyyy
439: MOV TEMP[16].w, TEMP[8].xxxx
440: TXL TEMP[16], TEMP[16], SAMP[0], 2D
441: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[3].yyyy
442: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
443: MUL TEMP[16], TEMP[16], TEMP[25].xxxx
444: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16]
445: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16]
446: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16]
447: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16]
448: MOV TEMP[17].xy, IN[3].zyzz
449: MOV TEMP[18].x, IMM[2].xxxx
450: FSNE TEMP[19].x, CONST[14].xxxx, TEMP[6].xxxx
451: UIF TEMP[19].xxxx :0
452: MOV TEMP[18].x, IMM[2].yyyy
453: RCP TEMP[19].x, CONST[17].xxxx
454: MUL TEMP[17].xy, IN[3].zyyy, TEMP[19].xxxx
455: ELSE :0
456: RCP TEMP[19].x, CONST[16].xxxx
457: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx
458: ENDIF
459: FRC TEMP[17].xy, TEMP[17].xyyy
460: MUL TEMP[19].x, CONST[18].xxxx, IMM[2].wwww
461: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx
462: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx
463: MUL TEMP[18].x, TEMP[18].xxxx, CONST[18].xxxx
464: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx
465: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
466: MOV TEMP[18].xy, TEMP[17].xyyy
467: MOV TEMP[18].w, TEMP[8].xxxx
468: TXL TEMP[18], TEMP[18], SAMP[8], 2D
469: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[1].zzzz
470: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
471: MOV TEMP[20].xy, TEMP[17].xyyy
472: MOV TEMP[20].w, TEMP[8].xxxx
473: TXL TEMP[20], TEMP[20], SAMP[6], 2D
474: FSEQ TEMP[21].x, TEMP[3].zzzz, IMM[3].xxxx
475: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
476: MOV TEMP[22].xy, TEMP[17].xyyy
477: MOV TEMP[22].w, TEMP[8].xxxx
478: TXL TEMP[22], TEMP[22], SAMP[4], 2D
479: FSEQ TEMP[23].x, TEMP[3].zzzz, IMM[2].wwww
480: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
481: MOV TEMP[24].xy, TEMP[17].xyyy
482: MOV TEMP[24].w, TEMP[8].xxxx
483: TXL TEMP[24], TEMP[24], SAMP[2], 2D
484: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[2].zzzz
485: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
486: MOV TEMP[17].xy, TEMP[17].xyyy
487: MOV TEMP[17].w, TEMP[8].xxxx
488: TXL TEMP[17], TEMP[17], SAMP[0], 2D
489: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[3].yyyy
490: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
491: MUL TEMP[17], TEMP[17], TEMP[26].xxxx
492: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17]
493: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17]
494: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17]
495: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17]
496: MOV TEMP[18].xy, IN[3].zxzz
497: MOV TEMP[19].x, IMM[2].xxxx
498: FSNE TEMP[20].x, CONST[14].xxxx, TEMP[6].xxxx
499: UIF TEMP[20].xxxx :0
500: MOV TEMP[19].x, IMM[2].yyyy
501: RCP TEMP[20].x, CONST[17].xxxx
502: MUL TEMP[18].xy, IN[3].zxxx, TEMP[20].xxxx
503: ELSE :0
504: RCP TEMP[20].x, CONST[16].xxxx
505: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx
506: ENDIF
507: FRC TEMP[18].xy, TEMP[18].xyyy
508: MUL TEMP[20].x, CONST[18].xxxx, IMM[2].wwww
509: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx
510: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx
511: MUL TEMP[19].x, TEMP[19].xxxx, CONST[18].xxxx
512: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx
513: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
514: MOV TEMP[19].xy, TEMP[18].xyyy
515: MOV TEMP[19].w, TEMP[8].xxxx
516: TXL TEMP[19], TEMP[19], SAMP[8], 2D
517: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[1].zzzz
518: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
519: MOV TEMP[21].xy, TEMP[18].xyyy
520: MOV TEMP[21].w, TEMP[8].xxxx
521: TXL TEMP[21], TEMP[21], SAMP[6], 2D
522: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[3].xxxx
523: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
524: MOV TEMP[23].xy, TEMP[18].xyyy
525: MOV TEMP[23].w, TEMP[8].xxxx
526: TXL TEMP[23], TEMP[23], SAMP[4], 2D
527: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].wwww
528: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
529: MOV TEMP[25].xy, TEMP[18].xyyy
530: MOV TEMP[25].w, TEMP[8].xxxx
531: TXL TEMP[25], TEMP[25], SAMP[2], 2D
532: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[2].zzzz
533: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
534: MOV TEMP[18].xy, TEMP[18].xyyy
535: MOV TEMP[18].w, TEMP[8].xxxx
536: TXL TEMP[18], TEMP[18], SAMP[0], 2D
537: FSEQ TEMP[27].x, TEMP[3].zzzz, IMM[3].yyyy
538: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
539: MUL TEMP[18], TEMP[18], TEMP[27].xxxx
540: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18]
541: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18]
542: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18]
543: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18]
544: MUL TEMP[16], TEMP[16], TEMP[2].zzzz
545: MAD TEMP[16], TEMP[17], TEMP[2].xxxx, TEMP[16]
546: MAD TEMP[16], TEMP[18], TEMP[2].yyyy, TEMP[16]
547: MUL TEMP[13], TEMP[13], TEMP[2].zzzz
548: MAD TEMP[13], TEMP[14], TEMP[2].xxxx, TEMP[13]
549: MAD TEMP[13], TEMP[15], TEMP[2].yyyy, TEMP[13]
550: MUL TEMP[10], TEMP[10], TEMP[2].zzzz
551: MAD TEMP[10], TEMP[11], TEMP[2].xxxx, TEMP[10]
552: MAD TEMP[10], TEMP[12], TEMP[2].yyyy, TEMP[10]
553: MUL TEMP[10], IN[1].xxxx, TEMP[10]
554: MAD TEMP[10], IN[1].yyyy, TEMP[13], TEMP[10]
555: MAD TEMP[10].xyz, IN[1].zzzz, TEMP[16], TEMP[10]
556: MOV TEMP[11].xy, IN[3].zyzz
557: MOV TEMP[12].x, IMM[2].xxxx
558: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[4].xxxx
559: UIF TEMP[13].xxxx :0
560: MOV TEMP[12].x, IMM[2].yyyy
561: RCP TEMP[13].x, CONST[17].xxxx
562: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx
563: ELSE :0
564: RCP TEMP[13].x, CONST[16].xxxx
565: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
566: ENDIF
567: FRC TEMP[11].xy, TEMP[11].xyyy
568: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww
569: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
570: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
571: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx
572: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
573: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
574: MOV TEMP[12].xy, TEMP[11].xyyy
575: MOV TEMP[12].w, TEMP[8].xxxx
576: TXL TEMP[12], TEMP[12], SAMP[9], 2D
577: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz
578: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
579: MOV TEMP[14].xy, TEMP[11].xyyy
580: MOV TEMP[14].w, TEMP[8].xxxx
581: TXL TEMP[14], TEMP[14], SAMP[7], 2D
582: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx
583: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
584: MOV TEMP[16].xy, TEMP[11].xyyy
585: MOV TEMP[16].w, TEMP[8].xxxx
586: TXL TEMP[16], TEMP[16], SAMP[5], 2D
587: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww
588: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
589: MOV TEMP[18].xy, TEMP[11].xyyy
590: MOV TEMP[18].w, TEMP[8].xxxx
591: TXL TEMP[18], TEMP[18], SAMP[3], 2D
592: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz
593: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
594: MOV TEMP[11].xy, TEMP[11].xyyy
595: MOV TEMP[11].w, TEMP[8].xxxx
596: TXL TEMP[11], TEMP[11], SAMP[1], 2D
597: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy
598: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
599: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
600: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
601: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
602: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
603: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11]
604: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz
605: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy
606: MOV_SAT TEMP[28].x, TEMP[12].xxxx
607: MOV TEMP[12].xy, IN[3].zxzz
608: MOV TEMP[13].x, IMM[2].xxxx
609: FSNE TEMP[14].x, CONST[14].xxxx, TEMP[4].xxxx
610: UIF TEMP[14].xxxx :0
611: MOV TEMP[13].x, IMM[2].yyyy
612: RCP TEMP[14].x, CONST[17].xxxx
613: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx
614: ELSE :0
615: RCP TEMP[14].x, CONST[16].xxxx
616: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
617: ENDIF
618: FRC TEMP[12].xy, TEMP[12].xyyy
619: MUL TEMP[14].x, CONST[18].xxxx, IMM[2].wwww
620: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
621: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
622: MUL TEMP[13].x, TEMP[13].xxxx, CONST[18].xxxx
623: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
624: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
625: MOV TEMP[13].xy, TEMP[12].xyyy
626: MOV TEMP[13].w, TEMP[8].xxxx
627: TXL TEMP[13], TEMP[13], SAMP[9], 2D
628: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz
629: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
630: MOV TEMP[15].xy, TEMP[12].xyyy
631: MOV TEMP[15].w, TEMP[8].xxxx
632: TXL TEMP[15], TEMP[15], SAMP[7], 2D
633: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx
634: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
635: MOV TEMP[17].xy, TEMP[12].xyyy
636: MOV TEMP[17].w, TEMP[8].xxxx
637: TXL TEMP[17], TEMP[17], SAMP[5], 2D
638: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww
639: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
640: MOV TEMP[19].xy, TEMP[12].xyyy
641: MOV TEMP[19].w, TEMP[8].xxxx
642: TXL TEMP[19], TEMP[19], SAMP[3], 2D
643: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz
644: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
645: MOV TEMP[12].xy, TEMP[12].xyyy
646: MOV TEMP[12].w, TEMP[8].xxxx
647: TXL TEMP[12], TEMP[12], SAMP[1], 2D
648: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy
649: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
650: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
651: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
652: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
653: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
654: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12]
655: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz
656: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy
657: MOV_SAT TEMP[29].x, TEMP[13].xxxx
658: MOV TEMP[13].xy, IN[3].xyxx
659: MOV TEMP[14].x, IMM[2].xxxx
660: FSNE TEMP[15].x, CONST[14].xxxx, TEMP[4].xxxx
661: UIF TEMP[15].xxxx :0
662: MOV TEMP[14].x, IMM[2].yyyy
663: RCP TEMP[15].x, CONST[17].xxxx
664: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx
665: ELSE :0
666: RCP TEMP[15].x, CONST[16].xxxx
667: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
668: ENDIF
669: FRC TEMP[13].xy, TEMP[13].xyyy
670: MUL TEMP[15].x, CONST[18].xxxx, IMM[2].wwww
671: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
672: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
673: MUL TEMP[14].x, TEMP[14].xxxx, CONST[18].xxxx
674: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
675: MAD TEMP[4].xy, TEMP[13].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
676: MOV TEMP[13].xy, TEMP[4].xyyy
677: MOV TEMP[13].w, TEMP[8].xxxx
678: TXL TEMP[13], TEMP[13], SAMP[9], 2D
679: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz
680: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
681: MOV TEMP[15].xy, TEMP[4].xyyy
682: MOV TEMP[15].w, TEMP[8].xxxx
683: TXL TEMP[15], TEMP[15], SAMP[7], 2D
684: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx
685: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
686: MOV TEMP[17].xy, TEMP[4].xyyy
687: MOV TEMP[17].w, TEMP[8].xxxx
688: TXL TEMP[17], TEMP[17], SAMP[5], 2D
689: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww
690: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
691: MOV TEMP[19].xy, TEMP[4].xyyy
692: MOV TEMP[19].w, TEMP[8].xxxx
693: TXL TEMP[19], TEMP[19], SAMP[3], 2D
694: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz
695: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
696: MOV TEMP[4].xy, TEMP[4].xyyy
697: MOV TEMP[4].w, TEMP[8].xxxx
698: TXL TEMP[4], TEMP[4], SAMP[1], 2D
699: FSEQ TEMP[7].x, TEMP[7].zzzz, IMM[3].yyyy
700: AND TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz
701: MUL TEMP[4], TEMP[4], TEMP[7].xxxx
702: MAD TEMP[4], TEMP[19], TEMP[20].xxxx, TEMP[4]
703: MAD TEMP[4], TEMP[17], TEMP[18].xxxx, TEMP[4]
704: MAD TEMP[4], TEMP[15], TEMP[16].xxxx, TEMP[4]
705: MAD TEMP[4].yw, TEMP[13], TEMP[14].xxxx, TEMP[4]
706: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz
707: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy
708: MOV_SAT TEMP[30].x, TEMP[7].xxxx
709: MOV TEMP[7].x, IMM[3].yyyy
710: MOV TEMP[7].y, TEMP[11].xxxx
711: MOV TEMP[7].z, TEMP[11].yyyy
712: MOV TEMP[11].y, IMM[3].yyyy
713: MOV TEMP[11].x, TEMP[12].yyyy
714: MOV TEMP[11].z, TEMP[12].xxxx
715: MOV TEMP[12].z, IMM[3].yyyy
716: MOV TEMP[12].xy, TEMP[4].xyxx
717: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[2].xxxx
718: MAD TEMP[4].xyz, TEMP[11].xyzz, TEMP[2].yyyy, TEMP[4].xyzz
719: MAD TEMP[4].xyz, TEMP[12].xyzz, TEMP[2].zzzz, TEMP[4].xyzz
720: MOV TEMP[7].xy, IN[3].zyzz
721: MOV TEMP[11].x, IMM[2].xxxx
722: FSNE TEMP[12].x, CONST[14].xxxx, TEMP[5].xxxx
723: UIF TEMP[12].xxxx :0
724: MOV TEMP[11].x, IMM[2].yyyy
725: RCP TEMP[12].x, CONST[17].xxxx
726: MUL TEMP[7].xy, IN[3].zyyy, TEMP[12].xxxx
727: ELSE :0
728: RCP TEMP[12].x, CONST[16].xxxx
729: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx
730: ENDIF
731: FRC TEMP[7].xy, TEMP[7].xyyy
732: MUL TEMP[12].x, CONST[18].xxxx, IMM[2].wwww
733: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
734: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
735: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].xxxx
736: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
737: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
738: MOV TEMP[11].xy, TEMP[7].xyyy
739: MOV TEMP[11].w, TEMP[8].xxxx
740: TXL TEMP[11], TEMP[11], SAMP[9], 2D
741: FSEQ TEMP[12].x, TEMP[9].zzzz, IMM[1].zzzz
742: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
743: MOV TEMP[13].xy, TEMP[7].xyyy
744: MOV TEMP[13].w, TEMP[8].xxxx
745: TXL TEMP[13], TEMP[13], SAMP[7], 2D
746: FSEQ TEMP[14].x, TEMP[9].zzzz, IMM[3].xxxx
747: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
748: MOV TEMP[15].xy, TEMP[7].xyyy
749: MOV TEMP[15].w, TEMP[8].xxxx
750: TXL TEMP[15], TEMP[15], SAMP[5], 2D
751: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[2].wwww
752: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
753: MOV TEMP[17].xy, TEMP[7].xyyy
754: MOV TEMP[17].w, TEMP[8].xxxx
755: TXL TEMP[17], TEMP[17], SAMP[3], 2D
756: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[2].zzzz
757: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
758: MOV TEMP[7].xy, TEMP[7].xyyy
759: MOV TEMP[7].w, TEMP[8].xxxx
760: TXL TEMP[7], TEMP[7], SAMP[1], 2D
761: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].yyyy
762: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
763: MUL TEMP[7], TEMP[7], TEMP[19].xxxx
764: MAD TEMP[7], TEMP[17], TEMP[18].xxxx, TEMP[7]
765: MAD TEMP[7], TEMP[15], TEMP[16].xxxx, TEMP[7]
766: MAD TEMP[7], TEMP[13], TEMP[14].xxxx, TEMP[7]
767: MAD TEMP[7].yw, TEMP[11], TEMP[12].xxxx, TEMP[7]
768: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz
769: DP2 TEMP[11].x, TEMP[7].xyyy, TEMP[7].xyyy
770: MOV_SAT TEMP[31].x, TEMP[11].xxxx
771: MOV TEMP[11].xy, IN[3].zxzz
772: MOV TEMP[12].x, IMM[2].xxxx
773: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[5].xxxx
774: UIF TEMP[13].xxxx :0
775: MOV TEMP[12].x, IMM[2].yyyy
776: RCP TEMP[13].x, CONST[17].xxxx
777: MUL TEMP[11].xy, IN[3].zxxx, TEMP[13].xxxx
778: ELSE :0
779: RCP TEMP[13].x, CONST[16].xxxx
780: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
781: ENDIF
782: FRC TEMP[11].xy, TEMP[11].xyyy
783: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww
784: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
785: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
786: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx
787: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
788: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
789: MOV TEMP[12].xy, TEMP[11].xyyy
790: MOV TEMP[12].w, TEMP[8].xxxx
791: TXL TEMP[12], TEMP[12], SAMP[9], 2D
792: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz
793: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
794: MOV TEMP[14].xy, TEMP[11].xyyy
795: MOV TEMP[14].w, TEMP[8].xxxx
796: TXL TEMP[14], TEMP[14], SAMP[7], 2D
797: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx
798: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
799: MOV TEMP[16].xy, TEMP[11].xyyy
800: MOV TEMP[16].w, TEMP[8].xxxx
801: TXL TEMP[16], TEMP[16], SAMP[5], 2D
802: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww
803: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
804: MOV TEMP[18].xy, TEMP[11].xyyy
805: MOV TEMP[18].w, TEMP[8].xxxx
806: TXL TEMP[18], TEMP[18], SAMP[3], 2D
807: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz
808: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
809: MOV TEMP[11].xy, TEMP[11].xyyy
810: MOV TEMP[11].w, TEMP[8].xxxx
811: TXL TEMP[11], TEMP[11], SAMP[1], 2D
812: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[3].yyyy
813: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
814: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
815: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
816: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
817: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
818: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11]
819: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz
820: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy
821: MOV_SAT TEMP[32].x, TEMP[12].xxxx
822: MOV TEMP[12].xy, IN[3].xyxx
823: MOV TEMP[13].x, IMM[2].xxxx
824: FSNE TEMP[14].x, CONST[14].xxxx, TEMP[5].xxxx
825: UIF TEMP[14].xxxx :0
826: MOV TEMP[13].x, IMM[2].yyyy
827: RCP TEMP[14].x, CONST[17].xxxx
828: MUL TEMP[12].xy, IN[3].xyyy, TEMP[14].xxxx
829: ELSE :0
830: RCP TEMP[14].x, CONST[16].xxxx
831: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
832: ENDIF
833: FRC TEMP[12].xy, TEMP[12].xyyy
834: MUL TEMP[14].x, CONST[18].xxxx, IMM[2].wwww
835: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
836: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
837: MUL TEMP[13].x, TEMP[13].xxxx, CONST[18].xxxx
838: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
839: MAD TEMP[5].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
840: MOV TEMP[12].xy, TEMP[5].xyyy
841: MOV TEMP[12].w, TEMP[8].xxxx
842: TXL TEMP[12], TEMP[12], SAMP[9], 2D
843: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz
844: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
845: MOV TEMP[14].xy, TEMP[5].xyyy
846: MOV TEMP[14].w, TEMP[8].xxxx
847: TXL TEMP[14], TEMP[14], SAMP[7], 2D
848: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx
849: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
850: MOV TEMP[16].xy, TEMP[5].xyyy
851: MOV TEMP[16].w, TEMP[8].xxxx
852: TXL TEMP[16], TEMP[16], SAMP[5], 2D
853: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww
854: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
855: MOV TEMP[18].xy, TEMP[5].xyyy
856: MOV TEMP[18].w, TEMP[8].xxxx
857: TXL TEMP[18], TEMP[18], SAMP[3], 2D
858: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz
859: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
860: MOV TEMP[5].xy, TEMP[5].xyyy
861: MOV TEMP[5].w, TEMP[8].xxxx
862: TXL TEMP[5], TEMP[5], SAMP[1], 2D
863: FSEQ TEMP[9].x, TEMP[9].zzzz, IMM[3].yyyy
864: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz
865: MUL TEMP[5], TEMP[5], TEMP[9].xxxx
866: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5]
867: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5]
868: MAD TEMP[5], TEMP[14], TEMP[15].xxxx, TEMP[5]
869: MAD TEMP[5].yw, TEMP[12], TEMP[13].xxxx, TEMP[5]
870: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz
871: DP2 TEMP[9].x, TEMP[5].xyyy, TEMP[5].xyyy
872: MOV_SAT TEMP[33].x, TEMP[9].xxxx
873: MOV TEMP[9].x, IMM[3].yyyy
874: MOV TEMP[9].y, TEMP[7].xxxx
875: MOV TEMP[9].z, TEMP[7].yyyy
876: MOV TEMP[7].y, IMM[3].yyyy
877: MOV TEMP[7].x, TEMP[11].yyyy
878: MOV TEMP[7].z, TEMP[11].xxxx
879: MOV TEMP[11].z, IMM[3].yyyy
880: MOV TEMP[11].xy, TEMP[5].xyxx
881: MUL TEMP[5].xyz, TEMP[9].xyzz, TEMP[2].xxxx
882: MAD TEMP[5].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[5].xyzz
883: MAD TEMP[5].xyz, TEMP[11].xyzz, TEMP[2].zzzz, TEMP[5].xyzz
884: MOV TEMP[7].xy, IN[3].zyzz
885: MOV TEMP[9].x, IMM[2].xxxx
886: FSNE TEMP[11].x, CONST[14].xxxx, TEMP[6].xxxx
887: UIF TEMP[11].xxxx :0
888: MOV TEMP[9].x, IMM[2].yyyy
889: RCP TEMP[11].x, CONST[17].xxxx
890: MUL TEMP[7].xy, IN[3].zyyy, TEMP[11].xxxx
891: ELSE :0
892: RCP TEMP[11].x, CONST[16].xxxx
893: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx
894: ENDIF
895: FRC TEMP[7].xy, TEMP[7].xyyy
896: MUL TEMP[11].x, CONST[18].xxxx, IMM[2].wwww
897: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[9].xxxx
898: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx
899: MUL TEMP[9].x, TEMP[9].xxxx, CONST[18].xxxx
900: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx, TEMP[9].xxxx
901: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
902: MOV TEMP[9].xy, TEMP[7].xyyy
903: MOV TEMP[9].w, TEMP[8].xxxx
904: TXL TEMP[9], TEMP[9], SAMP[9], 2D
905: FSEQ TEMP[11].x, TEMP[3].zzzz, IMM[1].zzzz
906: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
907: MOV TEMP[12].xy, TEMP[7].xyyy
908: MOV TEMP[12].w, TEMP[8].xxxx
909: TXL TEMP[12], TEMP[12], SAMP[7], 2D
910: FSEQ TEMP[13].x, TEMP[3].zzzz, IMM[3].xxxx
911: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
912: MOV TEMP[14].xy, TEMP[7].xyyy
913: MOV TEMP[14].w, TEMP[8].xxxx
914: TXL TEMP[14], TEMP[14], SAMP[5], 2D
915: FSEQ TEMP[15].x, TEMP[3].zzzz, IMM[2].wwww
916: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
917: MOV TEMP[16].xy, TEMP[7].xyyy
918: MOV TEMP[16].w, TEMP[8].xxxx
919: TXL TEMP[16], TEMP[16], SAMP[3], 2D
920: FSEQ TEMP[17].x, TEMP[3].zzzz, IMM[2].zzzz
921: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
922: MOV TEMP[7].xy, TEMP[7].xyyy
923: MOV TEMP[7].w, TEMP[8].xxxx
924: TXL TEMP[7], TEMP[7], SAMP[1], 2D
925: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[3].yyyy
926: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
927: MUL TEMP[7], TEMP[7], TEMP[18].xxxx
928: MAD TEMP[7], TEMP[16], TEMP[17].xxxx, TEMP[7]
929: MAD TEMP[7], TEMP[14], TEMP[15].xxxx, TEMP[7]
930: MAD TEMP[7], TEMP[12], TEMP[13].xxxx, TEMP[7]
931: MAD TEMP[7].yw, TEMP[9], TEMP[11].xxxx, TEMP[7]
932: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz
933: DP2 TEMP[9].x, TEMP[7].xyyy, TEMP[7].xyyy
934: MOV_SAT TEMP[34].x, TEMP[9].xxxx
935: MOV TEMP[9].xy, IN[3].zxzz
936: MOV TEMP[11].x, IMM[2].xxxx
937: FSNE TEMP[12].x, CONST[14].xxxx, TEMP[6].xxxx
938: UIF TEMP[12].xxxx :0
939: MOV TEMP[11].x, IMM[2].yyyy
940: RCP TEMP[12].x, CONST[17].xxxx
941: MUL TEMP[9].xy, IN[3].zxxx, TEMP[12].xxxx
942: ELSE :0
943: RCP TEMP[12].x, CONST[16].xxxx
944: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx
945: ENDIF
946: FRC TEMP[9].xy, TEMP[9].xyyy
947: MUL TEMP[12].x, CONST[18].xxxx, IMM[2].wwww
948: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
949: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
950: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].xxxx
951: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
952: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
953: MOV TEMP[11].xy, TEMP[9].xyyy
954: MOV TEMP[11].w, TEMP[8].xxxx
955: TXL TEMP[11], TEMP[11], SAMP[9], 2D
956: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz
957: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
958: MOV TEMP[13].xy, TEMP[9].xyyy
959: MOV TEMP[13].w, TEMP[8].xxxx
960: TXL TEMP[13], TEMP[13], SAMP[7], 2D
961: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx
962: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
963: MOV TEMP[15].xy, TEMP[9].xyyy
964: MOV TEMP[15].w, TEMP[8].xxxx
965: TXL TEMP[15], TEMP[15], SAMP[5], 2D
966: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww
967: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
968: MOV TEMP[17].xy, TEMP[9].xyyy
969: MOV TEMP[17].w, TEMP[8].xxxx
970: TXL TEMP[17], TEMP[17], SAMP[3], 2D
971: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz
972: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
973: MOV TEMP[9].xy, TEMP[9].xyyy
974: MOV TEMP[9].w, TEMP[8].xxxx
975: TXL TEMP[9], TEMP[9], SAMP[1], 2D
976: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[3].yyyy
977: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
978: MUL TEMP[9], TEMP[9], TEMP[19].xxxx
979: MAD TEMP[9], TEMP[17], TEMP[18].xxxx, TEMP[9]
980: MAD TEMP[9], TEMP[15], TEMP[16].xxxx, TEMP[9]
981: MAD TEMP[9], TEMP[13], TEMP[14].xxxx, TEMP[9]
982: MAD TEMP[9].yw, TEMP[11], TEMP[12].xxxx, TEMP[9]
983: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz
984: DP2 TEMP[11].x, TEMP[9].xyyy, TEMP[9].xyyy
985: MOV_SAT TEMP[35].x, TEMP[11].xxxx
986: MOV TEMP[11].xy, IN[3].xyxx
987: MOV TEMP[12].x, IMM[2].xxxx
988: FSNE TEMP[13].x, CONST[14].xxxx, TEMP[6].xxxx
989: UIF TEMP[13].xxxx :0
990: MOV TEMP[12].x, IMM[2].yyyy
991: RCP TEMP[13].x, CONST[17].xxxx
992: MUL TEMP[11].xy, IN[3].xyyy, TEMP[13].xxxx
993: ELSE :0
994: RCP TEMP[13].x, CONST[16].xxxx
995: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
996: ENDIF
997: FRC TEMP[11].xy, TEMP[11].xyyy
998: MUL TEMP[13].x, CONST[18].xxxx, IMM[2].wwww
999: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
1000: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
1001: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].xxxx
1002: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
1003: MAD TEMP[6].xy, TEMP[11].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
1004: MOV TEMP[11].xy, TEMP[6].xyyy
1005: MOV TEMP[11].w, TEMP[8].xxxx
1006: TXL TEMP[11], TEMP[11], SAMP[9], 2D
1007: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz
1008: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
1009: MOV TEMP[13].xy, TEMP[6].xyyy
1010: MOV TEMP[13].w, TEMP[8].xxxx
1011: TXL TEMP[13], TEMP[13], SAMP[7], 2D
1012: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx
1013: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
1014: MOV TEMP[15].xy, TEMP[6].xyyy
1015: MOV TEMP[15].w, TEMP[8].xxxx
1016: TXL TEMP[15], TEMP[15], SAMP[5], 2D
1017: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww
1018: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
1019: MOV TEMP[17].xy, TEMP[6].xyyy
1020: MOV TEMP[17].w, TEMP[8].xxxx
1021: TXL TEMP[17], TEMP[17], SAMP[3], 2D
1022: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz
1023: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
1024: MOV TEMP[6].xy, TEMP[6].xyyy
1025: MOV TEMP[6].w, TEMP[8].xxxx
1026: TXL TEMP[6], TEMP[6], SAMP[1], 2D
1027: FSEQ TEMP[3].x, TEMP[3].zzzz, IMM[3].yyyy
1028: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].zzzz
1029: MUL TEMP[3], TEMP[6], TEMP[3].xxxx
1030: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3]
1031: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3]
1032: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3]
1033: MAD TEMP[3].yw, TEMP[11], TEMP[12].xxxx, TEMP[3]
1034: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[2].wwww, IMM[3].zzzz
1035: DP2 TEMP[6].x, TEMP[3].xyyy, TEMP[3].xyyy
1036: MOV_SAT TEMP[36].x, TEMP[6].xxxx
1037: MOV TEMP[6].x, IMM[3].yyyy
1038: MOV TEMP[6].y, TEMP[7].xxxx
1039: MOV TEMP[6].z, TEMP[7].yyyy
1040: MOV TEMP[7].y, IMM[3].yyyy
1041: MOV TEMP[7].x, TEMP[9].yyyy
1042: MOV TEMP[7].z, TEMP[9].xxxx
1043: MOV TEMP[8].z, IMM[3].yyyy
1044: MOV TEMP[8].xy, TEMP[3].xyxx
1045: MOV TEMP[3].w, IMM[2].zzzz
1046: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[2].xxxx
1047: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[6].xyzz
1048: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].zzzz, TEMP[6].xyzz
1049: MUL TEMP[4].xyz, IN[1].xxxx, TEMP[4].xyzz
1050: MAD TEMP[4].xyz, IN[1].yyyy, TEMP[5].xyzz, TEMP[4].xyzz
1051: MAD TEMP[3].xyz, IN[1].zzzz, TEMP[2].xyzz, TEMP[4].xyzz
1052: DP4 TEMP[2].x, TEMP[3], TEMP[3]
1053: RSQ TEMP[2].x, TEMP[2].xxxx
1054: MUL TEMP[2].xyz, TEMP[3], TEMP[2].xxxx
1055: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[0].wwww
1056: ADD TEMP[2].xyz, IN[2].yzww, -TEMP[2].xyzz
1057: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
1058: RSQ TEMP[3].x, TEMP[3].xxxx
1059: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
1060: ADD TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].xyzz
1061: DP3 TEMP[3].x, TEMP[1].xyzz, TEMP[1].xyzz
1062: RSQ TEMP[3].x, TEMP[3].xxxx
1063: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
1064: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[1].xyzz
1065: MAX TEMP[1].x, IMM[3].wwww, TEMP[1].xxxx
1066: MUL TEMP[3].x, IMM[4].xxxx, IN[1].wwww
1067: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
1068: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1069: MOV TEMP[3].w, IMM[3].yyyy
1070: MOV TEMP[3].xyz, CONST[21].xyzx
1071: MUL TEMP[4].x, IMM[2].wwww, TEMP[1].xxxx
1072: ADD TEMP[4].x, IMM[3].xxxx, -TEMP[4].xxxx
1073: MUL TEMP[4].x, TEMP[1].xxxx, TEMP[4].xxxx
1074: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx
1075: MUL TEMP[1].x, TEMP[1].xxxx, IN[1].wwww
1076: MUL TEMP[4].xyz, TEMP[10].xyzz, CONST[3].xyzz
1077: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[0].xyzz
1078: MOV_SAT TEMP[2].x, TEMP[2].xxxx
1079: MUL TEMP[5], CONST[22], IMM[2].wwww
1080: MAX TEMP[3], TEMP[5], TEMP[3]
1081: MIN TEMP[3].xyz, TEMP[3], IMM[4].yyyz
1082: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[10].xyzz
1083: MAD TEMP[2].xyz, TEMP[4].xyzz, TEMP[2].xxxx, TEMP[3].xyzz
1084: MAD TEMP[1].xyz, CONST[3].xyzz, TEMP[1].xxxx, TEMP[2].xyzz
1085: MUL TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww
1086: MAD TEMP[1].x, IN[2].xxxx, CONST[2].zzzz, CONST[2].wwww
1087: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1088: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
1089: MOV TEMP[0].w, IMM[2].zzzz
1090: MOV OUT[0], TEMP[0]
1091: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360)
%48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%49 = load <8 x i32>, <8 x i32> addrspace(2)* %48, align 32, !tbaa !0
%50 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%51 = load <4 x i32>, <4 x i32> addrspace(2)* %50, align 16, !tbaa !0
%52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%53 = load <8 x i32>, <8 x i32> addrspace(2)* %52, align 32, !tbaa !0
%54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%55 = load <4 x i32>, <4 x i32> addrspace(2)* %54, align 16, !tbaa !0
%56 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%57 = load <8 x i32>, <8 x i32> addrspace(2)* %56, align 32, !tbaa !0
%58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%59 = load <4 x i32>, <4 x i32> addrspace(2)* %58, align 16, !tbaa !0
%60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0
%62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0
%64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0
%66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0
%68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0
%70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0
%72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6
%73 = load <8 x i32>, <8 x i32> addrspace(2)* %72, align 32, !tbaa !0
%74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6
%75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0
%76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7
%77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0
%78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7
%79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0
%80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8
%81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0
%82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8
%83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0
%84 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9
%85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, align 32, !tbaa !0
%86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9
%87 = load <4 x i32>, <4 x i32> addrspace(2)* %86, align 16, !tbaa !0
%88 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%89 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%90 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%91 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%92 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%93 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%94 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%95 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%96 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%97 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%98 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%99 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%100 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%101 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%102 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%103 = fmul float %27, %27
%104 = fmul float %28, %28
%105 = fadd float %104, %103
%106 = fmul float %29, %29
%107 = fadd float %105, %106
%108 = call float @llvm.AMDGPU.rsq.clamped.f32(float %107)
%109 = fmul float %27, %108
%110 = fmul float %28, %108
%111 = fmul float %29, %108
%112 = fsub float %24, %100
%113 = fsub float %25, %101
%114 = fsub float %26, %102
%115 = fmul float %112, %112
%116 = fmul float %113, %113
%117 = fadd float %116, %115
%118 = fmul float %114, %114
%119 = fadd float %117, %118
%120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119)
%121 = fmul float %112, %120
%122 = fmul float %113, %120
%123 = fmul float %114, %120
%124 = call float @llvm.fabs.f32(float %97)
%125 = call float @llvm.fabs.f32(float %98)
%126 = call float @llvm.fabs.f32(float %99)
%127 = fmul float %124, %124
%128 = fmul float %125, %125
%129 = fadd float %128, %127
%130 = fmul float %126, %126
%131 = fadd float %129, %130
%132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131)
%133 = fmul float %124, %132
%134 = fadd float %133, 0xBFC99999A0000000
%135 = fmul float %125, %132
%136 = fadd float %135, 0xBFC99999A0000000
%137 = fmul float %126, %132
%138 = fadd float %137, 0xBFC99999A0000000
%139 = fmul float %134, 7.000000e+00
%140 = fmul float %136, 7.000000e+00
%141 = fmul float %138, 7.000000e+00
%142 = call float @llvm.maxnum.f32(float %139, float 0x3F847AE140000000)
%143 = call float @llvm.maxnum.f32(float %140, float 0x3F847AE140000000)
%144 = call float @llvm.maxnum.f32(float %141, float 0x3F847AE140000000)
%145 = fadd float %142, %143
%146 = fadd float %145, %144
%147 = fdiv float 1.000000e+00, %146
%148 = fmul float %142, %147
%149 = fmul float %143, %147
%150 = fmul float %144, %147
%151 = fadd float %88, 5.000000e-01
%152 = fadd float %89, 5.000000e-01
%153 = fadd float %90, 5.000000e-01
%154 = call float @llvm.floor.f32(float %151)
%155 = call float @llvm.floor.f32(float %152)
%156 = call float @llvm.floor.f32(float %153)
%157 = fmul float %154, %35
%158 = call float @llvm.floor.f32(float %157)
%159 = fmul float %158, %35
%160 = fcmp ult float %154, 6.400000e+01
br i1 %160, label %ELSE, label %IF
IF: ; preds = %main_body
%161 = fadd float %154, -6.400000e+01
%162 = fmul float %161, %36
%163 = call float @llvm.floor.f32(float %162)
%164 = fmul float %163, %36
%165 = call float @llvm.floor.f32(float %162)
%166 = fsub float %162, %165
%167 = call float @llvm.floor.f32(float %164)
%168 = fsub float %164, %167
%169 = call float @llvm.floor.f32(float %164)
%170 = fadd float %169, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%171 = call float @llvm.floor.f32(float %157)
%172 = fsub float %157, %171
%173 = call float @llvm.floor.f32(float %159)
%174 = fsub float %159, %173
%175 = call float @llvm.floor.f32(float %159)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp30.0 = phi float [ %170, %IF ], [ %175, %ELSE ]
%temp29.0 = phi float [ %168, %IF ], [ %174, %ELSE ]
%temp28.0 = phi float [ %166, %IF ], [ %172, %ELSE ]
%temp16.0 = phi float [ %36, %IF ], [ %35, %ELSE ]
%176 = fmul float %155, %35
%177 = call float @llvm.floor.f32(float %176)
%178 = fmul float %177, %35
%179 = fcmp ult float %155, 6.400000e+01
br i1 %179, label %ELSE150, label %IF149
IF149: ; preds = %ENDIF
%180 = fadd float %155, -6.400000e+01
%181 = fmul float %180, %36
%182 = call float @llvm.floor.f32(float %181)
%183 = fmul float %182, %36
%184 = call float @llvm.floor.f32(float %181)
%185 = fsub float %181, %184
%186 = call float @llvm.floor.f32(float %183)
%187 = fsub float %183, %186
%188 = call float @llvm.floor.f32(float %183)
%189 = fadd float %188, 4.000000e+00
br label %ENDIF148
ELSE150: ; preds = %ENDIF
%190 = call float @llvm.floor.f32(float %176)
%191 = fsub float %176, %190
%192 = call float @llvm.floor.f32(float %178)
%193 = fsub float %178, %192
%194 = call float @llvm.floor.f32(float %178)
br label %ENDIF148
ENDIF148: ; preds = %ELSE150, %IF149
%temp36.0 = phi float [ %185, %IF149 ], [ %191, %ELSE150 ]
%temp37.0 = phi float [ %187, %IF149 ], [ %193, %ELSE150 ]
%temp38.0 = phi float [ %189, %IF149 ], [ %194, %ELSE150 ]
%temp20.0 = phi float [ %36, %IF149 ], [ %35, %ELSE150 ]
%195 = fmul float %156, %35
%196 = call float @llvm.floor.f32(float %195)
%197 = fmul float %196, %35
%198 = fcmp ult float %156, 6.400000e+01
br i1 %198, label %ELSE153, label %IF152
IF152: ; preds = %ENDIF148
%199 = fadd float %156, -6.400000e+01
%200 = fmul float %199, %36
%201 = call float @llvm.floor.f32(float %200)
%202 = fmul float %201, %36
%203 = call float @llvm.floor.f32(float %200)
%204 = fsub float %200, %203
%205 = call float @llvm.floor.f32(float %202)
%206 = fsub float %202, %205
%207 = call float @llvm.floor.f32(float %202)
%208 = fadd float %207, 4.000000e+00
br label %ENDIF151
ELSE153: ; preds = %ENDIF148
%209 = call float @llvm.floor.f32(float %195)
%210 = fsub float %195, %209
%211 = call float @llvm.floor.f32(float %197)
%212 = fsub float %197, %211
%213 = call float @llvm.floor.f32(float %197)
br label %ENDIF151
ENDIF151: ; preds = %ELSE153, %IF152
%temp24.0 = phi float [ %36, %IF152 ], [ %35, %ELSE153 ]
%temp14.0 = phi float [ %208, %IF152 ], [ %213, %ELSE153 ]
%temp13.0 = phi float [ %206, %IF152 ], [ %212, %ELSE153 ]
%temp12.0 = phi float [ %204, %IF152 ], [ %210, %ELSE153 ]
%214 = fsub float %100, %24
%215 = fsub float %101, %25
%216 = fsub float %102, %26
%217 = fmul float %214, %214
%218 = fmul float %215, %215
%219 = fadd float %218, %217
%220 = fmul float %216, %216
%221 = fadd float %219, %220
%222 = fmul float %41, %221
%223 = call float @llvm.log2.f32(float %222)
%224 = fmul float %223, 0x3FE62E4300000000
%225 = fmul float %224, %40
%226 = fcmp une float %35, %temp16.0
%.sink208 = select i1 %226, float %38, float %37
%temp44.0 = select i1 %226, float 1.953125e-03, float 3.906250e-03
%227 = fdiv float 1.000000e+00, %.sink208
%228 = fmul float %100, %227
%229 = fmul float %101, %227
%230 = call float @llvm.floor.f32(float %228)
%231 = fsub float %228, %230
%232 = call float @llvm.floor.f32(float %229)
%233 = fsub float %229, %232
%234 = fmul float %39, 2.000000e+00
%235 = fmul float %234, %temp44.0
%236 = fsub float 1.000000e+00, %235
%237 = fmul float %temp44.0, %39
%238 = fmul float %231, %236
%239 = fadd float %238, %237
%240 = fmul float %233, %236
%241 = fadd float %240, %237
%242 = fmul float %239, %temp16.0
%243 = fadd float %242, %temp28.0
%244 = fmul float %241, %temp16.0
%245 = fadd float %244, %temp29.0
%246 = bitcast float %243 to i32
%247 = bitcast float %245 to i32
%248 = bitcast float %225 to i32
%249 = insertelement <4 x i32> undef, i32 %246, i32 0
%250 = insertelement <4 x i32> %249, i32 %247, i32 1
%251 = insertelement <4 x i32> %250, i32 %248, i32 2
%252 = bitcast <8 x i32> %81 to <32 x i8>
%253 = bitcast <4 x i32> %83 to <16 x i8>
%254 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %251, <32 x i8> %252, <16 x i8> %253, i32 2)
%255 = extractelement <4 x float> %254, i32 0
%256 = extractelement <4 x float> %254, i32 1
%257 = extractelement <4 x float> %254, i32 2
%258 = fcmp oeq float %temp30.0, 4.000000e+00
%259 = select i1 %258, float 1.000000e+00, float 0.000000e+00
%260 = bitcast float %243 to i32
%261 = bitcast float %245 to i32
%262 = bitcast float %225 to i32
%263 = insertelement <4 x i32> undef, i32 %260, i32 0
%264 = insertelement <4 x i32> %263, i32 %261, i32 1
%265 = insertelement <4 x i32> %264, i32 %262, i32 2
%266 = bitcast <8 x i32> %73 to <32 x i8>
%267 = bitcast <4 x i32> %75 to <16 x i8>
%268 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2)
%269 = extractelement <4 x float> %268, i32 0
%270 = extractelement <4 x float> %268, i32 1
%271 = extractelement <4 x float> %268, i32 2
%272 = fcmp oeq float %temp30.0, 3.000000e+00
%273 = select i1 %272, float 1.000000e+00, float 0.000000e+00
%274 = bitcast float %243 to i32
%275 = bitcast float %245 to i32
%276 = bitcast float %225 to i32
%277 = insertelement <4 x i32> undef, i32 %274, i32 0
%278 = insertelement <4 x i32> %277, i32 %275, i32 1
%279 = insertelement <4 x i32> %278, i32 %276, i32 2
%280 = bitcast <8 x i32> %65 to <32 x i8>
%281 = bitcast <4 x i32> %67 to <16 x i8>
%282 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 2)
%283 = extractelement <4 x float> %282, i32 0
%284 = extractelement <4 x float> %282, i32 1
%285 = extractelement <4 x float> %282, i32 2
%286 = fcmp oeq float %temp30.0, 2.000000e+00
%287 = select i1 %286, float 1.000000e+00, float 0.000000e+00
%288 = bitcast float %243 to i32
%289 = bitcast float %245 to i32
%290 = bitcast float %225 to i32
%291 = insertelement <4 x i32> undef, i32 %288, i32 0
%292 = insertelement <4 x i32> %291, i32 %289, i32 1
%293 = insertelement <4 x i32> %292, i32 %290, i32 2
%294 = bitcast <8 x i32> %57 to <32 x i8>
%295 = bitcast <4 x i32> %59 to <16 x i8>
%296 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %293, <32 x i8> %294, <16 x i8> %295, i32 2)
%297 = extractelement <4 x float> %296, i32 0
%298 = extractelement <4 x float> %296, i32 1
%299 = extractelement <4 x float> %296, i32 2
%300 = fcmp oeq float %temp30.0, 1.000000e+00
%301 = select i1 %300, float 1.000000e+00, float 0.000000e+00
%302 = bitcast float %243 to i32
%303 = bitcast float %245 to i32
%304 = bitcast float %225 to i32
%305 = insertelement <4 x i32> undef, i32 %302, i32 0
%306 = insertelement <4 x i32> %305, i32 %303, i32 1
%307 = insertelement <4 x i32> %306, i32 %304, i32 2
%308 = bitcast <8 x i32> %49 to <32 x i8>
%309 = bitcast <4 x i32> %51 to <16 x i8>
%310 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %307, <32 x i8> %308, <16 x i8> %309, i32 2)
%311 = extractelement <4 x float> %310, i32 0
%312 = extractelement <4 x float> %310, i32 1
%313 = extractelement <4 x float> %310, i32 2
%314 = fcmp oeq float %temp30.0, 0.000000e+00
%315 = select i1 %314, float 1.000000e+00, float 0.000000e+00
%316 = fmul float %311, %315
%317 = fmul float %312, %315
%318 = fmul float %313, %315
%319 = fmul float %297, %301
%320 = fadd float %319, %316
%321 = fmul float %298, %301
%322 = fadd float %321, %317
%323 = fmul float %299, %301
%324 = fadd float %323, %318
%325 = fmul float %283, %287
%326 = fadd float %325, %320
%327 = fmul float %284, %287
%328 = fadd float %327, %322
%329 = fmul float %285, %287
%330 = fadd float %329, %324
%331 = fmul float %269, %273
%332 = fadd float %331, %326
%333 = fmul float %270, %273
%334 = fadd float %333, %328
%335 = fmul float %271, %273
%336 = fadd float %335, %330
%337 = fmul float %255, %259
%338 = fadd float %337, %332
%339 = fmul float %256, %259
%340 = fadd float %339, %334
%341 = fmul float %257, %259
%342 = fadd float %341, %336
%343 = fcmp une float %35, %temp16.0
%.sink209 = select i1 %343, float %38, float %37
%temp48.0 = select i1 %343, float 1.953125e-03, float 3.906250e-03
%344 = fdiv float 1.000000e+00, %.sink209
%345 = fmul float %102, %344
%346 = fmul float %101, %344
%347 = call float @llvm.floor.f32(float %345)
%348 = fsub float %345, %347
%349 = call float @llvm.floor.f32(float %346)
%350 = fsub float %346, %349
%351 = fmul float %39, 2.000000e+00
%352 = fmul float %351, %temp48.0
%353 = fsub float 1.000000e+00, %352
%354 = fmul float %temp48.0, %39
%355 = fmul float %348, %353
%356 = fadd float %355, %354
%357 = fmul float %350, %353
%358 = fadd float %357, %354
%359 = fmul float %356, %temp16.0
%360 = fadd float %359, %temp28.0
%361 = fmul float %358, %temp16.0
%362 = fadd float %361, %temp29.0
%363 = bitcast float %360 to i32
%364 = bitcast float %362 to i32
%365 = bitcast float %225 to i32
%366 = insertelement <4 x i32> undef, i32 %363, i32 0
%367 = insertelement <4 x i32> %366, i32 %364, i32 1
%368 = insertelement <4 x i32> %367, i32 %365, i32 2
%369 = bitcast <8 x i32> %81 to <32 x i8>
%370 = bitcast <4 x i32> %83 to <16 x i8>
%371 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %368, <32 x i8> %369, <16 x i8> %370, i32 2)
%372 = extractelement <4 x float> %371, i32 0
%373 = extractelement <4 x float> %371, i32 1
%374 = extractelement <4 x float> %371, i32 2
%375 = fcmp oeq float %temp30.0, 4.000000e+00
%376 = select i1 %375, float 1.000000e+00, float 0.000000e+00
%377 = bitcast float %360 to i32
%378 = bitcast float %362 to i32
%379 = bitcast float %225 to i32
%380 = insertelement <4 x i32> undef, i32 %377, i32 0
%381 = insertelement <4 x i32> %380, i32 %378, i32 1
%382 = insertelement <4 x i32> %381, i32 %379, i32 2
%383 = bitcast <8 x i32> %73 to <32 x i8>
%384 = bitcast <4 x i32> %75 to <16 x i8>
%385 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %382, <32 x i8> %383, <16 x i8> %384, i32 2)
%386 = extractelement <4 x float> %385, i32 0
%387 = extractelement <4 x float> %385, i32 1
%388 = extractelement <4 x float> %385, i32 2
%389 = fcmp oeq float %temp30.0, 3.000000e+00
%390 = select i1 %389, float 1.000000e+00, float 0.000000e+00
%391 = bitcast float %360 to i32
%392 = bitcast float %362 to i32
%393 = bitcast float %225 to i32
%394 = insertelement <4 x i32> undef, i32 %391, i32 0
%395 = insertelement <4 x i32> %394, i32 %392, i32 1
%396 = insertelement <4 x i32> %395, i32 %393, i32 2
%397 = bitcast <8 x i32> %65 to <32 x i8>
%398 = bitcast <4 x i32> %67 to <16 x i8>
%399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2)
%400 = extractelement <4 x float> %399, i32 0
%401 = extractelement <4 x float> %399, i32 1
%402 = extractelement <4 x float> %399, i32 2
%403 = fcmp oeq float %temp30.0, 2.000000e+00
%404 = select i1 %403, float 1.000000e+00, float 0.000000e+00
%405 = bitcast float %360 to i32
%406 = bitcast float %362 to i32
%407 = bitcast float %225 to i32
%408 = insertelement <4 x i32> undef, i32 %405, i32 0
%409 = insertelement <4 x i32> %408, i32 %406, i32 1
%410 = insertelement <4 x i32> %409, i32 %407, i32 2
%411 = bitcast <8 x i32> %57 to <32 x i8>
%412 = bitcast <4 x i32> %59 to <16 x i8>
%413 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %410, <32 x i8> %411, <16 x i8> %412, i32 2)
%414 = extractelement <4 x float> %413, i32 0
%415 = extractelement <4 x float> %413, i32 1
%416 = extractelement <4 x float> %413, i32 2
%417 = fcmp oeq float %temp30.0, 1.000000e+00
%418 = select i1 %417, float 1.000000e+00, float 0.000000e+00
%419 = bitcast float %360 to i32
%420 = bitcast float %362 to i32
%421 = bitcast float %225 to i32
%422 = insertelement <4 x i32> undef, i32 %419, i32 0
%423 = insertelement <4 x i32> %422, i32 %420, i32 1
%424 = insertelement <4 x i32> %423, i32 %421, i32 2
%425 = bitcast <8 x i32> %49 to <32 x i8>
%426 = bitcast <4 x i32> %51 to <16 x i8>
%427 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %424, <32 x i8> %425, <16 x i8> %426, i32 2)
%428 = extractelement <4 x float> %427, i32 0
%429 = extractelement <4 x float> %427, i32 1
%430 = extractelement <4 x float> %427, i32 2
%431 = fcmp oeq float %temp30.0, 0.000000e+00
%432 = select i1 %431, float 1.000000e+00, float 0.000000e+00
%433 = fmul float %428, %432
%434 = fmul float %429, %432
%435 = fmul float %430, %432
%436 = fmul float %414, %418
%437 = fadd float %436, %433
%438 = fmul float %415, %418
%439 = fadd float %438, %434
%440 = fmul float %416, %418
%441 = fadd float %440, %435
%442 = fmul float %400, %404
%443 = fadd float %442, %437
%444 = fmul float %401, %404
%445 = fadd float %444, %439
%446 = fmul float %402, %404
%447 = fadd float %446, %441
%448 = fmul float %386, %390
%449 = fadd float %448, %443
%450 = fmul float %387, %390
%451 = fadd float %450, %445
%452 = fmul float %388, %390
%453 = fadd float %452, %447
%454 = fmul float %372, %376
%455 = fadd float %454, %449
%456 = fmul float %373, %376
%457 = fadd float %456, %451
%458 = fmul float %374, %376
%459 = fadd float %458, %453
%460 = fcmp une float %35, %temp16.0
%.sink210 = select i1 %460, float %38, float %37
%temp52.0 = select i1 %460, float 1.953125e-03, float 3.906250e-03
%461 = fdiv float 1.000000e+00, %.sink210
%462 = fmul float %102, %461
%463 = fmul float %100, %461
%464 = call float @llvm.floor.f32(float %462)
%465 = fsub float %462, %464
%466 = call float @llvm.floor.f32(float %463)
%467 = fsub float %463, %466
%468 = fmul float %39, 2.000000e+00
%469 = fmul float %468, %temp52.0
%470 = fsub float 1.000000e+00, %469
%471 = fmul float %temp52.0, %39
%472 = fmul float %465, %470
%473 = fadd float %472, %471
%474 = fmul float %467, %470
%475 = fadd float %474, %471
%476 = fmul float %473, %temp16.0
%477 = fadd float %476, %temp28.0
%478 = fmul float %475, %temp16.0
%479 = fadd float %478, %temp29.0
%480 = bitcast float %477 to i32
%481 = bitcast float %479 to i32
%482 = bitcast float %225 to i32
%483 = insertelement <4 x i32> undef, i32 %480, i32 0
%484 = insertelement <4 x i32> %483, i32 %481, i32 1
%485 = insertelement <4 x i32> %484, i32 %482, i32 2
%486 = bitcast <8 x i32> %81 to <32 x i8>
%487 = bitcast <4 x i32> %83 to <16 x i8>
%488 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %485, <32 x i8> %486, <16 x i8> %487, i32 2)
%489 = extractelement <4 x float> %488, i32 0
%490 = extractelement <4 x float> %488, i32 1
%491 = extractelement <4 x float> %488, i32 2
%492 = fcmp oeq float %temp30.0, 4.000000e+00
%493 = select i1 %492, float 1.000000e+00, float 0.000000e+00
%494 = bitcast float %477 to i32
%495 = bitcast float %479 to i32
%496 = bitcast float %225 to i32
%497 = insertelement <4 x i32> undef, i32 %494, i32 0
%498 = insertelement <4 x i32> %497, i32 %495, i32 1
%499 = insertelement <4 x i32> %498, i32 %496, i32 2
%500 = bitcast <8 x i32> %73 to <32 x i8>
%501 = bitcast <4 x i32> %75 to <16 x i8>
%502 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %499, <32 x i8> %500, <16 x i8> %501, i32 2)
%503 = extractelement <4 x float> %502, i32 0
%504 = extractelement <4 x float> %502, i32 1
%505 = extractelement <4 x float> %502, i32 2
%506 = fcmp oeq float %temp30.0, 3.000000e+00
%507 = select i1 %506, float 1.000000e+00, float 0.000000e+00
%508 = bitcast float %477 to i32
%509 = bitcast float %479 to i32
%510 = bitcast float %225 to i32
%511 = insertelement <4 x i32> undef, i32 %508, i32 0
%512 = insertelement <4 x i32> %511, i32 %509, i32 1
%513 = insertelement <4 x i32> %512, i32 %510, i32 2
%514 = bitcast <8 x i32> %65 to <32 x i8>
%515 = bitcast <4 x i32> %67 to <16 x i8>
%516 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %513, <32 x i8> %514, <16 x i8> %515, i32 2)
%517 = extractelement <4 x float> %516, i32 0
%518 = extractelement <4 x float> %516, i32 1
%519 = extractelement <4 x float> %516, i32 2
%520 = fcmp oeq float %temp30.0, 2.000000e+00
%521 = select i1 %520, float 1.000000e+00, float 0.000000e+00
%522 = bitcast float %477 to i32
%523 = bitcast float %479 to i32
%524 = bitcast float %225 to i32
%525 = insertelement <4 x i32> undef, i32 %522, i32 0
%526 = insertelement <4 x i32> %525, i32 %523, i32 1
%527 = insertelement <4 x i32> %526, i32 %524, i32 2
%528 = bitcast <8 x i32> %57 to <32 x i8>
%529 = bitcast <4 x i32> %59 to <16 x i8>
%530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %527, <32 x i8> %528, <16 x i8> %529, i32 2)
%531 = extractelement <4 x float> %530, i32 0
%532 = extractelement <4 x float> %530, i32 1
%533 = extractelement <4 x float> %530, i32 2
%534 = fcmp oeq float %temp30.0, 1.000000e+00
%535 = select i1 %534, float 1.000000e+00, float 0.000000e+00
%536 = bitcast float %477 to i32
%537 = bitcast float %479 to i32
%538 = bitcast float %225 to i32
%539 = insertelement <4 x i32> undef, i32 %536, i32 0
%540 = insertelement <4 x i32> %539, i32 %537, i32 1
%541 = insertelement <4 x i32> %540, i32 %538, i32 2
%542 = bitcast <8 x i32> %49 to <32 x i8>
%543 = bitcast <4 x i32> %51 to <16 x i8>
%544 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %541, <32 x i8> %542, <16 x i8> %543, i32 2)
%545 = extractelement <4 x float> %544, i32 0
%546 = extractelement <4 x float> %544, i32 1
%547 = extractelement <4 x float> %544, i32 2
%548 = fcmp oeq float %temp30.0, 0.000000e+00
%549 = select i1 %548, float 1.000000e+00, float 0.000000e+00
%550 = fmul float %545, %549
%551 = fmul float %546, %549
%552 = fmul float %547, %549
%553 = fmul float %531, %535
%554 = fadd float %553, %550
%555 = fmul float %532, %535
%556 = fadd float %555, %551
%557 = fmul float %533, %535
%558 = fadd float %557, %552
%559 = fmul float %517, %521
%560 = fadd float %559, %554
%561 = fmul float %518, %521
%562 = fadd float %561, %556
%563 = fmul float %519, %521
%564 = fadd float %563, %558
%565 = fmul float %503, %507
%566 = fadd float %565, %560
%567 = fmul float %504, %507
%568 = fadd float %567, %562
%569 = fmul float %505, %507
%570 = fadd float %569, %564
%571 = fmul float %489, %493
%572 = fadd float %571, %566
%573 = fmul float %490, %493
%574 = fadd float %573, %568
%575 = fmul float %491, %493
%576 = fadd float %575, %570
%577 = fcmp une float %35, %temp20.0
%.sink211 = select i1 %577, float %38, float %37
%temp56.0 = select i1 %577, float 1.953125e-03, float 3.906250e-03
%578 = fdiv float 1.000000e+00, %.sink211
%579 = fmul float %100, %578
%580 = fmul float %101, %578
%581 = call float @llvm.floor.f32(float %579)
%582 = fsub float %579, %581
%583 = call float @llvm.floor.f32(float %580)
%584 = fsub float %580, %583
%585 = fmul float %39, 2.000000e+00
%586 = fmul float %585, %temp56.0
%587 = fsub float 1.000000e+00, %586
%588 = fmul float %temp56.0, %39
%589 = fmul float %582, %587
%590 = fadd float %589, %588
%591 = fmul float %584, %587
%592 = fadd float %591, %588
%593 = fmul float %590, %temp20.0
%594 = fadd float %593, %temp36.0
%595 = fmul float %592, %temp20.0
%596 = fadd float %595, %temp37.0
%597 = bitcast float %594 to i32
%598 = bitcast float %596 to i32
%599 = bitcast float %225 to i32
%600 = insertelement <4 x i32> undef, i32 %597, i32 0
%601 = insertelement <4 x i32> %600, i32 %598, i32 1
%602 = insertelement <4 x i32> %601, i32 %599, i32 2
%603 = bitcast <8 x i32> %81 to <32 x i8>
%604 = bitcast <4 x i32> %83 to <16 x i8>
%605 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %602, <32 x i8> %603, <16 x i8> %604, i32 2)
%606 = extractelement <4 x float> %605, i32 0
%607 = extractelement <4 x float> %605, i32 1
%608 = extractelement <4 x float> %605, i32 2
%609 = fcmp oeq float %temp38.0, 4.000000e+00
%610 = select i1 %609, float 1.000000e+00, float 0.000000e+00
%611 = bitcast float %594 to i32
%612 = bitcast float %596 to i32
%613 = bitcast float %225 to i32
%614 = insertelement <4 x i32> undef, i32 %611, i32 0
%615 = insertelement <4 x i32> %614, i32 %612, i32 1
%616 = insertelement <4 x i32> %615, i32 %613, i32 2
%617 = bitcast <8 x i32> %73 to <32 x i8>
%618 = bitcast <4 x i32> %75 to <16 x i8>
%619 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %616, <32 x i8> %617, <16 x i8> %618, i32 2)
%620 = extractelement <4 x float> %619, i32 0
%621 = extractelement <4 x float> %619, i32 1
%622 = extractelement <4 x float> %619, i32 2
%623 = fcmp oeq float %temp38.0, 3.000000e+00
%624 = select i1 %623, float 1.000000e+00, float 0.000000e+00
%625 = bitcast float %594 to i32
%626 = bitcast float %596 to i32
%627 = bitcast float %225 to i32
%628 = insertelement <4 x i32> undef, i32 %625, i32 0
%629 = insertelement <4 x i32> %628, i32 %626, i32 1
%630 = insertelement <4 x i32> %629, i32 %627, i32 2
%631 = bitcast <8 x i32> %65 to <32 x i8>
%632 = bitcast <4 x i32> %67 to <16 x i8>
%633 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %630, <32 x i8> %631, <16 x i8> %632, i32 2)
%634 = extractelement <4 x float> %633, i32 0
%635 = extractelement <4 x float> %633, i32 1
%636 = extractelement <4 x float> %633, i32 2
%637 = fcmp oeq float %temp38.0, 2.000000e+00
%638 = select i1 %637, float 1.000000e+00, float 0.000000e+00
%639 = bitcast float %594 to i32
%640 = bitcast float %596 to i32
%641 = bitcast float %225 to i32
%642 = insertelement <4 x i32> undef, i32 %639, i32 0
%643 = insertelement <4 x i32> %642, i32 %640, i32 1
%644 = insertelement <4 x i32> %643, i32 %641, i32 2
%645 = bitcast <8 x i32> %57 to <32 x i8>
%646 = bitcast <4 x i32> %59 to <16 x i8>
%647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %644, <32 x i8> %645, <16 x i8> %646, i32 2)
%648 = extractelement <4 x float> %647, i32 0
%649 = extractelement <4 x float> %647, i32 1
%650 = extractelement <4 x float> %647, i32 2
%651 = fcmp oeq float %temp38.0, 1.000000e+00
%652 = select i1 %651, float 1.000000e+00, float 0.000000e+00
%653 = bitcast float %594 to i32
%654 = bitcast float %596 to i32
%655 = bitcast float %225 to i32
%656 = insertelement <4 x i32> undef, i32 %653, i32 0
%657 = insertelement <4 x i32> %656, i32 %654, i32 1
%658 = insertelement <4 x i32> %657, i32 %655, i32 2
%659 = bitcast <8 x i32> %49 to <32 x i8>
%660 = bitcast <4 x i32> %51 to <16 x i8>
%661 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %658, <32 x i8> %659, <16 x i8> %660, i32 2)
%662 = extractelement <4 x float> %661, i32 0
%663 = extractelement <4 x float> %661, i32 1
%664 = extractelement <4 x float> %661, i32 2
%665 = fcmp oeq float %temp38.0, 0.000000e+00
%666 = select i1 %665, float 1.000000e+00, float 0.000000e+00
%667 = fmul float %662, %666
%668 = fmul float %663, %666
%669 = fmul float %664, %666
%670 = fmul float %648, %652
%671 = fadd float %670, %667
%672 = fmul float %649, %652
%673 = fadd float %672, %668
%674 = fmul float %650, %652
%675 = fadd float %674, %669
%676 = fmul float %634, %638
%677 = fadd float %676, %671
%678 = fmul float %635, %638
%679 = fadd float %678, %673
%680 = fmul float %636, %638
%681 = fadd float %680, %675
%682 = fmul float %620, %624
%683 = fadd float %682, %677
%684 = fmul float %621, %624
%685 = fadd float %684, %679
%686 = fmul float %622, %624
%687 = fadd float %686, %681
%688 = fmul float %606, %610
%689 = fadd float %688, %683
%690 = fmul float %607, %610
%691 = fadd float %690, %685
%692 = fmul float %608, %610
%693 = fadd float %692, %687
%694 = fcmp une float %35, %temp20.0
%.sink212 = select i1 %694, float %38, float %37
%temp60.0 = select i1 %694, float 1.953125e-03, float 3.906250e-03
%695 = fdiv float 1.000000e+00, %.sink212
%696 = fmul float %102, %695
%697 = fmul float %101, %695
%698 = call float @llvm.floor.f32(float %696)
%699 = fsub float %696, %698
%700 = call float @llvm.floor.f32(float %697)
%701 = fsub float %697, %700
%702 = fmul float %39, 2.000000e+00
%703 = fmul float %702, %temp60.0
%704 = fsub float 1.000000e+00, %703
%705 = fmul float %temp60.0, %39
%706 = fmul float %699, %704
%707 = fadd float %706, %705
%708 = fmul float %701, %704
%709 = fadd float %708, %705
%710 = fmul float %707, %temp20.0
%711 = fadd float %710, %temp36.0
%712 = fmul float %709, %temp20.0
%713 = fadd float %712, %temp37.0
%714 = bitcast float %711 to i32
%715 = bitcast float %713 to i32
%716 = bitcast float %225 to i32
%717 = insertelement <4 x i32> undef, i32 %714, i32 0
%718 = insertelement <4 x i32> %717, i32 %715, i32 1
%719 = insertelement <4 x i32> %718, i32 %716, i32 2
%720 = bitcast <8 x i32> %81 to <32 x i8>
%721 = bitcast <4 x i32> %83 to <16 x i8>
%722 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %719, <32 x i8> %720, <16 x i8> %721, i32 2)
%723 = extractelement <4 x float> %722, i32 0
%724 = extractelement <4 x float> %722, i32 1
%725 = extractelement <4 x float> %722, i32 2
%726 = fcmp oeq float %temp38.0, 4.000000e+00
%727 = select i1 %726, float 1.000000e+00, float 0.000000e+00
%728 = bitcast float %711 to i32
%729 = bitcast float %713 to i32
%730 = bitcast float %225 to i32
%731 = insertelement <4 x i32> undef, i32 %728, i32 0
%732 = insertelement <4 x i32> %731, i32 %729, i32 1
%733 = insertelement <4 x i32> %732, i32 %730, i32 2
%734 = bitcast <8 x i32> %73 to <32 x i8>
%735 = bitcast <4 x i32> %75 to <16 x i8>
%736 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %733, <32 x i8> %734, <16 x i8> %735, i32 2)
%737 = extractelement <4 x float> %736, i32 0
%738 = extractelement <4 x float> %736, i32 1
%739 = extractelement <4 x float> %736, i32 2
%740 = fcmp oeq float %temp38.0, 3.000000e+00
%741 = select i1 %740, float 1.000000e+00, float 0.000000e+00
%742 = bitcast float %711 to i32
%743 = bitcast float %713 to i32
%744 = bitcast float %225 to i32
%745 = insertelement <4 x i32> undef, i32 %742, i32 0
%746 = insertelement <4 x i32> %745, i32 %743, i32 1
%747 = insertelement <4 x i32> %746, i32 %744, i32 2
%748 = bitcast <8 x i32> %65 to <32 x i8>
%749 = bitcast <4 x i32> %67 to <16 x i8>
%750 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %747, <32 x i8> %748, <16 x i8> %749, i32 2)
%751 = extractelement <4 x float> %750, i32 0
%752 = extractelement <4 x float> %750, i32 1
%753 = extractelement <4 x float> %750, i32 2
%754 = fcmp oeq float %temp38.0, 2.000000e+00
%755 = select i1 %754, float 1.000000e+00, float 0.000000e+00
%756 = bitcast float %711 to i32
%757 = bitcast float %713 to i32
%758 = bitcast float %225 to i32
%759 = insertelement <4 x i32> undef, i32 %756, i32 0
%760 = insertelement <4 x i32> %759, i32 %757, i32 1
%761 = insertelement <4 x i32> %760, i32 %758, i32 2
%762 = bitcast <8 x i32> %57 to <32 x i8>
%763 = bitcast <4 x i32> %59 to <16 x i8>
%764 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %761, <32 x i8> %762, <16 x i8> %763, i32 2)
%765 = extractelement <4 x float> %764, i32 0
%766 = extractelement <4 x float> %764, i32 1
%767 = extractelement <4 x float> %764, i32 2
%768 = fcmp oeq float %temp38.0, 1.000000e+00
%769 = select i1 %768, float 1.000000e+00, float 0.000000e+00
%770 = bitcast float %711 to i32
%771 = bitcast float %713 to i32
%772 = bitcast float %225 to i32
%773 = insertelement <4 x i32> undef, i32 %770, i32 0
%774 = insertelement <4 x i32> %773, i32 %771, i32 1
%775 = insertelement <4 x i32> %774, i32 %772, i32 2
%776 = bitcast <8 x i32> %49 to <32 x i8>
%777 = bitcast <4 x i32> %51 to <16 x i8>
%778 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %775, <32 x i8> %776, <16 x i8> %777, i32 2)
%779 = extractelement <4 x float> %778, i32 0
%780 = extractelement <4 x float> %778, i32 1
%781 = extractelement <4 x float> %778, i32 2
%782 = fcmp oeq float %temp38.0, 0.000000e+00
%783 = select i1 %782, float 1.000000e+00, float 0.000000e+00
%784 = fmul float %779, %783
%785 = fmul float %780, %783
%786 = fmul float %781, %783
%787 = fmul float %765, %769
%788 = fadd float %787, %784
%789 = fmul float %766, %769
%790 = fadd float %789, %785
%791 = fmul float %767, %769
%792 = fadd float %791, %786
%793 = fmul float %751, %755
%794 = fadd float %793, %788
%795 = fmul float %752, %755
%796 = fadd float %795, %790
%797 = fmul float %753, %755
%798 = fadd float %797, %792
%799 = fmul float %737, %741
%800 = fadd float %799, %794
%801 = fmul float %738, %741
%802 = fadd float %801, %796
%803 = fmul float %739, %741
%804 = fadd float %803, %798
%805 = fmul float %723, %727
%806 = fadd float %805, %800
%807 = fmul float %724, %727
%808 = fadd float %807, %802
%809 = fmul float %725, %727
%810 = fadd float %809, %804
%811 = fcmp une float %35, %temp20.0
%.sink213 = select i1 %811, float %38, float %37
%temp64.0 = select i1 %811, float 1.953125e-03, float 3.906250e-03
%812 = fdiv float 1.000000e+00, %.sink213
%813 = fmul float %102, %812
%814 = fmul float %100, %812
%815 = call float @llvm.floor.f32(float %813)
%816 = fsub float %813, %815
%817 = call float @llvm.floor.f32(float %814)
%818 = fsub float %814, %817
%819 = fmul float %39, 2.000000e+00
%820 = fmul float %819, %temp64.0
%821 = fsub float 1.000000e+00, %820
%822 = fmul float %temp64.0, %39
%823 = fmul float %816, %821
%824 = fadd float %823, %822
%825 = fmul float %818, %821
%826 = fadd float %825, %822
%827 = fmul float %824, %temp20.0
%828 = fadd float %827, %temp36.0
%829 = fmul float %826, %temp20.0
%830 = fadd float %829, %temp37.0
%831 = bitcast float %828 to i32
%832 = bitcast float %830 to i32
%833 = bitcast float %225 to i32
%834 = insertelement <4 x i32> undef, i32 %831, i32 0
%835 = insertelement <4 x i32> %834, i32 %832, i32 1
%836 = insertelement <4 x i32> %835, i32 %833, i32 2
%837 = bitcast <8 x i32> %81 to <32 x i8>
%838 = bitcast <4 x i32> %83 to <16 x i8>
%839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %836, <32 x i8> %837, <16 x i8> %838, i32 2)
%840 = extractelement <4 x float> %839, i32 0
%841 = extractelement <4 x float> %839, i32 1
%842 = extractelement <4 x float> %839, i32 2
%843 = fcmp oeq float %temp38.0, 4.000000e+00
%844 = select i1 %843, float 1.000000e+00, float 0.000000e+00
%845 = bitcast float %828 to i32
%846 = bitcast float %830 to i32
%847 = bitcast float %225 to i32
%848 = insertelement <4 x i32> undef, i32 %845, i32 0
%849 = insertelement <4 x i32> %848, i32 %846, i32 1
%850 = insertelement <4 x i32> %849, i32 %847, i32 2
%851 = bitcast <8 x i32> %73 to <32 x i8>
%852 = bitcast <4 x i32> %75 to <16 x i8>
%853 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %850, <32 x i8> %851, <16 x i8> %852, i32 2)
%854 = extractelement <4 x float> %853, i32 0
%855 = extractelement <4 x float> %853, i32 1
%856 = extractelement <4 x float> %853, i32 2
%857 = fcmp oeq float %temp38.0, 3.000000e+00
%858 = select i1 %857, float 1.000000e+00, float 0.000000e+00
%859 = bitcast float %828 to i32
%860 = bitcast float %830 to i32
%861 = bitcast float %225 to i32
%862 = insertelement <4 x i32> undef, i32 %859, i32 0
%863 = insertelement <4 x i32> %862, i32 %860, i32 1
%864 = insertelement <4 x i32> %863, i32 %861, i32 2
%865 = bitcast <8 x i32> %65 to <32 x i8>
%866 = bitcast <4 x i32> %67 to <16 x i8>
%867 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %864, <32 x i8> %865, <16 x i8> %866, i32 2)
%868 = extractelement <4 x float> %867, i32 0
%869 = extractelement <4 x float> %867, i32 1
%870 = extractelement <4 x float> %867, i32 2
%871 = fcmp oeq float %temp38.0, 2.000000e+00
%872 = select i1 %871, float 1.000000e+00, float 0.000000e+00
%873 = bitcast float %828 to i32
%874 = bitcast float %830 to i32
%875 = bitcast float %225 to i32
%876 = insertelement <4 x i32> undef, i32 %873, i32 0
%877 = insertelement <4 x i32> %876, i32 %874, i32 1
%878 = insertelement <4 x i32> %877, i32 %875, i32 2
%879 = bitcast <8 x i32> %57 to <32 x i8>
%880 = bitcast <4 x i32> %59 to <16 x i8>
%881 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %878, <32 x i8> %879, <16 x i8> %880, i32 2)
%882 = extractelement <4 x float> %881, i32 0
%883 = extractelement <4 x float> %881, i32 1
%884 = extractelement <4 x float> %881, i32 2
%885 = fcmp oeq float %temp38.0, 1.000000e+00
%886 = select i1 %885, float 1.000000e+00, float 0.000000e+00
%887 = bitcast float %828 to i32
%888 = bitcast float %830 to i32
%889 = bitcast float %225 to i32
%890 = insertelement <4 x i32> undef, i32 %887, i32 0
%891 = insertelement <4 x i32> %890, i32 %888, i32 1
%892 = insertelement <4 x i32> %891, i32 %889, i32 2
%893 = bitcast <8 x i32> %49 to <32 x i8>
%894 = bitcast <4 x i32> %51 to <16 x i8>
%895 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %892, <32 x i8> %893, <16 x i8> %894, i32 2)
%896 = extractelement <4 x float> %895, i32 0
%897 = extractelement <4 x float> %895, i32 1
%898 = extractelement <4 x float> %895, i32 2
%899 = fcmp oeq float %temp38.0, 0.000000e+00
%900 = select i1 %899, float 1.000000e+00, float 0.000000e+00
%901 = fmul float %896, %900
%902 = fmul float %897, %900
%903 = fmul float %898, %900
%904 = fmul float %882, %886
%905 = fadd float %904, %901
%906 = fmul float %883, %886
%907 = fadd float %906, %902
%908 = fmul float %884, %886
%909 = fadd float %908, %903
%910 = fmul float %868, %872
%911 = fadd float %910, %905
%912 = fmul float %869, %872
%913 = fadd float %912, %907
%914 = fmul float %870, %872
%915 = fadd float %914, %909
%916 = fmul float %854, %858
%917 = fadd float %916, %911
%918 = fmul float %855, %858
%919 = fadd float %918, %913
%920 = fmul float %856, %858
%921 = fadd float %920, %915
%922 = fmul float %840, %844
%923 = fadd float %922, %917
%924 = fmul float %841, %844
%925 = fadd float %924, %919
%926 = fmul float %842, %844
%927 = fadd float %926, %921
%928 = fcmp une float %35, %temp24.0
%.sink214 = select i1 %928, float %38, float %37
%temp68.0 = select i1 %928, float 1.953125e-03, float 3.906250e-03
%929 = fdiv float 1.000000e+00, %.sink214
%930 = fmul float %100, %929
%931 = fmul float %101, %929
%932 = call float @llvm.floor.f32(float %930)
%933 = fsub float %930, %932
%934 = call float @llvm.floor.f32(float %931)
%935 = fsub float %931, %934
%936 = fmul float %39, 2.000000e+00
%937 = fmul float %936, %temp68.0
%938 = fsub float 1.000000e+00, %937
%939 = fmul float %temp68.0, %39
%940 = fmul float %933, %938
%941 = fadd float %940, %939
%942 = fmul float %935, %938
%943 = fadd float %942, %939
%944 = fmul float %941, %temp24.0
%945 = fadd float %944, %temp12.0
%946 = fmul float %943, %temp24.0
%947 = fadd float %946, %temp13.0
%948 = bitcast float %945 to i32
%949 = bitcast float %947 to i32
%950 = bitcast float %225 to i32
%951 = insertelement <4 x i32> undef, i32 %948, i32 0
%952 = insertelement <4 x i32> %951, i32 %949, i32 1
%953 = insertelement <4 x i32> %952, i32 %950, i32 2
%954 = bitcast <8 x i32> %81 to <32 x i8>
%955 = bitcast <4 x i32> %83 to <16 x i8>
%956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %953, <32 x i8> %954, <16 x i8> %955, i32 2)
%957 = extractelement <4 x float> %956, i32 0
%958 = extractelement <4 x float> %956, i32 1
%959 = extractelement <4 x float> %956, i32 2
%960 = fcmp oeq float %temp14.0, 4.000000e+00
%961 = select i1 %960, float 1.000000e+00, float 0.000000e+00
%962 = bitcast float %945 to i32
%963 = bitcast float %947 to i32
%964 = bitcast float %225 to i32
%965 = insertelement <4 x i32> undef, i32 %962, i32 0
%966 = insertelement <4 x i32> %965, i32 %963, i32 1
%967 = insertelement <4 x i32> %966, i32 %964, i32 2
%968 = bitcast <8 x i32> %73 to <32 x i8>
%969 = bitcast <4 x i32> %75 to <16 x i8>
%970 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %967, <32 x i8> %968, <16 x i8> %969, i32 2)
%971 = extractelement <4 x float> %970, i32 0
%972 = extractelement <4 x float> %970, i32 1
%973 = extractelement <4 x float> %970, i32 2
%974 = fcmp oeq float %temp14.0, 3.000000e+00
%975 = select i1 %974, float 1.000000e+00, float 0.000000e+00
%976 = bitcast float %945 to i32
%977 = bitcast float %947 to i32
%978 = bitcast float %225 to i32
%979 = insertelement <4 x i32> undef, i32 %976, i32 0
%980 = insertelement <4 x i32> %979, i32 %977, i32 1
%981 = insertelement <4 x i32> %980, i32 %978, i32 2
%982 = bitcast <8 x i32> %65 to <32 x i8>
%983 = bitcast <4 x i32> %67 to <16 x i8>
%984 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %981, <32 x i8> %982, <16 x i8> %983, i32 2)
%985 = extractelement <4 x float> %984, i32 0
%986 = extractelement <4 x float> %984, i32 1
%987 = extractelement <4 x float> %984, i32 2
%988 = fcmp oeq float %temp14.0, 2.000000e+00
%989 = select i1 %988, float 1.000000e+00, float 0.000000e+00
%990 = bitcast float %945 to i32
%991 = bitcast float %947 to i32
%992 = bitcast float %225 to i32
%993 = insertelement <4 x i32> undef, i32 %990, i32 0
%994 = insertelement <4 x i32> %993, i32 %991, i32 1
%995 = insertelement <4 x i32> %994, i32 %992, i32 2
%996 = bitcast <8 x i32> %57 to <32 x i8>
%997 = bitcast <4 x i32> %59 to <16 x i8>
%998 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %995, <32 x i8> %996, <16 x i8> %997, i32 2)
%999 = extractelement <4 x float> %998, i32 0
%1000 = extractelement <4 x float> %998, i32 1
%1001 = extractelement <4 x float> %998, i32 2
%1002 = fcmp oeq float %temp14.0, 1.000000e+00
%1003 = select i1 %1002, float 1.000000e+00, float 0.000000e+00
%1004 = bitcast float %945 to i32
%1005 = bitcast float %947 to i32
%1006 = bitcast float %225 to i32
%1007 = insertelement <4 x i32> undef, i32 %1004, i32 0
%1008 = insertelement <4 x i32> %1007, i32 %1005, i32 1
%1009 = insertelement <4 x i32> %1008, i32 %1006, i32 2
%1010 = bitcast <8 x i32> %49 to <32 x i8>
%1011 = bitcast <4 x i32> %51 to <16 x i8>
%1012 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1009, <32 x i8> %1010, <16 x i8> %1011, i32 2)
%1013 = extractelement <4 x float> %1012, i32 0
%1014 = extractelement <4 x float> %1012, i32 1
%1015 = extractelement <4 x float> %1012, i32 2
%1016 = fcmp oeq float %temp14.0, 0.000000e+00
%1017 = select i1 %1016, float 1.000000e+00, float 0.000000e+00
%1018 = fmul float %1013, %1017
%1019 = fmul float %1014, %1017
%1020 = fmul float %1015, %1017
%1021 = fmul float %999, %1003
%1022 = fadd float %1021, %1018
%1023 = fmul float %1000, %1003
%1024 = fadd float %1023, %1019
%1025 = fmul float %1001, %1003
%1026 = fadd float %1025, %1020
%1027 = fmul float %985, %989
%1028 = fadd float %1027, %1022
%1029 = fmul float %986, %989
%1030 = fadd float %1029, %1024
%1031 = fmul float %987, %989
%1032 = fadd float %1031, %1026
%1033 = fmul float %971, %975
%1034 = fadd float %1033, %1028
%1035 = fmul float %972, %975
%1036 = fadd float %1035, %1030
%1037 = fmul float %973, %975
%1038 = fadd float %1037, %1032
%1039 = fmul float %957, %961
%1040 = fadd float %1039, %1034
%1041 = fmul float %958, %961
%1042 = fadd float %1041, %1036
%1043 = fmul float %959, %961
%1044 = fadd float %1043, %1038
%1045 = fcmp une float %35, %temp24.0
%.sink215 = select i1 %1045, float %38, float %37
%temp72.0 = select i1 %1045, float 1.953125e-03, float 3.906250e-03
%1046 = fdiv float 1.000000e+00, %.sink215
%1047 = fmul float %102, %1046
%1048 = fmul float %101, %1046
%1049 = call float @llvm.floor.f32(float %1047)
%1050 = fsub float %1047, %1049
%1051 = call float @llvm.floor.f32(float %1048)
%1052 = fsub float %1048, %1051
%1053 = fmul float %39, 2.000000e+00
%1054 = fmul float %1053, %temp72.0
%1055 = fsub float 1.000000e+00, %1054
%1056 = fmul float %temp72.0, %39
%1057 = fmul float %1050, %1055
%1058 = fadd float %1057, %1056
%1059 = fmul float %1052, %1055
%1060 = fadd float %1059, %1056
%1061 = fmul float %1058, %temp24.0
%1062 = fadd float %1061, %temp12.0
%1063 = fmul float %1060, %temp24.0
%1064 = fadd float %1063, %temp13.0
%1065 = bitcast float %1062 to i32
%1066 = bitcast float %1064 to i32
%1067 = bitcast float %225 to i32
%1068 = insertelement <4 x i32> undef, i32 %1065, i32 0
%1069 = insertelement <4 x i32> %1068, i32 %1066, i32 1
%1070 = insertelement <4 x i32> %1069, i32 %1067, i32 2
%1071 = bitcast <8 x i32> %81 to <32 x i8>
%1072 = bitcast <4 x i32> %83 to <16 x i8>
%1073 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1070, <32 x i8> %1071, <16 x i8> %1072, i32 2)
%1074 = extractelement <4 x float> %1073, i32 0
%1075 = extractelement <4 x float> %1073, i32 1
%1076 = extractelement <4 x float> %1073, i32 2
%1077 = fcmp oeq float %temp14.0, 4.000000e+00
%1078 = select i1 %1077, float 1.000000e+00, float 0.000000e+00
%1079 = bitcast float %1062 to i32
%1080 = bitcast float %1064 to i32
%1081 = bitcast float %225 to i32
%1082 = insertelement <4 x i32> undef, i32 %1079, i32 0
%1083 = insertelement <4 x i32> %1082, i32 %1080, i32 1
%1084 = insertelement <4 x i32> %1083, i32 %1081, i32 2
%1085 = bitcast <8 x i32> %73 to <32 x i8>
%1086 = bitcast <4 x i32> %75 to <16 x i8>
%1087 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1084, <32 x i8> %1085, <16 x i8> %1086, i32 2)
%1088 = extractelement <4 x float> %1087, i32 0
%1089 = extractelement <4 x float> %1087, i32 1
%1090 = extractelement <4 x float> %1087, i32 2
%1091 = fcmp oeq float %temp14.0, 3.000000e+00
%1092 = select i1 %1091, float 1.000000e+00, float 0.000000e+00
%1093 = bitcast float %1062 to i32
%1094 = bitcast float %1064 to i32
%1095 = bitcast float %225 to i32
%1096 = insertelement <4 x i32> undef, i32 %1093, i32 0
%1097 = insertelement <4 x i32> %1096, i32 %1094, i32 1
%1098 = insertelement <4 x i32> %1097, i32 %1095, i32 2
%1099 = bitcast <8 x i32> %65 to <32 x i8>
%1100 = bitcast <4 x i32> %67 to <16 x i8>
%1101 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1098, <32 x i8> %1099, <16 x i8> %1100, i32 2)
%1102 = extractelement <4 x float> %1101, i32 0
%1103 = extractelement <4 x float> %1101, i32 1
%1104 = extractelement <4 x float> %1101, i32 2
%1105 = fcmp oeq float %temp14.0, 2.000000e+00
%1106 = select i1 %1105, float 1.000000e+00, float 0.000000e+00
%1107 = bitcast float %1062 to i32
%1108 = bitcast float %1064 to i32
%1109 = bitcast float %225 to i32
%1110 = insertelement <4 x i32> undef, i32 %1107, i32 0
%1111 = insertelement <4 x i32> %1110, i32 %1108, i32 1
%1112 = insertelement <4 x i32> %1111, i32 %1109, i32 2
%1113 = bitcast <8 x i32> %57 to <32 x i8>
%1114 = bitcast <4 x i32> %59 to <16 x i8>
%1115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1112, <32 x i8> %1113, <16 x i8> %1114, i32 2)
%1116 = extractelement <4 x float> %1115, i32 0
%1117 = extractelement <4 x float> %1115, i32 1
%1118 = extractelement <4 x float> %1115, i32 2
%1119 = fcmp oeq float %temp14.0, 1.000000e+00
%1120 = select i1 %1119, float 1.000000e+00, float 0.000000e+00
%1121 = bitcast float %1062 to i32
%1122 = bitcast float %1064 to i32
%1123 = bitcast float %225 to i32
%1124 = insertelement <4 x i32> undef, i32 %1121, i32 0
%1125 = insertelement <4 x i32> %1124, i32 %1122, i32 1
%1126 = insertelement <4 x i32> %1125, i32 %1123, i32 2
%1127 = bitcast <8 x i32> %49 to <32 x i8>
%1128 = bitcast <4 x i32> %51 to <16 x i8>
%1129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2)
%1130 = extractelement <4 x float> %1129, i32 0
%1131 = extractelement <4 x float> %1129, i32 1
%1132 = extractelement <4 x float> %1129, i32 2
%1133 = fcmp oeq float %temp14.0, 0.000000e+00
%1134 = select i1 %1133, float 1.000000e+00, float 0.000000e+00
%1135 = fmul float %1130, %1134
%1136 = fmul float %1131, %1134
%1137 = fmul float %1132, %1134
%1138 = fmul float %1116, %1120
%1139 = fadd float %1138, %1135
%1140 = fmul float %1117, %1120
%1141 = fadd float %1140, %1136
%1142 = fmul float %1118, %1120
%1143 = fadd float %1142, %1137
%1144 = fmul float %1102, %1106
%1145 = fadd float %1144, %1139
%1146 = fmul float %1103, %1106
%1147 = fadd float %1146, %1141
%1148 = fmul float %1104, %1106
%1149 = fadd float %1148, %1143
%1150 = fmul float %1088, %1092
%1151 = fadd float %1150, %1145
%1152 = fmul float %1089, %1092
%1153 = fadd float %1152, %1147
%1154 = fmul float %1090, %1092
%1155 = fadd float %1154, %1149
%1156 = fmul float %1074, %1078
%1157 = fadd float %1156, %1151
%1158 = fmul float %1075, %1078
%1159 = fadd float %1158, %1153
%1160 = fmul float %1076, %1078
%1161 = fadd float %1160, %1155
%1162 = fcmp une float %35, %temp24.0
%.sink216 = select i1 %1162, float %38, float %37
%temp76.0 = select i1 %1162, float 1.953125e-03, float 3.906250e-03
%1163 = fdiv float 1.000000e+00, %.sink216
%1164 = fmul float %102, %1163
%1165 = fmul float %100, %1163
%1166 = call float @llvm.floor.f32(float %1164)
%1167 = fsub float %1164, %1166
%1168 = call float @llvm.floor.f32(float %1165)
%1169 = fsub float %1165, %1168
%1170 = fmul float %39, 2.000000e+00
%1171 = fmul float %1170, %temp76.0
%1172 = fsub float 1.000000e+00, %1171
%1173 = fmul float %temp76.0, %39
%1174 = fmul float %1167, %1172
%1175 = fadd float %1174, %1173
%1176 = fmul float %1169, %1172
%1177 = fadd float %1176, %1173
%1178 = fmul float %1175, %temp24.0
%1179 = fadd float %1178, %temp12.0
%1180 = fmul float %1177, %temp24.0
%1181 = fadd float %1180, %temp13.0
%1182 = bitcast float %1179 to i32
%1183 = bitcast float %1181 to i32
%1184 = bitcast float %225 to i32
%1185 = insertelement <4 x i32> undef, i32 %1182, i32 0
%1186 = insertelement <4 x i32> %1185, i32 %1183, i32 1
%1187 = insertelement <4 x i32> %1186, i32 %1184, i32 2
%1188 = bitcast <8 x i32> %81 to <32 x i8>
%1189 = bitcast <4 x i32> %83 to <16 x i8>
%1190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1187, <32 x i8> %1188, <16 x i8> %1189, i32 2)
%1191 = extractelement <4 x float> %1190, i32 0
%1192 = extractelement <4 x float> %1190, i32 1
%1193 = extractelement <4 x float> %1190, i32 2
%1194 = fcmp oeq float %temp14.0, 4.000000e+00
%1195 = select i1 %1194, float 1.000000e+00, float 0.000000e+00
%1196 = bitcast float %1179 to i32
%1197 = bitcast float %1181 to i32
%1198 = bitcast float %225 to i32
%1199 = insertelement <4 x i32> undef, i32 %1196, i32 0
%1200 = insertelement <4 x i32> %1199, i32 %1197, i32 1
%1201 = insertelement <4 x i32> %1200, i32 %1198, i32 2
%1202 = bitcast <8 x i32> %73 to <32 x i8>
%1203 = bitcast <4 x i32> %75 to <16 x i8>
%1204 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1201, <32 x i8> %1202, <16 x i8> %1203, i32 2)
%1205 = extractelement <4 x float> %1204, i32 0
%1206 = extractelement <4 x float> %1204, i32 1
%1207 = extractelement <4 x float> %1204, i32 2
%1208 = fcmp oeq float %temp14.0, 3.000000e+00
%1209 = select i1 %1208, float 1.000000e+00, float 0.000000e+00
%1210 = bitcast float %1179 to i32
%1211 = bitcast float %1181 to i32
%1212 = bitcast float %225 to i32
%1213 = insertelement <4 x i32> undef, i32 %1210, i32 0
%1214 = insertelement <4 x i32> %1213, i32 %1211, i32 1
%1215 = insertelement <4 x i32> %1214, i32 %1212, i32 2
%1216 = bitcast <8 x i32> %65 to <32 x i8>
%1217 = bitcast <4 x i32> %67 to <16 x i8>
%1218 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1215, <32 x i8> %1216, <16 x i8> %1217, i32 2)
%1219 = extractelement <4 x float> %1218, i32 0
%1220 = extractelement <4 x float> %1218, i32 1
%1221 = extractelement <4 x float> %1218, i32 2
%1222 = fcmp oeq float %temp14.0, 2.000000e+00
%1223 = select i1 %1222, float 1.000000e+00, float 0.000000e+00
%1224 = bitcast float %1179 to i32
%1225 = bitcast float %1181 to i32
%1226 = bitcast float %225 to i32
%1227 = insertelement <4 x i32> undef, i32 %1224, i32 0
%1228 = insertelement <4 x i32> %1227, i32 %1225, i32 1
%1229 = insertelement <4 x i32> %1228, i32 %1226, i32 2
%1230 = bitcast <8 x i32> %57 to <32 x i8>
%1231 = bitcast <4 x i32> %59 to <16 x i8>
%1232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1229, <32 x i8> %1230, <16 x i8> %1231, i32 2)
%1233 = extractelement <4 x float> %1232, i32 0
%1234 = extractelement <4 x float> %1232, i32 1
%1235 = extractelement <4 x float> %1232, i32 2
%1236 = fcmp oeq float %temp14.0, 1.000000e+00
%1237 = select i1 %1236, float 1.000000e+00, float 0.000000e+00
%1238 = bitcast float %1179 to i32
%1239 = bitcast float %1181 to i32
%1240 = bitcast float %225 to i32
%1241 = insertelement <4 x i32> undef, i32 %1238, i32 0
%1242 = insertelement <4 x i32> %1241, i32 %1239, i32 1
%1243 = insertelement <4 x i32> %1242, i32 %1240, i32 2
%1244 = bitcast <8 x i32> %49 to <32 x i8>
%1245 = bitcast <4 x i32> %51 to <16 x i8>
%1246 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1243, <32 x i8> %1244, <16 x i8> %1245, i32 2)
%1247 = extractelement <4 x float> %1246, i32 0
%1248 = extractelement <4 x float> %1246, i32 1
%1249 = extractelement <4 x float> %1246, i32 2
%1250 = fcmp oeq float %temp14.0, 0.000000e+00
%1251 = select i1 %1250, float 1.000000e+00, float 0.000000e+00
%1252 = fmul float %1247, %1251
%1253 = fmul float %1248, %1251
%1254 = fmul float %1249, %1251
%1255 = fmul float %1233, %1237
%1256 = fadd float %1255, %1252
%1257 = fmul float %1234, %1237
%1258 = fadd float %1257, %1253
%1259 = fmul float %1235, %1237
%1260 = fadd float %1259, %1254
%1261 = fmul float %1219, %1223
%1262 = fadd float %1261, %1256
%1263 = fmul float %1220, %1223
%1264 = fadd float %1263, %1258
%1265 = fmul float %1221, %1223
%1266 = fadd float %1265, %1260
%1267 = fmul float %1205, %1209
%1268 = fadd float %1267, %1262
%1269 = fmul float %1206, %1209
%1270 = fadd float %1269, %1264
%1271 = fmul float %1207, %1209
%1272 = fadd float %1271, %1266
%1273 = fmul float %1191, %1195
%1274 = fadd float %1273, %1268
%1275 = fmul float %1192, %1195
%1276 = fadd float %1275, %1270
%1277 = fmul float %1193, %1195
%1278 = fadd float %1277, %1272
%1279 = fmul float %1040, %150
%1280 = fmul float %1042, %150
%1281 = fmul float %1044, %150
%1282 = fmul float %1157, %148
%1283 = fadd float %1282, %1279
%1284 = fmul float %1159, %148
%1285 = fadd float %1284, %1280
%1286 = fmul float %1161, %148
%1287 = fadd float %1286, %1281
%1288 = fmul float %1274, %149
%1289 = fadd float %1288, %1283
%1290 = fmul float %1276, %149
%1291 = fadd float %1290, %1285
%1292 = fmul float %1278, %149
%1293 = fadd float %1292, %1287
%1294 = fmul float %689, %150
%1295 = fmul float %691, %150
%1296 = fmul float %693, %150
%1297 = fmul float %806, %148
%1298 = fadd float %1297, %1294
%1299 = fmul float %808, %148
%1300 = fadd float %1299, %1295
%1301 = fmul float %810, %148
%1302 = fadd float %1301, %1296
%1303 = fmul float %923, %149
%1304 = fadd float %1303, %1298
%1305 = fmul float %925, %149
%1306 = fadd float %1305, %1300
%1307 = fmul float %927, %149
%1308 = fadd float %1307, %1302
%1309 = fmul float %338, %150
%1310 = fmul float %340, %150
%1311 = fmul float %342, %150
%1312 = fmul float %455, %148
%1313 = fadd float %1312, %1309
%1314 = fmul float %457, %148
%1315 = fadd float %1314, %1310
%1316 = fmul float %459, %148
%1317 = fadd float %1316, %1311
%1318 = fmul float %572, %149
%1319 = fadd float %1318, %1313
%1320 = fmul float %574, %149
%1321 = fadd float %1320, %1315
%1322 = fmul float %576, %149
%1323 = fadd float %1322, %1317
%1324 = fmul float %92, %1319
%1325 = fmul float %92, %1321
%1326 = fmul float %92, %1323
%1327 = fmul float %93, %1304
%1328 = fadd float %1327, %1324
%1329 = fmul float %93, %1306
%1330 = fadd float %1329, %1325
%1331 = fmul float %93, %1308
%1332 = fadd float %1331, %1326
%1333 = fmul float %94, %1289
%1334 = fadd float %1333, %1328
%1335 = fmul float %94, %1291
%1336 = fadd float %1335, %1330
%1337 = fmul float %94, %1293
%1338 = fadd float %1337, %1332
%1339 = fcmp une float %35, %temp16.0
%.sink217 = select i1 %1339, float %38, float %37
%temp48.2 = select i1 %1339, float 1.953125e-03, float 3.906250e-03
%1340 = fdiv float 1.000000e+00, %.sink217
%1341 = fmul float %102, %1340
%1342 = fmul float %101, %1340
%1343 = call float @llvm.floor.f32(float %1341)
%1344 = fsub float %1341, %1343
%1345 = call float @llvm.floor.f32(float %1342)
%1346 = fsub float %1342, %1345
%1347 = fmul float %39, 2.000000e+00
%1348 = fmul float %1347, %temp48.2
%1349 = fsub float 1.000000e+00, %1348
%1350 = fmul float %temp48.2, %39
%1351 = fmul float %1344, %1349
%1352 = fadd float %1351, %1350
%1353 = fmul float %1346, %1349
%1354 = fadd float %1353, %1350
%1355 = fmul float %1352, %temp16.0
%1356 = fadd float %1355, %temp28.0
%1357 = fmul float %1354, %temp16.0
%1358 = fadd float %1357, %temp29.0
%1359 = bitcast float %1356 to i32
%1360 = bitcast float %1358 to i32
%1361 = bitcast float %225 to i32
%1362 = insertelement <4 x i32> undef, i32 %1359, i32 0
%1363 = insertelement <4 x i32> %1362, i32 %1360, i32 1
%1364 = insertelement <4 x i32> %1363, i32 %1361, i32 2
%1365 = bitcast <8 x i32> %85 to <32 x i8>
%1366 = bitcast <4 x i32> %87 to <16 x i8>
%1367 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1364, <32 x i8> %1365, <16 x i8> %1366, i32 2)
%1368 = extractelement <4 x float> %1367, i32 1
%1369 = extractelement <4 x float> %1367, i32 3
%1370 = fcmp oeq float %temp30.0, 4.000000e+00
%1371 = select i1 %1370, float 1.000000e+00, float 0.000000e+00
%1372 = bitcast float %1356 to i32
%1373 = bitcast float %1358 to i32
%1374 = bitcast float %225 to i32
%1375 = insertelement <4 x i32> undef, i32 %1372, i32 0
%1376 = insertelement <4 x i32> %1375, i32 %1373, i32 1
%1377 = insertelement <4 x i32> %1376, i32 %1374, i32 2
%1378 = bitcast <8 x i32> %77 to <32 x i8>
%1379 = bitcast <4 x i32> %79 to <16 x i8>
%1380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1377, <32 x i8> %1378, <16 x i8> %1379, i32 2)
%1381 = extractelement <4 x float> %1380, i32 1
%1382 = extractelement <4 x float> %1380, i32 3
%1383 = fcmp oeq float %temp30.0, 3.000000e+00
%1384 = select i1 %1383, float 1.000000e+00, float 0.000000e+00
%1385 = bitcast float %1356 to i32
%1386 = bitcast float %1358 to i32
%1387 = bitcast float %225 to i32
%1388 = insertelement <4 x i32> undef, i32 %1385, i32 0
%1389 = insertelement <4 x i32> %1388, i32 %1386, i32 1
%1390 = insertelement <4 x i32> %1389, i32 %1387, i32 2
%1391 = bitcast <8 x i32> %69 to <32 x i8>
%1392 = bitcast <4 x i32> %71 to <16 x i8>
%1393 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1390, <32 x i8> %1391, <16 x i8> %1392, i32 2)
%1394 = extractelement <4 x float> %1393, i32 1
%1395 = extractelement <4 x float> %1393, i32 3
%1396 = fcmp oeq float %temp30.0, 2.000000e+00
%1397 = select i1 %1396, float 1.000000e+00, float 0.000000e+00
%1398 = bitcast float %1356 to i32
%1399 = bitcast float %1358 to i32
%1400 = bitcast float %225 to i32
%1401 = insertelement <4 x i32> undef, i32 %1398, i32 0
%1402 = insertelement <4 x i32> %1401, i32 %1399, i32 1
%1403 = insertelement <4 x i32> %1402, i32 %1400, i32 2
%1404 = bitcast <8 x i32> %61 to <32 x i8>
%1405 = bitcast <4 x i32> %63 to <16 x i8>
%1406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2)
%1407 = extractelement <4 x float> %1406, i32 1
%1408 = extractelement <4 x float> %1406, i32 3
%1409 = fcmp oeq float %temp30.0, 1.000000e+00
%1410 = select i1 %1409, float 1.000000e+00, float 0.000000e+00
%1411 = bitcast float %1356 to i32
%1412 = bitcast float %1358 to i32
%1413 = bitcast float %225 to i32
%1414 = insertelement <4 x i32> undef, i32 %1411, i32 0
%1415 = insertelement <4 x i32> %1414, i32 %1412, i32 1
%1416 = insertelement <4 x i32> %1415, i32 %1413, i32 2
%1417 = bitcast <8 x i32> %53 to <32 x i8>
%1418 = bitcast <4 x i32> %55 to <16 x i8>
%1419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1416, <32 x i8> %1417, <16 x i8> %1418, i32 2)
%1420 = extractelement <4 x float> %1419, i32 1
%1421 = extractelement <4 x float> %1419, i32 3
%1422 = fcmp oeq float %temp30.0, 0.000000e+00
%1423 = select i1 %1422, float 1.000000e+00, float 0.000000e+00
%1424 = fmul float %1420, %1423
%1425 = fmul float %1421, %1423
%1426 = fmul float %1407, %1410
%1427 = fadd float %1426, %1424
%1428 = fmul float %1408, %1410
%1429 = fadd float %1428, %1425
%1430 = fmul float %1394, %1397
%1431 = fadd float %1430, %1427
%1432 = fmul float %1395, %1397
%1433 = fadd float %1432, %1429
%1434 = fmul float %1381, %1384
%1435 = fadd float %1434, %1431
%1436 = fmul float %1382, %1384
%1437 = fadd float %1436, %1433
%1438 = fmul float %1368, %1371
%1439 = fadd float %1438, %1435
%1440 = fmul float %1369, %1371
%1441 = fadd float %1440, %1437
%1442 = fmul float %1441, 2.000000e+00
%1443 = fadd float %1442, -1.000000e+00
%1444 = fmul float %1439, 2.000000e+00
%1445 = fadd float %1444, -1.000000e+00
%1446 = fmul float %1443, %1443
%1447 = fmul float %1445, %1445
%1448 = fadd float %1446, %1447
%1449 = call float @llvm.AMDIL.clamp.(float %1448, float 0.000000e+00, float 1.000000e+00)
%1450 = fcmp une float %35, %temp16.0
%.sink218 = select i1 %1450, float %38, float %37
%temp52.2 = select i1 %1450, float 1.953125e-03, float 3.906250e-03
%1451 = fdiv float 1.000000e+00, %.sink218
%1452 = fmul float %102, %1451
%1453 = fmul float %100, %1451
%1454 = call float @llvm.floor.f32(float %1452)
%1455 = fsub float %1452, %1454
%1456 = call float @llvm.floor.f32(float %1453)
%1457 = fsub float %1453, %1456
%1458 = fmul float %39, 2.000000e+00
%1459 = fmul float %1458, %temp52.2
%1460 = fsub float 1.000000e+00, %1459
%1461 = fmul float %temp52.2, %39
%1462 = fmul float %1455, %1460
%1463 = fadd float %1462, %1461
%1464 = fmul float %1457, %1460
%1465 = fadd float %1464, %1461
%1466 = fmul float %1463, %temp16.0
%1467 = fadd float %1466, %temp28.0
%1468 = fmul float %1465, %temp16.0
%1469 = fadd float %1468, %temp29.0
%1470 = bitcast float %1467 to i32
%1471 = bitcast float %1469 to i32
%1472 = bitcast float %225 to i32
%1473 = insertelement <4 x i32> undef, i32 %1470, i32 0
%1474 = insertelement <4 x i32> %1473, i32 %1471, i32 1
%1475 = insertelement <4 x i32> %1474, i32 %1472, i32 2
%1476 = bitcast <8 x i32> %85 to <32 x i8>
%1477 = bitcast <4 x i32> %87 to <16 x i8>
%1478 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1475, <32 x i8> %1476, <16 x i8> %1477, i32 2)
%1479 = extractelement <4 x float> %1478, i32 1
%1480 = extractelement <4 x float> %1478, i32 3
%1481 = fcmp oeq float %temp30.0, 4.000000e+00
%1482 = select i1 %1481, float 1.000000e+00, float 0.000000e+00
%1483 = bitcast float %1467 to i32
%1484 = bitcast float %1469 to i32
%1485 = bitcast float %225 to i32
%1486 = insertelement <4 x i32> undef, i32 %1483, i32 0
%1487 = insertelement <4 x i32> %1486, i32 %1484, i32 1
%1488 = insertelement <4 x i32> %1487, i32 %1485, i32 2
%1489 = bitcast <8 x i32> %77 to <32 x i8>
%1490 = bitcast <4 x i32> %79 to <16 x i8>
%1491 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1488, <32 x i8> %1489, <16 x i8> %1490, i32 2)
%1492 = extractelement <4 x float> %1491, i32 1
%1493 = extractelement <4 x float> %1491, i32 3
%1494 = fcmp oeq float %temp30.0, 3.000000e+00
%1495 = select i1 %1494, float 1.000000e+00, float 0.000000e+00
%1496 = bitcast float %1467 to i32
%1497 = bitcast float %1469 to i32
%1498 = bitcast float %225 to i32
%1499 = insertelement <4 x i32> undef, i32 %1496, i32 0
%1500 = insertelement <4 x i32> %1499, i32 %1497, i32 1
%1501 = insertelement <4 x i32> %1500, i32 %1498, i32 2
%1502 = bitcast <8 x i32> %69 to <32 x i8>
%1503 = bitcast <4 x i32> %71 to <16 x i8>
%1504 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1501, <32 x i8> %1502, <16 x i8> %1503, i32 2)
%1505 = extractelement <4 x float> %1504, i32 1
%1506 = extractelement <4 x float> %1504, i32 3
%1507 = fcmp oeq float %temp30.0, 2.000000e+00
%1508 = select i1 %1507, float 1.000000e+00, float 0.000000e+00
%1509 = bitcast float %1467 to i32
%1510 = bitcast float %1469 to i32
%1511 = bitcast float %225 to i32
%1512 = insertelement <4 x i32> undef, i32 %1509, i32 0
%1513 = insertelement <4 x i32> %1512, i32 %1510, i32 1
%1514 = insertelement <4 x i32> %1513, i32 %1511, i32 2
%1515 = bitcast <8 x i32> %61 to <32 x i8>
%1516 = bitcast <4 x i32> %63 to <16 x i8>
%1517 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1514, <32 x i8> %1515, <16 x i8> %1516, i32 2)
%1518 = extractelement <4 x float> %1517, i32 1
%1519 = extractelement <4 x float> %1517, i32 3
%1520 = fcmp oeq float %temp30.0, 1.000000e+00
%1521 = select i1 %1520, float 1.000000e+00, float 0.000000e+00
%1522 = bitcast float %1467 to i32
%1523 = bitcast float %1469 to i32
%1524 = bitcast float %225 to i32
%1525 = insertelement <4 x i32> undef, i32 %1522, i32 0
%1526 = insertelement <4 x i32> %1525, i32 %1523, i32 1
%1527 = insertelement <4 x i32> %1526, i32 %1524, i32 2
%1528 = bitcast <8 x i32> %53 to <32 x i8>
%1529 = bitcast <4 x i32> %55 to <16 x i8>
%1530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1527, <32 x i8> %1528, <16 x i8> %1529, i32 2)
%1531 = extractelement <4 x float> %1530, i32 1
%1532 = extractelement <4 x float> %1530, i32 3
%1533 = fcmp oeq float %temp30.0, 0.000000e+00
%1534 = select i1 %1533, float 1.000000e+00, float 0.000000e+00
%1535 = fmul float %1531, %1534
%1536 = fmul float %1532, %1534
%1537 = fmul float %1518, %1521
%1538 = fadd float %1537, %1535
%1539 = fmul float %1519, %1521
%1540 = fadd float %1539, %1536
%1541 = fmul float %1505, %1508
%1542 = fadd float %1541, %1538
%1543 = fmul float %1506, %1508
%1544 = fadd float %1543, %1540
%1545 = fmul float %1492, %1495
%1546 = fadd float %1545, %1542
%1547 = fmul float %1493, %1495
%1548 = fadd float %1547, %1544
%1549 = fmul float %1479, %1482
%1550 = fadd float %1549, %1546
%1551 = fmul float %1480, %1482
%1552 = fadd float %1551, %1548
%1553 = fmul float %1552, 2.000000e+00
%1554 = fadd float %1553, -1.000000e+00
%1555 = fmul float %1550, 2.000000e+00
%1556 = fadd float %1555, -1.000000e+00
%1557 = fmul float %1554, %1554
%1558 = fmul float %1556, %1556
%1559 = fadd float %1557, %1558
%1560 = call float @llvm.AMDIL.clamp.(float %1559, float 0.000000e+00, float 1.000000e+00)
%1561 = fcmp une float %35, %temp16.0
%.sink219 = select i1 %1561, float %38, float %37
%temp56.2 = select i1 %1561, float 1.953125e-03, float 3.906250e-03
%1562 = fdiv float 1.000000e+00, %.sink219
%1563 = fmul float %100, %1562
%1564 = fmul float %101, %1562
%1565 = call float @llvm.floor.f32(float %1563)
%1566 = fsub float %1563, %1565
%1567 = call float @llvm.floor.f32(float %1564)
%1568 = fsub float %1564, %1567
%1569 = fmul float %39, 2.000000e+00
%1570 = fmul float %1569, %temp56.2
%1571 = fsub float 1.000000e+00, %1570
%1572 = fmul float %temp56.2, %39
%1573 = fmul float %1566, %1571
%1574 = fadd float %1573, %1572
%1575 = fmul float %1568, %1571
%1576 = fadd float %1575, %1572
%1577 = fmul float %1574, %temp16.0
%1578 = fadd float %1577, %temp28.0
%1579 = fmul float %1576, %temp16.0
%1580 = fadd float %1579, %temp29.0
%1581 = bitcast float %1578 to i32
%1582 = bitcast float %1580 to i32
%1583 = bitcast float %225 to i32
%1584 = insertelement <4 x i32> undef, i32 %1581, i32 0
%1585 = insertelement <4 x i32> %1584, i32 %1582, i32 1
%1586 = insertelement <4 x i32> %1585, i32 %1583, i32 2
%1587 = bitcast <8 x i32> %85 to <32 x i8>
%1588 = bitcast <4 x i32> %87 to <16 x i8>
%1589 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1586, <32 x i8> %1587, <16 x i8> %1588, i32 2)
%1590 = extractelement <4 x float> %1589, i32 1
%1591 = extractelement <4 x float> %1589, i32 3
%1592 = fcmp oeq float %temp30.0, 4.000000e+00
%1593 = select i1 %1592, float 1.000000e+00, float 0.000000e+00
%1594 = bitcast float %1578 to i32
%1595 = bitcast float %1580 to i32
%1596 = bitcast float %225 to i32
%1597 = insertelement <4 x i32> undef, i32 %1594, i32 0
%1598 = insertelement <4 x i32> %1597, i32 %1595, i32 1
%1599 = insertelement <4 x i32> %1598, i32 %1596, i32 2
%1600 = bitcast <8 x i32> %77 to <32 x i8>
%1601 = bitcast <4 x i32> %79 to <16 x i8>
%1602 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1599, <32 x i8> %1600, <16 x i8> %1601, i32 2)
%1603 = extractelement <4 x float> %1602, i32 1
%1604 = extractelement <4 x float> %1602, i32 3
%1605 = fcmp oeq float %temp30.0, 3.000000e+00
%1606 = select i1 %1605, float 1.000000e+00, float 0.000000e+00
%1607 = bitcast float %1578 to i32
%1608 = bitcast float %1580 to i32
%1609 = bitcast float %225 to i32
%1610 = insertelement <4 x i32> undef, i32 %1607, i32 0
%1611 = insertelement <4 x i32> %1610, i32 %1608, i32 1
%1612 = insertelement <4 x i32> %1611, i32 %1609, i32 2
%1613 = bitcast <8 x i32> %69 to <32 x i8>
%1614 = bitcast <4 x i32> %71 to <16 x i8>
%1615 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1612, <32 x i8> %1613, <16 x i8> %1614, i32 2)
%1616 = extractelement <4 x float> %1615, i32 1
%1617 = extractelement <4 x float> %1615, i32 3
%1618 = fcmp oeq float %temp30.0, 2.000000e+00
%1619 = select i1 %1618, float 1.000000e+00, float 0.000000e+00
%1620 = bitcast float %1578 to i32
%1621 = bitcast float %1580 to i32
%1622 = bitcast float %225 to i32
%1623 = insertelement <4 x i32> undef, i32 %1620, i32 0
%1624 = insertelement <4 x i32> %1623, i32 %1621, i32 1
%1625 = insertelement <4 x i32> %1624, i32 %1622, i32 2
%1626 = bitcast <8 x i32> %61 to <32 x i8>
%1627 = bitcast <4 x i32> %63 to <16 x i8>
%1628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1625, <32 x i8> %1626, <16 x i8> %1627, i32 2)
%1629 = extractelement <4 x float> %1628, i32 1
%1630 = extractelement <4 x float> %1628, i32 3
%1631 = fcmp oeq float %temp30.0, 1.000000e+00
%1632 = select i1 %1631, float 1.000000e+00, float 0.000000e+00
%1633 = bitcast float %1578 to i32
%1634 = bitcast float %1580 to i32
%1635 = bitcast float %225 to i32
%1636 = insertelement <4 x i32> undef, i32 %1633, i32 0
%1637 = insertelement <4 x i32> %1636, i32 %1634, i32 1
%1638 = insertelement <4 x i32> %1637, i32 %1635, i32 2
%1639 = bitcast <8 x i32> %53 to <32 x i8>
%1640 = bitcast <4 x i32> %55 to <16 x i8>
%1641 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1638, <32 x i8> %1639, <16 x i8> %1640, i32 2)
%1642 = extractelement <4 x float> %1641, i32 1
%1643 = extractelement <4 x float> %1641, i32 3
%1644 = fcmp oeq float %temp30.0, 0.000000e+00
%1645 = select i1 %1644, float 1.000000e+00, float 0.000000e+00
%1646 = fmul float %1642, %1645
%1647 = fmul float %1643, %1645
%1648 = fmul float %1629, %1632
%1649 = fadd float %1648, %1646
%1650 = fmul float %1630, %1632
%1651 = fadd float %1650, %1647
%1652 = fmul float %1616, %1619
%1653 = fadd float %1652, %1649
%1654 = fmul float %1617, %1619
%1655 = fadd float %1654, %1651
%1656 = fmul float %1603, %1606
%1657 = fadd float %1656, %1653
%1658 = fmul float %1604, %1606
%1659 = fadd float %1658, %1655
%1660 = fmul float %1590, %1593
%1661 = fadd float %1660, %1657
%1662 = fmul float %1591, %1593
%1663 = fadd float %1662, %1659
%1664 = fmul float %1663, 2.000000e+00
%1665 = fadd float %1664, -1.000000e+00
%1666 = fmul float %1661, 2.000000e+00
%1667 = fadd float %1666, -1.000000e+00
%1668 = fmul float %1665, %1665
%1669 = fmul float %1667, %1667
%1670 = fadd float %1668, %1669
%1671 = call float @llvm.AMDIL.clamp.(float %1670, float 0.000000e+00, float 1.000000e+00)
%1672 = fmul float %148, 0.000000e+00
%1673 = fmul float %1443, %148
%1674 = fmul float %1445, %148
%1675 = fmul float %1556, %149
%1676 = fadd float %1675, %1672
%1677 = fmul float %149, 0.000000e+00
%1678 = fadd float %1677, %1673
%1679 = fmul float %1554, %149
%1680 = fadd float %1679, %1674
%1681 = fmul float %1665, %150
%1682 = fadd float %1681, %1676
%1683 = fmul float %1667, %150
%1684 = fadd float %1683, %1678
%1685 = fmul float %150, 0.000000e+00
%1686 = fadd float %1685, %1680
%1687 = fcmp une float %35, %temp20.0
%.sink220 = select i1 %1687, float %38, float %37
%temp44.3 = select i1 %1687, float 1.953125e-03, float 3.906250e-03
%1688 = fdiv float 1.000000e+00, %.sink220
%1689 = fmul float %102, %1688
%1690 = fmul float %101, %1688
%1691 = call float @llvm.floor.f32(float %1689)
%1692 = fsub float %1689, %1691
%1693 = call float @llvm.floor.f32(float %1690)
%1694 = fsub float %1690, %1693
%1695 = fmul float %39, 2.000000e+00
%1696 = fmul float %1695, %temp44.3
%1697 = fsub float 1.000000e+00, %1696
%1698 = fmul float %temp44.3, %39
%1699 = fmul float %1692, %1697
%1700 = fadd float %1699, %1698
%1701 = fmul float %1694, %1697
%1702 = fadd float %1701, %1698
%1703 = fmul float %1700, %temp20.0
%1704 = fadd float %1703, %temp36.0
%1705 = fmul float %1702, %temp20.0
%1706 = fadd float %1705, %temp37.0
%1707 = bitcast float %1704 to i32
%1708 = bitcast float %1706 to i32
%1709 = bitcast float %225 to i32
%1710 = insertelement <4 x i32> undef, i32 %1707, i32 0
%1711 = insertelement <4 x i32> %1710, i32 %1708, i32 1
%1712 = insertelement <4 x i32> %1711, i32 %1709, i32 2
%1713 = bitcast <8 x i32> %85 to <32 x i8>
%1714 = bitcast <4 x i32> %87 to <16 x i8>
%1715 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1712, <32 x i8> %1713, <16 x i8> %1714, i32 2)
%1716 = extractelement <4 x float> %1715, i32 1
%1717 = extractelement <4 x float> %1715, i32 3
%1718 = fcmp oeq float %temp38.0, 4.000000e+00
%1719 = select i1 %1718, float 1.000000e+00, float 0.000000e+00
%1720 = bitcast float %1704 to i32
%1721 = bitcast float %1706 to i32
%1722 = bitcast float %225 to i32
%1723 = insertelement <4 x i32> undef, i32 %1720, i32 0
%1724 = insertelement <4 x i32> %1723, i32 %1721, i32 1
%1725 = insertelement <4 x i32> %1724, i32 %1722, i32 2
%1726 = bitcast <8 x i32> %77 to <32 x i8>
%1727 = bitcast <4 x i32> %79 to <16 x i8>
%1728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2)
%1729 = extractelement <4 x float> %1728, i32 1
%1730 = extractelement <4 x float> %1728, i32 3
%1731 = fcmp oeq float %temp38.0, 3.000000e+00
%1732 = select i1 %1731, float 1.000000e+00, float 0.000000e+00
%1733 = bitcast float %1704 to i32
%1734 = bitcast float %1706 to i32
%1735 = bitcast float %225 to i32
%1736 = insertelement <4 x i32> undef, i32 %1733, i32 0
%1737 = insertelement <4 x i32> %1736, i32 %1734, i32 1
%1738 = insertelement <4 x i32> %1737, i32 %1735, i32 2
%1739 = bitcast <8 x i32> %69 to <32 x i8>
%1740 = bitcast <4 x i32> %71 to <16 x i8>
%1741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1738, <32 x i8> %1739, <16 x i8> %1740, i32 2)
%1742 = extractelement <4 x float> %1741, i32 1
%1743 = extractelement <4 x float> %1741, i32 3
%1744 = fcmp oeq float %temp38.0, 2.000000e+00
%1745 = select i1 %1744, float 1.000000e+00, float 0.000000e+00
%1746 = bitcast float %1704 to i32
%1747 = bitcast float %1706 to i32
%1748 = bitcast float %225 to i32
%1749 = insertelement <4 x i32> undef, i32 %1746, i32 0
%1750 = insertelement <4 x i32> %1749, i32 %1747, i32 1
%1751 = insertelement <4 x i32> %1750, i32 %1748, i32 2
%1752 = bitcast <8 x i32> %61 to <32 x i8>
%1753 = bitcast <4 x i32> %63 to <16 x i8>
%1754 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1751, <32 x i8> %1752, <16 x i8> %1753, i32 2)
%1755 = extractelement <4 x float> %1754, i32 1
%1756 = extractelement <4 x float> %1754, i32 3
%1757 = fcmp oeq float %temp38.0, 1.000000e+00
%1758 = select i1 %1757, float 1.000000e+00, float 0.000000e+00
%1759 = bitcast float %1704 to i32
%1760 = bitcast float %1706 to i32
%1761 = bitcast float %225 to i32
%1762 = insertelement <4 x i32> undef, i32 %1759, i32 0
%1763 = insertelement <4 x i32> %1762, i32 %1760, i32 1
%1764 = insertelement <4 x i32> %1763, i32 %1761, i32 2
%1765 = bitcast <8 x i32> %53 to <32 x i8>
%1766 = bitcast <4 x i32> %55 to <16 x i8>
%1767 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1764, <32 x i8> %1765, <16 x i8> %1766, i32 2)
%1768 = extractelement <4 x float> %1767, i32 1
%1769 = extractelement <4 x float> %1767, i32 3
%1770 = fcmp oeq float %temp38.0, 0.000000e+00
%1771 = select i1 %1770, float 1.000000e+00, float 0.000000e+00
%1772 = fmul float %1768, %1771
%1773 = fmul float %1769, %1771
%1774 = fmul float %1755, %1758
%1775 = fadd float %1774, %1772
%1776 = fmul float %1756, %1758
%1777 = fadd float %1776, %1773
%1778 = fmul float %1742, %1745
%1779 = fadd float %1778, %1775
%1780 = fmul float %1743, %1745
%1781 = fadd float %1780, %1777
%1782 = fmul float %1729, %1732
%1783 = fadd float %1782, %1779
%1784 = fmul float %1730, %1732
%1785 = fadd float %1784, %1781
%1786 = fmul float %1716, %1719
%1787 = fadd float %1786, %1783
%1788 = fmul float %1717, %1719
%1789 = fadd float %1788, %1785
%1790 = fmul float %1789, 2.000000e+00
%1791 = fadd float %1790, -1.000000e+00
%1792 = fmul float %1787, 2.000000e+00
%1793 = fadd float %1792, -1.000000e+00
%1794 = fmul float %1791, %1791
%1795 = fmul float %1793, %1793
%1796 = fadd float %1794, %1795
%1797 = call float @llvm.AMDIL.clamp.(float %1796, float 0.000000e+00, float 1.000000e+00)
%1798 = fcmp une float %35, %temp20.0
%.sink221 = select i1 %1798, float %38, float %37
%temp48.4 = select i1 %1798, float 1.953125e-03, float 3.906250e-03
%1799 = fdiv float 1.000000e+00, %.sink221
%1800 = fmul float %102, %1799
%1801 = fmul float %100, %1799
%1802 = call float @llvm.floor.f32(float %1800)
%1803 = fsub float %1800, %1802
%1804 = call float @llvm.floor.f32(float %1801)
%1805 = fsub float %1801, %1804
%1806 = fmul float %39, 2.000000e+00
%1807 = fmul float %1806, %temp48.4
%1808 = fsub float 1.000000e+00, %1807
%1809 = fmul float %temp48.4, %39
%1810 = fmul float %1803, %1808
%1811 = fadd float %1810, %1809
%1812 = fmul float %1805, %1808
%1813 = fadd float %1812, %1809
%1814 = fmul float %1811, %temp20.0
%1815 = fadd float %1814, %temp36.0
%1816 = fmul float %1813, %temp20.0
%1817 = fadd float %1816, %temp37.0
%1818 = bitcast float %1815 to i32
%1819 = bitcast float %1817 to i32
%1820 = bitcast float %225 to i32
%1821 = insertelement <4 x i32> undef, i32 %1818, i32 0
%1822 = insertelement <4 x i32> %1821, i32 %1819, i32 1
%1823 = insertelement <4 x i32> %1822, i32 %1820, i32 2
%1824 = bitcast <8 x i32> %85 to <32 x i8>
%1825 = bitcast <4 x i32> %87 to <16 x i8>
%1826 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1823, <32 x i8> %1824, <16 x i8> %1825, i32 2)
%1827 = extractelement <4 x float> %1826, i32 1
%1828 = extractelement <4 x float> %1826, i32 3
%1829 = fcmp oeq float %temp38.0, 4.000000e+00
%1830 = select i1 %1829, float 1.000000e+00, float 0.000000e+00
%1831 = bitcast float %1815 to i32
%1832 = bitcast float %1817 to i32
%1833 = bitcast float %225 to i32
%1834 = insertelement <4 x i32> undef, i32 %1831, i32 0
%1835 = insertelement <4 x i32> %1834, i32 %1832, i32 1
%1836 = insertelement <4 x i32> %1835, i32 %1833, i32 2
%1837 = bitcast <8 x i32> %77 to <32 x i8>
%1838 = bitcast <4 x i32> %79 to <16 x i8>
%1839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1836, <32 x i8> %1837, <16 x i8> %1838, i32 2)
%1840 = extractelement <4 x float> %1839, i32 1
%1841 = extractelement <4 x float> %1839, i32 3
%1842 = fcmp oeq float %temp38.0, 3.000000e+00
%1843 = select i1 %1842, float 1.000000e+00, float 0.000000e+00
%1844 = bitcast float %1815 to i32
%1845 = bitcast float %1817 to i32
%1846 = bitcast float %225 to i32
%1847 = insertelement <4 x i32> undef, i32 %1844, i32 0
%1848 = insertelement <4 x i32> %1847, i32 %1845, i32 1
%1849 = insertelement <4 x i32> %1848, i32 %1846, i32 2
%1850 = bitcast <8 x i32> %69 to <32 x i8>
%1851 = bitcast <4 x i32> %71 to <16 x i8>
%1852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1849, <32 x i8> %1850, <16 x i8> %1851, i32 2)
%1853 = extractelement <4 x float> %1852, i32 1
%1854 = extractelement <4 x float> %1852, i32 3
%1855 = fcmp oeq float %temp38.0, 2.000000e+00
%1856 = select i1 %1855, float 1.000000e+00, float 0.000000e+00
%1857 = bitcast float %1815 to i32
%1858 = bitcast float %1817 to i32
%1859 = bitcast float %225 to i32
%1860 = insertelement <4 x i32> undef, i32 %1857, i32 0
%1861 = insertelement <4 x i32> %1860, i32 %1858, i32 1
%1862 = insertelement <4 x i32> %1861, i32 %1859, i32 2
%1863 = bitcast <8 x i32> %61 to <32 x i8>
%1864 = bitcast <4 x i32> %63 to <16 x i8>
%1865 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1862, <32 x i8> %1863, <16 x i8> %1864, i32 2)
%1866 = extractelement <4 x float> %1865, i32 1
%1867 = extractelement <4 x float> %1865, i32 3
%1868 = fcmp oeq float %temp38.0, 1.000000e+00
%1869 = select i1 %1868, float 1.000000e+00, float 0.000000e+00
%1870 = bitcast float %1815 to i32
%1871 = bitcast float %1817 to i32
%1872 = bitcast float %225 to i32
%1873 = insertelement <4 x i32> undef, i32 %1870, i32 0
%1874 = insertelement <4 x i32> %1873, i32 %1871, i32 1
%1875 = insertelement <4 x i32> %1874, i32 %1872, i32 2
%1876 = bitcast <8 x i32> %53 to <32 x i8>
%1877 = bitcast <4 x i32> %55 to <16 x i8>
%1878 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1875, <32 x i8> %1876, <16 x i8> %1877, i32 2)
%1879 = extractelement <4 x float> %1878, i32 1
%1880 = extractelement <4 x float> %1878, i32 3
%1881 = fcmp oeq float %temp38.0, 0.000000e+00
%1882 = select i1 %1881, float 1.000000e+00, float 0.000000e+00
%1883 = fmul float %1879, %1882
%1884 = fmul float %1880, %1882
%1885 = fmul float %1866, %1869
%1886 = fadd float %1885, %1883
%1887 = fmul float %1867, %1869
%1888 = fadd float %1887, %1884
%1889 = fmul float %1853, %1856
%1890 = fadd float %1889, %1886
%1891 = fmul float %1854, %1856
%1892 = fadd float %1891, %1888
%1893 = fmul float %1840, %1843
%1894 = fadd float %1893, %1890
%1895 = fmul float %1841, %1843
%1896 = fadd float %1895, %1892
%1897 = fmul float %1827, %1830
%1898 = fadd float %1897, %1894
%1899 = fmul float %1828, %1830
%1900 = fadd float %1899, %1896
%1901 = fmul float %1900, 2.000000e+00
%1902 = fadd float %1901, -1.000000e+00
%1903 = fmul float %1898, 2.000000e+00
%1904 = fadd float %1903, -1.000000e+00
%1905 = fmul float %1902, %1902
%1906 = fmul float %1904, %1904
%1907 = fadd float %1905, %1906
%1908 = call float @llvm.AMDIL.clamp.(float %1907, float 0.000000e+00, float 1.000000e+00)
%1909 = fcmp une float %35, %temp20.0
%.sink222 = select i1 %1909, float %38, float %37
%temp52.4 = select i1 %1909, float 1.953125e-03, float 3.906250e-03
%1910 = fdiv float 1.000000e+00, %.sink222
%1911 = fmul float %100, %1910
%1912 = fmul float %101, %1910
%1913 = call float @llvm.floor.f32(float %1911)
%1914 = fsub float %1911, %1913
%1915 = call float @llvm.floor.f32(float %1912)
%1916 = fsub float %1912, %1915
%1917 = fmul float %39, 2.000000e+00
%1918 = fmul float %1917, %temp52.4
%1919 = fsub float 1.000000e+00, %1918
%1920 = fmul float %temp52.4, %39
%1921 = fmul float %1914, %1919
%1922 = fadd float %1921, %1920
%1923 = fmul float %1916, %1919
%1924 = fadd float %1923, %1920
%1925 = fmul float %1922, %temp20.0
%1926 = fadd float %1925, %temp36.0
%1927 = fmul float %1924, %temp20.0
%1928 = fadd float %1927, %temp37.0
%1929 = bitcast float %1926 to i32
%1930 = bitcast float %1928 to i32
%1931 = bitcast float %225 to i32
%1932 = insertelement <4 x i32> undef, i32 %1929, i32 0
%1933 = insertelement <4 x i32> %1932, i32 %1930, i32 1
%1934 = insertelement <4 x i32> %1933, i32 %1931, i32 2
%1935 = bitcast <8 x i32> %85 to <32 x i8>
%1936 = bitcast <4 x i32> %87 to <16 x i8>
%1937 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1934, <32 x i8> %1935, <16 x i8> %1936, i32 2)
%1938 = extractelement <4 x float> %1937, i32 1
%1939 = extractelement <4 x float> %1937, i32 3
%1940 = fcmp oeq float %temp38.0, 4.000000e+00
%1941 = select i1 %1940, float 1.000000e+00, float 0.000000e+00
%1942 = bitcast float %1926 to i32
%1943 = bitcast float %1928 to i32
%1944 = bitcast float %225 to i32
%1945 = insertelement <4 x i32> undef, i32 %1942, i32 0
%1946 = insertelement <4 x i32> %1945, i32 %1943, i32 1
%1947 = insertelement <4 x i32> %1946, i32 %1944, i32 2
%1948 = bitcast <8 x i32> %77 to <32 x i8>
%1949 = bitcast <4 x i32> %79 to <16 x i8>
%1950 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1947, <32 x i8> %1948, <16 x i8> %1949, i32 2)
%1951 = extractelement <4 x float> %1950, i32 1
%1952 = extractelement <4 x float> %1950, i32 3
%1953 = fcmp oeq float %temp38.0, 3.000000e+00
%1954 = select i1 %1953, float 1.000000e+00, float 0.000000e+00
%1955 = bitcast float %1926 to i32
%1956 = bitcast float %1928 to i32
%1957 = bitcast float %225 to i32
%1958 = insertelement <4 x i32> undef, i32 %1955, i32 0
%1959 = insertelement <4 x i32> %1958, i32 %1956, i32 1
%1960 = insertelement <4 x i32> %1959, i32 %1957, i32 2
%1961 = bitcast <8 x i32> %69 to <32 x i8>
%1962 = bitcast <4 x i32> %71 to <16 x i8>
%1963 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1960, <32 x i8> %1961, <16 x i8> %1962, i32 2)
%1964 = extractelement <4 x float> %1963, i32 1
%1965 = extractelement <4 x float> %1963, i32 3
%1966 = fcmp oeq float %temp38.0, 2.000000e+00
%1967 = select i1 %1966, float 1.000000e+00, float 0.000000e+00
%1968 = bitcast float %1926 to i32
%1969 = bitcast float %1928 to i32
%1970 = bitcast float %225 to i32
%1971 = insertelement <4 x i32> undef, i32 %1968, i32 0
%1972 = insertelement <4 x i32> %1971, i32 %1969, i32 1
%1973 = insertelement <4 x i32> %1972, i32 %1970, i32 2
%1974 = bitcast <8 x i32> %61 to <32 x i8>
%1975 = bitcast <4 x i32> %63 to <16 x i8>
%1976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1973, <32 x i8> %1974, <16 x i8> %1975, i32 2)
%1977 = extractelement <4 x float> %1976, i32 1
%1978 = extractelement <4 x float> %1976, i32 3
%1979 = fcmp oeq float %temp38.0, 1.000000e+00
%1980 = select i1 %1979, float 1.000000e+00, float 0.000000e+00
%1981 = bitcast float %1926 to i32
%1982 = bitcast float %1928 to i32
%1983 = bitcast float %225 to i32
%1984 = insertelement <4 x i32> undef, i32 %1981, i32 0
%1985 = insertelement <4 x i32> %1984, i32 %1982, i32 1
%1986 = insertelement <4 x i32> %1985, i32 %1983, i32 2
%1987 = bitcast <8 x i32> %53 to <32 x i8>
%1988 = bitcast <4 x i32> %55 to <16 x i8>
%1989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1986, <32 x i8> %1987, <16 x i8> %1988, i32 2)
%1990 = extractelement <4 x float> %1989, i32 1
%1991 = extractelement <4 x float> %1989, i32 3
%1992 = fcmp oeq float %temp38.0, 0.000000e+00
%1993 = select i1 %1992, float 1.000000e+00, float 0.000000e+00
%1994 = fmul float %1990, %1993
%1995 = fmul float %1991, %1993
%1996 = fmul float %1977, %1980
%1997 = fadd float %1996, %1994
%1998 = fmul float %1978, %1980
%1999 = fadd float %1998, %1995
%2000 = fmul float %1964, %1967
%2001 = fadd float %2000, %1997
%2002 = fmul float %1965, %1967
%2003 = fadd float %2002, %1999
%2004 = fmul float %1951, %1954
%2005 = fadd float %2004, %2001
%2006 = fmul float %1952, %1954
%2007 = fadd float %2006, %2003
%2008 = fmul float %1938, %1941
%2009 = fadd float %2008, %2005
%2010 = fmul float %1939, %1941
%2011 = fadd float %2010, %2007
%2012 = fmul float %2011, 2.000000e+00
%2013 = fadd float %2012, -1.000000e+00
%2014 = fmul float %2009, 2.000000e+00
%2015 = fadd float %2014, -1.000000e+00
%2016 = fmul float %2013, %2013
%2017 = fmul float %2015, %2015
%2018 = fadd float %2016, %2017
%2019 = call float @llvm.AMDIL.clamp.(float %2018, float 0.000000e+00, float 1.000000e+00)
%2020 = fmul float %148, 0.000000e+00
%2021 = fmul float %1791, %148
%2022 = fmul float %1793, %148
%2023 = fmul float %1904, %149
%2024 = fadd float %2023, %2020
%2025 = fmul float %149, 0.000000e+00
%2026 = fadd float %2025, %2021
%2027 = fmul float %1902, %149
%2028 = fadd float %2027, %2022
%2029 = fmul float %2013, %150
%2030 = fadd float %2029, %2024
%2031 = fmul float %2015, %150
%2032 = fadd float %2031, %2026
%2033 = fmul float %150, 0.000000e+00
%2034 = fadd float %2033, %2028
%2035 = fcmp une float %35, %temp24.0
%.sink223 = select i1 %2035, float %38, float %37
%temp36.1 = select i1 %2035, float 1.953125e-03, float 3.906250e-03
%2036 = fdiv float 1.000000e+00, %.sink223
%2037 = fmul float %102, %2036
%2038 = fmul float %101, %2036
%2039 = call float @llvm.floor.f32(float %2037)
%2040 = fsub float %2037, %2039
%2041 = call float @llvm.floor.f32(float %2038)
%2042 = fsub float %2038, %2041
%2043 = fmul float %39, 2.000000e+00
%2044 = fmul float %2043, %temp36.1
%2045 = fsub float 1.000000e+00, %2044
%2046 = fmul float %temp36.1, %39
%2047 = fmul float %2040, %2045
%2048 = fadd float %2047, %2046
%2049 = fmul float %2042, %2045
%2050 = fadd float %2049, %2046
%2051 = fmul float %2048, %temp24.0
%2052 = fadd float %2051, %temp12.0
%2053 = fmul float %2050, %temp24.0
%2054 = fadd float %2053, %temp13.0
%2055 = bitcast float %2052 to i32
%2056 = bitcast float %2054 to i32
%2057 = bitcast float %225 to i32
%2058 = insertelement <4 x i32> undef, i32 %2055, i32 0
%2059 = insertelement <4 x i32> %2058, i32 %2056, i32 1
%2060 = insertelement <4 x i32> %2059, i32 %2057, i32 2
%2061 = bitcast <8 x i32> %85 to <32 x i8>
%2062 = bitcast <4 x i32> %87 to <16 x i8>
%2063 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2060, <32 x i8> %2061, <16 x i8> %2062, i32 2)
%2064 = extractelement <4 x float> %2063, i32 1
%2065 = extractelement <4 x float> %2063, i32 3
%2066 = fcmp oeq float %temp14.0, 4.000000e+00
%2067 = select i1 %2066, float 1.000000e+00, float 0.000000e+00
%2068 = bitcast float %2052 to i32
%2069 = bitcast float %2054 to i32
%2070 = bitcast float %225 to i32
%2071 = insertelement <4 x i32> undef, i32 %2068, i32 0
%2072 = insertelement <4 x i32> %2071, i32 %2069, i32 1
%2073 = insertelement <4 x i32> %2072, i32 %2070, i32 2
%2074 = bitcast <8 x i32> %77 to <32 x i8>
%2075 = bitcast <4 x i32> %79 to <16 x i8>
%2076 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2073, <32 x i8> %2074, <16 x i8> %2075, i32 2)
%2077 = extractelement <4 x float> %2076, i32 1
%2078 = extractelement <4 x float> %2076, i32 3
%2079 = fcmp oeq float %temp14.0, 3.000000e+00
%2080 = select i1 %2079, float 1.000000e+00, float 0.000000e+00
%2081 = bitcast float %2052 to i32
%2082 = bitcast float %2054 to i32
%2083 = bitcast float %225 to i32
%2084 = insertelement <4 x i32> undef, i32 %2081, i32 0
%2085 = insertelement <4 x i32> %2084, i32 %2082, i32 1
%2086 = insertelement <4 x i32> %2085, i32 %2083, i32 2
%2087 = bitcast <8 x i32> %69 to <32 x i8>
%2088 = bitcast <4 x i32> %71 to <16 x i8>
%2089 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2086, <32 x i8> %2087, <16 x i8> %2088, i32 2)
%2090 = extractelement <4 x float> %2089, i32 1
%2091 = extractelement <4 x float> %2089, i32 3
%2092 = fcmp oeq float %temp14.0, 2.000000e+00
%2093 = select i1 %2092, float 1.000000e+00, float 0.000000e+00
%2094 = bitcast float %2052 to i32
%2095 = bitcast float %2054 to i32
%2096 = bitcast float %225 to i32
%2097 = insertelement <4 x i32> undef, i32 %2094, i32 0
%2098 = insertelement <4 x i32> %2097, i32 %2095, i32 1
%2099 = insertelement <4 x i32> %2098, i32 %2096, i32 2
%2100 = bitcast <8 x i32> %61 to <32 x i8>
%2101 = bitcast <4 x i32> %63 to <16 x i8>
%2102 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2099, <32 x i8> %2100, <16 x i8> %2101, i32 2)
%2103 = extractelement <4 x float> %2102, i32 1
%2104 = extractelement <4 x float> %2102, i32 3
%2105 = fcmp oeq float %temp14.0, 1.000000e+00
%2106 = select i1 %2105, float 1.000000e+00, float 0.000000e+00
%2107 = bitcast float %2052 to i32
%2108 = bitcast float %2054 to i32
%2109 = bitcast float %225 to i32
%2110 = insertelement <4 x i32> undef, i32 %2107, i32 0
%2111 = insertelement <4 x i32> %2110, i32 %2108, i32 1
%2112 = insertelement <4 x i32> %2111, i32 %2109, i32 2
%2113 = bitcast <8 x i32> %53 to <32 x i8>
%2114 = bitcast <4 x i32> %55 to <16 x i8>
%2115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2112, <32 x i8> %2113, <16 x i8> %2114, i32 2)
%2116 = extractelement <4 x float> %2115, i32 1
%2117 = extractelement <4 x float> %2115, i32 3
%2118 = fcmp oeq float %temp14.0, 0.000000e+00
%2119 = select i1 %2118, float 1.000000e+00, float 0.000000e+00
%2120 = fmul float %2116, %2119
%2121 = fmul float %2117, %2119
%2122 = fmul float %2103, %2106
%2123 = fadd float %2122, %2120
%2124 = fmul float %2104, %2106
%2125 = fadd float %2124, %2121
%2126 = fmul float %2090, %2093
%2127 = fadd float %2126, %2123
%2128 = fmul float %2091, %2093
%2129 = fadd float %2128, %2125
%2130 = fmul float %2077, %2080
%2131 = fadd float %2130, %2127
%2132 = fmul float %2078, %2080
%2133 = fadd float %2132, %2129
%2134 = fmul float %2064, %2067
%2135 = fadd float %2134, %2131
%2136 = fmul float %2065, %2067
%2137 = fadd float %2136, %2133
%2138 = fmul float %2137, 2.000000e+00
%2139 = fadd float %2138, -1.000000e+00
%2140 = fmul float %2135, 2.000000e+00
%2141 = fadd float %2140, -1.000000e+00
%2142 = fmul float %2139, %2139
%2143 = fmul float %2141, %2141
%2144 = fadd float %2142, %2143
%2145 = call float @llvm.AMDIL.clamp.(float %2144, float 0.000000e+00, float 1.000000e+00)
%2146 = fcmp une float %35, %temp24.0
%.sink224 = select i1 %2146, float %38, float %37
%temp44.5 = select i1 %2146, float 1.953125e-03, float 3.906250e-03
%2147 = fdiv float 1.000000e+00, %.sink224
%2148 = fmul float %102, %2147
%2149 = fmul float %100, %2147
%2150 = call float @llvm.floor.f32(float %2148)
%2151 = fsub float %2148, %2150
%2152 = call float @llvm.floor.f32(float %2149)
%2153 = fsub float %2149, %2152
%2154 = fmul float %39, 2.000000e+00
%2155 = fmul float %2154, %temp44.5
%2156 = fsub float 1.000000e+00, %2155
%2157 = fmul float %temp44.5, %39
%2158 = fmul float %2151, %2156
%2159 = fadd float %2158, %2157
%2160 = fmul float %2153, %2156
%2161 = fadd float %2160, %2157
%2162 = fmul float %2159, %temp24.0
%2163 = fadd float %2162, %temp12.0
%2164 = fmul float %2161, %temp24.0
%2165 = fadd float %2164, %temp13.0
%2166 = bitcast float %2163 to i32
%2167 = bitcast float %2165 to i32
%2168 = bitcast float %225 to i32
%2169 = insertelement <4 x i32> undef, i32 %2166, i32 0
%2170 = insertelement <4 x i32> %2169, i32 %2167, i32 1
%2171 = insertelement <4 x i32> %2170, i32 %2168, i32 2
%2172 = bitcast <8 x i32> %85 to <32 x i8>
%2173 = bitcast <4 x i32> %87 to <16 x i8>
%2174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2171, <32 x i8> %2172, <16 x i8> %2173, i32 2)
%2175 = extractelement <4 x float> %2174, i32 1
%2176 = extractelement <4 x float> %2174, i32 3
%2177 = fcmp oeq float %temp14.0, 4.000000e+00
%2178 = select i1 %2177, float 1.000000e+00, float 0.000000e+00
%2179 = bitcast float %2163 to i32
%2180 = bitcast float %2165 to i32
%2181 = bitcast float %225 to i32
%2182 = insertelement <4 x i32> undef, i32 %2179, i32 0
%2183 = insertelement <4 x i32> %2182, i32 %2180, i32 1
%2184 = insertelement <4 x i32> %2183, i32 %2181, i32 2
%2185 = bitcast <8 x i32> %77 to <32 x i8>
%2186 = bitcast <4 x i32> %79 to <16 x i8>
%2187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2184, <32 x i8> %2185, <16 x i8> %2186, i32 2)
%2188 = extractelement <4 x float> %2187, i32 1
%2189 = extractelement <4 x float> %2187, i32 3
%2190 = fcmp oeq float %temp14.0, 3.000000e+00
%2191 = select i1 %2190, float 1.000000e+00, float 0.000000e+00
%2192 = bitcast float %2163 to i32
%2193 = bitcast float %2165 to i32
%2194 = bitcast float %225 to i32
%2195 = insertelement <4 x i32> undef, i32 %2192, i32 0
%2196 = insertelement <4 x i32> %2195, i32 %2193, i32 1
%2197 = insertelement <4 x i32> %2196, i32 %2194, i32 2
%2198 = bitcast <8 x i32> %69 to <32 x i8>
%2199 = bitcast <4 x i32> %71 to <16 x i8>
%2200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2197, <32 x i8> %2198, <16 x i8> %2199, i32 2)
%2201 = extractelement <4 x float> %2200, i32 1
%2202 = extractelement <4 x float> %2200, i32 3
%2203 = fcmp oeq float %temp14.0, 2.000000e+00
%2204 = select i1 %2203, float 1.000000e+00, float 0.000000e+00
%2205 = bitcast float %2163 to i32
%2206 = bitcast float %2165 to i32
%2207 = bitcast float %225 to i32
%2208 = insertelement <4 x i32> undef, i32 %2205, i32 0
%2209 = insertelement <4 x i32> %2208, i32 %2206, i32 1
%2210 = insertelement <4 x i32> %2209, i32 %2207, i32 2
%2211 = bitcast <8 x i32> %61 to <32 x i8>
%2212 = bitcast <4 x i32> %63 to <16 x i8>
%2213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2210, <32 x i8> %2211, <16 x i8> %2212, i32 2)
%2214 = extractelement <4 x float> %2213, i32 1
%2215 = extractelement <4 x float> %2213, i32 3
%2216 = fcmp oeq float %temp14.0, 1.000000e+00
%2217 = select i1 %2216, float 1.000000e+00, float 0.000000e+00
%2218 = bitcast float %2163 to i32
%2219 = bitcast float %2165 to i32
%2220 = bitcast float %225 to i32
%2221 = insertelement <4 x i32> undef, i32 %2218, i32 0
%2222 = insertelement <4 x i32> %2221, i32 %2219, i32 1
%2223 = insertelement <4 x i32> %2222, i32 %2220, i32 2
%2224 = bitcast <8 x i32> %53 to <32 x i8>
%2225 = bitcast <4 x i32> %55 to <16 x i8>
%2226 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2223, <32 x i8> %2224, <16 x i8> %2225, i32 2)
%2227 = extractelement <4 x float> %2226, i32 1
%2228 = extractelement <4 x float> %2226, i32 3
%2229 = fcmp oeq float %temp14.0, 0.000000e+00
%2230 = select i1 %2229, float 1.000000e+00, float 0.000000e+00
%2231 = fmul float %2227, %2230
%2232 = fmul float %2228, %2230
%2233 = fmul float %2214, %2217
%2234 = fadd float %2233, %2231
%2235 = fmul float %2215, %2217
%2236 = fadd float %2235, %2232
%2237 = fmul float %2201, %2204
%2238 = fadd float %2237, %2234
%2239 = fmul float %2202, %2204
%2240 = fadd float %2239, %2236
%2241 = fmul float %2188, %2191
%2242 = fadd float %2241, %2238
%2243 = fmul float %2189, %2191
%2244 = fadd float %2243, %2240
%2245 = fmul float %2175, %2178
%2246 = fadd float %2245, %2242
%2247 = fmul float %2176, %2178
%2248 = fadd float %2247, %2244
%2249 = fmul float %2248, 2.000000e+00
%2250 = fadd float %2249, -1.000000e+00
%2251 = fmul float %2246, 2.000000e+00
%2252 = fadd float %2251, -1.000000e+00
%2253 = fmul float %2250, %2250
%2254 = fmul float %2252, %2252
%2255 = fadd float %2253, %2254
%2256 = call float @llvm.AMDIL.clamp.(float %2255, float 0.000000e+00, float 1.000000e+00)
%2257 = fcmp une float %35, %temp24.0
%.sink225 = select i1 %2257, float %38, float %37
%temp48.6 = select i1 %2257, float 1.953125e-03, float 3.906250e-03
%2258 = fdiv float 1.000000e+00, %.sink225
%2259 = fmul float %100, %2258
%2260 = fmul float %101, %2258
%2261 = call float @llvm.floor.f32(float %2259)
%2262 = fsub float %2259, %2261
%2263 = call float @llvm.floor.f32(float %2260)
%2264 = fsub float %2260, %2263
%2265 = fmul float %39, 2.000000e+00
%2266 = fmul float %2265, %temp48.6
%2267 = fsub float 1.000000e+00, %2266
%2268 = fmul float %temp48.6, %39
%2269 = fmul float %2262, %2267
%2270 = fadd float %2269, %2268
%2271 = fmul float %2264, %2267
%2272 = fadd float %2271, %2268
%2273 = fmul float %2270, %temp24.0
%2274 = fadd float %2273, %temp12.0
%2275 = fmul float %2272, %temp24.0
%2276 = fadd float %2275, %temp13.0
%2277 = bitcast float %2274 to i32
%2278 = bitcast float %2276 to i32
%2279 = bitcast float %225 to i32
%2280 = insertelement <4 x i32> undef, i32 %2277, i32 0
%2281 = insertelement <4 x i32> %2280, i32 %2278, i32 1
%2282 = insertelement <4 x i32> %2281, i32 %2279, i32 2
%2283 = bitcast <8 x i32> %85 to <32 x i8>
%2284 = bitcast <4 x i32> %87 to <16 x i8>
%2285 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2282, <32 x i8> %2283, <16 x i8> %2284, i32 2)
%2286 = extractelement <4 x float> %2285, i32 1
%2287 = extractelement <4 x float> %2285, i32 3
%2288 = fcmp oeq float %temp14.0, 4.000000e+00
%2289 = select i1 %2288, float 1.000000e+00, float 0.000000e+00
%2290 = bitcast float %2274 to i32
%2291 = bitcast float %2276 to i32
%2292 = bitcast float %225 to i32
%2293 = insertelement <4 x i32> undef, i32 %2290, i32 0
%2294 = insertelement <4 x i32> %2293, i32 %2291, i32 1
%2295 = insertelement <4 x i32> %2294, i32 %2292, i32 2
%2296 = bitcast <8 x i32> %77 to <32 x i8>
%2297 = bitcast <4 x i32> %79 to <16 x i8>
%2298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2295, <32 x i8> %2296, <16 x i8> %2297, i32 2)
%2299 = extractelement <4 x float> %2298, i32 1
%2300 = extractelement <4 x float> %2298, i32 3
%2301 = fcmp oeq float %temp14.0, 3.000000e+00
%2302 = select i1 %2301, float 1.000000e+00, float 0.000000e+00
%2303 = bitcast float %2274 to i32
%2304 = bitcast float %2276 to i32
%2305 = bitcast float %225 to i32
%2306 = insertelement <4 x i32> undef, i32 %2303, i32 0
%2307 = insertelement <4 x i32> %2306, i32 %2304, i32 1
%2308 = insertelement <4 x i32> %2307, i32 %2305, i32 2
%2309 = bitcast <8 x i32> %69 to <32 x i8>
%2310 = bitcast <4 x i32> %71 to <16 x i8>
%2311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2308, <32 x i8> %2309, <16 x i8> %2310, i32 2)
%2312 = extractelement <4 x float> %2311, i32 1
%2313 = extractelement <4 x float> %2311, i32 3
%2314 = fcmp oeq float %temp14.0, 2.000000e+00
%2315 = select i1 %2314, float 1.000000e+00, float 0.000000e+00
%2316 = bitcast float %2274 to i32
%2317 = bitcast float %2276 to i32
%2318 = bitcast float %225 to i32
%2319 = insertelement <4 x i32> undef, i32 %2316, i32 0
%2320 = insertelement <4 x i32> %2319, i32 %2317, i32 1
%2321 = insertelement <4 x i32> %2320, i32 %2318, i32 2
%2322 = bitcast <8 x i32> %61 to <32 x i8>
%2323 = bitcast <4 x i32> %63 to <16 x i8>
%2324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2321, <32 x i8> %2322, <16 x i8> %2323, i32 2)
%2325 = extractelement <4 x float> %2324, i32 1
%2326 = extractelement <4 x float> %2324, i32 3
%2327 = fcmp oeq float %temp14.0, 1.000000e+00
%2328 = select i1 %2327, float 1.000000e+00, float 0.000000e+00
%2329 = bitcast float %2274 to i32
%2330 = bitcast float %2276 to i32
%2331 = bitcast float %225 to i32
%2332 = insertelement <4 x i32> undef, i32 %2329, i32 0
%2333 = insertelement <4 x i32> %2332, i32 %2330, i32 1
%2334 = insertelement <4 x i32> %2333, i32 %2331, i32 2
%2335 = bitcast <8 x i32> %53 to <32 x i8>
%2336 = bitcast <4 x i32> %55 to <16 x i8>
%2337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2334, <32 x i8> %2335, <16 x i8> %2336, i32 2)
%2338 = extractelement <4 x float> %2337, i32 1
%2339 = extractelement <4 x float> %2337, i32 3
%2340 = fcmp oeq float %temp14.0, 0.000000e+00
%2341 = select i1 %2340, float 1.000000e+00, float 0.000000e+00
%2342 = fmul float %2338, %2341
%2343 = fmul float %2339, %2341
%2344 = fmul float %2325, %2328
%2345 = fadd float %2344, %2342
%2346 = fmul float %2326, %2328
%2347 = fadd float %2346, %2343
%2348 = fmul float %2312, %2315
%2349 = fadd float %2348, %2345
%2350 = fmul float %2313, %2315
%2351 = fadd float %2350, %2347
%2352 = fmul float %2299, %2302
%2353 = fadd float %2352, %2349
%2354 = fmul float %2300, %2302
%2355 = fadd float %2354, %2351
%2356 = fmul float %2286, %2289
%2357 = fadd float %2356, %2353
%2358 = fmul float %2287, %2289
%2359 = fadd float %2358, %2355
%2360 = fmul float %2359, 2.000000e+00
%2361 = fadd float %2360, -1.000000e+00
%2362 = fmul float %2357, 2.000000e+00
%2363 = fadd float %2362, -1.000000e+00
%2364 = fmul float %2361, %2361
%2365 = fmul float %2363, %2363
%2366 = fadd float %2364, %2365
%2367 = call float @llvm.AMDIL.clamp.(float %2366, float 0.000000e+00, float 1.000000e+00)
%2368 = fmul float %148, 0.000000e+00
%2369 = fmul float %2139, %148
%2370 = fmul float %2141, %148
%2371 = fmul float %2252, %149
%2372 = fadd float %2371, %2368
%2373 = fmul float %149, 0.000000e+00
%2374 = fadd float %2373, %2369
%2375 = fmul float %2250, %149
%2376 = fadd float %2375, %2370
%2377 = fmul float %2361, %150
%2378 = fadd float %2377, %2372
%2379 = fmul float %2363, %150
%2380 = fadd float %2379, %2374
%2381 = fmul float %150, 0.000000e+00
%2382 = fadd float %2381, %2376
%2383 = fmul float %92, %1682
%2384 = fmul float %92, %1684
%2385 = fmul float %92, %1686
%2386 = fmul float %93, %2030
%2387 = fadd float %2386, %2383
%2388 = fmul float %93, %2032
%2389 = fadd float %2388, %2384
%2390 = fmul float %93, %2034
%2391 = fadd float %2390, %2385
%2392 = fmul float %94, %2378
%2393 = fadd float %2392, %2387
%2394 = fmul float %94, %2380
%2395 = fadd float %2394, %2389
%2396 = fmul float %94, %2382
%2397 = fadd float %2396, %2391
%2398 = fmul float %2393, %2393
%2399 = fmul float %2395, %2395
%2400 = fadd float %2398, %2399
%2401 = fmul float %2397, %2397
%2402 = fadd float %2400, %2401
%2403 = fadd float %2402, 1.000000e+00
%2404 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2403)
%2405 = fmul float %2393, %2404
%2406 = fmul float %2395, %2404
%2407 = fmul float %2397, %2404
%2408 = fmul float %2405, %91
%2409 = fmul float %2406, %91
%2410 = fmul float %2407, %91
%2411 = fsub float %97, %2408
%2412 = fsub float %98, %2409
%2413 = fsub float %99, %2410
%2414 = fmul float %2411, %2411
%2415 = fmul float %2412, %2412
%2416 = fadd float %2415, %2414
%2417 = fmul float %2413, %2413
%2418 = fadd float %2416, %2417
%2419 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2418)
%2420 = fmul float %2411, %2419
%2421 = fmul float %2412, %2419
%2422 = fmul float %2413, %2419
%2423 = fadd float %109, %121
%2424 = fadd float %110, %122
%2425 = fadd float %111, %123
%2426 = fmul float %2423, %2423
%2427 = fmul float %2424, %2424
%2428 = fadd float %2427, %2426
%2429 = fmul float %2425, %2425
%2430 = fadd float %2428, %2429
%2431 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2430)
%2432 = fmul float %2423, %2431
%2433 = fmul float %2424, %2431
%2434 = fmul float %2425, %2431
%2435 = fmul float %2420, %2432
%2436 = fmul float %2421, %2433
%2437 = fadd float %2436, %2435
%2438 = fmul float %2422, %2434
%2439 = fadd float %2437, %2438
%2440 = call float @llvm.maxnum.f32(float %2439, float 0x3F1A36E2E0000000)
%2441 = fmul float %95, 3.200000e+01
%2442 = call float @llvm.pow.f32(float %2440, float %2441)
%2443 = call float @llvm.AMDIL.clamp.(float %2442, float 0.000000e+00, float 1.000000e+00)
%2444 = fmul float %2443, 2.000000e+00
%2445 = fsub float 3.000000e+00, %2444
%2446 = fmul float %2443, %2445
%2447 = fmul float %2443, %2446
%2448 = fmul float %2447, %95
%2449 = fmul float %1334, %32
%2450 = fmul float %1336, %33
%2451 = fmul float %1338, %34
%2452 = fmul float %2420, %109
%2453 = fmul float %2421, %110
%2454 = fadd float %2453, %2452
%2455 = fmul float %2422, %111
%2456 = fadd float %2454, %2455
%2457 = call float @llvm.AMDIL.clamp.(float %2456, float 0.000000e+00, float 1.000000e+00)
%2458 = fmul float %45, 2.000000e+00
%2459 = fmul float %46, 2.000000e+00
%2460 = fmul float %47, 2.000000e+00
%2461 = call float @llvm.maxnum.f32(float %2458, float %42)
%2462 = call float @llvm.maxnum.f32(float %2459, float %43)
%2463 = call float @llvm.maxnum.f32(float %2460, float %44)
%2464 = call float @llvm.minnum.f32(float %2461, float 1.000000e+00)
%2465 = call float @llvm.minnum.f32(float %2462, float 1.000000e+00)
%2466 = call float @llvm.minnum.f32(float %2463, float 1.000000e+00)
%2467 = fmul float %2464, %1334
%2468 = fmul float %2465, %1336
%2469 = fmul float %2466, %1338
%2470 = fmul float %2449, %2457
%2471 = fadd float %2470, %2467
%2472 = fmul float %2450, %2457
%2473 = fadd float %2472, %2468
%2474 = fmul float %2451, %2457
%2475 = fadd float %2474, %2469
%2476 = fmul float %32, %2448
%2477 = fadd float %2476, %2471
%2478 = fmul float %33, %2448
%2479 = fadd float %2478, %2473
%2480 = fmul float %34, %2448
%2481 = fadd float %2480, %2475
%2482 = fmul float %2477, 5.000000e-01
%2483 = fmul float %2479, 5.000000e-01
%2484 = fmul float %2481, 5.000000e-01
%2485 = fmul float %96, %30
%2486 = fadd float %2485, %31
%2487 = call float @llvm.AMDIL.clamp.(float %2486, float 0.000000e+00, float 1.000000e+00)
%2488 = fmul float %2482, %2487
%2489 = fmul float %2483, %2487
%2490 = fmul float %2484, %2487
%2491 = call i32 @llvm.SI.packf16(float %2488, float %2489)
%2492 = bitcast i32 %2491 to float
%2493 = call i32 @llvm.SI.packf16(float %2490, float 1.000000e+00)
%2494 = bitcast i32 %2493 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2492, float %2494, float %2492, float %2494)
ret void
}
; ---------------------------------------------------------------------------
; Module-level declarations for the function body that ends just above.
; Every intrinsic below is declared only (no body in this module). Each one
; except llvm.SI.export references an attribute group (#1 or #2) defined at
; the bottom of this section.
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
; (<16 x i8>, i32) -> float.
; NOTE(review): presumably a constant-buffer read (descriptor + offset); no
; call site is visible in this part of the dump - confirm against the body.
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
; (i32, i32, i32, <2 x i32>) -> float.
; NOTE(review): presumably fragment-input interpolation; argument meaning is
; not shown in this part of the dump - confirm.
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
; float -> float. In the body above its result is multiplied into each
; component of a 3-vector whose squared components were summed to form the
; argument.
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
; float -> float (no call site visible in this part of the dump).
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
; (float, float) -> float. Called above both with a constant second operand
; and with two non-constant operands.
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
; float -> float. Called above as x - floor(x).
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
; float -> float (no call site visible in this part of the dump).
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
; (<4 x i32>, <32 x i8>, <16 x i8>, i32) -> <4 x float>.
; At the visible call sites the first operand is built from floats bitcast to
; i32, the second and third are bitcasts of <8 x i32> / <4 x i32> values, the
; last is the literal 2, and only elements 1 and 3 of the result are read.
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
; (float, float, float) -> float. Uses group #2 (readnone only), unlike the
; other intrinsics. Every visible call passes 0.0 and 1.0 as the second and
; third operands.
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
; (float, float) -> float.
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
; (float, float) -> float. Called above with 1.0 as the second operand.
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
; (float, float) -> i32. The callers above bitcast the i32 result back to
; float and pass it to llvm.SI.export.
declare i32 @llvm.SI.packf16(float, float) #1
; Five i32 operands followed by four floats; returns void. Declared without an
; attribute group. The call above is the last instruction before `ret void`.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0 is not referenced in the visible lines.
; NOTE(review): presumably attached to the function definition, with
; "ShaderType" selecting the shader stage (the VERT module at the start of
; this dump uses "1") - confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; Group #1: attached to every intrinsic declaration above except
; llvm.AMDIL.clamp. and llvm.SI.export.
attributes #1 = { nounwind readnone }
; Group #2: attached to llvm.AMDIL.clamp. only.
attributes #2 = { readnone }
; Metadata node: the string "const", a null operand and i32 1.
; NOTE(review): presumably the TBAA tag for descriptor loads (the first module
; in this dump attaches `!tbaa !0` to its loads) - confirm for this module.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_mov_b64 s[84:85], s[4:5] ; BED40404
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000
v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001
v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100
v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101
v_interp_p1_f32 v15, v0, 2, 0, [m0] ; C83C0200
v_interp_p2_f32 v15, [v15], v1, 2, 0, [m0] ; C83D0201
v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300
v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301
v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400
v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401
v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500
v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501
v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600
v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601
v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700
v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900
v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901
v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00
v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01
v_interp_p1_f32 v9, v0, 3, 2, [m0] ; C8240B00
v_interp_p2_f32 v9, [v9], v1, 3, 2, [m0] ; C8250B01
v_interp_p1_f32 v30, v0, 0, 3, [m0] ; C8780C00
v_interp_p2_f32 v30, [v30], v1, 0, 3, [m0] ; C8790C01
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s37, s[8:11], 0x4 ; C2128904
s_buffer_load_dword s36, s[8:11], 0x5 ; C2120905
v_interp_p1_f32 v31, v0, 1, 3, [m0] ; C87C0D00
v_interp_p2_f32 v31, [v31], v1, 1, 3, [m0] ; C87D0D01
s_buffer_load_dword s42, s[8:11], 0x6 ; C2150906
v_interp_p1_f32 v27, v0, 2, 3, [m0] ; C86C0E00
s_buffer_load_dword s39, s[8:11], 0x0 ; C2138900
s_buffer_load_dword s38, s[8:11], 0x1 ; C2130901
v_interp_p2_f32 v27, [v27], v1, 2, 3, [m0] ; C86D0E01
s_buffer_load_dword s41, s[8:11], 0x2 ; C2148902
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e64 v0, s37, s37 ; D2100000 00004A25
v_mac_f32_e64 v0, s36, s36 ; D23E0000 00004824
s_buffer_load_dword s0, s[8:11], 0x38 ; C2000938
s_buffer_load_dword s1, s[8:11], 0x3c ; C200893C
v_mac_f32_e64 v0, s42, s42 ; D23E0000 0000542A
v_rsq_clamp_f32_e32 v1, v0 ; 7E025900
v_sub_f32_e32 v0, s39, v30 ; 08003C27
v_sub_f32_e32 v11, s38, v31 ; 08163E26
v_mul_f32_e32 v16, v0, v0 ; 10200100
v_mac_f32_e32 v16, v11, v11 ; 3E20170B
v_sub_f32_e32 v12, s41, v27 ; 08183629
v_mac_f32_e32 v16, v12, v12 ; 3E20190C
v_rsq_clamp_f32_e32 v17, v16 ; 7E225910
v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0
v_floor_f32_e32 v13, v13 ; 7E1A490D
v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF
v_mov_b32_e32 v18, 0x42800000 ; 7E2402FF 42800000
v_cmp_le_f32_e32 vcc, v18, v13 ; 7C061B12
v_and_b32_e32 v18, v5, v16 ; 36242105
v_and_b32_e32 v19, v6, v16 ; 36262106
v_and_b32_e32 v20, v9, v16 ; 36282109
v_mul_f32_e64 v16, |v5|, |v5| ; D2100310 00020B05
v_mad_f32 v16, |v6|, |v6|, v16 ; D2820310 04420D06
v_mad_f32 v16, |v9|, |v9|, v16 ; D2820310 04421309
v_rsq_clamp_f32_e32 v34, v16 ; 7E445910
v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0
v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0
v_floor_f32_e32 v21, v14 ; 7E2A490E
v_floor_f32_e32 v14, v15 ; 7E1C490F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v24, s1 ; 7E300201
v_mul_f32_e32 v22, s0, v13 ; 102C1A00
v_floor_f32_e32 v16, v22 ; 7E204916
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
v_mov_b32_e32 v15, 0xc2800000 ; 7E1E02FF C2800000
v_add_f32_e32 v13, v13, v15 ; 061A1F0D
v_mul_f32_e32 v15, s1, v13 ; 101E1A01
v_floor_f32_e32 v15, v15 ; 7E1E490F
v_mul_f32_e32 v23, s1, v15 ; 102E1E01
v_mad_f32 v28, v13, s1, -v15 ; D282001C 843C030D
v_floor_f32_e32 v13, v23 ; 7E1A4917
v_mad_f32 v29, v15, s1, -v13 ; D282001D 8434030F
v_add_f32_e32 v15, 4.0, v13 ; 061E1AF6
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v13, s0 ; 7E1A0200
v_mov_b32_e32 v32, v24 ; 7E400318
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v15, s0, v16 ; 101E2000
v_floor_f32_e32 v23, v22 ; 7E2E4916
v_subrev_f32_e32 v28, v23, v22 ; 0A382D17
v_floor_f32_e32 v15, v15 ; 7E1E490F
v_mad_f32 v29, v16, s0, -v15 ; D282001D 843C0110
v_mov_b32_e32 v32, v13 ; 7E40030D
s_or_b64 exec, exec, s[2:3] ; 88FE027E
v_mul_f32_e32 v23, s0, v21 ; 102E2A00
v_floor_f32_e32 v22, v23 ; 7E2C4917
v_mov_b32_e32 v16, 0x42800000 ; 7E2002FF 42800000
v_cmp_le_f32_e32 vcc, v16, v21 ; 7C062B10
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
v_mov_b32_e32 v16, 0xc2800000 ; 7E2002FF C2800000
v_add_f32_e32 v16, v21, v16 ; 06202115
v_mul_f32_e32 v21, s1, v16 ; 102A2001
v_floor_f32_e32 v21, v21 ; 7E2A4915
v_mul_f32_e32 v26, s1, v21 ; 10342A01
v_mad_f32 v25, v16, s1, -v21 ; D2820019 84540310
v_floor_f32_e32 v16, v26 ; 7E20491A
v_mad_f32 v26, v21, s1, -v16 ; D282001A 84400315
v_add_f32_e32 v16, 4.0, v16 ; 062020F6
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v33, v24 ; 7E420318
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v16, s0, v22 ; 10202C00
v_floor_f32_e32 v21, v23 ; 7E2A4917
v_subrev_f32_e32 v25, v21, v23 ; 0A322F15
v_floor_f32_e32 v16, v16 ; 7E204910
v_mad_f32 v26, v22, s0, -v16 ; D282001A 84400116
v_mov_b32_e32 v33, v13 ; 7E42030D
s_or_b64 exec, exec, s[2:3] ; 88FE027E
s_buffer_load_dword s13, s[8:11], 0xb ; C206890B
s_buffer_load_dword s14, s[8:11], 0x40 ; C2070940
s_buffer_load_dword s12, s[8:11], 0x44 ; C2060944
v_mul_f32_e32 v38, s0, v14 ; 104C1C00
v_floor_f32_e32 v35, v38 ; 7E464926
v_mov_b32_e32 v21, 0x42800000 ; 7E2A02FF 42800000
v_cmp_le_f32_e32 vcc, v21, v14 ; 7C061D15
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
v_mov_b32_e32 v21, 0xc2800000 ; 7E2A02FF C2800000
v_add_f32_e32 v14, v14, v21 ; 061C2B0E
v_mul_f32_e32 v21, s1, v14 ; 102A1C01
v_floor_f32_e32 v21, v21 ; 7E2A4915
v_mul_f32_e32 v23, s1, v21 ; 102E2A01
v_mad_f32 v22, v14, s1, -v21 ; D2820016 8454030E
v_floor_f32_e32 v14, v23 ; 7E1C4917
v_mad_f32 v23, v21, s1, -v14 ; D2820017 84380315
v_add_f32_e32 v21, 4.0, v14 ; 062A1CF6
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v39, s13 ; 7E4E020D
v_mov_b32_e32 v36, s14 ; 7E48020E
s_buffer_load_dword s14, s[8:11], 0xa ; C207090A
s_buffer_load_dword s100, s[8:11], 0xc ; C232090C
s_buffer_load_dword s101, s[8:11], 0xd ; C232890D
s_buffer_load_dword s4, s[8:11], 0xe ; C202090E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 19 ; 05F72604
s_buffer_load_dword s35, s[8:11], 0x48 ; C2118948
s_buffer_load_dword s40, s[8:11], 0x4c ; C214094C
s_buffer_load_dword s13, s[8:11], 0x50 ; C2068950
s_buffer_load_dword s4, s[8:11], 0x54 ; C2020954
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 20 ; 05F72804
s_buffer_load_dword s4, s[8:11], 0x55 ; C2020955
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 21 ; 05F72A04
s_buffer_load_dword s4, s[8:11], 0x56 ; C2020956
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 22 ; 05F72C04
s_buffer_load_dword s4, s[8:11], 0x58 ; C2020958
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 0 ; 05F70004
s_buffer_load_dword s4, s[8:11], 0x59 ; C2020959
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 1 ; 05F70204
s_buffer_load_dword s4, s[8:11], 0x5a ; C202095A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v251, s4, 2 ; 05F70404
v_mov_b32_e32 v37, s12 ; 7E4A020C
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v14, s0, v35 ; 101C4600
v_floor_f32_e32 v21, v38 ; 7E2A4926
v_subrev_f32_e32 v22, v21, v38 ; 0A2C4D15
v_floor_f32_e32 v21, v14 ; 7E2A490E
v_mad_f32 v23, v35, s0, -v21 ; D2820017 84540123
v_mov_b32_e32 v24, v13 ; 7E30030D
s_or_b64 exec, exec, s[2:3] ; 88FE027E
v_mac_f32_e32 v39, s14, v10 ; 3E4E140E
v_mul_f32_e32 v13, v17, v0 ; 101A0111
v_mul_f32_e32 v14, v17, v11 ; 101C1711
v_mul_f32_e32 v12, v17, v12 ; 10181911
v_mac_f32_e32 v13, s37, v1 ; 3E1A0225
v_mac_f32_e32 v14, s36, v1 ; 3E1C0224
v_mac_f32_e32 v12, s42, v1 ; 3E18022A
v_add_f32_e64 v0, 0, v39 clamp ; D2060800 00024E80
v_cmp_neq_f32_e64 s[24:25], s0, v32 ; D01A0018 00024000
v_cmp_eq_f32_e32 vcc, 4.0, v15 ; 7C041EF6
v_cmp_eq_f32_e64 s[2:3], 2.0, v15 ; D0040002 00021EF4
v_writelane_b32 v251, s2, 17 ; 05F72202
v_writelane_b32 v251, s3, 18 ; 05F72403
v_cmp_eq_f32_e64 s[2:3], 1.0, v15 ; D0040002 00021EF2
v_writelane_b32 v251, s2, 13 ; 05F71A02
v_writelane_b32 v251, s3, 14 ; 05F71C03
v_cmp_eq_f32_e64 s[2:3], 0, v15 ; D0040002 00021E80
v_writelane_b32 v251, s2, 15 ; 05F71E02
v_writelane_b32 v251, s3, 16 ; 05F72003
v_cmp_neq_f32_e64 s[22:23], s0, v33 ; D01A0016 00024200
v_cmp_neq_f32_e64 s[20:21], s0, v24 ; D01A0014 00023000
v_cmp_eq_f32_e64 s[0:1], 4.0, v16 ; D0040000 000220F6
v_writelane_b32 v251, s0, 5 ; 05F70A00
v_writelane_b32 v251, s1, 6 ; 05F70C01
v_cmp_eq_f32_e64 s[0:1], 2.0, v16 ; D0040000 000220F4
v_writelane_b32 v251, s0, 7 ; 05F70E00
v_writelane_b32 v251, s1, 8 ; 05F71001
v_cmp_eq_f32_e64 s[0:1], 1.0, v16 ; D0040000 000220F2
v_writelane_b32 v251, s0, 9 ; 05F71200
v_writelane_b32 v251, s1, 10 ; 05F71401
v_cmp_eq_f32_e64 s[0:1], 0, v16 ; D0040000 00022080
v_writelane_b32 v251, s0, 11 ; 05F71600
v_writelane_b32 v251, s1, 12 ; 05F71801
v_mul_f32_e32 v11, s37, v1 ; 10160225
v_mul_f32_e32 v10, s36, v1 ; 10140224
v_mul_f32_e32 v1, s42, v1 ; 1002022A
v_cmp_eq_f32_e64 s[0:1], 4.0, v21 ; D0040000 00022AF6
v_writelane_b32 v251, s0, 3 ; 05F70600
v_writelane_b32 v251, s1, 4 ; 05F70801
v_mov_b32_e32 v17, 0xbe4ccccd ; 7E2202FF BE4CCCCD
v_mad_f32 v18, v34, v18, v17 ; D2820012 04462522
v_mad_f32 v19, v34, v19, v17 ; D2820013 04462722
v_mac_f32_e32 v17, v34, v20 ; 3E222922
v_mov_b32_e32 v20, 0x40e00000 ; 7E2802FF 40E00000
v_mul_f32_e32 v34, v20, v18 ; 10442514
v_mul_f32_e32 v18, v20, v19 ; 10242714
v_mul_f32_e32 v19, v20, v17 ; 10262314
v_mov_b32_e32 v20, 0x3c23d70a ; 7E2802FF 3C23D70A
v_max_f32_e32 v17, v20, v34 ; 20224514
v_subrev_f32_e32 v34, s39, v30 ; 0A443C27
v_mul_f32_e32 v34, v34, v34 ; 10444522
v_subrev_f32_e32 v35, s38, v31 ; 0A463E26
v_mac_f32_e32 v34, v35, v35 ; 3E444723
v_subrev_f32_e32 v35, s41, v27 ; 0A463629
v_mac_f32_e32 v34, v35, v35 ; 3E444723
v_mul_f32_e32 v34, s13, v34 ; 1044440D
v_log_f32_e32 v34, v34 ; 7E444F22
s_load_dwordx4 s[92:95], s[84:85], 0x8 ; C0AE5508
v_cndmask_b32_e64 v35, v36, v37, s[24:25] ; D2000023 00624B24
v_rcp_f32_e32 v35, v35 ; 7E465523
v_mul_f32_e32 v34, 0x3f317218, v34 ; 104444FF 3F317218
v_mov_b32_e32 v38, 0x3b000000 ; 7E4C02FF 3B000000
v_mov_b32_e32 v39, 0x3b800000 ; 7E4E02FF 3B800000
v_cndmask_b32_e64 v40, v39, v38, s[24:25] ; D2000028 00624D27
v_mul_f32_e32 v41, v35, v30 ; 10523D23
v_floor_f32_e32 v41, v41 ; 7E524929
v_mad_f32 v41, v30, v35, -v41 ; D2820029 84A6471E
v_mul_f32_e32 v42, v35, v31 ; 10543F23
v_floor_f32_e32 v42, v42 ; 7E54492A
v_mad_f32 v42, v31, v35, -v42 ; D282002A 84AA471F
v_add_f32_e64 v43, s35, s35 ; D206002B 00004623
v_mul_f32_e32 v44, v35, v27 ; 10583723
v_floor_f32_e32 v44, v44 ; 7E58492C
v_mad_f32 v35, v27, v35, -v44 ; D2820023 84B2471B
v_mad_f32 v44, -v43, v40, 1.0 ; D282002C 23CA512B
v_mul_f32_e32 v40, s35, v40 ; 10505023
v_cndmask_b32_e64 v45, v36, v37, s[22:23] ; D200002D 005A4B24
v_rcp_f32_e32 v45, v45 ; 7E5A552D
v_mad_f32 v41, v44, v41, v40 ; D2820029 04A2532C
v_mad_f32 v42, v44, v42, v40 ; D282002A 04A2552C
v_mac_f32_e32 v40, v44, v35 ; 3E50472C
v_mul_f32_e32 v35, v45, v30 ; 10463D2D
v_floor_f32_e32 v35, v35 ; 7E464923
v_mad_f32 v35, v30, v45, -v35 ; D2820023 848E5B1E
v_mul_f32_e32 v44, v45, v31 ; 10583F2D
v_floor_f32_e32 v44, v44 ; 7E58492C
v_mad_f32 v44, v31, v45, -v44 ; D282002C 84B25B1F
v_mul_f32_e32 v46, v45, v27 ; 105C372D
v_floor_f32_e32 v46, v46 ; 7E5C492E
v_mad_f32 v45, v27, v45, -v46 ; D282002D 84BA5B1B
v_cndmask_b32_e64 v46, v39, v38, s[22:23] ; D200002E 005A4D27
v_mad_f32 v47, -v43, v46, 1.0 ; D282002F 23CA5D2B
v_mul_f32_e32 v46, s35, v46 ; 105C5C23
v_mad_f32 v48, v47, v35, v46 ; D2820030 04BA472F
v_mad_f32 v35, v47, v44, v46 ; D2820023 04BA592F
v_mac_f32_e32 v46, v47, v45 ; 3E5C5B2F
v_mul_f32_e32 v51, s40, v34 ; 10664428
v_mad_f32 v49, v32, v41, v28 ; D2820031 04725320
v_mad_f32 v50, v32, v42, v29 ; D2820032 04765520
v_mac_f32_e32 v28, v32, v40 ; 3E385120
v_mad_f32 v34, v33, v48, v25 ; D2820022 04666121
v_mad_f32 v35, v33, v35, v26 ; D2820023 046A4721
v_cndmask_b32_e64 v36, v36, v37, s[20:21] ; D2000024 00524B24
v_rcp_f32_e32 v36, v36 ; 7E485524
v_mac_f32_e32 v25, v33, v46 ; 3E325D21
v_mov_b32_e32 v44, v28 ; 7E58031C
v_mov_b32_e32 v45, v29 ; 7E5A031D
v_mov_b32_e32 v46, v30 ; 7E5C031E
v_mov_b32_e32 v47, v31 ; 7E5E031F
v_mac_f32_e32 v29, v32, v41 ; 3E3A5320
v_mov_b32_e32 v52, v25 ; 7E680319
v_mov_b32_e32 v53, v26 ; 7E6A031A
v_mov_b32_e32 v54, v27 ; 7E6C031B
v_mov_b32_e32 v55, v28 ; 7E6E031C
v_mac_f32_e32 v26, v33, v48 ; 3E346121
v_mul_f32_e32 v32, v36, v30 ; 10403D24
v_floor_f32_e32 v32, v32 ; 7E404920
v_mad_f32 v30, v30, v36, -v32 ; D282001E 8482491E
v_mul_f32_e32 v32, v36, v31 ; 10403F24
v_floor_f32_e32 v32, v32 ; 7E404920
v_mad_f32 v31, v31, v36, -v32 ; D282001F 8482491F
v_mul_f32_e32 v32, v36, v27 ; 10403724
v_floor_f32_e32 v32, v32 ; 7E404920
v_mad_f32 v27, v27, v36, -v32 ; D282001B 8482491B
v_cndmask_b32_e64 v32, v39, v38, s[20:21] ; D2000020 00524D27
v_mad_f32 v33, -v43, v32, 1.0 ; D2820021 23CA412B
v_mul_f32_e32 v36, s35, v32 ; 10484023
v_mad_f32 v30, v33, v30, v36 ; D282001E 04923D21
v_mad_f32 v32, v33, v31, v36 ; D2820020 04923F21
v_mac_f32_e32 v36, v33, v27 ; 3E483721
v_mad_f32 v31, v24, v30, v22 ; D282001F 045A3D18
v_mad_f32 v32, v24, v32, v23 ; D2820020 045E4118
v_mac_f32_e32 v22, v24, v36 ; 3E2C4918
s_load_dwordx8 s[68:75], s[6:7], 0x40 ; C0E20740
v_mov_b32_e32 v37, v22 ; 7E4A0316
v_mov_b32_e32 v38, v23 ; 7E4C0317
v_mov_b32_e32 v39, v24 ; 7E4E0318
v_mov_b32_e32 v40, v25 ; 7E500319
s_load_dwordx4 s[0:3], s[84:85], 0x20 ; C0805520
v_mac_f32_e32 v23, v24, v30 ; 3E2E3D18
v_mov_b32_e32 v45, v50 ; 7E5A0332
v_mov_b32_e32 v36, v51 ; 7E480333
v_mov_b32_e32 v53, v35 ; 7E6A0323
v_mov_b32_e32 v33, v51 ; 7E420333
v_mov_b32_e32 v38, v32 ; 7E4C0320
v_mov_b32_e32 v46, v51 ; 7E5C0333
v_mov_b32_e32 v30, v51 ; 7E3C0333
v_mov_b32_e32 v54, v51 ; 7E6C0333
v_mov_b32_e32 v27, v51 ; 7E360333
v_mov_b32_e32 v39, v51 ; 7E4E0333
v_mov_b32_e32 v24, v51 ; 7E300333
s_load_dwordx4 s[8:11], s[84:85], 0x18 ; C0845518
s_load_dwordx8 s[12:19], s[6:7], 0x20 ; C0C60720
s_load_dwordx8 s[32:39], s[6:7], 0x30 ; C0D00730
s_load_dwordx4 s[28:31], s[84:85], 0x10 ; C08E5510
s_load_dwordx4 s[88:91], s[84:85], 0x24 ; C0AC5524
s_load_dwordx8 s[20:27], s[6:7], 0x10 ; C0CA0710
s_load_dwordx8 s[76:83], s[6:7], 0x48 ; C0E60748
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[41:43], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[68:75], s[0:3] ; F0900700 00112931
s_load_dwordx4 s[40:43], s[84:85], 0x0 ; C0945500
s_load_dwordx8 s[60:67], s[6:7], 0x0 ; C0DE0700
s_load_dwordx4 s[56:59], s[84:85], 0x1c ; C09C551C
s_load_dwordx8 s[44:51], s[6:7], 0x38 ; C0D60738
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v251, s44, 39 ; 05F74E2C
v_writelane_b32 v251, s45, 40 ; 05F7502D
v_writelane_b32 v251, s46, 41 ; 05F7522E
v_writelane_b32 v251, s47, 42 ; 05F7542F
v_writelane_b32 v251, s48, 43 ; 05F75630
v_writelane_b32 v251, s49, 44 ; 05F75831
v_writelane_b32 v251, s50, 45 ; 05F75A32
v_writelane_b32 v251, s51, 46 ; 05F75C33
image_sample_l v[56:58], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[32:39], s[8:11] ; F0900700 00483831
image_sample_l v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[12:19], s[28:31] ; F0900700 00E33B31
s_load_dwordx4 s[44:47], s[84:85], 0xc ; C096550C
s_load_dwordx8 s[48:55], s[6:7], 0x18 ; C0D80718
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v251, s48, 31 ; 05F73E30
v_writelane_b32 v251, s49, 32 ; 05F74031
v_writelane_b32 v251, s50, 33 ; 05F74232
v_writelane_b32 v251, s51, 34 ; 05F74433
v_writelane_b32 v251, s52, 35 ; 05F74634
v_writelane_b32 v251, s53, 36 ; 05F74835
v_writelane_b32 v251, s54, 37 ; 05F74A36
v_writelane_b32 v251, s55, 38 ; 05F74C37
image_sample_l v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[20:27], s[92:95] ; F0900700 02E53E31
s_load_dwordx4 s[96:99], s[84:85], 0x4 ; C0B05504
s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v251, s48, 23 ; 05F72E30
v_writelane_b32 v251, s49, 24 ; 05F73031
v_writelane_b32 v251, s50, 25 ; 05F73232
v_writelane_b32 v251, s51, 26 ; 05F73433
v_writelane_b32 v251, s52, 27 ; 05F73634
v_writelane_b32 v251, s53, 28 ; 05F73835
v_writelane_b32 v251, s54, 29 ; 05F73A36
v_writelane_b32 v251, s55, 30 ; 05F73C37
image_sample_l v[65:67], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[60:67], s[40:43] ; F0900700 014F4131
image_sample_l v[68:70], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[68:75], s[0:3] ; F0900700 0011442C
image_sample_l v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[32:39], s[8:11] ; F0900700 0048472C
image_sample_l v[74:76], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[12:19], s[28:31] ; F0900700 00E34A2C
image_sample_l v[77:79], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[20:27], s[92:95] ; F0900700 02E54D2C
image_sample_l v[80:82], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[60:67], s[40:43] ; F0900700 014F502C
image_sample_l v[83:85], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[68:75], s[0:3] ; F0900700 0011531C
image_sample_l v[86:88], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[32:39], s[8:11] ; F0900700 0048561C
image_sample_l v[89:91], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[12:19], s[28:31] ; F0900700 00E3591C
image_sample_l v[92:94], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[20:27], s[92:95] ; F0900700 02E55C1C
image_sample_l v[95:97], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[60:67], s[40:43] ; F0900700 014F5F1C
image_sample_l v[98:100], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[68:75], s[0:3] ; F0900700 00116222
image_sample_l v[101:103], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[32:39], s[8:11] ; F0900700 00486522
image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[12:19], s[28:31] ; F0900700 00E36822
image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[20:27], s[92:95] ; F0900700 02E56B22
image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[60:67], s[40:43] ; F0900700 014F6E22
image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[68:75], s[0:3] ; F0900700 00117134
image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[32:39], s[8:11] ; F0900700 00487434
image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[12:19], s[28:31] ; F0900700 00E37734
image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[20:27], s[92:95] ; F0900700 02E57A34
image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[60:67], s[40:43] ; F0900700 014F7D34
image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[68:75], s[0:3] ; F0900700 00118019
image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[32:39], s[8:11] ; F0900700 00488319
image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[28:31] ; F0900700 00E38619
image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[20:27], s[92:95] ; F0900700 02E58919
image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[60:67], s[40:43] ; F0900700 014F8C19
image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[68:75], s[0:3] ; F0900700 00118F1F
image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[32:39], s[8:11] ; F0900700 0048921F
image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[28:31] ; F0900700 00E3951F
image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[20:27], s[92:95] ; F0900700 02E5981F
image_sample_l v[155:157], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[60:67], s[40:43] ; F0900700 014F9B1F
image_sample_l v[158:160], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[68:75], s[0:3] ; F0900700 00119E25
image_sample_l v[161:163], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[32:39], s[8:11] ; F0900700 0048A125
image_sample_l v[164:166], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[28:31] ; F0900700 00E3A425
s_load_dwordx4 s[84:87], s[84:85], 0x14 ; C0AA5514
s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728
image_sample_l v[167:169], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[20:27], s[92:95] ; F0900700 02E5A725
image_sample_l v[170:172], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[60:67], s[40:43] ; F0900700 014FAA25
image_sample_l v[173:175], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[68:75], s[0:3] ; F0900700 0011AD16
image_sample_l v[176:178], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[32:39], s[8:11] ; F0900700 0048B016
image_sample_l v[179:181], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[12:19], s[28:31] ; F0900700 00E3B316
image_sample_l v[182:184], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[20:27], s[92:95] ; F0900700 02E5B616
image_sample_l v[185:187], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[60:67], s[40:43] ; F0900700 014FB916
image_sample_l v[188:189], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[76:83], s[88:91] ; F0900A00 02D3BC2C
v_readlane_b32 s16, v251, 39 ; 02214FFB
v_readlane_b32 s17, v251, 40 ; 022351FB
v_readlane_b32 s18, v251, 41 ; 022553FB
v_readlane_b32 s19, v251, 42 ; 022755FB
v_readlane_b32 s20, v251, 43 ; 022957FB
v_readlane_b32 s21, v251, 44 ; 022B59FB
v_readlane_b32 s22, v251, 45 ; 022D5BFB
v_readlane_b32 s23, v251, 46 ; 022F5DFB
s_nop 2 ; BF800002
image_sample_l v[190:191], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[16:23], s[56:59] ; F0900A00 01C4BE2C
s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079
image_sample_l v[192:193], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[48:55], s[84:87] ; F0900A00 02ACC02C
v_readlane_b32 s8, v251, 31 ; 02113FFB
v_readlane_b32 s9, v251, 32 ; 021341FB
v_readlane_b32 s10, v251, 33 ; 021543FB
v_readlane_b32 s11, v251, 34 ; 021745FB
v_readlane_b32 s12, v251, 35 ; 021947FB
v_readlane_b32 s13, v251, 36 ; 021B49FB
v_readlane_b32 s14, v251, 37 ; 021D4BFB
v_readlane_b32 s15, v251, 38 ; 021F4DFB
s_nop 2 ; BF800002
image_sample_l v[194:195], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[8:15], s[44:47] ; F0900A00 0162C22C
v_readlane_b32 s0, v251, 23 ; 02012FFB
v_readlane_b32 s1, v251, 24 ; 020331FB
v_readlane_b32 s2, v251, 25 ; 020533FB
v_readlane_b32 s3, v251, 26 ; 020735FB
v_readlane_b32 s4, v251, 27 ; 020937FB
v_readlane_b32 s5, v251, 28 ; 020B39FB
v_readlane_b32 s6, v251, 29 ; 020D3BFB
v_readlane_b32 s7, v251, 30 ; 020F3DFB
s_nop 2 ; BF800002
image_sample_l v[44:45], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[0:7], s[96:99] ; F0900A00 03002C2C
image_sample_l v[46:47], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[76:83], s[88:91] ; F0900A00 02D32E1C
image_sample_l v[196:197], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[56:59] ; F0900A00 01C4C41C
s_mov_b32 s28, s16 ; BE9C0310
s_mov_b32 s29, s17 ; BE9D0311
s_mov_b32 s30, s18 ; BE9E0312
s_mov_b32 s31, s19 ; BE9F0313
s_mov_b32 s32, s20 ; BEA00314
s_mov_b32 s33, s21 ; BEA10315
s_mov_b32 s34, s22 ; BEA20316
s_mov_b32 s35, s23 ; BEA30317
image_sample_l v[198:199], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[48:55], s[84:87] ; F0900A00 02ACC61C
image_sample_l v[200:201], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[44:47] ; F0900A00 0162C81C
s_mov_b32 s20, s8 ; BE940308
s_mov_b32 s21, s9 ; BE950309
s_mov_b32 s22, s10 ; BE96030A
s_mov_b32 s23, s11 ; BE97030B
s_mov_b32 s24, s12 ; BE98030C
s_mov_b32 s25, s13 ; BE99030D
s_mov_b32 s26, s14 ; BE9A030E
s_mov_b32 s27, s15 ; BE9B030F
image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[0:7], s[96:99] ; F0900A00 03001C1C
s_mov_b32 s12, s0 ; BE8C0300
s_mov_b32 s13, s1 ; BE8D0301
s_mov_b32 s14, s2 ; BE8E0302
s_mov_b32 s15, s3 ; BE8F0303
s_mov_b32 s16, s4 ; BE900304
s_mov_b32 s17, s5 ; BE910305
s_mov_b32 s18, s6 ; BE920306
s_mov_b32 s19, s7 ; BE930307
image_sample_l v[202:203], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[76:83], s[88:91] ; F0900A00 02D3CA31
image_sample_l v[204:205], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[28:35], s[56:59] ; F0900A00 01C7CC31
image_sample_l v[206:207], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[48:55], s[84:87] ; F0900A00 02ACCE31
image_sample_l v[208:209], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[20:27], s[44:47] ; F0900A00 0165D031
image_sample_l v[48:49], 10, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[12:19], s[96:99] ; F0900A00 03033031
image_sample_l v[50:51], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[76:83], s[88:91] ; F0900A00 02D33234
image_sample_l v[210:211], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[28:35], s[56:59] ; F0900A00 01C7D234
image_sample_l v[212:213], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[48:55], s[84:87] ; F0900A00 02ACD434
image_sample_l v[214:215], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[20:27], s[44:47] ; F0900A00 0165D634
image_sample_l v[52:53], 10, 0, 0, 0, 0, 0, 0, 0, v[52:55], s[12:19], s[96:99] ; F0900A00 03033434
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[54:55], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[76:83], s[88:91] ; F0900A00 02D33619
image_sample_l v[216:217], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[28:35], s[56:59] ; F0900A00 01C7D819
image_sample_l v[218:219], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[48:55], s[84:87] ; F0900A00 02ACDA19
image_sample_l v[220:221], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[20:27], s[44:47] ; F0900A00 0165DC19
image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[12:19], s[96:99] ; F0900A00 03031919
image_sample_l v[222:223], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[76:83], s[88:91] ; F0900A00 02D3DE22
image_sample_l v[224:225], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[28:35], s[56:59] ; F0900A00 01C7E022
image_sample_l v[226:227], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[48:55], s[84:87] ; F0900A00 02ACE222
image_sample_l v[228:229], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[20:27], s[44:47] ; F0900A00 0165E422
image_sample_l v[34:35], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[12:19], s[96:99] ; F0900A00 03032222
image_sample_l v[230:231], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[76:83], s[88:91] ; F0900A00 02D3E625
image_sample_l v[232:233], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[28:35], s[56:59] ; F0900A00 01C7E825
image_sample_l v[234:235], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[48:55], s[84:87] ; F0900A00 02ACEA25
image_sample_l v[236:237], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[20:27], s[44:47] ; F0900A00 0165EC25
image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[12:19], s[96:99] ; F0900A00 03032425
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[38:39], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[76:83], s[88:91] ; F0900A00 02D32616
image_sample_l v[238:239], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[28:35], s[56:59] ; F0900A00 01C7EE16
image_sample_l v[240:241], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[48:55], s[84:87] ; F0900A00 02ACF016
image_sample_l v[242:243], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[20:27], s[44:47] ; F0900A00 0165F216
image_sample_l v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[12:19], s[96:99] ; F0900A00 03031616
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[244:245], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[76:83], s[88:91] ; F0900A00 02D3F41F
image_sample_l v[246:247], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[28:35], s[56:59] ; F0900A00 01C7F61F
image_sample_l v[248:249], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[48:55], s[84:87] ; F0900A00 02ACF81F
v_cndmask_b32_e64 v24, 0, 1.0, vcc ; D2000018 01A9E480
v_mov_b32_e32 v27, 0x40400000 ; 7E3602FF 40400000
v_cmp_eq_f32_e64 s[4:5], v15, v27 ; D0040004 0002370F
v_cndmask_b32_e64 v15, 0, 1.0, s[4:5] ; D200000F 0011E480
v_readlane_b32 s0, v251, 17 ; 020123FB
v_readlane_b32 s1, v251, 18 ; 020325FB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v30, 0, 1.0, s[0:1] ; D200001E 0001E480
v_readlane_b32 s0, v251, 13 ; 02011BFB
v_readlane_b32 s1, v251, 14 ; 02031DFB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v40, 0, 1.0, s[0:1] ; D2000028 0001E480
v_readlane_b32 s0, v251, 15 ; 02011FFB
v_readlane_b32 s1, v251, 16 ; 020321FB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v250, 0, 1.0, s[0:1] ; D20000FA 0001E480
v_mul_f32_e32 v65, v250, v65 ; 108283FA
v_mul_f32_e32 v66, v250, v66 ; 108485FA
v_mul_f32_e32 v67, v250, v67 ; 108687FA
v_mac_f32_e32 v65, v40, v62 ; 3E827D28
v_mac_f32_e32 v66, v40, v63 ; 3E847F28
v_mac_f32_e32 v67, v40, v64 ; 3E868128
v_mac_f32_e32 v65, v30, v59 ; 3E82771E
v_mac_f32_e32 v66, v30, v60 ; 3E84791E
v_mac_f32_e32 v67, v30, v61 ; 3E867B1E
v_mac_f32_e32 v65, v15, v56 ; 3E82710F
v_mac_f32_e32 v66, v15, v57 ; 3E84730F
v_mac_f32_e32 v67, v15, v58 ; 3E86750F
v_mac_f32_e32 v65, v24, v41 ; 3E825318
v_mac_f32_e32 v66, v24, v42 ; 3E845518
v_mac_f32_e32 v67, v24, v43 ; 3E865718
v_mul_f32_e32 v41, v250, v80 ; 1052A1FA
v_mul_f32_e32 v42, v250, v81 ; 1054A3FA
v_mul_f32_e32 v43, v250, v82 ; 1056A5FA
v_mac_f32_e32 v41, v40, v77 ; 3E529B28
v_mac_f32_e32 v42, v40, v78 ; 3E549D28
v_mac_f32_e32 v43, v40, v79 ; 3E569F28
v_mac_f32_e32 v41, v30, v74 ; 3E52951E
v_mac_f32_e32 v42, v30, v75 ; 3E54971E
v_mac_f32_e32 v43, v30, v76 ; 3E56991E
v_mac_f32_e32 v41, v15, v71 ; 3E528F0F
v_mac_f32_e32 v42, v15, v72 ; 3E54910F
v_mac_f32_e32 v43, v15, v73 ; 3E56930F
v_mac_f32_e32 v41, v24, v68 ; 3E528918
v_mac_f32_e32 v42, v24, v69 ; 3E548B18
v_mac_f32_e32 v43, v24, v70 ; 3E568D18
v_mul_f32_e32 v56, v250, v95 ; 1070BFFA
v_mul_f32_e32 v57, v250, v96 ; 1072C1FA
v_mul_f32_e32 v58, v250, v97 ; 1074C3FA
v_mac_f32_e32 v56, v40, v92 ; 3E70B928
v_mac_f32_e32 v57, v40, v93 ; 3E72BB28
v_mac_f32_e32 v58, v40, v94 ; 3E74BD28
v_mac_f32_e32 v56, v30, v89 ; 3E70B31E
v_mac_f32_e32 v57, v30, v90 ; 3E72B51E
v_mac_f32_e32 v58, v30, v91 ; 3E74B71E
v_mac_f32_e32 v56, v15, v86 ; 3E70AD0F
v_mac_f32_e32 v57, v15, v87 ; 3E72AF0F
v_mac_f32_e32 v58, v15, v88 ; 3E74B10F
v_mac_f32_e32 v56, v24, v83 ; 3E70A718
v_mac_f32_e32 v57, v24, v84 ; 3E72A918
v_mac_f32_e32 v58, v24, v85 ; 3E74AB18
v_readlane_b32 s0, v251, 11 ; 020117FB
v_readlane_b32 s1, v251, 12 ; 020319FB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v59, 0, 1.0, s[0:1] ; D200003B 0001E480
v_mul_f32_e32 v60, v59, v110 ; 1078DD3B
v_mul_f32_e32 v61, v59, v111 ; 107ADF3B
v_mul_f32_e32 v62, v59, v112 ; 107CE13B
v_readlane_b32 s0, v251, 9 ; 020113FB
v_readlane_b32 s1, v251, 10 ; 020315FB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v63, 0, 1.0, s[0:1] ; D200003F 0001E480
v_mac_f32_e32 v60, v63, v107 ; 3E78D73F
v_mac_f32_e32 v61, v63, v108 ; 3E7AD93F
v_mac_f32_e32 v62, v63, v109 ; 3E7CDB3F
v_readlane_b32 s0, v251, 7 ; 02010FFB
v_readlane_b32 s1, v251, 8 ; 020311FB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v64, 0, 1.0, s[0:1] ; D2000040 0001E480
v_mac_f32_e32 v60, v64, v104 ; 3E78D140
v_mac_f32_e32 v61, v64, v105 ; 3E7AD340
v_mac_f32_e32 v62, v64, v106 ; 3E7CD540
v_cmp_eq_f32_e64 s[2:3], v16, v27 ; D0040002 00023710
v_cndmask_b32_e64 v16, 0, 1.0, s[2:3] ; D2000010 0009E480
v_mac_f32_e32 v60, v16, v101 ; 3E78CB10
v_mac_f32_e32 v61, v16, v102 ; 3E7ACD10
v_mac_f32_e32 v62, v16, v103 ; 3E7CCF10
v_readlane_b32 s0, v251, 5 ; 02010BFB
v_readlane_b32 s1, v251, 6 ; 02030DFB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v68, 0, 1.0, s[0:1] ; D2000044 0001E480
v_mac_f32_e32 v60, v68, v98 ; 3E78C544
v_mac_f32_e32 v61, v68, v99 ; 3E7AC744
v_mac_f32_e32 v62, v68, v100 ; 3E7CC944
v_mul_f32_e32 v69, v59, v125 ; 108AFB3B
v_mul_f32_e32 v70, v59, v126 ; 108CFD3B
v_mul_f32_e32 v71, v59, v127 ; 108EFF3B
v_mac_f32_e32 v69, v63, v122 ; 3E8AF53F
v_mac_f32_e32 v70, v63, v123 ; 3E8CF73F
v_mac_f32_e32 v71, v63, v124 ; 3E8EF93F
v_mac_f32_e32 v69, v64, v119 ; 3E8AEF40
v_mac_f32_e32 v70, v64, v120 ; 3E8CF140
v_mac_f32_e32 v71, v64, v121 ; 3E8EF340
v_mac_f32_e32 v69, v16, v116 ; 3E8AE910
v_mac_f32_e32 v70, v16, v117 ; 3E8CEB10
v_mac_f32_e32 v71, v16, v118 ; 3E8EED10
v_mac_f32_e32 v69, v68, v113 ; 3E8AE344
v_mac_f32_e32 v70, v68, v114 ; 3E8CE544
v_mac_f32_e32 v71, v68, v115 ; 3E8EE744
v_mul_f32_e32 v72, v59, v140 ; 1091193B
v_mul_f32_e32 v73, v59, v141 ; 10931B3B
v_mul_f32_e32 v74, v59, v142 ; 10951D3B
v_mac_f32_e32 v72, v63, v137 ; 3E91133F
v_mac_f32_e32 v73, v63, v138 ; 3E93153F
v_mac_f32_e32 v74, v63, v139 ; 3E95173F
v_mac_f32_e32 v72, v64, v134 ; 3E910D40
v_mac_f32_e32 v73, v64, v135 ; 3E930F40
v_mac_f32_e32 v74, v64, v136 ; 3E951140
v_mac_f32_e32 v72, v16, v131 ; 3E910710
v_mac_f32_e32 v73, v16, v132 ; 3E930910
v_mac_f32_e32 v74, v16, v133 ; 3E950B10
v_mac_f32_e32 v72, v68, v128 ; 3E910144
v_mac_f32_e32 v73, v68, v129 ; 3E930344
v_mac_f32_e32 v74, v68, v130 ; 3E950544
v_cmp_eq_f32_e64 s[0:1], 0, v21 ; D0040000 00022A80
v_cndmask_b32_e64 v75, 0, 1.0, s[0:1] ; D200004B 0001E480
v_mul_f32_e32 v76, v75, v155 ; 1099374B
v_mul_f32_e32 v77, v75, v156 ; 109B394B
v_mul_f32_e32 v78, v75, v157 ; 109D3B4B
v_cmp_eq_f32_e64 s[0:1], 1.0, v21 ; D0040000 00022AF2
v_cndmask_b32_e64 v79, 0, 1.0, s[0:1] ; D200004F 0001E480
v_mac_f32_e32 v76, v79, v152 ; 3E99314F
v_mac_f32_e32 v77, v79, v153 ; 3E9B334F
v_mac_f32_e32 v78, v79, v154 ; 3E9D354F
v_cmp_eq_f32_e64 s[0:1], 2.0, v21 ; D0040000 00022AF4
v_cndmask_b32_e64 v80, 0, 1.0, s[0:1] ; D2000050 0001E480
v_mac_f32_e32 v76, v80, v149 ; 3E992B50
v_mac_f32_e32 v77, v80, v150 ; 3E9B2D50
v_mac_f32_e32 v78, v80, v151 ; 3E9D2F50
v_cmp_eq_f32_e64 s[0:1], v21, v27 ; D0040000 00023715
v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480
v_mac_f32_e32 v76, v21, v146 ; 3E992515
v_mac_f32_e32 v77, v21, v147 ; 3E9B2715
v_mac_f32_e32 v78, v21, v148 ; 3E9D2915
v_readlane_b32 s0, v251, 3 ; 020107FB
v_readlane_b32 s1, v251, 4 ; 020309FB
s_nop 2 ; BF800002
v_cndmask_b32_e64 v81, 0, 1.0, s[0:1] ; D2000051 0001E480
v_mac_f32_e32 v76, v81, v143 ; 3E991F51
v_mac_f32_e32 v77, v81, v144 ; 3E9B2151
v_mac_f32_e32 v78, v81, v145 ; 3E9D2351
v_mul_f32_e32 v82, v75, v170 ; 10A5554B
v_mul_f32_e32 v83, v75, v171 ; 10A7574B
v_mul_f32_e32 v84, v75, v172 ; 10A9594B
v_mac_f32_e32 v82, v79, v167 ; 3EA54F4F
v_mac_f32_e32 v83, v79, v168 ; 3EA7514F
v_mac_f32_e32 v84, v79, v169 ; 3EA9534F
v_mac_f32_e32 v82, v80, v164 ; 3EA54950
v_mac_f32_e32 v83, v80, v165 ; 3EA74B50
v_mac_f32_e32 v84, v80, v166 ; 3EA94D50
v_mac_f32_e32 v82, v21, v161 ; 3EA54315
v_mac_f32_e32 v83, v21, v162 ; 3EA74515
v_mac_f32_e32 v84, v21, v163 ; 3EA94715
v_mac_f32_e32 v82, v81, v158 ; 3EA53D51
v_mac_f32_e32 v83, v81, v159 ; 3EA73F51
v_mac_f32_e32 v84, v81, v160 ; 3EA94151
v_mul_f32_e32 v85, v75, v185 ; 10AB734B
v_mul_f32_e32 v86, v75, v186 ; 10AD754B
v_mul_f32_e32 v87, v75, v187 ; 10AF774B
v_mac_f32_e32 v85, v79, v182 ; 3EAB6D4F
v_mac_f32_e32 v86, v79, v183 ; 3EAD6F4F
v_mac_f32_e32 v87, v79, v184 ; 3EAF714F
v_mac_f32_e32 v85, v80, v179 ; 3EAB6750
v_mac_f32_e32 v86, v80, v180 ; 3EAD6950
v_mac_f32_e32 v87, v80, v181 ; 3EAF6B50
v_mac_f32_e32 v85, v21, v176 ; 3EAB6115
v_mac_f32_e32 v86, v21, v177 ; 3EAD6315
v_mac_f32_e32 v87, v21, v178 ; 3EAF6515
v_mac_f32_e32 v85, v81, v173 ; 3EAB5B51
v_mac_f32_e32 v86, v81, v174 ; 3EAD5D51
v_mac_f32_e32 v87, v81, v175 ; 3EAF5F51
v_mul_f32_e32 v44, v250, v44 ; 105859FA
v_mul_f32_e32 v45, v250, v45 ; 105A5BFA
v_mac_f32_e32 v44, v40, v194 ; 3E598528
v_mac_f32_e32 v45, v40, v195 ; 3E5B8728
v_mul_f32_e32 v28, v250, v28 ; 103839FA
v_mul_f32_e32 v29, v250, v29 ; 103A3BFA
v_mac_f32_e32 v28, v40, v200 ; 3E399128
v_mac_f32_e32 v29, v40, v201 ; 3E3B9328
v_mul_f32_e32 v48, v250, v48 ; 106061FA
v_mul_f32_e32 v49, v250, v49 ; 106263FA
v_mac_f32_e32 v48, v40, v208 ; 3E61A128
v_mac_f32_e32 v49, v40, v209 ; 3E63A328
v_mac_f32_e32 v44, v30, v192 ; 3E59811E
v_mac_f32_e32 v45, v30, v193 ; 3E5B831E
v_mac_f32_e32 v28, v30, v198 ; 3E398D1E
v_mac_f32_e32 v29, v30, v199 ; 3E3B8F1E
v_mac_f32_e32 v48, v30, v206 ; 3E619D1E
v_mac_f32_e32 v49, v30, v207 ; 3E639F1E
v_mac_f32_e32 v44, v15, v190 ; 3E597D0F
v_mac_f32_e32 v45, v15, v191 ; 3E5B7F0F
v_mac_f32_e32 v28, v15, v196 ; 3E39890F
v_mac_f32_e32 v29, v15, v197 ; 3E3B8B0F
v_mac_f32_e32 v48, v15, v204 ; 3E61990F
v_mac_f32_e32 v49, v15, v205 ; 3E639B0F
v_mac_f32_e32 v44, v24, v188 ; 3E597918
v_mac_f32_e32 v45, v24, v189 ; 3E5B7B18
v_mac_f32_e32 v28, v24, v46 ; 3E385D18
v_mac_f32_e32 v29, v24, v47 ; 3E3A5F18
v_mac_f32_e32 v48, v24, v202 ; 3E619518
v_mac_f32_e32 v49, v24, v203 ; 3E639718
v_mul_f32_e32 v15, v59, v52 ; 101E693B
v_mul_f32_e32 v24, v59, v53 ; 10306B3B
v_mac_f32_e32 v15, v63, v214 ; 3E1FAD3F
v_mac_f32_e32 v24, v63, v215 ; 3E31AF3F
v_mul_f32_e32 v25, v59, v25 ; 1032333B
v_mul_f32_e32 v26, v59, v26 ; 1034353B
v_mac_f32_e32 v25, v63, v220 ; 3E33B93F
v_mac_f32_e32 v26, v63, v221 ; 3E35BB3F
v_mul_f32_e32 v30, v59, v34 ; 103C453B
v_mul_f32_e32 v34, v59, v35 ; 1044473B
v_mac_f32_e32 v30, v63, v228 ; 3E3DC93F
v_mac_f32_e32 v34, v63, v229 ; 3E45CB3F
v_mac_f32_e32 v15, v64, v212 ; 3E1FA940
v_mac_f32_e32 v24, v64, v213 ; 3E31AB40
v_mac_f32_e32 v25, v64, v218 ; 3E33B540
v_mac_f32_e32 v26, v64, v219 ; 3E35B740
v_mac_f32_e32 v30, v64, v226 ; 3E3DC540
v_mac_f32_e32 v34, v64, v227 ; 3E45C740
v_mac_f32_e32 v15, v16, v210 ; 3E1FA510
v_mac_f32_e32 v24, v16, v211 ; 3E31A710
v_mac_f32_e32 v25, v16, v216 ; 3E33B110
v_mac_f32_e32 v26, v16, v217 ; 3E35B310
v_mac_f32_e32 v30, v16, v224 ; 3E3DC110
v_mac_f32_e32 v34, v16, v225 ; 3E45C310
v_mac_f32_e32 v15, v68, v50 ; 3E1E6544
v_mac_f32_e32 v24, v68, v51 ; 3E306744
v_mac_f32_e32 v25, v68, v54 ; 3E326D44
v_mac_f32_e32 v26, v68, v55 ; 3E346F44
v_mac_f32_e32 v30, v68, v222 ; 3E3DBD44
v_mac_f32_e32 v34, v68, v223 ; 3E45BF44
s_waitcnt vmcnt(8) ; BF8C0778
v_mul_f32_e32 v16, v75, v36 ; 1020494B
v_mul_f32_e32 v35, v75, v37 ; 10464B4B
v_mac_f32_e32 v16, v79, v236 ; 3E21D94F
v_mac_f32_e32 v35, v79, v237 ; 3E47DB4F
s_waitcnt vmcnt(3) ; BF8C0773
v_mul_f32_e32 v22, v75, v22 ; 102C2D4B
v_mul_f32_e32 v23, v75, v23 ; 102E2F4B
v_mac_f32_e32 v22, v79, v242 ; 3E2DE54F
v_mac_f32_e32 v23, v79, v243 ; 3E2FE74F
image_sample_l v[36:37], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[20:27], s[44:47] ; F0900A00 0165241F
image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[96:99] ; F0900A00 03031F1F
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v31, v75, v31 ; 103E3F4B
v_mul_f32_e32 v32, v75, v32 ; 1040414B
v_mac_f32_e32 v31, v79, v36 ; 3E3E494F
v_mac_f32_e32 v32, v79, v37 ; 3E404B4F
v_mac_f32_e32 v16, v80, v234 ; 3E21D550
v_mac_f32_e32 v35, v80, v235 ; 3E47D750
v_mac_f32_e32 v22, v80, v240 ; 3E2DE150
v_mac_f32_e32 v23, v80, v241 ; 3E2FE350
v_mac_f32_e32 v31, v80, v248 ; 3E3FF150
v_mac_f32_e32 v32, v80, v249 ; 3E41F350
v_mac_f32_e32 v16, v21, v232 ; 3E21D115
v_mac_f32_e32 v35, v21, v233 ; 3E47D315
v_mac_f32_e32 v22, v21, v238 ; 3E2DDD15
v_mac_f32_e32 v23, v21, v239 ; 3E2FDF15
v_mac_f32_e32 v31, v21, v246 ; 3E3FED15
v_mac_f32_e32 v32, v21, v247 ; 3E41EF15
v_mac_f32_e32 v16, v81, v230 ; 3E21CD51
v_mac_f32_e32 v35, v81, v231 ; 3E47CF51
v_max_f32_e32 v18, v20, v18 ; 20242514
v_max_f32_e32 v19, v20, v19 ; 20262714
v_add_f32_e32 v20, v18, v17 ; 06282312
v_add_f32_e32 v20, v19, v20 ; 06282913
v_rcp_f32_e32 v20, v20 ; 7E285514
v_mac_f32_e32 v22, v81, v38 ; 3E2C4D51
v_mac_f32_e32 v23, v81, v39 ; 3E2E4F51
v_mac_f32_e32 v31, v81, v244 ; 3E3FE951
v_mac_f32_e32 v32, v81, v245 ; 3E41EB51
v_mul_f32_e32 v17, v20, v17 ; 10222314
v_mul_f32_e32 v18, v20, v18 ; 10242514
v_mul_f32_e32 v19, v20, v19 ; 10262714
v_mul_f32_e32 v20, v19, v60 ; 10287913
v_mac_f32_e32 v20, v17, v69 ; 3E288B11
v_mul_f32_e32 v21, v19, v61 ; 102A7B13
v_mac_f32_e32 v21, v17, v70 ; 3E2A8D11
v_mul_f32_e32 v33, v19, v62 ; 10427D13
v_mac_f32_e32 v33, v17, v71 ; 3E428F11
v_mac_f32_e32 v20, v18, v72 ; 3E289112
v_mac_f32_e32 v21, v18, v73 ; 3E2A9312
v_mac_f32_e32 v33, v18, v74 ; 3E429512
v_mul_f32_e32 v36, v19, v65 ; 10488313
v_mac_f32_e32 v36, v17, v41 ; 3E485311
v_mul_f32_e32 v37, v19, v66 ; 104A8513
v_mac_f32_e32 v37, v17, v42 ; 3E4A5511
v_mul_f32_e32 v38, v19, v67 ; 104C8713
v_mac_f32_e32 v38, v17, v43 ; 3E4C5711
v_mac_f32_e32 v36, v18, v56 ; 3E487112
v_mac_f32_e32 v37, v18, v57 ; 3E4A7312
v_mac_f32_e32 v38, v18, v58 ; 3E4C7512
v_mul_f32_e32 v36, v36, v7 ; 10480F24
v_mac_f32_e32 v36, v20, v8 ; 3E481114
v_mul_f32_e32 v20, v37, v7 ; 10280F25
v_mac_f32_e32 v20, v21, v8 ; 3E281115
v_mul_f32_e32 v21, v38, v7 ; 102A0F26
v_mac_f32_e32 v21, v33, v8 ; 3E2A1121
v_mad_f32 v33, 2.0, v44, -1.0 ; D2820021 03CE58F4
v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4
v_mul_f32_e32 v33, v17, v33 ; 10424311
v_mac_f32_e32 v33, v18, v29 ; 3E423B12
v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4
v_mad_f32 v29, 2.0, v49, -1.0 ; D282001D 03CE62F4
v_mul_f32_e32 v37, 0, v17 ; 104A2280
v_mad_f32 v28, v18, v28, v37 ; D282001C 04963912
v_mac_f32_e32 v28, v19, v29 ; 3E383B13
v_mad_f32 v29, 2.0, v45, -1.0 ; D282001D 03CE5AF4
v_mad_f32 v38, 2.0, v48, -1.0 ; D2820026 03CE60F4
v_mul_f32_e32 v29, v17, v29 ; 103A3B11
v_mac_f32_e32 v29, 0, v18 ; 3E3A2480
v_mac_f32_e32 v29, v19, v38 ; 3E3A4D13
v_mad_f32 v15, 2.0, v15, -1.0 ; D282000F 03CE1EF4
v_mad_f32 v26, 2.0, v26, -1.0 ; D282001A 03CE34F4
v_mul_f32_e32 v15, v17, v15 ; 101E1F11
v_mac_f32_e32 v15, v18, v26 ; 3E1E3512
v_mad_f32 v25, 2.0, v25, -1.0 ; D2820019 03CE32F4
v_mad_f32 v26, 2.0, v34, -1.0 ; D282001A 03CE44F4
v_mad_f32 v25, v18, v25, v37 ; D2820019 04963312
v_mac_f32_e32 v25, v19, v26 ; 3E323513
v_mad_f32 v24, 2.0, v24, -1.0 ; D2820018 03CE30F4
v_mad_f32 v26, 2.0, v30, -1.0 ; D282001A 03CE3CF4
v_mul_f32_e32 v24, v17, v24 ; 10303111
v_mac_f32_e32 v24, 0, v18 ; 3E302480
v_mac_f32_e32 v24, v19, v26 ; 3E303513
v_mac_f32_e32 v33, 0, v19 ; 3E422680
v_mul_f32_e32 v26, v28, v7 ; 10340F1C
v_mul_f32_e32 v28, v29, v7 ; 10380F1D
v_mul_f32_e32 v7, v33, v7 ; 100E0F21
v_mac_f32_e32 v26, v25, v8 ; 3E341119
v_mac_f32_e32 v28, v24, v8 ; 3E381118
v_mac_f32_e32 v15, 0, v19 ; 3E1E2680
v_mac_f32_e32 v7, v15, v8 ; 3E0E110F
v_mul_f32_e32 v8, v19, v76 ; 10109913
v_mac_f32_e32 v8, v17, v82 ; 3E10A511
v_mul_f32_e32 v15, v19, v77 ; 101E9B13
v_mac_f32_e32 v15, v17, v83 ; 3E1EA711
v_mul_f32_e32 v24, v19, v78 ; 10309D13
v_mac_f32_e32 v24, v17, v84 ; 3E30A911
v_mac_f32_e32 v8, v18, v85 ; 3E10AB12
v_mac_f32_e32 v15, v18, v86 ; 3E1EAD12
v_mac_f32_e32 v24, v18, v87 ; 3E30AF12
v_mad_f32 v25, 2.0, v35, -1.0 ; D2820019 03CE46F4
v_mad_f32 v16, 2.0, v16, -1.0 ; D2820010 03CE20F4
v_mul_f32_e32 v25, v17, v25 ; 10323311
v_mul_f32_e32 v16, v17, v16 ; 10202111
v_mad_f32 v17, 2.0, v22, -1.0 ; D2820011 03CE2CF4
v_mac_f32_e32 v37, v18, v17 ; 3E4A2312
v_mad_f32 v17, 2.0, v23, -1.0 ; D2820011 03CE2EF4
v_mac_f32_e32 v16, v18, v17 ; 3E202312
v_mac_f32_e32 v25, 0, v18 ; 3E322480
v_mad_f32 v17, 2.0, v32, -1.0 ; D2820011 03CE40F4
v_mac_f32_e32 v37, v19, v17 ; 3E4A2313
v_mad_f32 v17, 2.0, v31, -1.0 ; D2820011 03CE3EF4
v_mac_f32_e32 v25, v19, v17 ; 3E322313
v_mac_f32_e32 v16, 0, v19 ; 3E202680
v_mac_f32_e32 v26, v37, v4 ; 3E340925
v_mac_f32_e32 v28, v25, v4 ; 3E380919
v_mac_f32_e32 v7, v16, v4 ; 3E0E0910
v_mul_f32_e32 v16, v28, v28 ; 1020391C
v_mac_f32_e32 v16, v26, v26 ; 3E20351A
v_mac_f32_e32 v16, v7, v7 ; 3E200F07
v_add_f32_e32 v16, 1.0, v16 ; 062020F2
v_rsq_clamp_f32_e32 v16, v16 ; 7E205910
v_mac_f32_e32 v36, v8, v4 ; 3E480908
v_mac_f32_e32 v20, v15, v4 ; 3E28090F
v_mac_f32_e32 v21, v24, v4 ; 3E2A0918
v_mul_f32_e32 v4, v16, v26 ; 10083510
v_mul_f32_e32 v8, v16, v28 ; 10103910
v_mul_f32_e32 v7, v16, v7 ; 100E0F10
v_mad_f32 v4, -v4, v3, v5 ; D2820004 24160704
v_mad_f32 v5, -v8, v3, v6 ; D2820005 241A0708
v_mad_f32 v3, -v7, v3, v9 ; D2820003 24260707
v_mul_f32_e32 v6, v4, v4 ; 100C0904
v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05
v_mac_f32_e32 v6, v3, v3 ; 3E0C0703
v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906
v_mul_f32_e32 v7, v13, v13 ; 100E1B0D
v_mac_f32_e32 v7, v14, v14 ; 3E0E1D0E
v_mac_f32_e32 v7, v12, v12 ; 3E0E190C
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mul_f32_e32 v4, v6, v4 ; 10080906
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v3, v6, v3 ; 10060706
v_mul_f32_e32 v6, v7, v13 ; 100C1B07
v_mul_f32_e32 v8, v7, v14 ; 10101D07
v_mul_f32_e32 v7, v7, v12 ; 100E1907
v_mul_f32_e32 v6, v6, v4 ; 100C0906
v_mac_f32_e32 v6, v8, v5 ; 3E0C0B08
v_mac_f32_e32 v6, v7, v3 ; 3E0C0707
v_max_f32_e32 v6, 0x38d1b717, v6 ; 200C0CFF 38D1B717
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mac_f32_e32 v4, v10, v5 ; 3E080B0A
v_mul_f32_e32 v5, 0x42000000, v2 ; 100A04FF 42000000
v_mul_legacy_f32_e32 v5, v5, v6 ; 0E0A0D05
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mac_f32_e32 v27, -2.0, v5 ; 3E360AF5
v_mul_f32_e32 v6, v27, v5 ; 100C0B1B
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mul_f32_e32 v5, s100, v36 ; 100A4864
v_mac_f32_e32 v4, v1, v3 ; 3E080701
v_mul_f32_e32 v1, s101, v20 ; 10022865
v_readlane_b32 s1, v251, 19 ; 020327FB
s_nop 2 ; BF800002
v_mul_f32_e32 v3, s1, v21 ; 10062A01
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_readlane_b32 s0, v251, 0 ; 020101FB
s_nop 2 ; BF800002
v_add_f32_e64 v6, s0, s0 ; D2060006 00000000
v_readlane_b32 s0, v251, 1 ; 020103FB
s_nop 2 ; BF800002
v_add_f32_e64 v7, s0, s0 ; D2060007 00000000
v_readlane_b32 s0, v251, 2 ; 020105FB
s_nop 2 ; BF800002
v_add_f32_e64 v8, s0, s0 ; D2060008 00000000
v_readlane_b32 s0, v251, 20 ; 020129FB
s_nop 2 ; BF800002
v_max_f32_e32 v6, s0, v6 ; 200C0C00
v_readlane_b32 s0, v251, 21 ; 02012BFB
s_nop 2 ; BF800002
v_max_f32_e32 v7, s0, v7 ; 200E0E00
v_readlane_b32 s0, v251, 22 ; 02012DFB
s_nop 2 ; BF800002
v_max_f32_e32 v8, s0, v8 ; 20101000
v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2
v_min_f32_e32 v7, 1.0, v7 ; 1E0E0EF2
v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2
v_mul_f32_e32 v6, v36, v6 ; 100C0D24
v_mul_f32_e32 v7, v20, v7 ; 100E0F14
v_mul_f32_e32 v8, v21, v8 ; 10101115
v_mac_f32_e32 v6, v4, v5 ; 3E0C0B04
v_mac_f32_e32 v7, v4, v1 ; 3E0E0304
v_mac_f32_e32 v8, v4, v3 ; 3E100704
v_mac_f32_e32 v6, s100, v2 ; 3E0C0464
v_mac_f32_e32 v7, s101, v2 ; 3E0E0465
v_mac_f32_e32 v8, s1, v2 ; 3E100401
v_mul_f32_e32 v1, 0.5, v6 ; 10020CF0
v_mul_f32_e32 v2, 0.5, v7 ; 10040EF0
v_mul_f32_e32 v3, 0.5, v8 ; 100610F0
v_mul_f32_e32 v1, v0, v1 ; 10020300
v_mul_f32_e32 v2, v0, v2 ; 10040500
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 252
Code Size: 4776 bytes
LDS: 0 blocks
Scratch: 12288 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[4], IN[0].xxxx
1: MAD TEMP[0], CONST[5], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[7], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[0], IN[0].xxxx
5: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1]
8: MOV TEMP[1].yzw, TEMP[1].yxyz
9: MOV TEMP[1].x, TEMP[0].zzzz
10: MOV OUT[1], TEMP[0]
11: MOV OUT[2], TEMP[1]
12: MOV OUT[0], TEMP[0]
13: END
; ModuleID = 'tgsi'
; Vertex shader entry point: LLVM IR generated for the TGSI VERT program dumped directly above.
; It multiplies the single vertex input (IN[0]) by two groups of constants and issues three exports.
; NOTE(review): argument roles are inferred only from their uses in this body:
;   %1      -> array whose element 0 is the descriptor passed to llvm.SI.load.const
;   %4      -> array whose element 0 is the descriptor passed to llvm.SI.vs.load.input
;   %5 + %7 -> index operand of the vertex fetch
; The remaining arguments are not referenced by this function.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte descriptor at element 0 of %1; every llvm.SI.load.const below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0..56: three floats out of each 16-byte row (offsets 12, 28, 44, 60 are never read).
; These values feed %77..%97 and line up with TGSI CONST[0..3].xyz.
; NOTE(review): the i32 operand appears to be a byte offset (the disassembly reads dword offsets 0x0, 0x1, 0x2, 0x4, ...) - confirm.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
; Offsets 64..124: all four floats of four consecutive rows.
; These values feed %49..%76 and line up with TGSI CONST[4..7].xyzw.
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
; Fetch the vertex input: descriptor 0 of %4, indexed by %5 + %7, then split into its four components.
; %45..%48 correspond to TGSI IN[0].x, .y, .z, .w.
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
; TGSI 0: TEMP[0] = CONST[4] * IN[0].x
%49 = fmul float %25, %45
%50 = fmul float %26, %45
%51 = fmul float %27, %45
%52 = fmul float %28, %45
; TGSI 1: TEMP[0] += CONST[5] * IN[0].y (each MAD is emitted as a separate fmul + fadd)
%53 = fmul float %29, %46
%54 = fadd float %53, %49
%55 = fmul float %30, %46
%56 = fadd float %55, %50
%57 = fmul float %31, %46
%58 = fadd float %57, %51
%59 = fmul float %32, %46
%60 = fadd float %59, %52
; TGSI 2: TEMP[0] += CONST[6] * IN[0].z
%61 = fmul float %33, %47
%62 = fadd float %61, %54
%63 = fmul float %34, %47
%64 = fadd float %63, %56
%65 = fmul float %35, %47
%66 = fadd float %65, %58
%67 = fmul float %36, %47
%68 = fadd float %67, %60
; TGSI 3: TEMP[0] += CONST[7] * IN[0].w  ->  TEMP[0].xyzw = %70, %72, %74, %76
%69 = fmul float %37, %48
%70 = fadd float %69, %62
%71 = fmul float %38, %48
%72 = fadd float %71, %64
%73 = fmul float %39, %48
%74 = fadd float %73, %66
%75 = fmul float %40, %48
%76 = fadd float %75, %68
; TGSI 4: TEMP[1].xyz = CONST[0].xyz * IN[0].x (no .w lane is computed here)
%77 = fmul float %13, %45
%78 = fmul float %14, %45
%79 = fmul float %15, %45
; TGSI 5: TEMP[1].xyz += CONST[1].xyz * IN[0].y
%80 = fmul float %16, %46
%81 = fadd float %80, %77
%82 = fmul float %17, %46
%83 = fadd float %82, %78
%84 = fmul float %18, %46
%85 = fadd float %84, %79
; TGSI 6: TEMP[1].xyz += CONST[2].xyz * IN[0].z
%86 = fmul float %19, %47
%87 = fadd float %86, %81
%88 = fmul float %20, %47
%89 = fadd float %88, %83
%90 = fmul float %21, %47
%91 = fadd float %90, %85
; TGSI 7: TEMP[1].xyz += CONST[3].xyz * IN[0].w  ->  TEMP[1].xyz = %93, %95, %97
%92 = fmul float %22, %48
%93 = fadd float %92, %87
%94 = fmul float %23, %48
%95 = fadd float %94, %89
%96 = fmul float %24, %48
%97 = fadd float %96, %91
; Exports. TGSI writes TEMP[0] to both OUT[1] (GENERIC[0]) and OUT[0] (POSITION); two of the
; exports below (fourth operand 32 and 12) carry exactly those four values.
; NOTE(review): operand meanings (15 / 32, 33, 12 / the 1 on the last call) are not defined in this dump;
; they reappear as "exp 15, 32, ...", "exp 15, 33, ..." and "exp 15, 12, 0, 1, ..." in the disassembly - confirm against the intrinsic definition.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %70, float %72, float %74, float %76)
; (%74, %93, %95, %97) = (TEMP[0].z, TEMP[1].x, TEMP[1].y, TEMP[1].z): TEMP[1] after the swizzle in TGSI 8-9, i.e. OUT[2].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %74, float %93, float %95, float %97)
; Last export; the only one whose third operand is 1.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are dumped above.
; Each line is followed by its hex encoding in the trailing ';' comment.
; Load four dwords from the address in s[8:9]; s[4:7] is the resource operand of the vertex fetch below.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
; v0 = s10 + v0: the fetch index (the IR computes it as %43 = add i32 %5, %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Wait for the scalar load so s[4:7] is valid before it is used.
s_waitcnt lgkmcnt(0) ; BF8C007F
; Fetch the vertex input into v0..v3 (IN[0].x, .y, .z, .w in the TGSI).
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
; Load four dwords from the address in s[2:3]; s[0:3] is the resource operand of every s_buffer_load_dword below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Wait for both the vertex fetch and the scalar load above.
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Dword offsets 0x0-0x2, 0x4-0x6, 0x8-0xa, 0xc-0xe -> s4..s15.
; These are the same values the IR loads at byte offsets 0..56 (TGSI CONST[0..3].xyz); the 4th dword of each row is skipped.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s13, s[0:3], 0xc ; C206810C
s_buffer_load_dword s14, s[0:3], 0xd ; C207010D
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
; Dword offsets 0x10..0x1f -> s16..s30 and s0 (IR byte offsets 64..124, TGSI CONST[4..7].xyzw).
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113
s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114
s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117
s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118
s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119
s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A
s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B
s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C
s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D
s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E
; The destination s0 overlaps the resource s[0:3]; this is the last load that reads through it.
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
; Wait for all of the constant loads before the arithmetic reads them.
s_waitcnt lgkmcnt(0) ; BF8C007F
; First two terms (input x and y) of each dot product:
;   v4..v7 accumulate TEMP[0].xyzw (dwords 0x10.. = CONST[4..7])
;   v8, v9, v0 accumulate TEMP[1].xyz (dwords 0x0.. = CONST[0..3])
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mac_f32_e32 v4, s20, v1 ; 3E080214
v_mul_f32_e32 v5, s17, v0 ; 100A0011
v_mac_f32_e32 v5, s21, v1 ; 3E0A0215
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mac_f32_e32 v7, s23, v1 ; 3E0E0217
v_mul_f32_e32 v8, s4, v0 ; 10100004
v_mac_f32_e32 v8, s7, v1 ; 3E100207
v_mul_f32_e32 v9, s5, v0 ; 10120005
v_mac_f32_e32 v9, s8, v1 ; 3E120208
; v0 (input x) is overwritten here; this is its last read, and v0 now holds the TEMP[1].z accumulator.
v_mul_f32_e32 v0, s6, v0 ; 10000006
v_mac_f32_e32 v0, s9, v1 ; 3E000209
; Third term: multiply-accumulate with input z (v2).
v_mac_f32_e32 v4, s24, v2 ; 3E080418
v_mac_f32_e32 v5, s25, v2 ; 3E0A0419
v_mac_f32_e32 v6, s26, v2 ; 3E0C041A
v_mac_f32_e32 v7, s27, v2 ; 3E0E041B
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mac_f32_e32 v9, s11, v2 ; 3E12040B
v_mac_f32_e32 v0, s12, v2 ; 3E00040C
; Fourth term: multiply-accumulate with input w (v3).
v_mac_f32_e32 v4, s28, v3 ; 3E08061C
v_mac_f32_e32 v5, s29, v3 ; 3E0A061D
v_mac_f32_e32 v6, s30, v3 ; 3E0C061E
v_mac_f32_e32 v7, s0, v3 ; 3E0E0600
v_mac_f32_e32 v8, s13, v3 ; 3E10060D
v_mac_f32_e32 v9, s14, v3 ; 3E12060E
v_mac_f32_e32 v0, s15, v3 ; 3E00060F
; Exports, in the same order as the three llvm.SI.export calls in the IR.
; v4..v7 = TEMP[0].xyzw.
exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504
; (v6, v8, v9, v0) = (TEMP[0].z, TEMP[1].x, TEMP[1].y, TEMP[1].z), the swizzled TEMP[1] written to OUT[2].
exp 15, 33, 0, 0, 0, v6, v8, v9, v0 ; F800021F 00090806
; Same values as the first export; corresponds to the IR export whose third operand is 1.
exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 284 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[6]
DCL CONST[9..13]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.5000, 2.0000, 1.0000, 0.0000}
IMM[1] FLT32 { -1.0000, -0.5000, 8.0000, 0.8000}
IMM[2] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0].xyw, IN[0], IMM[0].xxxx
1: MOV TEMP[1].x, TEMP[0].xxxx
2: MUL TEMP[2].x, TEMP[0].yyyy, CONST[2].xxxx
3: MOV TEMP[1].y, TEMP[2].xxxx
4: ADD TEMP[0].xy, TEMP[1].xyyy, TEMP[0].wwww
5: MUL TEMP[1].x, CONST[9].xxxx, IMM[0].yyyy
6: MUL TEMP[2].xy, IN[1].ywww, TEMP[1].xxxx
7: MUL TEMP[3].x, IMM[0].xxxx, CONST[0].xxxx
8: MUL TEMP[4].x, IMM[0].yyyy, TEMP[3].xxxx
9: ADD TEMP[5].x, TEMP[2].xxxx, IMM[0].xxxx
10: MOV TEMP[5].y, TEMP[2].yyyy
11: MOV TEMP[6].x, -TEMP[2].xxxx
12: MOV TEMP[6].y, TEMP[2].yyyy
13: MOV TEMP[7].x, TEMP[2].xxxx
14: ADD TEMP[8].x, TEMP[2].yyyy, IMM[0].xxxx
15: MOV TEMP[7].y, TEMP[8].xxxx
16: MOV TEMP[8].x, TEMP[2].xxxx
17: MOV TEMP[8].y, -TEMP[2].yyyy
18: MAD TEMP[2].xy, IMM[0].zwww, TEMP[4].xxxx, TEMP[5].xyyy
19: MOV TEMP[2].xy, TEMP[2].xyyy
20: TEX TEMP[2], TEMP[2], SAMP[1], 2D
21: MAD TEMP[5].xy, IMM[0].zwww, TEMP[4].xxxx, TEMP[6].xyyy
22: MOV TEMP[5].xy, TEMP[5].xyyy
23: TEX TEMP[5], TEMP[5], SAMP[1], 2D
24: ADD TEMP[2], TEMP[2], TEMP[5]
25: MAD TEMP[5].xy, IMM[0].wzzz, TEMP[4].xxxx, TEMP[7].xyyy
26: MOV TEMP[5].xy, TEMP[5].xyyy
27: TEX TEMP[5], TEMP[5], SAMP[1], 2D
28: MAD TEMP[4].xy, IMM[0].wzzz, TEMP[4].xxxx, TEMP[8].xyyy
29: MOV TEMP[4].xy, TEMP[4].xyyy
30: TEX TEMP[4], TEMP[4], SAMP[1], 2D
31: ADD TEMP[4], TEMP[5], TEMP[4]
32: ADD TEMP[2].xy, TEMP[2], TEMP[4]
33: MAD TEMP[4].xy, TEMP[2].xyyy, IMM[0].xxxx, IMM[1].xxxx
34: MUL TEMP[2].xy, TEMP[4].xyyy, IMM[0].yyyy
35: MOV TEMP[2].z, IMM[0].zzzz
36: MUL TEMP[1].xy, IN[1].ywww, TEMP[1].xxxx
37: MUL TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx
38: ADD TEMP[4].x, TEMP[1].xxxx, IMM[0].xxxx
39: MOV TEMP[4].y, TEMP[1].yyyy
40: MOV TEMP[5].x, -TEMP[1].xxxx
41: MOV TEMP[5].y, TEMP[1].yyyy
42: MOV TEMP[6].x, TEMP[1].xxxx
43: ADD TEMP[7].x, TEMP[1].yyyy, IMM[0].xxxx
44: MOV TEMP[6].y, TEMP[7].xxxx
45: MOV TEMP[7].x, TEMP[1].xxxx
46: MOV TEMP[7].y, -TEMP[1].yyyy
47: MAD TEMP[1].xy, IMM[0].zwww, TEMP[3].xxxx, TEMP[4].xyyy
48: MOV TEMP[1].xy, TEMP[1].xyyy
49: TEX TEMP[1], TEMP[1], SAMP[1], 2D
50: MAD TEMP[4].xy, IMM[0].zwww, TEMP[3].xxxx, TEMP[5].xyyy
51: MOV TEMP[4].xy, TEMP[4].xyyy
52: TEX TEMP[4], TEMP[4], SAMP[1], 2D
53: ADD TEMP[1], TEMP[1], TEMP[4]
54: MAD TEMP[4].xy, IMM[0].wzzz, TEMP[3].xxxx, TEMP[6].xyyy
55: MOV TEMP[4].xy, TEMP[4].xyyy
56: TEX TEMP[4], TEMP[4], SAMP[1], 2D
57: MAD TEMP[3].xy, IMM[0].wzzz, TEMP[3].xxxx, TEMP[7].xyyy
58: MOV TEMP[3].xy, TEMP[3].xyyy
59: TEX TEMP[3], TEMP[3], SAMP[1], 2D
60: ADD TEMP[3], TEMP[4], TEMP[3]
61: ADD TEMP[1].xy, TEMP[1], TEMP[3]
62: MAD TEMP[3].xy, TEMP[1].xyyy, IMM[0].xxxx, IMM[1].xxxx
63: MUL TEMP[1].xy, TEMP[3].xyyy, IMM[0].yyyy
64: MOV TEMP[1].z, IMM[0].zzzz
65: MAX TEMP[3].x, IN[0].wwww, IMM[0].wwww
66: MUL TEMP[4].x, CONST[12].xxxx, IMM[0].xxxx
67: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
68: POW TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx
69: MOV_SAT TEMP[3].x, TEMP[3].xxxx
70: MOV TEMP[4].zw, IN[0].wwzw
71: DP3 TEMP[5].x, TEMP[1].xyzz, TEMP[1].xyzz
72: RSQ TEMP[5].x, TEMP[5].xxxx
73: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xxxx
74: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
75: RSQ TEMP[5].x, TEMP[5].xxxx
76: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
77: MUL TEMP[5].x, IMM[0].yyyy, CONST[0].xxxx
78: FRC TEMP[5].x, TEMP[5].xxxx
79: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[2].xzyy, TEMP[1].xzyy
80: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
81: RSQ TEMP[2].x, TEMP[2].xxxx
82: MOV TEMP[0].xy, TEMP[0].xyyy
83: MOV TEMP[0].w, IN[0].wwww
84: TXP TEMP[0].xyz, TEMP[0], SAMP[2], 2D
85: ADD TEMP[0].xyz, TEMP[0].xzyy, IMM[1].yxyy
86: MAD TEMP[0].xz, TEMP[1].xyzz, TEMP[2].xxxx, TEMP[0].xyzz
87: MUL TEMP[0].xy, TEMP[0].xzzz, CONST[11].xxxx
88: MUL TEMP[1].xy, CONST[6].xyyy, IMM[1].zzzz
89: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xyyy
90: MOV_SAT TEMP[1].x, TEMP[3].xxxx
91: MUL TEMP[0].xy, TEMP[0].xyyy, TEMP[1].xxxx
92: POW TEMP[1].x, IN[0].zzzz, IMM[1].wwww
93: MAD TEMP[4].xy, TEMP[0].xyyy, TEMP[1].xxxx, IN[0].xyyy
94: MUL TEMP[0].xyw, TEMP[4], IMM[0].xxxx
95: ADD TEMP[0].xy, TEMP[0].xyyy, TEMP[0].wwww
96: ADD TEMP[1].xyz, IN[1].yzww, -CONST[1].xyzz
97: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
98: RSQ TEMP[2].x, TEMP[2].xxxx
99: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
100: MOV_SAT TEMP[2].x, TEMP[1].yyyy
101: POW TEMP[2].x, TEMP[2].xxxx, CONST[12].zzzz
102: MOV TEMP[0].xy, TEMP[0].xyyy
103: MOV TEMP[0].w, IN[0].wwww
104: TXP TEMP[0].xyz, TEMP[0], SAMP[0], 2D
105: DP3 TEMP[1].x, TEMP[1].xyzz, -CONST[13].xyzz
106: MOV_SAT TEMP[1].x, TEMP[1].xxxx
107: MUL TEMP[4].x, CONST[12].zzzz, IMM[2].xxxx
108: POW TEMP[1].x, TEMP[1].xxxx, TEMP[4].xxxx
109: MAD TEMP[1].x, TEMP[1].xxxx, CONST[12].zzzz, IMM[0].zzzz
110: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
111: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
112: ADD TEMP[1].x, TEMP[3].xxxx, CONST[12].yyyy
113: MOV_SAT TEMP[1].x, TEMP[1].xxxx
114: LRP TEMP[0].xyz, TEMP[1].xxxx, CONST[10].xyzz, TEMP[0].xyzz
115: ADD TEMP[1].x, IMM[0].zzzz, -TEMP[2].xxxx
116: MOV_SAT TEMP[1].x, TEMP[1].xxxx
117: LRP TEMP[0].xyz, TEMP[1].xxxx, CONST[10].xyzz, TEMP[0].xyzz
118: MUL TEMP[1].x, IN[0].wwww, IMM[0].yyyy
119: MOV_SAT TEMP[1].x, TEMP[1].xxxx
120: MOV TEMP[1].w, TEMP[1].xxxx
121: MAD TEMP[2].x, IN[1].xxxx, CONST[4].zzzz, CONST[4].wwww
122: MOV_SAT TEMP[2].x, TEMP[2].xxxx
123: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[3].xyzz
124: MOV OUT[0], TEMP[1]
125: END
; ModuleID = 'tgsi'
; Fragment-shader entry point generated from the FRAG TGSI program dumped
; directly above (TGSI instructions 0-125). The comments below map groups of
; IR values back to those TGSI instructions.
;
; The 22 unnamed arguments occupy %0-%21, so the first instruction result is
; %22. Arguments referenced in the body:
;   %1      - array of 128-bit descriptors; element 0 is the buffer that all
;             llvm.SI.load.const calls read from
;   %2, %3  - arrays of 128-bit / 256-bit descriptors passed as the third and
;             second operands of llvm.SI.sample (presumably sampler state and
;             texture resource -- confirm against the driver)
;   %5, %7  - passed unchanged to every llvm.SI.fs.interp call
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant loads. The byte offset is 16*N + 4*component for CONST[N]:
;   %24      CONST[0].x        %25-%27  CONST[1].xyz     %28      CONST[2].x
;   %29-%31  CONST[3].xyz      %32-%33  CONST[4].zw      %34-%35  CONST[6].xy
;   %36      CONST[9].x        %37-%39  CONST[10].xyz    %40      CONST[11].x
;   %41-%43  CONST[12].xyz     %44-%46  CONST[13].xyz
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
; Descriptor pairs for the three texture units, taken from elements 0, 1 and 2
; of %3 and %2: unit 0 -> %48/%50, unit 1 -> %52/%54, unit 2 -> %57/%60.
%47 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0
%49 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
%51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%52 = load <8 x i32>, <8 x i32> addrspace(2)* %51, align 32, !tbaa !0
%53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%54 = load <4 x i32>, <4 x i32> addrspace(2)* %53, align 16, !tbaa !0
%55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%56 = bitcast <8 x i32> addrspace(2)* %55 to <32 x i8> addrspace(2)*
%57 = load <32 x i8>, <32 x i8> addrspace(2)* %56, align 32, !tbaa !0
%58 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%59 = bitcast <4 x i32> addrspace(2)* %58 to <16 x i8> addrspace(2)*
%60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0
; Interpolated inputs. The first operand walks components 0-3 and the second
; selects the input: %61-%64 = IN[0].xyzw, %65-%68 = IN[1].xyzw.
%61 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
; TGSI 0-4: TEMP[0].xy = (0.5*IN[0].x, 0.5*IN[0].y*CONST[2].x) + 0.5*IN[0].w.
%69 = fmul float %61, 5.000000e-01
%70 = fmul float %62, 5.000000e-01
%71 = fmul float %64, 5.000000e-01
%72 = fmul float %70, %28
%73 = fadd float %69, %71
%74 = fadd float %72, %71
; TGSI 5-8: %75 = 2*CONST[9].x; %76/%77 = IN[1].y/IN[1].w scaled by %75;
; %79 = 2*(0.5*CONST[0].x).
%75 = fmul float %36, 2.000000e+00
%76 = fmul float %66, %75
%77 = fmul float %68, %75
%78 = fmul float %24, 5.000000e-01
%79 = fmul float %78, 2.000000e+00
; TGSI 9 and 14: the +0.5 offsets used by the first and third taps.
%80 = fadd float %76, 5.000000e-01
%81 = fadd float %77, 5.000000e-01
; TGSI 18-20: first tap of texture unit 1. The MAD by IMM[0].zw = (1, 0)
; shows up as a plain fadd for x and a multiply by 0.0 for y.
%82 = fadd float %79, %80
%83 = fmul float %79, 0.000000e+00
%84 = fadd float %83, %77
%85 = bitcast float %82 to i32
%86 = bitcast float %84 to i32
%87 = insertelement <2 x i32> undef, i32 %85, i32 0
%88 = insertelement <2 x i32> %87, i32 %86, i32 1
%89 = bitcast <8 x i32> %52 to <32 x i8>
%90 = bitcast <4 x i32> %54 to <16 x i8>
%91 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %88, <32 x i8> %89, <16 x i8> %90, i32 2)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
; TGSI 21-24: second tap (x coordinate negated), then sum of taps 1 and 2.
; Only the x and y channels of each sample are extracted.
%94 = fsub float %79, %76
%95 = fmul float %79, 0.000000e+00
%96 = fadd float %95, %77
%97 = bitcast float %94 to i32
%98 = bitcast float %96 to i32
%99 = insertelement <2 x i32> undef, i32 %97, i32 0
%100 = insertelement <2 x i32> %99, i32 %98, i32 1
%101 = bitcast <8 x i32> %52 to <32 x i8>
%102 = bitcast <4 x i32> %54 to <16 x i8>
%103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %100, <32 x i8> %101, <16 x i8> %102, i32 2)
%104 = extractelement <4 x float> %103, i32 0
%105 = extractelement <4 x float> %103, i32 1
%106 = fadd float %92, %104
%107 = fadd float %93, %105
; TGSI 25-27: third tap (y coordinate offset by +0.5).
%108 = fmul float %79, 0.000000e+00
%109 = fadd float %108, %76
%110 = fadd float %79, %81
%111 = bitcast float %109 to i32
%112 = bitcast float %110 to i32
%113 = insertelement <2 x i32> undef, i32 %111, i32 0
%114 = insertelement <2 x i32> %113, i32 %112, i32 1
%115 = bitcast <8 x i32> %52 to <32 x i8>
%116 = bitcast <4 x i32> %54 to <16 x i8>
%117 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %114, <32 x i8> %115, <16 x i8> %116, i32 2)
%118 = extractelement <4 x float> %117, i32 0
%119 = extractelement <4 x float> %117, i32 1
; TGSI 28-31: fourth tap (y coordinate negated), then sum of taps 3 and 4.
%120 = fmul float %79, 0.000000e+00
%121 = fadd float %120, %76
%122 = fsub float %79, %77
%123 = bitcast float %121 to i32
%124 = bitcast float %122 to i32
%125 = insertelement <2 x i32> undef, i32 %123, i32 0
%126 = insertelement <2 x i32> %125, i32 %124, i32 1
%127 = bitcast <8 x i32> %52 to <32 x i8>
%128 = bitcast <4 x i32> %54 to <16 x i8>
%129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2)
%130 = extractelement <4 x float> %129, i32 0
%131 = extractelement <4 x float> %129, i32 1
%132 = fadd float %118, %130
%133 = fadd float %119, %131
; TGSI 32-34: TEMP[2].xy = 2 * (0.5 * (sum of the four taps) - 1).
%134 = fadd float %106, %132
%135 = fadd float %107, %133
%136 = fmul float %134, 5.000000e-01
%137 = fadd float %136, -1.000000e+00
%138 = fmul float %135, 5.000000e-01
%139 = fadd float %138, -1.000000e+00
%140 = fmul float %137, 2.000000e+00
%141 = fmul float %139, 2.000000e+00
; TGSI 36-63: the same four-tap sequence emitted a second time. %142, %143 and
; %144 have exactly the same operands as %76, %77 and %79, so every value in
; this section repeats its counterpart above (%205/%206 repeat %140/%141).
%142 = fmul float %66, %75
%143 = fmul float %68, %75
%144 = fmul float %78, 2.000000e+00
%145 = fadd float %142, 5.000000e-01
%146 = fadd float %143, 5.000000e-01
%147 = fadd float %144, %145
%148 = fmul float %144, 0.000000e+00
%149 = fadd float %148, %143
%150 = bitcast float %147 to i32
%151 = bitcast float %149 to i32
%152 = insertelement <2 x i32> undef, i32 %150, i32 0
%153 = insertelement <2 x i32> %152, i32 %151, i32 1
%154 = bitcast <8 x i32> %52 to <32 x i8>
%155 = bitcast <4 x i32> %54 to <16 x i8>
%156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %154, <16 x i8> %155, i32 2)
%157 = extractelement <4 x float> %156, i32 0
%158 = extractelement <4 x float> %156, i32 1
%159 = fsub float %144, %142
%160 = fmul float %144, 0.000000e+00
%161 = fadd float %160, %143
%162 = bitcast float %159 to i32
%163 = bitcast float %161 to i32
%164 = insertelement <2 x i32> undef, i32 %162, i32 0
%165 = insertelement <2 x i32> %164, i32 %163, i32 1
%166 = bitcast <8 x i32> %52 to <32 x i8>
%167 = bitcast <4 x i32> %54 to <16 x i8>
%168 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %166, <16 x i8> %167, i32 2)
%169 = extractelement <4 x float> %168, i32 0
%170 = extractelement <4 x float> %168, i32 1
%171 = fadd float %157, %169
%172 = fadd float %158, %170
%173 = fmul float %144, 0.000000e+00
%174 = fadd float %173, %142
%175 = fadd float %144, %146
%176 = bitcast float %174 to i32
%177 = bitcast float %175 to i32
%178 = insertelement <2 x i32> undef, i32 %176, i32 0
%179 = insertelement <2 x i32> %178, i32 %177, i32 1
%180 = bitcast <8 x i32> %52 to <32 x i8>
%181 = bitcast <4 x i32> %54 to <16 x i8>
%182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %179, <32 x i8> %180, <16 x i8> %181, i32 2)
%183 = extractelement <4 x float> %182, i32 0
%184 = extractelement <4 x float> %182, i32 1
%185 = fmul float %144, 0.000000e+00
%186 = fadd float %185, %142
%187 = fsub float %144, %143
%188 = bitcast float %186 to i32
%189 = bitcast float %187 to i32
%190 = insertelement <2 x i32> undef, i32 %188, i32 0
%191 = insertelement <2 x i32> %190, i32 %189, i32 1
%192 = bitcast <8 x i32> %52 to <32 x i8>
%193 = bitcast <4 x i32> %54 to <16 x i8>
%194 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %191, <32 x i8> %192, <16 x i8> %193, i32 2)
%195 = extractelement <4 x float> %194, i32 0
%196 = extractelement <4 x float> %194, i32 1
%197 = fadd float %183, %195
%198 = fadd float %184, %196
%199 = fadd float %171, %197
%200 = fadd float %172, %198
%201 = fmul float %199, 5.000000e-01
%202 = fadd float %201, -1.000000e+00
%203 = fmul float %200, 5.000000e-01
%204 = fadd float %203, -1.000000e+00
%205 = fmul float %202, 2.000000e+00
%206 = fmul float %204, 2.000000e+00
; TGSI 65-69: TEMP[3].x = saturate(pow(max(IN[0].w, 0) * 0.5*CONST[12].x, 0.5)).
; The POW with exponent 0.5 appears here as sqrtf + fabsf plus a select that
; yields +inf (0x7FF0...) when the input equals -inf (0xFFF0...).
%207 = call float @llvm.maxnum.f32(float %64, float 0.000000e+00)
%208 = fmul float %41, 5.000000e-01
%209 = fmul float %207, %208
%sqrtf = call float @sqrtf(float %209) #1
%fabsf = call float @fabsf(float %sqrtf) #1
%210 = fcmp oeq float %209, 0xFFF0000000000000
%211 = select i1 %210, float 0x7FF0000000000000, float %fabsf
%212 = call float @llvm.AMDIL.clamp.(float %211, float 0.000000e+00, float 1.000000e+00)
; TGSI 71-73: normalize TEMP[1] = (%205, %206, 1.0). The z term of the dot
; product is the constant 1.0, and the normalized z is %217 itself.
%213 = fmul float %205, %205
%214 = fmul float %206, %206
%215 = fadd float %214, %213
%216 = fadd float %215, 1.000000e+00
%217 = call float @llvm.AMDGPU.rsq.clamped.f32(float %216)
%218 = fmul float %205, %217
%219 = fmul float %206, %217
; TGSI 74-76: normalize TEMP[2] = (%140, %141, 1.0) in the same way.
%220 = fmul float %140, %140
%221 = fmul float %141, %141
%222 = fadd float %221, %220
%223 = fadd float %222, 1.000000e+00
%224 = call float @llvm.AMDGPU.rsq.clamped.f32(float %223)
%225 = fmul float %140, %224
%226 = fmul float %141, %224
; TGSI 77-78: %229 = frac(2*CONST[0].x), computed as x - floor(x).
%227 = fmul float %24, 2.000000e+00
%228 = call float @llvm.floor.f32(float %227)
%229 = fsub float %227, %228
; TGSI 79: LRP with the .xzy swizzle on both sources, so %230 blends the x
; components, %231 the (former) z components and %232 the y components.
%230 = call float @llvm.AMDGPU.lrp(float %229, float %225, float %218)
%231 = call float @llvm.AMDGPU.lrp(float %229, float %224, float %217)
%232 = call float @llvm.AMDGPU.lrp(float %229, float %226, float %219)
; TGSI 80-81: reciprocal length of the blended vector.
%233 = fmul float %230, %230
%234 = fmul float %231, %231
%235 = fadd float %234, %233
%236 = fmul float %232, %232
%237 = fadd float %235, %236
%238 = call float @llvm.AMDGPU.rsq.clamped.f32(float %237)
; TGSI 82-84: TXP on texture unit 2 -- the coordinates from TGSI 4 are divided
; by IN[0].w (%64) before sampling.
%239 = fdiv float %73, %64
%240 = fdiv float %74, %64
%241 = bitcast float %239 to i32
%242 = bitcast float %240 to i32
%243 = insertelement <2 x i32> undef, i32 %241, i32 0
%244 = insertelement <2 x i32> %243, i32 %242, i32 1
%245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %244, <32 x i8> %57, <16 x i8> %60, i32 2)
%246 = extractelement <4 x float> %245, i32 0
%247 = extractelement <4 x float> %245, i32 1
; TGSI 85-87: bias the sample by -0.5, add the normalized blended vector
; (x and z components), then scale by CONST[11].x.
%248 = fadd float %246, -5.000000e-01
%249 = fadd float %247, -5.000000e-01
%250 = fmul float %230, %238
%251 = fadd float %250, %248
%252 = fmul float %232, %238
%253 = fadd float %252, %249
%254 = fmul float %251, %40
%255 = fmul float %253, %40
; TGSI 88-91: scale by 8*CONST[6].xy and by TEMP[3].x. %260 clamps %212 a
; second time (TGSI 90 is a MOV_SAT of the already saturated value).
%256 = fmul float %34, 8.000000e+00
%257 = fmul float %35, 8.000000e+00
%258 = fmul float %254, %256
%259 = fmul float %255, %257
%260 = call float @llvm.AMDIL.clamp.(float %212, float 0.000000e+00, float 1.000000e+00)
%261 = fmul float %258, %260
%262 = fmul float %259, %260
; TGSI 92-93: multiply by pow(IN[0].z, IMM[1].w) and add IN[0].xy. IMM[1].w is
; printed as 0.8000 in the TGSI dump.
%263 = call float @llvm.pow.f32(float %63, float 0x3FE99999A0000000)
%264 = fmul float %261, %263
%265 = fadd float %264, %61
%266 = fmul float %262, %263
%267 = fadd float %266, %62
; TGSI 94-95: TEMP[0].xy = 0.5*TEMP[4].xy + 0.5*IN[0].w.
%268 = fmul float %265, 5.000000e-01
%269 = fmul float %267, 5.000000e-01
%270 = fmul float %64, 5.000000e-01
%271 = fadd float %268, %270
%272 = fadd float %269, %270
; TGSI 96-99: TEMP[1].xyz = normalize(IN[1].yzw - CONST[1].xyz).
%273 = fsub float %66, %25
%274 = fsub float %67, %26
%275 = fsub float %68, %27
%276 = fmul float %273, %273
%277 = fmul float %274, %274
%278 = fadd float %277, %276
%279 = fmul float %275, %275
%280 = fadd float %278, %279
%281 = call float @llvm.AMDGPU.rsq.clamped.f32(float %280)
%282 = fmul float %273, %281
%283 = fmul float %274, %281
%284 = fmul float %275, %281
; TGSI 100-101: %286 = pow(saturate(TEMP[1].y), CONST[12].z).
%285 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00)
%286 = call float @llvm.pow.f32(float %285, float %43)
; TGSI 102-104: TXP on texture unit 0 with the coordinates from TGSI 95,
; again divided by IN[0].w; x, y and z are extracted.
%287 = fdiv float %271, %64
%288 = fdiv float %272, %64
%289 = bitcast float %287 to i32
%290 = bitcast float %288 to i32
%291 = insertelement <2 x i32> undef, i32 %289, i32 0
%292 = insertelement <2 x i32> %291, i32 %290, i32 1
%293 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %292, <32 x i8> %48, <16 x i8> %50, i32 2)
%294 = extractelement <4 x float> %293, i32 0
%295 = extractelement <4 x float> %293, i32 1
%296 = extractelement <4 x float> %293, i32 2
; TGSI 105-106: %303 = saturate(dot(TEMP[1].xyz, -CONST[13].xyz)); the negation
; is expressed as a chain of subtractions starting from -0.0.
%297 = fmul float %44, %282
%298 = fsub float -0.000000e+00, %297
%299 = fmul float %45, %283
%300 = fsub float %298, %299
%301 = fmul float %46, %284
%302 = fsub float %300, %301
%303 = call float @llvm.AMDIL.clamp.(float %302, float 0.000000e+00, float 1.000000e+00)
; TGSI 107-110: %308 = pow(%303, 4*CONST[12].z) * CONST[12].z + 1 + %286.
%304 = fmul float %43, 4.000000e+00
%305 = call float @llvm.pow.f32(float %303, float %304)
%306 = fmul float %305, %43
%307 = fadd float %306, 1.000000e+00
%308 = fadd float %307, %286
; TGSI 111: scale the unit-0 sample by %308.
%309 = fmul float %294, %308
%310 = fmul float %295, %308
%311 = fmul float %296, %308
; TGSI 112-114: blend toward CONST[10].xyz by saturate(TEMP[3].x + CONST[12].y).
%312 = fadd float %212, %42
%313 = call float @llvm.AMDIL.clamp.(float %312, float 0.000000e+00, float 1.000000e+00)
%314 = call float @llvm.AMDGPU.lrp(float %313, float %37, float %309)
%315 = call float @llvm.AMDGPU.lrp(float %313, float %38, float %310)
%316 = call float @llvm.AMDGPU.lrp(float %313, float %39, float %311)
; TGSI 115-117: blend toward CONST[10].xyz again by saturate(1 - %286).
%317 = fsub float 1.000000e+00, %286
%318 = call float @llvm.AMDIL.clamp.(float %317, float 0.000000e+00, float 1.000000e+00)
%319 = call float @llvm.AMDGPU.lrp(float %318, float %37, float %314)
%320 = call float @llvm.AMDGPU.lrp(float %318, float %38, float %315)
%321 = call float @llvm.AMDGPU.lrp(float %318, float %39, float %316)
; TGSI 118-120: output alpha = saturate(2*IN[0].w).
%322 = fmul float %64, 2.000000e+00
%323 = call float @llvm.AMDIL.clamp.(float %322, float 0.000000e+00, float 1.000000e+00)
; TGSI 121-123: factor = saturate(IN[1].x*CONST[4].z + CONST[4].w); the output
; rgb blends the colour computed so far against CONST[3].xyz by that factor.
%324 = fmul float %65, %32
%325 = fadd float %324, %33
%326 = call float @llvm.AMDIL.clamp.(float %325, float 0.000000e+00, float 1.000000e+00)
%327 = call float @llvm.AMDGPU.lrp(float %326, float %319, float %29)
%328 = call float @llvm.AMDGPU.lrp(float %326, float %320, float %30)
%329 = call float @llvm.AMDGPU.lrp(float %326, float %321, float %31)
; TGSI 124: OUT[0] is packed as two pairs of halves, (r, g) and (b, a), and
; exported; each packed dword is passed twice. Presumably this packing follows
; from export_16bpc = 0x3 in the shader key -- confirm against the driver.
%330 = call i32 @llvm.SI.packf16(float %327, float %328)
%331 = bitcast i32 %330 to float
%332 = call i32 @llvm.SI.packf16(float %329, float %323)
%333 = bitcast i32 %332 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %331, float %333, float %331, float %333)
ret void
}
; Declarations of the intrinsics and library functions called by the fragment
; shader's @main, followed by the attribute groups (#0-#2) and the metadata
; node (!0) that the module references.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; llvm.SI.export is the only declaration here without an attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; sqrtf and fabsf are declared without attributes; their two call sites in
; @main carry #1 instead.
declare float @sqrtf(float)
declare float @fabsf(float)
; #0 is attached to @main. This fragment-shader module uses "ShaderType"="0",
; whereas the vertex-shader module at the start of the file uses "1".
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; !0 is the !tbaa tag attached to every descriptor load in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose TGSI program and LLVM IR are
; dumped above. Notes below are derived from the operands shown here and from
; the IR: an s_buffer_load_dword at dword offset N reads CONST[N / 4],
; component N % 4 (the IR uses the same locations as byte offsets).
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; s[8:11]: descriptor used by every s_buffer_load_dword below.
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
; Interpolation: v2-v5 = IN[0].xyzw, v6 = IN[1].x, v7/v8/v0 = IN[1].yzw.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
s_waitcnt lgkmcnt(0) ; BF8C007F
; s2 = CONST[0].x, s3/s36/s37 = CONST[1].xyz, s38 = CONST[2].x.
s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
s_buffer_load_dword s3, s[8:11], 0x4 ; C2018904
s_buffer_load_dword s36, s[8:11], 0x5 ; C2120905
s_buffer_load_dword s37, s[8:11], 0x6 ; C2128906
s_buffer_load_dword s38, s[8:11], 0x8 ; C2130908
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700
v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701
s_waitcnt lgkmcnt(0) ; BF8C007F
; v1 = 0.5*CONST[0].x (IR %78); v9 = 0.5*CONST[0].x + v1 (IR %79).
v_mul_f32_e64 v1, 0.5, s2 ; D2100001 000004F0
v_mad_f32 v9, 0.5, s2, v1 ; D2820009 040404F0
; s24 = CONST[9].x, s1/s0 = CONST[10].xy; s[12:15] and s[16:23] are element 1
; of the two descriptor arrays (texture unit 1).
s_buffer_load_dword s24, s[8:11], 0x24 ; C20C0924
s_buffer_load_dword s1, s[8:11], 0x28 ; C2008928
s_buffer_load_dword s0, s[8:11], 0x29 ; C2000929
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708
s_waitcnt lgkmcnt(0) ; BF8C007F
; Coordinates for the four taps of texture unit 1 (IR %75-%84, %94, %109-%110,
; %122). v10 = 2*CONST[9].x; the "0 * v9" terms are the multiplies by 0.0
; that are still present in the IR.
v_add_f32_e64 v10, s24, s24 ; D206000A 00003018
v_mul_f32_e32 v12, v10, v0 ; 1018010A
v_mad_f32 v11, v7, v10, 0.5 ; D282000B 03C21507
v_mul_f32_e32 v13, v10, v7 ; 101A0F0A
v_mad_f32 v14, v0, v10, 0.5 ; D282000E 03C21500
v_mac_f32_e32 v11, 2.0, v1 ; 3E1602F4
v_mac_f32_e32 v13, 0, v9 ; 3E1A1280
v_mac_f32_e32 v14, 2.0, v1 ; 3E1C02F4
v_mac_f32_e32 v12, 0, v9 ; 3E181280
; s[24:27] and s[28:35]: element 2 of the descriptor arrays (texture unit 2).
s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508
s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710
; Only four image_sample instructions use the unit-1 descriptors, although
; the IR contains eight sample calls on that unit: the second group of four
; has the same operands as the first.
image_sample v[15:16], 3, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800300 00640F0B
v_mad_f32 v11, -v7, v10, v9 ; D282000B 24261507
image_sample v[11:12], 3, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[16:23], s[12:15] ; F0800300 00640B0B
image_sample v[17:18], 3, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800300 0064110D
v_mad_f32 v14, -v0, v10, v9 ; D282000E 24261500
image_sample v[9:10], 3, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800300 0064090D
; Division by IN[0].w (v5) for the projective lookups: v1 becomes 0x2f800000
; when |v5| > 0x6f800000 and 1.0 otherwise, v13 = rcp(v1 * v5), and each
; quotient below is multiplied by v13 and then by v1 again.
v_mov_b32_e32 v1, 0x6f800000 ; 7E0202FF 6F800000
v_cmp_gt_f32_e64 vcc, |v5|, v1 ; D008016A 00020305
v_mov_b32_e32 v1, 0x2f800000 ; 7E0202FF 2F800000
v_cndmask_b32_e32 v1, 1.0, v1 ; 000202F2
v_mul_f32_e32 v13, v1, v5 ; 101A0B01
v_rcp_f32_e32 v13, v13 ; 7E1A550D
; v19 = 0.5*IN[0].w is kept live and reused for the second projective lookup.
; v20/v21 end up as the unit-2 coordinates (IR %239/%240).
v_mul_f32_e32 v14, 0.5, v3 ; 101C06F0
v_mul_f32_e32 v19, 0.5, v5 ; 10260AF0
v_mad_f32 v14, s38, v14, v19 ; D282000E 044E1C26
v_mad_f32 v20, 0.5, v2, v19 ; D2820014 044E04F0
v_mul_f32_e32 v20, v13, v20 ; 1028290D
v_mul_f32_e32 v14, v13, v14 ; 101C1D0D
v_mul_f32_e32 v20, v20, v1 ; 10280314
v_mul_f32_e32 v21, v14, v1 ; 102A030E
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[20:21], 3, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[28:35], s[24:27] ; F0800300 00C71414
; Sum the four unit-1 taps, then 2 * (0.5*sum - 1) per component (IR %106-%141).
s_waitcnt vmcnt(3) ; BF8C0773
v_add_f32_e32 v11, v11, v15 ; 06161F0B
v_add_f32_e32 v12, v12, v16 ; 0618210C
s_waitcnt vmcnt(1) ; BF8C0771
v_add_f32_e32 v9, v9, v17 ; 06122309
v_add_f32_e32 v10, v10, v18 ; 0614250A
v_add_f32_e32 v9, v9, v11 ; 06121709
v_add_f32_e32 v10, v10, v12 ; 0614190A
v_mad_f32 v9, 0.5, v9, -1.0 ; D2820009 03CE12F0
v_mad_f32 v10, 0.5, v10, -1.0 ; D282000A 03CE14F0
v_add_f32_e32 v9, v9, v9 ; 06121309
v_add_f32_e32 v10, v10, v10 ; 0614150A
; v11 = rsq(x*x + y*y + 1.0): a single normalization serves both vectors that
; the IR normalizes separately.
v_mad_f32 v11, v9, v9, 1.0 ; D282000B 03CA1309
v_mac_f32_e32 v11, v10, v10 ; 3E16150A
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
; v12 = 2*CONST[0].x - floor(2*CONST[0].x) (IR %229); v14 = 1 - v12.
v_add_f32_e64 v12, s2, s2 ; D206000C 00000402
v_floor_f32_e32 v12, v12 ; 7E18490C
v_mad_f32 v12, 2.0, s2, -v12 ; D282000C 843004F4
v_mul_f32_e32 v9, v11, v9 ; 1012130B
v_sub_f32_e32 v14, 1.0, v12 ; 081C18F2
; s2 is reloaded with CONST[12].x.
s_buffer_load_dword s2, s[8:11], 0x30 ; C2010930
; The three lrp operations (IR %230-%232) expanded as a*(1-f) + a*f; both
; blended inputs are the same register in each case.
v_mul_f32_e32 v15, v9, v14 ; 101E1D09
v_mac_f32_e32 v15, v9, v12 ; 3E1E1909
v_mul_f32_e32 v9, v11, v10 ; 1012150B
v_mul_f32_e32 v10, v11, v14 ; 10141D0B
v_mac_f32_e32 v10, v11, v12 ; 3E14190B
v_mul_f32_e32 v11, v9, v14 ; 10161D09
v_mac_f32_e32 v11, v9, v12 ; 3E161909
; s12/s13 = CONST[12].yz.
s_buffer_load_dword s12, s[8:11], 0x31 ; C2060931
s_buffer_load_dword s13, s[8:11], 0x32 ; C2068932
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v9 = max(IN[0].w, 0) * 0.5*CONST[12].x (IR %209); v10 = |sqrt(v9)|, replaced
; by +inf (0x7f800000) when v9 compares equal to -inf (0xff800000). The
; squared-length accumulation into v12 (IR %233-%237) is interleaved.
v_mul_f32_e64 v9, 0.5, s2 ; D2100009 000004F0
v_max_f32_e32 v12, 0, v5 ; 20180A80
v_mul_f32_e32 v9, v9, v12 ; 10121909
v_mul_f32_e32 v12, v15, v15 ; 10181F0F
v_mac_f32_e32 v12, v10, v10 ; 3E18150A
v_sqrt_f32_e32 v10, v9 ; 7E146709
v_and_b32_e32 v10, 0x7fffffff, v10 ; 361414FF 7FFFFFFF
v_mov_b32_e32 v14, 0xff800000 ; 7E1C02FF FF800000
v_cmp_eq_f32_e32 vcc, v14, v9 ; 7C04130E
v_mac_f32_e32 v12, v11, v11 ; 3E18170B
; s2 = CONST[11].x, s14/s15 = CONST[6].xy.
s_buffer_load_dword s2, s[8:11], 0x2c ; C201092C
v_rsq_clamp_f32_e32 v9, v12 ; 7E12590C
s_buffer_load_dword s14, s[8:11], 0x18 ; C2070918
s_buffer_load_dword s15, s[8:11], 0x19 ; C2078919
v_mov_b32_e32 v12, 0x7f800000 ; 7E1802FF 7F800000
v_cndmask_b32_e32 v10, v10, v12 ; 0014190A
; Unit-2 sample biased by -0.5 plus the normalized blended vector (IR %248-%253).
v_add_f32_e32 v12, -0.5, v20 ; 061828F1
v_add_f32_e32 v14, -0.5, v21 ; 061C2AF1
v_mac_f32_e32 v12, v9, v15 ; 3E181F09
v_mac_f32_e32 v14, v9, v11 ; 3E1C1709
s_waitcnt lgkmcnt(0) ; BF8C007F
; Scale by CONST[11].x and by 8*CONST[6].xy (0x41000000 is 8.0; IR %254-%259).
v_mul_f32_e32 v9, s2, v12 ; 10121802
v_mul_f32_e32 v11, s2, v14 ; 10161C02
v_mov_b32_e32 v12, 0x41000000 ; 7E1802FF 41000000
v_mul_f32_e32 v14, s14, v12 ; 101C180E
v_mul_f32_e32 v12, s15, v12 ; 1018180F
v_mul_f32_e32 v9, v14, v9 ; 1012130E
v_mul_f32_e32 v11, v12, v11 ; 1016170C
; Both clamps from the IR (%212 and %260) are emitted; pow(IN[0].z, 0.8) is
; expanded as exp(0x3f4ccccd * log(v4)).
v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480
v_log_f32_e32 v4, v4 ; 7E084F04
v_add_f32_e64 v12, 0, v10 clamp ; D206080C 00021480
v_mul_f32_e32 v9, v12, v9 ; 1012130C
v_mul_f32_e32 v11, v12, v11 ; 1016170C
v_mul_legacy_f32_e32 v4, 0x3f4ccccd, v4 ; 0E0808FF 3F4CCCCD
v_exp_f32_e32 v4, v4 ; 7E084B04
; Add the result to IN[0].xy and form the unit-0 coordinates before the divide
; (IR %265-%272).
v_mac_f32_e32 v2, v4, v9 ; 3E041304
v_mac_f32_e32 v3, v4, v11 ; 3E061704
v_mad_f32 v2, 0.5, v2, v19 ; D2820002 044E04F0
v_mac_f32_e32 v19, 0.5, v3 ; 3E2606F0
; v3/v4/v0 = IN[1].yzw - CONST[1].xyz (IR %273-%275).
v_subrev_f32_e32 v3, s3, v7 ; 0A060E03
v_subrev_f32_e32 v4, s36, v8 ; 0A081024
v_subrev_f32_e32 v0, s37, v0 ; 0A000025
v_mul_f32_e32 v2, v13, v2 ; 1004050D
v_mul_f32_e32 v7, v13, v19 ; 100E270D
; s[16:19] and s[20:27]: element 0 of the descriptor arrays (texture unit 0).
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
v_mul_f32_e32 v8, v3, v3 ; 10100703
v_mac_f32_e32 v8, v4, v4 ; 3E100904
v_mac_f32_e32 v8, v0, v0 ; 3E100100
; s2/s3/s4 = CONST[13].xyz.
s_buffer_load_dword s2, s[8:11], 0x34 ; C2010934
s_buffer_load_dword s3, s[8:11], 0x35 ; C2018935
s_buffer_load_dword s4, s[8:11], 0x36 ; C2020936
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v11, v2, v1 ; 10160302
v_mul_f32_e32 v12, v7, v1 ; 10180307
s_waitcnt lgkmcnt(0) ; BF8C007F
; Unit-0 lookup; three result registers v[11:13] are written (IR %294-%296).
image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[20:27], s[16:19] ; F0800700 00850B0B
; Normalize the IN[1].yzw - CONST[1].xyz vector and take the negated dot
; product with CONST[13].xyz (IR %282-%302).
v_mul_f32_e32 v1, v8, v3 ; 10020708
v_mul_f32_e32 v2, v8, v4 ; 10040908
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v1, s2, v1 ; 10020202
v_mad_f32 v1, -s3, v2, -v1 ; D2820001 A4060403
v_mad_f32 v0, -s4, v0, v1 ; D2820000 24060004
; s2 = CONST[10].z.
s_buffer_load_dword s2, s[8:11], 0x2a ; C201092A
v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480
v_log_f32_e32 v1, v1 ; 7E024F01
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_log_f32_e32 v0, v0 ; 7E004F00
; s3/s4/s5 = CONST[3].xyz, s6/s7 = CONST[4].zw.
s_buffer_load_dword s3, s[8:11], 0xc ; C201890C
s_buffer_load_dword s4, s[8:11], 0xd ; C202090D
s_buffer_load_dword s5, s[8:11], 0xe ; C202890E
s_buffer_load_dword s6, s[8:11], 0x12 ; C2030912
s_buffer_load_dword s7, s[8:11], 0x13 ; C2038913
; The two remaining pow calls (IR %286 and %305) expanded as exp(exponent *
; log(x)); v0 becomes the scale factor of IR %308.
v_mul_f32_e64 v2, 4.0, s13 ; D2100002 00001AF6
v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102
v_mul_legacy_f32_e32 v1, s13, v1 ; 0E02020D
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mad_f32 v0, v0, s13, 1.0 ; D2820000 03C81B00
v_add_f32_e32 v2, s12, v10 ; 0604140C
v_exp_f32_e32 v1, v1 ; 7E024B01
v_add_f32_e32 v0, v1, v0 ; 06000101
s_waitcnt vmcnt(0) ; BF8C0770
; Scale the unit-0 sample, then apply the two blends toward CONST[10].xyz
; (IR %309-%321), each expanded as b*(1-f) + a*f.
v_mul_f32_e32 v3, v0, v11 ; 10061700
v_mul_f32_e32 v4, v0, v12 ; 10081900
v_mul_f32_e32 v0, v0, v13 ; 10001B00
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_sub_f32_e32 v7, 1.0, v2 ; 080E04F2
v_mul_f32_e32 v3, v3, v7 ; 10060F03
v_mac_f32_e32 v3, s1, v2 ; 3E060401
v_sub_f32_e32 v1, 1.0, v1 ; 080202F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_sub_f32_e32 v8, 1.0, v1 ; 081002F2
v_mul_f32_e32 v3, v3, v8 ; 10061103
v_mac_f32_e32 v3, s1, v1 ; 3E060201
v_mul_f32_e32 v4, v4, v7 ; 10080F04
v_mul_f32_e32 v0, v0, v7 ; 10000F00
v_mac_f32_e32 v4, s0, v2 ; 3E080400
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s2, v2 ; 3E000402
v_mul_f32_e32 v2, v4, v8 ; 10041104
v_mac_f32_e32 v2, s0, v1 ; 3E040200
v_mul_f32_e32 v0, v0, v8 ; 10001100
v_mac_f32_e32 v0, s2, v1 ; 3E000202
; v1 = saturate(2*IN[0].w) (IR %323); v4 = saturate(IN[1].x*CONST[4].z +
; CONST[4].w) (IR %326).
v_add_f32_e32 v1, v5, v5 ; 06020B05
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mov_b32_e32 v4, s7 ; 7E080207
v_mac_f32_e32 v4, s6, v6 ; 3E080C06
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
; Final blend against CONST[3].xyz (IR %327-%329).
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v6, s3, v5 ; 100C0A03
v_mac_f32_e32 v6, v3, v4 ; 3E0C0903
v_mul_f32_e32 v3, s4, v5 ; 10060A04
v_mac_f32_e32 v3, v2, v4 ; 3E060902
v_mul_f32_e32 v2, s5, v5 ; 10040A05
v_mac_f32_e32 v2, v0, v4 ; 3E040900
; Pack (r, g) and (b, a) into two dwords of halves and export them.
v_cvt_pkrtz_f16_f32_e32 v0, v6, v3 ; 5E000706
v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 24
Code Size: 1040 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[4], IN[0].xxxx
1: MAD TEMP[0], CONST[5], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[6], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[7], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[0], IN[0].xxxx
5: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1]
8: MOV TEMP[1].yzw, TEMP[1].yxyz
9: MOV TEMP[1].x, TEMP[0].zzzz
10: MOV OUT[1], TEMP[0]
11: MOV OUT[2], TEMP[1]
12: MOV OUT[0], TEMP[0]
13: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
%49 = fmul float %25, %45
%50 = fmul float %26, %45
%51 = fmul float %27, %45
%52 = fmul float %28, %45
%53 = fmul float %29, %46
%54 = fadd float %53, %49
%55 = fmul float %30, %46
%56 = fadd float %55, %50
%57 = fmul float %31, %46
%58 = fadd float %57, %51
%59 = fmul float %32, %46
%60 = fadd float %59, %52
%61 = fmul float %33, %47
%62 = fadd float %61, %54
%63 = fmul float %34, %47
%64 = fadd float %63, %56
%65 = fmul float %35, %47
%66 = fadd float %65, %58
%67 = fmul float %36, %47
%68 = fadd float %67, %60
%69 = fmul float %37, %48
%70 = fadd float %69, %62
%71 = fmul float %38, %48
%72 = fadd float %71, %64
%73 = fmul float %39, %48
%74 = fadd float %73, %66
%75 = fmul float %40, %48
%76 = fadd float %75, %68
%77 = fmul float %13, %45
%78 = fmul float %14, %45
%79 = fmul float %15, %45
%80 = fmul float %16, %46
%81 = fadd float %80, %77
%82 = fmul float %17, %46
%83 = fadd float %82, %78
%84 = fmul float %18, %46
%85 = fadd float %84, %79
%86 = fmul float %19, %47
%87 = fadd float %86, %81
%88 = fmul float %20, %47
%89 = fadd float %88, %83
%90 = fmul float %21, %47
%91 = fadd float %90, %85
%92 = fmul float %22, %48
%93 = fadd float %92, %87
%94 = fmul float %23, %48
%95 = fadd float %94, %89
%96 = fmul float %24, %48
%97 = fadd float %96, %91
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %70, float %72, float %74, float %76)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %74, float %93, float %95, float %97)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s13, s[0:3], 0xc ; C206810C
s_buffer_load_dword s14, s[0:3], 0xd ; C207010D
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113
s_buffer_load_dword s20, s[0:3], 0x14 ; C20A0114
s_buffer_load_dword s21, s[0:3], 0x15 ; C20A8115
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x17 ; C20B8117
s_buffer_load_dword s24, s[0:3], 0x18 ; C20C0118
s_buffer_load_dword s25, s[0:3], 0x19 ; C20C8119
s_buffer_load_dword s26, s[0:3], 0x1a ; C20D011A
s_buffer_load_dword s27, s[0:3], 0x1b ; C20D811B
s_buffer_load_dword s28, s[0:3], 0x1c ; C20E011C
s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D
s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mac_f32_e32 v4, s20, v1 ; 3E080214
v_mul_f32_e32 v5, s17, v0 ; 100A0011
v_mac_f32_e32 v5, s21, v1 ; 3E0A0215
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mac_f32_e32 v7, s23, v1 ; 3E0E0217
v_mul_f32_e32 v8, s4, v0 ; 10100004
v_mac_f32_e32 v8, s7, v1 ; 3E100207
v_mul_f32_e32 v9, s5, v0 ; 10120005
v_mac_f32_e32 v9, s8, v1 ; 3E120208
v_mul_f32_e32 v0, s6, v0 ; 10000006
v_mac_f32_e32 v0, s9, v1 ; 3E000209
v_mac_f32_e32 v4, s24, v2 ; 3E080418
v_mac_f32_e32 v5, s25, v2 ; 3E0A0419
v_mac_f32_e32 v6, s26, v2 ; 3E0C041A
v_mac_f32_e32 v7, s27, v2 ; 3E0E041B
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mac_f32_e32 v9, s11, v2 ; 3E12040B
v_mac_f32_e32 v0, s12, v2 ; 3E00040C
v_mac_f32_e32 v4, s28, v3 ; 3E08061C
v_mac_f32_e32 v5, s29, v3 ; 3E0A061D
v_mac_f32_e32 v6, s30, v3 ; 3E0C061E
v_mac_f32_e32 v7, s0, v3 ; 3E0E0600
v_mac_f32_e32 v8, s13, v3 ; 3E10060D
v_mac_f32_e32 v9, s14, v3 ; 3E12060E
v_mac_f32_e32 v0, s15, v3 ; 3E00060F
exp 15, 32, 0, 0, 0, v4, v5, v6, v7 ; F800020F 07060504
exp 15, 33, 0, 0, 0, v6, v8, v9, v0 ; F800021F 00090806
exp 15, 12, 0, 1, 0, v4, v5, v6, v7 ; F80008CF 07060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 284 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8]
DCL CONST[13..24]
DCL TEMP[0..14], LOCAL
IMM[0] FLT32 { 0.5000, 1.0000, 2.0000, 0.0000}
IMM[1] FLT32 { -1.0000, -0.5000, 0.8000, 0.7000}
IMM[2] FLT32 { 8.0000, 0.0100, 20.0000, 0.0050}
IMM[3] FLT32 { 3.0000, 0.0500, 0.3000, 3.1416}
IMM[4] FLT32 { -0.3000, 1.5000, 0.6000, 6.0000}
IMM[5] FLT32 { 0.2000, 0.1000, 0.0000, 0.0000}
0: MUL TEMP[0].xyw, IN[0], IMM[0].xxxx
1: ADD TEMP[1].xy, TEMP[0].xyyy, TEMP[0].wwww
2: MOV TEMP[1].zw, IN[0].wwzw
3: MOV TEMP[2].x, TEMP[0].xxxx
4: MUL TEMP[3].x, TEMP[0].yyyy, CONST[2].xxxx
5: MOV TEMP[2].y, TEMP[3].xxxx
6: ADD TEMP[0].xy, TEMP[2].xyyy, TEMP[0].wwww
7: MOV TEMP[0].zw, IN[0].wwzw
8: ADD TEMP[2].xyz, IN[1].yzww, -CONST[1].xyzz
9: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
10: RSQ TEMP[3].x, TEMP[3].xxxx
11: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
12: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
13: SQRT TEMP[2].x, TEMP[2].xxxx
14: MUL TEMP[4].xy, IN[1].ywww, CONST[13].xxxx
15: MUL TEMP[5].x, IMM[0].xxxx, CONST[0].xxxx
16: MUL TEMP[6].x, IMM[0].zzzz, TEMP[5].xxxx
17: ADD TEMP[7].x, TEMP[4].xxxx, IMM[0].xxxx
18: MOV TEMP[7].y, TEMP[4].yyyy
19: MOV TEMP[8].x, -TEMP[4].xxxx
20: MOV TEMP[8].y, TEMP[4].yyyy
21: MOV TEMP[9].x, TEMP[4].xxxx
22: ADD TEMP[10].x, TEMP[4].yyyy, IMM[0].xxxx
23: MOV TEMP[9].y, TEMP[10].xxxx
24: MOV TEMP[10].x, TEMP[4].xxxx
25: MOV TEMP[10].y, -TEMP[4].yyyy
26: MAD TEMP[7].xy, IMM[0].ywww, TEMP[6].xxxx, TEMP[7].xyyy
27: MOV TEMP[7].xy, TEMP[7].xyyy
28: TEX TEMP[7], TEMP[7], SAMP[2], 2D
29: MAD TEMP[8].xy, IMM[0].ywww, TEMP[6].xxxx, TEMP[8].xyyy
30: MOV TEMP[8].xy, TEMP[8].xyyy
31: TEX TEMP[8], TEMP[8], SAMP[2], 2D
32: ADD TEMP[7], TEMP[7], TEMP[8]
33: MAD TEMP[8].xy, IMM[0].wyyy, TEMP[6].xxxx, TEMP[9].xyyy
34: MOV TEMP[8].xy, TEMP[8].xyyy
35: TEX TEMP[8], TEMP[8], SAMP[2], 2D
36: MAD TEMP[6].xy, IMM[0].wyyy, TEMP[6].xxxx, TEMP[10].xyyy
37: MOV TEMP[6].xy, TEMP[6].xyyy
38: TEX TEMP[6], TEMP[6], SAMP[2], 2D
39: ADD TEMP[6], TEMP[8], TEMP[6]
40: ADD TEMP[6].xy, TEMP[7], TEMP[6]
41: MAD TEMP[7].xy, TEMP[6].xyyy, IMM[0].xxxx, IMM[1].xxxx
42: MUL TEMP[6].xy, TEMP[7].xyyy, IMM[0].zzzz
43: MOV TEMP[6].z, IMM[0].yyyy
44: MUL TEMP[5].x, IMM[0].zzzz, TEMP[5].xxxx
45: ADD TEMP[7].x, TEMP[4].xxxx, IMM[0].xxxx
46: MOV TEMP[7].y, TEMP[4].yyyy
47: MOV TEMP[8].x, -TEMP[4].xxxx
48: MOV TEMP[8].y, TEMP[4].yyyy
49: MOV TEMP[9].x, TEMP[4].xxxx
50: ADD TEMP[10].x, TEMP[4].yyyy, IMM[0].xxxx
51: MOV TEMP[9].y, TEMP[10].xxxx
52: MOV TEMP[10].x, TEMP[4].xxxx
53: MOV TEMP[10].y, -TEMP[4].yyyy
54: MAD TEMP[4].xy, IMM[0].ywww, TEMP[5].xxxx, TEMP[7].xyyy
55: MOV TEMP[4].xy, TEMP[4].xyyy
56: TEX TEMP[4], TEMP[4], SAMP[2], 2D
57: MAD TEMP[7].xy, IMM[0].ywww, TEMP[5].xxxx, TEMP[8].xyyy
58: MOV TEMP[7].xy, TEMP[7].xyyy
59: TEX TEMP[7], TEMP[7], SAMP[2], 2D
60: ADD TEMP[4], TEMP[4], TEMP[7]
61: MAD TEMP[7].xy, IMM[0].wyyy, TEMP[5].xxxx, TEMP[9].xyyy
62: MOV TEMP[7].xy, TEMP[7].xyyy
63: TEX TEMP[7], TEMP[7], SAMP[2], 2D
64: MAD TEMP[5].xy, IMM[0].wyyy, TEMP[5].xxxx, TEMP[10].xyyy
65: MOV TEMP[5].xy, TEMP[5].xyyy
66: TEX TEMP[5], TEMP[5], SAMP[2], 2D
67: ADD TEMP[5], TEMP[7], TEMP[5]
68: ADD TEMP[4].xy, TEMP[4], TEMP[5]
69: MAD TEMP[5].xy, TEMP[4].xyyy, IMM[0].xxxx, IMM[1].xxxx
70: MUL TEMP[4].xy, TEMP[5].xyyy, IMM[0].zzzz
71: MOV TEMP[4].z, IMM[0].yyyy
72: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
73: RSQ TEMP[5].x, TEMP[5].xxxx
74: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
75: DP3 TEMP[5].x, TEMP[6].xyzz, TEMP[6].xyzz
76: RSQ TEMP[5].x, TEMP[5].xxxx
77: MUL TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].xxxx
78: MUL TEMP[6].x, IMM[0].zzzz, CONST[0].xxxx
79: FRC TEMP[6].x, TEMP[6].xxxx
80: LRP TEMP[4].xyz, TEMP[6].xxxx, TEMP[5].xzyy, TEMP[4].xzyy
81: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
82: RSQ TEMP[5].x, TEMP[5].xxxx
83: MOV TEMP[6].xy, TEMP[0].xyyy
84: MOV TEMP[6].w, IN[0].wwww
85: TXP TEMP[6].xyz, TEMP[6], SAMP[5], 2D
86: ADD TEMP[6].xyz, TEMP[6].xzyy, IMM[1].yxyy
87: MAD TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx, TEMP[6].xyzz
88: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
89: RSQ TEMP[5].x, TEMP[5].xxxx
90: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
91: MOV TEMP[5].x, -IN[0].wwww
92: MOV TEMP[6].xy, TEMP[0].xyyy
93: MOV TEMP[6].w, IN[0].wwww
94: TXP TEMP[6].x, TEMP[6], SAMP[1], 2D
95: MAD TEMP[6].x, CONST[3].zzzz, TEMP[6].xxxx, CONST[3].wwww
96: RCP TEMP[6].x, TEMP[6].xxxx
97: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[5].xxxx
98: MAX TEMP[7].x, TEMP[6].xxxx, IMM[0].wwww
99: MUL TEMP[7].x, TEMP[7].xxxx, CONST[20].xxxx
100: POW TEMP[7].x, TEMP[7].xxxx, IMM[0].xxxx
101: MOV_SAT TEMP[7].x, TEMP[7].xxxx
102: MOV TEMP[8].zw, IN[0].wwzw
103: MUL TEMP[9].xy, TEMP[4].xzzz, CONST[19].xxxx
104: MOV_SAT TEMP[10].x, TEMP[7].xxxx
105: MUL TEMP[10].xy, CONST[8].xyyy, TEMP[10].xxxx
106: POW TEMP[11].x, IN[0].zzzz, IMM[1].zzzz
107: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[11].xxxx
108: MAD TEMP[8].xy, TEMP[9].xyyy, TEMP[10].xyyy, IN[0].xyyy
109: MUL TEMP[9].xyw, TEMP[8], IMM[0].xxxx
110: ADD TEMP[9].xy, TEMP[9].xyyy, TEMP[9].wwww
111: MOV TEMP[9].zw, IN[0].wwzw
112: MUL TEMP[8].xyw, TEMP[8], IMM[0].xxxx
113: MOV TEMP[10].x, TEMP[8].xxxx
114: MUL TEMP[11].x, TEMP[8].yyyy, CONST[2].xxxx
115: MOV TEMP[10].y, TEMP[11].xxxx
116: ADD TEMP[8].xy, TEMP[10].xyyy, TEMP[8].wwww
117: MOV TEMP[8].xy, TEMP[8].xyyy
118: MOV TEMP[8].w, IN[0].wwww
119: TXP TEMP[8].x, TEMP[8], SAMP[1], 2D
120: MAD TEMP[8].x, CONST[3].zzzz, TEMP[8].xxxx, CONST[3].wwww
121: RCP TEMP[8].x, TEMP[8].xxxx
122: ADD TEMP[5].x, TEMP[8].xxxx, TEMP[5].xxxx
123: MAX TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
124: MUL TEMP[5].x, TEMP[5].xxxx, CONST[20].xxxx
125: POW TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx
126: MOV_SAT TEMP[5].x, TEMP[5].xxxx
127: MUL TEMP[8].x, TEMP[7].xxxx, IMM[1].wwww
128: FSLT TEMP[8].x, TEMP[8].xxxx, TEMP[5].xxxx
129: UIF TEMP[8].xxxx :0
130: MOV TEMP[8].xyw, TEMP[9]
131: ELSE :0
132: MOV TEMP[8].xyw, TEMP[1]
133: ENDIF
134: MOV TEMP[1].xy, TEMP[8].xyyy
135: MOV TEMP[1].w, TEMP[8].wwww
136: TXP TEMP[1].xyz, TEMP[1], SAMP[0], 2D
137: MUL TEMP[8].x, TEMP[7].xxxx, IMM[1].wwww
138: FSLT TEMP[8].x, TEMP[8].xxxx, TEMP[5].xxxx
139: UIF TEMP[8].xxxx :0
140: MOV TEMP[5].x, TEMP[5].xxxx
141: ELSE :0
142: MOV TEMP[5].x, TEMP[7].xxxx
143: ENDIF
144: MOV TEMP[7].zw, IMM[0].wwww
145: MUL TEMP[8].xy, TEMP[4].xzzz, IMM[2].xxxx
146: MUL TEMP[9].x, TEMP[2].xxxx, IMM[2].yyyy
147: MOV_SAT TEMP[9].x, TEMP[9].xxxx
148: MUL TEMP[7].xy, TEMP[8].xyyy, TEMP[9].xxxx
149: ADD TEMP[8].x, TEMP[6].xxxx, TEMP[4].xxxx
150: RCP TEMP[9].x, CONST[21].xxxx
151: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
152: MUL TEMP[9].x, CONST[0].xxxx, IMM[2].zzzz
153: ADD TEMP[10].x, IN[1].yyyy, IN[1].wwww
154: MAD TEMP[11].x, TEMP[9].xxxx, CONST[21].zzzz, TEMP[8].xxxx
155: MAD TEMP[10].x, TEMP[10].xxxx, IMM[2].wwww, TEMP[11].xxxx
156: MUL TEMP[10].x, TEMP[10].xxxx, IMM[3].xxxx
157: ADD TEMP[11].x, IN[1].yyyy, TEMP[9].xxxx
158: MOV TEMP[11].y, IN[1].wwww
159: ADD TEMP[9].x, -IN[1].yyyy, TEMP[9].xxxx
160: MOV TEMP[9].y, IN[1].wwww
161: DP3 TEMP[12].x, TEMP[4].xyzz, TEMP[4].xyzz
162: RSQ TEMP[12].x, TEMP[12].xxxx
163: MUL TEMP[12].xyz, TEMP[4].xyzz, TEMP[12].xxxx
164: DP4 TEMP[13].x, CONST[22], CONST[22]
165: RSQ TEMP[13].x, TEMP[13].xxxx
166: MUL TEMP[13].xyz, CONST[22], TEMP[13].xxxx
167: ADD TEMP[0].xyw, TEMP[0], TEMP[7]
168: DP3 TEMP[4].x, -TEMP[3].xyzz, TEMP[4].xyzz
169: MOV_SAT TEMP[4].x, TEMP[4].xxxx
170: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
171: MOV_SAT TEMP[7].x, -TEMP[3].yyyy
172: ADD TEMP[7].x, IMM[0].yyyy, -TEMP[7].xxxx
173: MUL TEMP[7].x, TEMP[7].xxxx, TEMP[7].xxxx
174: DP3 TEMP[14].x, TEMP[12].xyzz, TEMP[13].xyzz
175: MUL TEMP[12].xyz, TEMP[14].xxxx, TEMP[12].xyzz
176: MUL TEMP[12].xyz, IMM[0].zzzz, TEMP[12].xyzz
177: ADD TEMP[12].xyz, TEMP[13].xyzz, -TEMP[12].xyzz
178: DP3 TEMP[3].x, -TEMP[3].xyzz, TEMP[12].xyzz
179: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx
180: POW TEMP[3].x, TEMP[3].xxxx, CONST[24].xxxx
181: MUL TEMP[3].xyz, CONST[23].xyzz, TEMP[3].xxxx
182: ADD TEMP[12].x, -TEMP[13].yyyy, IMM[3].yyyy
183: MUL TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
184: MOV_SAT TEMP[12].x, TEMP[12].xxxx
185: MUL TEMP[12].x, IMM[3].xxxx, TEMP[12].xxxx
186: ADD TEMP[13].x, TEMP[5].xxxx, CONST[20].yyyy
187: MOV_SAT TEMP[13].x, TEMP[13].xxxx
188: LRP TEMP[1].xyz, TEMP[13].xxxx, CONST[14].xyzz, TEMP[1].xyzz
189: MUL TEMP[11].xy, TEMP[11].xyyy, CONST[21].yyyy
190: MOV TEMP[11].xy, TEMP[11].xyyy
191: TEX TEMP[11].x, TEMP[11], SAMP[4], 2D
192: MUL TEMP[9].xy, TEMP[9].xyyy, CONST[21].yyyy
193: MOV TEMP[9].xy, TEMP[9].xyyy
194: TEX TEMP[9].x, TEMP[9], SAMP[4], 2D
195: ADD TEMP[9].x, TEMP[11].xxxx, TEMP[9].xxxx
196: MUL TEMP[9].x, TEMP[9].xxxx, IMM[0].xxxx
197: ADD TEMP[8].x, IMM[0].yyyy, -TEMP[8].xxxx
198: MOV_SAT TEMP[8].x, TEMP[8].xxxx
199: POW TEMP[8].x, TEMP[8].xxxx, IMM[3].zzzz
200: FLR TEMP[11].x, TEMP[10].xxxx
201: FRC TEMP[10].x, TEMP[10].xxxx
202: POW TEMP[10].x, TEMP[10].xxxx, IMM[3].zzzz
203: ADD TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
204: MUL TEMP[10].x, TEMP[10].xxxx, IMM[3].wwww
205: SIN TEMP[10].x, TEMP[10].xxxx
206: ADD TEMP[10].x, TEMP[10].xxxx, IMM[4].xxxx
207: MUL TEMP[10].x, TEMP[10].xxxx, IMM[4].yyyy
208: POW TEMP[10].x, TEMP[10].xxxx, IMM[3].xxxx
209: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx
210: MOV_SAT TEMP[8].x, TEMP[8].xxxx
211: MUL TEMP[8].x, TEMP[9].xxxx, TEMP[8].xxxx
212: MUL TEMP[8].x, TEMP[8].xxxx, CONST[21].wwww
213: MUL TEMP[9].x, TEMP[2].xxxx, IMM[2].yyyy
214: ADD TEMP[9].x, IMM[0].yyyy, -TEMP[9].xxxx
215: MAX TEMP[9].x, TEMP[9].xxxx, IMM[4].zzzz
216: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
217: MOV_SAT TEMP[8].x, TEMP[8].xxxx
218: LRP TEMP[1].xyz, TEMP[8].xxxx, CONST[15].xyzz, TEMP[1].xyzz
219: MOV TEMP[8].xy, TEMP[0].xyyy
220: MOV TEMP[8].w, TEMP[0].wwww
221: TXP TEMP[0].xyz, TEMP[8], SAMP[3], 2D
222: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx
223: LRP TEMP[4].xyz, TEMP[4].xxxx, CONST[17], CONST[16]
224: LRP TEMP[0].xyz, CONST[18].xxxx, TEMP[4].xyzz, TEMP[0].xyzz
225: MAD TEMP[4].x, CONST[17].zzzz, IMM[4].wwww, IMM[5].xxxx
226: MOV_SAT TEMP[4].x, TEMP[4].xxxx
227: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
228: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[7].xxxx
229: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[5].xxxx
230: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
231: LRP TEMP[4].x, IMM[5].xxxx, IMM[0].yyyy, TEMP[4].xxxx
232: MOV_SAT TEMP[4].x, TEMP[4].xxxx
233: MUL TEMP[2].x, TEMP[2].xxxx, IMM[5].yyyy
234: MOV_SAT TEMP[2].x, TEMP[2].xxxx
235: MUL TEMP[2].x, TEMP[4].xxxx, TEMP[2].xxxx
236: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
237: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[12].xxxx, TEMP[0].xyzz
238: MUL TEMP[1].x, TEMP[6].xxxx, IMM[0].zzzz
239: MOV_SAT TEMP[1].x, TEMP[1].xxxx
240: MOV TEMP[1].w, TEMP[1].xxxx
241: MAD TEMP[2].x, IN[1].xxxx, CONST[5].zzzz, CONST[5].wwww
242: MOV_SAT TEMP[2].x, TEMP[2].xxxx
243: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[4].xyzz
244: MOV OUT[0], TEMP[1]
245: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 348)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 364)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%67 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%68 = load <32 x i8>, <32 x i8> addrspace(2)* %67, align 32, !tbaa !0
%69 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0
%71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0
%73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0
%75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0
%77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0
%79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%80 = bitcast <8 x i32> addrspace(2)* %79 to <32 x i8> addrspace(2)*
%81 = load <32 x i8>, <32 x i8> addrspace(2)* %80, align 32, !tbaa !0
%82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%83 = bitcast <4 x i32> addrspace(2)* %82 to <16 x i8> addrspace(2)*
%84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0
%85 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%86 = load <8 x i32>, <8 x i32> addrspace(2)* %85, align 32, !tbaa !0
%87 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%88 = load <4 x i32>, <4 x i32> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%96 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%97 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%98 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%99 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%100 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%101 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%102 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%103 = fmul float %95, 5.000000e-01
%104 = fmul float %96, 5.000000e-01
%105 = fmul float %98, 5.000000e-01
%106 = fadd float %103, %105
%107 = fadd float %104, %105
%108 = fmul float %104, %28
%109 = fadd float %103, %105
%110 = fadd float %108, %105
%111 = fsub float %100, %25
%112 = fsub float %101, %26
%113 = fsub float %102, %27
%114 = fmul float %111, %111
%115 = fmul float %112, %112
%116 = fadd float %115, %114
%117 = fmul float %113, %113
%118 = fadd float %116, %117
%119 = call float @llvm.AMDGPU.rsq.clamped.f32(float %118)
%120 = fmul float %111, %119
%121 = fmul float %112, %119
%122 = fmul float %113, %119
%123 = fmul float %111, %111
%124 = fmul float %112, %112
%125 = fadd float %124, %123
%126 = fmul float %113, %113
%127 = fadd float %125, %126
%128 = call float @llvm.sqrt.f32(float %127)
%129 = fmul float %100, %38
%130 = fmul float %102, %38
%131 = fmul float %24, 5.000000e-01
%132 = fmul float %131, 2.000000e+00
%133 = fadd float %129, 5.000000e-01
%134 = fadd float %130, 5.000000e-01
%135 = fadd float %132, %133
%136 = fmul float %132, 0.000000e+00
%137 = fadd float %136, %130
%138 = bitcast float %135 to i32
%139 = bitcast float %137 to i32
%140 = insertelement <2 x i32> undef, i32 %138, i32 0
%141 = insertelement <2 x i32> %140, i32 %139, i32 1
%142 = bitcast <8 x i32> %76 to <32 x i8>
%143 = bitcast <4 x i32> %78 to <16 x i8>
%144 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %141, <32 x i8> %142, <16 x i8> %143, i32 2)
%145 = extractelement <4 x float> %144, i32 0
%146 = extractelement <4 x float> %144, i32 1
%147 = fsub float %132, %129
%148 = fmul float %132, 0.000000e+00
%149 = fadd float %148, %130
%150 = bitcast float %147 to i32
%151 = bitcast float %149 to i32
%152 = insertelement <2 x i32> undef, i32 %150, i32 0
%153 = insertelement <2 x i32> %152, i32 %151, i32 1
%154 = bitcast <8 x i32> %76 to <32 x i8>
%155 = bitcast <4 x i32> %78 to <16 x i8>
%156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %154, <16 x i8> %155, i32 2)
%157 = extractelement <4 x float> %156, i32 0
%158 = extractelement <4 x float> %156, i32 1
%159 = fadd float %145, %157
%160 = fadd float %146, %158
%161 = fmul float %132, 0.000000e+00
%162 = fadd float %161, %129
%163 = fadd float %132, %134
%164 = bitcast float %162 to i32
%165 = bitcast float %163 to i32
%166 = insertelement <2 x i32> undef, i32 %164, i32 0
%167 = insertelement <2 x i32> %166, i32 %165, i32 1
%168 = bitcast <8 x i32> %76 to <32 x i8>
%169 = bitcast <4 x i32> %78 to <16 x i8>
%170 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %167, <32 x i8> %168, <16 x i8> %169, i32 2)
%171 = extractelement <4 x float> %170, i32 0
%172 = extractelement <4 x float> %170, i32 1
%173 = fmul float %132, 0.000000e+00
%174 = fadd float %173, %129
%175 = fsub float %132, %130
%176 = bitcast float %174 to i32
%177 = bitcast float %175 to i32
%178 = insertelement <2 x i32> undef, i32 %176, i32 0
%179 = insertelement <2 x i32> %178, i32 %177, i32 1
%180 = bitcast <8 x i32> %76 to <32 x i8>
%181 = bitcast <4 x i32> %78 to <16 x i8>
%182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %179, <32 x i8> %180, <16 x i8> %181, i32 2)
%183 = extractelement <4 x float> %182, i32 0
%184 = extractelement <4 x float> %182, i32 1
%185 = fadd float %171, %183
%186 = fadd float %172, %184
%187 = fadd float %159, %185
%188 = fadd float %160, %186
%189 = fmul float %187, 5.000000e-01
%190 = fadd float %189, -1.000000e+00
%191 = fmul float %188, 5.000000e-01
%192 = fadd float %191, -1.000000e+00
%193 = fmul float %190, 2.000000e+00
%194 = fmul float %192, 2.000000e+00
%195 = fmul float %131, 2.000000e+00
%196 = fadd float %129, 5.000000e-01
%197 = fadd float %130, 5.000000e-01
%198 = fadd float %195, %196
%199 = fmul float %195, 0.000000e+00
%200 = fadd float %199, %130
%201 = bitcast float %198 to i32
%202 = bitcast float %200 to i32
%203 = insertelement <2 x i32> undef, i32 %201, i32 0
%204 = insertelement <2 x i32> %203, i32 %202, i32 1
%205 = bitcast <8 x i32> %76 to <32 x i8>
%206 = bitcast <4 x i32> %78 to <16 x i8>
%207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 2)
%208 = extractelement <4 x float> %207, i32 0
%209 = extractelement <4 x float> %207, i32 1
%210 = fsub float %195, %129
%211 = fmul float %195, 0.000000e+00
%212 = fadd float %211, %130
%213 = bitcast float %210 to i32
%214 = bitcast float %212 to i32
%215 = insertelement <2 x i32> undef, i32 %213, i32 0
%216 = insertelement <2 x i32> %215, i32 %214, i32 1
%217 = bitcast <8 x i32> %76 to <32 x i8>
%218 = bitcast <4 x i32> %78 to <16 x i8>
%219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %216, <32 x i8> %217, <16 x i8> %218, i32 2)
%220 = extractelement <4 x float> %219, i32 0
%221 = extractelement <4 x float> %219, i32 1
%222 = fadd float %208, %220
%223 = fadd float %209, %221
%224 = fmul float %195, 0.000000e+00
%225 = fadd float %224, %129
%226 = fadd float %195, %197
%227 = bitcast float %225 to i32
%228 = bitcast float %226 to i32
%229 = insertelement <2 x i32> undef, i32 %227, i32 0
%230 = insertelement <2 x i32> %229, i32 %228, i32 1
%231 = bitcast <8 x i32> %76 to <32 x i8>
%232 = bitcast <4 x i32> %78 to <16 x i8>
%233 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %230, <32 x i8> %231, <16 x i8> %232, i32 2)
%234 = extractelement <4 x float> %233, i32 0
%235 = extractelement <4 x float> %233, i32 1
%236 = fmul float %195, 0.000000e+00
%237 = fadd float %236, %129
%238 = fsub float %195, %130
%239 = bitcast float %237 to i32
%240 = bitcast float %238 to i32
%241 = insertelement <2 x i32> undef, i32 %239, i32 0
%242 = insertelement <2 x i32> %241, i32 %240, i32 1
%243 = bitcast <8 x i32> %76 to <32 x i8>
%244 = bitcast <4 x i32> %78 to <16 x i8>
%245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %243, <16 x i8> %244, i32 2)
%246 = extractelement <4 x float> %245, i32 0
%247 = extractelement <4 x float> %245, i32 1
%248 = fadd float %234, %246
%249 = fadd float %235, %247
%250 = fadd float %222, %248
%251 = fadd float %223, %249
%252 = fmul float %250, 5.000000e-01
%253 = fadd float %252, -1.000000e+00
%254 = fmul float %251, 5.000000e-01
%255 = fadd float %254, -1.000000e+00
%256 = fmul float %253, 2.000000e+00
%257 = fmul float %255, 2.000000e+00
%258 = fmul float %256, %256
%259 = fmul float %257, %257
%260 = fadd float %259, %258
%261 = fadd float %260, 1.000000e+00
%262 = call float @llvm.AMDGPU.rsq.clamped.f32(float %261)
%263 = fmul float %256, %262
%264 = fmul float %257, %262
%265 = fmul float %193, %193
%266 = fmul float %194, %194
%267 = fadd float %266, %265
%268 = fadd float %267, 1.000000e+00
%269 = call float @llvm.AMDGPU.rsq.clamped.f32(float %268)
%270 = fmul float %193, %269
%271 = fmul float %194, %269
%272 = fmul float %24, 2.000000e+00
%273 = call float @llvm.floor.f32(float %272)
%274 = fsub float %272, %273
%275 = call float @llvm.AMDGPU.lrp(float %274, float %270, float %263)
%276 = call float @llvm.AMDGPU.lrp(float %274, float %269, float %262)
%277 = call float @llvm.AMDGPU.lrp(float %274, float %271, float %264)
%278 = fmul float %275, %275
%279 = fmul float %276, %276
%280 = fadd float %279, %278
%281 = fmul float %277, %277
%282 = fadd float %280, %281
%283 = call float @llvm.AMDGPU.rsq.clamped.f32(float %282)
%284 = fdiv float %109, %98
%285 = fdiv float %110, %98
%286 = bitcast float %284 to i32
%287 = bitcast float %285 to i32
%288 = insertelement <2 x i32> undef, i32 %286, i32 0
%289 = insertelement <2 x i32> %288, i32 %287, i32 1
%290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %289, <32 x i8> %91, <16 x i8> %94, i32 2)
%291 = extractelement <4 x float> %290, i32 0
%292 = extractelement <4 x float> %290, i32 1
%293 = extractelement <4 x float> %290, i32 2
%294 = fadd float %291, -5.000000e-01
%295 = fadd float %293, -1.000000e+00
%296 = fadd float %292, -5.000000e-01
%297 = fmul float %275, %283
%298 = fadd float %297, %294
%299 = fmul float %276, %283
%300 = fadd float %299, %295
%301 = fmul float %277, %283
%302 = fadd float %301, %296
%303 = fmul float %298, %298
%304 = fmul float %300, %300
%305 = fadd float %304, %303
%306 = fmul float %302, %302
%307 = fadd float %305, %306
%308 = call float @llvm.AMDGPU.rsq.clamped.f32(float %307)
%309 = fmul float %298, %308
%310 = fmul float %300, %308
%311 = fmul float %302, %308
%312 = fdiv float %109, %98
%313 = fdiv float %110, %98
%314 = bitcast float %312 to i32
%315 = bitcast float %313 to i32
%316 = insertelement <2 x i32> undef, i32 %314, i32 0
%317 = insertelement <2 x i32> %316, i32 %315, i32 1
%318 = bitcast <8 x i32> %72 to <32 x i8>
%319 = bitcast <4 x i32> %74 to <16 x i8>
%320 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %317, <32 x i8> %318, <16 x i8> %319, i32 2)
%321 = extractelement <4 x float> %320, i32 0
%322 = fmul float %29, %321
%323 = fadd float %322, %30
%324 = fdiv float 1.000000e+00, %323
%325 = fsub float %324, %98
%326 = call float @llvm.maxnum.f32(float %325, float 0.000000e+00)
%327 = fmul float %326, %53
%sqrtf = call float @sqrtf(float %327) #1
%fabsf = call float @fabsf(float %sqrtf) #1
%328 = fcmp oeq float %327, 0xFFF0000000000000
%329 = select i1 %328, float 0x7FF0000000000000, float %fabsf
%330 = call float @llvm.AMDIL.clamp.(float %329, float 0.000000e+00, float 1.000000e+00)
%331 = fmul float %309, %52
%332 = fmul float %311, %52
%333 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00)
%334 = fmul float %36, %333
%335 = fmul float %37, %333
%336 = call float @llvm.pow.f32(float %97, float 0x3FE99999A0000000)
%337 = fmul float %334, %336
%338 = fmul float %335, %336
%339 = fmul float %331, %337
%340 = fadd float %339, %95
%341 = fmul float %332, %338
%342 = fadd float %341, %96
%343 = fmul float %340, 5.000000e-01
%344 = fmul float %342, 5.000000e-01
%345 = fmul float %98, 5.000000e-01
%346 = fadd float %343, %345
%347 = fadd float %344, %345
%348 = fmul float %340, 5.000000e-01
%349 = fmul float %342, 5.000000e-01
%350 = fmul float %98, 5.000000e-01
%351 = fmul float %349, %28
%352 = fadd float %348, %350
%353 = fadd float %351, %350
%354 = fdiv float %352, %98
%355 = fdiv float %353, %98
%356 = bitcast float %354 to i32
%357 = bitcast float %355 to i32
%358 = insertelement <2 x i32> undef, i32 %356, i32 0
%359 = insertelement <2 x i32> %358, i32 %357, i32 1
%360 = bitcast <8 x i32> %72 to <32 x i8>
%361 = bitcast <4 x i32> %74 to <16 x i8>
%362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %359, <32 x i8> %360, <16 x i8> %361, i32 2)
%363 = extractelement <4 x float> %362, i32 0
%364 = fmul float %29, %363
%365 = fadd float %364, %30
%366 = fdiv float 1.000000e+00, %365
%367 = fsub float %366, %98
%368 = call float @llvm.maxnum.f32(float %367, float 0.000000e+00)
%369 = fmul float %368, %53
%sqrtf64 = call float @sqrtf(float %369) #1
%fabsf65 = call float @fabsf(float %sqrtf64) #1
%370 = fcmp oeq float %369, 0xFFF0000000000000
%371 = select i1 %370, float 0x7FF0000000000000, float %fabsf65
%372 = call float @llvm.AMDIL.clamp.(float %371, float 0.000000e+00, float 1.000000e+00)
%373 = fmul float %330, 0x3FE6666660000000
%374 = fcmp olt float %373, %372
%. = select i1 %374, float %346, float %106
%.63 = select i1 %374, float %347, float %107
%375 = fdiv float %., %98
%376 = fdiv float %.63, %98
%377 = bitcast float %375 to i32
%378 = bitcast float %376 to i32
%379 = insertelement <2 x i32> undef, i32 %377, i32 0
%380 = insertelement <2 x i32> %379, i32 %378, i32 1
%381 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %380, <32 x i8> %68, <16 x i8> %70, i32 2)
%382 = extractelement <4 x float> %381, i32 0
%383 = extractelement <4 x float> %381, i32 1
%384 = extractelement <4 x float> %381, i32 2
%385 = fmul float %330, 0x3FE6666660000000
%386 = fcmp olt float %385, %372
%temp20.0 = select i1 %386, float %372, float %330
%387 = fmul float %309, 8.000000e+00
%388 = fmul float %311, 8.000000e+00
%389 = fmul float %128, 0x3F847AE140000000
%390 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00)
%391 = fmul float %387, %390
%392 = fmul float %388, %390
%393 = fadd float %325, %309
%394 = fdiv float 1.000000e+00, %55
%395 = fmul float %393, %394
%396 = fmul float %24, 2.000000e+01
%397 = fadd float %100, %102
%398 = fmul float %396, %57
%399 = fadd float %398, %395
%400 = fmul float %397, 0x3F747AE140000000
%401 = fadd float %400, %399
%402 = fmul float %401, 3.000000e+00
%403 = fadd float %100, %396
%404 = fsub float %396, %100
%405 = fmul float %309, %309
%406 = fmul float %310, %310
%407 = fadd float %406, %405
%408 = fmul float %311, %311
%409 = fadd float %407, %408
%410 = call float @llvm.AMDGPU.rsq.clamped.f32(float %409)
%411 = fmul float %309, %410
%412 = fmul float %310, %410
%413 = fmul float %311, %410
%414 = fmul float %59, %59
%415 = fmul float %60, %60
%416 = fadd float %414, %415
%417 = fmul float %61, %61
%418 = fadd float %416, %417
%419 = fmul float %62, %62
%420 = fadd float %418, %419
%421 = call float @llvm.AMDGPU.rsq.clamped.f32(float %420)
%422 = fmul float %59, %421
%423 = fmul float %60, %421
%424 = fmul float %61, %421
%425 = fadd float %109, %391
%426 = fadd float %110, %392
%427 = fadd float %98, 0.000000e+00
%428 = fmul float %120, %309
%429 = fsub float -0.000000e+00, %428
%430 = fmul float %121, %310
%431 = fsub float %429, %430
%432 = fmul float %122, %311
%433 = fsub float %431, %432
%434 = call float @llvm.AMDIL.clamp.(float %433, float 0.000000e+00, float 1.000000e+00)
%435 = fsub float 1.000000e+00, %434
%436 = fsub float -0.000000e+00, %121
%437 = call float @llvm.AMDIL.clamp.(float %436, float 0.000000e+00, float 1.000000e+00)
%438 = fsub float 1.000000e+00, %437
%439 = fmul float %438, %438
%440 = fmul float %411, %422
%441 = fmul float %412, %423
%442 = fadd float %441, %440
%443 = fmul float %413, %424
%444 = fadd float %442, %443
%445 = fmul float %444, %411
%446 = fmul float %444, %412
%447 = fmul float %444, %413
%448 = fmul float %445, 2.000000e+00
%449 = fmul float %446, 2.000000e+00
%450 = fmul float %447, 2.000000e+00
%451 = fsub float %422, %448
%452 = fsub float %423, %449
%453 = fsub float %424, %450
%454 = fmul float %120, %451
%455 = fsub float -0.000000e+00, %454
%456 = fmul float %121, %452
%457 = fsub float %455, %456
%458 = fmul float %122, %453
%459 = fsub float %457, %458
%460 = call float @llvm.maxnum.f32(float %459, float 0.000000e+00)
%461 = call float @llvm.pow.f32(float %460, float %66)
%462 = fmul float %63, %461
%463 = fmul float %64, %461
%464 = fmul float %65, %461
%465 = fsub float 0x3FA99999A0000000, %423
%466 = fmul float %465, 2.000000e+01
%467 = call float @llvm.AMDIL.clamp.(float %466, float 0.000000e+00, float 1.000000e+00)
%468 = fmul float %467, 3.000000e+00
%469 = fadd float %temp20.0, %54
%470 = call float @llvm.AMDIL.clamp.(float %469, float 0.000000e+00, float 1.000000e+00)
%471 = call float @llvm.AMDGPU.lrp(float %470, float %39, float %382)
%472 = call float @llvm.AMDGPU.lrp(float %470, float %40, float %383)
%473 = call float @llvm.AMDGPU.lrp(float %470, float %41, float %384)
%474 = fmul float %403, %56
%475 = fmul float %102, %56
%476 = bitcast float %474 to i32
%477 = bitcast float %475 to i32
%478 = insertelement <2 x i32> undef, i32 %476, i32 0
%479 = insertelement <2 x i32> %478, i32 %477, i32 1
%480 = bitcast <8 x i32> %86 to <32 x i8>
%481 = bitcast <4 x i32> %88 to <16 x i8>
%482 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %479, <32 x i8> %480, <16 x i8> %481, i32 2)
%483 = extractelement <4 x float> %482, i32 0
%484 = fmul float %404, %56
%485 = fmul float %102, %56
%486 = bitcast float %484 to i32
%487 = bitcast float %485 to i32
%488 = insertelement <2 x i32> undef, i32 %486, i32 0
%489 = insertelement <2 x i32> %488, i32 %487, i32 1
%490 = bitcast <8 x i32> %86 to <32 x i8>
%491 = bitcast <4 x i32> %88 to <16 x i8>
%492 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %489, <32 x i8> %490, <16 x i8> %491, i32 2)
%493 = extractelement <4 x float> %492, i32 0
%494 = fadd float %483, %493
%495 = fmul float %494, 5.000000e-01
%496 = fsub float 1.000000e+00, %395
%497 = call float @llvm.AMDIL.clamp.(float %496, float 0.000000e+00, float 1.000000e+00)
%498 = call float @llvm.pow.f32(float %497, float 0x3FD3333340000000)
%499 = call float @llvm.floor.f32(float %402)
%500 = call float @llvm.floor.f32(float %402)
%501 = fsub float %402, %500
%502 = call float @llvm.pow.f32(float %501, float 0x3FD3333340000000)
%503 = fadd float %499, %502
%504 = fmul float %503, 0x400921FB80000000
%505 = call float @llvm.sin.f32(float %504)
%506 = fadd float %505, 0xBFD3333340000000
%507 = fmul float %506, 1.500000e+00
%508 = call float @llvm.pow.f32(float %507, float 3.000000e+00)
%509 = fmul float %498, %508
%510 = call float @llvm.AMDIL.clamp.(float %509, float 0.000000e+00, float 1.000000e+00)
%511 = fmul float %495, %510
%512 = fmul float %511, %58
%513 = fmul float %128, 0x3F847AE140000000
%514 = fsub float 1.000000e+00, %513
%515 = call float @llvm.maxnum.f32(float %514, float 0x3FE3333340000000)
%516 = fmul float %512, %515
%517 = call float @llvm.AMDIL.clamp.(float %516, float 0.000000e+00, float 1.000000e+00)
%518 = call float @llvm.AMDGPU.lrp(float %517, float %42, float %471)
%519 = call float @llvm.AMDGPU.lrp(float %517, float %43, float %472)
%520 = call float @llvm.AMDGPU.lrp(float %517, float %44, float %473)
%521 = fdiv float %425, %427
%522 = fdiv float %426, %427
%523 = bitcast float %521 to i32
%524 = bitcast float %522 to i32
%525 = insertelement <2 x i32> undef, i32 %523, i32 0
%526 = insertelement <2 x i32> %525, i32 %524, i32 1
%527 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %526, <32 x i8> %81, <16 x i8> %84, i32 2)
%528 = extractelement <4 x float> %527, i32 0
%529 = extractelement <4 x float> %527, i32 1
%530 = extractelement <4 x float> %527, i32 2
%531 = fmul float %435, %435
%532 = call float @llvm.AMDGPU.lrp(float %531, float %48, float %45)
%533 = call float @llvm.AMDGPU.lrp(float %531, float %49, float %46)
%534 = call float @llvm.AMDGPU.lrp(float %531, float %50, float %47)
%535 = call float @llvm.AMDGPU.lrp(float %51, float %532, float %528)
%536 = call float @llvm.AMDGPU.lrp(float %51, float %533, float %529)
%537 = call float @llvm.AMDGPU.lrp(float %51, float %534, float %530)
%538 = fmul float %50, 6.000000e+00
%539 = fadd float %538, 0x3FC99999A0000000
%540 = call float @llvm.AMDIL.clamp.(float %539, float 0.000000e+00, float 1.000000e+00)
%541 = fmul float %535, %540
%542 = fmul float %536, %540
%543 = fmul float %537, %540
%544 = fmul float %439, %439
%545 = fmul float %temp20.0, %temp20.0
%546 = fmul float %544, %545
%547 = call float @llvm.AMDGPU.lrp(float 0x3FC99999A0000000, float 1.000000e+00, float %546)
%548 = call float @llvm.AMDIL.clamp.(float %547, float 0.000000e+00, float 1.000000e+00)
%549 = fmul float %128, 0x3FB99999A0000000
%550 = call float @llvm.AMDIL.clamp.(float %549, float 0.000000e+00, float 1.000000e+00)
%551 = fmul float %548, %550
%552 = call float @llvm.AMDGPU.lrp(float %551, float %541, float %518)
%553 = call float @llvm.AMDGPU.lrp(float %551, float %542, float %519)
%554 = call float @llvm.AMDGPU.lrp(float %551, float %543, float %520)
%555 = fmul float %462, %468
%556 = fadd float %555, %552
%557 = fmul float %463, %468
%558 = fadd float %557, %553
%559 = fmul float %464, %468
%560 = fadd float %559, %554
%561 = fmul float %325, 2.000000e+00
%562 = call float @llvm.AMDIL.clamp.(float %561, float 0.000000e+00, float 1.000000e+00)
%563 = fmul float %99, %34
%564 = fadd float %563, %35
%565 = call float @llvm.AMDIL.clamp.(float %564, float 0.000000e+00, float 1.000000e+00)
%566 = call float @llvm.AMDGPU.lrp(float %565, float %556, float %31)
%567 = call float @llvm.AMDGPU.lrp(float %565, float %558, float %32)
%568 = call float @llvm.AMDGPU.lrp(float %565, float %560, float %33)
%569 = call i32 @llvm.SI.packf16(float %566, float %567)
%570 = bitcast i32 %569 to float
%571 = call i32 @llvm.SI.packf16(float %568, float %562)
%572 = bitcast i32 %571 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %570, float %572, float %570, float %572)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sin.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
declare float @sqrtf(float)
declare float @fabsf(float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
s_load_dwordx4 s[48:51], s[4:5], 0x0 ; C0980500
v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000
v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001
v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100
v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101
v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200
v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201
v_log_f32_e32 v5, v2 ; 7E0A4F02
v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300
v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[8:11], 0x0 ; C2060900
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
s_buffer_load_dword s0, s[8:11], 0x4 ; C2000904
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
s_buffer_load_dword s20, s[8:11], 0x5 ; C20A0905
s_buffer_load_dword s21, s[8:11], 0x6 ; C20A8906
s_buffer_load_dword s47, s[8:11], 0x8 ; C2178908
s_buffer_load_dword s45, s[8:11], 0xe ; C216890E
s_buffer_load_dword s46, s[8:11], 0xf ; C217090F
s_buffer_load_dword s2, s[8:11], 0x10 ; C2010910
s_buffer_load_dword s1, s[8:11], 0x11 ; C2008911
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v8, s0, v7 ; 0A100E00
s_buffer_load_dword s0, s[8:11], 0x12 ; C2000912
s_buffer_load_dword s44, s[8:11], 0x51 ; C2160951
s_buffer_load_dword s17, s[8:11], 0x54 ; C2088954
s_buffer_load_dword s19, s[8:11], 0x55 ; C2098955
s_buffer_load_dword s16, s[8:11], 0x56 ; C2080956
s_buffer_load_dword s3, s[8:11], 0x57 ; C2018957
s_buffer_load_dword s14, s[8:11], 0x58 ; C2070958
s_buffer_load_dword s13, s[8:11], 0x59 ; C2068959
s_buffer_load_dword s15, s[8:11], 0x5a ; C207895A
s_buffer_load_dword s18, s[8:11], 0x5b ; C209095B
v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600
v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601
v_subrev_f32_e32 v9, s20, v9 ; 0A121214
v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700
v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701
v_subrev_f32_e32 v1, s21, v0 ; 0A020015
v_mul_f32_e32 v10, 0.5, v4 ; 101408F0
v_mul_f32_e32 v11, 0.5, v6 ; 10160CF0
v_mad_f32 v10, s47, v10, v11 ; D282000A 042E142F
v_mad_f32 v12, 0.5, v3, v11 ; D282000C 042E06F0
v_mad_f32 v13, 0.5, v4, v11 ; D282000D 042E08F0
v_mul_f32_e32 v14, v8, v8 ; 101C1108
v_mac_f32_e32 v14, v9, v9 ; 3E1C1309
v_mac_f32_e32 v14, v1, v1 ; 3E1C0301
v_mul_f32_e64 v15, 0.5, s12 ; D210000F 000018F0
v_mad_f32 v16, 0.5, s12, v15 ; D2820010 043C18F0
v_add_f32_e64 v17, s12, s12 ; D2060011 0000180C
v_floor_f32_e32 v17, v17 ; 7E224911
v_mad_f32 v17, 2.0, s12, -v17 ; D2820011 844418F4
v_sub_f32_e32 v18, 1.0, v17 ; 082422F2
v_mov_b32_e32 v19, 0x6f800000 ; 7E2602FF 6F800000
v_mov_b32_e32 v20, 0x2f800000 ; 7E2802FF 2F800000
v_mov_b32_e32 v21, 0x7fffffff ; 7E2A02FF 7FFFFFFF
s_buffer_load_dword s53, s[8:11], 0x20 ; C21A8920
s_buffer_load_dword s54, s[8:11], 0x21 ; C21B0921
s_buffer_load_dword s55, s[8:11], 0x34 ; C21B8934
s_buffer_load_dword s84, s[8:11], 0x4c ; C22A094C
s_buffer_load_dword s52, s[8:11], 0x50 ; C21A0950
v_cmp_gt_f32_e64 vcc, |v6|, v19 ; D008016A 00022706
s_load_dwordx4 s[56:59], s[4:5], 0x4 ; C09C0504
s_load_dwordx4 s[72:75], s[4:5], 0x8 ; C0A40508
v_cndmask_b32_e32 v22, 1.0, v20 ; 002C28F2
v_mul_f32_e32 v23, v22, v6 ; 102E0D16
v_rcp_f32_e32 v23, v23 ; 7E2E5517
v_mov_b32_e32 v24, s46 ; 7E30022E
s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v25, s55, v7 ; 10320E37
v_mad_f32 v26, v0, s55, 0.5 ; D282001A 03C06F00
v_mac_f32_e32 v25, 0, v16 ; 3E322080
v_mac_f32_e32 v26, 2.0, v15 ; 3E341EF4
v_mul_f32_e32 v28, s55, v0 ; 10380037
v_mad_f32 v27, v7, s55, 0.5 ; D282001B 03C06F07
v_mac_f32_e32 v27, 2.0, v15 ; 3E361EF4
v_mac_f32_e32 v28, 0, v16 ; 3E382080
s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510
s_load_dwordx4 s[60:63], s[4:5], 0x14 ; C09E0514
s_load_dwordx8 s[64:71], s[6:7], 0x28 ; C0E00728
s_load_dwordx8 s[76:83], s[6:7], 0x10 ; C0E60710
s_load_dwordx8 s[28:35], s[6:7], 0x18 ; C0CE0718
s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[29:30], 3, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[76:83], s[72:75] ; F0800300 02531D1B
v_mad_f32 v27, -v7, s55, v16 ; D282001B 24406F07
image_sample v[27:28], 3, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[76:83], s[72:75] ; F0800300 02531B1B
image_sample v[31:32], 3, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[76:83], s[72:75] ; F0800300 02531F19
v_mad_f32 v26, -v0, s55, v16 ; D282001A 24406F00
image_sample v[15:16], 3, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[76:83], s[72:75] ; F0800300 02530F19
s_waitcnt vmcnt(2) ; BF8C0772
v_add_f32_e32 v25, v27, v29 ; 06323B1B
v_add_f32_e32 v26, v28, v30 ; 06343D1C
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v15, v15, v31 ; 061E3F0F
v_add_f32_e32 v16, v16, v32 ; 06204110
v_add_f32_e32 v15, v15, v25 ; 061E330F
v_add_f32_e32 v16, v16, v26 ; 06203510
v_mad_f32 v15, 0.5, v15, -1.0 ; D282000F 03CE1EF0
v_mad_f32 v16, 0.5, v16, -1.0 ; D2820010 03CE20F0
v_add_f32_e32 v15, v15, v15 ; 061E1F0F
v_add_f32_e32 v16, v16, v16 ; 06202110
v_mad_f32 v25, v15, v15, 1.0 ; D2820019 03CA1F0F
v_mac_f32_e32 v25, v16, v16 ; 3E322110
v_rsq_clamp_f32_e32 v25, v25 ; 7E325919
v_mul_f32_e32 v26, v23, v12 ; 10341917
v_mul_f32_e32 v26, v26, v22 ; 10342D1A
v_mul_f32_e32 v27, v23, v10 ; 10361517
v_mul_f32_e32 v27, v27, v22 ; 10362D1B
image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[64:71], s[60:63] ; F0800700 01F01C1A
v_mul_f32_e32 v15, v25, v15 ; 101E1F19
v_mul_f32_e32 v31, v15, v18 ; 103E250F
v_mac_f32_e32 v31, v15, v17 ; 3E3E230F
v_mul_f32_e32 v15, v25, v16 ; 101E2119
v_mul_f32_e32 v16, v25, v18 ; 10202519
v_mac_f32_e32 v16, v25, v17 ; 3E202319
v_mul_f32_e32 v18, v15, v18 ; 1024250F
v_mac_f32_e32 v18, v15, v17 ; 3E24230F
v_mul_f32_e32 v15, v31, v31 ; 101E3F1F
v_mac_f32_e32 v15, v16, v16 ; 3E1E2110
v_mac_f32_e32 v15, v18, v18 ; 3E1E2512
v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v17, -0.5, v28 ; 062238F1
v_add_f32_e32 v25, -1.0, v30 ; 06323CF3
s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708
v_add_f32_e32 v28, -0.5, v29 ; 06383AF1
v_mac_f32_e32 v17, v15, v31 ; 3E223F0F
v_mac_f32_e32 v25, v15, v16 ; 3E32210F
v_mac_f32_e32 v28, v15, v18 ; 3E38250F
v_mul_f32_e32 v15, v17, v17 ; 101E2311
v_mac_f32_e32 v15, v25, v25 ; 3E1E3319
v_mac_f32_e32 v15, v28, v28 ; 3E1E391C
v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[60:67], s[56:59] ; F0800100 01CF101A
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v16, s45, v16, v24 ; D2820010 0462202D
v_mul_f32_e32 v18, v15, v28 ; 1024390F
v_rcp_f32_e32 v16, v16 ; 7E205510
v_mul_f32_e32 v24, v15, v17 ; 1030230F
v_mul_f32_e32 v26, s84, v24 ; 10343054
v_mul_f32_e32 v27, s84, v18 ; 10362454
v_subrev_f32_e32 v16, v6, v16 ; 0A202106
v_max_f32_e32 v28, 0, v16 ; 20382080
v_mul_f32_e32 v28, s52, v28 ; 10383834
v_sqrt_f32_e32 v29, v28 ; 7E3A671C
v_and_b32_e32 v29, v29, v21 ; 363A2B1D
v_mov_b32_e32 v30, 0xff800000 ; 7E3C02FF FF800000
v_mov_b32_e32 v31, 0x7f800000 ; 7E3E02FF 7F800000
v_cmp_eq_f32_e32 vcc, v28, v30 ; 7C043D1C
v_cndmask_b32_e32 v28, v29, v31 ; 00383F1D
v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880
v_add_f32_e64 v29, 0, v28 clamp ; D206081D 00023880
v_mul_f32_e32 v32, s53, v29 ; 10403A35
v_mul_f32_e32 v29, s54, v29 ; 103A3A36
v_mov_b32_e32 v33, 0x3f4ccccd ; 7E4202FF 3F4CCCCD
v_mul_legacy_f32_e32 v5, v33, v5 ; 0E0A0B21
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_mul_f32_e32 v29, v5, v29 ; 103A3B05
v_mac_f32_e32 v4, v29, v27 ; 3E08371D
v_mul_f32_e32 v27, 0.5, v4 ; 103608F0
v_mad_f32 v27, s47, v27, v11 ; D282001B 042E362F
v_mul_f32_e32 v5, v5, v32 ; 100A4105
v_mac_f32_e32 v3, v5, v26 ; 3E063505
v_mad_f32 v3, 0.5, v3, v11 ; D2820003 042E06F0
v_mul_f32_e32 v5, v23, v27 ; 100A3717
v_mul_f32_e32 v26, v23, v3 ; 10340717
v_mul_f32_e32 v26, v26, v22 ; 10342D1A
v_mul_f32_e32 v27, v5, v22 ; 10362D05
image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[60:67], s[56:59] ; F0800100 01CF051A
v_mov_b32_e32 v26, s46 ; 7E34022E
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v26, s45, v5 ; 3E340A2D
v_rcp_f32_e32 v5, v26 ; 7E0A551A
v_subrev_f32_e32 v5, v6, v5 ; 0A0A0B06
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mul_f32_e32 v5, s52, v5 ; 100A0A34
v_sqrt_f32_e32 v26, v5 ; 7E346705
v_and_b32_e32 v21, v26, v21 ; 362A2B1A
v_cmp_eq_f32_e32 vcc, v5, v30 ; 7C043D05
v_cndmask_b32_e32 v5, v21, v31 ; 000A3F15
v_mad_f32 v4, 0.5, v4, v11 ; D2820004 042E08F0
s_load_dwordx8 s[52:59], s[6:7], 0x0 ; C0DA0700
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mul_f32_e32 v11, 0x3f333333, v28 ; 101638FF 3F333333
v_cmp_lt_f32_e32 vcc, v11, v5 ; 7C020B0B
v_cndmask_b32_e32 v4, v13, v4 ; 0008090D
v_cndmask_b32_e32 v3, v12, v3 ; 0006070C
v_mul_f32_e32 v3, v23, v3 ; 10060717
v_mul_f32_e32 v4, v23, v4 ; 10080917
v_mul_f32_e32 v26, v3, v22 ; 10342D03
v_mul_f32_e32 v27, v4, v22 ; 10362D04
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[52:59], s[48:51] ; F0800700 018D151A
v_cndmask_b32_e32 v3, v28, v5 ; 00060B1C
v_add_f32_e32 v4, s44, v3 ; 0608062C
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v11, v21, v5 ; 10160B15
v_mul_f32_e32 v13, v22, v5 ; 101A0B16
v_mul_f32_e32 v5, v23, v5 ; 100A0B17
v_add_f32_e32 v6, 0, v6 ; 060C0C80
v_cmp_gt_f32_e64 vcc, |v6|, v19 ; D008016A 00022706
v_cndmask_b32_e32 v19, 1.0, v20 ; 002628F2
v_sqrt_f32_e32 v20, v14 ; 7E28670E
v_mov_b32_e32 v21, 0x3c23d70a ; 7E2A02FF 3C23D70A
v_mul_f32_e32 v22, v21, v20 ; 102C2915
v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80
v_mul_f32_e32 v6, v19, v6 ; 100C0D13
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_mov_b32_e32 v23, 0x41000000 ; 7E2E02FF 41000000
v_mul_f32_e32 v26, v23, v24 ; 10343117
v_mac_f32_e32 v12, v22, v26 ; 3E183516
v_mul_f32_e32 v23, v23, v18 ; 102E2517
v_mac_f32_e32 v10, v22, v23 ; 3E142F16
v_mul_f32_e32 v12, v6, v12 ; 10181906
v_mul_f32_e32 v6, v6, v10 ; 100C1506
v_mul_f32_e32 v22, v12, v19 ; 102C270C
v_mul_f32_e32 v23, v6, v19 ; 102E2706
v_mov_b32_e32 v6, 0x41a00000 ; 7E0C02FF 41A00000
v_mad_f32 v10, s12, v6, v7 ; D282000A 041E0C0C
v_mul_f32_e32 v26, s19, v10 ; 10341413
v_mul_f32_e32 v27, s19, v0 ; 10360013
v_mad_f32 v10, s12, v6, -v7 ; D282000A 841E0C0C
image_sample v12, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[36:43], s[24:27] ; F0800100 00C90C1A
v_mul_f32_e32 v26, s19, v10 ; 10341413
image_sample v10, 1, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[36:43], s[24:27] ; F0800100 00C90A1A
s_buffer_load_dword s4, s[8:11], 0x5c ; C202095C
v_mul_f32_e64 v19, s13, s13 ; D2100013 00001A0D
v_mac_f32_e64 v19, s14, s14 ; D23E0013 00001C0E
s_buffer_load_dword s5, s[8:11], 0x48 ; C2028948
v_mac_f32_e64 v19, s15, s15 ; D23E0013 00001E0F
v_mac_f32_e64 v19, s18, s18 ; D23E0013 00002412
v_mul_f32_e32 v25, v15, v25 ; 1032330F
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_mac_f32_e32 v26, v18, v18 ; 3E342512
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_rsq_clamp_f32_e32 v19, v19 ; 7E265913
image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[28:35], s[20:23] ; F0800700 00A71B16
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_sub_f32_e64 v22, 1.0, s5 ; D2080016 00000AF2
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v23, v27, v22 ; 102E2D1B
v_mul_f32_e32 v27, v28, v22 ; 10362D1C
v_mul_f32_e32 v22, v29, v22 ; 102C2D1D
v_mul_f32_e32 v28, v26, v24 ; 1038311A
v_mul_f32_e32 v29, s14, v19 ; 103A260E
v_mul_f32_e32 v29, v29, v28 ; 103A391D
v_mul_f32_e32 v30, v26, v25 ; 103C331A
v_mul_f32_e32 v31, s13, v19 ; 103E260D
v_mac_f32_e32 v29, v31, v30 ; 3E3A3D1F
v_mul_f32_e32 v26, v26, v18 ; 1034251A
v_mul_f32_e32 v31, s15, v19 ; 103E260F
v_mac_f32_e32 v29, v31, v26 ; 3E3A351F
v_mul_f32_e32 v31, v28, v29 ; 103E3B1C
v_mac_f32_e32 v31, v28, v29 ; 3E3E3B1C
v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E
v_mul_f32_e32 v28, v30, v29 ; 10383B1E
v_mac_f32_e32 v28, v30, v29 ; 3E383B1E
v_mul_f32_e32 v30, v26, v29 ; 103C3B1A
v_mac_f32_e32 v30, v26, v29 ; 3E3C3B1A
v_mul_f32_e32 v8, v14, v8 ; 1010110E
v_mul_f32_e32 v9, v14, v9 ; 1012130E
v_rcp_f32_e32 v26, s17 ; 7E345411
v_mul_f32_e32 v1, v14, v1 ; 1002030E
v_mad_f32 v14, v15, v17, v16 ; D282000E 0442230F
v_add_f32_e32 v0, v0, v7 ; 06000F00
v_mul_f32_e32 v7, v26, v14 ; 100E1D1A
v_mul_f32_e32 v15, s12, v6 ; 101E0C0C
v_mac_f32_e32 v7, s16, v15 ; 3E0E1E10
v_madmk_f32_e32 v0, v0, v7, 0x3ba3d70a ; 40000F00 3BA3D70A
v_mul_f32_e32 v7, v24, v8 ; 100E1118
v_mad_f32 v7, -v9, v25, -v7 ; D2820007 A41E3309
s_buffer_load_dword s6, s[8:11], 0x38 ; C2030938
s_buffer_load_dword s7, s[8:11], 0x39 ; C2038939
v_mad_f32 v15, s14, v19, -v31 ; D282000F 847E260E
v_mad_f32 v17, s15, v19, -v30 ; D2820011 847A260F
v_mad_f32 v24, s13, v19, -v28 ; D2820018 8472260D
v_mov_b32_e32 v25, 0x3d4ccccd ; 7E3202FF 3D4CCCCD
v_mad_f32 v19, -s13, v19, v25 ; D2820013 2466260D
s_buffer_load_dword s12, s[8:11], 0x3a ; C206093A
s_buffer_load_dword s13, s[8:11], 0x3c ; C206893C
s_buffer_load_dword s14, s[8:11], 0x3d ; C207093D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v11, s6, v4 ; 3E160806
v_mac_f32_e32 v13, s7, v4 ; 3E1A0807
v_mov_b32_e32 v25, 0x40400000 ; 7E3202FF 40400000
v_mul_f32_e32 v28, v25, v0 ; 10380119
v_floor_f32_e32 v28, v28 ; 7E38491C
v_mad_f32 v0, v0, v25, -v28 ; D2820000 84723300
v_log_f32_e32 v0, v0 ; 7E004F00
v_mac_f32_e32 v5, s12, v4 ; 3E0A080C
v_mad_f32 v4, -v14, v26, 1.0 ; D2820004 23CA350E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mul_legacy_f32_e32 v0, v14, v0 ; 0E00010E
v_exp_f32_e32 v0, v0 ; 7E004B00
v_add_f32_e32 v0, v0, v28 ; 06003900
v_mul_f32_e32 v0, 0x40490fdc, v0 ; 100000FF 40490FDC
v_mul_f32_e32 v0, 0x3e22f983, v0 ; 100000FF 3E22F983
v_fract_f32_e32 v0, v0 ; 7E004100
v_sin_f32_e32 v0, v0 ; 7E006B00
v_mov_b32_e32 v26, 0xbe99999a ; 7E3402FF BE99999A
v_add_f32_e32 v0, v0, v26 ; 06003500
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_log_f32_e32 v4, v4 ; 7E084F04
v_mul_f32_e32 v0, 0x3fc00000, v0 ; 100000FF 3FC00000
v_log_f32_e32 v0, v0 ; 7E004F00
v_add_f32_e32 v10, v10, v12 ; 0614190A
v_mul_legacy_f32_e32 v4, v14, v4 ; 0E08090E
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_legacy_f32_e32 v0, v25, v0 ; 0E000119
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_mul_f32_e32 v4, 0.5, v10 ; 100814F0
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_mul_f32_e32 v0, s3, v0 ; 10000003
v_mad_f32 v4, -v20, v21, 1.0 ; D2820004 23CA2B14
v_max_f32_e32 v4, 0x3f19999a, v4 ; 200808FF 3F19999A
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
s_buffer_load_dword s3, s[8:11], 0x3e ; C201893E
v_sub_f32_e32 v4, 1.0, v0 ; 080800F2
v_mul_f32_e32 v10, v11, v4 ; 1014090B
v_mul_f32_e32 v11, v13, v4 ; 1016090D
v_mul_f32_e32 v4, v5, v4 ; 10080905
v_mac_f32_e32 v10, s13, v0 ; 3E14000D
v_mac_f32_e32 v11, s14, v0 ; 3E16000E
s_buffer_load_dword s6, s[8:11], 0x40 ; C2030940
s_buffer_load_dword s7, s[8:11], 0x41 ; C2038941
s_buffer_load_dword s12, s[8:11], 0x42 ; C2060942
s_buffer_load_dword s13, s[8:11], 0x44 ; C2068944
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v4, s3, v0 ; 3E080003
s_buffer_load_dword s3, s[8:11], 0x45 ; C2018945
v_mad_f32 v0, -v1, v18, v7 ; D2820000 241E2501
s_buffer_load_dword s14, s[8:11], 0x46 ; C2070946
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_mul_f32_e32 v5, v0, v0 ; 100A0100
v_mad_f32 v0, -v0, v0, 1.0 ; D2820000 23CA0100
v_mul_f32_e32 v7, s6, v0 ; 100E0006
v_mac_f32_e32 v7, s13, v5 ; 3E0E0A0D
v_mul_f32_e32 v12, s7, v0 ; 10180007
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v12, s3, v5 ; 3E180A03
v_mul_f32_e32 v0, s12, v0 ; 1000000C
v_mac_f32_e32 v0, s14, v5 ; 3E000A0E
v_mac_f32_e32 v23, s5, v7 ; 3E2E0E05
s_buffer_load_dword s3, s[8:11], 0x16 ; C2018916
s_buffer_load_dword s6, s[8:11], 0x17 ; C2030917
s_buffer_load_dword s7, s[8:11], 0x5d ; C203895D
s_buffer_load_dword s12, s[8:11], 0x5e ; C206095E
s_buffer_load_dword s8, s[8:11], 0x60 ; C2040960
v_mac_f32_e32 v27, s5, v12 ; 3E361805
v_mac_f32_e32 v22, s5, v0 ; 3E2C0005
v_mul_f32_e32 v0, v15, v8 ; 1000110F
v_mad_f32 v0, -v9, v24, -v0 ; D2820000 A4023109
v_mad_f32 v0, -v1, v17, v0 ; D2820000 24022301
v_mov_b32_e32 v1, 0x3e4ccccd ; 7E0202FF 3E4CCCCD
v_mov_b32_e32 v5, 0x40c00000 ; 7E0A02FF 40C00000
v_mad_f32 v5, s14, v5, v1 ; D2820005 04060A0E
v_add_f32_e64 v7, 0, -v9 clamp ; D2060807 40021280
v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2
v_mul_f32_e32 v7, v7, v7 ; 100E0F07
v_mul_f32_e32 v7, v7, v7 ; 100E0F07
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mul_f32_e32 v3, v3, v7 ; 10060F03
v_mac_f32_e32 v1, v33, v3 ; 3E020721
v_add_f32_e64 v3, 0, v5 clamp ; D2060803 00020A80
v_mul_f32_e32 v5, v3, v23 ; 100A2F03
v_mul_f32_e32 v7, v3, v27 ; 100E3703
v_mul_f32_e32 v3, v3, v22 ; 10062D03
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v8, 0x3dcccccd, v20 ; 101028FF 3DCCCCCD
v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080
v_mul_f32_e32 v9, v8, v1 ; 10120308
v_mad_f32 v1, -v1, v8, 1.0 ; D2820001 23CA1101
v_mul_f32_e32 v8, v10, v1 ; 1010030A
v_mac_f32_e32 v8, v5, v9 ; 3E101305
v_max_f32_e32 v0, 0, v0 ; 20000080
v_log_f32_e32 v0, v0 ; 7E004F00
v_mul_f32_e32 v5, v11, v1 ; 100A030B
v_mac_f32_e32 v5, v7, v9 ; 3E0A1307
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mac_f32_e32 v1, v3, v9 ; 3E021303
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_legacy_f32_e32 v0, s8, v0 ; 0E000008
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mul_f32_e32 v3, s4, v0 ; 10060004
v_mul_f32_e32 v4, v6, v19 ; 10082706
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_mul_f32_e32 v4, v25, v4 ; 10080919
v_mac_f32_e32 v8, v4, v3 ; 3E100704
v_mul_f32_e32 v3, s7, v0 ; 10060007
v_mac_f32_e32 v5, v4, v3 ; 3E0A0704
v_mul_f32_e32 v0, s12, v0 ; 1000000C
v_mac_f32_e32 v1, v4, v0 ; 3E020104
v_mov_b32_e32 v0, s6 ; 7E000206
v_mac_f32_e32 v0, s3, v2 ; 3E000403
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v2, 1.0, v0 ; 080400F2
v_mul_f32_e32 v3, s2, v2 ; 10060402
v_mac_f32_e32 v3, v8, v0 ; 3E060108
v_mul_f32_e32 v4, s1, v2 ; 10080401
v_mac_f32_e32 v4, v5, v0 ; 3E080105
v_mul_f32_e32 v2, s0, v2 ; 10040400
v_mac_f32_e32 v2, v1, v0 ; 3E040101
v_cvt_pkrtz_f16_f32_e32 v0, v3, v4 ; 5E000903
v_add_f32_e32 v1, v16, v16 ; 06022110
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 88
VGPRS: 36
Code Size: 2064 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex-shader entry point (attribute group #0 carries "ShaderType"="1"),
; generated from the TGSI program listed directly above:
;   OUT[0] (POSITION)   = CONST[0]*IN[0].x + CONST[1]*IN[0].y
;                       + CONST[2]*IN[0].z + CONST[3]*IN[0].w
;   OUT[1] (GENERIC[0]) = (IN[1].x, IN[1].y, 0.0, 0.0)
; Argument %1 is indexed for the constant-buffer descriptor and argument %4 for
; the per-attribute vertex-buffer descriptors; %5 + %7 forms the fetch index.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load descriptor 0 from %1, then read 16 floats at byte offsets 0..60.
; By their use below: %13..%16 = CONST[0].xyzw, %17..%20 = CONST[1].xyzw,
; %21..%24 = CONST[2].xyzw, %25..%28 = CONST[3].xyzw.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Fetch IN[0] through descriptor %4[0]; %33..%36 = IN[0].xyzw.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Fetch IN[1] through descriptor %4[1]; only x (%41) and y (%42) are extracted.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI 0: TEMP[0] = CONST[0] * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI 1: TEMP[0] += CONST[1] * IN[0].y
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI 2: TEMP[0] += CONST[2] * IN[0].z
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI 3: TEMP[0] += CONST[3] * IN[0].w  -> final position in %64, %66, %68, %70
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; Export OUT[1] (GENERIC[0]) on target 32: IN[1].xy, with z and w set to 0.0.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
; Export OUT[0] (POSITION) on target 12; the third operand is 1 only on this
; final export (presumably the "done" flag -- confirm against the intrinsic).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR is listed directly above.
; v0 = s10 + v0 is the fetch index used by both vertex-buffer loads
; (presumably the IR's `add i32 %5, %7`).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[0:3]: descriptor later used as the base of every s_buffer_load_dword.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v1 = 0.0, used for the z/w lanes of the first export.
v_mov_b32_e32 v1, 0 ; 7E020280
; s[4:7] and s[8:11]: descriptors for the two vertex-attribute fetches.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant dwords 0..15 are loaded into s12, s13, s4..s11, s14..s18, s0;
; the two attribute fetches (v[2:5] and v[6:9]) are issued in between.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
; The last constant overwrites s0, i.e. part of the descriptor it was read from.
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
; Four dot-product accumulators, one per output component:
;   v0 (x) = dword0*v2 + dword4*v3 + dword8*v4  + dword12*v5
;   v8 (y) = dword1*v2 + dword5*v3 + dword9*v4  + dword13*v5
;   v9 (z) = dword2*v2 + dword6*v3 + dword10*v4 + dword14*v5
;   v2 (w) = dword3*v2 + dword7*v3 + dword11*v4 + dword15*v5
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; Target 32: second attribute's x/y (v6, v7) with 0.0 in z/w.
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
; Target 12: transformed position (x, y, z, w) = (v0, v8, v9, v2).
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL CONST[0..1]
DCL CONST[6..10]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 0.0000, 0.0156, 0.3000, 4.0000}
IMM[1] FLT32 { 64.0000, 10.0000, 0.1000, 3.1416}
IMM[2] INT32 {64, 0, 0, 0}
IMM[3] FLT32 { 0.2000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[0].xxxx
1: MOV TEMP[1].y, IMM[0].xxxx
2: MUL TEMP[2].x, CONST[9].xxxx, IN[0].xxxx
3: MOV TEMP[1].x, TEMP[2].xxxx
4: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
5: MOV TEMP[1].xy, TEMP[1].xyyy
6: TEX TEMP[1].y, TEMP[1], SAMP[2], 2D
7: ADD TEMP[1].x, TEMP[1].yyyy, IMM[0].zzzz
8: MUL TEMP[1].x, CONST[0].yyyy, TEMP[1].xxxx
9: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].xxxx, IN[0].yyyy
10: MOV TEMP[0].y, TEMP[1].xxxx
11: MOV TEMP[1].y, IMM[0].xxxx
12: MOV TEMP[1].x, TEMP[2].xxxx
13: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
14: MOV TEMP[1].xy, TEMP[1].xyyy
15: TEX TEMP[1].y, TEMP[1], SAMP[2], 2D
16: ADD TEMP[1].x, TEMP[1].yyyy, IMM[0].zzzz
17: MUL TEMP[1].x, CONST[0].yyyy, TEMP[1].xxxx
18: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].xxxx, IN[0].yyyy
19: MOV TEMP[1].y, TEMP[1].xxxx
20: MUL TEMP[3].x, CONST[0].yyyy, IMM[0].wwww
21: MUL TEMP[4].x, TEMP[3].xxxx, IMM[1].xxxx
22: F2I TEMP[4].x, TEMP[4].xxxx
23: IDIV TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
24: I2F TEMP[4].x, TEMP[4].xxxx
25: ADD TEMP[1].x, IN[0].xxxx, TEMP[4].xxxx
26: MOV TEMP[4].x, IN[0].xxxx
27: MOV TEMP[5].y, IMM[0].xxxx
28: MUL TEMP[1].xy, TEMP[1].xyyy, CONST[9].xyyy
29: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
30: MOV TEMP[1].xy, TEMP[1].xyyy
31: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D
32: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy
33: F2I TEMP[1].x, TEMP[1].xxxx
34: I2F TEMP[1].x, TEMP[1].xxxx
35: MUL TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
36: MOV TEMP[1].y, IMM[0].xxxx
37: MOV TEMP[1].x, TEMP[2].xxxx
38: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
39: MOV TEMP[1].xy, TEMP[1].xyyy
40: TEX TEMP[1].y, TEMP[1], SAMP[2], 2D
41: ADD TEMP[1].x, TEMP[1].yyyy, IMM[0].zzzz
42: MUL TEMP[1].x, CONST[0].yyyy, TEMP[1].xxxx
43: MAD TEMP[1].x, TEMP[1].xxxx, CONST[7].xxxx, IN[0].yyyy
44: MOV TEMP[4].y, TEMP[1].xxxx
45: MUL TEMP[1].x, CONST[9].xxxx, IMM[1].zzzz
46: MOV TEMP[1].y, CONST[9].yyyy
47: MAD TEMP[1].xy, TEMP[4].xyyy, TEMP[1].xyyy, TEMP[5].xyyy
48: MUL TEMP[2].x, IN[0].yyyy, CONST[8].xxxx
49: MUL TEMP[2].x, TEMP[2].xxxx, IMM[1].wwww
50: SIN TEMP[2].x, TEMP[2].xxxx
51: MUL TEMP[4].xy, CONST[0].xyyy, IMM[1].yyyy
52: MOV TEMP[5].y, IMM[0].xxxx
53: MUL TEMP[5].x, CONST[9].xxxx, TEMP[4].xxxx
54: MUL TEMP[5].xy, TEMP[5].xyyy, IMM[0].yyyy
55: MOV TEMP[5].xy, TEMP[5].xyyy
56: TEX TEMP[5].y, TEMP[5], SAMP[2], 2D
57: ADD TEMP[5].x, TEMP[5].yyyy, IMM[0].zzzz
58: MUL TEMP[5].x, CONST[0].yyyy, TEMP[5].xxxx
59: MAD TEMP[5].x, TEMP[5].xxxx, CONST[7].xxxx, TEMP[4].yyyy
60: MOV TEMP[4].y, TEMP[5].xxxx
61: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx
62: F2I TEMP[3].x, TEMP[3].xxxx
63: IDIV TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx
64: I2F TEMP[3].x, TEMP[3].xxxx
65: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[3].xxxx
66: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[9].xyyy
67: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy
68: MOV TEMP[0].xy, TEMP[0].xyyy
69: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D
70: MOV TEMP[3].xy, IN[0].xyyy
71: TEX TEMP[3], TEMP[3], SAMP[0], 2D
72: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[0].xxxx
73: MOV TEMP[1].xy, TEMP[1].xyyy
74: MOV TEMP[1].w, IMM[0].xxxx
75: TXL TEMP[1], TEMP[1], SAMP[3], 2D
76: MUL TEMP[0], TEMP[0].xxxx, TEMP[1]
77: MAD TEMP[0], TEMP[2].xxxx, IMM[3].xxxx, TEMP[0]
78: MUL TEMP[0], TEMP[3], TEMP[0]
79: MUL TEMP[0], TEMP[0], CONST[6].xxxx
80: MUL TEMP[0], TEMP[0], CONST[1]
81: MAD TEMP[1].x, TEMP[2].xxxx, IMM[0].zzzz, IMM[3].yyyy
82: MUL TEMP[0], TEMP[0], TEMP[1].xxxx
83: MUL TEMP[1].xy, TEMP[4].xyyy, CONST[9].xyyy
84: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
85: MOV TEMP[1].xy, TEMP[1].xyyy
86: TEX TEMP[1].x, TEMP[1], SAMP[2], 2D
87: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].yyyy
88: F2I TEMP[1].x, TEMP[1].xxxx
89: I2F TEMP[1].x, TEMP[1].xxxx
90: MUL TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz
91: MAD TEMP[1].x, TEMP[1].xxxx, CONST[10].xxxx, IMM[3].yyyy
92: MUL TEMP[0], TEMP[0], TEMP[1].xxxx
93: MOV OUT[0], TEMP[0]
94: END
; ModuleID = 'tgsi'
; Fragment-shader entry point (attribute group #0 carries "ShaderType"="0"),
; generated from the 95-instruction TGSI program listed directly above.
; It combines samples from SAMP[0..3] with a sine term and several constants,
; then exports one colour as two packed-f16 pairs.
; In the comments below C<n>.<c> abbreviates CONST[n].<c>, and 1/64 is the
; literal 1.562500e-02 (printed as 0.0156 in TGSI IMM[0].y).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constants, by byte offset / 16:
;   %24 = C0.x, %25 = C0.y, %26..%29 = C1.xyzw, %30 = C6.x, %31 = C7.x,
;   %32 = C8.x, %33 = C9.x, %34 = C9.y, %35 = C10.x
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
; Descriptor pairs for texture units 0..3: the 32-byte entry comes from
; argument %3 and the 16-byte entry from argument %2.
;   unit 0: %37 / %39   unit 1: %42 / %45   unit 2: %47 / %49   unit 3: %52 / %55
%36 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0
%38 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0
%40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)*
%42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0
%43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)*
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0
%48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0
%50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)*
%52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0
%53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)*
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
; Interpolated input: %56 = IN[0].x, %57 = IN[0].y.
%56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; TGSI 2-10: %58 = C9.x * IN.x (TEMP[2].x); sample unit 2 at (%58/64, 0), take .y;
;            %70 = (y + 0.3) * C0.y * C7.x + IN.y            -> TEMP[0].y
%58 = fmul float %33, %56
%59 = fmul float %58, 1.562500e-02
%60 = bitcast float %59 to i32
%61 = insertelement <2 x i32> undef, i32 %60, i32 0
%62 = insertelement <2 x i32> %61, i32 0, i32 1
%63 = bitcast <8 x i32> %47 to <32 x i8>
%64 = bitcast <4 x i32> %49 to <16 x i8>
%65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %62, <32 x i8> %63, <16 x i8> %64, i32 2)
%66 = extractelement <4 x float> %65, i32 1
%67 = fadd float %66, 0x3FD3333340000000
%68 = fmul float %25, %67
%69 = fmul float %68, %31
%70 = fadd float %69, %57
; TGSI 11-19: the same lookup and arithmetic repeated verbatim -> %82 (TEMP[1].y).
%71 = fmul float %58, 1.562500e-02
%72 = bitcast float %71 to i32
%73 = insertelement <2 x i32> undef, i32 %72, i32 0
%74 = insertelement <2 x i32> %73, i32 0, i32 1
%75 = bitcast <8 x i32> %47 to <32 x i8>
%76 = bitcast <4 x i32> %49 to <16 x i8>
%77 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %74, <32 x i8> %75, <16 x i8> %76, i32 2)
%78 = extractelement <4 x float> %77, i32 1
%79 = fadd float %78, 0x3FD3333340000000
%80 = fmul float %25, %79
%81 = fmul float %80, %31
%82 = fadd float %81, %57
; TGSI 20-25: %87 = float(int(C0.y * 4 * 64) / 64) using signed integer division;
;             %88 = IN.x + %87.
%83 = fmul float %25, 4.000000e+00
%84 = fmul float %83, 6.400000e+01
%85 = fptosi float %84 to i32
%86 = sdiv i32 %85, 64
%87 = sitofp i32 %86 to float
%88 = fadd float %56, %87
; TGSI 28-35: sample unit 2 at ((%88, %82) * C9.xy / 64), take .x;
;             %104 = float(int(x * 10)) * 0.1                 -> TEMP[5].x
%89 = fmul float %88, %33
%90 = fmul float %82, %34
%91 = fmul float %89, 1.562500e-02
%92 = fmul float %90, 1.562500e-02
%93 = bitcast float %91 to i32
%94 = bitcast float %92 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = bitcast <8 x i32> %47 to <32 x i8>
%98 = bitcast <4 x i32> %49 to <16 x i8>
%99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2)
%100 = extractelement <4 x float> %99, i32 0
%101 = fmul float %100, 1.000000e+01
%102 = fptosi float %101 to i32
%103 = sitofp i32 %102 to float
%104 = fmul float %103, 0x3FB99999A0000000
; TGSI 36-44: third copy of the (%58/64, 0) lookup -> %116 (TEMP[4].y).
%105 = fmul float %58, 1.562500e-02
%106 = bitcast float %105 to i32
%107 = insertelement <2 x i32> undef, i32 %106, i32 0
%108 = insertelement <2 x i32> %107, i32 0, i32 1
%109 = bitcast <8 x i32> %47 to <32 x i8>
%110 = bitcast <4 x i32> %49 to <16 x i8>
%111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %109, <16 x i8> %110, i32 2)
%112 = extractelement <4 x float> %111, i32 1
%113 = fadd float %112, 0x3FD3333340000000
%114 = fmul float %25, %113
%115 = fmul float %114, %31
%116 = fadd float %115, %57
; TGSI 45-47: coordinates for the later unit-3 lookup:
;   %119 = IN.x * (C9.x * 0.1) + %104,   %121 = %116 * C9.y + 0.0
%117 = fmul float %33, 0x3FB99999A0000000
%118 = fmul float %56, %117
%119 = fadd float %118, %104
%120 = fmul float %116, %34
%121 = fadd float %120, 0.000000e+00
; TGSI 48-50: %124 = sin(IN.y * C8.x * 3.1416)  (IMM[1].w as written in the TGSI).
%122 = fmul float %57, %32
%123 = fmul float %122, 0x400921FA00000000
%124 = call float @llvm.sin.f32(float %123)
; TGSI 51-60: %125 = C0.x * 10, %126 = C0.y * 10; sample unit 2 at
;             (C9.x * %125 / 64, 0), take .y;
;             %139 = (y + 0.3) * C0.y * C7.x + %126           -> TEMP[4].y
%125 = fmul float %24, 1.000000e+01
%126 = fmul float %25, 1.000000e+01
%127 = fmul float %33, %125
%128 = fmul float %127, 1.562500e-02
%129 = bitcast float %128 to i32
%130 = insertelement <2 x i32> undef, i32 %129, i32 0
%131 = insertelement <2 x i32> %130, i32 0, i32 1
%132 = bitcast <8 x i32> %47 to <32 x i8>
%133 = bitcast <4 x i32> %49 to <16 x i8>
%134 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %132, <16 x i8> %133, i32 2)
%135 = extractelement <4 x float> %134, i32 1
%136 = fadd float %135, 0x3FD3333340000000
%137 = fmul float %25, %136
%138 = fmul float %137, %31
%139 = fadd float %138, %126
; TGSI 61-65: recompute the integer-quantised offset (same value as %87);
;             %144 = %125 + offset                            -> TEMP[4].x
%140 = fmul float %83, 6.400000e+01
%141 = fptosi float %140 to i32
%142 = sdiv i32 %141, 64
%143 = sitofp i32 %142 to float
%144 = fadd float %125, %143
; TGSI 66-69: sample unit 1 at ((IN.x, %70) * C9.xy / 64), take .x -> %154.
%145 = fmul float %56, %33
%146 = fmul float %70, %34
%147 = fmul float %145, 1.562500e-02
%148 = fmul float %146, 1.562500e-02
%149 = bitcast float %147 to i32
%150 = bitcast float %148 to i32
%151 = insertelement <2 x i32> undef, i32 %149, i32 0
%152 = insertelement <2 x i32> %151, i32 %150, i32 1
%153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %152, <32 x i8> %42, <16 x i8> %45, i32 2)
%154 = extractelement <4 x float> %153, i32 0
; TGSI 70-71: sample unit 0 at IN[0].xy; all four channels kept in %160..%163.
%155 = bitcast float %56 to i32
%156 = bitcast float %57 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %37, <16 x i8> %39, i32 2)
%160 = extractelement <4 x float> %159, i32 0
%161 = extractelement <4 x float> %159, i32 1
%162 = extractelement <4 x float> %159, i32 2
%163 = extractelement <4 x float> %159, i32 3
; TGSI 72: %164 = (unit-1 sample)^2.
%164 = fmul float %154, %154
; TGSI 73-75 (TXL): sample unit 3 at (%119, %121) with element 2 of the address
; set to 0 (presumably the level of detail taken from TEMP[1].w = 0);
; element 3 of the address vector is left undef.
%165 = bitcast float %119 to i32
%166 = bitcast float %121 to i32
%167 = insertelement <4 x i32> undef, i32 %165, i32 0
%168 = insertelement <4 x i32> %167, i32 %166, i32 1
%169 = insertelement <4 x i32> %168, i32 0, i32 2
%170 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %169, <32 x i8> %52, <16 x i8> %55, i32 2)
%171 = extractelement <4 x float> %170, i32 0
%172 = extractelement <4 x float> %170, i32 1
%173 = extractelement <4 x float> %170, i32 2
%174 = extractelement <4 x float> %170, i32 3
; TGSI 76: colour = %164 * unit-3 texel (per channel).
%175 = fmul float %164, %171
%176 = fmul float %164, %172
%177 = fmul float %164, %173
%178 = fmul float %164, %174
; TGSI 77: colour += sin * 0.2 (the product is recomputed for each channel).
%179 = fmul float %124, 0x3FC99999A0000000
%180 = fadd float %179, %175
%181 = fmul float %124, 0x3FC99999A0000000
%182 = fadd float %181, %176
%183 = fmul float %124, 0x3FC99999A0000000
%184 = fadd float %183, %177
%185 = fmul float %124, 0x3FC99999A0000000
%186 = fadd float %185, %178
; TGSI 78: colour *= unit-0 texel.
%187 = fmul float %160, %180
%188 = fmul float %161, %182
%189 = fmul float %162, %184
%190 = fmul float %163, %186
; TGSI 79: colour *= C6.x.
%191 = fmul float %187, %30
%192 = fmul float %188, %30
%193 = fmul float %189, %30
%194 = fmul float %190, %30
; TGSI 80: colour *= C1 (component-wise).
%195 = fmul float %191, %26
%196 = fmul float %192, %27
%197 = fmul float %193, %28
%198 = fmul float %194, %29
; TGSI 81-82: colour *= (sin * 0.3 + 1.0).
%199 = fmul float %124, 0x3FD3333340000000
%200 = fadd float %199, 1.000000e+00
%201 = fmul float %195, %200
%202 = fmul float %196, %200
%203 = fmul float %197, %200
%204 = fmul float %198, %200
; TGSI 83-90: sample unit 2 at ((%144, %139) * C9.xy / 64), take .x;
;             %220 = float(int(x * 10)) * 0.1.
%205 = fmul float %144, %33
%206 = fmul float %139, %34
%207 = fmul float %205, 1.562500e-02
%208 = fmul float %206, 1.562500e-02
%209 = bitcast float %207 to i32
%210 = bitcast float %208 to i32
%211 = insertelement <2 x i32> undef, i32 %209, i32 0
%212 = insertelement <2 x i32> %211, i32 %210, i32 1
%213 = bitcast <8 x i32> %47 to <32 x i8>
%214 = bitcast <4 x i32> %49 to <16 x i8>
%215 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %213, <16 x i8> %214, i32 2)
%216 = extractelement <4 x float> %215, i32 0
%217 = fmul float %216, 1.000000e+01
%218 = fptosi float %217 to i32
%219 = sitofp i32 %218 to float
%220 = fmul float %219, 0x3FB99999A0000000
; TGSI 91-92: colour *= (%220 * C10.x + 1.0).
%221 = fmul float %220, %35
%222 = fadd float %221, 1.000000e+00
%223 = fmul float %201, %222
%224 = fmul float %202, %222
%225 = fmul float %203, %222
%226 = fmul float %204, %222
; TGSI 93: pack (x, y) and (z, w) with llvm.SI.packf16 and export both dwords;
; the packed pair is repeated in the last two export operands.
%227 = call i32 @llvm.SI.packf16(float %223, float %224)
%228 = bitcast i32 %227 to float
%229 = call i32 @llvm.SI.packf16(float %225, float %226)
%230 = bitcast i32 %229 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %228, float %230, float %228, float %230)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.sin.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose LLVM IR is listed directly above.
; C<n>.<c> below abbreviates the TGSI constant CONST[n].<c>.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 = s9 is the implicit operand of the v_interp_* instructions below.
s_mov_b32 m0, s9 ; BEFC0309
; Float literals held in VGPRs: v2 = 1/64, v3 = 0.3, v4 = 10.0, v5 = 0.1, v6 = 0.2.
v_mov_b32_e32 v2, 0x3c800000 ; 7E0402FF 3C800000
v_mov_b32_e32 v3, 0x3e99999a ; 7E0602FF 3E99999A
v_mov_b32_e32 v4, 0x41200000 ; 7E0802FF 41200000
v_mov_b32_e32 v5, 0x3dcccccd ; 7E0A02FF 3DCCCCCD
v_mov_b32_e32 v6, 0x3e4ccccd ; 7E0C02FF 3E4CCCCD
; v7 = interpolated IN[0].x, v8 = interpolated IN[0].y.
v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000
v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001
v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100
v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101
; s[0:3]: constant-buffer descriptor (base of every s_buffer_load_dword).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; 4-dword descriptors for units 0..3: s[32:35], s[44:47], s[8:11], s[20:23].
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504
s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508
s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C
; 8-dword descriptors for units 0..3: s[36:43], s[48:55], s[12:19], s[24:31].
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
s_load_dwordx8 s[48:55], s[6:7], 0x8 ; C0D80708
s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710
s_load_dwordx8 s[24:31], s[6:7], 0x18 ; C0CC0718
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constants (dword index = IR byte offset / 4):
;   s4 = C0.x, s5 = C0.y, s6 = C1.x, s7 = C1.y, s56 = C1.z,
;   s57 = C9.y, s58 = C10.x, s59 = C9.x
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s56, s[0:3], 0x6 ; C21C0106
s_buffer_load_dword s57, s[0:3], 0x25 ; C21C8125
s_buffer_load_dword s58, s[0:3], 0x28 ; C21D0128
s_buffer_load_dword s59, s[0:3], 0x24 ; C21D8124
s_waitcnt lgkmcnt(0) ; BF8C007F
; v0 = int(C0.y * 4 * 64); the signed divide by 64 is done as
; (x + ((x >> 31) >>> 26)) >> 6, with the final shift and int->float further down.
v_mul_f32_e64 v0, 4.0, s5 ; D2100000 00000AF6
v_mul_f32_e32 v0, 0x42800000, v0 ; 100000FF 42800000
v_cvt_i32_f32_e32 v0, v0 ; 7E001100
v_ashrrev_i32_e32 v1, 31, v0 ; 3002009F
v_lshrrev_b32_e32 v1, 26, v1 ; 2C02029A
v_add_i32_e32 v0, v0, v1 ; 4A000300
; s60 = C7.x
s_buffer_load_dword s60, s[0:3], 0x1c ; C21E011C
; First unit-2 lookup at (C9.x * IN.x / 64, 0); one channel returned in v1.
; The IR issues this identical lookup three times; it appears once here.
v_mul_f32_e32 v1, s59, v7 ; 10020E3B
v_mul_f32_e32 v9, v2, v1 ; 10120302
v_mov_b32_e32 v10, 0 ; 7E140280
image_sample v1, 2, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[12:19], s[8:11] ; F0800200 00430109
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v1 = (sample + 0.3) * C0.y * C7.x + IN.y;  v0 = float(quantised offset).
v_add_f32_e32 v1, v3, v1 ; 06020303
v_ashrrev_i32_e32 v0, 6, v0 ; 30000086
v_cvt_f32_i32_e32 v0, v0 ; 7E000B00
v_mul_f32_e32 v1, s5, v1 ; 10020205
; s61 = C8.x
s_buffer_load_dword s61, s[0:3], 0x20 ; C21E8120
v_mad_f32 v1, s60, v1, v8 ; D2820001 0422023C
; Unit-2 lookup at ((v0 + IN.x) * C9.x / 64, v1 * C9.y / 64) -> v11.
v_add_f32_e32 v11, v0, v7 ; 06160F00
v_mul_f32_e32 v11, s59, v11 ; 1016163B
v_mul_f32_e32 v12, s57, v1 ; 10180239
v_mul_f32_e32 v13, v2, v11 ; 101A1702
v_mul_f32_e32 v14, v2, v12 ; 101C1902
image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[8:11] ; F0800100 00430B0D
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v11 = float(int(v11 * 10));  v12 = C0.x * 10 * C9.x / 64.
v_mul_f32_e32 v11, v4, v11 ; 10161704
v_cvt_i32_f32_e32 v11, v11 ; 7E16110B
v_mul_f32_e32 v12, s4, v4 ; 10180804
v_mul_f32_e32 v12, s59, v12 ; 1018183B
v_mul_f32_e32 v12, v2, v12 ; 10181902
v_cvt_f32_i32_e32 v11, v11 ; 7E160B0B
; Unit-2 lookup at (v12, 0) -> v12.
v_mov_b32_e32 v13, v10 ; 7E1A030A
image_sample v12, 2, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[12:19], s[8:11] ; F0800200 00430C0C
; Unit-1 lookup at (C9.x * IN.x / 64, v1 * C9.y / 64) -> v9.
v_mov_b32_e32 v13, v9 ; 7E1A0309
image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[48:55], s[44:47] ; F0800100 016C090D
; Coordinates for the unit-3 lookup:
;   v14 = 0.1 * v11 + (C9.x * 0.1) * IN.x,   v15 = v1 * C9.y + 0
v_mul_f32_e32 v13, s59, v5 ; 101A0A3B
v_mul_f32_e32 v14, v5, v11 ; 101C1705
v_mac_f32_e32 v14, v13, v7 ; 3E1C0F0D
v_mad_f32 v15, v1, s57, 0 ; D282000F 02007301
; Unit-0 lookup at IN[0].xy, four channels -> v[17:20].
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[36:43], s[32:35] ; F0800F00 01091107
; v16 = 0 is the third address component of the image_sample_l below.
v_mov_b32_e32 v16, v10 ; 7E20030A
s_waitcnt vmcnt(0) ; BF8C0770
; Unit-3 lookup (sample_l), four channels -> v[13:16].
image_sample_l v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[24:31], s[20:23] ; F0900F00 00A60D0E
; Coordinates of the last unit-2 lookup:
;   y: v7 = C0.y * 10 + C7.x * ((v12 + 0.3) * C0.y), then * C9.y / 64
;   x: v0 = v0 + C0.x * 10, then * C9.x / 64
v_add_f32_e32 v1, v3, v12 ; 06021903
v_mul_f32_e32 v1, s5, v1 ; 10020205
v_mul_f32_e32 v7, s5, v4 ; 100E0805
v_mac_f32_e32 v7, s60, v1 ; 3E0E023C
v_mac_f32_e32 v0, s4, v4 ; 3E000804
v_mul_f32_e32 v0, s59, v0 ; 1000003B
v_mul_f32_e32 v1, s57, v7 ; 10020E39
v_mul_f32_e32 v10, v2, v0 ; 10140102
v_mul_f32_e32 v11, v2, v1 ; 10160302
image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[12:19], s[8:11] ; F0800100 0043000A
; v1 = (unit-1 sample)^2; colour (v2, v7, v9, v1) = v1 * unit-3 texel.
v_mul_f32_e32 v1, v9, v9 ; 10021309
s_waitcnt vmcnt(1) ; BF8C0771
v_mul_f32_e32 v2, v13, v1 ; 1004030D
v_mul_f32_e32 v7, v14, v1 ; 100E030E
v_mul_f32_e32 v9, v15, v1 ; 1012030F
v_mul_f32_e32 v1, v16, v1 ; 10020310
; Sine term: v8 = IN.y * C8.x * 3.1416, scaled by 0x3e22f983 (~ 1/(2*pi)) and
; reduced with v_fract before v_sin.
v_mul_f32_e32 v8, s61, v8 ; 1010103D
v_mul_f32_e32 v8, 0x40490fd0, v8 ; 101010FF 40490FD0
v_mul_f32_e32 v8, 0x3e22f983, v8 ; 101010FF 3E22F983
v_fract_f32_e32 v8, v8 ; 7E104108
v_sin_f32_e32 v8, v8 ; 7E106B08
; s4 is reused for C6.x (its earlier value C0.x is no longer needed).
s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118
; colour += 0.2 * sin
v_mac_f32_e32 v2, v6, v8 ; 3E041106
v_mac_f32_e32 v7, v6, v8 ; 3E0E1106
v_mac_f32_e32 v9, v6, v8 ; 3E121106
v_mac_f32_e32 v1, v6, v8 ; 3E021106
; colour *= unit-0 texel
v_mul_f32_e32 v2, v2, v17 ; 10042302
v_mul_f32_e32 v6, v7, v18 ; 100C2507
v_mul_f32_e32 v7, v9, v19 ; 100E2709
v_mul_f32_e32 v1, v1, v20 ; 10022901
; s0 = C1.w (overwrites part of the constant-buffer descriptor; last load from it).
s_buffer_load_dword s0, s[0:3], 0x7 ; C2000107
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; colour *= C6.x, then *= C1 component-wise.
v_mul_f32_e32 v2, s4, v2 ; 10040404
v_mul_f32_e32 v6, s4, v6 ; 100C0C04
v_mul_f32_e32 v7, s4, v7 ; 100E0E04
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mul_f32_e32 v2, s6, v2 ; 10040406
v_mul_f32_e32 v6, s7, v6 ; 100C0C07
v_mul_f32_e32 v7, s56, v7 ; 100E0E38
; v0 = int(last unit-2 sample * 10); converted back to float further down.
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_cvt_i32_f32_e32 v0, v0 ; 7E001100
v_mul_f32_e32 v1, s0, v1 ; 10020200
; v3 = sin * 0.3 + 1.0; colour *= v3.
v_mad_f32 v3, v8, v3, 1.0 ; D2820003 03CA0708
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_cvt_f32_i32_e32 v0, v0 ; 7E000B00
v_mul_f32_e32 v4, v3, v6 ; 10080D03
v_mul_f32_e32 v6, v3, v7 ; 100C0F03
v_mul_f32_e32 v1, v3, v1 ; 10020303
; v0 = (0.1 * v0) * C10.x + 1.0; colour *= v0.
v_mul_f32_e32 v0, v5, v0 ; 10000105
v_mad_f32 v0, v0, s58, 1.0 ; D2820000 03C87500
v_mul_f32_e32 v2, v0, v2 ; 10040500
v_mul_f32_e32 v3, v0, v4 ; 10060900
v_mul_f32_e32 v4, v0, v6 ; 10080D00
v_mul_f32_e32 v0, v0, v1 ; 10000300
; Pack (x, y) -> v1 and (z, w) -> v0 as f16 pairs and export them.
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 24
Code Size: 608 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..4]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[1], IN[0].xxxx
1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[2].xyxx
5: MAD TEMP[2].xy, IN[0].xyyy, CONST[0].xyyy, CONST[0].zwww
6: MOV TEMP[1].zw, TEMP[2].yyxy
7: MOV OUT[2], TEMP[1]
8: MOV OUT[0], TEMP[0]
9: MOV OUT[1], IN[1]
10: END
; ModuleID = 'tgsi'
; Vertex-shader entry point (attribute group #0 carries "ShaderType"="1"),
; generated from the TGSI program listed directly above:
;   OUT[0] (POSITION)   = CONST[1]*IN[0].x + CONST[2]*IN[0].y
;                       + CONST[3]*IN[0].z + CONST[4]*IN[0].w
;   OUT[1] (GENERIC[0]) = IN[1]
;   OUT[2] (GENERIC[1]) = (IN[2].x, IN[2].y, IN[0].xy * CONST[0].xy + CONST[0].zw)
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load descriptor 0 from %1, then read 20 floats at byte offsets 0..76:
;   %13..%16 = CONST[0].xyzw, %17..%20 = CONST[1].xyzw, %21..%24 = CONST[2].xyzw,
;   %25..%28 = CONST[3].xyzw, %29..%32 = CONST[4].xyzw
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; Fetch IN[0] through descriptor %4[0]; %37..%40 = IN[0].xyzw.
%33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
%35 = add i32 %5, %7
%36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35)
%37 = extractelement <4 x float> %36, i32 0
%38 = extractelement <4 x float> %36, i32 1
%39 = extractelement <4 x float> %36, i32 2
%40 = extractelement <4 x float> %36, i32 3
; Fetch IN[1] through descriptor %4[1]; %45..%48 = IN[1].xyzw (passed through).
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
%47 = extractelement <4 x float> %44, i32 2
%48 = extractelement <4 x float> %44, i32 3
; Fetch IN[2] through descriptor %4[2]; only x (%53) and y (%54) are extracted.
%49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
%51 = add i32 %5, %7
%52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
; TGSI 0: TEMP[0] = CONST[1] * IN[0].x
%55 = fmul float %17, %37
%56 = fmul float %18, %37
%57 = fmul float %19, %37
%58 = fmul float %20, %37
; TGSI 1: TEMP[0] += CONST[2] * IN[0].y
%59 = fmul float %21, %38
%60 = fadd float %59, %55
%61 = fmul float %22, %38
%62 = fadd float %61, %56
%63 = fmul float %23, %38
%64 = fadd float %63, %57
%65 = fmul float %24, %38
%66 = fadd float %65, %58
; TGSI 2: TEMP[0] += CONST[3] * IN[0].z
%67 = fmul float %25, %39
%68 = fadd float %67, %60
%69 = fmul float %26, %39
%70 = fadd float %69, %62
%71 = fmul float %27, %39
%72 = fadd float %71, %64
%73 = fmul float %28, %39
%74 = fadd float %73, %66
; TGSI 3: TEMP[0] += CONST[4] * IN[0].w  -> final position in %76, %78, %80, %82
%75 = fmul float %29, %40
%76 = fadd float %75, %68
%77 = fmul float %30, %40
%78 = fadd float %77, %70
%79 = fmul float %31, %40
%80 = fadd float %79, %72
%81 = fmul float %32, %40
%82 = fadd float %81, %74
; TGSI 5: TEMP[2].xy = IN[0].xy * CONST[0].xy + CONST[0].zw  -> %84, %86
%83 = fmul float %37, %13
%84 = fadd float %83, %15
%85 = fmul float %38, %14
%86 = fadd float %85, %16
; Target 32: OUT[1] (GENERIC[0]) = IN[1].
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %45, float %46, float %47, float %48)
; Target 33: OUT[2] (GENERIC[1]) = (IN[2].x, IN[2].y, %84, %86).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %53, float %54, float %84, float %86)
; Target 12: OUT[0] (POSITION); the third operand is 1 only on this final
; export (presumably the "done" flag -- confirm against the intrinsic).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %76, float %78, float %80, float %82)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s4 ; 7E000204
s_waitcnt expcnt(0) ; BF8C070F
v_mov_b32_e32 v5, s5 ; 7E0A0205
v_mac_f32_e32 v0, s6, v1 ; 3E000206
v_mac_f32_e32 v5, s7, v2 ; 3E0A0407
exp 15, 33, 0, 0, 0, v9, v10, v0, v5 ; F800021F 05000A09
s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105
s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106
s_buffer_load_dword s6, s[0:3], 0x7 ; C2030107
s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xb ; C205810B
s_buffer_load_dword s12, s[0:3], 0xc ; C206010C
s_buffer_load_dword s13, s[0:3], 0xd ; C206810D
s_buffer_load_dword s14, s[0:3], 0xe ; C207010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s8, v1 ; 10000208
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s7, v2 ; 3E000407
v_mul_f32_e32 v5, s4, v1 ; 100A0204
v_mac_f32_e32 v5, s9, v2 ; 3E0A0409
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s10, v2 ; 3E0C040A
v_mul_f32_e32 v1, s6, v1 ; 10020206
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v5, s17, v4 ; 3E0A0811
v_mac_f32_e32 v6, s18, v4 ; 3E0C0812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 256 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: ABS TEMP[0].xy, IN[1].zwww
1: ADD TEMP[0].xy, IMM[0].xxxx, -TEMP[0].xyyy
2: MUL TEMP[0].xy, TEMP[0].xyyy, CONST[1].xyyy
3: MOV TEMP[1].xy, IN[1].xyyy
4: TEX TEMP[1], TEMP[1], SAMP[0], 2D
5: MUL TEMP[1], TEMP[1], IN[0]
6: MOV TEMP[2].xyz, TEMP[1].xyzx
7: MIN TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy
8: MOV_SAT TEMP[0].x, TEMP[0].xxxx
9: MUL TEMP[0].x, TEMP[1].wwww, TEMP[0].xxxx
10: MOV TEMP[2].w, TEMP[0].xxxx
11: MOV OUT[0], TEMP[2]
12: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0
%28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
%30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%38 = call float @llvm.fabs.f32(float %36)
%39 = call float @llvm.fabs.f32(float %37)
%40 = fsub float 1.000000e+00, %38
%41 = fsub float 1.000000e+00, %39
%42 = fmul float %40, %24
%43 = fmul float %41, %25
%44 = bitcast float %34 to i32
%45 = bitcast float %35 to i32
%46 = insertelement <2 x i32> undef, i32 %44, i32 0
%47 = insertelement <2 x i32> %46, i32 %45, i32 1
%48 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %47, <32 x i8> %27, <16 x i8> %29, i32 2)
%49 = extractelement <4 x float> %48, i32 0
%50 = extractelement <4 x float> %48, i32 1
%51 = extractelement <4 x float> %48, i32 2
%52 = extractelement <4 x float> %48, i32 3
%53 = fmul float %49, %30
%54 = fmul float %50, %31
%55 = fmul float %51, %32
%56 = fmul float %52, %33
%57 = call float @llvm.minnum.f32(float %42, float %43)
%58 = call float @llvm.AMDIL.clamp.(float %57, float 0.000000e+00, float 1.000000e+00)
%59 = fmul float %56, %58
%60 = call i32 @llvm.SI.packf16(float %53, float %54)
%61 = bitcast i32 %60 to float
%62 = call i32 @llvm.SI.packf16(float %55, float %59)
%63 = bitcast i32 %62 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v0, v0, 3, 1, [m0] ; C8000700
v_interp_p2_f32 v0, [v0], v1, 3, 1, [m0] ; C8010701
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800F00 00430906
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, v2, v9 ; 10021302
v_mul_f32_e32 v2, v3, v10 ; 10041503
v_mul_f32_e32 v3, v4, v11 ; 10061704
v_mul_f32_e32 v4, v5, v12 ; 10081905
v_sub_f32_e64 v5, 1.0, |v8| ; D2080205 000210F2
v_mul_f32_e32 v5, s4, v5 ; 100A0A04
v_sub_f32_e64 v0, 1.0, |v0| ; D2080200 000200F2
v_mul_f32_e32 v0, s0, v0 ; 10000000
v_min_f32_e32 v0, v0, v5 ; 1E000B00
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 188 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xyz, IN[1].xyzx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = extractelement <4 x float> %40, i32 2
%44 = fmul float %13, %33
%45 = fmul float %14, %33
%46 = fmul float %15, %33
%47 = fmul float %16, %33
%48 = fmul float %17, %34
%49 = fadd float %48, %44
%50 = fmul float %18, %34
%51 = fadd float %50, %45
%52 = fmul float %19, %34
%53 = fadd float %52, %46
%54 = fmul float %20, %34
%55 = fadd float %54, %47
%56 = fmul float %21, %35
%57 = fadd float %56, %49
%58 = fmul float %22, %35
%59 = fadd float %58, %51
%60 = fmul float %23, %35
%61 = fadd float %60, %53
%62 = fmul float %24, %35
%63 = fadd float %62, %55
%64 = fmul float %25, %36
%65 = fadd float %64, %57
%66 = fmul float %26, %36
%67 = fadd float %66, %59
%68 = fmul float %27, %36
%69 = fadd float %68, %61
%70 = fmul float %28, %36
%71 = fadd float %70, %63
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %43, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %65, float %67, float %69, float %71)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v9, s13, v2 ; 1012040D
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mul_f32_e32 v10, s4, v2 ; 10140404
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v9, s11, v4 ; 3E12080B
v_mac_f32_e32 v10, s14, v4 ; 3E14080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v9, s17, v5 ; 3E120A11
v_mac_f32_e32 v10, s18, v5 ; 3E140A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v8, v1 ; F800020F 01080706
exp 15, 12, 0, 1, 0, v0, v9, v10, v2 ; F80008CF 020A0900
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { -0.5000, 2.0000, 0.0000, 1.0000}
0: ADD TEMP[0].xy, IN[0].xyyy, IMM[0].xxxx
1: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].yyyy
2: MOV TEMP[1].zw, IMM[0].zzzz
3: DP2 TEMP[2].x, TEMP[0].xyyy, TEMP[0].xyyy
4: SQRT TEMP[2].x, TEMP[2].xxxx
5: ADD TEMP[2].xy, IMM[0].wwww, -TEMP[2].xxxx
6: MOV_SAT TEMP[2].xy, TEMP[2].xyyy
7: MUL TEMP[1].xy, TEMP[2].xyyy, TEMP[0].xyyy
8: MUL TEMP[0], TEMP[1], CONST[0].xxxx
9: MOV OUT[0], TEMP[0]
10: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%26 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%27 = fadd float %25, -5.000000e-01
%28 = fadd float %26, -5.000000e-01
%29 = fmul float %27, 2.000000e+00
%30 = fmul float %28, 2.000000e+00
%31 = fmul float %29, %29
%32 = fmul float %30, %30
%33 = fadd float %31, %32
%34 = call float @llvm.sqrt.f32(float %33)
%35 = fsub float 1.000000e+00, %34
%36 = fsub float 1.000000e+00, %34
%37 = call float @llvm.AMDIL.clamp.(float %35, float 0.000000e+00, float 1.000000e+00)
%38 = call float @llvm.AMDIL.clamp.(float %36, float 0.000000e+00, float 1.000000e+00)
%39 = fmul float %37, %29
%40 = fmul float %38, %30
%41 = fmul float %39, %24
%42 = fmul float %40, %24
%43 = fmul float %24, 0.000000e+00
%44 = fmul float %24, 0.000000e+00
%45 = call i32 @llvm.SI.packf16(float %41, float %42)
%46 = bitcast i32 %45 to float
%47 = call i32 @llvm.SI.packf16(float %43, float %44)
%48 = bitcast i32 %47 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %46, float %48, float %46, float %48)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100
v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101
v_add_f32_e32 v1, -0.5, v2 ; 060204F1
v_add_f32_e32 v0, -0.5, v0 ; 060000F1
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
v_add_f32_e32 v1, v1, v1 ; 06020301
v_add_f32_e32 v0, v0, v0 ; 06000100
v_mul_f32_e32 v2, v0, v0 ; 10040100
v_mac_f32_e32 v2, v1, v1 ; 3E040301
v_sqrt_f32_e32 v2, v2 ; 7E046702
v_sub_f32_e32 v2, 1.0, v2 ; 080404F2
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mul_f32_e32 v1, v1, v2 ; 10020501
v_mul_f32_e32 v0, v0, v2 ; 10000500
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s0, v1 ; 10020200
v_mul_f32_e32 v0, s0, v0 ; 10000000
v_mul_f32_e64 v2, 0, s0 ; D2100002 00000080
v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101
v_cvt_pkrtz_f16_f32_e32 v1, v2, v2 ; 5E020502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 120 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..6]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { -0.2500, -0.5000, 2.0000, 0.0000}
IMM[1] FLT32 { 20.0000, 1.0000, 0.5000, -3.7600}
IMM[2] FLT32 { 0.1000, -0.0500, 0.0000, 1.0000}
0: ADD TEMP[0].xy, IN[0].xyyy, IMM[0].xyyy
1: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[0].zzzz
2: MUL TEMP[1].x, CONST[2].xxxx, CONST[4].xxxx
3: MOV TEMP[2].y, IMM[0].wwww
4: ABS TEMP[3].x, TEMP[0].yyyy
5: MUL TEMP[3].x, TEMP[1].xxxx, TEMP[3].xxxx
6: ADD TEMP[3].x, TEMP[0].xxxx, -TEMP[3].xxxx
7: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
8: MIN TEMP[3].x, TEMP[3].xxxx, CONST[5].xxxx
9: MOV TEMP[2].x, TEMP[3].xxxx
10: ADD TEMP[4].xy, TEMP[0].xyyy, -TEMP[2].xyyy
11: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[1].xxxx
12: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx, CONST[5].xxxx
13: RSQ TEMP[1].x, TEMP[1].xxxx
14: DP2 TEMP[4].x, TEMP[4].xyyy, TEMP[4].xyyy
15: SQRT TEMP[4].x, TEMP[4].xxxx
16: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[4].xxxx
17: ADD TEMP[0].xy, TEMP[0].xyyy, -TEMP[2].xyyy
18: MUL TEMP[2].xy, TEMP[0].xyyy, IMM[1].xxxx
19: MOV TEMP[3].xy, TEMP[2].xyxx
20: MUL TEMP[0].xy, TEMP[0].xyyy, IMM[1].xxxx
21: DP2 TEMP[0].x, TEMP[0].xyyy, TEMP[0].xyyy
22: SQRT TEMP[0].x, TEMP[0].xxxx
23: FSLT TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx
24: UIF TEMP[0].xxxx :0
25: DP2 TEMP[0].x, TEMP[2].xyyy, TEMP[2].xyyy
26: RSQ TEMP[0].x, TEMP[0].xxxx
27: MUL TEMP[3].xy, TEMP[2].xyyy, TEMP[0].xxxx
28: ENDIF
29: SQRT TEMP[0].x, TEMP[1].xxxx
30: MAD TEMP[0].x, CONST[3].xxxx, CONST[2].xxxx, -TEMP[0].xxxx
31: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww
32: MUL TEMP[1].x, CONST[0].xxxx, TEMP[0].xxxx
33: MAD TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz, IMM[1].wwww
34: ADD TEMP[2].xy, IN[0].xyyy, IMM[0].yyyy
35: MUL TEMP[0].x, CONST[0].xxxx, TEMP[0].xxxx
36: SIN TEMP[0].x, TEMP[0].xxxx
37: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1].xxxx
38: MAX TEMP[4].x, CONST[3].xxxx, IMM[2].xxxx
39: MUL TEMP[4].x, CONST[6].xxxx, TEMP[4].xxxx
40: RSQ TEMP[4].x, TEMP[4].xxxx
41: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx
42: MAD TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx, IMM[0].zzzz
43: MUL TEMP[1].x, CONST[6].yyyy, TEMP[1].xxxx
44: RCP TEMP[1].x, TEMP[1].xxxx
45: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
46: DP2 TEMP[1].x, TEMP[2].xyyy, TEMP[2].xyyy
47: SQRT TEMP[1].x, TEMP[1].xxxx
48: ADD TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy
49: MOV_SAT TEMP[1].x, TEMP[1].xxxx
50: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz
51: ADD TEMP[1].x, IMM[1].yyyy, -TEMP[1].xxxx
52: MOV_SAT TEMP[1].x, TEMP[1].xxxx
53: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
54: MUL TEMP[0].xy, TEMP[3].xyyy, TEMP[0].xxxx
55: MOV TEMP[1].zw, IMM[2].wwzw
56: MOV TEMP[1].xy, TEMP[0].xyxx
57: MOV OUT[0], TEMP[1]
58: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%34 = fadd float %32, -2.500000e-01
%35 = fadd float %33, -5.000000e-01
%36 = fmul float %34, 2.000000e+00
%37 = fmul float %35, 2.000000e+00
%38 = fmul float %26, %28
%39 = call float @llvm.fabs.f32(float %37)
%40 = fmul float %38, %39
%41 = fsub float %36, %40
%42 = call float @llvm.maxnum.f32(float %41, float 0.000000e+00)
%43 = call float @llvm.minnum.f32(float %42, float %29)
%44 = fsub float %36, %43
%45 = fmul float %43, %38
%46 = fmul float %38, %38
%47 = fadd float %46, %29
%48 = call float @llvm.AMDGPU.rsq.clamped.f32(float %47)
%49 = fmul float %44, %44
%50 = fmul float %37, %37
%51 = fadd float %49, %50
%52 = call float @llvm.sqrt.f32(float %51)
%53 = fmul float %45, %48
%54 = fadd float %53, %52
%55 = fsub float %36, %43
%56 = fmul float %55, 2.000000e+01
%57 = fmul float %37, 2.000000e+01
%58 = fmul float %55, 2.000000e+01
%59 = fmul float %37, 2.000000e+01
%60 = fmul float %58, %58
%61 = fmul float %59, %59
%62 = fadd float %60, %61
%63 = call float @llvm.sqrt.f32(float %62)
%64 = fcmp ogt float %63, 1.000000e+00
br i1 %64, label %IF, label %ENDIF
IF: ; preds = %main_body
%65 = fmul float %56, %56
%66 = fmul float %57, %57
%67 = fadd float %65, %66
%68 = call float @llvm.AMDGPU.rsq.clamped.f32(float %67)
%69 = fmul float %56, %68
%70 = fmul float %57, %68
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp12.0 = phi float [ %69, %IF ], [ %56, %main_body ]
%temp13.0 = phi float [ %70, %IF ], [ %57, %main_body ]
%71 = call float @llvm.sqrt.f32(float %54)
%72 = fmul float %27, %26
%73 = fsub float %72, %71
%74 = call float @llvm.maxnum.f32(float %73, float 0.000000e+00)
%75 = fmul float %24, %74
%76 = fmul float %75, 5.000000e-01
%77 = fadd float %76, 0xC00E147AE0000000
%78 = fadd float %32, -5.000000e-01
%79 = fadd float %33, -5.000000e-01
%80 = fmul float %24, %74
%81 = call float @llvm.sin.f32(float %80)
%82 = fmul float %81, %25
%83 = call float @llvm.maxnum.f32(float %27, float 0x3FB99999A0000000)
%84 = fmul float %30, %83
%85 = call float @llvm.AMDGPU.rsq.clamped.f32(float %84)
%86 = fmul float %82, %85
%87 = fmul float %77, %77
%88 = fadd float %87, 2.000000e+00
%89 = fmul float %31, %88
%90 = fdiv float 1.000000e+00, %89
%91 = fmul float %86, %90
%92 = fmul float %78, %78
%93 = fmul float %79, %79
%94 = fadd float %92, %93
%95 = call float @llvm.sqrt.f32(float %94)
%96 = fadd float %95, 0xBFA99999A0000000
%97 = call float @llvm.AMDIL.clamp.(float %96, float 0.000000e+00, float 1.000000e+00)
%98 = fmul float %97, 2.000000e+00
%99 = fsub float 1.000000e+00, %98
%100 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00)
%101 = fmul float %91, %100
%102 = fmul float %temp12.0, %101
%103 = fmul float %temp13.0, %101
%104 = call i32 @llvm.SI.packf16(float %102, float %103)
%105 = bitcast i32 %104 to float
%106 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 1.000000e+00)
%107 = bitcast i32 %106 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %105, float %107, float %105, float %107)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sin.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_mov_b32 m0, s9 ; BEFC0309
v_mov_b32_e32 v3, 0xbe800000 ; 7E0602FF BE800000
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00
s_buffer_load_dword s0, s[12:15], 0x4 ; C2000D04
s_buffer_load_dword s4, s[12:15], 0x8 ; C2020D08
s_buffer_load_dword s5, s[12:15], 0xc ; C2028D0C
s_buffer_load_dword s6, s[12:15], 0x10 ; C2030D10
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100
s_buffer_load_dword s7, s[12:15], 0x14 ; C2038D14
v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101
v_add_f32_e32 v0, v2, v3 ; 06000702
v_add_f32_e32 v1, -0.5, v4 ; 060208F1
v_add_f32_e32 v7, v0, v0 ; 060E0100
v_add_f32_e32 v5, v1, v1 ; 060A0301
s_buffer_load_dword s2, s[12:15], 0x18 ; C2010D18
s_buffer_load_dword s1, s[12:15], 0x19 ; C2008D19
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v3, s5 ; 7E060205
v_mov_b32_e32 v1, s6 ; 7E020206
v_mul_f32_e32 v8, s4, v1 ; 10100204
v_mad_f32 v1, -v8, |v5|, v7 ; D2820201 241E0B08
v_mad_f32 v6, v8, v8, s7 ; D2820006 001E1108
v_max_f32_e32 v1, 0, v1 ; 20020280
v_min_f32_e32 v9, s7, v1 ; 1E120207
v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906
v_mad_f32 v0, 2.0, v0, -v9 ; D2820000 842600F4
v_mov_b32_e32 v10, 0x41a00000 ; 7E1402FF 41A00000
v_mul_f32_e32 v1, v10, v0 ; 1002010A
v_mul_f32_e32 v0, v10, v5 ; 10000B0A
v_mul_f32_e32 v10, v0, v0 ; 10140100
v_mac_f32_e32 v10, v1, v1 ; 3E140301
v_sqrt_f32_e32 v10, v10 ; 7E14670A
v_cmp_lt_f32_e32 vcc, 1.0, v10 ; 7C0214F2
s_and_saveexec_b64 s[6:7], vcc ; BE86246A
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
v_mul_f32_e32 v10, v0, v0 ; 10140100
v_mac_f32_e32 v10, v1, v1 ; 3E140301
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
v_mul_f32_e32 v1, v10, v1 ; 1002030A
v_mul_f32_e32 v0, v10, v0 ; 1000010A
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_subrev_f32_e32 v7, v9, v7 ; 0A0E0F09
v_mul_f32_e32 v8, v8, v9 ; 10101308
v_mul_f32_e32 v5, v5, v5 ; 100A0B05
v_mac_f32_e32 v5, v7, v7 ; 3E0A0F07
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_mac_f32_e32 v5, v6, v8 ; 3E0A1106
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_mad_f32 v5, s4, v3, -v5 ; D2820005 84160604
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mul_f32_e32 v5, s3, v5 ; 100A0A03
v_madak_f32_e32 v6, 0.5, v5, 0xc070a3d7 ; 420C0AF0 C070A3D7
v_add_f32_e32 v2, -0.5, v2 ; 060404F1
v_add_f32_e32 v4, -0.5, v4 ; 060808F1
v_mul_f32_e32 v5, 0x3e22f983, v5 ; 100A0AFF 3E22F983
v_fract_f32_e32 v5, v5 ; 7E0A4105
v_max_f32_e32 v3, 0x3dcccccd, v3 ; 200606FF 3DCCCCCD
v_mul_f32_e32 v3, s2, v3 ; 10060602
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mad_f32 v6, v6, v6, 2.0 ; D2820006 03D20D06
v_mul_f32_e32 v6, s1, v6 ; 100C0C01
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_sin_f32_e32 v5, v5 ; 7E0A6B05
v_mul_f32_e32 v5, s0, v5 ; 100A0A00
v_mul_f32_e32 v3, v3, v5 ; 10060B03
v_mul_f32_e32 v3, v6, v3 ; 10060706
v_mul_f32_e32 v4, v4, v4 ; 10080904
v_mac_f32_e32 v4, v2, v2 ; 3E080502
v_sqrt_f32_e32 v2, v4 ; 7E046704
v_mov_b32_e32 v4, 0xbd4ccccd ; 7E0802FF BD4CCCCD
v_add_f32_e32 v2, v2, v4 ; 06040902
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mad_f32 v2, -2.0, v2, 1.0 ; D2820002 03CA04F5
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mul_f32_e32 v2, v2, v3 ; 10040702
v_mul_f32_e32 v1, v2, v1 ; 10020302
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101
v_cvt_pkrtz_f16_f32_e64 v1, 0, 1.0 ; D25E0001 0001E480
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 404 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL CONST[0..12]
DCL TEMP[0..5], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].x, CONST[4].xxxx
5: MOV TEMP[1].y, CONST[5].xxxx
6: MOV TEMP[1].z, CONST[6].xxxx
7: MOV TEMP[2].x, CONST[4].yyyy
8: MOV TEMP[2].y, CONST[5].yyyy
9: MOV TEMP[2].z, CONST[6].yyyy
10: MOV TEMP[3].x, CONST[4].zzzz
11: MOV TEMP[3].y, CONST[5].zzzz
12: MOV TEMP[3].z, CONST[6].zzzz
13: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
14: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
15: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
16: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
17: RSQ TEMP[2].x, TEMP[2].xxxx
18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
19: MUL TEMP[2].xyz, CONST[0].xyzz, IN[3].xxxx
20: MAD TEMP[2].xyz, CONST[1].xyzz, IN[3].yyyy, TEMP[2].xyzz
21: MAD TEMP[2].xyz, CONST[2].xyzz, IN[3].zzzz, TEMP[2].xyzz
22: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
23: RSQ TEMP[3].x, TEMP[3].xxxx
24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
25: MUL TEMP[3].xyz, TEMP[1].zxyy, TEMP[2].yzxx
26: MAD TEMP[3].xyz, TEMP[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz
27: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[3].wwww
28: MOV TEMP[4].x, TEMP[2].xxxx
29: MOV TEMP[4].y, TEMP[3].xxxx
30: MOV TEMP[4].z, TEMP[1].xxxx
31: MOV TEMP[4].w, TEMP[0].xxxx
32: MOV TEMP[5].x, TEMP[2].yyyy
33: MOV TEMP[5].y, TEMP[3].yyyy
34: MOV TEMP[5].z, TEMP[1].yyyy
35: MOV TEMP[5].w, TEMP[0].yyyy
36: MOV TEMP[2].x, TEMP[2].zzzz
37: MOV TEMP[2].y, TEMP[3].zzzz
38: MOV TEMP[2].z, TEMP[1].zzzz
39: MOV TEMP[2].w, TEMP[0].zzzz
40: MUL TEMP[0], CONST[9], IN[0].xxxx
41: MAD TEMP[0], CONST[10], IN[0].yyyy, TEMP[0]
42: MAD TEMP[0], CONST[11], IN[0].zzzz, TEMP[0]
43: MAD TEMP[0], CONST[12], IN[0].wwww, TEMP[0]
44: MAD TEMP[1].xy, IN[2].xyyy, CONST[8].xyyy, CONST[8].zwww
45: MOV OUT[1], TEMP[4]
46: MOV OUT[4], TEMP[1]
47: MOV OUT[2], TEMP[5]
48: MOV OUT[3], TEMP[2]
49: MOV OUT[0], TEMP[0]
50: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
%61 = extractelement <4 x float> %57, i32 3
%62 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0
%64 = add i32 %5, %7
%65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64)
%66 = extractelement <4 x float> %65, i32 0
%67 = extractelement <4 x float> %65, i32 1
%68 = extractelement <4 x float> %65, i32 2
%69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0
%71 = add i32 %5, %7
%72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71)
%73 = extractelement <4 x float> %72, i32 0
%74 = extractelement <4 x float> %72, i32 1
%75 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%76 = load <16 x i8>, <16 x i8> addrspace(2)* %75, align 16, !tbaa !0
%77 = add i32 %5, %7
%78 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %76, i32 0, i32 %77)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
%83 = fmul float %13, %58
%84 = fmul float %14, %58
%85 = fmul float %15, %58
%86 = fmul float %16, %59
%87 = fadd float %86, %83
%88 = fmul float %17, %59
%89 = fadd float %88, %84
%90 = fmul float %18, %59
%91 = fadd float %90, %85
%92 = fmul float %19, %60
%93 = fadd float %92, %87
%94 = fmul float %20, %60
%95 = fadd float %94, %89
%96 = fmul float %21, %60
%97 = fadd float %96, %91
%98 = fmul float %22, %61
%99 = fadd float %98, %93
%100 = fmul float %23, %61
%101 = fadd float %100, %95
%102 = fmul float %24, %61
%103 = fadd float %102, %97
%104 = fmul float %25, %66
%105 = fmul float %28, %66
%106 = fmul float %31, %66
%107 = fmul float %26, %67
%108 = fadd float %107, %104
%109 = fmul float %29, %67
%110 = fadd float %109, %105
%111 = fmul float %32, %67
%112 = fadd float %111, %106
%113 = fmul float %27, %68
%114 = fadd float %113, %108
%115 = fmul float %30, %68
%116 = fadd float %115, %110
%117 = fmul float %33, %68
%118 = fadd float %117, %112
%119 = fmul float %114, %114
%120 = fmul float %116, %116
%121 = fadd float %120, %119
%122 = fmul float %118, %118
%123 = fadd float %121, %122
%124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123)
%125 = fmul float %114, %124
%126 = fmul float %116, %124
%127 = fmul float %118, %124
%128 = fmul float %13, %79
%129 = fmul float %14, %79
%130 = fmul float %15, %79
%131 = fmul float %16, %80
%132 = fadd float %131, %128
%133 = fmul float %17, %80
%134 = fadd float %133, %129
%135 = fmul float %18, %80
%136 = fadd float %135, %130
%137 = fmul float %19, %81
%138 = fadd float %137, %132
%139 = fmul float %20, %81
%140 = fadd float %139, %134
%141 = fmul float %21, %81
%142 = fadd float %141, %136
%143 = fmul float %138, %138
%144 = fmul float %140, %140
%145 = fadd float %144, %143
%146 = fmul float %142, %142
%147 = fadd float %145, %146
%148 = call float @llvm.AMDGPU.rsq.clamped.f32(float %147)
%149 = fmul float %138, %148
%150 = fmul float %140, %148
%151 = fmul float %142, %148
%152 = fmul float %127, %150
%153 = fmul float %125, %151
%154 = fmul float %126, %149
%155 = fmul float %126, %151
%156 = fsub float %155, %152
%157 = fmul float %127, %149
%158 = fsub float %157, %153
%159 = fmul float %125, %150
%160 = fsub float %159, %154
%161 = fmul float %156, %82
%162 = fmul float %158, %82
%163 = fmul float %160, %82
%164 = fmul float %38, %58
%165 = fmul float %39, %58
%166 = fmul float %40, %58
%167 = fmul float %41, %58
%168 = fmul float %42, %59
%169 = fadd float %168, %164
%170 = fmul float %43, %59
%171 = fadd float %170, %165
%172 = fmul float %44, %59
%173 = fadd float %172, %166
%174 = fmul float %45, %59
%175 = fadd float %174, %167
%176 = fmul float %46, %60
%177 = fadd float %176, %169
%178 = fmul float %47, %60
%179 = fadd float %178, %171
%180 = fmul float %48, %60
%181 = fadd float %180, %173
%182 = fmul float %49, %60
%183 = fadd float %182, %175
%184 = fmul float %50, %61
%185 = fadd float %184, %177
%186 = fmul float %51, %61
%187 = fadd float %186, %179
%188 = fmul float %52, %61
%189 = fadd float %188, %181
%190 = fmul float %53, %61
%191 = fadd float %190, %183
%192 = fmul float %73, %34
%193 = fadd float %192, %36
%194 = fmul float %74, %35
%195 = fadd float %194, %37
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %149, float %161, float %125, float %99)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %150, float %162, float %126, float %101)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %151, float %163, float %127, float %103)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %193, float %195, float %127, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %185, float %187, float %189, float %191)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s20, s[0:3], 0x0 ; C20A0100
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[12:15], 0 idxen ; E00C2000 80030600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0xd ; C204010D
s_buffer_load_dword s9, s[0:3], 0xe ; C204810E
s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110
s_buffer_load_dword s11, s[0:3], 0x11 ; C2058111
s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112
s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114
s_buffer_load_dword s14, s[0:3], 0x15 ; C2070115
s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116
s_buffer_load_dword s16, s[0:3], 0x18 ; C2080118
s_buffer_load_dword s17, s[0:3], 0x19 ; C2088119
s_buffer_load_dword s18, s[0:3], 0x6 ; C2090106
s_buffer_load_dword s19, s[0:3], 0x8 ; C2098108
s_buffer_load_dword s21, s[0:3], 0x9 ; C20A8109
s_buffer_load_dword s22, s[0:3], 0xa ; C20B010A
s_buffer_load_dword s23, s[0:3], 0xc ; C20B810C
s_buffer_load_dword s24, s[0:3], 0x1a ; C20C011A
s_buffer_load_dword s25, s[0:3], 0x20 ; C20C8120
s_buffer_load_dword s26, s[0:3], 0x21 ; C20D0121
s_buffer_load_dword s27, s[0:3], 0x22 ; C20D8122
s_buffer_load_dword s28, s[0:3], 0x23 ; C20E0123
s_buffer_load_dword s29, s[0:3], 0x24 ; C20E8124
s_buffer_load_dword s30, s[0:3], 0x25 ; C20F0125
s_buffer_load_dword s31, s[0:3], 0x26 ; C20F8126
s_buffer_load_dword s32, s[0:3], 0x27 ; C2100127
s_buffer_load_dword s33, s[0:3], 0x28 ; C2108128
s_buffer_load_dword s34, s[0:3], 0x29 ; C2110129
s_buffer_load_dword s35, s[0:3], 0x2a ; C211812A
s_buffer_load_dword s36, s[0:3], 0x2b ; C212012B
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s27 ; 7E00021B
s_buffer_load_dword s27, s[0:3], 0x2c ; C20D812C
v_mov_b32_e32 v15, s28 ; 7E1E021C
s_buffer_load_dword s28, s[0:3], 0x2d ; C20E012D
s_buffer_load_dword s37, s[0:3], 0x2e ; C212812E
s_buffer_load_dword s38, s[0:3], 0x2f ; C213012F
s_buffer_load_dword s39, s[0:3], 0x30 ; C2138130
s_buffer_load_dword s40, s[0:3], 0x31 ; C2140131
s_buffer_load_dword s41, s[0:3], 0x32 ; C2148132
s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133
v_mul_f32_e32 v16, s29, v2 ; 1020041D
v_mac_f32_e32 v16, s33, v3 ; 3E200621
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v16, s27, v4 ; 3E20081B
v_mac_f32_e32 v16, s39, v5 ; 3E200A27
v_mac_f32_e32 v0, s25, v9 ; 3E001219
v_mac_f32_e32 v15, s26, v10 ; 3E1E141A
v_mul_f32_e32 v9, s10, v6 ; 10120C0A
v_mac_f32_e32 v9, s11, v7 ; 3E120E0B
v_mac_f32_e32 v9, s12, v8 ; 3E12100C
v_mul_f32_e32 v10, s13, v6 ; 10140C0D
v_mac_f32_e32 v10, s14, v7 ; 3E140E0E
v_mac_f32_e32 v10, s15, v8 ; 3E14100F
v_mul_f32_e32 v6, s16, v6 ; 100C0C10
v_mac_f32_e32 v6, s17, v7 ; 3E0C0E11
v_mac_f32_e32 v6, s24, v8 ; 3E0C1018
v_mul_f32_e32 v7, s30, v2 ; 100E041E
v_mac_f32_e32 v7, s34, v3 ; 3E0E0622
v_mac_f32_e32 v7, s28, v4 ; 3E0E081C
v_mac_f32_e32 v7, s40, v5 ; 3E0E0A28
v_mul_f32_e32 v8, s31, v2 ; 1010041F
v_mac_f32_e32 v8, s35, v3 ; 3E100623
v_mac_f32_e32 v8, s37, v4 ; 3E100825
v_mac_f32_e32 v8, s41, v5 ; 3E100A29
v_mul_f32_e32 v17, s32, v2 ; 10220420
v_mac_f32_e32 v17, s36, v3 ; 3E220624
v_mac_f32_e32 v17, s38, v4 ; 3E220826
v_mac_f32_e32 v17, s0, v5 ; 3E220A00
v_mul_f32_e32 v18, s20, v2 ; 10240414
v_mac_f32_e32 v18, s6, v3 ; 3E240606
v_mac_f32_e32 v18, s19, v4 ; 3E240813
v_mac_f32_e32 v18, s23, v5 ; 3E240A17
v_mul_f32_e32 v19, s4, v2 ; 10260404
v_mac_f32_e32 v19, s7, v3 ; 3E260607
v_mac_f32_e32 v19, s21, v4 ; 3E260815
v_mac_f32_e32 v19, s8, v5 ; 3E260A08
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s18, v3 ; 3E040612
v_mac_f32_e32 v2, s22, v4 ; 3E040816
v_mac_f32_e32 v2, s9, v5 ; 3E040A09
v_mul_f32_e32 v3, s20, v11 ; 10061614
v_mac_f32_e32 v3, s6, v12 ; 3E061806
v_mul_f32_e32 v4, s4, v11 ; 10081604
v_mac_f32_e32 v4, s7, v12 ; 3E081807
v_mul_f32_e32 v5, s5, v11 ; 100A1605
v_mac_f32_e32 v5, s18, v12 ; 3E0A1812
v_mac_f32_e32 v3, s19, v13 ; 3E061A13
v_mac_f32_e32 v4, s21, v13 ; 3E081A15
v_mac_f32_e32 v5, s22, v13 ; 3E0A1A16
v_mul_f32_e32 v11, v9, v9 ; 10161309
v_mac_f32_e32 v11, v10, v10 ; 3E16150A
v_mac_f32_e32 v11, v6, v6 ; 3E160D06
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
v_mul_f32_e32 v12, v3, v3 ; 10180703
v_mac_f32_e32 v12, v4, v4 ; 3E180904
v_mac_f32_e32 v12, v5, v5 ; 3E180B05
v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C
v_mul_f32_e32 v9, v11, v9 ; 1012130B
v_mul_f32_e32 v10, v11, v10 ; 1014150B
v_mul_f32_e32 v6, v11, v6 ; 100C0D0B
v_mul_f32_e32 v3, v12, v3 ; 1006070C
v_mul_f32_e32 v4, v12, v4 ; 1008090C
v_mul_f32_e32 v5, v12, v5 ; 100A0B0C
v_mul_f32_e32 v11, v4, v6 ; 10160D04
v_mad_f32 v11, v10, v5, -v11 ; D282000B 842E0B0A
v_mul_f32_e32 v12, v5, v9 ; 10181305
v_mad_f32 v12, v6, v3, -v12 ; D282000C 84320706
v_mul_f32_e32 v13, v3, v10 ; 101A1503
v_mad_f32 v13, v9, v4, -v13 ; D282000D 84360909
v_mul_f32_e32 v11, v14, v11 ; 1016170E
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mul_f32_e32 v13, v14, v13 ; 101A1B0E
exp 15, 32, 0, 0, 0, v3, v11, v9, v18 ; F800020F 12090B03
exp 15, 33, 0, 0, 0, v4, v12, v10, v19 ; F800021F 130A0C04
exp 15, 34, 0, 0, 0, v5, v13, v6, v2 ; F800022F 02060D05
exp 15, 35, 0, 0, 0, v0, v15, v6, v1 ; F800023F 01060F00
exp 15, 12, 0, 1, 0, v16, v7, v8, v17 ; F80008CF 11080710
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 20
Code Size: 592 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000}
IMM[1] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[3].xyyy
1: TEX TEMP[0].yw, TEMP[0], SAMP[0], 2D
2: MAD TEMP[0].xy, TEMP[0].wyyy, IMM[0].xxxx, IMM[0].yyyy
3: DP2 TEMP[1].x, TEMP[0].xyyy, TEMP[0].xyyy
4: MOV_SAT TEMP[1].x, TEMP[1].xxxx
5: ADD TEMP[1].x, IMM[0].zzzz, -TEMP[1].xxxx
6: SQRT TEMP[1].x, TEMP[1].xxxx
7: MOV TEMP[0].z, TEMP[1].xxxx
8: DP3 TEMP[1].x, IN[0].xyzz, TEMP[0].xyzz
9: DP3 TEMP[2].x, IN[1].xyzz, TEMP[0].xyzz
10: MOV TEMP[1].y, TEMP[2].xxxx
11: DP3 TEMP[0].x, IN[2].xyzz, TEMP[0].xyzz
12: MOV TEMP[1].z, TEMP[0].xxxx
13: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww, IMM[0].wwww
14: MOV TEMP[0].w, IMM[1].xxxx
15: MOV OUT[0], TEMP[0]
16: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%37 = bitcast float %35 to i32
%38 = bitcast float %36 to i32
%39 = insertelement <2 x i32> undef, i32 %37, i32 0
%40 = insertelement <2 x i32> %39, i32 %38, i32 1
%41 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %23, <16 x i8> %25, i32 2)
%42 = extractelement <4 x float> %41, i32 1
%43 = extractelement <4 x float> %41, i32 3
%44 = fmul float %43, 2.000000e+00
%45 = fadd float %44, -1.000000e+00
%46 = fmul float %42, 2.000000e+00
%47 = fadd float %46, -1.000000e+00
%48 = fmul float %45, %45
%49 = fmul float %47, %47
%50 = fadd float %48, %49
%51 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00)
%52 = fsub float 1.000000e+00, %51
%53 = call float @llvm.sqrt.f32(float %52)
%54 = fmul float %26, %45
%55 = fmul float %27, %47
%56 = fadd float %55, %54
%57 = fmul float %28, %53
%58 = fadd float %56, %57
%59 = fmul float %29, %45
%60 = fmul float %30, %47
%61 = fadd float %60, %59
%62 = fmul float %31, %53
%63 = fadd float %61, %62
%64 = fmul float %32, %45
%65 = fmul float %33, %47
%66 = fadd float %65, %64
%67 = fmul float %34, %53
%68 = fadd float %66, %67
%69 = fmul float %58, 5.000000e-01
%70 = fadd float %69, 5.000000e-01
%71 = fmul float %63, 5.000000e-01
%72 = fadd float %71, 5.000000e-01
%73 = fmul float %68, 5.000000e-01
%74 = fadd float %73, 5.000000e-01
%75 = call i32 @llvm.SI.packf16(float %70, float %72)
%76 = bitcast i32 %75 to float
%77 = call i32 @llvm.SI.packf16(float %74, float 0.000000e+00)
%78 = bitcast i32 %77 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %76, float %78, float %76, float %78)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600
v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601
v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801
v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900
v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901
v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00
v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01
v_interp_p1_f32 v11, v0, 0, 3, [m0] ; C82C0C00
v_interp_p2_f32 v11, [v11], v1, 0, 3, [m0] ; C82D0C01
v_interp_p1_f32 v12, v0, 1, 3, [m0] ; C8300D00
v_interp_p2_f32 v12, [v12], v1, 1, 3, [m0] ; C8310D01
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[4:11], s[0:3] ; F0800A00 0001000B
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
v_mul_f32_e32 v2, v1, v2 ; 10040501
v_mac_f32_e32 v2, v0, v3 ; 3E040700
v_mul_f32_e32 v3, v1, v5 ; 10060B01
v_mac_f32_e32 v3, v0, v6 ; 3E060D00
v_mul_f32_e32 v5, v1, v8 ; 100A1101
v_mac_f32_e32 v5, v0, v9 ; 3E0A1300
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v2, v0, v4 ; 3E040900
v_mac_f32_e32 v3, v0, v7 ; 3E060F00
v_mac_f32_e32 v5, v0, v10 ; 3E0A1500
v_mad_f32 v0, 0.5, v2, 0.5 ; D2820000 03C204F0
v_mad_f32 v1, 0.5, v3, 0.5 ; D2820001 03C206F0
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_mad_f32 v1, 0.5, v5, 0.5 ; D2820001 03C20AF0
v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Code Size: 244 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..7]
DCL CONST[9..12]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MOV TEMP[3].x, CONST[4].xxxx
43: MOV TEMP[3].y, CONST[5].xxxx
44: MOV TEMP[3].z, CONST[6].xxxx
45: MOV TEMP[4].x, CONST[4].yyyy
46: MOV TEMP[4].y, CONST[5].yyyy
47: MOV TEMP[4].z, CONST[6].yyyy
48: MOV TEMP[5].x, CONST[4].zzzz
49: MOV TEMP[5].y, CONST[5].zzzz
50: MOV TEMP[5].z, CONST[6].zzzz
51: MUL TEMP[6], CONST[9], IN[0].xxxx
52: MAD TEMP[6], CONST[10], IN[0].yyyy, TEMP[6]
53: MAD TEMP[6], CONST[11], IN[0].zzzz, TEMP[6]
54: MAD TEMP[6], CONST[12], IN[0].wwww, TEMP[6]
55: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
56: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
57: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].xxxx, TEMP[3].xyzz
58: MUL TEMP[3], CONST[0], IN[0].xxxx
59: MAD TEMP[3], CONST[1], IN[0].yyyy, TEMP[3]
60: MAD TEMP[3], CONST[2], IN[0].zzzz, TEMP[3]
61: MAD TEMP[3].xyz, CONST[3], IN[0].wwww, TEMP[3]
62: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
63: RSQ TEMP[4].x, TEMP[4].xxxx
64: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
65: MOV TEMP[0].w, TEMP[3].xxxx
66: MOV TEMP[3].xy, TEMP[3].yzyy
67: MOV OUT[1], TEMP[2]
68: MOV OUT[2], TEMP[1]
69: MOV OUT[4], TEMP[3]
70: MOV OUT[3], TEMP[0]
71: MOV OUT[0], TEMP[6]
72: END
; ModuleID = 'tgsi'
; Vertex shader entry point: the LLVM IR form of the TGSI listing printed just above it in this dump.
; Arguments referenced in the body (roles inferred from how they are used below):
;   %1      - array of 16-byte descriptors; element 0 feeds every llvm.SI.load.const call
;   %2, %3  - arrays whose first elements are passed as the 16-byte / 32-byte operands of the texture sample
;   %4      - array of 16-byte descriptors; elements 0..2 feed llvm.SI.vs.load.input
;   %5, %7  - added together to form the index given to each vertex fetch
; %0, %6, %8, %9 and %10 are not referenced.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; --- Scalar constants, read by byte offset from descriptor 0 of %1 ---
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0..56: line up with CONST[0].xyzw, CONST[1].xyzw, CONST[2].xyzw and CONST[3].xyz
; of the TGSI listing (16 bytes per vec4). Offset 60 (CONST[3].w) is never loaded.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
; Offsets 64..104: the xyz components of CONST[4], CONST[5] and CONST[6].
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
; Offsets 144..204: all four components of CONST[9] .. CONST[12].
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
; --- Operands for the texture sample (TGSI SAMP[0]) ---
; %54 is the first 32 bytes of %3 and %56 the first 16 bytes of %2
; (presumably the image resource and the sampler state; confirm against the driver).
%53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0
%55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0
; --- Vertex inputs, each fetched through its own descriptor at index %5 + %7 ---
; IN[0]: all four components (%61..%64).
%57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0
%59 = add i32 %5, %7
%60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59)
%61 = extractelement <4 x float> %60, i32 0
%62 = extractelement <4 x float> %60, i32 1
%63 = extractelement <4 x float> %60, i32 2
%64 = extractelement <4 x float> %60, i32 3
; IN[1]: only x and y are extracted (%69, %70).
%65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0
%67 = add i32 %5, %7
%68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67)
%69 = extractelement <4 x float> %68, i32 0
%70 = extractelement <4 x float> %68, i32 1
; IN[2]: only x and y are extracted (%75, %76); both carry packed values decoded below.
%71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0
%73 = add i32 %5, %7
%74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73)
%75 = extractelement <4 x float> %74, i32 0
%76 = extractelement <4 x float> %74, i32 1
; --- Unpack IN[2] (TGSI instructions 0..12) ---
; %78 = int(IN[2].x * 0.25); %79 = int(IN[2].y); %80 = %79 / 256 (signed);
; %84 = %79 - %80 * 256, i.e. the remainder of that division.
%77 = fmul float %75, 2.500000e-01
%78 = fptosi float %77 to i32
%79 = fptosi float %76 to i32
%80 = sdiv i32 %79, 256
%81 = sitofp i32 %78 to float
%82 = sitofp i32 %80 to float
%83 = shl nsw i32 %80, 8
%84 = sub i32 %79, %83
%85 = sitofp i32 %84 to float
; The three unpacked values, each minus 1.0: these become the xyz of export target 32.
%86 = fadd float %81, -1.000000e+00
%87 = fadd float %82, -1.000000e+00
%88 = fadd float %85, -1.000000e+00
; --- Selector index from the fractional part of IN[2].y (TGSI 13..16) ---
; %90 = IN[2].y minus its truncated integer part; %93 = int(%90 * 10 + ~0.4999).
%89 = sitofp i32 %79 to float
%90 = fsub float %76, %89
%91 = fmul float %90, 1.000000e+01
%92 = fadd float %91, 0x3FDFFE5CA0000000
%93 = fptosi float %92 to i32
; One-hot mask: %95 / %97 / %99 are 1.0 when %93 is 0 / 1 / 2, otherwise 0.0 (TGSI 17..25).
%94 = icmp eq i32 %93, 0
%95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
%96 = icmp eq i32 %93, 1
%97 = select i1 %96, float 1.000000e+00, float 0.000000e+00
%98 = icmp eq i32 %93, 2
%99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
; Dot product of the mask with (%81, %82, %85): picks one of the three unpacked values (TGSI DP4, 29).
%100 = fmul float %95, %81
%101 = fmul float %97, %82
%102 = fadd float %100, %101
%103 = fmul float %99, %85
%104 = fadd float %102, %103
; The "+ 0.0" is the fourth term of the DP4; the scale factor is roughly 1/255.
%105 = fadd float %104, 0.000000e+00
%106 = fmul float %105, 0x3F70101020000000
; --- Texture lookup (TGSI TXL, 33) ---
; Coordinate vector: element 0 = %106, element 1 = 1036831949 (bit pattern 0x3DCCCCCD, i.e. 0.1f),
; element 2 = 0; element 3 is left undef.
%107 = bitcast float %106 to i32
%108 = insertelement <4 x i32> undef, i32 %107, i32 0
%109 = insertelement <4 x i32> %108, i32 1036831949, i32 1
%110 = insertelement <4 x i32> %109, i32 0, i32 2
%111 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %110, <32 x i8> %54, <16 x i8> %56, i32 2)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
; %115 = texel.x * 16 - 8 (w of export target 32); %116 = texel.y * 4 (w of export target 33).
%114 = fmul float %112, 1.600000e+01
%115 = fadd float %114, -8.000000e+00
%116 = fmul float %113, 4.000000e+00
; %120 = IN[2].x - 4 * int(IN[2].x * 0.25) - 2.0 (TGSI 38..41); used as the third vector component below.
%117 = shl i32 %78, 2
%118 = sitofp i32 %117 to float
%119 = fsub float %75, %118
%120 = fadd float %119, -2.000000e+00
; --- CONST[9..12] applied to IN[0] (TGSI 51..54) ---
; Result (%142, %144, %146, %148) is what TGSI writes to OUT[0]; exported to target 12.
%121 = fmul float %37, %61
%122 = fmul float %38, %61
%123 = fmul float %39, %61
%124 = fmul float %40, %61
%125 = fmul float %41, %62
%126 = fadd float %125, %121
%127 = fmul float %42, %62
%128 = fadd float %127, %122
%129 = fmul float %43, %62
%130 = fadd float %129, %123
%131 = fmul float %44, %62
%132 = fadd float %131, %124
%133 = fmul float %45, %63
%134 = fadd float %133, %126
%135 = fmul float %46, %63
%136 = fadd float %135, %128
%137 = fmul float %47, %63
%138 = fadd float %137, %130
%139 = fmul float %48, %63
%140 = fadd float %139, %132
%141 = fmul float %49, %64
%142 = fadd float %141, %134
%143 = fmul float %50, %64
%144 = fadd float %143, %136
%145 = fmul float %51, %64
%146 = fadd float %145, %138
%147 = fmul float %52, %64
%148 = fadd float %147, %140
; --- CONST[4..6].xyz applied to the vector (IN[1].x, IN[1].y, %120) (TGSI 42..50, 55..57) ---
; Each result component is the dot product of one constant's xyz with that vector:
; %159 uses CONST[4], %161 uses CONST[5], %163 uses CONST[6].
%149 = fmul float %28, %69
%150 = fmul float %31, %69
%151 = fmul float %34, %69
%152 = fmul float %29, %70
%153 = fadd float %152, %149
%154 = fmul float %32, %70
%155 = fadd float %154, %150
%156 = fmul float %35, %70
%157 = fadd float %156, %151
%158 = fmul float %30, %120
%159 = fadd float %158, %153
%160 = fmul float %33, %120
%161 = fadd float %160, %155
%162 = fmul float %36, %120
%163 = fadd float %162, %157
; --- CONST[0..3] applied to IN[0] (TGSI 58..61) ---
; x, y, z results are %185, %187, %189. The w result %183 stops after the CONST[2] term
; because TGSI instruction 61 only writes .xyz.
%164 = fmul float %13, %61
%165 = fmul float %14, %61
%166 = fmul float %15, %61
%167 = fmul float %16, %61
%168 = fmul float %17, %62
%169 = fadd float %168, %164
%170 = fmul float %18, %62
%171 = fadd float %170, %165
%172 = fmul float %19, %62
%173 = fadd float %172, %166
%174 = fmul float %20, %62
%175 = fadd float %174, %167
%176 = fmul float %21, %63
%177 = fadd float %176, %169
%178 = fmul float %22, %63
%179 = fadd float %178, %171
%180 = fmul float %23, %63
%181 = fadd float %180, %173
%182 = fmul float %24, %63
%183 = fadd float %182, %175
%184 = fmul float %25, %64
%185 = fadd float %184, %177
%186 = fmul float %26, %64
%187 = fadd float %186, %179
%188 = fmul float %27, %64
%189 = fadd float %188, %181
; --- Normalize (%159, %161, %163): multiply by the reciprocal square root of its squared length (TGSI 62..64) ---
%190 = fmul float %159, %159
%191 = fmul float %161, %161
%192 = fadd float %191, %190
%193 = fmul float %163, %163
%194 = fadd float %192, %193
%195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194)
%196 = fmul float %159, %195
%197 = fmul float %161, %195
%198 = fmul float %163, %195
; --- Exports: the fourth i32 is the target; 32..35 carry TGSI OUT[1]..OUT[4], 12 carries OUT[0] ---
; Only the target-12 export has its third i32 set to 1 (presumably the "done" flag on the final
; position export; confirm against the intrinsic's definition).
; Target 35 repeats %189 in y and z, matching TGSI instruction 66 (MOV TEMP[3].xy, TEMP[3].yzyy).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %115)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %97, float %99, float %116)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %196, float %197, float %198, float %185)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %187, float %189, float %189, float %183)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %142, float %144, float %146, float %148)
ret void
}
; Intrinsic declarations, attribute groups and metadata used by @main.
; Reads one float from the 16-byte descriptor operand at the given i32 offset
; (@main passes byte offsets 0..204).
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Fetches a four-component vertex input through a 16-byte descriptor; @main passes 0 and the fetch index.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Texture sample taking a <4 x i32> coordinate vector plus 32-byte and 16-byte descriptor operands.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Reciprocal square root used by @main to normalize a 3-vector.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Export intrinsic: declared without an attribute group, unlike the readnone intrinsics above.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main; #1 to the four value-returning intrinsics.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
; !0 is the node referenced by the !tbaa annotation on every load in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR is printed above. Each line carries its encoding
; after the ';'. Register roles below are matched against that IR (dword offset = IR byte offset / 4).
; Literal constants kept in VGPRs: v1 = 0x3efff2e5 (~0.4999), v2 = 0xc1000000 (-8.0), v5 = 0.
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000
v_mov_b32_e32 v5, 0 ; 7E0A0280
; v0 = s10 + v0: the vertex fetch index (IR: add i32 %5, %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Descriptor loads: s[0:3] for the constant buffer, s[12:15] and s[4:11] for the texture sample,
; s[16:19] / s[20:23] / s[24:27] for vertex inputs 0 / 1 / 2.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[8:9], 0x0 ; C0880900
s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904
s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
s_waitcnt lgkmcnt(0) ; BF8C007F
; s28, s29 = constants at dwords 0x10, 0x11 (IR %28, %29).
s_buffer_load_dword s28, s[0:3], 0x10 ; C20E0110
s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111
; Vertex fetches: v[6:9] = input 0, v[10:13] = input 1.
buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
; The input-2 fetch targets v[12:15], overlapping v12/v13 of the previous fetch (the IR never uses
; input 1's z/w); presumably that overlap is why a full wait precedes it.
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00
; Remaining scalar constants. s16..s22 hold dwords 0x12..0x1a (IR %30..%36),
; s24..s27, s30..s35 and s41..s45 hold dwords 0x0..0xe (IR %13..%27),
; s23, s36..s40, s46..s54 and s0 hold dwords 0x24..0x33 (IR %37..%52).
s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112
s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114
s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115
s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116
s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118
s_buffer_load_dword s21, s[0:3], 0x19 ; C20A8119
s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A
s_buffer_load_dword s23, s[0:3], 0x24 ; C20B8124
s_buffer_load_dword s24, s[0:3], 0x0 ; C20C0100
s_buffer_load_dword s25, s[0:3], 0x1 ; C20C8101
s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102
s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103
s_buffer_load_dword s30, s[0:3], 0x4 ; C20F0104
s_buffer_load_dword s31, s[0:3], 0x5 ; C20F8105
s_buffer_load_dword s32, s[0:3], 0x6 ; C2100106
s_buffer_load_dword s33, s[0:3], 0x7 ; C2108107
s_buffer_load_dword s34, s[0:3], 0x8 ; C2110108
s_buffer_load_dword s35, s[0:3], 0x9 ; C2118109
s_buffer_load_dword s36, s[0:3], 0x25 ; C2120125
s_buffer_load_dword s37, s[0:3], 0x26 ; C2128126
s_buffer_load_dword s38, s[0:3], 0x27 ; C2130127
s_buffer_load_dword s39, s[0:3], 0x28 ; C2138128
s_buffer_load_dword s40, s[0:3], 0x29 ; C2140129
s_buffer_load_dword s41, s[0:3], 0xa ; C214810A
s_buffer_load_dword s42, s[0:3], 0xb ; C215010B
s_buffer_load_dword s43, s[0:3], 0xc ; C215810C
s_buffer_load_dword s44, s[0:3], 0xd ; C216010D
s_buffer_load_dword s45, s[0:3], 0xe ; C216810E
s_buffer_load_dword s46, s[0:3], 0x2a ; C217012A
s_buffer_load_dword s47, s[0:3], 0x2b ; C217812B
s_buffer_load_dword s48, s[0:3], 0x2c ; C218012C
s_buffer_load_dword s49, s[0:3], 0x2d ; C218812D
s_buffer_load_dword s50, s[0:3], 0x2e ; C219012E
s_buffer_load_dword s51, s[0:3], 0x2f ; C219812F
s_buffer_load_dword s52, s[0:3], 0x30 ; C21A0130
s_buffer_load_dword s53, s[0:3], 0x31 ; C21A8131
s_buffer_load_dword s54, s[0:3], 0x32 ; C21B0132
; The last constant load overwrites s0, the first dword of the descriptor it was read through.
s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Multiply/accumulate chains, interleaved by the scheduler:
;   v0, v11, v16, v17 : dwords 0x24..0x33 applied to input 0 (v6..v9) -> export target 12
;   v18, v19, v20, v6 : dwords 0x0..0xe applied to input 0           -> IR %185, %187, %189, %183
;   v14, v15, v10     : dwords 0x10..0x1a applied to (v10, v11, v5)  -> IR %159, %161, %163 (v5 term added later)
v_mul_f32_e32 v0, s23, v6 ; 10000C17
v_mul_f32_e32 v14, s28, v10 ; 101C141C
v_mul_f32_e32 v15, s17, v10 ; 101E1411
v_mul_f32_e32 v10, s20, v10 ; 10141414
v_mac_f32_e32 v0, s39, v7 ; 3E000E27
v_mac_f32_e32 v14, s29, v11 ; 3E1C161D
v_mac_f32_e32 v15, s18, v11 ; 3E1E1612
v_mac_f32_e32 v10, s21, v11 ; 3E141615
v_mul_f32_e32 v11, s36, v6 ; 10160C24
v_mac_f32_e32 v11, s40, v7 ; 3E160E28
v_mul_f32_e32 v16, s37, v6 ; 10200C25
v_mac_f32_e32 v16, s46, v7 ; 3E200E2E
v_mul_f32_e32 v17, s38, v6 ; 10220C26
v_mac_f32_e32 v17, s47, v7 ; 3E220E2F
v_mul_f32_e32 v18, s24, v6 ; 10240C18
v_mac_f32_e32 v18, s30, v7 ; 3E240E1E
v_mul_f32_e32 v19, s25, v6 ; 10260C19
v_mac_f32_e32 v19, s31, v7 ; 3E260E1F
v_mul_f32_e32 v20, s26, v6 ; 10280C1A
v_mac_f32_e32 v20, s32, v7 ; 3E280E20
v_mul_f32_e32 v6, s27, v6 ; 100C0C1B
v_mac_f32_e32 v6, s33, v7 ; 3E0C0E21
v_mac_f32_e32 v0, s48, v8 ; 3E001030
v_mac_f32_e32 v11, s49, v8 ; 3E161031
v_mac_f32_e32 v16, s50, v8 ; 3E201032
v_mac_f32_e32 v17, s51, v8 ; 3E221033
v_mac_f32_e32 v18, s34, v8 ; 3E241022
v_mac_f32_e32 v19, s35, v8 ; 3E261023
v_mac_f32_e32 v20, s41, v8 ; 3E281029
v_mac_f32_e32 v6, s42, v8 ; 3E0C102A
v_mac_f32_e32 v0, s52, v9 ; 3E001234
v_mac_f32_e32 v11, s53, v9 ; 3E161235
v_mac_f32_e32 v16, s54, v9 ; 3E201236
; Unpack input 2: v3 = int(v12 * 0.25) (IR %78), v4 = int(v13) (IR %79).
v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000
v_cvt_i32_f32_e32 v4, v13 ; 7E08110D
v_cvt_i32_f32_e32 v3, v3 ; 7E061103
v_mac_f32_e32 v17, s0, v9 ; 3E221200
v_mac_f32_e32 v18, s43, v9 ; 3E24122B
v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04
v_lshlrev_b32_e32 v8, 2, v3 ; 34100682
v_cvt_f32_i32_e32 v8, v8 ; 7E100B08
v_mac_f32_e32 v19, s44, v9 ; 3E26122C
v_mac_f32_e32 v20, s45, v9 ; 3E28122D
; v7 = v13 - float(int(v13)) (IR %90); v8 = v12 - float(4 * v3) (IR %119).
v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07
v_subrev_f32_e32 v8, v8, v12 ; 0A101908
; v1 = v7 * 10.0 + v1, matching IR %91/%92 (0x41200000 is 10.0).
v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000
; Signed divide of v4 by 256 without a divide instruction: add (sign >> 24), i.e. 255 for negative
; values, then arithmetic-shift right by 8. Quotient ends up in v3, remainder in v4.
v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F
v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_add_i32_e32 v7, v4, v7 ; 4A0E0F04
v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00
v_sub_i32_e32 v4, v4, v9 ; 4C081304
; One-hot selector on v1 (IR %93): v9 = (v1 == 0), v1 = (v1 == 1), v21 = (v1 == 2), each as 1.0 / 0.
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480
v_cvt_f32_i32_e32 v12, v3 ; 7E180B03
v_ashrrev_i32_e32 v3, 8, v7 ; 30060E88
v_cvt_f32_i32_e32 v7, v3 ; 7E0E0B03
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cmp_eq_i32_e64 s[0:1], 2, v1 ; D1040000 00020282
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04
; v3 = dot(selector, (v12, v7, v13)), then + 0 and * 0x3b808081 (~1/255): the sample x coordinate.
v_mul_f32_e32 v3, v7, v1 ; 10060307
v_mac_f32_e32 v3, v12, v9 ; 3E06130C
v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480
v_mac_f32_e32 v3, v13, v21 ; 3E062B0D
v_add_f32_e32 v3, 0, v3 ; 06060680
v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081
; v4 = 0x3dcccccd (0.1f), the y coordinate; v5 still holds the 0 written at the top.
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
; Texture sample: address in v[3:6], two result components written back to v[3:4].
image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[12:15] ; F0900300 00610303
; v5 = v8 - 2.0 (IR %120); finish the v14 / v15 / v10 chains with it.
v_add_f32_e32 v5, -2.0, v8 ; 060A10F5
v_mac_f32_e32 v14, s16, v5 ; 3E1C0A10
v_mac_f32_e32 v15, s19, v5 ; 3E1E0A13
v_mac_f32_e32 v10, s22, v5 ; 3E140A16
; Unpacked values minus 1.0 (IR %86, %87; %88 follows in v12 below).
v_add_f32_e32 v5, -1.0, v12 ; 060A18F3
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
; v8 = rsq(v14^2 + v15^2 + v10^2): the normalization factor (IR %195).
v_mul_f32_e32 v8, v14, v14 ; 10101D0E
v_mac_f32_e32 v8, v15, v15 ; 3E101F0F
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_add_f32_e32 v12, -1.0, v13 ; 06181AF3
; Wait for the texture result, then v2 = v3 * 16.0 + (-8.0) (IR %115; 0x41800000 is 16.0).
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000
exp 15, 32, 0, 0, 0, v5, v7, v12, v2 ; F800020F 020C0705
; v2 and v5 are reused below, so the export above must have read them first.
s_waitcnt expcnt(0) ; BF8C070F
; Normalized vector in v2, v3, v5 (IR %196..%198); v4 = 4.0 * texel.y (IR %116).
v_mul_f32_e32 v2, v8, v14 ; 10041D08
v_mul_f32_e32 v3, v8, v15 ; 10061F08
v_mul_f32_e32 v4, 4.0, v4 ; 100808F6
v_mul_f32_e32 v5, v8, v10 ; 100A1508
; Remaining exports, in the same order and with the same operands as the IR's llvm.SI.export calls.
exp 15, 33, 0, 0, 0, v9, v1, v21, v4 ; F800021F 04150109
exp 15, 34, 0, 0, 0, v2, v3, v5, v18 ; F800022F 12050302
exp 15, 35, 0, 0, 0, v19, v20, v20, v6 ; F800023F 06141413
exp 15, 12, 0, 1, 0, v0, v11, v16, v17 ; F80008CF 11100B00
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 24
Code Size: 680 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0]
DCL CONST[6..12]
DCL TEMP[0..28], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0000}
0: MOV TEMP[0].x, IN[2].wwww
1: MOV TEMP[0].yz, IN[3].yxyy
2: ABS TEMP[1].xyz, IN[2].xyzz
3: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
4: RSQ TEMP[2].x, TEMP[2].xxxx
5: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, IMM[0].xxxx
6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy
7: MAX TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz
8: ADD TEMP[2].x, TEMP[1].xxxx, TEMP[1].yyyy
9: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[1].zzzz
10: RCP TEMP[2].xyz, TEMP[2].xxxx
11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
12: ADD TEMP[2], IN[0], IMM[0].wwww
13: FLR TEMP[2].xyz, TEMP[2]
14: MOV TEMP[3].x, CONST[6].xxxx
15: MUL TEMP[4].x, TEMP[2].xxxx, CONST[6].xxxx
16: MOV TEMP[5].x, TEMP[4].xxxx
17: FLR TEMP[4].x, TEMP[4].xxxx
18: MUL TEMP[4].x, TEMP[4].xxxx, CONST[6].xxxx
19: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[1].xxxx
20: UIF TEMP[6].xxxx :0
21: MOV TEMP[3].x, CONST[7].xxxx
22: ADD TEMP[6].x, TEMP[2].xxxx, IMM[1].yyyy
23: MUL TEMP[6].x, TEMP[6].xxxx, CONST[7].xxxx
24: MOV TEMP[5].x, TEMP[6].xxxx
25: FLR TEMP[7].x, TEMP[6].xxxx
26: MUL TEMP[7].x, TEMP[7].xxxx, CONST[7].xxxx
27: MOV TEMP[4].x, TEMP[7].xxxx
28: FRC TEMP[6].x, TEMP[6].xxxx
29: FRC TEMP[8].x, TEMP[7].xxxx
30: MOV TEMP[6].y, TEMP[8].xxxx
31: FLR TEMP[7].x, TEMP[7].xxxx
32: ADD TEMP[7].x, TEMP[7].xxxx, IMM[1].zzzz
33: MOV TEMP[6].z, TEMP[7].xxxx
34: MOV TEMP[6].xyz, TEMP[6].xyzx
35: ELSE :0
36: FRC TEMP[5].x, TEMP[5].xxxx
37: FRC TEMP[7].x, TEMP[4].xxxx
38: MOV TEMP[5].y, TEMP[7].xxxx
39: FLR TEMP[4].x, TEMP[4].xxxx
40: MOV TEMP[5].z, TEMP[4].xxxx
41: MOV TEMP[6].xyz, TEMP[5].xyzx
42: ENDIF
43: MOV TEMP[4].x, CONST[6].xxxx
44: MUL TEMP[5].x, TEMP[2].yyyy, CONST[6].xxxx
45: MOV TEMP[7].x, TEMP[5].xxxx
46: FLR TEMP[5].x, TEMP[5].xxxx
47: MUL TEMP[5].x, TEMP[5].xxxx, CONST[6].xxxx
48: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[1].xxxx
49: UIF TEMP[8].xxxx :0
50: MOV TEMP[4].x, CONST[7].xxxx
51: ADD TEMP[8].x, TEMP[2].yyyy, IMM[1].yyyy
52: MUL TEMP[8].x, TEMP[8].xxxx, CONST[7].xxxx
53: MOV TEMP[7].x, TEMP[8].xxxx
54: FLR TEMP[9].x, TEMP[8].xxxx
55: MUL TEMP[9].x, TEMP[9].xxxx, CONST[7].xxxx
56: MOV TEMP[5].x, TEMP[9].xxxx
57: FRC TEMP[8].x, TEMP[8].xxxx
58: FRC TEMP[10].x, TEMP[9].xxxx
59: MOV TEMP[8].y, TEMP[10].xxxx
60: FLR TEMP[9].x, TEMP[9].xxxx
61: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
62: MOV TEMP[8].z, TEMP[9].xxxx
63: MOV TEMP[8].xyz, TEMP[8].xyzx
64: ELSE :0
65: FRC TEMP[7].x, TEMP[7].xxxx
66: FRC TEMP[9].x, TEMP[5].xxxx
67: MOV TEMP[7].y, TEMP[9].xxxx
68: FLR TEMP[5].x, TEMP[5].xxxx
69: MOV TEMP[7].z, TEMP[5].xxxx
70: MOV TEMP[8].xyz, TEMP[7].xyzx
71: ENDIF
72: MOV TEMP[5].x, CONST[6].xxxx
73: MUL TEMP[7].x, TEMP[2].zzzz, CONST[6].xxxx
74: MOV TEMP[9].x, TEMP[7].xxxx
75: FLR TEMP[7].x, TEMP[7].xxxx
76: MUL TEMP[7].x, TEMP[7].xxxx, CONST[6].xxxx
77: FSGE TEMP[10].x, TEMP[2].zzzz, IMM[1].xxxx
78: UIF TEMP[10].xxxx :0
79: MOV TEMP[5].x, CONST[7].xxxx
80: ADD TEMP[2].x, TEMP[2].zzzz, IMM[1].yyyy
81: MUL TEMP[2].x, TEMP[2].xxxx, CONST[7].xxxx
82: MOV TEMP[9].x, TEMP[2].xxxx
83: FLR TEMP[10].x, TEMP[2].xxxx
84: MUL TEMP[10].x, TEMP[10].xxxx, CONST[7].xxxx
85: MOV TEMP[7].x, TEMP[10].xxxx
86: FRC TEMP[2].x, TEMP[2].xxxx
87: FRC TEMP[11].x, TEMP[10].xxxx
88: MOV TEMP[2].y, TEMP[11].xxxx
89: FLR TEMP[10].x, TEMP[10].xxxx
90: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
91: MOV TEMP[2].z, TEMP[10].xxxx
92: MOV TEMP[2].xyz, TEMP[2].xyzx
93: ELSE :0
94: FRC TEMP[9].x, TEMP[9].xxxx
95: FRC TEMP[10].x, TEMP[7].xxxx
96: MOV TEMP[9].y, TEMP[10].xxxx
97: FLR TEMP[7].x, TEMP[7].xxxx
98: MOV TEMP[9].z, TEMP[7].xxxx
99: MOV TEMP[2].xyz, TEMP[9].xyzx
100: ENDIF
101: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz
102: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[7].xyzz
103: MUL TEMP[7].x, CONST[12].xxxx, TEMP[7].xxxx
104: LG2 TEMP[7].x, TEMP[7].xxxx
105: MUL TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
106: MUL TEMP[7].x, TEMP[7].xxxx, CONST[11].xxxx
107: MOV TEMP[9].xy, IN[3].yxyy
108: MOV TEMP[10].x, IMM[2].xxxx
109: FSNE TEMP[11].x, CONST[6].xxxx, TEMP[3].xxxx
110: UIF TEMP[11].xxxx :0
111: MOV TEMP[10].x, IMM[2].yyyy
112: RCP TEMP[11].x, CONST[9].xxxx
113: MUL TEMP[9].xy, IN[3].yxxx, TEMP[11].xxxx
114: ELSE :0
115: RCP TEMP[11].x, CONST[8].xxxx
116: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx
117: ENDIF
118: FRC TEMP[9].xy, TEMP[9].xyyy
119: MUL TEMP[11].x, CONST[10].xxxx, IMM[2].wwww
120: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[10].xxxx
121: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx
122: MUL TEMP[10].x, TEMP[10].xxxx, CONST[10].xxxx
123: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx, TEMP[10].xxxx
124: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[3].xxxx, TEMP[6].xyyy
125: MOV TEMP[10].xy, TEMP[9].xyyy
126: MOV TEMP[10].w, TEMP[7].xxxx
127: TXL TEMP[10], TEMP[10], SAMP[4], 2D
128: FSEQ TEMP[11].x, TEMP[6].zzzz, IMM[1].zzzz
129: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
130: MOV TEMP[12].xy, TEMP[9].xyyy
131: MOV TEMP[12].w, TEMP[7].xxxx
132: TXL TEMP[12], TEMP[12], SAMP[3], 2D
133: FSEQ TEMP[13].x, TEMP[6].zzzz, IMM[3].xxxx
134: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
135: MOV TEMP[14].xy, TEMP[9].xyyy
136: MOV TEMP[14].w, TEMP[7].xxxx
137: TXL TEMP[14], TEMP[14], SAMP[2], 2D
138: FSEQ TEMP[15].x, TEMP[6].zzzz, IMM[2].wwww
139: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
140: MOV TEMP[16].xy, TEMP[9].xyyy
141: MOV TEMP[16].w, TEMP[7].xxxx
142: TXL TEMP[16], TEMP[16], SAMP[1], 2D
143: FSEQ TEMP[17].x, TEMP[6].zzzz, IMM[2].zzzz
144: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
145: MOV TEMP[9].xy, TEMP[9].xyyy
146: MOV TEMP[9].w, TEMP[7].xxxx
147: TXL TEMP[9], TEMP[9], SAMP[0], 2D
148: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[3].yyyy
149: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
150: MUL TEMP[9], TEMP[9], TEMP[18].xxxx
151: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9]
152: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9]
153: MAD TEMP[9], TEMP[12], TEMP[13].xxxx, TEMP[9]
154: MAD TEMP[9].yw, TEMP[10], TEMP[11].xxxx, TEMP[9]
155: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz
156: DP2 TEMP[10].x, TEMP[9].xyyy, TEMP[9].xyyy
157: MOV_SAT TEMP[19].x, TEMP[10].xxxx
158: MOV TEMP[10].xy, TEMP[0].zxzz
159: MOV TEMP[11].x, IMM[2].xxxx
160: FSNE TEMP[12].x, CONST[6].xxxx, TEMP[3].xxxx
161: UIF TEMP[12].xxxx :0
162: MOV TEMP[11].x, IMM[2].yyyy
163: RCP TEMP[12].x, CONST[9].xxxx
164: MUL TEMP[10].xy, TEMP[0].zxxx, TEMP[12].xxxx
165: ELSE :0
166: RCP TEMP[12].x, CONST[8].xxxx
167: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx
168: ENDIF
169: FRC TEMP[10].xy, TEMP[10].xyyy
170: MUL TEMP[12].x, CONST[10].xxxx, IMM[2].wwww
171: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
172: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
173: MUL TEMP[11].x, TEMP[11].xxxx, CONST[10].xxxx
174: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
175: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[3].xxxx, TEMP[6].xyyy
176: MOV TEMP[11].xy, TEMP[10].xyyy
177: MOV TEMP[11].w, TEMP[7].xxxx
178: TXL TEMP[11], TEMP[11], SAMP[4], 2D
179: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz
180: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
181: MOV TEMP[13].xy, TEMP[10].xyyy
182: MOV TEMP[13].w, TEMP[7].xxxx
183: TXL TEMP[13], TEMP[13], SAMP[3], 2D
184: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx
185: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
186: MOV TEMP[15].xy, TEMP[10].xyyy
187: MOV TEMP[15].w, TEMP[7].xxxx
188: TXL TEMP[15], TEMP[15], SAMP[2], 2D
189: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww
190: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
191: MOV TEMP[17].xy, TEMP[10].xyyy
192: MOV TEMP[17].w, TEMP[7].xxxx
193: TXL TEMP[17], TEMP[17], SAMP[1], 2D
194: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz
195: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
196: MOV TEMP[10].xy, TEMP[10].xyyy
197: MOV TEMP[10].w, TEMP[7].xxxx
198: TXL TEMP[10], TEMP[10], SAMP[0], 2D
199: FSEQ TEMP[20].x, TEMP[6].zzzz, IMM[3].yyyy
200: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
201: MUL TEMP[10], TEMP[10], TEMP[20].xxxx
202: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10]
203: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10]
204: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10]
205: MAD TEMP[10].yw, TEMP[11], TEMP[12].xxxx, TEMP[10]
206: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[2].wwww, IMM[3].zzzz
207: DP2 TEMP[11].x, TEMP[10].xyyy, TEMP[10].xyyy
208: MOV_SAT TEMP[21].x, TEMP[11].xxxx
209: MOV TEMP[11].xy, TEMP[0].xyxx
210: MOV TEMP[12].x, IMM[2].xxxx
211: FSNE TEMP[13].x, CONST[6].xxxx, TEMP[3].xxxx
212: UIF TEMP[13].xxxx :0
213: MOV TEMP[12].x, IMM[2].yyyy
214: RCP TEMP[13].x, CONST[9].xxxx
215: MUL TEMP[11].xy, TEMP[0].xyyy, TEMP[13].xxxx
216: ELSE :0
217: RCP TEMP[13].x, CONST[8].xxxx
218: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
219: ENDIF
220: FRC TEMP[11].xy, TEMP[11].xyyy
221: MUL TEMP[13].x, CONST[10].xxxx, IMM[2].wwww
222: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
223: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
224: MUL TEMP[12].x, TEMP[12].xxxx, CONST[10].xxxx
225: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
226: MAD TEMP[3].xy, TEMP[11].xyyy, TEMP[3].xxxx, TEMP[6].xyyy
227: MOV TEMP[11].xy, TEMP[3].xyyy
228: MOV TEMP[11].w, TEMP[7].xxxx
229: TXL TEMP[11], TEMP[11], SAMP[4], 2D
230: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz
231: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
232: MOV TEMP[13].xy, TEMP[3].xyyy
233: MOV TEMP[13].w, TEMP[7].xxxx
234: TXL TEMP[13], TEMP[13], SAMP[3], 2D
235: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx
236: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
237: MOV TEMP[15].xy, TEMP[3].xyyy
238: MOV TEMP[15].w, TEMP[7].xxxx
239: TXL TEMP[15], TEMP[15], SAMP[2], 2D
240: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww
241: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
242: MOV TEMP[17].xy, TEMP[3].xyyy
243: MOV TEMP[17].w, TEMP[7].xxxx
244: TXL TEMP[17], TEMP[17], SAMP[1], 2D
245: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz
246: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
247: MOV TEMP[3].xy, TEMP[3].xyyy
248: MOV TEMP[3].w, TEMP[7].xxxx
249: TXL TEMP[3], TEMP[3], SAMP[0], 2D
250: FSEQ TEMP[6].x, TEMP[6].zzzz, IMM[3].yyyy
251: AND TEMP[6].x, TEMP[6].xxxx, IMM[2].zzzz
252: MUL TEMP[3], TEMP[3], TEMP[6].xxxx
253: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3]
254: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3]
255: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3]
256: MAD TEMP[3].yw, TEMP[11], TEMP[12].xxxx, TEMP[3]
257: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[2].wwww, IMM[3].zzzz
258: DP2 TEMP[6].x, TEMP[3].xyyy, TEMP[3].xyyy
259: MOV_SAT TEMP[22].x, TEMP[6].xxxx
260: MOV TEMP[6].x, IMM[3].yyyy
261: MOV TEMP[6].y, TEMP[9].xxxx
262: MOV TEMP[6].z, TEMP[9].yyyy
263: MOV TEMP[9].y, IMM[3].yyyy
264: MOV TEMP[9].x, TEMP[10].yyyy
265: MOV TEMP[9].z, TEMP[10].xxxx
266: MOV TEMP[10].z, IMM[3].yyyy
267: MOV TEMP[10].xy, TEMP[3].xyxx
268: MUL TEMP[3].xyz, TEMP[6].xyzz, TEMP[1].xxxx
269: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[1].yyyy, TEMP[3].xyzz
270: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[1].zzzz, TEMP[3].xyzz
271: MOV TEMP[6].xy, IN[3].yxyy
272: MOV TEMP[9].x, IMM[2].xxxx
273: FSNE TEMP[10].x, CONST[6].xxxx, TEMP[4].xxxx
274: UIF TEMP[10].xxxx :0
275: MOV TEMP[9].x, IMM[2].yyyy
276: RCP TEMP[10].x, CONST[9].xxxx
277: MUL TEMP[6].xy, IN[3].yxxx, TEMP[10].xxxx
278: ELSE :0
279: RCP TEMP[10].x, CONST[8].xxxx
280: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[10].xxxx
281: ENDIF
282: FRC TEMP[6].xy, TEMP[6].xyyy
283: MUL TEMP[10].x, CONST[10].xxxx, IMM[2].wwww
284: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx
285: ADD TEMP[10].x, IMM[2].zzzz, -TEMP[10].xxxx
286: MUL TEMP[9].x, TEMP[9].xxxx, CONST[10].xxxx
287: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[10].xxxx, TEMP[9].xxxx
288: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[4].xxxx, TEMP[8].xyyy
289: MOV TEMP[9].xy, TEMP[6].xyyy
290: MOV TEMP[9].w, TEMP[7].xxxx
291: TXL TEMP[9], TEMP[9], SAMP[4], 2D
292: FSEQ TEMP[10].x, TEMP[8].zzzz, IMM[1].zzzz
293: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz
294: MOV TEMP[11].xy, TEMP[6].xyyy
295: MOV TEMP[11].w, TEMP[7].xxxx
296: TXL TEMP[11], TEMP[11], SAMP[3], 2D
297: FSEQ TEMP[12].x, TEMP[8].zzzz, IMM[3].xxxx
298: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
299: MOV TEMP[13].xy, TEMP[6].xyyy
300: MOV TEMP[13].w, TEMP[7].xxxx
301: TXL TEMP[13], TEMP[13], SAMP[2], 2D
302: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[2].wwww
303: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
304: MOV TEMP[15].xy, TEMP[6].xyyy
305: MOV TEMP[15].w, TEMP[7].xxxx
306: TXL TEMP[15], TEMP[15], SAMP[1], 2D
307: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[2].zzzz
308: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
309: MOV TEMP[6].xy, TEMP[6].xyyy
310: MOV TEMP[6].w, TEMP[7].xxxx
311: TXL TEMP[6], TEMP[6], SAMP[0], 2D
312: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].yyyy
313: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
314: MUL TEMP[6], TEMP[6], TEMP[17].xxxx
315: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6]
316: MAD TEMP[6], TEMP[13], TEMP[14].xxxx, TEMP[6]
317: MAD TEMP[6], TEMP[11], TEMP[12].xxxx, TEMP[6]
318: MAD TEMP[6].yw, TEMP[9], TEMP[10].xxxx, TEMP[6]
319: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz
320: DP2 TEMP[9].x, TEMP[6].xyyy, TEMP[6].xyyy
321: MOV_SAT TEMP[23].x, TEMP[9].xxxx
322: MOV TEMP[9].xy, TEMP[0].zxzz
323: MOV TEMP[10].x, IMM[2].xxxx
324: FSNE TEMP[11].x, CONST[6].xxxx, TEMP[4].xxxx
325: UIF TEMP[11].xxxx :0
326: MOV TEMP[10].x, IMM[2].yyyy
327: RCP TEMP[11].x, CONST[9].xxxx
328: MUL TEMP[9].xy, TEMP[0].zxxx, TEMP[11].xxxx
329: ELSE :0
330: RCP TEMP[11].x, CONST[8].xxxx
331: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx
332: ENDIF
333: FRC TEMP[9].xy, TEMP[9].xyyy
334: MUL TEMP[11].x, CONST[10].xxxx, IMM[2].wwww
335: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[10].xxxx
336: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx
337: MUL TEMP[10].x, TEMP[10].xxxx, CONST[10].xxxx
338: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx, TEMP[10].xxxx
339: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[4].xxxx, TEMP[8].xyyy
340: MOV TEMP[10].xy, TEMP[9].xyyy
341: MOV TEMP[10].w, TEMP[7].xxxx
342: TXL TEMP[10], TEMP[10], SAMP[4], 2D
343: FSEQ TEMP[11].x, TEMP[8].zzzz, IMM[1].zzzz
344: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
345: MOV TEMP[12].xy, TEMP[9].xyyy
346: MOV TEMP[12].w, TEMP[7].xxxx
347: TXL TEMP[12], TEMP[12], SAMP[3], 2D
348: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[3].xxxx
349: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
350: MOV TEMP[14].xy, TEMP[9].xyyy
351: MOV TEMP[14].w, TEMP[7].xxxx
352: TXL TEMP[14], TEMP[14], SAMP[2], 2D
353: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[2].wwww
354: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
355: MOV TEMP[16].xy, TEMP[9].xyyy
356: MOV TEMP[16].w, TEMP[7].xxxx
357: TXL TEMP[16], TEMP[16], SAMP[1], 2D
358: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].zzzz
359: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
360: MOV TEMP[9].xy, TEMP[9].xyyy
361: MOV TEMP[9].w, TEMP[7].xxxx
362: TXL TEMP[9], TEMP[9], SAMP[0], 2D
363: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[3].yyyy
364: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
365: MUL TEMP[9], TEMP[9], TEMP[18].xxxx
366: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9]
367: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9]
368: MAD TEMP[9], TEMP[12], TEMP[13].xxxx, TEMP[9]
369: MAD TEMP[9].yw, TEMP[10], TEMP[11].xxxx, TEMP[9]
370: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz
371: DP2 TEMP[10].x, TEMP[9].xyyy, TEMP[9].xyyy
372: MOV_SAT TEMP[24].x, TEMP[10].xxxx
373: MOV TEMP[10].xy, TEMP[0].xyxx
374: MOV TEMP[11].x, IMM[2].xxxx
375: FSNE TEMP[12].x, CONST[6].xxxx, TEMP[4].xxxx
376: UIF TEMP[12].xxxx :0
377: MOV TEMP[11].x, IMM[2].yyyy
378: RCP TEMP[12].x, CONST[9].xxxx
379: MUL TEMP[10].xy, TEMP[0].xyyy, TEMP[12].xxxx
380: ELSE :0
381: RCP TEMP[12].x, CONST[8].xxxx
382: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx
383: ENDIF
384: FRC TEMP[10].xy, TEMP[10].xyyy
385: MUL TEMP[12].x, CONST[10].xxxx, IMM[2].wwww
386: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
387: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
388: MUL TEMP[11].x, TEMP[11].xxxx, CONST[10].xxxx
389: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
390: MAD TEMP[4].xy, TEMP[10].xyyy, TEMP[4].xxxx, TEMP[8].xyyy
391: MOV TEMP[10].xy, TEMP[4].xyyy
392: MOV TEMP[10].w, TEMP[7].xxxx
393: TXL TEMP[10], TEMP[10], SAMP[4], 2D
394: FSEQ TEMP[11].x, TEMP[8].zzzz, IMM[1].zzzz
395: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
396: MOV TEMP[12].xy, TEMP[4].xyyy
397: MOV TEMP[12].w, TEMP[7].xxxx
398: TXL TEMP[12], TEMP[12], SAMP[3], 2D
399: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[3].xxxx
400: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
401: MOV TEMP[14].xy, TEMP[4].xyyy
402: MOV TEMP[14].w, TEMP[7].xxxx
403: TXL TEMP[14], TEMP[14], SAMP[2], 2D
404: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[2].wwww
405: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
406: MOV TEMP[16].xy, TEMP[4].xyyy
407: MOV TEMP[16].w, TEMP[7].xxxx
408: TXL TEMP[16], TEMP[16], SAMP[1], 2D
409: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].zzzz
410: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
411: MOV TEMP[4].xy, TEMP[4].xyyy
412: MOV TEMP[4].w, TEMP[7].xxxx
413: TXL TEMP[4], TEMP[4], SAMP[0], 2D
414: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[3].yyyy
415: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz
416: MUL TEMP[4], TEMP[4], TEMP[8].xxxx
417: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4]
418: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4]
419: MAD TEMP[4], TEMP[12], TEMP[13].xxxx, TEMP[4]
420: MAD TEMP[4].yw, TEMP[10], TEMP[11].xxxx, TEMP[4]
421: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz
422: DP2 TEMP[8].x, TEMP[4].xyyy, TEMP[4].xyyy
423: MOV_SAT TEMP[25].x, TEMP[8].xxxx
424: MOV TEMP[8].x, IMM[3].yyyy
425: MOV TEMP[8].y, TEMP[6].xxxx
426: MOV TEMP[8].z, TEMP[6].yyyy
427: MOV TEMP[6].y, IMM[3].yyyy
428: MOV TEMP[6].x, TEMP[9].yyyy
429: MOV TEMP[6].z, TEMP[9].xxxx
430: MOV TEMP[9].z, IMM[3].yyyy
431: MOV TEMP[9].xy, TEMP[4].xyxx
432: MUL TEMP[4].xyz, TEMP[8].xyzz, TEMP[1].xxxx
433: MAD TEMP[4].xyz, TEMP[6].xyzz, TEMP[1].yyyy, TEMP[4].xyzz
434: MAD TEMP[4].xyz, TEMP[9].xyzz, TEMP[1].zzzz, TEMP[4].xyzz
435: MOV TEMP[6].xy, IN[3].yxyy
436: MOV TEMP[8].x, IMM[2].xxxx
437: FSNE TEMP[9].x, CONST[6].xxxx, TEMP[5].xxxx
438: UIF TEMP[9].xxxx :0
439: MOV TEMP[8].x, IMM[2].yyyy
440: RCP TEMP[9].x, CONST[9].xxxx
441: MUL TEMP[6].xy, IN[3].yxxx, TEMP[9].xxxx
442: ELSE :0
443: RCP TEMP[9].x, CONST[8].xxxx
444: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[9].xxxx
445: ENDIF
446: FRC TEMP[6].xy, TEMP[6].xyyy
447: MUL TEMP[9].x, CONST[10].xxxx, IMM[2].wwww
448: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[8].xxxx
449: ADD TEMP[9].x, IMM[2].zzzz, -TEMP[9].xxxx
450: MUL TEMP[8].x, TEMP[8].xxxx, CONST[10].xxxx
451: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[9].xxxx, TEMP[8].xxxx
452: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[5].xxxx, TEMP[2].xyyy
453: MOV TEMP[8].xy, TEMP[6].xyyy
454: MOV TEMP[8].w, TEMP[7].xxxx
455: TXL TEMP[8], TEMP[8], SAMP[4], 2D
456: FSEQ TEMP[9].x, TEMP[2].zzzz, IMM[1].zzzz
457: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz
458: MOV TEMP[10].xy, TEMP[6].xyyy
459: MOV TEMP[10].w, TEMP[7].xxxx
460: TXL TEMP[10], TEMP[10], SAMP[3], 2D
461: FSEQ TEMP[11].x, TEMP[2].zzzz, IMM[3].xxxx
462: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
463: MOV TEMP[12].xy, TEMP[6].xyyy
464: MOV TEMP[12].w, TEMP[7].xxxx
465: TXL TEMP[12], TEMP[12], SAMP[2], 2D
466: FSEQ TEMP[13].x, TEMP[2].zzzz, IMM[2].wwww
467: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
468: MOV TEMP[14].xy, TEMP[6].xyyy
469: MOV TEMP[14].w, TEMP[7].xxxx
470: TXL TEMP[14], TEMP[14], SAMP[1], 2D
471: FSEQ TEMP[15].x, TEMP[2].zzzz, IMM[2].zzzz
472: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
473: MOV TEMP[6].xy, TEMP[6].xyyy
474: MOV TEMP[6].w, TEMP[7].xxxx
475: TXL TEMP[6], TEMP[6], SAMP[0], 2D
476: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[3].yyyy
477: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
478: MUL TEMP[6], TEMP[6], TEMP[16].xxxx
479: MAD TEMP[6], TEMP[14], TEMP[15].xxxx, TEMP[6]
480: MAD TEMP[6], TEMP[12], TEMP[13].xxxx, TEMP[6]
481: MAD TEMP[6], TEMP[10], TEMP[11].xxxx, TEMP[6]
482: MAD TEMP[6].yw, TEMP[8], TEMP[9].xxxx, TEMP[6]
483: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz
484: DP2 TEMP[8].x, TEMP[6].xyyy, TEMP[6].xyyy
485: MOV_SAT TEMP[26].x, TEMP[8].xxxx
486: MOV TEMP[8].xy, TEMP[0].zxzz
487: MOV TEMP[9].x, IMM[2].xxxx
488: FSNE TEMP[10].x, CONST[6].xxxx, TEMP[5].xxxx
489: UIF TEMP[10].xxxx :0
490: MOV TEMP[9].x, IMM[2].yyyy
491: RCP TEMP[10].x, CONST[9].xxxx
492: MUL TEMP[8].xy, TEMP[0].zxxx, TEMP[10].xxxx
493: ELSE :0
494: RCP TEMP[10].x, CONST[8].xxxx
495: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[10].xxxx
496: ENDIF
497: FRC TEMP[8].xy, TEMP[8].xyyy
498: MUL TEMP[10].x, CONST[10].xxxx, IMM[2].wwww
499: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[9].xxxx
500: ADD TEMP[10].x, IMM[2].zzzz, -TEMP[10].xxxx
501: MUL TEMP[9].x, TEMP[9].xxxx, CONST[10].xxxx
502: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[10].xxxx, TEMP[9].xxxx
503: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[5].xxxx, TEMP[2].xyyy
504: MOV TEMP[9].xy, TEMP[8].xyyy
505: MOV TEMP[9].w, TEMP[7].xxxx
506: TXL TEMP[9], TEMP[9], SAMP[4], 2D
507: FSEQ TEMP[10].x, TEMP[2].zzzz, IMM[1].zzzz
508: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz
509: MOV TEMP[11].xy, TEMP[8].xyyy
510: MOV TEMP[11].w, TEMP[7].xxxx
511: TXL TEMP[11], TEMP[11], SAMP[3], 2D
512: FSEQ TEMP[12].x, TEMP[2].zzzz, IMM[3].xxxx
513: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
514: MOV TEMP[13].xy, TEMP[8].xyyy
515: MOV TEMP[13].w, TEMP[7].xxxx
516: TXL TEMP[13], TEMP[13], SAMP[2], 2D
517: FSEQ TEMP[14].x, TEMP[2].zzzz, IMM[2].wwww
518: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
519: MOV TEMP[15].xy, TEMP[8].xyyy
520: MOV TEMP[15].w, TEMP[7].xxxx
521: TXL TEMP[15], TEMP[15], SAMP[1], 2D
522: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[2].zzzz
523: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
524: MOV TEMP[8].xy, TEMP[8].xyyy
525: MOV TEMP[8].w, TEMP[7].xxxx
526: TXL TEMP[8], TEMP[8], SAMP[0], 2D
527: FSEQ TEMP[17].x, TEMP[2].zzzz, IMM[3].yyyy
528: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
529: MUL TEMP[8], TEMP[8], TEMP[17].xxxx
530: MAD TEMP[8], TEMP[15], TEMP[16].xxxx, TEMP[8]
531: MAD TEMP[8], TEMP[13], TEMP[14].xxxx, TEMP[8]
532: MAD TEMP[8], TEMP[11], TEMP[12].xxxx, TEMP[8]
533: MAD TEMP[8].yw, TEMP[9], TEMP[10].xxxx, TEMP[8]
534: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz
535: DP2 TEMP[9].x, TEMP[8].xyyy, TEMP[8].xyyy
536: MOV_SAT TEMP[27].x, TEMP[9].xxxx
537: MOV TEMP[9].xy, TEMP[0].xyxx
538: MOV TEMP[10].x, IMM[2].xxxx
539: FSNE TEMP[11].x, CONST[6].xxxx, TEMP[5].xxxx
540: UIF TEMP[11].xxxx :0
541: MOV TEMP[10].x, IMM[2].yyyy
542: RCP TEMP[11].x, CONST[9].xxxx
543: MUL TEMP[9].xy, TEMP[0].xyyy, TEMP[11].xxxx
544: ELSE :0
545: RCP TEMP[0].x, CONST[8].xxxx
546: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[0].xxxx
547: ENDIF
548: FRC TEMP[0].xy, TEMP[9].xyyy
549: MUL TEMP[9].x, CONST[10].xxxx, IMM[2].wwww
550: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx
551: ADD TEMP[9].x, IMM[2].zzzz, -TEMP[9].xxxx
552: MUL TEMP[10].x, TEMP[10].xxxx, CONST[10].xxxx
553: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[9].xxxx, TEMP[10].xxxx
554: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[5].xxxx, TEMP[2].xyyy
555: MOV TEMP[5].xy, TEMP[0].xyyy
556: MOV TEMP[5].w, TEMP[7].xxxx
557: TXL TEMP[5], TEMP[5], SAMP[4], 2D
558: FSEQ TEMP[9].x, TEMP[2].zzzz, IMM[1].zzzz
559: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz
560: MOV TEMP[10].xy, TEMP[0].xyyy
561: MOV TEMP[10].w, TEMP[7].xxxx
562: TXL TEMP[10], TEMP[10], SAMP[3], 2D
563: FSEQ TEMP[11].x, TEMP[2].zzzz, IMM[3].xxxx
564: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
565: MOV TEMP[12].xy, TEMP[0].xyyy
566: MOV TEMP[12].w, TEMP[7].xxxx
567: TXL TEMP[12], TEMP[12], SAMP[2], 2D
568: FSEQ TEMP[13].x, TEMP[2].zzzz, IMM[2].wwww
569: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
570: MOV TEMP[14].xy, TEMP[0].xyyy
571: MOV TEMP[14].w, TEMP[7].xxxx
572: TXL TEMP[14], TEMP[14], SAMP[1], 2D
573: FSEQ TEMP[15].x, TEMP[2].zzzz, IMM[2].zzzz
574: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
575: MOV TEMP[0].xy, TEMP[0].xyyy
576: MOV TEMP[0].w, TEMP[7].xxxx
577: TXL TEMP[0], TEMP[0], SAMP[0], 2D
578: FSEQ TEMP[2].x, TEMP[2].zzzz, IMM[3].yyyy
579: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz
580: MUL TEMP[0], TEMP[0], TEMP[2].xxxx
581: MAD TEMP[0], TEMP[14], TEMP[15].xxxx, TEMP[0]
582: MAD TEMP[0], TEMP[12], TEMP[13].xxxx, TEMP[0]
583: MAD TEMP[0], TEMP[10], TEMP[11].xxxx, TEMP[0]
584: MAD TEMP[0].yw, TEMP[5], TEMP[9].xxxx, TEMP[0]
585: MAD TEMP[0].xy, TEMP[0].wyyy, IMM[2].wwww, IMM[3].zzzz
586: DP2 TEMP[2].x, TEMP[0].xyyy, TEMP[0].xyyy
587: MOV_SAT TEMP[28].x, TEMP[2].xxxx
588: MOV TEMP[2].x, IMM[3].yyyy
589: MOV TEMP[2].y, TEMP[6].xxxx
590: MOV TEMP[2].z, TEMP[6].yyyy
591: MOV TEMP[5].y, IMM[3].yyyy
592: MOV TEMP[5].x, TEMP[8].yyyy
593: MOV TEMP[5].z, TEMP[8].xxxx
594: MOV TEMP[6].z, IMM[3].yyyy
595: MOV TEMP[6].xy, TEMP[0].xyxx
596: MOV TEMP[0].w, IMM[2].zzzz
597: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[1].xxxx
598: MAD TEMP[2].xyz, TEMP[5].xyzz, TEMP[1].yyyy, TEMP[2].xyzz
599: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].zzzz, TEMP[2].xyzz
600: MUL TEMP[2].xyz, IN[1].xxxx, TEMP[3].xyzz
601: MAD TEMP[2].xyz, IN[1].yyyy, TEMP[4].xyzz, TEMP[2].xyzz
602: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1].xyzz, TEMP[2].xyzz
603: DP4 TEMP[1].x, TEMP[0], TEMP[0]
604: RSQ TEMP[1].x, TEMP[1].xxxx
605: MUL TEMP[0].xyz, TEMP[0], TEMP[1].xxxx
606: MUL TEMP[0].xyz, TEMP[0].xyzz, IN[0].wwww
607: ADD TEMP[0].xyz, IN[2].xyzz, -TEMP[0].xyzz
608: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
609: RSQ TEMP[1].x, TEMP[1].xxxx
610: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
611: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].wwww, IMM[0].wwww
612: MOV TEMP[0].w, IN[1].wwww
613: MOV OUT[0], TEMP[0]
614: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%35 = load <8 x i32>, <8 x i32> addrspace(2)* %34, align 32, !tbaa !0
%36 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%37 = load <4 x i32>, <4 x i32> addrspace(2)* %36, align 16, !tbaa !0
%38 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%39 = load <8 x i32>, <8 x i32> addrspace(2)* %38, align 32, !tbaa !0
%40 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%41 = load <4 x i32>, <4 x i32> addrspace(2)* %40, align 16, !tbaa !0
%42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%43 = load <8 x i32>, <8 x i32> addrspace(2)* %42, align 32, !tbaa !0
%44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%45 = load <4 x i32>, <4 x i32> addrspace(2)* %44, align 16, !tbaa !0
%46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0
%48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0
%50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%51 = load <8 x i32>, <8 x i32> addrspace(2)* %50, align 32, !tbaa !0
%52 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%53 = load <4 x i32>, <4 x i32> addrspace(2)* %52, align 16, !tbaa !0
%54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%56 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%57 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%68 = call float @llvm.fabs.f32(float %62)
%69 = call float @llvm.fabs.f32(float %63)
%70 = call float @llvm.fabs.f32(float %64)
%71 = fmul float %68, %68
%72 = fmul float %69, %69
%73 = fadd float %72, %71
%74 = fmul float %70, %70
%75 = fadd float %73, %74
%76 = call float @llvm.AMDGPU.rsq.clamped.f32(float %75)
%77 = fmul float %68, %76
%78 = fadd float %77, 0xBFC99999A0000000
%79 = fmul float %69, %76
%80 = fadd float %79, 0xBFC99999A0000000
%81 = fmul float %70, %76
%82 = fadd float %81, 0xBFC99999A0000000
%83 = fmul float %78, 7.000000e+00
%84 = fmul float %80, 7.000000e+00
%85 = fmul float %82, 7.000000e+00
%86 = call float @llvm.maxnum.f32(float %83, float 0x3F847AE140000000)
%87 = call float @llvm.maxnum.f32(float %84, float 0x3F847AE140000000)
%88 = call float @llvm.maxnum.f32(float %85, float 0x3F847AE140000000)
%89 = fadd float %86, %87
%90 = fadd float %89, %88
%91 = fdiv float 1.000000e+00, %90
%92 = fmul float %86, %91
%93 = fmul float %87, %91
%94 = fmul float %88, %91
%95 = fadd float %54, 5.000000e-01
%96 = fadd float %55, 5.000000e-01
%97 = fadd float %56, 5.000000e-01
%98 = call float @llvm.floor.f32(float %95)
%99 = call float @llvm.floor.f32(float %96)
%100 = call float @llvm.floor.f32(float %97)
%101 = fmul float %98, %27
%102 = call float @llvm.floor.f32(float %101)
%103 = fmul float %102, %27
%104 = fcmp ult float %98, 6.400000e+01
br i1 %104, label %ELSE, label %IF
IF: ; preds = %main_body
%105 = fadd float %98, -6.400000e+01
%106 = fmul float %105, %28
%107 = call float @llvm.floor.f32(float %106)
%108 = fmul float %107, %28
%109 = call float @llvm.floor.f32(float %106)
%110 = fsub float %106, %109
%111 = call float @llvm.floor.f32(float %108)
%112 = fsub float %108, %111
%113 = call float @llvm.floor.f32(float %108)
%114 = fadd float %113, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%115 = call float @llvm.floor.f32(float %101)
%116 = fsub float %101, %115
%117 = call float @llvm.floor.f32(float %103)
%118 = fsub float %103, %117
%119 = call float @llvm.floor.f32(float %103)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp26.0 = phi float [ %114, %IF ], [ %119, %ELSE ]
%temp25.0 = phi float [ %112, %IF ], [ %118, %ELSE ]
%temp24.0 = phi float [ %110, %IF ], [ %116, %ELSE ]
%temp12.0 = phi float [ %28, %IF ], [ %27, %ELSE ]
%120 = fmul float %99, %27
%121 = call float @llvm.floor.f32(float %120)
%122 = fmul float %121, %27
%123 = fcmp ult float %99, 6.400000e+01
br i1 %123, label %ELSE118, label %IF117
IF117: ; preds = %ENDIF
%124 = fadd float %99, -6.400000e+01
%125 = fmul float %124, %28
%126 = call float @llvm.floor.f32(float %125)
%127 = fmul float %126, %28
%128 = call float @llvm.floor.f32(float %125)
%129 = fsub float %125, %128
%130 = call float @llvm.floor.f32(float %127)
%131 = fsub float %127, %130
%132 = call float @llvm.floor.f32(float %127)
%133 = fadd float %132, 4.000000e+00
br label %ENDIF116
ELSE118: ; preds = %ENDIF
%134 = call float @llvm.floor.f32(float %120)
%135 = fsub float %120, %134
%136 = call float @llvm.floor.f32(float %122)
%137 = fsub float %122, %136
%138 = call float @llvm.floor.f32(float %122)
br label %ENDIF116
ENDIF116: ; preds = %ELSE118, %IF117
%temp32.0 = phi float [ %129, %IF117 ], [ %135, %ELSE118 ]
%temp33.0 = phi float [ %131, %IF117 ], [ %137, %ELSE118 ]
%temp34.0 = phi float [ %133, %IF117 ], [ %138, %ELSE118 ]
%temp16.0 = phi float [ %28, %IF117 ], [ %27, %ELSE118 ]
%139 = fmul float %100, %27
%140 = call float @llvm.floor.f32(float %139)
%141 = fmul float %140, %27
%142 = fcmp ult float %100, 6.400000e+01
br i1 %142, label %ELSE121, label %IF120
IF120: ; preds = %ENDIF116
%143 = fadd float %100, -6.400000e+01
%144 = fmul float %143, %28
%145 = call float @llvm.floor.f32(float %144)
%146 = fmul float %145, %28
%147 = call float @llvm.floor.f32(float %144)
%148 = fsub float %144, %147
%149 = call float @llvm.floor.f32(float %146)
%150 = fsub float %146, %149
%151 = call float @llvm.floor.f32(float %146)
%152 = fadd float %151, 4.000000e+00
br label %ENDIF119
ELSE121: ; preds = %ENDIF116
%153 = call float @llvm.floor.f32(float %139)
%154 = fsub float %139, %153
%155 = call float @llvm.floor.f32(float %141)
%156 = fsub float %141, %155
%157 = call float @llvm.floor.f32(float %141)
br label %ENDIF119
ENDIF119: ; preds = %ELSE121, %IF120
%temp20.0 = phi float [ %28, %IF120 ], [ %27, %ELSE121 ]
%temp10.0 = phi float [ %152, %IF120 ], [ %157, %ELSE121 ]
%temp9.0 = phi float [ %150, %IF120 ], [ %156, %ELSE121 ]
%temp8.0 = phi float [ %148, %IF120 ], [ %154, %ELSE121 ]
%158 = fsub float %65, %24
%159 = fsub float %66, %25
%160 = fsub float %67, %26
%161 = fmul float %158, %158
%162 = fmul float %159, %159
%163 = fadd float %162, %161
%164 = fmul float %160, %160
%165 = fadd float %163, %164
%166 = fmul float %33, %165
%167 = call float @llvm.log2.f32(float %166)
%168 = fmul float %167, 0x3FE62E4300000000
%169 = fmul float %168, %32
%170 = fcmp une float %27, %temp12.0
%.sink149 = select i1 %170, float %30, float %29
%temp40.0 = select i1 %170, float 1.953125e-03, float 3.906250e-03
%171 = fdiv float 1.000000e+00, %.sink149
%172 = fmul float %67, %171
%173 = fmul float %66, %171
%174 = call float @llvm.floor.f32(float %172)
%175 = fsub float %172, %174
%176 = call float @llvm.floor.f32(float %173)
%177 = fsub float %173, %176
%178 = fmul float %31, 2.000000e+00
%179 = fmul float %178, %temp40.0
%180 = fsub float 1.000000e+00, %179
%181 = fmul float %temp40.0, %31
%182 = fmul float %175, %180
%183 = fadd float %182, %181
%184 = fmul float %177, %180
%185 = fadd float %184, %181
%186 = fmul float %183, %temp12.0
%187 = fadd float %186, %temp24.0
%188 = fmul float %185, %temp12.0
%189 = fadd float %188, %temp25.0
%190 = bitcast float %187 to i32
%191 = bitcast float %189 to i32
%192 = bitcast float %169 to i32
%193 = insertelement <4 x i32> undef, i32 %190, i32 0
%194 = insertelement <4 x i32> %193, i32 %191, i32 1
%195 = insertelement <4 x i32> %194, i32 %192, i32 2
%196 = bitcast <8 x i32> %51 to <32 x i8>
%197 = bitcast <4 x i32> %53 to <16 x i8>
%198 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %195, <32 x i8> %196, <16 x i8> %197, i32 2)
%199 = extractelement <4 x float> %198, i32 1
%200 = extractelement <4 x float> %198, i32 3
%201 = fcmp oeq float %temp26.0, 4.000000e+00
%202 = select i1 %201, float 1.000000e+00, float 0.000000e+00
%203 = bitcast float %187 to i32
%204 = bitcast float %189 to i32
%205 = bitcast float %169 to i32
%206 = insertelement <4 x i32> undef, i32 %203, i32 0
%207 = insertelement <4 x i32> %206, i32 %204, i32 1
%208 = insertelement <4 x i32> %207, i32 %205, i32 2
%209 = bitcast <8 x i32> %47 to <32 x i8>
%210 = bitcast <4 x i32> %49 to <16 x i8>
%211 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 2)
%212 = extractelement <4 x float> %211, i32 1
%213 = extractelement <4 x float> %211, i32 3
%214 = fcmp oeq float %temp26.0, 3.000000e+00
%215 = select i1 %214, float 1.000000e+00, float 0.000000e+00
%216 = bitcast float %187 to i32
%217 = bitcast float %189 to i32
%218 = bitcast float %169 to i32
%219 = insertelement <4 x i32> undef, i32 %216, i32 0
%220 = insertelement <4 x i32> %219, i32 %217, i32 1
%221 = insertelement <4 x i32> %220, i32 %218, i32 2
%222 = bitcast <8 x i32> %43 to <32 x i8>
%223 = bitcast <4 x i32> %45 to <16 x i8>
%224 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %221, <32 x i8> %222, <16 x i8> %223, i32 2)
%225 = extractelement <4 x float> %224, i32 1
%226 = extractelement <4 x float> %224, i32 3
%227 = fcmp oeq float %temp26.0, 2.000000e+00
%228 = select i1 %227, float 1.000000e+00, float 0.000000e+00
%229 = bitcast float %187 to i32
%230 = bitcast float %189 to i32
%231 = bitcast float %169 to i32
%232 = insertelement <4 x i32> undef, i32 %229, i32 0
%233 = insertelement <4 x i32> %232, i32 %230, i32 1
%234 = insertelement <4 x i32> %233, i32 %231, i32 2
%235 = bitcast <8 x i32> %39 to <32 x i8>
%236 = bitcast <4 x i32> %41 to <16 x i8>
%237 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %234, <32 x i8> %235, <16 x i8> %236, i32 2)
%238 = extractelement <4 x float> %237, i32 1
%239 = extractelement <4 x float> %237, i32 3
%240 = fcmp oeq float %temp26.0, 1.000000e+00
%241 = select i1 %240, float 1.000000e+00, float 0.000000e+00
%242 = bitcast float %187 to i32
%243 = bitcast float %189 to i32
%244 = bitcast float %169 to i32
%245 = insertelement <4 x i32> undef, i32 %242, i32 0
%246 = insertelement <4 x i32> %245, i32 %243, i32 1
%247 = insertelement <4 x i32> %246, i32 %244, i32 2
%248 = bitcast <8 x i32> %35 to <32 x i8>
%249 = bitcast <4 x i32> %37 to <16 x i8>
%250 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %247, <32 x i8> %248, <16 x i8> %249, i32 2)
%251 = extractelement <4 x float> %250, i32 1
%252 = extractelement <4 x float> %250, i32 3
%253 = fcmp oeq float %temp26.0, 0.000000e+00
%254 = select i1 %253, float 1.000000e+00, float 0.000000e+00
%255 = fmul float %251, %254
%256 = fmul float %252, %254
%257 = fmul float %238, %241
%258 = fadd float %257, %255
%259 = fmul float %239, %241
%260 = fadd float %259, %256
%261 = fmul float %225, %228
%262 = fadd float %261, %258
%263 = fmul float %226, %228
%264 = fadd float %263, %260
%265 = fmul float %212, %215
%266 = fadd float %265, %262
%267 = fmul float %213, %215
%268 = fadd float %267, %264
%269 = fmul float %199, %202
%270 = fadd float %269, %266
%271 = fmul float %200, %202
%272 = fadd float %271, %268
%273 = fmul float %272, 2.000000e+00
%274 = fadd float %273, -1.000000e+00
%275 = fmul float %270, 2.000000e+00
%276 = fadd float %275, -1.000000e+00
%277 = fmul float %274, %274
%278 = fmul float %276, %276
%279 = fadd float %277, %278
%280 = call float @llvm.AMDIL.clamp.(float %279, float 0.000000e+00, float 1.000000e+00)
%281 = fcmp une float %27, %temp12.0
%.sink150 = select i1 %281, float %30, float %29
%temp44.0 = select i1 %281, float 1.953125e-03, float 3.906250e-03
%282 = fdiv float 1.000000e+00, %.sink150
%283 = fmul float %67, %282
%284 = fmul float %65, %282
%285 = call float @llvm.floor.f32(float %283)
%286 = fsub float %283, %285
%287 = call float @llvm.floor.f32(float %284)
%288 = fsub float %284, %287
%289 = fmul float %31, 2.000000e+00
%290 = fmul float %289, %temp44.0
%291 = fsub float 1.000000e+00, %290
%292 = fmul float %temp44.0, %31
%293 = fmul float %286, %291
%294 = fadd float %293, %292
%295 = fmul float %288, %291
%296 = fadd float %295, %292
%297 = fmul float %294, %temp12.0
%298 = fadd float %297, %temp24.0
%299 = fmul float %296, %temp12.0
%300 = fadd float %299, %temp25.0
%301 = bitcast float %298 to i32
%302 = bitcast float %300 to i32
%303 = bitcast float %169 to i32
%304 = insertelement <4 x i32> undef, i32 %301, i32 0
%305 = insertelement <4 x i32> %304, i32 %302, i32 1
%306 = insertelement <4 x i32> %305, i32 %303, i32 2
%307 = bitcast <8 x i32> %51 to <32 x i8>
%308 = bitcast <4 x i32> %53 to <16 x i8>
%309 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %306, <32 x i8> %307, <16 x i8> %308, i32 2)
%310 = extractelement <4 x float> %309, i32 1
%311 = extractelement <4 x float> %309, i32 3
%312 = fcmp oeq float %temp26.0, 4.000000e+00
%313 = select i1 %312, float 1.000000e+00, float 0.000000e+00
%314 = bitcast float %298 to i32
%315 = bitcast float %300 to i32
%316 = bitcast float %169 to i32
%317 = insertelement <4 x i32> undef, i32 %314, i32 0
%318 = insertelement <4 x i32> %317, i32 %315, i32 1
%319 = insertelement <4 x i32> %318, i32 %316, i32 2
%320 = bitcast <8 x i32> %47 to <32 x i8>
%321 = bitcast <4 x i32> %49 to <16 x i8>
%322 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %319, <32 x i8> %320, <16 x i8> %321, i32 2)
%323 = extractelement <4 x float> %322, i32 1
%324 = extractelement <4 x float> %322, i32 3
%325 = fcmp oeq float %temp26.0, 3.000000e+00
%326 = select i1 %325, float 1.000000e+00, float 0.000000e+00
%327 = bitcast float %298 to i32
%328 = bitcast float %300 to i32
%329 = bitcast float %169 to i32
%330 = insertelement <4 x i32> undef, i32 %327, i32 0
%331 = insertelement <4 x i32> %330, i32 %328, i32 1
%332 = insertelement <4 x i32> %331, i32 %329, i32 2
%333 = bitcast <8 x i32> %43 to <32 x i8>
%334 = bitcast <4 x i32> %45 to <16 x i8>
%335 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %332, <32 x i8> %333, <16 x i8> %334, i32 2)
%336 = extractelement <4 x float> %335, i32 1
%337 = extractelement <4 x float> %335, i32 3
%338 = fcmp oeq float %temp26.0, 2.000000e+00
%339 = select i1 %338, float 1.000000e+00, float 0.000000e+00
%340 = bitcast float %298 to i32
%341 = bitcast float %300 to i32
%342 = bitcast float %169 to i32
%343 = insertelement <4 x i32> undef, i32 %340, i32 0
%344 = insertelement <4 x i32> %343, i32 %341, i32 1
%345 = insertelement <4 x i32> %344, i32 %342, i32 2
%346 = bitcast <8 x i32> %39 to <32 x i8>
%347 = bitcast <4 x i32> %41 to <16 x i8>
%348 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %345, <32 x i8> %346, <16 x i8> %347, i32 2)
%349 = extractelement <4 x float> %348, i32 1
%350 = extractelement <4 x float> %348, i32 3
%351 = fcmp oeq float %temp26.0, 1.000000e+00
%352 = select i1 %351, float 1.000000e+00, float 0.000000e+00
%353 = bitcast float %298 to i32
%354 = bitcast float %300 to i32
%355 = bitcast float %169 to i32
%356 = insertelement <4 x i32> undef, i32 %353, i32 0
%357 = insertelement <4 x i32> %356, i32 %354, i32 1
%358 = insertelement <4 x i32> %357, i32 %355, i32 2
%359 = bitcast <8 x i32> %35 to <32 x i8>
%360 = bitcast <4 x i32> %37 to <16 x i8>
%361 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %358, <32 x i8> %359, <16 x i8> %360, i32 2)
%362 = extractelement <4 x float> %361, i32 1
%363 = extractelement <4 x float> %361, i32 3
%364 = fcmp oeq float %temp26.0, 0.000000e+00
%365 = select i1 %364, float 1.000000e+00, float 0.000000e+00
%366 = fmul float %362, %365
%367 = fmul float %363, %365
%368 = fmul float %349, %352
%369 = fadd float %368, %366
%370 = fmul float %350, %352
%371 = fadd float %370, %367
%372 = fmul float %336, %339
%373 = fadd float %372, %369
%374 = fmul float %337, %339
%375 = fadd float %374, %371
%376 = fmul float %323, %326
%377 = fadd float %376, %373
%378 = fmul float %324, %326
%379 = fadd float %378, %375
%380 = fmul float %310, %313
%381 = fadd float %380, %377
%382 = fmul float %311, %313
%383 = fadd float %382, %379
%384 = fmul float %383, 2.000000e+00
%385 = fadd float %384, -1.000000e+00
%386 = fmul float %381, 2.000000e+00
%387 = fadd float %386, -1.000000e+00
%388 = fmul float %385, %385
%389 = fmul float %387, %387
%390 = fadd float %388, %389
%391 = call float @llvm.AMDIL.clamp.(float %390, float 0.000000e+00, float 1.000000e+00)
%392 = fcmp une float %27, %temp12.0
%.sink151 = select i1 %392, float %30, float %29
%temp48.0 = select i1 %392, float 1.953125e-03, float 3.906250e-03
%393 = fdiv float 1.000000e+00, %.sink151
%394 = fmul float %65, %393
%395 = fmul float %66, %393
%396 = call float @llvm.floor.f32(float %394)
%397 = fsub float %394, %396
%398 = call float @llvm.floor.f32(float %395)
%399 = fsub float %395, %398
%400 = fmul float %31, 2.000000e+00
%401 = fmul float %400, %temp48.0
%402 = fsub float 1.000000e+00, %401
%403 = fmul float %temp48.0, %31
%404 = fmul float %397, %402
%405 = fadd float %404, %403
%406 = fmul float %399, %402
%407 = fadd float %406, %403
%408 = fmul float %405, %temp12.0
%409 = fadd float %408, %temp24.0
%410 = fmul float %407, %temp12.0
%411 = fadd float %410, %temp25.0
%412 = bitcast float %409 to i32
%413 = bitcast float %411 to i32
%414 = bitcast float %169 to i32
%415 = insertelement <4 x i32> undef, i32 %412, i32 0
%416 = insertelement <4 x i32> %415, i32 %413, i32 1
%417 = insertelement <4 x i32> %416, i32 %414, i32 2
%418 = bitcast <8 x i32> %51 to <32 x i8>
%419 = bitcast <4 x i32> %53 to <16 x i8>
%420 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %417, <32 x i8> %418, <16 x i8> %419, i32 2)
%421 = extractelement <4 x float> %420, i32 1
%422 = extractelement <4 x float> %420, i32 3
%423 = fcmp oeq float %temp26.0, 4.000000e+00
%424 = select i1 %423, float 1.000000e+00, float 0.000000e+00
%425 = bitcast float %409 to i32
%426 = bitcast float %411 to i32
%427 = bitcast float %169 to i32
%428 = insertelement <4 x i32> undef, i32 %425, i32 0
%429 = insertelement <4 x i32> %428, i32 %426, i32 1
%430 = insertelement <4 x i32> %429, i32 %427, i32 2
%431 = bitcast <8 x i32> %47 to <32 x i8>
%432 = bitcast <4 x i32> %49 to <16 x i8>
%433 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %430, <32 x i8> %431, <16 x i8> %432, i32 2)
%434 = extractelement <4 x float> %433, i32 1
%435 = extractelement <4 x float> %433, i32 3
%436 = fcmp oeq float %temp26.0, 3.000000e+00
%437 = select i1 %436, float 1.000000e+00, float 0.000000e+00
%438 = bitcast float %409 to i32
%439 = bitcast float %411 to i32
%440 = bitcast float %169 to i32
%441 = insertelement <4 x i32> undef, i32 %438, i32 0
%442 = insertelement <4 x i32> %441, i32 %439, i32 1
%443 = insertelement <4 x i32> %442, i32 %440, i32 2
%444 = bitcast <8 x i32> %43 to <32 x i8>
%445 = bitcast <4 x i32> %45 to <16 x i8>
%446 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %443, <32 x i8> %444, <16 x i8> %445, i32 2)
%447 = extractelement <4 x float> %446, i32 1
%448 = extractelement <4 x float> %446, i32 3
%449 = fcmp oeq float %temp26.0, 2.000000e+00
%450 = select i1 %449, float 1.000000e+00, float 0.000000e+00
%451 = bitcast float %409 to i32
%452 = bitcast float %411 to i32
%453 = bitcast float %169 to i32
%454 = insertelement <4 x i32> undef, i32 %451, i32 0
%455 = insertelement <4 x i32> %454, i32 %452, i32 1
%456 = insertelement <4 x i32> %455, i32 %453, i32 2
%457 = bitcast <8 x i32> %39 to <32 x i8>
%458 = bitcast <4 x i32> %41 to <16 x i8>
%459 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %456, <32 x i8> %457, <16 x i8> %458, i32 2)
%460 = extractelement <4 x float> %459, i32 1
%461 = extractelement <4 x float> %459, i32 3
%462 = fcmp oeq float %temp26.0, 1.000000e+00
%463 = select i1 %462, float 1.000000e+00, float 0.000000e+00
%464 = bitcast float %409 to i32
%465 = bitcast float %411 to i32
%466 = bitcast float %169 to i32
%467 = insertelement <4 x i32> undef, i32 %464, i32 0
%468 = insertelement <4 x i32> %467, i32 %465, i32 1
%469 = insertelement <4 x i32> %468, i32 %466, i32 2
%470 = bitcast <8 x i32> %35 to <32 x i8>
%471 = bitcast <4 x i32> %37 to <16 x i8>
%472 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %469, <32 x i8> %470, <16 x i8> %471, i32 2)
%473 = extractelement <4 x float> %472, i32 1
%474 = extractelement <4 x float> %472, i32 3
%475 = fcmp oeq float %temp26.0, 0.000000e+00
%476 = select i1 %475, float 1.000000e+00, float 0.000000e+00
%477 = fmul float %473, %476
%478 = fmul float %474, %476
%479 = fmul float %460, %463
%480 = fadd float %479, %477
%481 = fmul float %461, %463
%482 = fadd float %481, %478
%483 = fmul float %447, %450
%484 = fadd float %483, %480
%485 = fmul float %448, %450
%486 = fadd float %485, %482
%487 = fmul float %434, %437
%488 = fadd float %487, %484
%489 = fmul float %435, %437
%490 = fadd float %489, %486
%491 = fmul float %421, %424
%492 = fadd float %491, %488
%493 = fmul float %422, %424
%494 = fadd float %493, %490
%495 = fmul float %494, 2.000000e+00
%496 = fadd float %495, -1.000000e+00
%497 = fmul float %492, 2.000000e+00
%498 = fadd float %497, -1.000000e+00
%499 = fmul float %496, %496
%500 = fmul float %498, %498
%501 = fadd float %499, %500
%502 = call float @llvm.AMDIL.clamp.(float %501, float 0.000000e+00, float 1.000000e+00)
%503 = fmul float %92, 0.000000e+00
%504 = fmul float %274, %92
%505 = fmul float %276, %92
%506 = fmul float %387, %93
%507 = fadd float %506, %503
%508 = fmul float %93, 0.000000e+00
%509 = fadd float %508, %504
%510 = fmul float %385, %93
%511 = fadd float %510, %505
%512 = fmul float %496, %94
%513 = fadd float %512, %507
%514 = fmul float %498, %94
%515 = fadd float %514, %509
%516 = fmul float %94, 0.000000e+00
%517 = fadd float %516, %511
%518 = fcmp une float %27, %temp16.0
%.sink152 = select i1 %518, float %30, float %29
%temp36.1 = select i1 %518, float 1.953125e-03, float 3.906250e-03
%519 = fdiv float 1.000000e+00, %.sink152
%520 = fmul float %67, %519
%521 = fmul float %66, %519
%522 = call float @llvm.floor.f32(float %520)
%523 = fsub float %520, %522
%524 = call float @llvm.floor.f32(float %521)
%525 = fsub float %521, %524
%526 = fmul float %31, 2.000000e+00
%527 = fmul float %526, %temp36.1
%528 = fsub float 1.000000e+00, %527
%529 = fmul float %temp36.1, %31
%530 = fmul float %523, %528
%531 = fadd float %530, %529
%532 = fmul float %525, %528
%533 = fadd float %532, %529
%534 = fmul float %531, %temp16.0
%535 = fadd float %534, %temp32.0
%536 = fmul float %533, %temp16.0
%537 = fadd float %536, %temp33.0
%538 = bitcast float %535 to i32
%539 = bitcast float %537 to i32
%540 = bitcast float %169 to i32
%541 = insertelement <4 x i32> undef, i32 %538, i32 0
%542 = insertelement <4 x i32> %541, i32 %539, i32 1
%543 = insertelement <4 x i32> %542, i32 %540, i32 2
%544 = bitcast <8 x i32> %51 to <32 x i8>
%545 = bitcast <4 x i32> %53 to <16 x i8>
%546 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %543, <32 x i8> %544, <16 x i8> %545, i32 2)
%547 = extractelement <4 x float> %546, i32 1
%548 = extractelement <4 x float> %546, i32 3
%549 = fcmp oeq float %temp34.0, 4.000000e+00
%550 = select i1 %549, float 1.000000e+00, float 0.000000e+00
%551 = bitcast float %535 to i32
%552 = bitcast float %537 to i32
%553 = bitcast float %169 to i32
%554 = insertelement <4 x i32> undef, i32 %551, i32 0
%555 = insertelement <4 x i32> %554, i32 %552, i32 1
%556 = insertelement <4 x i32> %555, i32 %553, i32 2
%557 = bitcast <8 x i32> %47 to <32 x i8>
%558 = bitcast <4 x i32> %49 to <16 x i8>
%559 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %556, <32 x i8> %557, <16 x i8> %558, i32 2)
%560 = extractelement <4 x float> %559, i32 1
%561 = extractelement <4 x float> %559, i32 3
%562 = fcmp oeq float %temp34.0, 3.000000e+00
%563 = select i1 %562, float 1.000000e+00, float 0.000000e+00
%564 = bitcast float %535 to i32
%565 = bitcast float %537 to i32
%566 = bitcast float %169 to i32
%567 = insertelement <4 x i32> undef, i32 %564, i32 0
%568 = insertelement <4 x i32> %567, i32 %565, i32 1
%569 = insertelement <4 x i32> %568, i32 %566, i32 2
%570 = bitcast <8 x i32> %43 to <32 x i8>
%571 = bitcast <4 x i32> %45 to <16 x i8>
%572 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %569, <32 x i8> %570, <16 x i8> %571, i32 2)
%573 = extractelement <4 x float> %572, i32 1
%574 = extractelement <4 x float> %572, i32 3
%575 = fcmp oeq float %temp34.0, 2.000000e+00
%576 = select i1 %575, float 1.000000e+00, float 0.000000e+00
%577 = bitcast float %535 to i32
%578 = bitcast float %537 to i32
%579 = bitcast float %169 to i32
%580 = insertelement <4 x i32> undef, i32 %577, i32 0
%581 = insertelement <4 x i32> %580, i32 %578, i32 1
%582 = insertelement <4 x i32> %581, i32 %579, i32 2
%583 = bitcast <8 x i32> %39 to <32 x i8>
%584 = bitcast <4 x i32> %41 to <16 x i8>
%585 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %582, <32 x i8> %583, <16 x i8> %584, i32 2)
%586 = extractelement <4 x float> %585, i32 1
%587 = extractelement <4 x float> %585, i32 3
%588 = fcmp oeq float %temp34.0, 1.000000e+00
%589 = select i1 %588, float 1.000000e+00, float 0.000000e+00
%590 = bitcast float %535 to i32
%591 = bitcast float %537 to i32
%592 = bitcast float %169 to i32
%593 = insertelement <4 x i32> undef, i32 %590, i32 0
%594 = insertelement <4 x i32> %593, i32 %591, i32 1
%595 = insertelement <4 x i32> %594, i32 %592, i32 2
%596 = bitcast <8 x i32> %35 to <32 x i8>
%597 = bitcast <4 x i32> %37 to <16 x i8>
%598 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %595, <32 x i8> %596, <16 x i8> %597, i32 2)
%599 = extractelement <4 x float> %598, i32 1
%600 = extractelement <4 x float> %598, i32 3
%601 = fcmp oeq float %temp34.0, 0.000000e+00
%602 = select i1 %601, float 1.000000e+00, float 0.000000e+00
%603 = fmul float %599, %602
%604 = fmul float %600, %602
%605 = fmul float %586, %589
%606 = fadd float %605, %603
%607 = fmul float %587, %589
%608 = fadd float %607, %604
%609 = fmul float %573, %576
%610 = fadd float %609, %606
%611 = fmul float %574, %576
%612 = fadd float %611, %608
%613 = fmul float %560, %563
%614 = fadd float %613, %610
%615 = fmul float %561, %563
%616 = fadd float %615, %612
%617 = fmul float %547, %550
%618 = fadd float %617, %614
%619 = fmul float %548, %550
%620 = fadd float %619, %616
%621 = fmul float %620, 2.000000e+00
%622 = fadd float %621, -1.000000e+00
%623 = fmul float %618, 2.000000e+00
%624 = fadd float %623, -1.000000e+00
%625 = fmul float %622, %622
%626 = fmul float %624, %624
%627 = fadd float %625, %626
%628 = call float @llvm.AMDIL.clamp.(float %627, float 0.000000e+00, float 1.000000e+00)
%629 = fcmp une float %27, %temp16.0
%.sink153 = select i1 %629, float %30, float %29
%temp40.2 = select i1 %629, float 1.953125e-03, float 3.906250e-03
%630 = fdiv float 1.000000e+00, %.sink153
%631 = fmul float %67, %630
%632 = fmul float %65, %630
%633 = call float @llvm.floor.f32(float %631)
%634 = fsub float %631, %633
%635 = call float @llvm.floor.f32(float %632)
%636 = fsub float %632, %635
%637 = fmul float %31, 2.000000e+00
%638 = fmul float %637, %temp40.2
%639 = fsub float 1.000000e+00, %638
%640 = fmul float %temp40.2, %31
%641 = fmul float %634, %639
%642 = fadd float %641, %640
%643 = fmul float %636, %639
%644 = fadd float %643, %640
%645 = fmul float %642, %temp16.0
%646 = fadd float %645, %temp32.0
%647 = fmul float %644, %temp16.0
%648 = fadd float %647, %temp33.0
%649 = bitcast float %646 to i32
%650 = bitcast float %648 to i32
%651 = bitcast float %169 to i32
%652 = insertelement <4 x i32> undef, i32 %649, i32 0
%653 = insertelement <4 x i32> %652, i32 %650, i32 1
%654 = insertelement <4 x i32> %653, i32 %651, i32 2
%655 = bitcast <8 x i32> %51 to <32 x i8>
%656 = bitcast <4 x i32> %53 to <16 x i8>
%657 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %654, <32 x i8> %655, <16 x i8> %656, i32 2)
%658 = extractelement <4 x float> %657, i32 1
%659 = extractelement <4 x float> %657, i32 3
%660 = fcmp oeq float %temp34.0, 4.000000e+00
%661 = select i1 %660, float 1.000000e+00, float 0.000000e+00
%662 = bitcast float %646 to i32
%663 = bitcast float %648 to i32
%664 = bitcast float %169 to i32
%665 = insertelement <4 x i32> undef, i32 %662, i32 0
%666 = insertelement <4 x i32> %665, i32 %663, i32 1
%667 = insertelement <4 x i32> %666, i32 %664, i32 2
%668 = bitcast <8 x i32> %47 to <32 x i8>
%669 = bitcast <4 x i32> %49 to <16 x i8>
%670 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %667, <32 x i8> %668, <16 x i8> %669, i32 2)
%671 = extractelement <4 x float> %670, i32 1
%672 = extractelement <4 x float> %670, i32 3
%673 = fcmp oeq float %temp34.0, 3.000000e+00
%674 = select i1 %673, float 1.000000e+00, float 0.000000e+00
%675 = bitcast float %646 to i32
%676 = bitcast float %648 to i32
%677 = bitcast float %169 to i32
%678 = insertelement <4 x i32> undef, i32 %675, i32 0
%679 = insertelement <4 x i32> %678, i32 %676, i32 1
%680 = insertelement <4 x i32> %679, i32 %677, i32 2
%681 = bitcast <8 x i32> %43 to <32 x i8>
%682 = bitcast <4 x i32> %45 to <16 x i8>
%683 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %680, <32 x i8> %681, <16 x i8> %682, i32 2)
%684 = extractelement <4 x float> %683, i32 1
%685 = extractelement <4 x float> %683, i32 3
%686 = fcmp oeq float %temp34.0, 2.000000e+00
%687 = select i1 %686, float 1.000000e+00, float 0.000000e+00
%688 = bitcast float %646 to i32
%689 = bitcast float %648 to i32
%690 = bitcast float %169 to i32
%691 = insertelement <4 x i32> undef, i32 %688, i32 0
%692 = insertelement <4 x i32> %691, i32 %689, i32 1
%693 = insertelement <4 x i32> %692, i32 %690, i32 2
%694 = bitcast <8 x i32> %39 to <32 x i8>
%695 = bitcast <4 x i32> %41 to <16 x i8>
%696 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %693, <32 x i8> %694, <16 x i8> %695, i32 2)
%697 = extractelement <4 x float> %696, i32 1
%698 = extractelement <4 x float> %696, i32 3
%699 = fcmp oeq float %temp34.0, 1.000000e+00
%700 = select i1 %699, float 1.000000e+00, float 0.000000e+00
%701 = bitcast float %646 to i32
%702 = bitcast float %648 to i32
%703 = bitcast float %169 to i32
%704 = insertelement <4 x i32> undef, i32 %701, i32 0
%705 = insertelement <4 x i32> %704, i32 %702, i32 1
%706 = insertelement <4 x i32> %705, i32 %703, i32 2
%707 = bitcast <8 x i32> %35 to <32 x i8>
%708 = bitcast <4 x i32> %37 to <16 x i8>
%709 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %706, <32 x i8> %707, <16 x i8> %708, i32 2)
%710 = extractelement <4 x float> %709, i32 1
%711 = extractelement <4 x float> %709, i32 3
%712 = fcmp oeq float %temp34.0, 0.000000e+00
%713 = select i1 %712, float 1.000000e+00, float 0.000000e+00
%714 = fmul float %710, %713
%715 = fmul float %711, %713
%716 = fmul float %697, %700
%717 = fadd float %716, %714
%718 = fmul float %698, %700
%719 = fadd float %718, %715
%720 = fmul float %684, %687
%721 = fadd float %720, %717
%722 = fmul float %685, %687
%723 = fadd float %722, %719
%724 = fmul float %671, %674
%725 = fadd float %724, %721
%726 = fmul float %672, %674
%727 = fadd float %726, %723
%728 = fmul float %658, %661
%729 = fadd float %728, %725
%730 = fmul float %659, %661
%731 = fadd float %730, %727
%732 = fmul float %731, 2.000000e+00
%733 = fadd float %732, -1.000000e+00
%734 = fmul float %729, 2.000000e+00
%735 = fadd float %734, -1.000000e+00
%736 = fmul float %733, %733
%737 = fmul float %735, %735
%738 = fadd float %736, %737
%739 = call float @llvm.AMDIL.clamp.(float %738, float 0.000000e+00, float 1.000000e+00)
%740 = fcmp une float %27, %temp16.0
%.sink154 = select i1 %740, float %30, float %29
%temp44.2 = select i1 %740, float 1.953125e-03, float 3.906250e-03
%741 = fdiv float 1.000000e+00, %.sink154
%742 = fmul float %65, %741
%743 = fmul float %66, %741
%744 = call float @llvm.floor.f32(float %742)
%745 = fsub float %742, %744
%746 = call float @llvm.floor.f32(float %743)
%747 = fsub float %743, %746
%748 = fmul float %31, 2.000000e+00
%749 = fmul float %748, %temp44.2
%750 = fsub float 1.000000e+00, %749
%751 = fmul float %temp44.2, %31
%752 = fmul float %745, %750
%753 = fadd float %752, %751
%754 = fmul float %747, %750
%755 = fadd float %754, %751
%756 = fmul float %753, %temp16.0
%757 = fadd float %756, %temp32.0
%758 = fmul float %755, %temp16.0
%759 = fadd float %758, %temp33.0
%760 = bitcast float %757 to i32
%761 = bitcast float %759 to i32
%762 = bitcast float %169 to i32
%763 = insertelement <4 x i32> undef, i32 %760, i32 0
%764 = insertelement <4 x i32> %763, i32 %761, i32 1
%765 = insertelement <4 x i32> %764, i32 %762, i32 2
%766 = bitcast <8 x i32> %51 to <32 x i8>
%767 = bitcast <4 x i32> %53 to <16 x i8>
%768 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %765, <32 x i8> %766, <16 x i8> %767, i32 2)
%769 = extractelement <4 x float> %768, i32 1
%770 = extractelement <4 x float> %768, i32 3
%771 = fcmp oeq float %temp34.0, 4.000000e+00
%772 = select i1 %771, float 1.000000e+00, float 0.000000e+00
%773 = bitcast float %757 to i32
%774 = bitcast float %759 to i32
%775 = bitcast float %169 to i32
%776 = insertelement <4 x i32> undef, i32 %773, i32 0
%777 = insertelement <4 x i32> %776, i32 %774, i32 1
%778 = insertelement <4 x i32> %777, i32 %775, i32 2
%779 = bitcast <8 x i32> %47 to <32 x i8>
%780 = bitcast <4 x i32> %49 to <16 x i8>
%781 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %778, <32 x i8> %779, <16 x i8> %780, i32 2)
%782 = extractelement <4 x float> %781, i32 1
%783 = extractelement <4 x float> %781, i32 3
%784 = fcmp oeq float %temp34.0, 3.000000e+00
%785 = select i1 %784, float 1.000000e+00, float 0.000000e+00
%786 = bitcast float %757 to i32
%787 = bitcast float %759 to i32
%788 = bitcast float %169 to i32
%789 = insertelement <4 x i32> undef, i32 %786, i32 0
%790 = insertelement <4 x i32> %789, i32 %787, i32 1
%791 = insertelement <4 x i32> %790, i32 %788, i32 2
%792 = bitcast <8 x i32> %43 to <32 x i8>
%793 = bitcast <4 x i32> %45 to <16 x i8>
%794 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %791, <32 x i8> %792, <16 x i8> %793, i32 2)
%795 = extractelement <4 x float> %794, i32 1
%796 = extractelement <4 x float> %794, i32 3
%797 = fcmp oeq float %temp34.0, 2.000000e+00
%798 = select i1 %797, float 1.000000e+00, float 0.000000e+00
%799 = bitcast float %757 to i32
%800 = bitcast float %759 to i32
%801 = bitcast float %169 to i32
%802 = insertelement <4 x i32> undef, i32 %799, i32 0
%803 = insertelement <4 x i32> %802, i32 %800, i32 1
%804 = insertelement <4 x i32> %803, i32 %801, i32 2
%805 = bitcast <8 x i32> %39 to <32 x i8>
%806 = bitcast <4 x i32> %41 to <16 x i8>
%807 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %804, <32 x i8> %805, <16 x i8> %806, i32 2)
%808 = extractelement <4 x float> %807, i32 1
%809 = extractelement <4 x float> %807, i32 3
%810 = fcmp oeq float %temp34.0, 1.000000e+00
%811 = select i1 %810, float 1.000000e+00, float 0.000000e+00
%812 = bitcast float %757 to i32
%813 = bitcast float %759 to i32
%814 = bitcast float %169 to i32
%815 = insertelement <4 x i32> undef, i32 %812, i32 0
%816 = insertelement <4 x i32> %815, i32 %813, i32 1
%817 = insertelement <4 x i32> %816, i32 %814, i32 2
%818 = bitcast <8 x i32> %35 to <32 x i8>
%819 = bitcast <4 x i32> %37 to <16 x i8>
%820 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %817, <32 x i8> %818, <16 x i8> %819, i32 2)
%821 = extractelement <4 x float> %820, i32 1
%822 = extractelement <4 x float> %820, i32 3
%823 = fcmp oeq float %temp34.0, 0.000000e+00
%824 = select i1 %823, float 1.000000e+00, float 0.000000e+00
%825 = fmul float %821, %824
%826 = fmul float %822, %824
%827 = fmul float %808, %811
%828 = fadd float %827, %825
%829 = fmul float %809, %811
%830 = fadd float %829, %826
%831 = fmul float %795, %798
%832 = fadd float %831, %828
%833 = fmul float %796, %798
%834 = fadd float %833, %830
%835 = fmul float %782, %785
%836 = fadd float %835, %832
%837 = fmul float %783, %785
%838 = fadd float %837, %834
%839 = fmul float %769, %772
%840 = fadd float %839, %836
%841 = fmul float %770, %772
%842 = fadd float %841, %838
%843 = fmul float %842, 2.000000e+00
%844 = fadd float %843, -1.000000e+00
%845 = fmul float %840, 2.000000e+00
%846 = fadd float %845, -1.000000e+00
%847 = fmul float %844, %844
%848 = fmul float %846, %846
%849 = fadd float %847, %848
%850 = call float @llvm.AMDIL.clamp.(float %849, float 0.000000e+00, float 1.000000e+00)
%851 = fmul float %92, 0.000000e+00
%852 = fmul float %622, %92
%853 = fmul float %624, %92
%854 = fmul float %735, %93
%855 = fadd float %854, %851
%856 = fmul float %93, 0.000000e+00
%857 = fadd float %856, %852
%858 = fmul float %733, %93
%859 = fadd float %858, %853
%860 = fmul float %844, %94
%861 = fadd float %860, %855
%862 = fmul float %846, %94
%863 = fadd float %862, %857
%864 = fmul float %94, 0.000000e+00
%865 = fadd float %864, %859
%866 = fcmp une float %27, %temp20.0
%.sink155 = select i1 %866, float %30, float %29
%temp32.1 = select i1 %866, float 1.953125e-03, float 3.906250e-03
%867 = fdiv float 1.000000e+00, %.sink155
%868 = fmul float %67, %867
%869 = fmul float %66, %867
%870 = call float @llvm.floor.f32(float %868)
%871 = fsub float %868, %870
%872 = call float @llvm.floor.f32(float %869)
%873 = fsub float %869, %872
%874 = fmul float %31, 2.000000e+00
%875 = fmul float %874, %temp32.1
%876 = fsub float 1.000000e+00, %875
%877 = fmul float %temp32.1, %31
%878 = fmul float %871, %876
%879 = fadd float %878, %877
%880 = fmul float %873, %876
%881 = fadd float %880, %877
%882 = fmul float %879, %temp20.0
%883 = fadd float %882, %temp8.0
%884 = fmul float %881, %temp20.0
%885 = fadd float %884, %temp9.0
%886 = bitcast float %883 to i32
%887 = bitcast float %885 to i32
%888 = bitcast float %169 to i32
%889 = insertelement <4 x i32> undef, i32 %886, i32 0
%890 = insertelement <4 x i32> %889, i32 %887, i32 1
%891 = insertelement <4 x i32> %890, i32 %888, i32 2
%892 = bitcast <8 x i32> %51 to <32 x i8>
%893 = bitcast <4 x i32> %53 to <16 x i8>
%894 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %891, <32 x i8> %892, <16 x i8> %893, i32 2)
%895 = extractelement <4 x float> %894, i32 1
%896 = extractelement <4 x float> %894, i32 3
%897 = fcmp oeq float %temp10.0, 4.000000e+00
%898 = select i1 %897, float 1.000000e+00, float 0.000000e+00
%899 = bitcast float %883 to i32
%900 = bitcast float %885 to i32
%901 = bitcast float %169 to i32
%902 = insertelement <4 x i32> undef, i32 %899, i32 0
%903 = insertelement <4 x i32> %902, i32 %900, i32 1
%904 = insertelement <4 x i32> %903, i32 %901, i32 2
%905 = bitcast <8 x i32> %47 to <32 x i8>
%906 = bitcast <4 x i32> %49 to <16 x i8>
%907 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %904, <32 x i8> %905, <16 x i8> %906, i32 2)
%908 = extractelement <4 x float> %907, i32 1
%909 = extractelement <4 x float> %907, i32 3
%910 = fcmp oeq float %temp10.0, 3.000000e+00
%911 = select i1 %910, float 1.000000e+00, float 0.000000e+00
%912 = bitcast float %883 to i32
%913 = bitcast float %885 to i32
%914 = bitcast float %169 to i32
%915 = insertelement <4 x i32> undef, i32 %912, i32 0
%916 = insertelement <4 x i32> %915, i32 %913, i32 1
%917 = insertelement <4 x i32> %916, i32 %914, i32 2
%918 = bitcast <8 x i32> %43 to <32 x i8>
%919 = bitcast <4 x i32> %45 to <16 x i8>
%920 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %917, <32 x i8> %918, <16 x i8> %919, i32 2)
%921 = extractelement <4 x float> %920, i32 1
%922 = extractelement <4 x float> %920, i32 3
%923 = fcmp oeq float %temp10.0, 2.000000e+00
%924 = select i1 %923, float 1.000000e+00, float 0.000000e+00
%925 = bitcast float %883 to i32
%926 = bitcast float %885 to i32
%927 = bitcast float %169 to i32
%928 = insertelement <4 x i32> undef, i32 %925, i32 0
%929 = insertelement <4 x i32> %928, i32 %926, i32 1
%930 = insertelement <4 x i32> %929, i32 %927, i32 2
%931 = bitcast <8 x i32> %39 to <32 x i8>
%932 = bitcast <4 x i32> %41 to <16 x i8>
%933 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %930, <32 x i8> %931, <16 x i8> %932, i32 2)
%934 = extractelement <4 x float> %933, i32 1
%935 = extractelement <4 x float> %933, i32 3
%936 = fcmp oeq float %temp10.0, 1.000000e+00
%937 = select i1 %936, float 1.000000e+00, float 0.000000e+00
%938 = bitcast float %883 to i32
%939 = bitcast float %885 to i32
%940 = bitcast float %169 to i32
%941 = insertelement <4 x i32> undef, i32 %938, i32 0
%942 = insertelement <4 x i32> %941, i32 %939, i32 1
%943 = insertelement <4 x i32> %942, i32 %940, i32 2
%944 = bitcast <8 x i32> %35 to <32 x i8>
%945 = bitcast <4 x i32> %37 to <16 x i8>
%946 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %943, <32 x i8> %944, <16 x i8> %945, i32 2)
%947 = extractelement <4 x float> %946, i32 1
%948 = extractelement <4 x float> %946, i32 3
%949 = fcmp oeq float %temp10.0, 0.000000e+00
%950 = select i1 %949, float 1.000000e+00, float 0.000000e+00
%951 = fmul float %947, %950
%952 = fmul float %948, %950
%953 = fmul float %934, %937
%954 = fadd float %953, %951
%955 = fmul float %935, %937
%956 = fadd float %955, %952
%957 = fmul float %921, %924
%958 = fadd float %957, %954
%959 = fmul float %922, %924
%960 = fadd float %959, %956
%961 = fmul float %908, %911
%962 = fadd float %961, %958
%963 = fmul float %909, %911
%964 = fadd float %963, %960
%965 = fmul float %895, %898
%966 = fadd float %965, %962
%967 = fmul float %896, %898
%968 = fadd float %967, %964
%969 = fmul float %968, 2.000000e+00
%970 = fadd float %969, -1.000000e+00
%971 = fmul float %966, 2.000000e+00
%972 = fadd float %971, -1.000000e+00
%973 = fmul float %970, %970
%974 = fmul float %972, %972
%975 = fadd float %973, %974
%976 = call float @llvm.AMDIL.clamp.(float %975, float 0.000000e+00, float 1.000000e+00)
%977 = fcmp une float %27, %temp20.0
%.sink156 = select i1 %977, float %30, float %29
%temp36.3 = select i1 %977, float 1.953125e-03, float 3.906250e-03
%978 = fdiv float 1.000000e+00, %.sink156
%979 = fmul float %67, %978
%980 = fmul float %65, %978
%981 = call float @llvm.floor.f32(float %979)
%982 = fsub float %979, %981
%983 = call float @llvm.floor.f32(float %980)
%984 = fsub float %980, %983
%985 = fmul float %31, 2.000000e+00
%986 = fmul float %985, %temp36.3
%987 = fsub float 1.000000e+00, %986
%988 = fmul float %temp36.3, %31
%989 = fmul float %982, %987
%990 = fadd float %989, %988
%991 = fmul float %984, %987
%992 = fadd float %991, %988
%993 = fmul float %990, %temp20.0
%994 = fadd float %993, %temp8.0
%995 = fmul float %992, %temp20.0
%996 = fadd float %995, %temp9.0
%997 = bitcast float %994 to i32
%998 = bitcast float %996 to i32
%999 = bitcast float %169 to i32
%1000 = insertelement <4 x i32> undef, i32 %997, i32 0
%1001 = insertelement <4 x i32> %1000, i32 %998, i32 1
%1002 = insertelement <4 x i32> %1001, i32 %999, i32 2
%1003 = bitcast <8 x i32> %51 to <32 x i8>
%1004 = bitcast <4 x i32> %53 to <16 x i8>
%1005 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1002, <32 x i8> %1003, <16 x i8> %1004, i32 2)
%1006 = extractelement <4 x float> %1005, i32 1
%1007 = extractelement <4 x float> %1005, i32 3
%1008 = fcmp oeq float %temp10.0, 4.000000e+00
%1009 = select i1 %1008, float 1.000000e+00, float 0.000000e+00
%1010 = bitcast float %994 to i32
%1011 = bitcast float %996 to i32
%1012 = bitcast float %169 to i32
%1013 = insertelement <4 x i32> undef, i32 %1010, i32 0
%1014 = insertelement <4 x i32> %1013, i32 %1011, i32 1
%1015 = insertelement <4 x i32> %1014, i32 %1012, i32 2
%1016 = bitcast <8 x i32> %47 to <32 x i8>
%1017 = bitcast <4 x i32> %49 to <16 x i8>
%1018 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1015, <32 x i8> %1016, <16 x i8> %1017, i32 2)
%1019 = extractelement <4 x float> %1018, i32 1
%1020 = extractelement <4 x float> %1018, i32 3
%1021 = fcmp oeq float %temp10.0, 3.000000e+00
%1022 = select i1 %1021, float 1.000000e+00, float 0.000000e+00
%1023 = bitcast float %994 to i32
%1024 = bitcast float %996 to i32
%1025 = bitcast float %169 to i32
%1026 = insertelement <4 x i32> undef, i32 %1023, i32 0
%1027 = insertelement <4 x i32> %1026, i32 %1024, i32 1
%1028 = insertelement <4 x i32> %1027, i32 %1025, i32 2
%1029 = bitcast <8 x i32> %43 to <32 x i8>
%1030 = bitcast <4 x i32> %45 to <16 x i8>
%1031 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1028, <32 x i8> %1029, <16 x i8> %1030, i32 2)
%1032 = extractelement <4 x float> %1031, i32 1
%1033 = extractelement <4 x float> %1031, i32 3
%1034 = fcmp oeq float %temp10.0, 2.000000e+00
%1035 = select i1 %1034, float 1.000000e+00, float 0.000000e+00
%1036 = bitcast float %994 to i32
%1037 = bitcast float %996 to i32
%1038 = bitcast float %169 to i32
%1039 = insertelement <4 x i32> undef, i32 %1036, i32 0
%1040 = insertelement <4 x i32> %1039, i32 %1037, i32 1
%1041 = insertelement <4 x i32> %1040, i32 %1038, i32 2
%1042 = bitcast <8 x i32> %39 to <32 x i8>
%1043 = bitcast <4 x i32> %41 to <16 x i8>
%1044 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1041, <32 x i8> %1042, <16 x i8> %1043, i32 2)
%1045 = extractelement <4 x float> %1044, i32 1
%1046 = extractelement <4 x float> %1044, i32 3
%1047 = fcmp oeq float %temp10.0, 1.000000e+00
%1048 = select i1 %1047, float 1.000000e+00, float 0.000000e+00
%1049 = bitcast float %994 to i32
%1050 = bitcast float %996 to i32
%1051 = bitcast float %169 to i32
%1052 = insertelement <4 x i32> undef, i32 %1049, i32 0
%1053 = insertelement <4 x i32> %1052, i32 %1050, i32 1
%1054 = insertelement <4 x i32> %1053, i32 %1051, i32 2
%1055 = bitcast <8 x i32> %35 to <32 x i8>
%1056 = bitcast <4 x i32> %37 to <16 x i8>
%1057 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1054, <32 x i8> %1055, <16 x i8> %1056, i32 2)
%1058 = extractelement <4 x float> %1057, i32 1
%1059 = extractelement <4 x float> %1057, i32 3
%1060 = fcmp oeq float %temp10.0, 0.000000e+00
%1061 = select i1 %1060, float 1.000000e+00, float 0.000000e+00
%1062 = fmul float %1058, %1061
%1063 = fmul float %1059, %1061
%1064 = fmul float %1045, %1048
%1065 = fadd float %1064, %1062
%1066 = fmul float %1046, %1048
%1067 = fadd float %1066, %1063
%1068 = fmul float %1032, %1035
%1069 = fadd float %1068, %1065
%1070 = fmul float %1033, %1035
%1071 = fadd float %1070, %1067
%1072 = fmul float %1019, %1022
%1073 = fadd float %1072, %1069
%1074 = fmul float %1020, %1022
%1075 = fadd float %1074, %1071
%1076 = fmul float %1006, %1009
%1077 = fadd float %1076, %1073
%1078 = fmul float %1007, %1009
%1079 = fadd float %1078, %1075
%1080 = fmul float %1079, 2.000000e+00
%1081 = fadd float %1080, -1.000000e+00
%1082 = fmul float %1077, 2.000000e+00
%1083 = fadd float %1082, -1.000000e+00
%1084 = fmul float %1081, %1081
%1085 = fmul float %1083, %1083
%1086 = fadd float %1084, %1085
%1087 = call float @llvm.AMDIL.clamp.(float %1086, float 0.000000e+00, float 1.000000e+00)
%1088 = fcmp une float %27, %temp20.0
%.sink157 = select i1 %1088, float %30, float %29
%temp40.4 = select i1 %1088, float 1.953125e-03, float 3.906250e-03
%1089 = fdiv float 1.000000e+00, %.sink157
%1090 = fmul float %65, %1089
%1091 = fmul float %66, %1089
%1092 = call float @llvm.floor.f32(float %1090)
%1093 = fsub float %1090, %1092
%1094 = call float @llvm.floor.f32(float %1091)
%1095 = fsub float %1091, %1094
%1096 = fmul float %31, 2.000000e+00
%1097 = fmul float %1096, %temp40.4
%1098 = fsub float 1.000000e+00, %1097
%1099 = fmul float %temp40.4, %31
%1100 = fmul float %1093, %1098
%1101 = fadd float %1100, %1099
%1102 = fmul float %1095, %1098
%1103 = fadd float %1102, %1099
%1104 = fmul float %1101, %temp20.0
%1105 = fadd float %1104, %temp8.0
%1106 = fmul float %1103, %temp20.0
%1107 = fadd float %1106, %temp9.0
%1108 = bitcast float %1105 to i32
%1109 = bitcast float %1107 to i32
%1110 = bitcast float %169 to i32
%1111 = insertelement <4 x i32> undef, i32 %1108, i32 0
%1112 = insertelement <4 x i32> %1111, i32 %1109, i32 1
%1113 = insertelement <4 x i32> %1112, i32 %1110, i32 2
%1114 = bitcast <8 x i32> %51 to <32 x i8>
%1115 = bitcast <4 x i32> %53 to <16 x i8>
%1116 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1113, <32 x i8> %1114, <16 x i8> %1115, i32 2)
%1117 = extractelement <4 x float> %1116, i32 1
%1118 = extractelement <4 x float> %1116, i32 3
%1119 = fcmp oeq float %temp10.0, 4.000000e+00
%1120 = select i1 %1119, float 1.000000e+00, float 0.000000e+00
%1121 = bitcast float %1105 to i32
%1122 = bitcast float %1107 to i32
%1123 = bitcast float %169 to i32
%1124 = insertelement <4 x i32> undef, i32 %1121, i32 0
%1125 = insertelement <4 x i32> %1124, i32 %1122, i32 1
%1126 = insertelement <4 x i32> %1125, i32 %1123, i32 2
%1127 = bitcast <8 x i32> %47 to <32 x i8>
%1128 = bitcast <4 x i32> %49 to <16 x i8>
%1129 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2)
%1130 = extractelement <4 x float> %1129, i32 1
%1131 = extractelement <4 x float> %1129, i32 3
%1132 = fcmp oeq float %temp10.0, 3.000000e+00
%1133 = select i1 %1132, float 1.000000e+00, float 0.000000e+00
%1134 = bitcast float %1105 to i32
%1135 = bitcast float %1107 to i32
%1136 = bitcast float %169 to i32
%1137 = insertelement <4 x i32> undef, i32 %1134, i32 0
%1138 = insertelement <4 x i32> %1137, i32 %1135, i32 1
%1139 = insertelement <4 x i32> %1138, i32 %1136, i32 2
%1140 = bitcast <8 x i32> %43 to <32 x i8>
%1141 = bitcast <4 x i32> %45 to <16 x i8>
%1142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1139, <32 x i8> %1140, <16 x i8> %1141, i32 2)
%1143 = extractelement <4 x float> %1142, i32 1
%1144 = extractelement <4 x float> %1142, i32 3
%1145 = fcmp oeq float %temp10.0, 2.000000e+00
%1146 = select i1 %1145, float 1.000000e+00, float 0.000000e+00
%1147 = bitcast float %1105 to i32
%1148 = bitcast float %1107 to i32
%1149 = bitcast float %169 to i32
%1150 = insertelement <4 x i32> undef, i32 %1147, i32 0
%1151 = insertelement <4 x i32> %1150, i32 %1148, i32 1
%1152 = insertelement <4 x i32> %1151, i32 %1149, i32 2
%1153 = bitcast <8 x i32> %39 to <32 x i8>
%1154 = bitcast <4 x i32> %41 to <16 x i8>
%1155 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1152, <32 x i8> %1153, <16 x i8> %1154, i32 2)
%1156 = extractelement <4 x float> %1155, i32 1
%1157 = extractelement <4 x float> %1155, i32 3
%1158 = fcmp oeq float %temp10.0, 1.000000e+00
%1159 = select i1 %1158, float 1.000000e+00, float 0.000000e+00
%1160 = bitcast float %1105 to i32
%1161 = bitcast float %1107 to i32
%1162 = bitcast float %169 to i32
%1163 = insertelement <4 x i32> undef, i32 %1160, i32 0
%1164 = insertelement <4 x i32> %1163, i32 %1161, i32 1
%1165 = insertelement <4 x i32> %1164, i32 %1162, i32 2
%1166 = bitcast <8 x i32> %35 to <32 x i8>
%1167 = bitcast <4 x i32> %37 to <16 x i8>
%1168 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1165, <32 x i8> %1166, <16 x i8> %1167, i32 2)
%1169 = extractelement <4 x float> %1168, i32 1
%1170 = extractelement <4 x float> %1168, i32 3
%1171 = fcmp oeq float %temp10.0, 0.000000e+00
%1172 = select i1 %1171, float 1.000000e+00, float 0.000000e+00
%1173 = fmul float %1169, %1172
%1174 = fmul float %1170, %1172
%1175 = fmul float %1156, %1159
%1176 = fadd float %1175, %1173
%1177 = fmul float %1157, %1159
%1178 = fadd float %1177, %1174
%1179 = fmul float %1143, %1146
%1180 = fadd float %1179, %1176
%1181 = fmul float %1144, %1146
%1182 = fadd float %1181, %1178
%1183 = fmul float %1130, %1133
%1184 = fadd float %1183, %1180
%1185 = fmul float %1131, %1133
%1186 = fadd float %1185, %1182
%1187 = fmul float %1117, %1120
%1188 = fadd float %1187, %1184
%1189 = fmul float %1118, %1120
%1190 = fadd float %1189, %1186
%1191 = fmul float %1190, 2.000000e+00
%1192 = fadd float %1191, -1.000000e+00
%1193 = fmul float %1188, 2.000000e+00
%1194 = fadd float %1193, -1.000000e+00
%1195 = fmul float %1192, %1192
%1196 = fmul float %1194, %1194
%1197 = fadd float %1195, %1196
%1198 = call float @llvm.AMDIL.clamp.(float %1197, float 0.000000e+00, float 1.000000e+00)
%1199 = fmul float %92, 0.000000e+00
%1200 = fmul float %970, %92
%1201 = fmul float %972, %92
%1202 = fmul float %1083, %93
%1203 = fadd float %1202, %1199
%1204 = fmul float %93, 0.000000e+00
%1205 = fadd float %1204, %1200
%1206 = fmul float %1081, %93
%1207 = fadd float %1206, %1201
%1208 = fmul float %1192, %94
%1209 = fadd float %1208, %1203
%1210 = fmul float %1194, %94
%1211 = fadd float %1210, %1205
%1212 = fmul float %94, 0.000000e+00
%1213 = fadd float %1212, %1207
%1214 = fmul float %58, %513
%1215 = fmul float %58, %515
%1216 = fmul float %58, %517
%1217 = fmul float %59, %861
%1218 = fadd float %1217, %1214
%1219 = fmul float %59, %863
%1220 = fadd float %1219, %1215
%1221 = fmul float %59, %865
%1222 = fadd float %1221, %1216
%1223 = fmul float %60, %1209
%1224 = fadd float %1223, %1218
%1225 = fmul float %60, %1211
%1226 = fadd float %1225, %1220
%1227 = fmul float %60, %1213
%1228 = fadd float %1227, %1222
%1229 = fmul float %1224, %1224
%1230 = fmul float %1226, %1226
%1231 = fadd float %1229, %1230
%1232 = fmul float %1228, %1228
%1233 = fadd float %1231, %1232
%1234 = fadd float %1233, 1.000000e+00
%1235 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1234)
%1236 = fmul float %1224, %1235
%1237 = fmul float %1226, %1235
%1238 = fmul float %1228, %1235
%1239 = fmul float %1236, %57
%1240 = fmul float %1237, %57
%1241 = fmul float %1238, %57
%1242 = fsub float %62, %1239
%1243 = fsub float %63, %1240
%1244 = fsub float %64, %1241
%1245 = fmul float %1242, %1242
%1246 = fmul float %1243, %1243
%1247 = fadd float %1246, %1245
%1248 = fmul float %1244, %1244
%1249 = fadd float %1247, %1248
%1250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1249)
%1251 = fmul float %1242, %1250
%1252 = fmul float %1243, %1250
%1253 = fmul float %1244, %1250
%1254 = fmul float %1251, 5.000000e-01
%1255 = fadd float %1254, 5.000000e-01
%1256 = fmul float %1252, 5.000000e-01
%1257 = fadd float %1256, 5.000000e-01
%1258 = fmul float %1253, 5.000000e-01
%1259 = fadd float %1258, 5.000000e-01
%1260 = call i32 @llvm.SI.packf16(float %1255, float %1257)
%1261 = bitcast i32 %1260 to float
%1262 = call i32 @llvm.SI.packf16(float %1259, float %61)
%1263 = bitcast i32 %1262 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1261, float %1263, float %1261, float %1263)
ret void
}
; ---------------------------------------------------------------------------
; Intrinsic declarations, attribute groups and metadata for the module whose
; @main ends just above. Only signatures are given here; the notes below are
; limited to what the visible call sites in @main show.
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; @main feeds this a sum of squares and multiplies the vector components by the
; result (i.e. it is used to normalise 3-component vectors).
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Called from @main with a <4 x i32> built from bitcast float coordinates, a
; <32 x i8> and a <16 x i8> operand (bitcasts of <8 x i32> / <4 x i32> values),
; and a trailing i32; @main extracts elements 1 and 3 of each result.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; NOTE: the trailing '.' is part of this intrinsic's name as emitted. It is the
; only declaration using attribute group #2 (readnone without nounwind).
; @main calls it with the bounds 0.0 and 1.0.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Packs two floats into one i32; @main bitcasts the result back to float and
; passes it to llvm.SI.export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Declared without an attribute group (so it is not marked readnone); @main
; calls it once, immediately before `ret void`.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main. "ShaderType"="0" here, whereas the VERT modules in
; this dump carry "ShaderType"="1" -- presumably 0 denotes a pixel/fragment
; shader (TODO confirm against the driver source).
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; TBAA node named "const"; other modules in this dump reference it as
; `!tbaa !0` on their descriptor loads.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the module above. Each line is "<instruction> ; <encoding
; dwords in hex>".
;
; m0 is set from s9 first; every v_interp_* instruction below lists [m0] as an
; operand.
s_mov_b32 m0, s9 ; BEFC0309
; Input interpolation: each v_interp_p1_f32 / v_interp_p2_f32 pair produces one
; scalar input in the named VGPR, using v0 (p1) and v1 (p2) as the per-pixel
; sources. The two numeric operands vary as (0..3, 0..3); presumably they are
; (channel, attribute) -- TODO confirm against the ISA manual. Under that
; reading the results are:
;   attribute 0 -> v12, v13, v15, v2
;   attribute 1 -> v4,  v5,  v6,  v3
;   attribute 2 -> v7,  v8,  v9,  v14
;   attribute 3 -> v16, v18          (only two components are interpolated)
v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000
v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001
v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100
v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101
v_interp_p1_f32 v15, v0, 2, 0, [m0] ; C83C0200
v_interp_p2_f32 v15, [v15], v1, 2, 0, [m0] ; C83D0201
v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300
v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v3, v0, 3, 1, [m0] ; C80C0700
v_interp_p2_f32 v3, [v3], v1, 3, 1, [m0] ; C80D0701
v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800
v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801
v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900
v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901
v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00
v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01
v_interp_p1_f32 v14, v0, 3, 2, [m0] ; C8380B00
; Scalar load issued in the middle of the interpolation sequence: fetches four
; dwords through the pointer in s[2:3] into s[8:11]. s[8:11] is later the
; resource operand of the s_buffer_load_dword instructions.
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p2_f32 v14, [v14], v1, 3, 2, [m0] ; C8390B01
v_interp_p1_f32 v16, v0, 0, 3, [m0] ; C8400C00
v_interp_p2_f32 v16, [v16], v1, 0, 3, [m0] ; C8410C01
v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00
v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01
; v0, v1, v10 = v7, v8, v9 with the sign bit cleared (AND with 0x7fffffff),
; i.e. the absolute values of the three interpolated components.
v_mov_b32_e32 v10, 0x7fffffff ; 7E1402FF 7FFFFFFF
v_and_b32_e32 v0, v7, v10 ; 36001507
v_and_b32_e32 v1, v8, v10 ; 36021508
v_and_b32_e32 v10, v9, v10 ; 36141509
; Wait for the s[8:11] descriptor load, then fetch two scalars through it
; (immediate offsets 0x18 and 0x1c) into s0 and s1.
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[8:11], 0x18 ; C2000918
s_buffer_load_dword s1, s[8:11], 0x1c ; C200891C
; v11 = rsq_clamp(|v7|^2 + |v8|^2 + |v9|^2): reciprocal length of (v7, v8, v9).
v_mul_f32_e64 v11, |v7|, |v7| ; D210030B 00020F07
v_mad_f32 v11, |v8|, |v8|, v11 ; D282030B 042E1108
v_mad_f32 v11, |v9|, |v9|, v11 ; D282030B 042E1309
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
; Round v12, v13, v15 to the nearest integer value via floor(x + 0.5);
; results go to v21, v17, v25 respectively.
v_add_f32_e32 v12, 0.5, v12 ; 061818F0
v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0
v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0
v_floor_f32_e32 v21, v12 ; 7E2A490C
v_floor_f32_e32 v17, v13 ; 7E22490D
v_floor_f32_e32 v25, v15 ; 7E32490F
; Wait for s0/s1, then: v13 = s1 (vector copy), v19 = s0 * v21,
; v15 = floor(v19).
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s1 ; 7E1A0201
v_mul_f32_e32 v19, s0, v21 ; 10262A00
v_floor_f32_e32 v15, v19 ; 7E1E4913
; Per-lane if/else on (64.0 <= v21), implemented with exec masking rather than
; a jump. 0x42800000 is 64.0f and 0xc2800000 is -64.0f.
; Outputs of this region: v12, v20, v23, v24 (and v29 = s0 for all lanes).
v_mov_b32_e32 v12, 0x42800000 ; 7E1802FF 42800000
v_cmp_le_f32_e32 vcc, v12, v21 ; 7C062B0C
; exec := lanes where the compare is true; s[2:3] := the remaining lanes.
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
; "then" lanes (v21 >= 64.0), all scaling by s1:
;   t   = v21 - 64.0
;   f   = floor(s1 * t)
;   v23 = s1 * t - f
;   g   = floor(s1 * f)
;   v20 = s1 * f - g
;   v12 = g + 4.0
v_mov_b32_e32 v12, 0xc2800000 ; 7E1802FF C2800000
v_add_f32_e32 v12, v21, v12 ; 06181915
v_mul_f32_e32 v20, s1, v12 ; 10281801
v_floor_f32_e32 v20, v20 ; 7E284914
v_mul_f32_e32 v21, s1, v20 ; 102A2801
v_mad_f32 v23, v12, s1, -v20 ; D2820017 8450030C
v_floor_f32_e32 v12, v21 ; 7E184915
v_mad_f32 v20, v20, s1, -v12 ; D2820014 84300314
v_add_f32_e32 v12, 4.0, v12 ; 061818F6
; Re-enable both sides: exec := then-lanes | else-lanes, s[2:3] := then-lanes.
; The two moves run for all of those lanes: v29 = s0, v24 = v13 (= s1).
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v29, s0 ; 7E3A0200
v_mov_b32_e32 v24, v13 ; 7E30030D
; exec := "else" lanes only (v21 < 64.0), all scaling by s0:
;   v23 = v19 - floor(v19)            (v19 = s0 * v21)
;   v12 = floor(s0 * v15)             (v15 = floor(v19))
;   v20 = s0 * v15 - v12
;   v24 = v29 (= s0), overriding the s1 written above
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v12, s0, v15 ; 10181E00
v_floor_f32_e32 v20, v19 ; 7E284913
v_subrev_f32_e32 v23, v20, v19 ; 0A2E2714
v_floor_f32_e32 v12, v12 ; 7E18490C
v_mad_f32 v20, v15, s0, -v12 ; D2820014 8430010F
v_mov_b32_e32 v24, v29 ; 7E30031D
; Rejoin: restore the then-lanes into exec.
s_or_b64 exec, exec, s[2:3] ; 88FE027E
; Second per-lane if/else, same shape as the preceding one but keyed on v17.
; Inputs prepared for the else side: v26 = s0 * v17, v22 = floor(v26).
; Outputs of this region: v15, v19, v21, v27.
v_mul_f32_e32 v26, s0, v17 ; 10342200
v_floor_f32_e32 v22, v26 ; 7E2C491A
v_mov_b32_e32 v15, 0x42800000 ; 7E1E02FF 42800000
v_cmp_le_f32_e32 vcc, v15, v17 ; 7C06230F
; exec := lanes with 64.0 <= v17; s[2:3] := the other lanes.
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
; "then" lanes (scaling by s1):
;   t   = v17 - 64.0
;   f   = floor(s1 * t)
;   v19 = s1 * t - f
;   g   = floor(s1 * f)
;   v21 = s1 * f - g
;   v15 = g + 4.0
v_mov_b32_e32 v15, 0xc2800000 ; 7E1E02FF C2800000
v_add_f32_e32 v15, v17, v15 ; 061E1F11
v_mul_f32_e32 v17, s1, v15 ; 10221E01
v_floor_f32_e32 v17, v17 ; 7E224911
v_mul_f32_e32 v21, s1, v17 ; 102A2201
v_mad_f32 v19, v15, s1, -v17 ; D2820013 8444030F
v_floor_f32_e32 v15, v21 ; 7E1E4915
v_mad_f32 v21, v17, s1, -v15 ; D2820015 843C0311
v_add_f32_e32 v15, 4.0, v15 ; 061E1EF6
; Both sides enabled: v27 = v13 (= s1) for all of them.
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v27, v13 ; 7E36030D
; "else" lanes only (scaling by s0):
;   v19 = v26 - floor(v26)
;   v15 = floor(s0 * v22)
;   v21 = s0 * v22 - v15
;   v27 = v29 (= s0)
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v15, s0, v22 ; 101E2C00
v_floor_f32_e32 v17, v26 ; 7E22491A
v_subrev_f32_e32 v19, v17, v26 ; 0A263511
v_floor_f32_e32 v15, v15 ; 7E1E490F
v_mad_f32 v21, v22, s0, -v15 ; D2820015 843C0116
v_mov_b32_e32 v27, v29 ; 7E36031D
; Rejoin.
s_or_b64 exec, exec, s[2:3] ; 88FE027E
; Third per-lane if/else, keyed on v25, with scalar constant loads interleaved.
; Outputs of this region: v17, v22, v26, v13, plus v25 = s12 and v28 = s13.
;
; Two more scalars from the s[8:11] buffer (offsets 0x20, 0x24).
s_buffer_load_dword s12, s[8:11], 0x20 ; C2060920
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
; Inputs for the else side: v31 = s0 * v25, v30 = floor(v31).
v_mul_f32_e32 v31, s0, v25 ; 103E3200
v_mov_b32_e32 v17, 0x42800000 ; 7E2202FF 42800000
v_floor_f32_e32 v30, v31 ; 7E3C491F
v_cmp_le_f32_e32 vcc, v17, v25 ; 7C063311
s_waitcnt lgkmcnt(0) ; BF8C007F
; exec := lanes with 64.0 <= v25; s[2:3] := the other lanes.
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
; "then" lanes (scaling by s1):
;   t   = v25 - 64.0
;   f   = floor(s1 * t)        (held in v25)
;   v22 = s1 * t - f
;   g   = floor(s1 * f)
;   v26 = s1 * f - g
;   v17 = g + 4.0
v_mov_b32_e32 v17, 0xc2800000 ; 7E2202FF C2800000
v_add_f32_e32 v17, v25, v17 ; 06222319
v_mul_f32_e32 v22, s1, v17 ; 102C2201
v_floor_f32_e32 v25, v22 ; 7E324916
v_mul_f32_e32 v26, s1, v25 ; 10343201
v_mad_f32 v22, v17, s1, -v25 ; D2820016 84640311
v_floor_f32_e32 v17, v26 ; 7E22491A
v_mad_f32 v26, v25, s1, -v17 ; D282001A 84440319
v_add_f32_e32 v17, 4.0, v17 ; 062222F6
; Both sides enabled. Six further scalars are fetched from the s[8:11] buffer
; (offsets 0x0, 0x1, 0x2, 0x28, 0x2c, 0x30) and s12/s13 are copied to
; v25/v28 for later per-lane selection.
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
s_buffer_load_dword s38, s[8:11], 0x0 ; C2130900
s_buffer_load_dword s39, s[8:11], 0x1 ; C2138901
s_buffer_load_dword s40, s[8:11], 0x2 ; C2140902
s_buffer_load_dword s36, s[8:11], 0x28 ; C2120928
s_buffer_load_dword s37, s[8:11], 0x2c ; C212892C
s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930
v_mov_b32_e32 v25, s12 ; 7E32020C
v_mov_b32_e32 v28, s13 ; 7E38020D
s_waitcnt lgkmcnt(0) ; BF8C007F
; "else" lanes only (scaling by s0):
;   v22 = v31 - floor(v31)
;   v17 = floor(s0 * v30)
;   v26 = s0 * v30 - v17
;   v13 = v29 (= s0); "then" lanes keep v13 = s1 from earlier
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v13, s0, v30 ; 101A3C00
v_floor_f32_e32 v17, v31 ; 7E22491F
v_subrev_f32_e32 v22, v17, v31 ; 0A2C3F11
v_floor_f32_e32 v17, v13 ; 7E22490D
v_mad_f32 v26, v30, s0, -v17 ; D282001A 8444011E
v_mov_b32_e32 v13, v29 ; 7E1A031D
; Rejoin.
s_or_b64 exec, exec, s[2:3] ; 88FE027E
; Build per-lane boolean masks in SGPR pairs (and vcc) for later v_cndmask use.
;
; Scale selectors: (s0 != v24) -> s[30:31], (s0 != v27) -> s[28:29],
; (s0 != v13) -> s[26:27]. v24/v27/v13 were set to s1 on the ">= 64.0" side of
; the three branches and to s0 on the other side.
;
; Index tests (equality against the inline constants 4.0, 2.0, 1.0 and 0):
;   v12: ==4 -> s[24:25], ==2 -> s[20:21], ==1 -> s[16:17], ==0 -> s[18:19]
;   v15: ==4 -> s[22:23], ==2 -> s[14:15], ==1 -> s[10:11], ==0 -> s[12:13]
;   v17: ==4 -> vcc,      ==2 -> s[0:1],   ==1 -> s[2:3],   ==0 -> s[8:9]
; The "== 3.0" tests are done later, next to their use.
; Note that the v17 results overwrite s[0:1] and s[8:9], which previously held
; s0/s1 and part of the s[8:11] buffer descriptor.
v_cmp_neq_f32_e64 s[30:31], s0, v24 ; D01A001E 00023000
v_cmp_eq_f32_e64 s[24:25], 4.0, v12 ; D0040018 000218F6
v_cmp_eq_f32_e64 s[20:21], 2.0, v12 ; D0040014 000218F4
v_cmp_eq_f32_e64 s[16:17], 1.0, v12 ; D0040010 000218F2
v_cmp_eq_f32_e64 s[18:19], 0, v12 ; D0040012 00021880
v_cmp_neq_f32_e64 s[28:29], s0, v27 ; D01A001C 00023600
v_cmp_neq_f32_e64 s[26:27], s0, v13 ; D01A001A 00021A00
v_cmp_eq_f32_e64 s[22:23], 4.0, v15 ; D0040016 00021EF6
v_cmp_eq_f32_e64 s[14:15], 2.0, v15 ; D004000E 00021EF4
v_cmp_eq_f32_e64 s[10:11], 1.0, v15 ; D004000A 00021EF2
v_cmp_eq_f32_e64 s[12:13], 0, v15 ; D004000C 00021E80
v_cmp_eq_f32_e32 vcc, 4.0, v17 ; 7C0422F6
v_cmp_eq_f32_e64 s[0:1], 2.0, v17 ; D0040000 000222F4
v_cmp_eq_f32_e64 s[2:3], 1.0, v17 ; D0040002 000222F2
v_cmp_eq_f32_e64 s[8:9], 0, v17 ; D0040008 00022280
; Coordinate / LOD setup for the first group of texture samples, interleaved
; with the descriptor loads those samples need.
;
; First 4-dword descriptor (through s[4:5], offset 0x0); it is later the last
; operand of image_sample_l.
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
; v29 = s41 * ((v14 - s38)^2 + (v16 - s39)^2 + (v18 - s40)^2), then
; v_log_f32 of that value.
v_subrev_f32_e32 v29, s38, v14 ; 0A3A1C26
v_mul_f32_e32 v29, v29, v29 ; 103A3B1D
v_subrev_f32_e32 v30, s39, v16 ; 0A3C2027
v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E
v_subrev_f32_e32 v30, s40, v18 ; 0A3C2428
v_mac_f32_e32 v29, v30, v30 ; 3E3A3D1E
v_mul_f32_e32 v29, s41, v29 ; 103A3A29
v_log_f32_e32 v29, v29 ; 7E3A4F1D
; r = 1 / (s[30:31] ? v28 : v25)   (v25 = s12, v28 = s13)
v_cndmask_b32_e64 v30, v25, v28, s[30:31] ; D200001E 007A3919
v_rcp_f32_e32 v30, v30 ; 7E3C551E
; v31, v32, v30 = x*r - floor(x*r) for x = v18, v16, v14 respectively.
v_mul_f32_e32 v31, v30, v18 ; 103E251E
v_floor_f32_e32 v31, v31 ; 7E3E491F
v_mad_f32 v31, v18, v30, -v31 ; D282001F 847E3D12
v_mul_f32_e32 v32, v30, v16 ; 1040211E
v_floor_f32_e32 v32, v32 ; 7E404920
v_mad_f32 v32, v16, v30, -v32 ; D2820020 84823D10
v_mul_f32_e32 v33, v30, v14 ; 10421D1E
v_floor_f32_e32 v33, v33 ; 7E424921
v_mad_f32 v30, v14, v30, -v33 ; D282001E 84863D0E
; v33 = 0x3b000000 (2^-9), v34 = 0x3b800000 (2^-8).
; k = s[30:31] ? 2^-9 : 2^-8; v36 = 2*s36; v37 = 1 - v36*k; v35 = s36*k.
v_mov_b32_e32 v33, 0x3b000000 ; 7E4202FF 3B000000
v_mov_b32_e32 v34, 0x3b800000 ; 7E4402FF 3B800000
v_cndmask_b32_e64 v35, v34, v33, s[30:31] ; D2000023 007A4322
v_add_f32_e64 v36, s36, s36 ; D2060024 00004824
v_mad_f32 v37, -v36, v35, 1.0 ; D2820025 23CA4724
v_mul_f32_e32 v35, s36, v35 ; 10464624
; Remap the three fractional values: f -> v37*f + v35 (results in v31, v32,
; v35).
v_mad_f32 v31, v37, v31, v35 ; D282001F 048E3F25
v_mad_f32 v32, v37, v32, v35 ; D2820020 048E4125
v_mac_f32_e32 v35, v37, v30 ; 3E463D25
; Same 2^-9 / 2^-8 selection for the other two masks (v30 for s[28:29],
; v33 for s[26:27]).
v_cndmask_b32_e64 v30, v34, v33, s[28:29] ; D200001E 00724322
v_cndmask_b32_e64 v33, v34, v33, s[26:27] ; D2000021 006A4322
; 0x3f317218 is ~0.6931472 (ln 2): converts the log2 result to a natural log.
v_mul_f32_e32 v29, 0x3f317218, v29 ; 103A3AFF 3F317218
s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508
v_cndmask_b32_e64 v34, v25, v28, s[28:29] ; D2000022 00723919
s_load_dwordx4 s[68:71], s[4:5], 0xc ; C0A2050C
; v37 = s36 * v30, v38 = s36 * v33; v41 = s37 * v29. v41 is placed in the
; third slot of every sample address below (presumably the LOD operand of
; image_sample_l -- TODO confirm). These are the last reads of s36/s37 before
; the s[36:39] load overwrites them.
v_mul_f32_e32 v37, s36, v30 ; 104A3C24
v_mul_f32_e32 v38, s36, v33 ; 104C4224
v_mul_f32_e32 v41, s37, v29 ; 10523A25
; Remaining descriptors: 4-dword ones through s[4:5], 8-dword ones through
; s[6:7].
s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510
s_load_dwordx8 s[72:79], s[6:7], 0x18 ; C0E40718
s_load_dwordx8 s[80:87], s[6:7], 0x20 ; C0E80720
s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708
s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710
s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504
s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700
; Assemble three sample address vectors (scale v24, offsets v23/v20):
;   v[39:42] : (v24*v31 + v23, v24*v32 + v20, v41, ...)
;   v[42:45] : (copy of v39,   v24*v35 + v20, v41, ...)
;   v[23:26] : (v24*v35 + v23, copy of v40,   v41 written just before use, ...)
v_mad_f32 v39, v24, v31, v23 ; D2820027 045E3F18
v_mad_f32 v40, v24, v32, v20 ; D2820028 04524118
v_mac_f32_e32 v20, v24, v35 ; 3E284718
v_mov_b32_e32 v42, v39 ; 7E540327
v_mov_b32_e32 v43, v40 ; 7E560328
v_mov_b32_e32 v44, v41 ; 7E580329
v_mov_b32_e32 v45, v42 ; 7E5A032A
v_mac_f32_e32 v23, v24, v35 ; 3E2E4718
v_mov_b32_e32 v43, v20 ; 7E560314
; v20 is reused: it now holds (s[26:27] ? v28 : v25) for the third coordinate
; transform.
v_cndmask_b32_e64 v20, v25, v28, s[26:27] ; D2000014 006A3919
v_mov_b32_e32 v44, v41 ; 7E580329
v_mov_b32_e32 v24, v40 ; 7E300328
; Texture sampling. Every address vector is sampled with the same five
; descriptor pairs, always in this order:
;   (s[80:87], s[36:39]), (s[72:79], s[68:71]), (s[60:67], s[40:43]),
;   (s[52:59], s[28:31]), (s[44:51], s[32:35])
; Each image_sample_l returns two VGPRs; the leading operand 10 (0b1010) is
; presumably the dmask selecting components 1 and 3, which matches the IR
; extracting elements 1 and 3 of each sample result -- TODO confirm operand
; order against the ISA manual.
;
; Wait for all outstanding scalar descriptor loads.
s_waitcnt lgkmcnt(0) ; BF8C007F
; Address v[39:42]: results in v[28:29], v[31:32], v[46:47], v[48:49], v[50:51].
image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[36:39] ; F0900A00 01341C27
image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[68:71] ; F0900A00 02321F27
image_sample_l v[46:47], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[60:67], s[40:43] ; F0900A00 014F2E27
image_sample_l v[48:49], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[28:31] ; F0900A00 00ED3027
image_sample_l v[50:51], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[32:35] ; F0900A00 010B3227
; Address v[42:45]: results in v[52:53], v[54:55], v[56:57], v[58:59], v[42:43].
image_sample_l v[52:53], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[80:87], s[36:39] ; F0900A00 0134342A
image_sample_l v[54:55], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[72:79], s[68:71] ; F0900A00 0232362A
image_sample_l v[56:57], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[60:67], s[40:43] ; F0900A00 014F382A
image_sample_l v[58:59], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[52:59], s[28:31] ; F0900A00 00ED3A2A
image_sample_l v[42:43], 10, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[44:51], s[32:35] ; F0900A00 010B2A2A
; Complete address v[23:26] (third slot = v41) and start the reciprocal for
; the second coordinate transform.
v_mov_b32_e32 v25, v41 ; 7E320329
v_rcp_f32_e32 v34, v34 ; 7E445522
; Address v[23:26]: results in v[44:45], v[60:61], v[62:63], v[64:65], v[66:67].
image_sample_l v[44:45], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[80:87], s[36:39] ; F0900A00 01342C17
image_sample_l v[60:61], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[72:79], s[68:71] ; F0900A00 02323C17
image_sample_l v[62:63], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[60:67], s[40:43] ; F0900A00 014F3E17
image_sample_l v[64:65], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[52:59], s[28:31] ; F0900A00 00ED4017
image_sample_l v[66:67], 10, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[44:51], s[32:35] ; F0900A00 010B4217
; Second coordinate transform, using r = v34 (reciprocal computed above):
; v23, v24, v25 = x*r - floor(x*r) for x = v18, v16, v14; then each is
; remapped as (1 - v36*v30)*f + v37, and scaled/offset with v27 and v19/v21.
v_mul_f32_e32 v23, v34, v18 ; 102E2522
v_floor_f32_e32 v23, v23 ; 7E2E4917
v_mad_f32 v23, v18, v34, -v23 ; D2820017 845E4512
v_mul_f32_e32 v24, v34, v16 ; 10302122
v_floor_f32_e32 v24, v24 ; 7E304918
v_mad_f32 v24, v16, v34, -v24 ; D2820018 84624510
v_mul_f32_e32 v25, v34, v14 ; 10321D22
v_floor_f32_e32 v25, v25 ; 7E324919
v_mad_f32 v25, v14, v34, -v25 ; D2820019 8466450E
v_mad_f32 v30, -v36, v30, 1.0 ; D282001E 23CA3D24
v_mad_f32 v23, v30, v23, v37 ; D2820017 04962F1E
v_mad_f32 v24, v30, v24, v37 ; D2820018 0496311E
v_mac_f32_e32 v37, v30, v25 ; 3E4A331E
v_mad_f32 v39, v27, v23, v19 ; D2820027 044E2F1B
v_rcp_f32_e32 v20, v20 ; 7E285514
v_mad_f32 v40, v27, v24, v21 ; D2820028 0456311B
v_mac_f32_e32 v21, v27, v37 ; 3E2A4B1B
v_mac_f32_e32 v19, v27, v37 ; 3E264B1B
; Third coordinate transform, using r = v20: v18, v16, v14 are replaced by
; their x*r - floor(x*r) values (v14 is finished a few instructions later).
v_mul_f32_e32 v23, v20, v18 ; 102E2514
v_floor_f32_e32 v23, v23 ; 7E2E4917
v_mad_f32 v18, v18, v20, -v23 ; D2820012 845E2912
v_mul_f32_e32 v23, v20, v16 ; 102E2114
v_floor_f32_e32 v23, v23 ; 7E2E4917
v_mad_f32 v16, v16, v20, -v23 ; D2820010 845E2910
v_mul_f32_e32 v23, v20, v14 ; 102E1D14
v_floor_f32_e32 v23, v23 ; 7E2E4917
; Build address v[68:71] from v39/v40/v41 (v69 is replaced by v21 below).
v_mov_b32_e32 v68, v39 ; 7E880327
v_mov_b32_e32 v69, v40 ; 7E8A0328
v_mov_b32_e32 v70, v41 ; 7E8C0329
; Wait until at most 5 vector-memory operations are outstanding before
; reading v42, which is the destination of an earlier sample.
s_waitcnt vmcnt(5) ; BF8C0775
v_mov_b32_e32 v71, v42 ; 7E8E032A
v_mad_f32 v14, v14, v20, -v23 ; D282000E 845E290E
v_mov_b32_e32 v69, v21 ; 7E8A0315
; Remap for the third transform: factor v21 = 1 - v36*v33, offset v38.
v_mad_f32 v21, -v36, v33, 1.0 ; D2820015 23CA4324
v_mov_b32_e32 v20, v40 ; 7E280328
v_mad_f32 v18, v21, v18, v38 ; D2820012 049A2515
v_mad_f32 v16, v21, v16, v38 ; D2820010 049A2115
v_mac_f32_e32 v38, v21, v14 ; 3E4C1D15
v_mov_b32_e32 v70, v41 ; 7E8C0329
v_mov_b32_e32 v21, v41 ; 7E2A0329
; Address v[39:42] (second transform): results in v[33:34], v[35:36],
; v[72:73], v[74:75], v[76:77].
image_sample_l v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[36:39] ; F0900A00 01342127
image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[68:71] ; F0900A00 02322327
image_sample_l v[72:73], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[60:67], s[40:43] ; F0900A00 014F4827
image_sample_l v[74:75], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[28:31] ; F0900A00 00ED4A27
image_sample_l v[76:77], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[32:35] ; F0900A00 010B4C27
; v39/v40 are rewritten for the third transform (scale v13, offsets v22/v26);
; v26 and v22 also receive their v13*v38 terms.
v_mad_f32 v39, v13, v18, v22 ; D2820027 045A250D
v_mad_f32 v40, v13, v16, v26 ; D2820028 046A210D
v_mac_f32_e32 v26, v13, v38 ; 3E344D0D
v_mac_f32_e32 v22, v13, v38 ; 3E2C4D0D
; Address v[68:71]: results in v[13:14], v[37:38], v[78:79], v[80:81] and
; (after the moves below) v[68:69].
image_sample_l v[13:14], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[80:87], s[36:39] ; F0900A00 01340D44
image_sample_l v[37:38], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[72:79], s[68:71] ; F0900A00 02322544
image_sample_l v[78:79], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[60:67], s[40:43] ; F0900A00 014F4E44
image_sample_l v[80:81], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[52:59], s[28:31] ; F0900A00 00ED5044
v_mov_b32_e32 v82, v39 ; 7EA40327
v_mov_b32_e32 v83, v40 ; 7EA60328
v_mov_b32_e32 v84, v41 ; 7EA80329
v_mov_b32_e32 v85, v42 ; 7EAA032A
image_sample_l v[68:69], 10, 0, 0, 0, 0, 0, 0, 0, v[68:71], s[44:51], s[32:35] ; F0900A00 010B4444
v_mov_b32_e32 v83, v26 ; 7EA6031A
v_mov_b32_e32 v23, v40 ; 7E2E0328
; Address v[19:22]: results in v[25:26], v[70:71], v[86:87], v[88:89], v[18:19].
image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[80:87], s[36:39] ; F0900A00 01341913
image_sample_l v[70:71], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[72:79], s[68:71] ; F0900A00 02324613
image_sample_l v[86:87], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[60:67], s[40:43] ; F0900A00 014F5613
image_sample_l v[88:89], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[52:59], s[28:31] ; F0900A00 00ED5813
image_sample_l v[18:19], 10, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[44:51], s[32:35] ; F0900A00 010B1213
v_mov_b32_e32 v84, v41 ; 7EA80329
v_mov_b32_e32 v24, v41 ; 7E300329
; Address v[39:42] (third transform): results in v[20:21], v[90:91],
; v[92:93], v[94:95], v[39:40].
image_sample_l v[20:21], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[80:87], s[36:39] ; F0900A00 01341427
image_sample_l v[90:91], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[72:79], s[68:71] ; F0900A00 02325A27
image_sample_l v[92:93], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[60:67], s[40:43] ; F0900A00 014F5C27
image_sample_l v[94:95], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[28:31] ; F0900A00 00ED5E27
image_sample_l v[39:40], 10, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[32:35] ; F0900A00 010B2727
; Address v[82:85]: results in v[96:97], v[98:99], v[100:101], v[102:103],
; v[82:83].
image_sample_l v[96:97], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[80:87], s[36:39] ; F0900A00 01346052
image_sample_l v[98:99], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[72:79], s[68:71] ; F0900A00 02326252
image_sample_l v[100:101], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[60:67], s[40:43] ; F0900A00 014F6452
image_sample_l v[102:103], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[52:59], s[28:31] ; F0900A00 00ED6652
image_sample_l v[82:83], 10, 0, 0, 0, 0, 0, 0, 0, v[82:85], s[44:51], s[32:35] ; F0900A00 010B5252
; Address v[22:25]: first two of its five samples (the other three are issued
; after the first accumulation pass). Results in v[84:85], v[104:105].
s_waitcnt vmcnt(14) ; BF8C077E
image_sample_l v[84:85], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[80:87], s[36:39] ; F0900A00 01345416
image_sample_l v[104:105], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[72:79], s[68:71] ; F0900A00 02326816
; Turn the compare masks into 0.0/1.0 float selectors and use them to pick,
; per lane, one of the five samples taken for each address vector
; (result = sum over k of selector_k * sample_k).
;
; Selectors for the first index (originally in v12), v27 = 0x40400000 = 3.0:
;   v16 = (==4), v12 = (==3), v30 = (==2), v41 = (==1), v106 = (==0)
; Selectors for the second index (originally in v15):
;   v107 = (==4), v15 = (==3), v108 = (==2), v109 = (==1), v110 = (==0)
; Note v12 and v15 are overwritten by their own "== 3.0" selectors.
v_cndmask_b32_e64 v16, 0, 1.0, s[24:25] ; D2000010 0061E480
v_mov_b32_e32 v27, 0x40400000 ; 7E3602FF 40400000
v_cmp_eq_f32_e64 s[4:5], v12, v27 ; D0040004 0002370C
v_cndmask_b32_e64 v12, 0, 1.0, s[4:5] ; D200000C 0011E480
v_cndmask_b32_e64 v30, 0, 1.0, s[20:21] ; D200001E 0051E480
v_cndmask_b32_e64 v41, 0, 1.0, s[16:17] ; D2000029 0041E480
v_cndmask_b32_e64 v106, 0, 1.0, s[18:19] ; D200006A 0049E480
v_cndmask_b32_e64 v107, 0, 1.0, s[22:23] ; D200006B 0059E480
v_cmp_eq_f32_e64 s[4:5], v15, v27 ; D0040004 0002370F
v_cndmask_b32_e64 v15, 0, 1.0, s[4:5] ; D200000F 0011E480
v_cndmask_b32_e64 v108, 0, 1.0, s[14:15] ; D200006C 0039E480
v_cndmask_b32_e64 v109, 0, 1.0, s[10:11] ; D200006D 0029E480
v_cndmask_b32_e64 v110, 0, 1.0, s[12:13] ; D200006E 0031E480
; First index: accumulate into v[50:51], v[42:43], v[48:49] (one register
; pair per address vector). Terms are added in selector order ==0, ==1, ==2,
; ==3, ==4.
v_mul_f32_e32 v50, v106, v50 ; 1064656A
v_mul_f32_e32 v51, v106, v51 ; 1066676A
v_mac_f32_e32 v50, v41, v48 ; 3E646129
v_mac_f32_e32 v51, v41, v49 ; 3E666329
v_mul_f32_e32 v42, v106, v42 ; 1054556A
v_mul_f32_e32 v43, v106, v43 ; 1056576A
v_mac_f32_e32 v42, v41, v58 ; 3E547529
v_mac_f32_e32 v43, v41, v59 ; 3E567729
v_mul_f32_e32 v48, v106, v66 ; 1060856A
v_mul_f32_e32 v49, v106, v67 ; 1062876A
v_mac_f32_e32 v48, v41, v64 ; 3E608129
v_mac_f32_e32 v49, v41, v65 ; 3E628329
v_mac_f32_e32 v50, v30, v46 ; 3E645D1E
v_mac_f32_e32 v51, v30, v47 ; 3E665F1E
v_mac_f32_e32 v42, v30, v56 ; 3E54711E
v_mac_f32_e32 v43, v30, v57 ; 3E56731E
v_mac_f32_e32 v48, v30, v62 ; 3E607D1E
v_mac_f32_e32 v49, v30, v63 ; 3E627F1E
v_mac_f32_e32 v50, v12, v31 ; 3E643F0C
v_mac_f32_e32 v51, v12, v32 ; 3E66410C
v_mac_f32_e32 v42, v12, v54 ; 3E546D0C
v_mac_f32_e32 v43, v12, v55 ; 3E566F0C
v_mac_f32_e32 v48, v12, v60 ; 3E60790C
v_mac_f32_e32 v49, v12, v61 ; 3E627B0C
v_mac_f32_e32 v50, v16, v28 ; 3E643910
v_mac_f32_e32 v51, v16, v29 ; 3E663B10
v_mac_f32_e32 v42, v16, v52 ; 3E546910
v_mac_f32_e32 v43, v16, v53 ; 3E566B10
v_mac_f32_e32 v48, v16, v44 ; 3E605910
v_mac_f32_e32 v49, v16, v45 ; 3E625B10
; Second index: accumulate into (v12, v16), v[28:29], v[18:19]. v12 and v16
; are reused as accumulators now that the first index is finished.
v_mul_f32_e32 v12, v110, v76 ; 1018996E
v_mul_f32_e32 v16, v110, v77 ; 10209B6E
v_mac_f32_e32 v12, v109, v74 ; 3E18956D
v_mac_f32_e32 v16, v109, v75 ; 3E20976D
v_mul_f32_e32 v28, v110, v68 ; 1038896E
v_mul_f32_e32 v29, v110, v69 ; 103A8B6E
v_mac_f32_e32 v28, v109, v80 ; 3E38A16D
v_mac_f32_e32 v29, v109, v81 ; 3E3AA36D
; Wait until at most 12 vector-memory operations are outstanding before
; consuming the next sample results.
s_waitcnt vmcnt(12) ; BF8C077C
v_mul_f32_e32 v18, v110, v18 ; 1024256E
v_mul_f32_e32 v19, v110, v19 ; 1026276E
v_mac_f32_e32 v18, v109, v88 ; 3E24B16D
v_mac_f32_e32 v19, v109, v89 ; 3E26B36D
v_mac_f32_e32 v12, v108, v72 ; 3E18916C
v_mac_f32_e32 v16, v108, v73 ; 3E20936C
v_mac_f32_e32 v28, v108, v78 ; 3E389D6C
v_mac_f32_e32 v29, v108, v79 ; 3E3A9F6C
v_mac_f32_e32 v18, v108, v86 ; 3E24AD6C
v_mac_f32_e32 v19, v108, v87 ; 3E26AF6C
v_mac_f32_e32 v12, v15, v35 ; 3E18470F
v_mac_f32_e32 v16, v15, v36 ; 3E20490F
v_mac_f32_e32 v28, v15, v37 ; 3E384B0F
v_mac_f32_e32 v29, v15, v38 ; 3E3A4D0F
v_mac_f32_e32 v18, v15, v70 ; 3E248D0F
v_mac_f32_e32 v19, v15, v71 ; 3E268F0F
v_mac_f32_e32 v12, v107, v33 ; 3E18436B
v_mac_f32_e32 v16, v107, v34 ; 3E20456B
v_mac_f32_e32 v28, v107, v13 ; 3E381B6B
v_mac_f32_e32 v29, v107, v14 ; 3E3A1D6B
v_mac_f32_e32 v18, v107, v25 ; 3E24336B
v_mac_f32_e32 v19, v107, v26 ; 3E26356B
; Remaining three samples for address v[22:25]; their destinations (v[13:14],
; v[25:26], v[22:23]) reuse registers whose previous contents were consumed
; above. The last one overwrites part of its own address vector.
image_sample_l v[13:14], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[60:67], s[40:43] ; F0900A00 014F0D16
image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[52:59], s[28:31] ; F0900A00 00ED1916
s_waitcnt vmcnt(0) ; BF8C0770
image_sample_l v[22:23], 10, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[44:51], s[32:35] ; F0900A00 010B1616
; Third index (originally in v17): selectors are created one at a time and
; applied immediately. Accumulators: (v24, v30), (v32, v33), (v22, v15).
; Interleaved with this is the computation of three blend weights from the
; absolute values saved in v0, v1, v10 and the reciprocal length in v11.
;
; Selector (v17 == 0) in v15 and (v17 == 1) in v31.
v_cndmask_b32_e64 v15, 0, 1.0, s[8:9] ; D200000F 0021E480
v_mul_f32_e32 v24, v15, v39 ; 10304F0F
v_mul_f32_e32 v30, v15, v40 ; 103C510F
v_cndmask_b32_e64 v31, 0, 1.0, s[2:3] ; D200001F 0009E480
v_mac_f32_e32 v24, v31, v94 ; 3E30BD1F
v_mac_f32_e32 v30, v31, v95 ; 3E3CBF1F
v_mul_f32_e32 v32, v15, v82 ; 1040A50F
v_mul_f32_e32 v33, v15, v83 ; 1042A70F
v_mac_f32_e32 v32, v31, v102 ; 3E40CD1F
v_mac_f32_e32 v33, v31, v103 ; 3E42CF1F
; All samples must have returned before v[22:23] / v[25:26] / v[13:14] are read.
s_waitcnt vmcnt(0) ; BF8C0770
; v15 changes role here: it is read as the (== 0) selector and then becomes
; the second accumulator of the (v22, v15) pair.
v_mul_f32_e32 v22, v15, v22 ; 102C2D0F
v_mul_f32_e32 v15, v15, v23 ; 101E2F0F
v_mac_f32_e32 v22, v31, v25 ; 3E2C331F
v_mac_f32_e32 v15, v31, v26 ; 3E1E351F
; Selector (v17 == 2) in v23.
v_cndmask_b32_e64 v23, 0, 1.0, s[0:1] ; D2000017 0001E480
v_mac_f32_e32 v24, v23, v92 ; 3E30B917
v_mac_f32_e32 v30, v23, v93 ; 3E3CBB17
v_mac_f32_e32 v32, v23, v100 ; 3E40C917
v_mac_f32_e32 v33, v23, v101 ; 3E42CB17
v_mac_f32_e32 v22, v23, v13 ; 3E2C1B17
v_mac_f32_e32 v15, v23, v14 ; 3E1E1D17
; Selector (v17 == 3.0) in v13 (v27 still holds 3.0).
v_cmp_eq_f32_e64 s[0:1], v17, v27 ; D0040000 00023711
v_cndmask_b32_e64 v13, 0, 1.0, s[0:1] ; D200000D 0001E480
v_mac_f32_e32 v24, v13, v90 ; 3E30B50D
v_mac_f32_e32 v30, v13, v91 ; 3E3CB70D
v_mac_f32_e32 v32, v13, v98 ; 3E40C50D
v_mac_f32_e32 v33, v13, v99 ; 3E42C70D
v_mac_f32_e32 v22, v13, v104 ; 3E2CD10D
v_mac_f32_e32 v15, v13, v105 ; 3E1ED30D
; Blend weights, step 1: w = |component| * v11 + (-0.2) for the three
; absolute values (0xbe4ccccd is -0.2f). Results in v0, v1, v13.
v_mov_b32_e32 v13, 0xbe4ccccd ; 7E1A02FF BE4CCCCD
v_mad_f32 v0, v11, v0, v13 ; D2820000 0436010B
v_mad_f32 v1, v11, v1, v13 ; D2820001 0436030B
v_mac_f32_e32 v13, v11, v10 ; 3E1A150B
; Selector (v17 == 4.0) in v10, from vcc (v10's old value was consumed by the
; instruction just above).
v_cndmask_b32_e64 v10, 0, 1.0, vcc ; D200000A 01A9E480
v_mac_f32_e32 v24, v10, v20 ; 3E30290A
v_mac_f32_e32 v30, v10, v21 ; 3E3C2B0A
; Blend weights, step 2: multiply by 7.0 (0x40e00000), clamp from below with
; max(0.01, w) (0x3c23d70a is 0.01f), then v13 = 1 / (sum of the three).
v_mov_b32_e32 v11, 0x40e00000 ; 7E1602FF 40E00000
v_mul_f32_e32 v0, v11, v0 ; 1000010B
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mul_f32_e32 v11, v11, v13 ; 10161B0B
v_mov_b32_e32 v13, 0x3c23d70a ; 7E1A02FF 3C23D70A
v_max_f32_e32 v0, v13, v0 ; 2000010D
v_max_f32_e32 v1, v13, v1 ; 2002030D
v_max_f32_e32 v11, v13, v11 ; 2016170D
v_add_f32_e32 v13, v1, v0 ; 061A0101
v_add_f32_e32 v13, v11, v13 ; 061A1B0B
v_rcp_f32_e32 v13, v13 ; 7E1A550D
v_mac_f32_e32 v32, v10, v96 ; 3E40C10A
v_mac_f32_e32 v33, v10, v97 ; 3E42C30A
v_mac_f32_e32 v22, v10, v84 ; 3E2CA90A
v_mac_f32_e32 v15, v10, v85 ; 3E1EAB0A
; Blend weights, step 3: divide each weight by the sum so that they add up
; to 1. Final weights: v0, v1, v10.
v_mul_f32_e32 v0, v13, v0 ; 1000010D
v_mul_f32_e32 v1, v13, v1 ; 1002030D
v_mul_f32_e32 v10, v13, v11 ; 1014170D
; Remap every accumulated texture value from t to 2*t - 1
; (v_mad_f32 dst, 2.0, t, -1.0) and combine the results using the three blend
; weights v0, v1, v10. Several terms are multiplied by the literal 0; they are
; still emitted as real multiply/multiply-add instructions.
;
; First index group -> v11, v13, v14.
v_mad_f32 v11, 2.0, v50, -1.0 ; D282000B 03CE64F4
v_mad_f32 v13, 2.0, v43, -1.0 ; D282000D 03CE56F4
v_mul_f32_e32 v11, v0, v11 ; 10161700
v_mac_f32_e32 v11, v1, v13 ; 3E161B01
v_mad_f32 v13, 2.0, v42, -1.0 ; D282000D 03CE54F4
v_mad_f32 v14, 2.0, v49, -1.0 ; D282000E 03CE62F4
; v17 = 0 * v0; reused as the zero-weighted term for all three groups.
v_mul_f32_e32 v17, 0, v0 ; 10220080
v_mad_f32 v13, v1, v13, v17 ; D282000D 04461B01
v_mac_f32_e32 v13, v10, v14 ; 3E1A1D0A
v_mad_f32 v14, 2.0, v51, -1.0 ; D282000E 03CE66F4
v_mad_f32 v20, 2.0, v48, -1.0 ; D2820014 03CE60F4
v_mul_f32_e32 v14, v0, v14 ; 101C1D00
v_mac_f32_e32 v14, 0, v1 ; 3E1C0280
v_mac_f32_e32 v14, v10, v20 ; 3E1C290A
; Second index group -> v12, v20, v16.
v_mad_f32 v12, 2.0, v12, -1.0 ; D282000C 03CE18F4
v_mad_f32 v20, 2.0, v29, -1.0 ; D2820014 03CE3AF4
v_mul_f32_e32 v12, v0, v12 ; 10181900
v_mac_f32_e32 v12, v1, v20 ; 3E182901
v_mad_f32 v20, 2.0, v28, -1.0 ; D2820014 03CE38F4
v_mad_f32 v19, 2.0, v19, -1.0 ; D2820013 03CE26F4
v_mad_f32 v20, v1, v20, v17 ; D2820014 04462901
v_mac_f32_e32 v20, v10, v19 ; 3E28270A
v_mad_f32 v16, 2.0, v16, -1.0 ; D2820010 03CE20F4
v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4
v_mul_f32_e32 v16, v0, v16 ; 10202100
v_mac_f32_e32 v16, 0, v1 ; 3E200280
v_mac_f32_e32 v16, v10, v18 ; 3E20250A
; Third index group -> v0, v17, v18. v0 and v1 are consumed as weights and
; then overwritten (v0 becomes a result, v1 a temporary).
v_mad_f32 v18, 2.0, v30, -1.0 ; D2820012 03CE3CF4
v_mad_f32 v19, 2.0, v24, -1.0 ; D2820013 03CE30F4
v_mul_f32_e32 v18, v0, v18 ; 10242500
v_mul_f32_e32 v0, v0, v19 ; 10002700
v_mad_f32 v19, 2.0, v32, -1.0 ; D2820013 03CE40F4
v_mac_f32_e32 v17, v1, v19 ; 3E222701
v_mad_f32 v19, 2.0, v33, -1.0 ; D2820013 03CE42F4
v_mac_f32_e32 v0, v1, v19 ; 3E002701
v_mac_f32_e32 v18, 0, v1 ; 3E240280
v_mad_f32 v1, 2.0, v15, -1.0 ; D2820001 03CE1EF4
v_mac_f32_e32 v17, v10, v1 ; 3E22030A
v_mad_f32 v1, 2.0, v22, -1.0 ; D2820001 03CE2CF4
v_mac_f32_e32 v18, v10, v1 ; 3E24030A
v_mac_f32_e32 v11, 0, v10 ; 3E161480
v_mac_f32_e32 v12, 0, v10 ; 3E181480
v_mac_f32_e32 v0, 0, v10 ; 3E001480
; Mix the three groups with the interpolated inputs v4, v5, v6:
;   v1  = v13*v4 + v20*v5 + v17*v6
;   v10 = v14*v4 + v16*v5 + v18*v6
;   v4  = v11*v4 + v12*v5 + v0*v6
v_mul_f32_e32 v1, v13, v4 ; 1002090D
v_mul_f32_e32 v10, v14, v4 ; 1014090E
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mac_f32_e32 v1, v20, v5 ; 3E020B14
v_mac_f32_e32 v10, v16, v5 ; 3E140B10
v_mac_f32_e32 v4, v12, v5 ; 3E080B0C
v_mac_f32_e32 v1, v17, v6 ; 3E020D11
v_mac_f32_e32 v10, v18, v6 ; 3E140D12
v_mac_f32_e32 v4, v0, v6 ; 3E080D00
; Final stage: scale the mixed vector (v1, v10, v4), subtract it from the
; interpolated vector (v7, v8, v9), normalise, bias into [0, 1] and export.
;
; s = rsq_clamp(v10^2 + v1^2 + v4^2 + 1.0); the +1.0 keeps the argument
; strictly positive. Then (v1, v5, v0) = s * (v1, v10, v4).
v_mul_f32_e32 v0, v10, v10 ; 1000150A
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_mac_f32_e32 v0, v4, v4 ; 3E000904
v_add_f32_e32 v0, 1.0, v0 ; 060000F2
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v0, v1 ; 10020300
v_mul_f32_e32 v5, v0, v10 ; 100A1500
v_mul_f32_e32 v0, v0, v4 ; 10000900
; (v1, v4, v0) = (v7, v8, v9) - v2 * (v1, v5, v0), where v2 is an interpolated
; input scalar.
v_mad_f32 v1, -v1, v2, v7 ; D2820001 241E0501
v_mad_f32 v4, -v5, v2, v8 ; D2820004 24220505
v_mad_f32 v0, -v0, v2, v9 ; D2820000 24260500
; Normalise (v1, v4, v0) with rsq_clamp of its squared length (v2 is reused
; as the scratch/scale register).
v_mul_f32_e32 v2, v1, v1 ; 10040301
v_mac_f32_e32 v2, v4, v4 ; 3E040904
v_mac_f32_e32 v2, v0, v0 ; 3E040100
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mul_f32_e32 v1, v2, v1 ; 10020302
v_mul_f32_e32 v4, v2, v4 ; 10080902
v_mul_f32_e32 v0, v2, v0 ; 10000102
; Map each component from [-1, 1] to [0, 1]: x * 0.5 + 0.5.
v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0
v_mad_f32 v2, 0.5, v4, 0.5 ; D2820002 03C208F0
v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0
; Pack pairs of floats into half-float pairs: v1 = (v1, v2), v0 = (v0, v3).
; v3 is an interpolated input passed through unchanged as the fourth value.
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700
; Single export of the packed result; this corresponds to the one
; llvm.SI.export call at the end of the IR for this shader. The sources are
; listed as v1, v0, v1, v0.
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 96
VGPRS: 112
Code Size: 2716 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL CONST[0..17]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx
1: UIF TEMP[0].xxxx :0
2: MUL TEMP[0], CONST[2], IN[0].xxxx
3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
5: ADD TEMP[0].xyz, TEMP[0], CONST[5]
6: MOV TEMP[1].x, CONST[6].xxxx
7: MOV TEMP[1].y, CONST[7].xxxx
8: MOV TEMP[1].z, CONST[8].xxxx
9: MOV TEMP[2].x, CONST[6].yyyy
10: MOV TEMP[2].y, CONST[7].yyyy
11: MOV TEMP[2].z, CONST[8].yyyy
12: MOV TEMP[3].x, CONST[6].zzzz
13: MOV TEMP[3].y, CONST[7].zzzz
14: MOV TEMP[3].z, CONST[8].zzzz
15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
19: RSQ TEMP[2].x, TEMP[2].xxxx
20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww
22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz
23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
24: RSQ TEMP[3].x, TEMP[3].xxxx
25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz
27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx
31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
33: MUL TEMP[1], CONST[10], TEMP[0].xxxx
34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1]
35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1]
36: ADD TEMP[0], TEMP[0], CONST[13]
37: ELSE :0
38: MUL TEMP[1], CONST[14], IN[0].xxxx
39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1]
40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1]
41: ADD TEMP[0], TEMP[1], CONST[17]
42: ENDIF
43: MOV TEMP[1].xyw, TEMP[0].xyxw
44: RCP TEMP[2].x, TEMP[0].wwww
45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx
46: MOV_SAT TEMP[2].x, TEMP[2].xxxx
47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx
48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww
49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx
50: MOV TEMP[1].z, TEMP[0].xxxx
51: MOV OUT[0], TEMP[1]
52: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0
%19 = add i32 %5, %7
%20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19)
%21 = extractelement <4 x float> %20, i32 0
%22 = extractelement <4 x float> %20, i32 1
%23 = extractelement <4 x float> %20, i32 2
%24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = add i32 %5, %7
%27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26)
%28 = extractelement <4 x float> %27, i32 0
%29 = extractelement <4 x float> %27, i32 1
%30 = extractelement <4 x float> %27, i32 2
%31 = fcmp une float %16, 0.000000e+00
br i1 %31, label %IF, label %ELSE
IF: ; preds = %main_body
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%72 = fmul float %68, %21
%73 = fmul float %67, %21
%74 = fmul float %66, %21
%75 = fmul float %65, %22
%76 = fadd float %75, %72
%77 = fmul float %64, %22
%78 = fadd float %77, %73
%79 = fmul float %63, %22
%80 = fadd float %79, %74
%81 = fmul float %62, %23
%82 = fadd float %81, %76
%83 = fmul float %61, %23
%84 = fadd float %83, %78
%85 = fmul float %60, %23
%86 = fadd float %85, %80
%87 = fadd float %82, %59
%88 = fadd float %84, %58
%89 = fadd float %86, %57
%90 = fmul float %56, %28
%91 = fmul float %53, %28
%92 = fmul float %50, %28
%93 = fmul float %55, %29
%94 = fadd float %93, %90
%95 = fmul float %52, %29
%96 = fadd float %95, %91
%97 = fmul float %49, %29
%98 = fadd float %97, %92
%99 = fmul float %54, %30
%100 = fadd float %99, %94
%101 = fmul float %51, %30
%102 = fadd float %101, %96
%103 = fmul float %48, %30
%104 = fadd float %103, %98
%105 = fmul float %100, %100
%106 = fmul float %102, %102
%107 = fadd float %106, %105
%108 = fmul float %104, %104
%109 = fadd float %107, %108
%110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109)
%111 = fmul float %100, %110
%112 = fmul float %102, %110
%113 = fmul float %104, %110
%114 = fmul float %87, %13
%115 = fmul float %88, %13
%116 = fmul float %89, %13
%117 = fsub float %71, %114
%118 = fsub float %70, %115
%119 = fsub float %69, %116
%120 = fmul float %117, %117
%121 = fmul float %118, %118
%122 = fadd float %121, %120
%123 = fmul float %119, %119
%124 = fadd float %122, %123
%125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124)
%126 = fmul float %117, %125
%127 = fmul float %118, %125
%128 = fmul float %119, %125
%129 = fmul float %111, %126
%130 = fmul float %112, %127
%131 = fadd float %130, %129
%132 = fmul float %113, %128
%133 = fadd float %131, %132
%134 = fmul float %133, %133
%135 = fsub float 1.000000e+00, %134
%136 = call float @llvm.sqrt.f32(float %135)
%137 = fmul float %16, %136
%138 = fmul float %111, %137
%139 = fmul float %112, %137
%140 = fmul float %113, %137
%141 = fsub float %87, %138
%142 = fsub float %88, %139
%143 = fsub float %89, %140
%144 = fmul float %47, %141
%145 = fmul float %46, %141
%146 = fmul float %45, %141
%147 = fmul float %44, %141
%148 = fmul float %43, %142
%149 = fadd float %148, %144
%150 = fmul float %42, %142
%151 = fadd float %150, %145
%152 = fmul float %41, %142
%153 = fadd float %152, %146
%154 = fmul float %40, %142
%155 = fadd float %154, %147
%156 = fmul float %39, %143
%157 = fadd float %156, %149
%158 = fmul float %38, %143
%159 = fadd float %158, %151
%160 = fmul float %37, %143
%161 = fadd float %160, %153
%162 = fmul float %36, %143
%163 = fadd float %162, %155
%164 = fadd float %157, %35
%165 = fadd float %159, %34
%166 = fadd float %161, %33
%167 = fadd float %163, %32
br label %ENDIF
ELSE: ; preds = %main_body
%168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%184 = fmul float %183, %21
%185 = fmul float %182, %21
%186 = fmul float %181, %21
%187 = fmul float %180, %21
%188 = fmul float %179, %22
%189 = fadd float %188, %184
%190 = fmul float %178, %22
%191 = fadd float %190, %185
%192 = fmul float %177, %22
%193 = fadd float %192, %186
%194 = fmul float %176, %22
%195 = fadd float %194, %187
%196 = fmul float %175, %23
%197 = fadd float %196, %189
%198 = fmul float %174, %23
%199 = fadd float %198, %191
%200 = fmul float %173, %23
%201 = fadd float %200, %193
%202 = fmul float %172, %23
%203 = fadd float %202, %195
%204 = fadd float %197, %171
%205 = fadd float %199, %170
%206 = fadd float %201, %169
%207 = fadd float %203, %168
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ]
%temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ]
%temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ]
%temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ]
%208 = fdiv float 1.000000e+00, %temp3.0
%209 = fmul float %14, %208
%210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00)
%211 = fadd float %temp2.0, %210
%212 = fsub float -0.000000e+00, %temp3.0
%213 = call float @llvm.maxnum.f32(float %211, float %212)
%214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v4, s10, v0 ; 4A08000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147
s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142
s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143
s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144
s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145
s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146
s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D
s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E
s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F
s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140
s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141
s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138
s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139
s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A
s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B
s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mul_f32_e32 v9, s20, v0 ; 10120014
v_mul_f32_e32 v10, s21, v0 ; 10140015
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mac_f32_e32 v7, s13, v1 ; 3E0E020D
v_mac_f32_e32 v9, s14, v1 ; 3E12020E
v_mac_f32_e32 v10, s15, v1 ; 3E14020F
v_mac_f32_e32 v6, s16, v2 ; 3E0C0410
v_mac_f32_e32 v7, s17, v2 ; 3E0E0411
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_add_f32_e32 v6, s10, v6 ; 060C0C0A
v_add_f32_e32 v8, s11, v7 ; 06100E0B
v_add_f32_e32 v9, s12, v9 ; 0612120C
v_add_f32_e32 v7, s5, v10 ; 060E1405
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137
s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132
s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133
s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134
s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135
s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136
s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D
s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E
s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F
s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130
s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131
s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128
s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129
s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A
s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B
s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C
s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D
s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E
s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120
s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121
s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122
s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116
s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118
s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119
s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A
s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C
s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110
s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s27, v3 ; 100C061B
v_mac_f32_e32 v6, s28, v4 ; 3E0C081C
v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D
s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112
v_mul_f32_e32 v7, s31, v3 ; 100E061F
v_mac_f32_e32 v7, s32, v4 ; 3E0E0820
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v3, s34, v3 ; 10060622
v_mac_f32_e32 v3, s25, v4 ; 3E060819
v_mac_f32_e32 v3, s26, v5 ; 3E060A1A
s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114
s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115
s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109
s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A
s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C
s_buffer_load_dword s32, s[0:3], 0xd ; C210010D
s_buffer_load_dword s33, s[0:3], 0xe ; C210810E
s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100
v_mul_f32_e32 v4, v7, v7 ; 10080F07
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s28, v0 ; 100A001C
v_mul_f32_e32 v8, s29, v0 ; 1010001D
s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101
v_mac_f32_e32 v5, s32, v1 ; 3E0A0220
v_mac_f32_e32 v8, s33, v1 ; 3E100221
v_mac_f32_e32 v5, s36, v2 ; 3E0A0424
v_mac_f32_e32 v8, s27, v2 ; 3E10041B
v_add_f32_e32 v5, s26, v5 ; 060A0A1A
v_add_f32_e32 v8, s30, v8 ; 0610101E
s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102
s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103
s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v9, s27 ; 7E12021B
v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305
v_mul_f32_e32 v0, s29, v0 ; 1000001D
v_mac_f32_e32 v0, s31, v1 ; 3E00021F
v_mac_f32_e32 v0, s35, v2 ; 3E000423
v_add_f32_e32 v0, s25, v0 ; 06000019
v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300
v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308
v_mul_f32_e32 v9, v1, v1 ; 10120301
v_mac_f32_e32 v9, v10, v10 ; 3E12150A
v_mac_f32_e32 v9, v2, v2 ; 3E120502
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mul_f32_e32 v7, v4, v7 ; 100E0F04
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v1, v9, v1 ; 10020309
v_mul_f32_e32 v6, v9, v10 ; 100C1509
v_mul_f32_e32 v2, v9, v2 ; 10040509
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, v6, v3 ; 3E020706
v_mac_f32_e32 v1, v2, v4 ; 3E020902
v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307
v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303
v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304
v_mul_f32_e32 v3, s20, v0 ; 10060014
v_mul_f32_e32 v4, s21, v0 ; 10080015
v_mul_f32_e32 v5, s22, v0 ; 100A0016
v_mul_f32_e32 v0, s23, v0 ; 10000017
v_mac_f32_e32 v3, s24, v2 ; 3E060418
v_mac_f32_e32 v4, s15, v2 ; 3E08040F
v_mac_f32_e32 v5, s16, v2 ; 3E0A0410
v_mac_f32_e32 v0, s17, v2 ; 3E000411
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v4, s19, v1 ; 3E080213
v_mac_f32_e32 v5, s10, v1 ; 3E0A020A
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
v_add_f32_e32 v6, s12, v3 ; 060C060C
v_add_f32_e32 v8, s13, v4 ; 0610080D
v_add_f32_e32 v9, s14, v5 ; 06120A0E
v_add_f32_e32 v7, s9, v0 ; 060E0009
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_rcp_f32_e32 v0, v7 ; 7E005507
v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_add_f32_e32 v0, v0, v9 ; 06001300
v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00
v_mul_f32_e32 v0, v0, v1 ; 10000300
v_mac_f32_e32 v0, s5, v2 ; 3E000405
exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 744 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxx
1: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
%24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%25 = bitcast i32 %24 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 20 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL CONST[0..17]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx
1: UIF TEMP[0].xxxx :0
2: MUL TEMP[0], CONST[2], IN[0].xxxx
3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
5: ADD TEMP[0].xyz, TEMP[0], CONST[5]
6: MOV TEMP[1].x, CONST[6].xxxx
7: MOV TEMP[1].y, CONST[7].xxxx
8: MOV TEMP[1].z, CONST[8].xxxx
9: MOV TEMP[2].x, CONST[6].yyyy
10: MOV TEMP[2].y, CONST[7].yyyy
11: MOV TEMP[2].z, CONST[8].yyyy
12: MOV TEMP[3].x, CONST[6].zzzz
13: MOV TEMP[3].y, CONST[7].zzzz
14: MOV TEMP[3].z, CONST[8].zzzz
15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
19: RSQ TEMP[2].x, TEMP[2].xxxx
20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww
22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz
23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
24: RSQ TEMP[3].x, TEMP[3].xxxx
25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz
27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx
31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
33: MUL TEMP[1], CONST[10], TEMP[0].xxxx
34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1]
35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1]
36: ADD TEMP[0], TEMP[0], CONST[13]
37: ELSE :0
38: MUL TEMP[1], CONST[14], IN[0].xxxx
39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1]
40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1]
41: ADD TEMP[0], TEMP[1], CONST[17]
42: ENDIF
43: MOV TEMP[1].xyw, TEMP[0].xyxw
44: RCP TEMP[2].x, TEMP[0].wwww
45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx
46: MOV_SAT TEMP[2].x, TEMP[2].xxxx
47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx
48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww
49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx
50: MOV TEMP[1].z, TEMP[0].xxxx
51: MOV OUT[0], TEMP[1]
52: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0
%19 = add i32 %5, %7
%20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19)
%21 = extractelement <4 x float> %20, i32 0
%22 = extractelement <4 x float> %20, i32 1
%23 = extractelement <4 x float> %20, i32 2
%24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = add i32 %5, %7
%27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26)
%28 = extractelement <4 x float> %27, i32 0
%29 = extractelement <4 x float> %27, i32 1
%30 = extractelement <4 x float> %27, i32 2
%31 = fcmp une float %16, 0.000000e+00
br i1 %31, label %IF, label %ELSE
IF: ; preds = %main_body
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%72 = fmul float %68, %21
%73 = fmul float %67, %21
%74 = fmul float %66, %21
%75 = fmul float %65, %22
%76 = fadd float %75, %72
%77 = fmul float %64, %22
%78 = fadd float %77, %73
%79 = fmul float %63, %22
%80 = fadd float %79, %74
%81 = fmul float %62, %23
%82 = fadd float %81, %76
%83 = fmul float %61, %23
%84 = fadd float %83, %78
%85 = fmul float %60, %23
%86 = fadd float %85, %80
%87 = fadd float %82, %59
%88 = fadd float %84, %58
%89 = fadd float %86, %57
%90 = fmul float %56, %28
%91 = fmul float %53, %28
%92 = fmul float %50, %28
%93 = fmul float %55, %29
%94 = fadd float %93, %90
%95 = fmul float %52, %29
%96 = fadd float %95, %91
%97 = fmul float %49, %29
%98 = fadd float %97, %92
%99 = fmul float %54, %30
%100 = fadd float %99, %94
%101 = fmul float %51, %30
%102 = fadd float %101, %96
%103 = fmul float %48, %30
%104 = fadd float %103, %98
%105 = fmul float %100, %100
%106 = fmul float %102, %102
%107 = fadd float %106, %105
%108 = fmul float %104, %104
%109 = fadd float %107, %108
%110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109)
%111 = fmul float %100, %110
%112 = fmul float %102, %110
%113 = fmul float %104, %110
%114 = fmul float %87, %13
%115 = fmul float %88, %13
%116 = fmul float %89, %13
%117 = fsub float %71, %114
%118 = fsub float %70, %115
%119 = fsub float %69, %116
%120 = fmul float %117, %117
%121 = fmul float %118, %118
%122 = fadd float %121, %120
%123 = fmul float %119, %119
%124 = fadd float %122, %123
%125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124)
%126 = fmul float %117, %125
%127 = fmul float %118, %125
%128 = fmul float %119, %125
%129 = fmul float %111, %126
%130 = fmul float %112, %127
%131 = fadd float %130, %129
%132 = fmul float %113, %128
%133 = fadd float %131, %132
%134 = fmul float %133, %133
%135 = fsub float 1.000000e+00, %134
%136 = call float @llvm.sqrt.f32(float %135)
%137 = fmul float %16, %136
%138 = fmul float %111, %137
%139 = fmul float %112, %137
%140 = fmul float %113, %137
%141 = fsub float %87, %138
%142 = fsub float %88, %139
%143 = fsub float %89, %140
%144 = fmul float %47, %141
%145 = fmul float %46, %141
%146 = fmul float %45, %141
%147 = fmul float %44, %141
%148 = fmul float %43, %142
%149 = fadd float %148, %144
%150 = fmul float %42, %142
%151 = fadd float %150, %145
%152 = fmul float %41, %142
%153 = fadd float %152, %146
%154 = fmul float %40, %142
%155 = fadd float %154, %147
%156 = fmul float %39, %143
%157 = fadd float %156, %149
%158 = fmul float %38, %143
%159 = fadd float %158, %151
%160 = fmul float %37, %143
%161 = fadd float %160, %153
%162 = fmul float %36, %143
%163 = fadd float %162, %155
%164 = fadd float %157, %35
%165 = fadd float %159, %34
%166 = fadd float %161, %33
%167 = fadd float %163, %32
br label %ENDIF
ELSE: ; preds = %main_body
%168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%184 = fmul float %183, %21
%185 = fmul float %182, %21
%186 = fmul float %181, %21
%187 = fmul float %180, %21
%188 = fmul float %179, %22
%189 = fadd float %188, %184
%190 = fmul float %178, %22
%191 = fadd float %190, %185
%192 = fmul float %177, %22
%193 = fadd float %192, %186
%194 = fmul float %176, %22
%195 = fadd float %194, %187
%196 = fmul float %175, %23
%197 = fadd float %196, %189
%198 = fmul float %174, %23
%199 = fadd float %198, %191
%200 = fmul float %173, %23
%201 = fadd float %200, %193
%202 = fmul float %172, %23
%203 = fadd float %202, %195
%204 = fadd float %197, %171
%205 = fadd float %199, %170
%206 = fadd float %201, %169
%207 = fadd float %203, %168
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ]
%temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ]
%temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ]
%temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ]
%208 = fdiv float 1.000000e+00, %temp3.0
%209 = fmul float %14, %208
%210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00)
%211 = fadd float %temp2.0, %210
%212 = fsub float -0.000000e+00, %temp3.0
%213 = call float @llvm.maxnum.f32(float %211, float %212)
%214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v4, s10, v0 ; 4A08000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147
s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142
s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143
s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144
s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145
s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146
s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D
s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E
s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F
s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140
s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141
s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138
s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139
s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A
s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B
s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mul_f32_e32 v9, s20, v0 ; 10120014
v_mul_f32_e32 v10, s21, v0 ; 10140015
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mac_f32_e32 v7, s13, v1 ; 3E0E020D
v_mac_f32_e32 v9, s14, v1 ; 3E12020E
v_mac_f32_e32 v10, s15, v1 ; 3E14020F
v_mac_f32_e32 v6, s16, v2 ; 3E0C0410
v_mac_f32_e32 v7, s17, v2 ; 3E0E0411
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_add_f32_e32 v6, s10, v6 ; 060C0C0A
v_add_f32_e32 v8, s11, v7 ; 06100E0B
v_add_f32_e32 v9, s12, v9 ; 0612120C
v_add_f32_e32 v7, s5, v10 ; 060E1405
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137
s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132
s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133
s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134
s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135
s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136
s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D
s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E
s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F
s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130
s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131
s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128
s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129
s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A
s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B
s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C
s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D
s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E
s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120
s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121
s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122
s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116
s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118
s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119
s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A
s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C
s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110
s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s27, v3 ; 100C061B
v_mac_f32_e32 v6, s28, v4 ; 3E0C081C
v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D
s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112
v_mul_f32_e32 v7, s31, v3 ; 100E061F
v_mac_f32_e32 v7, s32, v4 ; 3E0E0820
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v3, s34, v3 ; 10060622
v_mac_f32_e32 v3, s25, v4 ; 3E060819
v_mac_f32_e32 v3, s26, v5 ; 3E060A1A
s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114
s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115
s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109
s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A
s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C
s_buffer_load_dword s32, s[0:3], 0xd ; C210010D
s_buffer_load_dword s33, s[0:3], 0xe ; C210810E
s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100
v_mul_f32_e32 v4, v7, v7 ; 10080F07
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s28, v0 ; 100A001C
v_mul_f32_e32 v8, s29, v0 ; 1010001D
s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101
v_mac_f32_e32 v5, s32, v1 ; 3E0A0220
v_mac_f32_e32 v8, s33, v1 ; 3E100221
v_mac_f32_e32 v5, s36, v2 ; 3E0A0424
v_mac_f32_e32 v8, s27, v2 ; 3E10041B
v_add_f32_e32 v5, s26, v5 ; 060A0A1A
v_add_f32_e32 v8, s30, v8 ; 0610101E
s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102
s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103
s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v9, s27 ; 7E12021B
v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305
v_mul_f32_e32 v0, s29, v0 ; 1000001D
v_mac_f32_e32 v0, s31, v1 ; 3E00021F
v_mac_f32_e32 v0, s35, v2 ; 3E000423
v_add_f32_e32 v0, s25, v0 ; 06000019
v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300
v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308
v_mul_f32_e32 v9, v1, v1 ; 10120301
v_mac_f32_e32 v9, v10, v10 ; 3E12150A
v_mac_f32_e32 v9, v2, v2 ; 3E120502
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mul_f32_e32 v7, v4, v7 ; 100E0F04
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v1, v9, v1 ; 10020309
v_mul_f32_e32 v6, v9, v10 ; 100C1509
v_mul_f32_e32 v2, v9, v2 ; 10040509
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, v6, v3 ; 3E020706
v_mac_f32_e32 v1, v2, v4 ; 3E020902
v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307
v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303
v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304
v_mul_f32_e32 v3, s20, v0 ; 10060014
v_mul_f32_e32 v4, s21, v0 ; 10080015
v_mul_f32_e32 v5, s22, v0 ; 100A0016
v_mul_f32_e32 v0, s23, v0 ; 10000017
v_mac_f32_e32 v3, s24, v2 ; 3E060418
v_mac_f32_e32 v4, s15, v2 ; 3E08040F
v_mac_f32_e32 v5, s16, v2 ; 3E0A0410
v_mac_f32_e32 v0, s17, v2 ; 3E000411
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v4, s19, v1 ; 3E080213
v_mac_f32_e32 v5, s10, v1 ; 3E0A020A
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
v_add_f32_e32 v6, s12, v3 ; 060C060C
v_add_f32_e32 v8, s13, v4 ; 0610080D
v_add_f32_e32 v9, s14, v5 ; 06120A0E
v_add_f32_e32 v7, s9, v0 ; 060E0009
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_rcp_f32_e32 v0, v7 ; 7E005507
v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_add_f32_e32 v0, v0, v9 ; 06001300
v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00
v_mul_f32_e32 v0, v0, v1 ; 10000300
v_mac_f32_e32 v0, s5, v2 ; 3E000405
exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 744 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxx
1: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI FRAG listing printed just above
; (MOV OUT[0], IMM[0].xxxx with IMM[0] = 0.0).
; None of the function arguments are read; the body only builds constants.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Pack the pair (0.0, 0.0) into one i32 and reinterpret it as float so it can
; be passed through the float operands of llvm.SI.export.
%22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
; Second, identical pack of (0.0, 0.0).
%24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%25 = bitcast i32 %24 to float
; Export the two packed values in the order %23, %25, %23, %25.
; NOTE(review): the five leading i32 operands are export control fields; their
; exact meaning is not visible in this dump - confirm against the backend.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
; Machine code for the fragment shader above: one pack of two literal-0
; operands into v0, one export that uses v0 for all four source slots, then
; program end.
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 20 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL CONST[0..17]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx
1: UIF TEMP[0].xxxx :0
2: MUL TEMP[0], CONST[2], IN[0].xxxx
3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
5: ADD TEMP[0].xyz, TEMP[0], CONST[5]
6: MOV TEMP[1].x, CONST[6].xxxx
7: MOV TEMP[1].y, CONST[7].xxxx
8: MOV TEMP[1].z, CONST[8].xxxx
9: MOV TEMP[2].x, CONST[6].yyyy
10: MOV TEMP[2].y, CONST[7].yyyy
11: MOV TEMP[2].z, CONST[8].yyyy
12: MOV TEMP[3].x, CONST[6].zzzz
13: MOV TEMP[3].y, CONST[7].zzzz
14: MOV TEMP[3].z, CONST[8].zzzz
15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
19: RSQ TEMP[2].x, TEMP[2].xxxx
20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww
22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz
23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
24: RSQ TEMP[3].x, TEMP[3].xxxx
25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz
27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx
31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
33: MUL TEMP[1], CONST[10], TEMP[0].xxxx
34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1]
35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1]
36: ADD TEMP[0], TEMP[0], CONST[13]
37: ELSE :0
38: MUL TEMP[1], CONST[14], IN[0].xxxx
39: MAD TEMP[1], CONST[15], IN[0].yyyy, TEMP[1]
40: MAD TEMP[1], CONST[16], IN[0].zzzz, TEMP[1]
41: ADD TEMP[0], TEMP[1], CONST[17]
42: ENDIF
43: MOV TEMP[1].xyw, TEMP[0].xyxw
44: RCP TEMP[2].x, TEMP[0].wwww
45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx
46: MOV_SAT TEMP[2].x, TEMP[2].xxxx
47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx
48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww
49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx
50: MOV TEMP[1].z, TEMP[0].xxxx
51: MOV OUT[0], TEMP[1]
52: END
; ModuleID = 'tgsi'
; Vertex shader entry point for the TGSI VERT listing printed just above
; (DCL CONST[0..17], one POSITION output).
;
; Structure of the body:
;   main_body - load the constant descriptor from %1[0], read four constants,
;               fetch two vertex inputs through descriptors %4[0] and %4[1]
;               using index %5 + %7, then branch on (const@24 != 0.0).
;   IF        - TGSI instructions 2..36.
;   ELSE      - TGSI instructions 38..41.
;   ENDIF     - TGSI instructions 43..51 (z adjustment) and the position export.
;
; Constant byte offsets below line up with the TGSI listing as
; offset = 16 * CONST index + 4 * component (x=0, y=1, z=2, w=3); this is
; read off from how each loaded value is used, e.g. offset 24 feeds the
; FSNE-style compare that TGSI performs on CONST[1].z.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %12 = descriptor used by every llvm.SI.load.const call in this function.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constants needed on both paths: offset 12 (CONST[0].w), 16 (CONST[1].x),
; 20 (CONST[1].y) and 24 (CONST[1].z).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
; First vertex input (TGSI IN[0]); only components 0..2 are extracted.
%17 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%18 = load <16 x i8>, <16 x i8> addrspace(2)* %17, align 16, !tbaa !0
%19 = add i32 %5, %7
%20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19)
%21 = extractelement <4 x float> %20, i32 0
%22 = extractelement <4 x float> %20, i32 1
%23 = extractelement <4 x float> %20, i32 2
; Second vertex input (TGSI IN[1]); again only components 0..2 are extracted.
%24 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = add i32 %5, %7
%27 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %25, i32 0, i32 %26)
%28 = extractelement <4 x float> %27, i32 0
%29 = extractelement <4 x float> %27, i32 1
%30 = extractelement <4 x float> %27, i32 2
; TGSI 0-1: take the IF path when the constant at offset 24 is not equal to 0.0
; (unordered-or-not-equal compare).
%31 = fcmp une float %16, 0.000000e+00
br i1 %31, label %IF, label %ELSE
IF: ; preds = %main_body
; Constant loads for this path, highest offset first:
;   %32..%35 = offsets 220..208 (CONST[13].wzyx)
;   %36..%39 = offsets 204..192 (CONST[12].wzyx)
;   %40..%43 = offsets 188..176 (CONST[11].wzyx)
;   %44..%47 = offsets 172..160 (CONST[10].wzyx)
;   %48..%56 = xyz of CONST[8], CONST[7], CONST[6] (z first)
;   %57..%68 = xyz of CONST[5], CONST[4], CONST[3], CONST[2] (z first)
;   %69..%71 = offsets 8, 4, 0 (CONST[0].zyx)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
; TGSI 2-5: (%87, %88, %89) = CONST[2]*IN0.x + CONST[3]*IN0.y + CONST[4]*IN0.z
; + CONST[5], xyz only (the w lane is not computed here).
%72 = fmul float %68, %21
%73 = fmul float %67, %21
%74 = fmul float %66, %21
%75 = fmul float %65, %22
%76 = fadd float %75, %72
%77 = fmul float %64, %22
%78 = fadd float %77, %73
%79 = fmul float %63, %22
%80 = fadd float %79, %74
%81 = fmul float %62, %23
%82 = fadd float %81, %76
%83 = fmul float %61, %23
%84 = fadd float %83, %78
%85 = fmul float %60, %23
%86 = fadd float %85, %80
%87 = fadd float %82, %59
%88 = fadd float %84, %58
%89 = fadd float %86, %57
; TGSI 6-17: (%100, %102, %104) = per-row dot products of CONST[6], CONST[7],
; CONST[8] (xyz) with IN[1].xyz.
%90 = fmul float %56, %28
%91 = fmul float %53, %28
%92 = fmul float %50, %28
%93 = fmul float %55, %29
%94 = fadd float %93, %90
%95 = fmul float %52, %29
%96 = fadd float %95, %91
%97 = fmul float %49, %29
%98 = fadd float %97, %92
%99 = fmul float %54, %30
%100 = fadd float %99, %94
%101 = fmul float %51, %30
%102 = fadd float %101, %96
%103 = fmul float %48, %30
%104 = fadd float %103, %98
; TGSI 18-20: scale that vector by rsq of its squared length
; -> (%111, %112, %113).
%105 = fmul float %100, %100
%106 = fmul float %102, %102
%107 = fadd float %106, %105
%108 = fmul float %104, %104
%109 = fadd float %107, %108
%110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109)
%111 = fmul float %100, %110
%112 = fmul float %102, %110
%113 = fmul float %104, %110
; TGSI 21-22: (%117, %118, %119) = CONST[0].xyz - (%87, %88, %89) * CONST[0].w.
%114 = fmul float %87, %13
%115 = fmul float %88, %13
%116 = fmul float %89, %13
%117 = fsub float %71, %114
%118 = fsub float %70, %115
%119 = fsub float %69, %116
; TGSI 23-25: scale by rsq of squared length -> (%126, %127, %128).
%120 = fmul float %117, %117
%121 = fmul float %118, %118
%122 = fadd float %121, %120
%123 = fmul float %119, %119
%124 = fadd float %122, %123
%125 = call float @llvm.AMDGPU.rsq.clamped.f32(float %124)
%126 = fmul float %117, %125
%127 = fmul float %118, %125
%128 = fmul float %119, %125
; TGSI 26: %133 = dot of the two scaled vectors.
%129 = fmul float %111, %126
%130 = fmul float %112, %127
%131 = fadd float %130, %129
%132 = fmul float %113, %128
%133 = fadd float %131, %132
; TGSI 27-30: %137 = CONST[1].z * sqrt(1 - %133^2).
%134 = fmul float %133, %133
%135 = fsub float 1.000000e+00, %134
%136 = call float @llvm.sqrt.f32(float %135)
%137 = fmul float %16, %136
; TGSI 31-32: subtract (%111, %112, %113) * %137 from (%87, %88, %89)
; -> (%141, %142, %143).
%138 = fmul float %111, %137
%139 = fmul float %112, %137
%140 = fmul float %113, %137
%141 = fsub float %87, %138
%142 = fsub float %88, %139
%143 = fsub float %89, %140
; TGSI 33-36: (%164..%167) = CONST[10]*%141 + CONST[11]*%142 + CONST[12]*%143
; + CONST[13], all four components.
%144 = fmul float %47, %141
%145 = fmul float %46, %141
%146 = fmul float %45, %141
%147 = fmul float %44, %141
%148 = fmul float %43, %142
%149 = fadd float %148, %144
%150 = fmul float %42, %142
%151 = fadd float %150, %145
%152 = fmul float %41, %142
%153 = fadd float %152, %146
%154 = fmul float %40, %142
%155 = fadd float %154, %147
%156 = fmul float %39, %143
%157 = fadd float %156, %149
%158 = fmul float %38, %143
%159 = fadd float %158, %151
%160 = fmul float %37, %143
%161 = fadd float %160, %153
%162 = fmul float %36, %143
%163 = fadd float %162, %155
%164 = fadd float %157, %35
%165 = fadd float %159, %34
%166 = fadd float %161, %33
%167 = fadd float %163, %32
br label %ENDIF
ELSE: ; preds = %main_body
; Constant loads for this path: offsets 284..224, i.e. CONST[17], CONST[16],
; CONST[15], CONST[14], each in w,z,y,x order.
%168 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%169 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%172 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%173 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%177 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
; TGSI 38-41: (%204..%207) = CONST[14]*IN0.x + CONST[15]*IN0.y + CONST[16]*IN0.z
; + CONST[17], all four components.
%184 = fmul float %183, %21
%185 = fmul float %182, %21
%186 = fmul float %181, %21
%187 = fmul float %180, %21
%188 = fmul float %179, %22
%189 = fadd float %188, %184
%190 = fmul float %178, %22
%191 = fadd float %190, %185
%192 = fmul float %177, %22
%193 = fadd float %192, %186
%194 = fmul float %176, %22
%195 = fadd float %194, %187
%196 = fmul float %175, %23
%197 = fadd float %196, %189
%198 = fmul float %174, %23
%199 = fadd float %198, %191
%200 = fmul float %173, %23
%201 = fadd float %200, %193
%202 = fmul float %172, %23
%203 = fadd float %202, %195
%204 = fadd float %197, %171
%205 = fadd float %199, %170
%206 = fadd float %201, %169
%207 = fadd float %203, %168
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
; Merge the x, y, z, w results of whichever path ran.
%temp.0 = phi float [ %164, %IF ], [ %204, %ELSE ]
%temp1.0 = phi float [ %165, %IF ], [ %205, %ELSE ]
%temp2.0 = phi float [ %166, %IF ], [ %206, %ELSE ]
%temp3.0 = phi float [ %167, %IF ], [ %207, %ELSE ]
; TGSI 44-47: %211 = z + clamp(CONST[1].x * (1 / w), 0, 1).
%208 = fdiv float 1.000000e+00, %temp3.0
%209 = fmul float %14, %208
%210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00)
%211 = fadd float %temp2.0, %210
; TGSI 48: %213 = max(%211, -w).
%212 = fsub float -0.000000e+00, %temp3.0
%213 = call float @llvm.maxnum.f32(float %211, float %212)
; TGSI 49: lrp with factor CONST[1].y between %213 and %211.
%214 = call float @llvm.AMDGPU.lrp(float %15, float %213, float %211)
; TGSI 50-51: export (x, y, adjusted z, w) as the POSITION output.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %214, float %temp3.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR is printed above.
; Flow, as read from the instructions below:
;   1. v4 = s10 + v0 indexes two buffer_load_format_xyzw fetches.
;   2. s4 (constant dword 0x6) selects one of two exec-masked regions.
;   3. Both regions leave x in v6, y in v8, z in v9 and w in v7.
;   4. The tail adjusts z and exports (v6, v8, v0, v7).
; Constant dword index N corresponds to byte offset 4*N in the IR's
; llvm.SI.load.const calls (e.g. dword 0x6 = offset 24).
; NOTE(review): labels BB0_1 / BB0_4 named by the two s_cbranch_execz
; instructions are not printed anywhere in this listing, and both branches
; show the same encoding BF880000 - confirm whether targets were unresolved
; when the dump was taken.
v_add_i32_e32 v4, s10, v0 ; 4A08000A
; s[0:3] is the resource used by every s_buffer_load_dword below;
; s[4:7] and s[8:11] are the resources of the two vertex fetches.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; First fetch -> v[0:3]; only v0..v2 are read as this input afterwards.
buffer_load_format_xyzw v[0:3], v4, s[4:7], 0 idxen ; E00C2000 80010004
s_waitcnt vmcnt(0) ; BF8C0770
; Second fetch -> v[3:6]; it reuses v3, the last register of the first fetch.
buffer_load_format_xyzw v[3:6], v4, s[8:11], 0 idxen ; E00C2000 80020304
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Region 1 runs for lanes where s4 == 0.0, which is the IR's ELSE block
; (the IR branches to IF on s4 != 0). s[6:7] keeps the mask for region 2.
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
; Region 1 constants: dwords 0x38..0x47 (byte offsets 224..284).
s_buffer_load_dword s5, s[0:3], 0x47 ; C2028147
s_buffer_load_dword s8, s[0:3], 0x42 ; C2040142
s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143
s_buffer_load_dword s10, s[0:3], 0x44 ; C2050144
s_buffer_load_dword s11, s[0:3], 0x45 ; C2058145
s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146
s_buffer_load_dword s13, s[0:3], 0x3d ; C206813D
s_buffer_load_dword s14, s[0:3], 0x3e ; C207013E
s_buffer_load_dword s15, s[0:3], 0x3f ; C207813F
s_buffer_load_dword s16, s[0:3], 0x40 ; C2080140
s_buffer_load_dword s17, s[0:3], 0x41 ; C2088141
s_buffer_load_dword s18, s[0:3], 0x38 ; C2090138
s_buffer_load_dword s19, s[0:3], 0x39 ; C2098139
s_buffer_load_dword s20, s[0:3], 0x3a ; C20A013A
s_buffer_load_dword s21, s[0:3], 0x3b ; C20A813B
s_buffer_load_dword s22, s[0:3], 0x3c ; C20B013C
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four accumulators (v6, v7, v9, v10), each = c0*v0 + c1*v1 + c2*v2 + c3,
; using dwords 0x38+k, 0x3c+k, 0x40+k, 0x44+k for k = 0..3.
v_mul_f32_e32 v6, s18, v0 ; 100C0012
v_mul_f32_e32 v7, s19, v0 ; 100E0013
v_mul_f32_e32 v9, s20, v0 ; 10120014
v_mul_f32_e32 v10, s21, v0 ; 10140015
v_mac_f32_e32 v6, s22, v1 ; 3E0C0216
v_mac_f32_e32 v7, s13, v1 ; 3E0E020D
v_mac_f32_e32 v9, s14, v1 ; 3E12020E
v_mac_f32_e32 v10, s15, v1 ; 3E14020F
v_mac_f32_e32 v6, s16, v2 ; 3E0C0410
v_mac_f32_e32 v7, s17, v2 ; 3E0E0411
v_mac_f32_e32 v9, s8, v2 ; 3E120408
v_mac_f32_e32 v10, s9, v2 ; 3E140409
; Final adds place the results in x=v6, y=v8, z=v9, w=v7.
v_add_f32_e32 v6, s10, v6 ; 060C0C0A
v_add_f32_e32 v8, s11, v7 ; 06100E0B
v_add_f32_e32 v9, s12, v9 ; 0612120C
v_add_f32_e32 v7, s5, v10 ; 060E1405
; Switch exec to the lanes that skipped region 1. Dwords 0x4 (s8) and
; 0x5 (s5) are loaded here and consumed by the common tail.
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
; Region 2 (lanes where s4 != 0, the IR's IF block): first batch of constants.
s_buffer_load_dword s9, s[0:3], 0x37 ; C2048137
s_buffer_load_dword s10, s[0:3], 0x32 ; C2050132
s_buffer_load_dword s11, s[0:3], 0x33 ; C2058133
s_buffer_load_dword s12, s[0:3], 0x34 ; C2060134
s_buffer_load_dword s13, s[0:3], 0x35 ; C2068135
s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136
s_buffer_load_dword s15, s[0:3], 0x2d ; C207812D
s_buffer_load_dword s16, s[0:3], 0x2e ; C208012E
s_buffer_load_dword s17, s[0:3], 0x2f ; C208812F
s_buffer_load_dword s18, s[0:3], 0x30 ; C2090130
s_buffer_load_dword s19, s[0:3], 0x31 ; C2098131
s_buffer_load_dword s20, s[0:3], 0x28 ; C20A0128
s_buffer_load_dword s21, s[0:3], 0x29 ; C20A8129
s_buffer_load_dword s22, s[0:3], 0x2a ; C20B012A
s_buffer_load_dword s23, s[0:3], 0x2b ; C20B812B
s_buffer_load_dword s24, s[0:3], 0x2c ; C20C012C
s_buffer_load_dword s25, s[0:3], 0x1d ; C20C811D
s_buffer_load_dword s26, s[0:3], 0x1e ; C20D011E
s_buffer_load_dword s27, s[0:3], 0x20 ; C20D8120
s_buffer_load_dword s28, s[0:3], 0x21 ; C20E0121
s_buffer_load_dword s29, s[0:3], 0x22 ; C20E8122
s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116
s_buffer_load_dword s31, s[0:3], 0x18 ; C20F8118
s_buffer_load_dword s32, s[0:3], 0x19 ; C2100119
s_buffer_load_dword s33, s[0:3], 0x1a ; C210811A
s_buffer_load_dword s34, s[0:3], 0x1c ; C211011C
s_buffer_load_dword s35, s[0:3], 0x10 ; C2118110
s_buffer_load_dword s36, s[0:3], 0x11 ; C2120111
s_waitcnt lgkmcnt(0) ; BF8C007F
; Three dot products of the second input (v3, v4, v5) with constant rows:
;   v6 = dwords 0x20..0x22, v7 = dwords 0x18..0x1a, v3 = dwords 0x1c..0x1e.
; Scalar registers are reloaded with further constants once their previous
; value has been consumed.
v_mul_f32_e32 v6, s27, v3 ; 100C061B
v_mac_f32_e32 v6, s28, v4 ; 3E0C081C
v_mac_f32_e32 v6, s29, v5 ; 3E0C0A1D
s_buffer_load_dword s27, s[0:3], 0x12 ; C20D8112
v_mul_f32_e32 v7, s31, v3 ; 100E061F
v_mac_f32_e32 v7, s32, v4 ; 3E0E0820
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v3, s34, v3 ; 10060622
v_mac_f32_e32 v3, s25, v4 ; 3E060819
v_mac_f32_e32 v3, s26, v5 ; 3E060A1A
s_buffer_load_dword s25, s[0:3], 0x14 ; C20C8114
s_buffer_load_dword s26, s[0:3], 0x15 ; C20D0115
s_buffer_load_dword s28, s[0:3], 0x9 ; C20E0109
s_buffer_load_dword s29, s[0:3], 0xa ; C20E810A
s_buffer_load_dword s31, s[0:3], 0xc ; C20F810C
s_buffer_load_dword s32, s[0:3], 0xd ; C210010D
s_buffer_load_dword s33, s[0:3], 0xe ; C210810E
s_buffer_load_dword s34, s[0:3], 0x0 ; C2110100
; v4 = rsq_clamp(v7^2 + v3^2 + v6^2): scale factor for the vector (v7, v3, v6).
v_mul_f32_e32 v4, v7, v7 ; 10080F07
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_waitcnt lgkmcnt(0) ; BF8C007F
; First-input transform, y and z lanes:
;   v5 = dwords 0x9, 0xd, 0x11 applied to v0, v1, v2, plus dword 0x15
;   v8 = dwords 0xa, 0xe, 0x12 applied to v0, v1, v2, plus dword 0x16
v_mul_f32_e32 v5, s28, v0 ; 100A001C
v_mul_f32_e32 v8, s29, v0 ; 1010001D
s_buffer_load_dword s28, s[0:3], 0x1 ; C20E0101
v_mac_f32_e32 v5, s32, v1 ; 3E0A0220
v_mac_f32_e32 v8, s33, v1 ; 3E100221
v_mac_f32_e32 v5, s36, v2 ; 3E0A0424
v_mac_f32_e32 v8, s27, v2 ; 3E10041B
v_add_f32_e32 v5, s26, v5 ; 060A0A1A
v_add_f32_e32 v8, s30, v8 ; 0610101E
s_buffer_load_dword s26, s[0:3], 0x2 ; C20D0102
s_buffer_load_dword s27, s[0:3], 0x3 ; C20D8103
s_buffer_load_dword s29, s[0:3], 0x8 ; C20E8108
s_waitcnt lgkmcnt(0) ; BF8C007F
; v9 = dword 0x3, copied to a VGPR for use as a v_mad_f32 source.
; (v1, v10, v2) = dwords (0x0, 0x1, 0x2) - (v0, v5, v8) * v9, where v0 is the
; x lane of the first-input transform (dwords 0x8, 0xc, 0x10, plus 0x14).
v_mov_b32_e32 v9, s27 ; 7E12021B
v_mad_f32 v10, -v5, v9, s28 ; D282000A 20721305
v_mul_f32_e32 v0, s29, v0 ; 1000001D
v_mac_f32_e32 v0, s31, v1 ; 3E00021F
v_mac_f32_e32 v0, s35, v2 ; 3E000423
v_add_f32_e32 v0, s25, v0 ; 06000019
v_mad_f32 v1, -v0, v9, s34 ; D2820001 208A1300
v_mad_f32 v2, -v8, v9, s26 ; D2820002 206A1308
; v9 = rsq_clamp(v1^2 + v10^2 + v2^2).
v_mul_f32_e32 v9, v1, v1 ; 10120301
v_mac_f32_e32 v9, v10, v10 ; 3E12150A
v_mac_f32_e32 v9, v2, v2 ; 3E120502
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Scale both vectors: (v7, v3, v4) by the first factor, (v1, v6, v2) by v9.
v_mul_f32_e32 v7, v4, v7 ; 100E0F04
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v1, v9, v1 ; 10020309
v_mul_f32_e32 v6, v9, v10 ; 100C1509
v_mul_f32_e32 v2, v9, v2 ; 10040509
; v1 = s4 * sqrt(1 - d^2), where d is the dot product of the scaled vectors.
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, v6, v3 ; 3E020706
v_mac_f32_e32 v1, v2, v4 ; 3E020902
v_mad_f32 v1, -v1, v1, 1.0 ; D2820001 23CA0301
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, s4, v1 ; 10020204
; Offset the transformed first input: (v0, v2, v1) = (v0, v5, v8) - (v7, v3, v4) * v1.
v_mad_f32 v0, -v7, v1, v0 ; D2820000 24020307
v_mad_f32 v2, -v3, v1, v5 ; D2820002 24160303
v_mad_f32 v1, -v4, v1, v8 ; D2820001 24220304
; Final 4-lane transform with dwords 0x28..0x37; the adds place the results
; in x=v6, y=v8, z=v9, w=v7, the same registers region 1 uses.
v_mul_f32_e32 v3, s20, v0 ; 10060014
v_mul_f32_e32 v4, s21, v0 ; 10080015
v_mul_f32_e32 v5, s22, v0 ; 100A0016
v_mul_f32_e32 v0, s23, v0 ; 10000017
v_mac_f32_e32 v3, s24, v2 ; 3E060418
v_mac_f32_e32 v4, s15, v2 ; 3E08040F
v_mac_f32_e32 v5, s16, v2 ; 3E0A0410
v_mac_f32_e32 v0, s17, v2 ; 3E000411
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v4, s19, v1 ; 3E080213
v_mac_f32_e32 v5, s10, v1 ; 3E0A020A
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
v_add_f32_e32 v6, s12, v3 ; 060C060C
v_add_f32_e32 v8, s13, v4 ; 0610080D
v_add_f32_e32 v9, s14, v5 ; 06120A0E
v_add_f32_e32 v7, s9, v0 ; 060E0009
; Re-enable the lanes saved in s[6:7] for the common tail.
s_or_b64 exec, exec, s[6:7] ; 88FE067E
; Tail: t = v9 + clamp(s8 * (1 / v7)); m = max(t, -v7);
; exported z = t * (1 - s5) + s5 * m.
v_rcp_f32_e32 v0, v7 ; 7E005507
v_sub_f32_e64 v1, 1.0, s5 ; D2080001 00000AF2
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_add_f32_e32 v0, v0, v9 ; 06001300
v_max_f32_e64 v2, v0, -v7 ; D2200002 40020F00
v_mul_f32_e32 v0, v0, v1 ; 10000300
v_mac_f32_e32 v0, s5, v2 ; 3E000405
; Export x=v6, y=v8, adjusted z=v0, w=v7, then end the program.
exp 15, 12, 0, 1, 0, v6, v8, v0, v7 ; F80008CF 07000806
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 744 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxx
1: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI FRAG listing printed just above
; (MOV OUT[0], IMM[0].xxxx with IMM[0] = 0.0).
; None of the function arguments are read; the body only builds constants.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Pack the pair (0.0, 0.0) into one i32 and reinterpret it as float so it can
; be passed through the float operands of llvm.SI.export.
%22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
; Second, identical pack of (0.0, 0.0).
%24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%25 = bitcast i32 %24 to float
; Export the two packed values in the order %23, %25, %23, %25.
; NOTE(review): the five leading i32 operands are export control fields; their
; exact meaning is not visible in this dump - confirm against the backend.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
; Machine code for the fragment shader above: one pack of two literal-0
; operands into v0, one export that uses v0 for all four source slots, then
; program end.
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 20 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..18]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx
1: UIF TEMP[0].xxxx :0
2: MUL TEMP[0], CONST[2], IN[0].xxxx
3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
5: ADD TEMP[0].xyz, TEMP[0], CONST[5]
6: MOV TEMP[1].x, CONST[6].xxxx
7: MOV TEMP[1].y, CONST[7].xxxx
8: MOV TEMP[1].z, CONST[8].xxxx
9: MOV TEMP[2].x, CONST[6].yyyy
10: MOV TEMP[2].y, CONST[7].yyyy
11: MOV TEMP[2].z, CONST[8].yyyy
12: MOV TEMP[3].x, CONST[6].zzzz
13: MOV TEMP[3].y, CONST[7].zzzz
14: MOV TEMP[3].z, CONST[8].zzzz
15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
19: RSQ TEMP[2].x, TEMP[2].xxxx
20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww
22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz
23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
24: RSQ TEMP[3].x, TEMP[3].xxxx
25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz
27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx
31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
33: MUL TEMP[1], CONST[10], TEMP[0].xxxx
34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1]
35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1]
36: ADD TEMP[0], TEMP[0], CONST[13]
37: ELSE :0
38: MUL TEMP[1], CONST[15], IN[0].xxxx
39: MAD TEMP[1], CONST[16], IN[0].yyyy, TEMP[1]
40: MAD TEMP[1], CONST[17], IN[0].zzzz, TEMP[1]
41: ADD TEMP[0], TEMP[1], CONST[18]
42: ENDIF
43: MOV TEMP[1].xyw, TEMP[0].xyxw
44: RCP TEMP[2].x, TEMP[0].wwww
45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx
46: MOV_SAT TEMP[2].x, TEMP[2].xxxx
47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx
48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww
49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx
50: MOV TEMP[1].z, TEMP[0].xxxx
51: MAD TEMP[0].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
52: MOV OUT[1], TEMP[0]
53: MOV OUT[0], TEMP[1]
54: END
; ModuleID = 'tgsi'
; Vertex-stage entry point (attribute group #0 carries "ShaderType"="1").
;
; Reads scalar constants through the descriptor in slot 0 of argument %1 and
; three vertex inputs through descriptor slots 0..2 of argument %4, computes a
; 4-component position along one of two paths selected by a constant, adjusts
; the z component, and issues two exports.
;
; Constant addressing: the i32 passed to llvm.SI.load.const is a byte offset,
; so TGSI CONST[n].c lives at 16*n + 4*c (e.g. TGSI instruction 51 uses
; CONST[14].xy/.zw and the IR reads offsets 224..236 for it).
;
; NOTE(review): only TGSI instructions 48..54 of this shader are visible next
; to this module; the meaning attached to the inputs/constants in the IF path
; is inferred from the arithmetic alone -- confirm against the full TGSI.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor for the constant buffer: element 0 of the array behind %1.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constants needed on every path:
;   %13 = CONST[0].w, %14 = CONST[1].x, %15 = CONST[1].y, %16 = CONST[1].z,
;   %17..%20 = CONST[14].xyzw.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
; Vertex input 0: descriptor slot 0 of %4, fetched at index %5 + %7.
; Only x, y, z (%25..%27) are used.
%21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0
%23 = add i32 %5, %7
%24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23)
%25 = extractelement <4 x float> %24, i32 0
%26 = extractelement <4 x float> %24, i32 1
%27 = extractelement <4 x float> %24, i32 2
; Vertex input 1: descriptor slot 1, same index; x, y, z -> %32..%34.
%28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
%30 = add i32 %5, %7
%31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %30)
%32 = extractelement <4 x float> %31, i32 0
%33 = extractelement <4 x float> %31, i32 1
%34 = extractelement <4 x float> %31, i32 2
; Vertex input 2: descriptor slot 2, same index; x, y -> %39, %40.
%35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = add i32 %5, %7
%38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
; Path selection: take IF when CONST[1].z compares unordered-or-not-equal to 0.
%41 = fcmp une float %16, 0.000000e+00
br i1 %41, label %IF, label %ELSE
IF: ; preds = %main_body
; Constants used only on this path, loaded from the highest offset downwards:
;   %42..%57 = CONST[13].w .. CONST[10].x   (offsets 220..160)
;   %58..%66 = CONST[8].zyx, CONST[7].zyx, CONST[6].zyx
;   %67..%78 = CONST[5].zyx, CONST[4].zyx, CONST[3].zyx, CONST[2].zyx
;   %79..%81 = CONST[0].zyx
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
; P (%97, %98, %99) = CONST[2].xyz * in0.x + CONST[3].xyz * in0.y
;                   + CONST[4].xyz * in0.z + CONST[5].xyz
%82 = fmul float %78, %25
%83 = fmul float %77, %25
%84 = fmul float %76, %25
%85 = fmul float %75, %26
%86 = fadd float %85, %82
%87 = fmul float %74, %26
%88 = fadd float %87, %83
%89 = fmul float %73, %26
%90 = fadd float %89, %84
%91 = fmul float %72, %27
%92 = fadd float %91, %86
%93 = fmul float %71, %27
%94 = fadd float %93, %88
%95 = fmul float %70, %27
%96 = fadd float %95, %90
%97 = fadd float %92, %69
%98 = fadd float %94, %68
%99 = fadd float %96, %67
; N (%110, %112, %114): three dot products of in1.xyz with CONST[6].xyz,
; CONST[7].xyz and CONST[8].xyz respectively.
; NOTE(review): input 1 is presumably a normal -- confirm against the TGSI.
%100 = fmul float %66, %32
%101 = fmul float %63, %32
%102 = fmul float %60, %32
%103 = fmul float %65, %33
%104 = fadd float %103, %100
%105 = fmul float %62, %33
%106 = fadd float %105, %101
%107 = fmul float %59, %33
%108 = fadd float %107, %102
%109 = fmul float %64, %34
%110 = fadd float %109, %104
%111 = fmul float %61, %34
%112 = fadd float %111, %106
%113 = fmul float %58, %34
%114 = fadd float %113, %108
; Scale N by rsq.clamped(N.N) -> unit-length N' (%121, %122, %123).
%115 = fmul float %110, %110
%116 = fmul float %112, %112
%117 = fadd float %116, %115
%118 = fmul float %114, %114
%119 = fadd float %117, %118
%120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119)
%121 = fmul float %110, %120
%122 = fmul float %112, %120
%123 = fmul float %114, %120
; V (%127, %128, %129) = CONST[0].xyz - P * CONST[0].w
%124 = fmul float %97, %13
%125 = fmul float %98, %13
%126 = fmul float %99, %13
%127 = fsub float %81, %124
%128 = fsub float %80, %125
%129 = fsub float %79, %126
; Scale V by rsq.clamped(V.V) -> V' (%136, %137, %138).
%130 = fmul float %127, %127
%131 = fmul float %128, %128
%132 = fadd float %131, %130
%133 = fmul float %129, %129
%134 = fadd float %132, %133
%135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134)
%136 = fmul float %127, %135
%137 = fmul float %128, %135
%138 = fmul float %129, %135
; d (%143) = N' . V'
%139 = fmul float %121, %136
%140 = fmul float %122, %137
%141 = fadd float %140, %139
%142 = fmul float %123, %138
%143 = fadd float %141, %142
; %147 = CONST[1].z * sqrt(1 - d*d)
%144 = fmul float %143, %143
%145 = fsub float 1.000000e+00, %144
%146 = call float @llvm.sqrt.f32(float %145)
%147 = fmul float %16, %146
; P' (%151, %152, %153) = P - N' * %147
%148 = fmul float %121, %147
%149 = fmul float %122, %147
%150 = fmul float %123, %147
%151 = fsub float %97, %148
%152 = fsub float %98, %149
%153 = fsub float %99, %150
; Result (%174..%177) = CONST[10] * P'.x + CONST[11] * P'.y
;                     + CONST[12] * P'.z + CONST[13]
%154 = fmul float %57, %151
%155 = fmul float %56, %151
%156 = fmul float %55, %151
%157 = fmul float %54, %151
%158 = fmul float %53, %152
%159 = fadd float %158, %154
%160 = fmul float %52, %152
%161 = fadd float %160, %155
%162 = fmul float %51, %152
%163 = fadd float %162, %156
%164 = fmul float %50, %152
%165 = fadd float %164, %157
%166 = fmul float %49, %153
%167 = fadd float %166, %159
%168 = fmul float %48, %153
%169 = fadd float %168, %161
%170 = fmul float %47, %153
%171 = fadd float %170, %163
%172 = fmul float %46, %153
%173 = fadd float %172, %165
%174 = fadd float %167, %45
%175 = fadd float %169, %44
%176 = fadd float %171, %43
%177 = fadd float %173, %42
br label %ENDIF
ELSE: ; preds = %main_body
; Direct path: %178..%193 = CONST[18].w .. CONST[15].x (offsets 300..240).
%178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%186 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%188 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%189 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%191 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%192 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%193 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
; Result (%214..%217) = CONST[15] * in0.x + CONST[16] * in0.y
;                     + CONST[17] * in0.z + CONST[18]
%194 = fmul float %193, %25
%195 = fmul float %192, %25
%196 = fmul float %191, %25
%197 = fmul float %190, %25
%198 = fmul float %189, %26
%199 = fadd float %198, %194
%200 = fmul float %188, %26
%201 = fadd float %200, %195
%202 = fmul float %187, %26
%203 = fadd float %202, %196
%204 = fmul float %186, %26
%205 = fadd float %204, %197
%206 = fmul float %185, %27
%207 = fadd float %206, %199
%208 = fmul float %184, %27
%209 = fadd float %208, %201
%210 = fmul float %183, %27
%211 = fadd float %210, %203
%212 = fmul float %182, %27
%213 = fadd float %212, %205
%214 = fadd float %207, %181
%215 = fadd float %209, %180
%216 = fadd float %211, %179
%217 = fadd float %213, %178
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
; Merge the x, y, z, w results of whichever path ran.
%temp.0 = phi float [ %174, %IF ], [ %214, %ELSE ]
%temp1.0 = phi float [ %175, %IF ], [ %215, %ELSE ]
%temp2.0 = phi float [ %176, %IF ], [ %216, %ELSE ]
%temp3.0 = phi float [ %177, %IF ], [ %217, %ELSE ]
; z adjustment: %221 = z + clamp(CONST[1].x / w, 0, 1).
%218 = fdiv float 1.000000e+00, %temp3.0
%219 = fmul float %14, %218
%220 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00)
%221 = fadd float %temp2.0, %220
; TGSI 48/49: %223 = max(%221, -w); %224 blends %223 and %221 with weight
; CONST[1].y (the disassembly computes %221 * (1 - CONST[1].y) + CONST[1].y * %223).
%222 = fsub float -0.000000e+00, %temp3.0
%223 = call float @llvm.maxnum.f32(float %221, float %222)
%224 = call float @llvm.AMDGPU.lrp(float %15, float %223, float %221)
; TGSI 51: (%226, %228) = in2.xy * CONST[14].xy + CONST[14].zw
%225 = fmul float %39, %17
%226 = fadd float %225, %19
%227 = fmul float %40, %18
%228 = fadd float %227, %20
; Export target 32 receives TGSI OUT[1]; its z/w lanes carry the unadjusted
; %temp2.0 / %temp3.0.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %226, float %228, float %temp2.0, float %temp3.0)
; Export target 12 receives TGSI OUT[0] (x, y, adjusted z, w); this is the
; same target the other vertex shaders in this dump use for their POSITION output.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %224, float %temp3.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700
buffer_load_format_xyzw v[0:3], v0, s[16:19], 0 idxen ; E00C2000 80040000
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s12, s[0:3], 0x3a ; C206013A
s_buffer_load_dword s11, s[0:3], 0x3b ; C205813B
s_waitcnt vmcnt(1) ; BF8C0771
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
s_buffer_load_dword s8, s[0:3], 0x46 ; C2040146
s_buffer_load_dword s9, s[0:3], 0x47 ; C2048147
s_buffer_load_dword s10, s[0:3], 0x48 ; C2050148
s_buffer_load_dword s13, s[0:3], 0x49 ; C2068149
s_buffer_load_dword s14, s[0:3], 0x4a ; C207014A
s_buffer_load_dword s15, s[0:3], 0x41 ; C2078141
s_buffer_load_dword s16, s[0:3], 0x42 ; C2080142
s_buffer_load_dword s17, s[0:3], 0x43 ; C2088143
s_buffer_load_dword s18, s[0:3], 0x44 ; C2090144
s_buffer_load_dword s19, s[0:3], 0x45 ; C2098145
s_buffer_load_dword s20, s[0:3], 0x3c ; C20A013C
s_buffer_load_dword s21, s[0:3], 0x3d ; C20A813D
s_buffer_load_dword s22, s[0:3], 0x3e ; C20B013E
s_buffer_load_dword s23, s[0:3], 0x3f ; C20B813F
s_buffer_load_dword s24, s[0:3], 0x40 ; C20C0140
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s20, v4 ; 10040814
v_mul_f32_e32 v3, s21, v4 ; 10060815
v_mul_f32_e32 v12, s22, v4 ; 10180816
v_mul_f32_e32 v13, s23, v4 ; 101A0817
v_mac_f32_e32 v2, s24, v5 ; 3E040A18
v_mac_f32_e32 v3, s15, v5 ; 3E060A0F
v_mac_f32_e32 v12, s16, v5 ; 3E180A10
v_mac_f32_e32 v13, s17, v5 ; 3E1A0A11
v_mac_f32_e32 v2, s18, v6 ; 3E040C12
v_mac_f32_e32 v3, s19, v6 ; 3E060C13
v_mac_f32_e32 v12, s8, v6 ; 3E180C08
v_mac_f32_e32 v13, s9, v6 ; 3E1A0C09
v_add_f32_e32 v10, s10, v2 ; 0614040A
v_add_f32_e32 v11, s13, v3 ; 0616060D
v_add_f32_e32 v12, s14, v12 ; 0618180E
v_add_f32_e32 v13, s5, v13 ; 061A1A05
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138
s_buffer_load_dword s5, s[0:3], 0x39 ; C2028139
v_mov_b32_e32 v2, s12 ; 7E04020C
v_mov_b32_e32 v3, s11 ; 7E06020B
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
s_buffer_load_dword s11, s[0:3], 0x37 ; C2058137
s_buffer_load_dword s12, s[0:3], 0x32 ; C2060132
s_buffer_load_dword s13, s[0:3], 0x33 ; C2068133
s_buffer_load_dword s14, s[0:3], 0x34 ; C2070134
s_buffer_load_dword s15, s[0:3], 0x35 ; C2078135
s_buffer_load_dword s16, s[0:3], 0x36 ; C2080136
s_buffer_load_dword s17, s[0:3], 0x2d ; C208812D
s_buffer_load_dword s18, s[0:3], 0x2e ; C209012E
s_buffer_load_dword s19, s[0:3], 0x2f ; C209812F
s_buffer_load_dword s20, s[0:3], 0x30 ; C20A0130
s_buffer_load_dword s21, s[0:3], 0x31 ; C20A8131
s_buffer_load_dword s22, s[0:3], 0x28 ; C20B0128
s_buffer_load_dword s23, s[0:3], 0x29 ; C20B8129
s_buffer_load_dword s24, s[0:3], 0x2a ; C20C012A
s_buffer_load_dword s25, s[0:3], 0x2b ; C20C812B
s_buffer_load_dword s26, s[0:3], 0x2c ; C20D012C
s_buffer_load_dword s27, s[0:3], 0x1d ; C20D811D
s_buffer_load_dword s28, s[0:3], 0x1e ; C20E011E
s_buffer_load_dword s29, s[0:3], 0x20 ; C20E8120
s_buffer_load_dword s30, s[0:3], 0x21 ; C20F0121
s_buffer_load_dword s31, s[0:3], 0x22 ; C20F8122
s_buffer_load_dword s32, s[0:3], 0x16 ; C2100116
s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118
s_buffer_load_dword s34, s[0:3], 0x19 ; C2110119
s_buffer_load_dword s35, s[0:3], 0x1a ; C211811A
s_buffer_load_dword s36, s[0:3], 0x1c ; C212011C
s_buffer_load_dword s37, s[0:3], 0x10 ; C2128110
s_buffer_load_dword s38, s[0:3], 0x11 ; C2130111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v10, s29, v7 ; 10140E1D
v_mac_f32_e32 v10, s30, v8 ; 3E14101E
v_mac_f32_e32 v10, s31, v9 ; 3E14121F
s_buffer_load_dword s29, s[0:3], 0x12 ; C20E8112
v_mul_f32_e32 v11, s33, v7 ; 10160E21
v_mac_f32_e32 v11, s34, v8 ; 3E161022
v_mac_f32_e32 v11, s35, v9 ; 3E161223
v_mul_f32_e32 v7, s36, v7 ; 100E0E24
v_mac_f32_e32 v7, s27, v8 ; 3E0E101B
v_mac_f32_e32 v7, s28, v9 ; 3E0E121C
s_buffer_load_dword s27, s[0:3], 0x14 ; C20D8114
s_buffer_load_dword s28, s[0:3], 0x15 ; C20E0115
s_buffer_load_dword s30, s[0:3], 0x9 ; C20F0109
s_buffer_load_dword s31, s[0:3], 0xa ; C20F810A
s_buffer_load_dword s33, s[0:3], 0xc ; C210810C
s_buffer_load_dword s34, s[0:3], 0xd ; C211010D
s_buffer_load_dword s35, s[0:3], 0xe ; C211810E
s_buffer_load_dword s36, s[0:3], 0x0 ; C2120100
v_mul_f32_e32 v8, v11, v11 ; 1010170B
v_mac_f32_e32 v8, v7, v7 ; 3E100F07
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v9, s30, v4 ; 1012081E
v_mul_f32_e32 v12, s31, v4 ; 1018081F
s_buffer_load_dword s30, s[0:3], 0x1 ; C20F0101
v_mac_f32_e32 v9, s34, v5 ; 3E120A22
v_mac_f32_e32 v12, s35, v5 ; 3E180A23
v_mac_f32_e32 v9, s38, v6 ; 3E120C26
v_mac_f32_e32 v12, s29, v6 ; 3E180C1D
v_add_f32_e32 v9, s28, v9 ; 0612121C
v_add_f32_e32 v12, s32, v12 ; 06181820
s_buffer_load_dword s28, s[0:3], 0x2 ; C20E0102
s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103
s_buffer_load_dword s31, s[0:3], 0x8 ; C20F8108
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s29 ; 7E1A021D
v_mad_f32 v14, -v9, v13, s30 ; D282000E 207A1B09
v_mul_f32_e32 v4, s31, v4 ; 1008081F
v_mac_f32_e32 v4, s33, v5 ; 3E080A21
v_mac_f32_e32 v4, s37, v6 ; 3E080C25
v_add_f32_e32 v4, s27, v4 ; 0608081B
v_mad_f32 v5, -v4, v13, s36 ; D2820005 20921B04
v_mad_f32 v6, -v12, v13, s28 ; D2820006 20721B0C
v_mul_f32_e32 v13, v5, v5 ; 101A0B05
v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E
v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v8, v8, v10 ; 10101508
v_mul_f32_e32 v5, v13, v5 ; 100A0B0D
v_mul_f32_e32 v10, v13, v14 ; 10141D0D
v_mul_f32_e32 v6, v13, v6 ; 100C0D0D
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mac_f32_e32 v5, v10, v7 ; 3E0A0F0A
v_mac_f32_e32 v5, v6, v8 ; 3E0A1106
v_mad_f32 v5, -v5, v5, 1.0 ; D2820005 23CA0B05
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_mul_f32_e32 v5, s4, v5 ; 100A0A04
v_mad_f32 v4, -v11, v5, v4 ; D2820004 24120B0B
v_mad_f32 v6, -v7, v5, v9 ; D2820006 24260B07
v_mad_f32 v5, -v8, v5, v12 ; D2820005 24320B08
v_mul_f32_e32 v7, s22, v4 ; 100E0816
v_mul_f32_e32 v8, s23, v4 ; 10100817
v_mul_f32_e32 v9, s24, v4 ; 10120818
v_mul_f32_e32 v4, s25, v4 ; 10080819
v_mac_f32_e32 v7, s26, v6 ; 3E0E0C1A
v_mac_f32_e32 v8, s17, v6 ; 3E100C11
v_mac_f32_e32 v9, s18, v6 ; 3E120C12
v_mac_f32_e32 v4, s19, v6 ; 3E080C13
v_mac_f32_e32 v7, s20, v5 ; 3E0E0A14
v_mac_f32_e32 v8, s21, v5 ; 3E100A15
v_mac_f32_e32 v9, s12, v5 ; 3E120A0C
v_mac_f32_e32 v4, s13, v5 ; 3E080A0D
v_add_f32_e32 v10, s14, v7 ; 06140E0E
v_add_f32_e32 v11, s15, v8 ; 0616100F
v_add_f32_e32 v12, s16, v9 ; 06181210
v_add_f32_e32 v13, s11, v4 ; 061A080B
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_rcp_f32_e32 v4, v13 ; 7E08550D
v_sub_f32_e64 v5, 1.0, s9 ; D2080005 000012F2
v_mul_f32_e32 v4, s10, v4 ; 1008080A
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_add_f32_e32 v4, v4, v12 ; 06081904
v_max_f32_e64 v6, v4, -v13 ; D2200006 40021B04
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mac_f32_e32 v4, s9, v6 ; 3E080C09
v_mac_f32_e32 v2, s8, v0 ; 3E040008
v_mac_f32_e32 v3, s5, v1 ; 3E060205
exp 15, 32, 0, 0, 0, v2, v3, v12, v13 ; F800020F 0D0C0302
exp 15, 12, 0, 1, 0, v10, v11, v4, v13 ; F80008CF 0D040B0A
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 16
Code Size: 800 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], POSITION, LINEAR
DCL IN[1], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 3D, FLOAT
DCL CONST[3]
DCL CONST[0]
DCL TEMP[0]
DCL TEMP[1..2], LOCAL
IMM[0] FLT32 { 0.2500, 0.9375, 0.0100, 1.0000}
IMM[1] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0], IN[0]
1: MAD TEMP[0].y, IN[0], CONST[3].xxxx, CONST[3].yyyy
2: MUL TEMP[1].xy, TEMP[0].xyyy, IMM[0].xxxx
3: MOV TEMP[2].xy, IN[1].xyyy
4: TEX TEMP[2].w, TEMP[2], SAMP[0], 2D
5: MUL TEMP[2].x, TEMP[2].wwww, CONST[0].wwww
6: MUL TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy
7: MOV TEMP[1].z, TEMP[2].xxxx
8: MOV TEMP[1].xyz, TEMP[1].xyzz
9: TEX TEMP[1].w, TEMP[1], SAMP[1], 3D
10: FSLT TEMP[1].x, TEMP[1].wwww, IMM[0].zzzz
11: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww
12: KILL_IF -TEMP[1].xxxx
13: MOV OUT[0], IMM[1].xxxx
14: END
; ModuleID = 'tgsi'
; Fragment-stage entry point (attribute group #0 carries "ShaderType"="0") for
; the 15-instruction TGSI FRAG listing dumped just above this module.
;
; Samples the 2D texture at the interpolated GENERIC[0] coordinate, uses its w
; channel (scaled) as the third coordinate of a 3D texture lookup whose x/y come
; from the fragment position, discards the fragment when that lookup's w channel
; is below 0.01, and otherwise exports an all-zero colour.
;
; Constant addressing: the i32 passed to llvm.SI.load.const is a byte offset,
; TGSI CONST[n].c being at 16*n + 4*c.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant-buffer descriptor (slot 0 of %1) and the three constants used:
;   %24 = CONST[0].w, %25 = CONST[3].x, %26 = CONST[3].y.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
; Descriptors for the first sample: element 0 of %3 (32 bytes) and element 0
; of %2 (16 bytes).
%27 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%28 = load <32 x i8>, <32 x i8> addrspace(2)* %27, align 32, !tbaa !0
%29 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
; Descriptors for the second sample: element 1 of %3 and element 1 of %2.
%31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%32 = bitcast <8 x i32> addrspace(2)* %31 to <32 x i8> addrspace(2)*
%33 = load <32 x i8>, <32 x i8> addrspace(2)* %32, align 32, !tbaa !0
%34 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%35 = bitcast <4 x i32> addrspace(2)* %34 to <16 x i8> addrspace(2)*
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
; TGSI 3: interpolate channels 0 and 1 of the shader's one interpolated input
; (TGSI IN[1].xy) using argument %7.
%37 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; TGSI 1: y' = position.y * CONST[3].x + CONST[3].y   (%15 is the position's y).
%39 = fmul float %25, %15
%40 = fadd float %39, %26
; TGSI 2: scale position.x (%14) and y' by 0.25.
%41 = fmul float %14, 2.500000e-01
%42 = fmul float %40, 2.500000e-01
; TGSI 4: first texture lookup (TGSI target 2D) at the interpolated xy;
; only the w channel (%48) is kept.
%43 = bitcast float %37 to i32
%44 = bitcast float %38 to i32
%45 = insertelement <2 x i32> undef, i32 %43, i32 0
%46 = insertelement <2 x i32> %45, i32 %44, i32 1
%47 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %46, <32 x i8> %28, <16 x i8> %30, i32 2)
%48 = extractelement <4 x float> %47, i32 3
; TGSI 5-6: third coordinate = w * CONST[0].w * 0.9375.
%49 = fmul float %48, %24
%50 = fmul float %49, 9.375000e-01
; TGSI 9: second texture lookup (TGSI target 3D) at (%41, %42, %50); lane 3 of
; the address vector is never written. Only the w channel (%58) is kept.
%51 = bitcast float %41 to i32
%52 = bitcast float %42 to i32
%53 = bitcast float %50 to i32
%54 = insertelement <4 x i32> undef, i32 %51, i32 0
%55 = insertelement <4 x i32> %54, i32 %52, i32 1
%56 = insertelement <4 x i32> %55, i32 %53, i32 2
%57 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %56, <32 x i8> %33, <16 x i8> %36, i32 3)
%58 = extractelement <4 x float> %57, i32 3
; TGSI 10-12: 0x3F847AE140000000 is the TGSI immediate 0.01. When w < 0.01 the
; kill operand is -1.0, otherwise 0.0; the disassembly keeps only lanes whose
; operand is >= 0, so the negative case discards the fragment.
%59 = fcmp olt float %58, 0x3F847AE140000000
%60 = select i1 %59, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %60)
; TGSI 13: colour output is all zeros, packed as two f16 pairs. The second
; packf16 call repeats the first with identical operands.
%61 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%62 = bitcast i32 %61 to float
%63 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%64 = bitcast i32 %63 to float
; Single export of the packed zeros (appears as "exp 15, 0, 1, 1, 1" in the
; disassembly).
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0xd ; C204010D
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_buffer_load_dword s10, s[0:3], 0xc ; C205010C
s_mov_b32 m0, s9 ; BEFC0309
s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103
v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000
v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001
s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504
s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708
v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100
v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800800 00640004
v_mov_b32_e32 v1, s8 ; 7E020208
v_mac_f32_e32 v1, s10, v3 ; 3E02060A
v_mov_b32_e32 v3, 0x3e800000 ; 7E0602FF 3E800000
v_mul_f32_e32 v4, v3, v2 ; 10080503
v_mul_f32_e32 v5, v3, v1 ; 100A0303
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v0, s0, v0 ; 10000000
v_mul_f32_e32 v6, 0x3f700000, v0 ; 100C00FF 3F700000
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[4:7], s[28:35], s[24:27] ; F0800800 00C70004
v_mov_b32_e32 v1, 0x3c23d70a ; 7E0202FF 3C23D70A
s_waitcnt vmcnt(0) ; BF8C0770
v_cmp_gt_f32_e32 vcc, v1, v0 ; 7C080101
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 8
Code Size: 168 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..8]
DCL TEMP[0..3], LOCAL
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].x, TEMP[0].yyyy, CONST[0].xxxx
5: MUL TEMP[2], CONST[1], TEMP[0].xxxx
6: MAD TEMP[2], CONST[2], TEMP[1].xxxx, TEMP[2]
7: ADD TEMP[2], TEMP[2], -CONST[3]
8: ADD TEMP[2].xyz, TEMP[2], CONST[4]
9: MOV TEMP[3].xy, TEMP[2].xyxx
10: MOV TEMP[3].z, -TEMP[2].zzzz
11: MUL TEMP[2], CONST[1], TEMP[0].xxxx
12: MAD TEMP[1], CONST[2], TEMP[1].xxxx, TEMP[2]
13: ADD TEMP[1], TEMP[1], CONST[3]
14: ADD TEMP[1].z, TEMP[1], CONST[4]
15: MOV TEMP[3].w, -TEMP[1].zzzz
16: MOV TEMP[1].xy, IN[2].xyxx
17: MOV TEMP[1].zw, IN[1].yyxy
18: MOV TEMP[2].x, IN[1].zzzz
19: MOV OUT[3], TEMP[2]
20: MOV OUT[2], TEMP[1]
21: MOV OUT[1], TEMP[3]
22: MOV OUT[0], TEMP[0]
23: END
; ModuleID = 'tgsi'
; Vertex-stage entry point (attribute group #0 carries "ShaderType"="1") for the
; 24-instruction TGSI VERT listing dumped just above this module.
;
; Transforms input 0 by CONST[5..8] to obtain the position, derives OUT[1] from
; the transformed x/y and CONST[0..4], forwards input 1 / input 2 components in
; OUT[2] and OUT[3], and exports the position last.
;
; Constant addressing: the i32 passed to llvm.SI.load.const is a byte offset,
; TGSI CONST[n].c being at 16*n + 4*c.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Constant-buffer descriptor: element 0 of the array behind %1.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; %13 = CONST[0].x
; %14..%17 = CONST[1].xyzw
; %18..%20 = CONST[2].xyz, %21..%23 = CONST[3].xyz, %24..%26 = CONST[4].xyz
; %27..%30 = CONST[5], %31..%34 = CONST[6], %35..%38 = CONST[7], %39..%42 = CONST[8]
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
; Vertex input 0 (TGSI IN[0]): descriptor slot 0 of %4, index %5 + %7; all four
; components are used (%47..%50).
%43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = add i32 %5, %7
%46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
%49 = extractelement <4 x float> %46, i32 2
%50 = extractelement <4 x float> %46, i32 3
; Vertex input 1 (TGSI IN[1]): descriptor slot 1; x, y, z -> %55..%57.
%51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0
%53 = add i32 %5, %7
%54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53)
%55 = extractelement <4 x float> %54, i32 0
%56 = extractelement <4 x float> %54, i32 1
%57 = extractelement <4 x float> %54, i32 2
; Vertex input 2 (TGSI IN[2]): descriptor slot 2; x, y -> %62, %63.
%58 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0
%60 = add i32 %5, %7
%61 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %59, i32 0, i32 %60)
%62 = extractelement <4 x float> %61, i32 0
%63 = extractelement <4 x float> %61, i32 1
; TGSI 0-3: position (%85, %87, %89, %91) =
;   CONST[5] * in0.x + CONST[6] * in0.y + CONST[7] * in0.z + CONST[8] * in0.w
%64 = fmul float %27, %47
%65 = fmul float %28, %47
%66 = fmul float %29, %47
%67 = fmul float %30, %47
%68 = fmul float %31, %48
%69 = fadd float %68, %64
%70 = fmul float %32, %48
%71 = fadd float %70, %65
%72 = fmul float %33, %48
%73 = fadd float %72, %66
%74 = fmul float %34, %48
%75 = fadd float %74, %67
%76 = fmul float %35, %49
%77 = fadd float %76, %69
%78 = fmul float %36, %49
%79 = fadd float %78, %71
%80 = fmul float %37, %49
%81 = fadd float %80, %73
%82 = fmul float %38, %49
%83 = fadd float %82, %75
%84 = fmul float %39, %50
%85 = fadd float %84, %77
%86 = fmul float %40, %50
%87 = fadd float %86, %79
%88 = fmul float %41, %50
%89 = fadd float %88, %81
%90 = fmul float %42, %50
%91 = fadd float %90, %83
; TGSI 4: s (%92) = position.y * CONST[0].x
%92 = fmul float %87, %13
; TGSI 5-8: (%105, %106, %107) =
;   CONST[1].xyz * position.x + CONST[2].xyz * s - CONST[3].xyz + CONST[4].xyz
%93 = fmul float %14, %85
%94 = fmul float %15, %85
%95 = fmul float %16, %85
%96 = fmul float %18, %92
%97 = fadd float %96, %93
%98 = fmul float %19, %92
%99 = fadd float %98, %94
%100 = fmul float %20, %92
%101 = fadd float %100, %95
%102 = fsub float %97, %21
%103 = fsub float %99, %22
%104 = fsub float %101, %23
%105 = fadd float %102, %24
%106 = fadd float %103, %25
%107 = fadd float %104, %26
; TGSI 10: OUT[1].z = -%107
%108 = fsub float -0.000000e+00, %107
; TGSI 11: CONST[1].yzw * position.x (%109..%111); these end up in OUT[3].yzw.
; %109 and %110 repeat the products already computed as %94 and %95.
%109 = fmul float %15, %85
%110 = fmul float %16, %85
%111 = fmul float %17, %85
; TGSI 12-15: only the z lane is needed:
;   %115 = CONST[1].z * position.x + CONST[2].z * s + CONST[3].z + CONST[4].z
;   OUT[1].w = -%115
%112 = fmul float %20, %92
%113 = fadd float %112, %110
%114 = fadd float %113, %23
%115 = fadd float %114, %26
%116 = fsub float -0.000000e+00, %115
; Export target 32 <- TGSI OUT[1] (GENERIC[0]).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %105, float %106, float %108, float %116)
; Export target 33 <- TGSI OUT[2] (GENERIC[1]) = (in2.x, in2.y, in1.x, in1.y).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %62, float %63, float %55, float %56)
; Export target 34 <- TGSI OUT[3] (GENERIC[2]) = (in1.z, %109, %110, %111).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %109, float %110, float %111)
; Export target 12 <- TGSI OUT[0] (POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %85, float %87, float %89, float %91)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0xe ; C204010E
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111
s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112
s_buffer_load_dword s7, s[0:3], 0x14 ; C2038114
s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115
s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116
s_buffer_load_dword s11, s[0:3], 0x17 ; C2058117
s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118
s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119
s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100
s_buffer_load_dword s15, s[0:3], 0x4 ; C2078104
s_buffer_load_dword s16, s[0:3], 0x5 ; C2080105
s_buffer_load_dword s17, s[0:3], 0x6 ; C2088106
s_buffer_load_dword s18, s[0:3], 0x7 ; C2090107
s_buffer_load_dword s19, s[0:3], 0x8 ; C2098108
s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109
s_buffer_load_dword s21, s[0:3], 0xa ; C20A810A
s_buffer_load_dword s22, s[0:3], 0xc ; C20B010C
s_buffer_load_dword s23, s[0:3], 0xd ; C20B810D
v_mov_b32_e32 v0, s8 ; 7E000208
s_waitcnt vmcnt(0) ; BF8C0770
v_mov_b32_e32 v10, s8 ; 7E140208
s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C
s_buffer_load_dword s24, s[0:3], 0x1d ; C20C011D
s_buffer_load_dword s25, s[0:3], 0x1e ; C20C811E
s_buffer_load_dword s26, s[0:3], 0x20 ; C20D0120
s_buffer_load_dword s27, s[0:3], 0x21 ; C20D8121
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v11, s7, v1 ; 10160207
v_mac_f32_e32 v11, s12, v2 ; 3E16040C
v_mul_f32_e32 v12, s9, v1 ; 10180209
v_mac_f32_e32 v12, s13, v2 ; 3E18040D
v_mac_f32_e32 v11, s8, v3 ; 3E160608
v_mac_f32_e32 v12, s24, v3 ; 3E180618
s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122
s_buffer_load_dword s8, s[0:3], 0x23 ; C2040123
v_mac_f32_e32 v11, s26, v4 ; 3E16081A
v_mac_f32_e32 v12, s27, v4 ; 3E18081B
v_mov_b32_e32 v13, s22 ; 7E1A0216
v_mad_f32 v13, v11, s15, -v13 ; D282000D 84341F0B
v_mul_f32_e32 v14, s14, v12 ; 101C180E
v_mac_f32_e32 v13, s19, v14 ; 3E1A1C13
v_mov_b32_e32 v15, s23 ; 7E1E0217
v_mad_f32 v15, v11, s16, -v15 ; D282000F 843C210B
v_mac_f32_e32 v15, s20, v14 ; 3E1E1C14
v_mad_f32 v10, v11, s17, -v10 ; D282000A 8428230B
v_mac_f32_e32 v0, s17, v11 ; 3E001611
v_mac_f32_e32 v10, s21, v14 ; 3E141C15
v_mac_f32_e32 v0, s21, v14 ; 3E001C15
v_add_f32_e32 v13, s4, v13 ; 061A1A04
v_add_f32_e32 v14, s5, v15 ; 061C1E05
v_add_f32_e32 v10, s6, v10 ; 06141406
v_add_f32_e32 v0, s6, v0 ; 06000006
v_mov_b32_e32 v15, 0x80000000 ; 7E1E02FF 80000000
v_xor_b32_e32 v10, v10, v15 ; 3A141F0A
v_xor_b32_e32 v0, v0, v15 ; 3A001F00
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
exp 15, 32, 0, 0, 0, v13, v14, v10, v0 ; F800020F 000A0E0D
s_buffer_load_dword s5, s[0:3], 0x1b ; C202811B
exp 15, 33, 0, 0, 0, v8, v9, v5, v6 ; F800021F 06050908
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mul_f32_e32 v0, s16, v11 ; 10001610
v_mul_f32_e32 v5, s17, v11 ; 100A1611
v_mul_f32_e32 v6, s18, v11 ; 100C1612
exp 15, 34, 0, 0, 0, v7, v0, v5, v6 ; F800022F 06050007
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s10, v1 ; 1000020A
v_mac_f32_e32 v0, s4, v2 ; 3E000404
v_mul_f32_e32 v1, s11, v1 ; 1002020B
v_mac_f32_e32 v1, s5, v2 ; 3E020405
v_mac_f32_e32 v0, s25, v3 ; 3E000619
v_mac_f32_e32 v1, s0, v3 ; 3E020600
v_mac_f32_e32 v0, s7, v4 ; 3E000807
v_mac_f32_e32 v1, s8, v4 ; 3E020808
exp 15, 12, 0, 1, 0, v11, v12, v0, v1 ; F80008CF 01000C0B
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 16
Code Size: 396 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], SHADOW2D, FLOAT
DCL CONST[0..24]
DCL CONST[26..29]
DCL CONST[31]
DCL TEMP[0..15], LOCAL
IMM[0] FLT32 { 1.0000, 0.5000, -0.5000, 7.0000}
IMM[1] FLT32 { 4.0000, 3.0000, 2.0000, -2.0000}
IMM[2] FLT32 { 0.1429, 0.0069, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[1].zwzz
1: MOV TEMP[0].z, IN[2].xxxx
2: MOV TEMP[1].xy, IN[1].xyyy
3: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D
4: MOV TEMP[2].xy, IN[0].xyxx
5: LRP TEMP[3].x, TEMP[1].xxxx, IN[0].wwww, IN[0].zzzz
6: MOV TEMP[2].z, TEMP[3].xxxx
7: MAD TEMP[3].x, CONST[0].xxxx, TEMP[1].xxxx, CONST[0].yyyy
8: RCP TEMP[3].x, TEMP[3].xxxx
9: LRP TEMP[1].x, CONST[1].wwww, TEMP[1].xxxx, TEMP[3].xxxx
10: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[1].xxxx
11: LRP TEMP[0].xyz, CONST[1].wwww, TEMP[2].xyzz, TEMP[1].xyzz
12: MUL TEMP[1], CONST[26], TEMP[0].xxxx
13: MAD TEMP[1], CONST[27], TEMP[0].yyyy, TEMP[1]
14: MAD TEMP[0], CONST[28], TEMP[0].zzzz, TEMP[1]
15: ADD TEMP[0], TEMP[0], CONST[29]
16: ADD TEMP[1].xyz, TEMP[0].xyzz, -CONST[2].xyzz
17: ADD TEMP[2].xyz, TEMP[0].xyzz, -CONST[3].xyzz
18: ADD TEMP[3].xyz, TEMP[0].xyzz, -CONST[4].xyzz
19: ADD TEMP[4].xyz, TEMP[0].xyzz, -CONST[5].xyzz
20: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz
21: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
22: MOV TEMP[1].y, TEMP[2].xxxx
23: DP3 TEMP[2].x, TEMP[3].xyzz, TEMP[3].xyzz
24: MOV TEMP[1].z, TEMP[2].xxxx
25: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[4].xyzz
26: MOV TEMP[1].w, TEMP[2].xxxx
27: FSLT TEMP[1], TEMP[1], CONST[6]
28: AND TEMP[1], TEMP[1], IMM[0].xxxx
29: ADD TEMP[2].xyz, TEMP[1].yzww, -TEMP[1].xyzz
30: MOV_SAT TEMP[2].xyz, TEMP[2].xyzz
31: MUL TEMP[3], CONST[19], TEMP[0].xxxx
32: MAD TEMP[3], CONST[20], TEMP[0].yyyy, TEMP[3]
33: MAD TEMP[3], CONST[21], TEMP[0].zzzz, TEMP[3]
34: MAD TEMP[3].xyz, CONST[22], TEMP[0].wwww, TEMP[3]
35: MUL TEMP[4], CONST[15], TEMP[0].xxxx
36: MAD TEMP[4], CONST[16], TEMP[0].yyyy, TEMP[4]
37: MAD TEMP[4], CONST[17], TEMP[0].zzzz, TEMP[4]
38: MAD TEMP[4].xyz, CONST[18], TEMP[0].wwww, TEMP[4]
39: MUL TEMP[5], CONST[11], TEMP[0].xxxx
40: MAD TEMP[5], CONST[12], TEMP[0].yyyy, TEMP[5]
41: MAD TEMP[5], CONST[13], TEMP[0].zzzz, TEMP[5]
42: MAD TEMP[5].xyz, CONST[14], TEMP[0].wwww, TEMP[5]
43: MUL TEMP[6], CONST[7], TEMP[0].xxxx
44: MAD TEMP[6], CONST[8], TEMP[0].yyyy, TEMP[6]
45: MAD TEMP[6], CONST[9], TEMP[0].zzzz, TEMP[6]
46: MAD TEMP[6].xyz, CONST[10], TEMP[0].wwww, TEMP[6]
47: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[1].xxxx
48: MAD TEMP[5].xyz, TEMP[5].xyzz, TEMP[2].xxxx, TEMP[6].xyzz
49: MAD TEMP[4].xyz, TEMP[4].xyzz, TEMP[2].yyyy, TEMP[5].xyzz
50: MAD TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].zzzz, TEMP[4].xyzz
51: MAD TEMP[2].xy, TEMP[1].xyyy, CONST[31].zwww, IMM[0].yyyy
52: FLR TEMP[3].xy, TEMP[2].xyyy
53: ADD TEMP[3].xy, TEMP[3].xyyy, IMM[0].zzzz
54: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[31].xyyy
55: FRC TEMP[2].xy, TEMP[2].xyyy
56: MOV TEMP[4].y, IMM[0].wwww
57: MUL TEMP[5].x, IMM[1].yyyy, TEMP[2].xxxx
58: ADD TEMP[4].x, IMM[1].xxxx, -TEMP[5].xxxx
59: MAD TEMP[5].x, IMM[1].yyyy, TEMP[2].xxxx, IMM[0].xxxx
60: MOV TEMP[4].z, TEMP[5].xxxx
61: MUL TEMP[6].x, IMM[1].zzzz, TEMP[2].xxxx
62: ADD TEMP[6].x, IMM[1].yyyy, -TEMP[6].xxxx
63: RCP TEMP[7].x, TEMP[4].xxxx
64: MAD TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx, IMM[1].wwww
65: ADD TEMP[7].x, IMM[1].yyyy, TEMP[2].xxxx
66: MUL TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx
67: MOV TEMP[6].y, TEMP[7].xxxx
68: RCP TEMP[5].x, TEMP[5].xxxx
69: MAD TEMP[5].x, TEMP[2].xxxx, TEMP[5].xxxx, IMM[1].zzzz
70: MOV TEMP[6].z, TEMP[5].xxxx
71: MUL TEMP[5].xyz, TEMP[6].xyzz, CONST[31].xxxx
72: MUL TEMP[7].x, IMM[1].yyyy, TEMP[2].yyyy
73: ADD TEMP[6].x, IMM[1].xxxx, -TEMP[7].xxxx
74: MAD TEMP[7].x, IMM[1].yyyy, TEMP[2].yyyy, IMM[0].xxxx
75: MUL TEMP[8].x, IMM[1].zzzz, TEMP[2].yyyy
76: ADD TEMP[8].x, IMM[1].yyyy, -TEMP[8].xxxx
77: RCP TEMP[9].x, TEMP[6].xxxx
78: MAD TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx, IMM[1].wwww
79: ADD TEMP[9].x, IMM[1].yyyy, TEMP[2].yyyy
80: MUL TEMP[9].x, TEMP[9].xxxx, IMM[2].xxxx
81: MOV TEMP[8].y, TEMP[9].xxxx
82: RCP TEMP[9].x, TEMP[7].xxxx
83: MAD TEMP[2].x, TEMP[2].yyyy, TEMP[9].xxxx, IMM[1].zzzz
84: MOV TEMP[8].z, TEMP[2].xxxx
85: MUL TEMP[2].xyz, TEMP[8].xyzz, CONST[31].yyyy
86: MUL TEMP[6].xyz, TEMP[4].xyzz, TEMP[6].xxxx
87: MOV TEMP[8].x, TEMP[5].xxxx
88: MOV TEMP[8].y, TEMP[2].xxxx
89: ADD TEMP[8].xy, TEMP[3].xyyy, TEMP[8].xyyy
90: MOV TEMP[9].x, TEMP[5].yyyy
91: MOV TEMP[9].y, TEMP[2].xxxx
92: ADD TEMP[9].xy, TEMP[3].xyyy, TEMP[9].xyyy
93: MOV TEMP[10].x, TEMP[5].zzzz
94: MOV TEMP[10].y, TEMP[2].xxxx
95: ADD TEMP[10].xy, TEMP[3].xyyy, TEMP[10].xyyy
96: MUL TEMP[11].xyz, TEMP[4].xyzz, IMM[0].wwww
97: MOV TEMP[12].x, TEMP[5].xxxx
98: MOV TEMP[12].y, TEMP[2].yyyy
99: ADD TEMP[12].xy, TEMP[3].xyyy, TEMP[12].xyyy
100: MOV TEMP[13].x, TEMP[5].yyyy
101: MOV TEMP[13].y, TEMP[2].yyyy
102: ADD TEMP[13].xy, TEMP[3].xyyy, TEMP[13].xyyy
103: MOV TEMP[14].x, TEMP[5].zzzz
104: MOV TEMP[14].y, TEMP[2].yyyy
105: ADD TEMP[14].xy, TEMP[3].xyyy, TEMP[14].xyyy
106: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[7].xxxx
107: MOV TEMP[7].x, TEMP[5].xxxx
108: MOV TEMP[7].y, TEMP[2].zzzz
109: ADD TEMP[7].xy, TEMP[3].xyyy, TEMP[7].xyyy
110: MOV TEMP[15].x, TEMP[5].yyyy
111: MOV TEMP[15].y, TEMP[2].zzzz
112: ADD TEMP[15].xy, TEMP[3].xyyy, TEMP[15].xyyy
113: MOV TEMP[5].x, TEMP[5].zzzz
114: MOV TEMP[5].y, TEMP[2].zzzz
115: ADD TEMP[2].xy, TEMP[3].xyyy, TEMP[5].xyyy
116: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[24].xyzz
117: MOV TEMP[2].xy, TEMP[2].xyyy
118: MOV TEMP[2].z, TEMP[1].zzzz
119: TEX TEMP[2].x, TEMP[2], SAMP[1], SHADOW2D
120: MOV TEMP[3].xy, TEMP[15].xyyy
121: MOV TEMP[3].z, TEMP[1].zzzz
122: TEX TEMP[3].x, TEMP[3], SAMP[1], SHADOW2D
123: MOV TEMP[5].xy, TEMP[7].xyyy
124: MOV TEMP[5].z, TEMP[1].zzzz
125: TEX TEMP[5].x, TEMP[5], SAMP[1], SHADOW2D
126: MOV TEMP[7].xy, TEMP[14].xyyy
127: MOV TEMP[7].z, TEMP[1].zzzz
128: TEX TEMP[7].x, TEMP[7], SAMP[1], SHADOW2D
129: MOV TEMP[13].xy, TEMP[13].xyyy
130: MOV TEMP[13].z, TEMP[1].zzzz
131: TEX TEMP[13].x, TEMP[13], SAMP[1], SHADOW2D
132: MOV TEMP[12].xy, TEMP[12].xyyy
133: MOV TEMP[12].z, TEMP[1].zzzz
134: TEX TEMP[12].x, TEMP[12], SAMP[1], SHADOW2D
135: MOV TEMP[10].xy, TEMP[10].xyyy
136: MOV TEMP[10].z, TEMP[1].zzzz
137: TEX TEMP[10].x, TEMP[10], SAMP[1], SHADOW2D
138: MOV TEMP[9].xy, TEMP[9].xyyy
139: MOV TEMP[9].z, TEMP[1].zzzz
140: TEX TEMP[9].x, TEMP[9], SAMP[1], SHADOW2D
141: MOV TEMP[8].xy, TEMP[8].xyyy
142: MOV TEMP[8].z, TEMP[1].zzzz
143: TEX TEMP[1].x, TEMP[8], SAMP[1], SHADOW2D
144: MUL TEMP[1].x, TEMP[6].xxxx, TEMP[1].xxxx
145: MAD TEMP[1].x, TEMP[6].yyyy, TEMP[9].xxxx, TEMP[1].xxxx
146: MAD TEMP[1].x, TEMP[6].zzzz, TEMP[10].xxxx, TEMP[1].xxxx
147: MAD TEMP[1].x, TEMP[11].xxxx, TEMP[12].xxxx, TEMP[1].xxxx
148: MAD TEMP[1].x, TEMP[11].yyyy, TEMP[13].xxxx, TEMP[1].xxxx
149: MAD TEMP[1].x, TEMP[11].zzzz, TEMP[7].xxxx, TEMP[1].xxxx
150: MAD TEMP[1].x, TEMP[4].xxxx, TEMP[5].xxxx, TEMP[1].xxxx
151: MAD TEMP[1].x, TEMP[4].yyyy, TEMP[3].xxxx, TEMP[1].xxxx
152: MAD TEMP[1].x, TEMP[4].zzzz, TEMP[2].xxxx, TEMP[1].xxxx
153: MUL TEMP[1].x, TEMP[1].xxxx, IMM[2].yyyy
154: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, CONST[23].xxxx
155: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[0].xyzz
156: SQRT TEMP[0].x, TEMP[0].xxxx
157: MAD TEMP[0].x, TEMP[0].xxxx, CONST[23].zzzz, CONST[23].wwww
158: MOV_SAT TEMP[0].x, TEMP[0].xxxx
159: ADD TEMP[0].x, TEMP[1].xxxx, TEMP[0].xxxx
160: MOV OUT[0], TEMP[0].xxxx
161: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 296)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328)
%85 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 340)
%87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 344)
%88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356)
%90 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360)
%91 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%92 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376)
%93 = call float @llvm.SI.load.const(<16 x i8> %23, i32 380)
%94 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%95 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388)
%96 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392)
%97 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
%98 = call float @llvm.SI.load.const(<16 x i8> %23, i32 420)
%99 = call float @llvm.SI.load.const(<16 x i8> %23, i32 424)
%100 = call float @llvm.SI.load.const(<16 x i8> %23, i32 428)
%101 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432)
%102 = call float @llvm.SI.load.const(<16 x i8> %23, i32 436)
%103 = call float @llvm.SI.load.const(<16 x i8> %23, i32 440)
%104 = call float @llvm.SI.load.const(<16 x i8> %23, i32 444)
%105 = call float @llvm.SI.load.const(<16 x i8> %23, i32 448)
%106 = call float @llvm.SI.load.const(<16 x i8> %23, i32 452)
%107 = call float @llvm.SI.load.const(<16 x i8> %23, i32 456)
%108 = call float @llvm.SI.load.const(<16 x i8> %23, i32 460)
%109 = call float @llvm.SI.load.const(<16 x i8> %23, i32 464)
%110 = call float @llvm.SI.load.const(<16 x i8> %23, i32 468)
%111 = call float @llvm.SI.load.const(<16 x i8> %23, i32 472)
%112 = call float @llvm.SI.load.const(<16 x i8> %23, i32 476)
%113 = call float @llvm.SI.load.const(<16 x i8> %23, i32 496)
%114 = call float @llvm.SI.load.const(<16 x i8> %23, i32 500)
%115 = call float @llvm.SI.load.const(<16 x i8> %23, i32 504)
%116 = call float @llvm.SI.load.const(<16 x i8> %23, i32 508)
%117 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%118 = load <32 x i8>, <32 x i8> addrspace(2)* %117, align 32, !tbaa !0
%119 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0
%121 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%122 = load <8 x i32>, <8 x i32> addrspace(2)* %121, align 32, !tbaa !0
%123 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%124 = load <4 x i32>, <4 x i32> addrspace(2)* %123, align 16, !tbaa !0
%125 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%134 = bitcast float %129 to i32
%135 = bitcast float %130 to i32
%136 = insertelement <2 x i32> undef, i32 %134, i32 0
%137 = insertelement <2 x i32> %136, i32 %135, i32 1
%138 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %118, <16 x i8> %120, i32 2)
%139 = extractelement <4 x float> %138, i32 0
%140 = call float @llvm.AMDGPU.lrp(float %139, float %128, float %127)
%141 = fmul float %24, %139
%142 = fadd float %141, %25
%143 = fdiv float 1.000000e+00, %142
%144 = call float @llvm.AMDGPU.lrp(float %26, float %139, float %143)
%145 = fmul float %131, %144
%146 = fmul float %132, %144
%147 = fmul float %133, %144
%148 = call float @llvm.AMDGPU.lrp(float %26, float %125, float %145)
%149 = call float @llvm.AMDGPU.lrp(float %26, float %126, float %146)
%150 = call float @llvm.AMDGPU.lrp(float %26, float %140, float %147)
%151 = fmul float %97, %148
%152 = fmul float %98, %148
%153 = fmul float %99, %148
%154 = fmul float %100, %148
%155 = fmul float %101, %149
%156 = fadd float %155, %151
%157 = fmul float %102, %149
%158 = fadd float %157, %152
%159 = fmul float %103, %149
%160 = fadd float %159, %153
%161 = fmul float %104, %149
%162 = fadd float %161, %154
%163 = fmul float %105, %150
%164 = fadd float %163, %156
%165 = fmul float %106, %150
%166 = fadd float %165, %158
%167 = fmul float %107, %150
%168 = fadd float %167, %160
%169 = fmul float %108, %150
%170 = fadd float %169, %162
%171 = fadd float %164, %109
%172 = fadd float %166, %110
%173 = fadd float %168, %111
%174 = fadd float %170, %112
%175 = fsub float %171, %27
%176 = fsub float %172, %28
%177 = fsub float %173, %29
%178 = fsub float %171, %30
%179 = fsub float %172, %31
%180 = fsub float %173, %32
%181 = fsub float %171, %33
%182 = fsub float %172, %34
%183 = fsub float %173, %35
%184 = fsub float %171, %36
%185 = fsub float %172, %37
%186 = fsub float %173, %38
%187 = fmul float %175, %175
%188 = fmul float %176, %176
%189 = fadd float %188, %187
%190 = fmul float %177, %177
%191 = fadd float %189, %190
%192 = fmul float %178, %178
%193 = fmul float %179, %179
%194 = fadd float %193, %192
%195 = fmul float %180, %180
%196 = fadd float %194, %195
%197 = fmul float %181, %181
%198 = fmul float %182, %182
%199 = fadd float %198, %197
%200 = fmul float %183, %183
%201 = fadd float %199, %200
%202 = fmul float %184, %184
%203 = fmul float %185, %185
%204 = fadd float %203, %202
%205 = fmul float %186, %186
%206 = fadd float %204, %205
%207 = fcmp olt float %191, %39
%208 = fcmp olt float %196, %40
%209 = fcmp olt float %201, %41
%210 = fcmp olt float %206, %42
%211 = select i1 %207, float 1.000000e+00, float 0.000000e+00
%212 = select i1 %208, float 1.000000e+00, float 0.000000e+00
%213 = select i1 %209, float 1.000000e+00, float 0.000000e+00
%214 = select i1 %210, float 1.000000e+00, float 0.000000e+00
%215 = fsub float %212, %211
%216 = fsub float %213, %212
%217 = fsub float %214, %213
%218 = call float @llvm.AMDIL.clamp.(float %215, float 0.000000e+00, float 1.000000e+00)
%219 = call float @llvm.AMDIL.clamp.(float %216, float 0.000000e+00, float 1.000000e+00)
%220 = call float @llvm.AMDIL.clamp.(float %217, float 0.000000e+00, float 1.000000e+00)
%221 = fmul float %79, %171
%222 = fmul float %80, %171
%223 = fmul float %81, %171
%224 = fmul float %82, %172
%225 = fadd float %224, %221
%226 = fmul float %83, %172
%227 = fadd float %226, %222
%228 = fmul float %84, %172
%229 = fadd float %228, %223
%230 = fmul float %85, %173
%231 = fadd float %230, %225
%232 = fmul float %86, %173
%233 = fadd float %232, %227
%234 = fmul float %87, %173
%235 = fadd float %234, %229
%236 = fmul float %88, %174
%237 = fadd float %236, %231
%238 = fmul float %89, %174
%239 = fadd float %238, %233
%240 = fmul float %90, %174
%241 = fadd float %240, %235
%242 = fmul float %67, %171
%243 = fmul float %68, %171
%244 = fmul float %69, %171
%245 = fmul float %70, %172
%246 = fadd float %245, %242
%247 = fmul float %71, %172
%248 = fadd float %247, %243
%249 = fmul float %72, %172
%250 = fadd float %249, %244
%251 = fmul float %73, %173
%252 = fadd float %251, %246
%253 = fmul float %74, %173
%254 = fadd float %253, %248
%255 = fmul float %75, %173
%256 = fadd float %255, %250
%257 = fmul float %76, %174
%258 = fadd float %257, %252
%259 = fmul float %77, %174
%260 = fadd float %259, %254
%261 = fmul float %78, %174
%262 = fadd float %261, %256
%263 = fmul float %55, %171
%264 = fmul float %56, %171
%265 = fmul float %57, %171
%266 = fmul float %58, %172
%267 = fadd float %266, %263
%268 = fmul float %59, %172
%269 = fadd float %268, %264
%270 = fmul float %60, %172
%271 = fadd float %270, %265
%272 = fmul float %61, %173
%273 = fadd float %272, %267
%274 = fmul float %62, %173
%275 = fadd float %274, %269
%276 = fmul float %63, %173
%277 = fadd float %276, %271
%278 = fmul float %64, %174
%279 = fadd float %278, %273
%280 = fmul float %65, %174
%281 = fadd float %280, %275
%282 = fmul float %66, %174
%283 = fadd float %282, %277
%284 = fmul float %43, %171
%285 = fmul float %44, %171
%286 = fmul float %45, %171
%287 = fmul float %46, %172
%288 = fadd float %287, %284
%289 = fmul float %47, %172
%290 = fadd float %289, %285
%291 = fmul float %48, %172
%292 = fadd float %291, %286
%293 = fmul float %49, %173
%294 = fadd float %293, %288
%295 = fmul float %50, %173
%296 = fadd float %295, %290
%297 = fmul float %51, %173
%298 = fadd float %297, %292
%299 = fmul float %52, %174
%300 = fadd float %299, %294
%301 = fmul float %53, %174
%302 = fadd float %301, %296
%303 = fmul float %54, %174
%304 = fadd float %303, %298
%305 = fmul float %300, %211
%306 = fmul float %302, %211
%307 = fmul float %304, %211
%308 = fmul float %279, %218
%309 = fadd float %308, %305
%310 = fmul float %281, %218
%311 = fadd float %310, %306
%312 = fmul float %283, %218
%313 = fadd float %312, %307
%314 = fmul float %258, %219
%315 = fadd float %314, %309
%316 = fmul float %260, %219
%317 = fadd float %316, %311
%318 = fmul float %262, %219
%319 = fadd float %318, %313
%320 = fmul float %237, %220
%321 = fadd float %320, %315
%322 = fmul float %239, %220
%323 = fadd float %322, %317
%324 = fmul float %241, %220
%325 = fadd float %324, %319
%326 = fmul float %321, %115
%327 = fadd float %326, 5.000000e-01
%328 = fmul float %323, %116
%329 = fadd float %328, 5.000000e-01
%330 = call float @llvm.floor.f32(float %327)
%331 = call float @llvm.floor.f32(float %329)
%332 = fadd float %330, -5.000000e-01
%333 = fadd float %331, -5.000000e-01
%334 = fmul float %332, %113
%335 = fmul float %333, %114
%336 = call float @llvm.floor.f32(float %327)
%337 = fsub float %327, %336
%338 = call float @llvm.floor.f32(float %329)
%339 = fsub float %329, %338
%340 = fmul float %337, 3.000000e+00
%341 = fsub float 4.000000e+00, %340
%342 = fmul float %337, 3.000000e+00
%343 = fadd float %342, 1.000000e+00
%344 = fmul float %337, 2.000000e+00
%345 = fsub float 3.000000e+00, %344
%346 = fdiv float 1.000000e+00, %341
%347 = fmul float %345, %346
%348 = fadd float %347, -2.000000e+00
%349 = fadd float %337, 3.000000e+00
%350 = fmul float %349, 0x3FC24924A0000000
%351 = fdiv float 1.000000e+00, %343
%352 = fmul float %337, %351
%353 = fadd float %352, 2.000000e+00
%354 = fmul float %348, %113
%355 = fmul float %350, %113
%356 = fmul float %353, %113
%357 = fmul float %339, 3.000000e+00
%358 = fsub float 4.000000e+00, %357
%359 = fmul float %339, 3.000000e+00
%360 = fadd float %359, 1.000000e+00
%361 = fmul float %339, 2.000000e+00
%362 = fsub float 3.000000e+00, %361
%363 = fdiv float 1.000000e+00, %358
%364 = fmul float %362, %363
%365 = fadd float %364, -2.000000e+00
%366 = fadd float %339, 3.000000e+00
%367 = fmul float %366, 0x3FC24924A0000000
%368 = fdiv float 1.000000e+00, %360
%369 = fmul float %339, %368
%370 = fadd float %369, 2.000000e+00
%371 = fmul float %365, %114
%372 = fmul float %367, %114
%373 = fmul float %370, %114
%374 = fmul float %341, %358
%375 = fmul float %358, 7.000000e+00
%376 = fmul float %343, %358
%377 = fadd float %334, %354
%378 = fadd float %335, %371
%379 = fadd float %334, %355
%380 = fadd float %335, %371
%381 = fadd float %334, %356
%382 = fadd float %335, %371
%383 = fmul float %341, 7.000000e+00
%384 = fmul float %343, 7.000000e+00
%385 = fadd float %334, %354
%386 = fadd float %335, %372
%387 = fadd float %334, %355
%388 = fadd float %335, %372
%389 = fadd float %334, %356
%390 = fadd float %335, %372
%391 = fmul float %341, %360
%392 = fmul float %360, 7.000000e+00
%393 = fmul float %343, %360
%394 = fadd float %334, %354
%395 = fadd float %335, %373
%396 = fadd float %334, %355
%397 = fadd float %335, %373
%398 = fadd float %334, %356
%399 = fadd float %335, %373
%400 = fsub float %171, %94
%401 = fsub float %172, %95
%402 = fsub float %173, %96
%403 = bitcast float %325 to i32
%404 = bitcast float %398 to i32
%405 = bitcast float %399 to i32
%406 = insertelement <4 x i32> undef, i32 %403, i32 0
%407 = insertelement <4 x i32> %406, i32 %404, i32 1
%408 = insertelement <4 x i32> %407, i32 %405, i32 2
%409 = bitcast <8 x i32> %122 to <32 x i8>
%410 = bitcast <4 x i32> %124 to <16 x i8>
%411 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %408, <32 x i8> %409, <16 x i8> %410, i32 7)
%412 = extractelement <4 x float> %411, i32 0
%413 = bitcast float %325 to i32
%414 = bitcast float %396 to i32
%415 = bitcast float %397 to i32
%416 = insertelement <4 x i32> undef, i32 %413, i32 0
%417 = insertelement <4 x i32> %416, i32 %414, i32 1
%418 = insertelement <4 x i32> %417, i32 %415, i32 2
%419 = bitcast <8 x i32> %122 to <32 x i8>
%420 = bitcast <4 x i32> %124 to <16 x i8>
%421 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %418, <32 x i8> %419, <16 x i8> %420, i32 7)
%422 = extractelement <4 x float> %421, i32 0
%423 = bitcast float %325 to i32
%424 = bitcast float %394 to i32
%425 = bitcast float %395 to i32
%426 = insertelement <4 x i32> undef, i32 %423, i32 0
%427 = insertelement <4 x i32> %426, i32 %424, i32 1
%428 = insertelement <4 x i32> %427, i32 %425, i32 2
%429 = bitcast <8 x i32> %122 to <32 x i8>
%430 = bitcast <4 x i32> %124 to <16 x i8>
%431 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 7)
%432 = extractelement <4 x float> %431, i32 0
%433 = bitcast float %325 to i32
%434 = bitcast float %389 to i32
%435 = bitcast float %390 to i32
%436 = insertelement <4 x i32> undef, i32 %433, i32 0
%437 = insertelement <4 x i32> %436, i32 %434, i32 1
%438 = insertelement <4 x i32> %437, i32 %435, i32 2
%439 = bitcast <8 x i32> %122 to <32 x i8>
%440 = bitcast <4 x i32> %124 to <16 x i8>
%441 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %438, <32 x i8> %439, <16 x i8> %440, i32 7)
%442 = extractelement <4 x float> %441, i32 0
%443 = bitcast float %325 to i32
%444 = bitcast float %387 to i32
%445 = bitcast float %388 to i32
%446 = insertelement <4 x i32> undef, i32 %443, i32 0
%447 = insertelement <4 x i32> %446, i32 %444, i32 1
%448 = insertelement <4 x i32> %447, i32 %445, i32 2
%449 = bitcast <8 x i32> %122 to <32 x i8>
%450 = bitcast <4 x i32> %124 to <16 x i8>
%451 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %448, <32 x i8> %449, <16 x i8> %450, i32 7)
%452 = extractelement <4 x float> %451, i32 0
%453 = bitcast float %325 to i32
%454 = bitcast float %385 to i32
%455 = bitcast float %386 to i32
%456 = insertelement <4 x i32> undef, i32 %453, i32 0
%457 = insertelement <4 x i32> %456, i32 %454, i32 1
%458 = insertelement <4 x i32> %457, i32 %455, i32 2
%459 = bitcast <8 x i32> %122 to <32 x i8>
%460 = bitcast <4 x i32> %124 to <16 x i8>
%461 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %458, <32 x i8> %459, <16 x i8> %460, i32 7)
%462 = extractelement <4 x float> %461, i32 0
%463 = bitcast float %325 to i32
%464 = bitcast float %381 to i32
%465 = bitcast float %382 to i32
%466 = insertelement <4 x i32> undef, i32 %463, i32 0
%467 = insertelement <4 x i32> %466, i32 %464, i32 1
%468 = insertelement <4 x i32> %467, i32 %465, i32 2
%469 = bitcast <8 x i32> %122 to <32 x i8>
%470 = bitcast <4 x i32> %124 to <16 x i8>
%471 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %468, <32 x i8> %469, <16 x i8> %470, i32 7)
%472 = extractelement <4 x float> %471, i32 0
%473 = bitcast float %325 to i32
%474 = bitcast float %379 to i32
%475 = bitcast float %380 to i32
%476 = insertelement <4 x i32> undef, i32 %473, i32 0
%477 = insertelement <4 x i32> %476, i32 %474, i32 1
%478 = insertelement <4 x i32> %477, i32 %475, i32 2
%479 = bitcast <8 x i32> %122 to <32 x i8>
%480 = bitcast <4 x i32> %124 to <16 x i8>
%481 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %478, <32 x i8> %479, <16 x i8> %480, i32 7)
%482 = extractelement <4 x float> %481, i32 0
%483 = bitcast float %325 to i32
%484 = bitcast float %377 to i32
%485 = bitcast float %378 to i32
%486 = insertelement <4 x i32> undef, i32 %483, i32 0
%487 = insertelement <4 x i32> %486, i32 %484, i32 1
%488 = insertelement <4 x i32> %487, i32 %485, i32 2
%489 = bitcast <8 x i32> %122 to <32 x i8>
%490 = bitcast <4 x i32> %124 to <16 x i8>
%491 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %488, <32 x i8> %489, <16 x i8> %490, i32 7)
%492 = extractelement <4 x float> %491, i32 0
%493 = fmul float %374, %492
%494 = fmul float %375, %482
%495 = fadd float %494, %493
%496 = fmul float %376, %472
%497 = fadd float %496, %495
%498 = fmul float %383, %462
%499 = fadd float %498, %497
%500 = fmul float %452, 4.900000e+01
%501 = fadd float %500, %499
%502 = fmul float %384, %442
%503 = fadd float %502, %501
%504 = fmul float %391, %432
%505 = fadd float %504, %503
%506 = fmul float %392, %422
%507 = fadd float %506, %505
%508 = fmul float %393, %412
%509 = fadd float %508, %507
%510 = fmul float %509, 0x3F7C71C720000000
%511 = call float @llvm.AMDGPU.lrp(float %510, float 1.000000e+00, float %91)
%512 = fmul float %400, %400
%513 = fmul float %401, %401
%514 = fadd float %513, %512
%515 = fmul float %402, %402
%516 = fadd float %514, %515
%517 = call float @llvm.sqrt.f32(float %516)
%518 = fmul float %517, %92
%519 = fadd float %518, %93
%520 = call float @llvm.AMDIL.clamp.(float %519, float 0.000000e+00, float 1.000000e+00)
%521 = fadd float %511, %520
%522 = call i32 @llvm.SI.packf16(float %521, float %521)
%523 = bitcast i32 %522 to float
%524 = call i32 @llvm.SI.packf16(float %521, float %521)
%525 = bitcast i32 %524 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %523, float %525, float %523, float %525)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b64 s[16:17], s[6:7] ; BE900406
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[76:79], s[4:5], 0x0 ; C0A60500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s74, s[0:3], 0x0 ; C2250100
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
s_buffer_load_dword s75, s[0:3], 0x7 ; C2258107
s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108
s_buffer_load_dword s34, s[0:3], 0x9 ; C2110109
s_buffer_load_dword s32, s[0:3], 0xa ; C210010A
s_buffer_load_dword s30, s[0:3], 0xc ; C20F010C
s_buffer_load_dword s31, s[0:3], 0xd ; C20F810D
s_buffer_load_dword s5, s[0:3], 0xe ; C202810E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s5, 11 ; 04331605
s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s5, 7 ; 04330E05
s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s5, 9 ; 04331205
v_mov_b32_e32 v6, s4 ; 7E0C0204
s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 8 ; 04331004
v_sub_f32_e64 v7, 1.0, s75 ; D2080007 000096F2
s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 3 ; 04330604
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 5 ; 04330A04
s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 4 ; 04330804
s_buffer_load_dword s36, s[0:3], 0x18 ; C2120118
s_buffer_load_dword s37, s[0:3], 0x19 ; C2128119
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 10 ; 04331404
s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 6 ; 04330C04
s_buffer_load_dword s48, s[0:3], 0x1c ; C218011C
s_buffer_load_dword s49, s[0:3], 0x1d ; C218811D
s_buffer_load_dword s42, s[0:3], 0x1e ; C215011E
s_buffer_load_dword s47, s[0:3], 0x20 ; C2178120
s_buffer_load_dword s46, s[0:3], 0x21 ; C2170121
s_buffer_load_dword s45, s[0:3], 0x22 ; C2168122
s_buffer_load_dword s43, s[0:3], 0x24 ; C2158124
s_buffer_load_dword s40, s[0:3], 0x25 ; C2140125
s_buffer_load_dword s39, s[0:3], 0x26 ; C2138126
s_buffer_load_dword s38, s[0:3], 0x28 ; C2130128
v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400
v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401
v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500
v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501
v_interp_p1_f32 v10, v0, 2, 1, [m0] ; C8280600
v_interp_p2_f32 v10, [v10], v1, 2, 1, [m0] ; C8290601
v_interp_p1_f32 v11, v0, 3, 1, [m0] ; C82C0700
v_interp_p2_f32 v11, [v11], v1, 3, 1, [m0] ; C82D0701
v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800
v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801
s_buffer_load_dword s44, s[0:3], 0x29 ; C2160129
s_buffer_load_dword s41, s[0:3], 0x2a ; C214812A
s_buffer_load_dword s61, s[0:3], 0x2c ; C21E812C
s_buffer_load_dword s60, s[0:3], 0x2d ; C21E012D
s_buffer_load_dword s59, s[0:3], 0x2e ; C21D812E
s_buffer_load_dword s58, s[0:3], 0x30 ; C21D0130
s_buffer_load_dword s57, s[0:3], 0x31 ; C21C8131
s_buffer_load_dword s56, s[0:3], 0x32 ; C21C0132
s_buffer_load_dword s55, s[0:3], 0x34 ; C21B8134
s_buffer_load_dword s54, s[0:3], 0x35 ; C21B0135
s_buffer_load_dword s53, s[0:3], 0x36 ; C21A8136
s_buffer_load_dword s52, s[0:3], 0x38 ; C21A0138
s_buffer_load_dword s51, s[0:3], 0x39 ; C2198139
s_buffer_load_dword s50, s[0:3], 0x3a ; C219013A
s_buffer_load_dword s71, s[0:3], 0x3c ; C223813C
s_buffer_load_dword s72, s[0:3], 0x3d ; C224013D
s_buffer_load_dword s73, s[0:3], 0x3e ; C224813E
s_buffer_load_dword s70, s[0:3], 0x40 ; C2230140
s_buffer_load_dword s69, s[0:3], 0x41 ; C2228141
s_buffer_load_dword s68, s[0:3], 0x42 ; C2220142
s_buffer_load_dword s67, s[0:3], 0x44 ; C2218144
s_buffer_load_dword s66, s[0:3], 0x45 ; C2210145
s_buffer_load_dword s65, s[0:3], 0x46 ; C2208146
s_buffer_load_dword s64, s[0:3], 0x48 ; C2200148
s_buffer_load_dword s63, s[0:3], 0x49 ; C21F8149
s_buffer_load_dword s62, s[0:3], 0x4a ; C21F014A
s_buffer_load_dword s80, s[0:3], 0x4c ; C228014C
s_buffer_load_dword s81, s[0:3], 0x4d ; C228814D
s_buffer_load_dword s82, s[0:3], 0x4e ; C229014E
s_buffer_load_dword s83, s[0:3], 0x50 ; C2298150
s_buffer_load_dword s84, s[0:3], 0x51 ; C22A0151
s_buffer_load_dword s85, s[0:3], 0x52 ; C22A8152
s_buffer_load_dword s86, s[0:3], 0x54 ; C22B0154
s_buffer_load_dword s87, s[0:3], 0x55 ; C22B8155
s_buffer_load_dword s88, s[0:3], 0x56 ; C22C0156
s_buffer_load_dword s89, s[0:3], 0x58 ; C22C8158
s_buffer_load_dword s90, s[0:3], 0x59 ; C22D0159
s_buffer_load_dword s91, s[0:3], 0x5a ; C22D815A
s_buffer_load_dword s4, s[0:3], 0x5c ; C202015C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 0 ; 04330004
s_buffer_load_dword s4, s[0:3], 0x5e ; C202015E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 1 ; 04330204
s_buffer_load_dword s4, s[0:3], 0x5f ; C202015F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v25, s4, 2 ; 04330404
s_buffer_load_dword s92, s[0:3], 0x6a ; C22E016A
s_buffer_load_dword s93, s[0:3], 0x6b ; C22E816B
s_buffer_load_dword s94, s[0:3], 0x6c ; C22F016C
s_buffer_load_dword s95, s[0:3], 0x6d ; C22F816D
s_buffer_load_dword s96, s[0:3], 0x6e ; C230016E
s_buffer_load_dword s97, s[0:3], 0x6f ; C230816F
s_buffer_load_dword s98, s[0:3], 0x70 ; C2310170
s_buffer_load_dword s99, s[0:3], 0x71 ; C2318171
s_buffer_load_dword s100, s[0:3], 0x72 ; C2320172
s_buffer_load_dword s101, s[0:3], 0x73 ; C2328173
s_buffer_load_dword s24, s[0:3], 0x74 ; C20C0174
s_buffer_load_dword s25, s[0:3], 0x75 ; C20C8175
s_buffer_load_dword s26, s[0:3], 0x76 ; C20D0176
s_buffer_load_dword s27, s[0:3], 0x77 ; C20D8177
s_buffer_load_dword s35, s[0:3], 0x7c ; C211817C
s_buffer_load_dword s28, s[0:3], 0x68 ; C20E0168
s_buffer_load_dword s29, s[0:3], 0x69 ; C20E8169
s_load_dwordx8 s[4:11], s[16:17], 0x0 ; C0C21100
s_load_dwordx8 s[16:23], s[16:17], 0x8 ; C0C81108
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[76:79] ; F0800100 02610108
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v6, s74, v1 ; 3E0C024A
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_sub_f32_e32 v8, 1.0, v1 ; 081002F2
v_mul_f32_e32 v4, v4, v8 ; 10081104
v_mac_f32_e32 v4, v5, v1 ; 3E080305
v_mul_f32_e32 v5, v6, v7 ; 100A0F06
v_mac_f32_e32 v5, s75, v1 ; 3E0A024B
v_mul_f32_e32 v1, v5, v10 ; 10021505
v_mul_f32_e32 v6, v5, v11 ; 100C1705
v_mul_f32_e32 v0, v5, v0 ; 10000105
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, s75, v2 ; 3E02044B
v_mul_f32_e32 v2, v6, v7 ; 10040F06
v_mac_f32_e32 v2, s75, v3 ; 3E04064B
v_mul_f32_e32 v0, v0, v7 ; 10000F00
v_mac_f32_e32 v0, s75, v4 ; 3E00084B
v_mul_f32_e32 v3, s28, v1 ; 1006021C
v_mul_f32_e32 v4, s29, v1 ; 1008021D
v_mul_f32_e32 v5, s92, v1 ; 100A025C
v_mul_f32_e32 v1, s93, v1 ; 1002025D
v_mac_f32_e32 v3, s94, v2 ; 3E06045E
v_mac_f32_e32 v4, s95, v2 ; 3E08045F
v_mac_f32_e32 v5, s96, v2 ; 3E0A0460
v_mac_f32_e32 v1, s97, v2 ; 3E020461
v_mac_f32_e32 v3, s98, v0 ; 3E060062
v_mac_f32_e32 v4, s99, v0 ; 3E080063
v_mac_f32_e32 v5, s100, v0 ; 3E0A0064
v_mac_f32_e32 v1, s101, v0 ; 3E020065
v_add_f32_e32 v0, s24, v3 ; 06000618
v_add_f32_e32 v2, s25, v4 ; 06040819
v_add_f32_e32 v3, s26, v5 ; 06060A1A
v_add_f32_e32 v1, s27, v1 ; 0602021B
v_mul_f32_e32 v4, s80, v0 ; 10080050
v_mul_f32_e32 v5, s81, v0 ; 100A0051
v_mul_f32_e32 v6, s82, v0 ; 100C0052
v_mac_f32_e32 v4, s83, v2 ; 3E080453
v_mac_f32_e32 v5, s84, v2 ; 3E0A0454
v_mac_f32_e32 v6, s85, v2 ; 3E0C0455
v_mac_f32_e32 v4, s86, v3 ; 3E080656
v_mac_f32_e32 v5, s87, v3 ; 3E0A0657
v_mac_f32_e32 v6, s88, v3 ; 3E0C0658
v_mac_f32_e32 v4, s89, v1 ; 3E080259
v_mac_f32_e32 v5, s90, v1 ; 3E0A025A
v_mac_f32_e32 v6, s91, v1 ; 3E0C025B
v_mul_f32_e32 v7, s71, v0 ; 100E0047
v_mul_f32_e32 v8, s72, v0 ; 10100048
v_mul_f32_e32 v9, s73, v0 ; 10120049
v_mac_f32_e32 v7, s70, v2 ; 3E0E0446
v_mac_f32_e32 v8, s69, v2 ; 3E100445
v_mac_f32_e32 v9, s68, v2 ; 3E120444
v_mac_f32_e32 v7, s67, v3 ; 3E0E0643
v_mac_f32_e32 v8, s66, v3 ; 3E100642
v_mac_f32_e32 v9, s65, v3 ; 3E120641
v_mac_f32_e32 v7, s64, v1 ; 3E0E0240
v_mac_f32_e32 v8, s63, v1 ; 3E10023F
v_mac_f32_e32 v9, s62, v1 ; 3E12023E
v_mul_f32_e32 v10, s61, v0 ; 1014003D
v_mul_f32_e32 v11, s60, v0 ; 1016003C
v_mul_f32_e32 v12, s59, v0 ; 1018003B
v_mac_f32_e32 v10, s58, v2 ; 3E14043A
v_mac_f32_e32 v11, s57, v2 ; 3E160439
v_mac_f32_e32 v12, s56, v2 ; 3E180438
v_mac_f32_e32 v10, s55, v3 ; 3E140637
v_mac_f32_e32 v11, s54, v3 ; 3E160636
v_mac_f32_e32 v12, s53, v3 ; 3E180635
v_mac_f32_e32 v10, s52, v1 ; 3E140234
v_mac_f32_e32 v11, s51, v1 ; 3E160233
v_mac_f32_e32 v12, s50, v1 ; 3E180232
v_mul_f32_e32 v13, s48, v0 ; 101A0030
v_mul_f32_e32 v14, s49, v0 ; 101C0031
v_mac_f32_e32 v13, s47, v2 ; 3E1A042F
v_mac_f32_e32 v14, s46, v2 ; 3E1C042E
v_mul_f32_e32 v15, s42, v0 ; 101E002A
v_mac_f32_e32 v15, s45, v2 ; 3E1E042D
v_mac_f32_e32 v13, s43, v3 ; 3E1A062B
v_mac_f32_e32 v14, s40, v3 ; 3E1C0628
v_mac_f32_e32 v15, s39, v3 ; 3E1E0627
v_mac_f32_e32 v13, s38, v1 ; 3E1A0226
v_mac_f32_e32 v14, s44, v1 ; 3E1C022C
v_mac_f32_e32 v15, s41, v1 ; 3E1E0229
v_subrev_f32_e32 v1, s33, v0 ; 0A020021
v_subrev_f32_e32 v16, s34, v2 ; 0A200422
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v16, v16 ; 3E022110
v_subrev_f32_e32 v16, s32, v3 ; 0A200620
v_mac_f32_e32 v1, v16, v16 ; 3E022110
v_subrev_f32_e32 v16, s30, v0 ; 0A20001E
v_subrev_f32_e32 v17, s31, v2 ; 0A22041F
v_mul_f32_e32 v16, v16, v16 ; 10202110
v_mac_f32_e32 v16, v17, v17 ; 3E202311
v_readlane_b32 s4, v25, 11 ; 02091719
s_nop 2 ; BF800002
v_subrev_f32_e32 v17, s4, v3 ; 0A220604
v_mac_f32_e32 v16, v17, v17 ; 3E202311
v_cmp_gt_f32_e32 vcc, s36, v1 ; 7C080224
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_cmp_gt_f32_e32 vcc, s37, v16 ; 7C082025
v_cndmask_b32_e64 v16, 0, 1.0, vcc ; D2000010 01A9E480
v_subrev_f32_e32 v17, v1, v16 ; 0A222101
v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280
v_mul_f32_e32 v13, v1, v13 ; 101A1B01
v_mac_f32_e32 v13, v17, v10 ; 3E1A1511
v_mul_f32_e32 v10, v1, v14 ; 10141D01
v_mac_f32_e32 v10, v17, v11 ; 3E141711
v_mul_f32_e32 v18, v1, v15 ; 10241F01
v_mac_f32_e32 v18, v17, v12 ; 3E241911
v_readlane_b32 s4, v25, 7 ; 02090F19
s_nop 2 ; BF800002
v_subrev_f32_e32 v1, s4, v0 ; 0A020004
v_readlane_b32 s4, v25, 9 ; 02091319
s_nop 2 ; BF800002
v_subrev_f32_e32 v11, s4, v2 ; 0A160404
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v11, v11 ; 3E02170B
v_readlane_b32 s4, v25, 8 ; 02091119
s_nop 2 ; BF800002
v_subrev_f32_e32 v11, s4, v3 ; 0A160604
v_mac_f32_e32 v1, v11, v11 ; 3E02170B
v_readlane_b32 s4, v25, 10 ; 02091519
s_nop 2 ; BF800002
v_cmp_gt_f32_e32 vcc, s4, v1 ; 7C080204
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_subrev_f32_e32 v11, v16, v1 ; 0A160310
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_mac_f32_e32 v13, v11, v7 ; 3E1A0F0B
v_mac_f32_e32 v10, v11, v8 ; 3E14110B
v_mac_f32_e32 v18, v11, v9 ; 3E24130B
v_readlane_b32 s4, v25, 3 ; 02090719
s_nop 2 ; BF800002
v_subrev_f32_e32 v7, s4, v0 ; 0A0E0004
v_readlane_b32 s4, v25, 5 ; 02090B19
s_nop 2 ; BF800002
v_subrev_f32_e32 v8, s4, v2 ; 0A100404
v_mul_f32_e32 v7, v7, v7 ; 100E0F07
v_mac_f32_e32 v7, v8, v8 ; 3E0E1108
v_readlane_b32 s4, v25, 4 ; 02090919
s_nop 2 ; BF800002
v_subrev_f32_e32 v8, s4, v3 ; 0A100604
s_buffer_load_dword s4, s[0:3], 0x7e ; C202017E
s_buffer_load_dword s5, s[0:3], 0x7f ; C202817F
v_mac_f32_e32 v7, v8, v8 ; 3E0E1108
v_readlane_b32 s6, v25, 6 ; 020D0D19
s_nop 2 ; BF800002
v_cmp_gt_f32_e32 vcc, s6, v7 ; 7C080E06
v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480
v_subrev_f32_e32 v1, v1, v7 ; 0A020F01
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mac_f32_e32 v13, v1, v4 ; 3E1A0901
v_mac_f32_e32 v10, v1, v5 ; 3E140B01
v_mac_f32_e32 v18, v1, v6 ; 3E240D01
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mad_f32 v1, v13, s4, 0.5 ; D2820001 03C0090D
v_mad_f32 v4, v10, s5, 0.5 ; D2820004 03C00B0A
v_floor_f32_e32 v5, v1 ; 7E0A4901
v_subrev_f32_e32 v1, v5, v1 ; 0A020305
v_floor_f32_e32 v6, v4 ; 7E0C4904
v_subrev_f32_e32 v4, v6, v4 ; 0A080906
s_buffer_load_dword s4, s[0:3], 0x7d ; C202017D
v_mov_b32_e32 v7, 0x40400000 ; 7E0E02FF 40400000
v_mad_f32 v8, -v1, v7, 4.0 ; D2820008 23DA0F01
v_mad_f32 v9, v1, v7, 1.0 ; D2820009 03CA0F01
v_mad_f32 v10, -2.0, v1, v7 ; D282000A 041E02F5
v_rcp_f32_e32 v11, v8 ; 7E165508
v_rcp_f32_e32 v12, v9 ; 7E185509
v_mad_f32 v13, v4, v7, 1.0 ; D282000D 03CA0F04
v_rcp_f32_e32 v14, v13 ; 7E1C550D
v_mad_f32 v10, v10, v11, -2.0 ; D282000A 03D6170A
v_mad_f32 v11, v1, v12, 2.0 ; D282000B 03D21901
v_mul_f32_e32 v19, s35, v11 ; 10261623
v_mad_f32 v11, v4, v14, 2.0 ; D282000B 03D21D04
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v20, s4, v11 ; 10281604
v_add_f32_e32 v5, -0.5, v5 ; 060A0AF1
v_add_f32_e32 v1, v7, v1 ; 06020307
v_mov_b32_e32 v11, 0x3e124925 ; 7E1602FF 3E124925
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mul_f32_e32 v10, s35, v10 ; 10141423
v_mul_f32_e32 v1, s35, v1 ; 10020223
v_mac_f32_e32 v10, s35, v5 ; 3E140A23
v_mad_f32 v12, -v4, v7, 4.0 ; D282000C 23DA0F04
v_rcp_f32_e32 v14, v12 ; 7E1C550C
v_mac_f32_e32 v1, s35, v5 ; 3E020A23
v_mac_f32_e32 v19, s35, v5 ; 3E260A23
v_mad_f32 v5, -2.0, v4, v7 ; D2820005 041E08F5
v_mad_f32 v5, v5, v14, -2.0 ; D2820005 03D61D05
v_add_f32_e32 v6, -0.5, v6 ; 060C0CF1
v_mac_f32_e32 v20, s4, v6 ; 3E280C04
v_add_f32_e32 v4, v7, v4 ; 06080907
v_mov_b32_e32 v14, v18 ; 7E1C0312
v_mov_b32_e32 v15, v19 ; 7E1E0313
v_mov_b32_e32 v16, v20 ; 7E200314
v_mov_b32_e32 v17, v21 ; 7E220315
v_mov_b32_e32 v21, v18 ; 7E2A0312
v_mov_b32_e32 v22, v19 ; 7E2C0313
v_mov_b32_e32 v23, v20 ; 7E2E0314
v_mov_b32_e32 v24, v21 ; 7E300315
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mov_b32_e32 v15, v1 ; 7E1E0301
v_mov_b32_e32 v22, v10 ; 7E2C030A
v_mul_f32_e32 v1, s4, v5 ; 10020A04
v_mul_f32_e32 v4, s4, v4 ; 10080804
v_mac_f32_e32 v1, s4, v6 ; 3E020C04
v_mac_f32_e32 v4, s4, v6 ; 3E080C04
v_mov_b32_e32 v16, v20 ; 7E200314
v_mov_b32_e32 v23, v20 ; 7E2E0314
image_sample_c v5, 1, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0A00100 00640512
v_mov_b32_e32 v20, v4 ; 7E280304
image_sample_c v6, 1, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[16:23], s[12:15] ; F0A00100 0064060E
v_mov_b32_e32 v16, v4 ; 7E200304
image_sample_c v7, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0A00100 00640715
v_mov_b32_e32 v23, v4 ; 7E2E0304
image_sample_c v4, 1, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0A00100 00640412
v_mov_b32_e32 v20, v1 ; 7E280301
image_sample_c v10, 1, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[16:23], s[12:15] ; F0A00100 00640A0E
image_sample_c v11, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0A00100 00640B15
image_sample_c v18, 1, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[16:23], s[12:15] ; F0A00100 00641212
v_mov_b32_e32 v16, v1 ; 7E200301
image_sample_c v14, 1, 0, 0, 0, 0, 0, 0, 0, v[14:17], s[16:23], s[12:15] ; F0A00100 00640E0E
v_mov_b32_e32 v23, v1 ; 7E2E0301
image_sample_c v1, 1, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[16:23], s[12:15] ; F0A00100 00640115
s_buffer_load_dword s4, s[0:3], 0x60 ; C2020160
s_buffer_load_dword s5, s[0:3], 0x61 ; C2028161
s_buffer_load_dword s0, s[0:3], 0x62 ; C2000162
v_mul_f32_e32 v15, v12, v8 ; 101E110C
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, v1, v15 ; 10021F01
v_mov_b32_e32 v15, 0x40e00000 ; 7E1E02FF 40E00000
v_mul_f32_e32 v16, v15, v12 ; 1020190F
v_mac_f32_e32 v1, v14, v16 ; 3E02210E
v_mul_f32_e32 v12, v12, v9 ; 1018130C
v_mac_f32_e32 v1, v18, v12 ; 3E021912
v_mul_f32_e32 v12, v15, v8 ; 1018110F
v_mac_f32_e32 v1, v11, v12 ; 3E02190B
v_madmk_f32_e32 v1, v10, v1, 0x42440000 ; 4002030A 42440000
v_mul_f32_e32 v10, v15, v9 ; 1014130F
v_mac_f32_e32 v1, v4, v10 ; 3E021504
v_mul_f32_e32 v4, v13, v8 ; 1008110D
v_mac_f32_e32 v1, v7, v4 ; 3E020907
v_mul_f32_e32 v4, v15, v13 ; 10081B0F
v_mac_f32_e32 v1, v6, v4 ; 3E020906
v_mul_f32_e32 v4, v13, v9 ; 1008130D
v_mac_f32_e32 v1, v5, v4 ; 3E020905
v_mov_b32_e32 v4, 0x3be38e39 ; 7E0802FF 3BE38E39
v_mul_f32_e32 v5, v4, v1 ; 100A0304
v_mad_f32 v1, -v1, v4, 1.0 ; D2820001 23CA0901
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v0, s4, v0 ; 0A000004
v_subrev_f32_e32 v2, s5, v2 ; 0A040405
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v2, v2 ; 3E000502
v_subrev_f32_e32 v2, s0, v3 ; 0A040600
v_mac_f32_e32 v0, v2, v2 ; 3E000502
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_readlane_b32 s0, v25, 2 ; 02010519
s_nop 2 ; BF800002
v_mov_b32_e32 v2, s0 ; 7E040200
v_readlane_b32 s0, v25, 1 ; 02010319
s_nop 2 ; BF800002
v_mac_f32_e32 v2, s0, v0 ; 3E040000
v_add_f32_e64 v0, 0, v2 clamp ; D2060800 00020480
v_readlane_b32 s0, v25, 0 ; 02010119
s_nop 2 ; BF800002
v_mac_f32_e32 v0, s0, v1 ; 3E000200
v_mac_f32_e32 v0, 1.0, v5 ; 3E000AF2
v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 28
Code Size: 1792 bytes
LDS: 0 blocks
Scratch: 3072 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..9]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 1.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MUL TEMP[2], CONST[6], IN[0].xxxx
11: MAD TEMP[2], CONST[7], IN[0].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[8], IN[0].zzzz, TEMP[2]
13: MAD TEMP[2].xyz, CONST[9], IN[0].wwww, TEMP[2]
14: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyzz
15: LRP TEMP[2].xyz, CONST[1].xxxx, IN[1].xyzz, TEMP[2].xyzz
16: MOV OUT[1], TEMP[1]
17: MOV OUT[2], TEMP[2]
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
; Entry point of the vertex shader translated from the TGSI program printed
; just above this module (instructions 0..19). The comments below tie each
; group of IR values back to the TGSI instruction they appear to implement.
; Constant byte offsets follow the pattern 16 * CONST index + 4 * component,
; which is consistent with how the loaded values are used further down.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Fetch the 16-byte descriptor stored in element 0 of argument %1; every
; llvm.SI.load.const call below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offset 0 -> CONST[0].x (used by TGSI instruction 6),
; offset 16 -> CONST[1].x (the LRP weight of TGSI instruction 15).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; Offsets 32..92 -> CONST[2..5].xyzw, the four vectors combined into TEMP[0].
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; Offsets 96..152 -> CONST[6..8].xyzw and CONST[9].xyz, combined into TEMP[2].
; CONST[9].w is not loaded: TGSI instruction 13 only writes .xyz.
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; IN[0]: descriptor from element 0 of argument %4, fetched at index %5 + %7.
; %50..%53 are its x, y, z, w components.
%46 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = add i32 %5, %7
%49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48)
%50 = extractelement <4 x float> %49, i32 0
%51 = extractelement <4 x float> %49, i32 1
%52 = extractelement <4 x float> %49, i32 2
%53 = extractelement <4 x float> %49, i32 3
; IN[1]: descriptor from element 1 of argument %4, same index. Only x, y, z
; are extracted because the shader never reads IN[1].w.
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 0
%59 = extractelement <4 x float> %57, i32 1
%60 = extractelement <4 x float> %57, i32 2
; TGSI 0..3: TEMP[0] = CONST[2]*IN[0].x + CONST[3]*IN[0].y
;                    + CONST[4]*IN[0].z + CONST[5]*IN[0].w
; Final components: %82 = x, %84 = y, %86 = z, %88 = w.
%61 = fmul float %15, %50
%62 = fmul float %16, %50
%63 = fmul float %17, %50
%64 = fmul float %18, %50
%65 = fmul float %19, %51
%66 = fadd float %65, %61
%67 = fmul float %20, %51
%68 = fadd float %67, %62
%69 = fmul float %21, %51
%70 = fadd float %69, %63
%71 = fmul float %22, %51
%72 = fadd float %71, %64
%73 = fmul float %23, %52
%74 = fadd float %73, %66
%75 = fmul float %24, %52
%76 = fadd float %75, %68
%77 = fmul float %25, %52
%78 = fadd float %77, %70
%79 = fmul float %26, %52
%80 = fadd float %79, %72
%81 = fmul float %27, %53
%82 = fadd float %81, %74
%83 = fmul float %28, %53
%84 = fadd float %83, %76
%85 = fmul float %29, %53
%86 = fadd float %85, %78
%87 = fmul float %30, %53
%88 = fadd float %87, %80
; TGSI 4..8: halve x, y and w of TEMP[0], scale the halved y by CONST[0].x,
; then add the halved w to both: %93 = TEMP[1].x, %94 = TEMP[1].y.
%89 = fmul float %82, 5.000000e-01
%90 = fmul float %84, 5.000000e-01
%91 = fmul float %88, 5.000000e-01
%92 = fmul float %90, %13
%93 = fadd float %89, %91
%94 = fadd float %92, %91
; TGSI 10..13: TEMP[2] = CONST[6]*IN[0].x + CONST[7]*IN[0].y
;                      + CONST[8]*IN[0].z + CONST[9]*IN[0].w (xyz only)
; Final components: %116 = x, %118 = y, %120 = z; w stays at %114, the value
; after the CONST[8] step.
%95 = fmul float %31, %50
%96 = fmul float %32, %50
%97 = fmul float %33, %50
%98 = fmul float %34, %50
%99 = fmul float %35, %51
%100 = fadd float %99, %95
%101 = fmul float %36, %51
%102 = fadd float %101, %96
%103 = fmul float %37, %51
%104 = fadd float %103, %97
%105 = fmul float %38, %51
%106 = fadd float %105, %98
%107 = fmul float %39, %52
%108 = fadd float %107, %100
%109 = fmul float %40, %52
%110 = fadd float %109, %102
%111 = fmul float %41, %52
%112 = fadd float %111, %104
%113 = fmul float %42, %52
%114 = fadd float %113, %106
%115 = fmul float %43, %53
%116 = fadd float %115, %108
%117 = fmul float %44, %53
%118 = fadd float %117, %110
%119 = fmul float %45, %53
%120 = fadd float %119, %112
; TGSI 14: multiply by IMM[0].yyzz = (-1, -1, 1); x and y are negated, z is
; passed through unchanged.
%121 = fsub float -0.000000e+00, %116
%122 = fsub float -0.000000e+00, %118
; TGSI 15: LRP with weight CONST[1].x between IN[1].xyz and the values above.
%123 = call float @llvm.AMDGPU.lrp(float %14, float %58, float %121)
%124 = call float @llvm.AMDGPU.lrp(float %14, float %59, float %122)
%125 = call float @llvm.AMDGPU.lrp(float %14, float %60, float %120)
; TGSI 16..18: the three exports carry OUT[1] (TEMP[1], with z/w taken from
; TEMP[0]), OUT[2] (TEMP[2]) and OUT[0] (TEMP[0], declared POSITION). Their
; fourth operands are 32, 33 and 12 respectively; only the last export sets
; the third operand to 1.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %93, float %94, float %86, float %88)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %123, float %124, float %125, float %114)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %82, float %84, float %86, float %88)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are printed above.
; Register roles noted here are read off this listing:
;   v[1:4] = IN[0].xyzw, v[5:7] = IN[1].xyz (v8 is overwritten before use)
;   v0/v8/v9/v10 accumulate TEMP[0].x/y/z/w
;   v11/v12/v13/v1 accumulate TEMP[2].x/y/z/w
; s_buffer_load_dword offsets match the IR byte offsets divided by four
; (e.g. 0x8 here vs. offset 32 in the IR).

; Load the two 4-dword input descriptors and build the fetch index
; (the IR's "add i32 %5, %7").
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; Fetch IN[0] into v[1:4] and IN[1] into v[5:8].
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
; Descriptor used by every s_buffer_load_dword below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; IN[0].x and IN[0].y terms. Constant loads are interleaved with the
; multiply-accumulates and scalar registers are reused between batches.
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109
s_buffer_load_dword s7, s[0:3], 0xd ; C203810D
s_buffer_load_dword s8, s[0:3], 0xa ; C204010A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
s_buffer_load_dword s4, s[0:3], 0xe ; C202010E
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s7, v2 ; 3E100407
s_buffer_load_dword s5, s[0:3], 0xb ; C202810B
v_mul_f32_e32 v9, s8, v1 ; 10120208
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C
s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119
s_buffer_load_dword s10, s[0:3], 0x1d ; C205011D
s_buffer_load_dword s11, s[0:3], 0x1a ; C205811A
s_buffer_load_dword s12, s[0:3], 0x1e ; C206011E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v2 ; 3E120404
s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B
v_mul_f32_e32 v10, s5, v1 ; 10140205
v_mac_f32_e32 v10, s9, v2 ; 3E140409
v_mul_f32_e32 v11, s6, v1 ; 10160206
v_mac_f32_e32 v11, s7, v2 ; 3E160407
v_mul_f32_e32 v12, s8, v1 ; 10180208
v_mac_f32_e32 v12, s10, v2 ; 3E18040A
v_mul_f32_e32 v13, s11, v1 ; 101A020B
v_mac_f32_e32 v13, s12, v2 ; 3E1A040C
s_buffer_load_dword s5, s[0:3], 0x1f ; C202811F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Last read of IN[0].x: v1 is reused from here on as the TEMP[2].w accumulator.
v_mul_f32_e32 v1, s4, v1 ; 10020204
; IN[0].z terms (dword offsets 0x10..0x13 and 0x20..0x23).
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
s_buffer_load_dword s7, s[0:3], 0x12 ; C2038112
s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113
s_buffer_load_dword s9, s[0:3], 0x20 ; C2048120
s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_buffer_load_dword s12, s[0:3], 0x23 ; C2060123
v_mac_f32_e32 v1, s5, v2 ; 3E020405
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s4, v3 ; 3E000604
v_mac_f32_e32 v8, s6, v3 ; 3E100606
v_mac_f32_e32 v9, s7, v3 ; 3E120607
v_mac_f32_e32 v10, s8, v3 ; 3E140608
v_mac_f32_e32 v11, s9, v3 ; 3E160609
v_mac_f32_e32 v12, s10, v3 ; 3E18060A
; IN[0].w terms (dword offsets 0x14..0x17 and 0x24..0x26), plus dword 0x4,
; which is the IR's offset 16 (the LRP weight).
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116
s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117
s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124
s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125
s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126
v_mac_f32_e32 v13, s11, v3 ; 3E1A060B
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
v_mac_f32_e32 v0, s5, v4 ; 3E000805
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v8, s4, v4 ; 3E100804
v_mac_f32_e32 v9, s6, v4 ; 3E120806
v_mac_f32_e32 v10, s7, v4 ; 3E140807
v_mac_f32_e32 v11, s8, v4 ; 3E160808
v_mac_f32_e32 v12, s9, v4 ; 3E180809
v_mac_f32_e32 v13, s10, v4 ; 3E1A080A
; Final constant load (dword 0x0); it overwrites s0, the first dword of the
; descriptor, which is no longer needed afterwards.
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
; LRP expanded by hand: result = s11 * IN[1] + (1 - s11) * TEMP[2], with the
; x and y TEMP[2] terms subtracted (the -1 factors from TGSI instruction 14).
v_sub_f32_e64 v2, 1.0, s11 ; D2080002 000016F2
v_mul_f32_e32 v3, v11, v2 ; 1006050B
v_mul_f32_e32 v4, v12, v2 ; 1008050C
v_mul_f32_e32 v2, v13, v2 ; 1004050D
v_mad_f32 v3, s11, v5, -v3 ; D2820003 840E0A0B
v_mad_f32 v4, s11, v6, -v4 ; D2820004 84120C0B
v_mac_f32_e32 v2, s11, v7 ; 3E040E0B
; v7 = 0.5 * TEMP[0].x + 0.5 * TEMP[0].w
; v6 = 0.5 * TEMP[0].w + s0 * (0.5 * TEMP[0].y)
v_mul_f32_e32 v5, 0.5, v8 ; 100A10F0
v_mul_f32_e32 v6, 0.5, v10 ; 100C14F0
v_mad_f32 v7, 0.5, v0, v6 ; D2820007 041A00F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s0, v5 ; 3E0C0A00
; Exports in the same order as the three llvm.SI.export calls in the IR
; (targets 32, 33, then 12).
exp 15, 32, 0, 0, 0, v7, v6, v9, v10 ; F800020F 0A090607
exp 15, 33, 0, 0, 0, v3, v4, v2, v1 ; F800021F 01020403
exp 15, 12, 0, 1, 0, v0, v8, v9, v10 ; F80008CF 0A090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Code Size: 408 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[7..13]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 128.0000, 0.0000, 0.0000, 0.0000}
0: RCP TEMP[0].x, IN[0].wwww
1: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
2: RCP TEMP[1].x, IN[1].zzzz
3: MUL TEMP[2].x, CONST[1].zzzz, TEMP[1].xxxx
4: MUL TEMP[2].xyz, IN[1].xyzz, TEMP[2].xxxx
5: MOV TEMP[3].xy, TEMP[0].xyyy
6: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D
7: MAD TEMP[3].x, CONST[2].xxxx, TEMP[3].xxxx, CONST[2].yyyy
8: RCP TEMP[3].x, TEMP[3].xxxx
9: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx
10: MUL TEMP[2], CONST[10], TEMP[1].xxxx
11: MAD TEMP[2], CONST[11], TEMP[1].yyyy, TEMP[2]
12: MAD TEMP[2], CONST[12], TEMP[1].zzzz, TEMP[2]
13: ADD TEMP[2].xyz, TEMP[2], CONST[13]
14: ADD TEMP[3].xyz, TEMP[2].xyzz, -CONST[4].xyzz
15: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz
16: SQRT TEMP[3].x, TEMP[3].xxxx
17: LRP TEMP[1].x, CONST[4].wwww, TEMP[3].xxxx, TEMP[1].zzzz
18: MOV TEMP[3].xyz, -CONST[7].xyzx
19: MOV TEMP[4].xy, TEMP[0].xyyy
20: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D
21: MAD TEMP[5].x, TEMP[1].xxxx, CONST[3].zzzz, CONST[3].wwww
22: MOV_SAT TEMP[5].x, TEMP[5].xxxx
23: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
24: MOV_SAT TEMP[4].x, TEMP[4].xxxx
25: MOV TEMP[0].xy, TEMP[0].xyyy
26: TEX TEMP[0], TEMP[0], SAMP[2], 2D
27: MAD TEMP[5].xyz, TEMP[0].xyzz, IMM[0].yyyy, IMM[0].zzzz
28: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz
29: RSQ TEMP[6].x, TEMP[6].xxxx
30: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
31: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[5].xyzz
32: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
33: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx
34: MUL TEMP[6].xyz, CONST[8].xyzz, TEMP[6].xxxx
35: MUL TEMP[7].xyz, CONST[8].xyzz, CONST[5].xyzz
36: ADD TEMP[2].xyz, TEMP[2].xyzz, -CONST[0].xyzz
37: DP3 TEMP[8].x, TEMP[2].xyzz, TEMP[2].xyzz
38: RSQ TEMP[8].x, TEMP[8].xxxx
39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[8].xxxx
40: ADD TEMP[2].xyz, TEMP[3].xyzz, -TEMP[2].xyzz
41: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
42: RSQ TEMP[3].x, TEMP[3].xxxx
43: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
44: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[5].xyzz
45: MAX TEMP[2].x, IMM[0].wwww, TEMP[2].xxxx
46: MUL TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx
47: POW TEMP[0].x, TEMP[2].xxxx, TEMP[0].xxxx
48: MOV_SAT TEMP[2].x, TEMP[4].xxxx
49: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
50: ADD TEMP[2].x, TEMP[7].xxxx, TEMP[7].zzzz
51: MUL TEMP[2].x, TEMP[7].yyyy, TEMP[2].xxxx
52: SQRT TEMP[2].x, TEMP[2].xxxx
53: MUL TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
54: ADD TEMP[3].x, TEMP[7].xxxx, TEMP[7].yyyy
55: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[7].zzzz
56: MAD TEMP[2].x, TEMP[2].xxxx, CONST[5].wwww, TEMP[3].xxxx
57: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
58: MOV TEMP[6].w, TEMP[0].xxxx
59: MAD TEMP[0].x, TEMP[1].xxxx, CONST[9].zzzz, CONST[9].wwww
60: ADD TEMP[0].x, IMM[0].xxxx, -TEMP[0].xxxx
61: MOV_SAT TEMP[0].x, TEMP[0].xxxx
62: MUL TEMP[0], TEMP[6], TEMP[0].xxxx
63: EX2 TEMP[1].x, -TEMP[0].xxxx
64: EX2 TEMP[1].y, -TEMP[0].yyyy
65: EX2 TEMP[1].z, -TEMP[0].zzzz
66: EX2 TEMP[1].w, -TEMP[0].wwww
67: MOV OUT[0], TEMP[1]
68: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0
%62 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0
%64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%65 = bitcast <8 x i32> addrspace(2)* %64 to <32 x i8> addrspace(2)*
%66 = load <32 x i8>, <32 x i8> addrspace(2)* %65, align 32, !tbaa !0
%67 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%68 = bitcast <4 x i32> addrspace(2)* %67 to <16 x i8> addrspace(2)*
%69 = load <16 x i8>, <16 x i8> addrspace(2)* %68, align 16, !tbaa !0
%70 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%71 = bitcast <8 x i32> addrspace(2)* %70 to <32 x i8> addrspace(2)*
%72 = load <32 x i8>, <32 x i8> addrspace(2)* %71, align 32, !tbaa !0
%73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%74 = bitcast <4 x i32> addrspace(2)* %73 to <16 x i8> addrspace(2)*
%75 = load <16 x i8>, <16 x i8> addrspace(2)* %74, align 16, !tbaa !0
%76 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%79 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%80 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%81 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%82 = fdiv float 1.000000e+00, %78
%83 = fmul float %76, %82
%84 = fmul float %77, %82
%85 = fdiv float 1.000000e+00, %81
%86 = fmul float %27, %85
%87 = fmul float %79, %86
%88 = fmul float %80, %86
%89 = fmul float %81, %86
%90 = bitcast float %83 to i32
%91 = bitcast float %84 to i32
%92 = insertelement <2 x i32> undef, i32 %90, i32 0
%93 = insertelement <2 x i32> %92, i32 %91, i32 1
%94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %61, <16 x i8> %63, i32 2)
%95 = extractelement <4 x float> %94, i32 0
%96 = fmul float %28, %95
%97 = fadd float %96, %29
%98 = fdiv float 1.000000e+00, %97
%99 = fmul float %87, %98
%100 = fmul float %88, %98
%101 = fmul float %89, %98
%102 = fmul float %48, %99
%103 = fmul float %49, %99
%104 = fmul float %50, %99
%105 = fmul float %51, %100
%106 = fadd float %105, %102
%107 = fmul float %52, %100
%108 = fadd float %107, %103
%109 = fmul float %53, %100
%110 = fadd float %109, %104
%111 = fmul float %54, %101
%112 = fadd float %111, %106
%113 = fmul float %55, %101
%114 = fadd float %113, %108
%115 = fmul float %56, %101
%116 = fadd float %115, %110
%117 = fadd float %112, %57
%118 = fadd float %114, %58
%119 = fadd float %116, %59
%120 = fsub float %117, %32
%121 = fsub float %118, %33
%122 = fsub float %119, %34
%123 = fmul float %120, %120
%124 = fmul float %121, %121
%125 = fadd float %124, %123
%126 = fmul float %122, %122
%127 = fadd float %125, %126
%128 = call float @llvm.sqrt.f32(float %127)
%129 = call float @llvm.AMDGPU.lrp(float %35, float %128, float %101)
%130 = bitcast float %83 to i32
%131 = bitcast float %84 to i32
%132 = insertelement <2 x i32> undef, i32 %130, i32 0
%133 = insertelement <2 x i32> %132, i32 %131, i32 1
%134 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %133, <32 x i8> %66, <16 x i8> %69, i32 2)
%135 = extractelement <4 x float> %134, i32 0
%136 = fmul float %129, %30
%137 = fadd float %136, %31
%138 = call float @llvm.AMDIL.clamp.(float %137, float 0.000000e+00, float 1.000000e+00)
%139 = fadd float %135, %138
%140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
%141 = bitcast float %83 to i32
%142 = bitcast float %84 to i32
%143 = insertelement <2 x i32> undef, i32 %141, i32 0
%144 = insertelement <2 x i32> %143, i32 %142, i32 1
%145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %72, <16 x i8> %75, i32 2)
%146 = extractelement <4 x float> %145, i32 0
%147 = extractelement <4 x float> %145, i32 1
%148 = extractelement <4 x float> %145, i32 2
%149 = extractelement <4 x float> %145, i32 3
%150 = fmul float %146, 2.000000e+00
%151 = fadd float %150, -1.000000e+00
%152 = fmul float %147, 2.000000e+00
%153 = fadd float %152, -1.000000e+00
%154 = fmul float %148, 2.000000e+00
%155 = fadd float %154, -1.000000e+00
%156 = fmul float %151, %151
%157 = fmul float %153, %153
%158 = fadd float %157, %156
%159 = fmul float %155, %155
%160 = fadd float %158, %159
%161 = call float @llvm.AMDGPU.rsq.clamped.f32(float %160)
%162 = fmul float %151, %161
%163 = fmul float %153, %161
%164 = fmul float %155, %161
%165 = fmul float %40, %162
%166 = fsub float -0.000000e+00, %165
%167 = fmul float %41, %163
%168 = fsub float %166, %167
%169 = fmul float %42, %164
%170 = fsub float %168, %169
%171 = call float @llvm.maxnum.f32(float %170, float 0.000000e+00)
%172 = fmul float %171, %140
%173 = fmul float %43, %172
%174 = fmul float %44, %172
%175 = fmul float %45, %172
%176 = fmul float %43, %36
%177 = fmul float %44, %37
%178 = fmul float %45, %38
%179 = fsub float %117, %24
%180 = fsub float %118, %25
%181 = fsub float %119, %26
%182 = fmul float %179, %179
%183 = fmul float %180, %180
%184 = fadd float %183, %182
%185 = fmul float %181, %181
%186 = fadd float %184, %185
%187 = call float @llvm.AMDGPU.rsq.clamped.f32(float %186)
%188 = fmul float %179, %187
%189 = fmul float %180, %187
%190 = fmul float %181, %187
%191 = fsub float -0.000000e+00, %188
%192 = fsub float %191, %40
%193 = fsub float -0.000000e+00, %189
%194 = fsub float %193, %41
%195 = fsub float -0.000000e+00, %190
%196 = fsub float %195, %42
%197 = fmul float %192, %192
%198 = fmul float %194, %194
%199 = fadd float %198, %197
%200 = fmul float %196, %196
%201 = fadd float %199, %200
%202 = call float @llvm.AMDGPU.rsq.clamped.f32(float %201)
%203 = fmul float %192, %202
%204 = fmul float %194, %202
%205 = fmul float %196, %202
%206 = fmul float %203, %162
%207 = fmul float %204, %163
%208 = fadd float %207, %206
%209 = fmul float %205, %164
%210 = fadd float %208, %209
%211 = call float @llvm.maxnum.f32(float %210, float 0.000000e+00)
%212 = fmul float %149, 1.280000e+02
%213 = call float @llvm.pow.f32(float %211, float %212)
%214 = call float @llvm.AMDIL.clamp.(float %140, float 0.000000e+00, float 1.000000e+00)
%215 = fmul float %213, %214
%216 = fadd float %176, %178
%217 = fmul float %177, %216
%218 = call float @llvm.sqrt.f32(float %217)
%219 = fmul float %218, 2.000000e+00
%220 = fadd float %176, %177
%221 = fadd float %220, %178
%222 = fmul float %219, %39
%223 = fadd float %222, %221
%224 = fmul float %215, %223
%225 = fmul float %129, %46
%226 = fadd float %225, %47
%227 = fsub float 1.000000e+00, %226
%228 = call float @llvm.AMDIL.clamp.(float %227, float 0.000000e+00, float 1.000000e+00)
%229 = fmul float %173, %228
%230 = fmul float %174, %228
%231 = fmul float %175, %228
%232 = fmul float %224, %228
%233 = fsub float -0.000000e+00, %229
%234 = call float @llvm.AMDIL.exp.(float %233)
%235 = fsub float -0.000000e+00, %230
%236 = call float @llvm.AMDIL.exp.(float %235)
%237 = fsub float -0.000000e+00, %231
%238 = call float @llvm.AMDIL.exp.(float %237)
%239 = fsub float -0.000000e+00, %232
%240 = call float @llvm.AMDIL.exp.(float %239)
%241 = call i32 @llvm.SI.packf16(float %234, float %236)
%242 = bitcast i32 %241 to float
%243 = call i32 @llvm.SI.packf16(float %238, float %240)
%244 = bitcast i32 %243 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %242, float %244, float %242, float %244)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504
s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_rcp_f32_e32 v4, v4 ; 7E085504
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
image_sample v3, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[36:43], s[32:35] ; F0800100 01090301
image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[24:31], s[20:23] ; F0800100 00A60401
image_sample v[7:10], 15, 0, 0, 0, 0, 0, 0, 0, v[1:2], s[12:19], s[8:11] ; F0800F00 00430701
s_buffer_load_dword s6, s[0:3], 0xe ; C203010E
s_buffer_load_dword s7, s[0:3], 0xf ; C203810F
s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110
s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v1, s4 ; 7E020204
s_waitcnt vmcnt(2) ; BF8C0772
v_mac_f32_e32 v1, s5, v3 ; 3E020605
v_rcp_f32_e32 v1, v1 ; 7E025501
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106
s_buffer_load_dword s12, s[0:3], 0x12 ; C2060112
s_buffer_load_dword s13, s[0:3], 0x13 ; C2068113
s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114
s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115
s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116
s_buffer_load_dword s17, s[0:3], 0x17 ; C2088117
s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C
s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D
s_buffer_load_dword s20, s[0:3], 0x1e ; C20A011E
s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120
s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121
s_buffer_load_dword s23, s[0:3], 0x22 ; C20B8122
s_buffer_load_dword s24, s[0:3], 0x26 ; C20C0126
s_buffer_load_dword s25, s[0:3], 0x27 ; C20C8127
s_buffer_load_dword s26, s[0:3], 0x28 ; C20D0128
s_buffer_load_dword s27, s[0:3], 0x29 ; C20D8129
s_buffer_load_dword s28, s[0:3], 0x2a ; C20E012A
s_buffer_load_dword s29, s[0:3], 0x2c ; C20E812C
s_buffer_load_dword s30, s[0:3], 0x2d ; C20F012D
s_buffer_load_dword s31, s[0:3], 0x2e ; C20F812E
v_rcp_f32_e32 v2, v0 ; 7E045500
s_buffer_load_dword s32, s[0:3], 0x30 ; C2100130
s_buffer_load_dword s33, s[0:3], 0x31 ; C2108131
s_buffer_load_dword s34, s[0:3], 0x32 ; C2110132
s_buffer_load_dword s35, s[0:3], 0x34 ; C2118134
s_buffer_load_dword s36, s[0:3], 0x35 ; C2120135
s_buffer_load_dword s0, s[0:3], 0x36 ; C2000136
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v2, s11, v2 ; 1004040B
v_mul_f32_e32 v3, v2, v5 ; 10060B02
v_mul_f32_e32 v3, v1, v3 ; 10060701
v_mul_f32_e32 v5, s26, v3 ; 100A061A
v_mul_f32_e32 v11, s27, v3 ; 1016061B
v_mul_f32_e32 v3, s28, v3 ; 1006061C
v_mul_f32_e32 v6, v2, v6 ; 100C0D02
v_mul_f32_e32 v6, v1, v6 ; 100C0D01
v_mac_f32_e32 v5, s29, v6 ; 3E0A0C1D
v_mac_f32_e32 v11, s30, v6 ; 3E160C1E
v_mac_f32_e32 v3, s31, v6 ; 3E060C1F
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mac_f32_e32 v5, s32, v0 ; 3E0A0020
v_mac_f32_e32 v11, s33, v0 ; 3E160021
v_mac_f32_e32 v3, s34, v0 ; 3E060022
v_add_f32_e32 v1, s35, v5 ; 06020A23
v_add_f32_e32 v2, s36, v11 ; 06041624
v_add_f32_e32 v3, s0, v3 ; 06060600
v_subrev_f32_e32 v5, s8, v1 ; 0A0A0208
v_subrev_f32_e32 v6, s9, v2 ; 0A0C0409
v_mul_f32_e32 v5, v5, v5 ; 100A0B05
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_subrev_f32_e32 v6, s12, v3 ; 0A0C060C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_sub_f32_e64 v6, 1.0, s13 ; D2080006 00001AF2
v_mul_f32_e32 v0, v0, v6 ; 10000D00
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_mac_f32_e32 v0, s13, v5 ; 3E000A0D
v_subrev_f32_e32 v1, s4, v1 ; 0A020204
v_subrev_f32_e32 v2, s5, v2 ; 0A040405
v_subrev_f32_e32 v3, s10, v3 ; 0A06060A
v_mad_f32 v5, 2.0, v7, -1.0 ; D2820005 03CE0EF4
v_mad_f32 v6, 2.0, v8, -1.0 ; D2820006 03CE10F4
v_mul_f32_e32 v7, v5, v5 ; 100E0B05
v_mac_f32_e32 v7, v6, v6 ; 3E0E0D06
v_mul_f32_e32 v8, v1, v1 ; 10100301
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mad_f32 v9, 2.0, v9, -1.0 ; D2820009 03CE12F4
v_mac_f32_e32 v7, v9, v9 ; 3E0E1309
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mad_f32 v1, -v1, v8, -s18 ; D2820001 A04A1101
v_mad_f32 v2, -v2, v8, -s19 ; D2820002 A04E1102
v_mad_f32 v3, -v3, v8, -s20 ; D2820003 A0521103
v_mul_f32_e32 v5, v7, v5 ; 100A0B07
v_mul_f32_e32 v8, v1, v1 ; 10100301
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v11, s18, v5 ; 10160A12
v_mul_f32_e32 v6, v7, v6 ; 100C0D07
v_mad_f32 v11, -s19, v6, -v11 ; D282000B A42E0C13
v_mul_f32_e32 v1, v8, v1 ; 10020308
v_mul_f32_e32 v1, v5, v1 ; 10020305
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mac_f32_e32 v1, v6, v2 ; 3E020506
v_mov_b32_e32 v2, s7 ; 7E040207
v_mac_f32_e32 v2, s6, v0 ; 3E040006
v_mul_f32_e32 v5, v7, v9 ; 100A1307
v_mad_f32 v6, -s20, v5, v11 ; D2820006 242E0A14
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_add_f32_e32 v2, v2, v4 ; 06040902
v_mov_b32_e32 v4, s16 ; 7E080210
v_mul_f32_e32 v4, s23, v4 ; 10080817
v_mov_b32_e32 v7, s14 ; 7E0E020E
v_mac_f32_e32 v4, s21, v7 ; 3E080E15
v_mov_b32_e32 v7, s14 ; 7E0E020E
v_mov_b32_e32 v9, s15 ; 7E12020F
v_mul_f32_e32 v9, s22, v9 ; 10121216
v_mul_f32_e32 v4, v4, v9 ; 10081304
v_mac_f32_e32 v9, s21, v7 ; 3E120E15
v_mov_b32_e32 v7, s16 ; 7E0E0210
v_mac_f32_e32 v9, s23, v7 ; 3E120E17
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_mul_f32_e32 v6, v2, v6 ; 100C0D02
v_mul_f32_e32 v7, s21, v6 ; 100E0C15
v_mul_f32_e32 v11, s22, v6 ; 10160C16
v_mul_f32_e32 v6, s23, v6 ; 100C0C17
v_mul_f32_e32 v3, v8, v3 ; 10060708
v_mac_f32_e32 v1, v5, v3 ; 3E020705
v_max_f32_e32 v1, 0, v1 ; 20020280
v_log_f32_e32 v1, v1 ; 7E024F01
v_sqrt_f32_e32 v3, v4 ; 7E066704
v_add_f32_e32 v3, v3, v3 ; 06060703
v_mac_f32_e32 v9, s17, v3 ; 3E120611
v_mov_b32_e32 v3, s25 ; 7E060219
v_mac_f32_e32 v3, s24, v0 ; 3E060018
v_mul_f32_e32 v0, 0x43000000, v10 ; 100014FF 43000000
v_mul_legacy_f32_e32 v0, v0, v1 ; 0E000300
v_exp_f32_e32 v0, v0 ; 7E004B00
v_add_f32_e64 v1, 0, v2 clamp ; D2060801 00020480
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_sub_f32_e32 v1, 1.0, v3 ; 080206F2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v2, v1, v7 ; 10040F01
v_mul_f32_e32 v3, v1, v11 ; 10061701
v_mul_f32_e32 v4, v1, v6 ; 10080D01
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_exp_f32_e64 v1, -v2 ; D34A0001 20000102
v_exp_f32_e64 v2, -v3 ; D34A0002 20000103
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_exp_f32_e64 v2, -v4 ; D34A0002 20000104
v_exp_f32_e64 v0, -v0 ; D34A0000 20000100
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 12
Code Size: 812 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..17]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.5000, 1.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[14], IN[0].xxxx
1: MAD TEMP[0], CONST[15], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[16], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[17], IN[0].wwww, TEMP[0]
4: MUL TEMP[1].xyw, TEMP[0], IMM[0].xxxx
5: MOV TEMP[2].x, TEMP[1].xxxx
6: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
7: MOV TEMP[2].y, TEMP[3].xxxx
8: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
9: MOV TEMP[1].zw, TEMP[0].wwzw
10: MOV TEMP[2].x, CONST[8].xxxx
11: MOV TEMP[2].y, CONST[9].xxxx
12: MOV TEMP[2].z, CONST[10].xxxx
13: MOV TEMP[3].x, CONST[8].yyyy
14: MOV TEMP[3].y, CONST[9].yyyy
15: MOV TEMP[3].z, CONST[10].yyyy
16: MOV TEMP[4].x, CONST[8].zzzz
17: MOV TEMP[4].y, CONST[9].zzzz
18: MOV TEMP[4].z, CONST[10].zzzz
19: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx
20: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz
21: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz
22: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
23: RSQ TEMP[3].x, TEMP[3].xxxx
24: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
25: MOV TEMP[3].w, IMM[0].yyyy
26: MOV TEMP[3].xyz, TEMP[2].xyzx
27: DP4 TEMP[4].x, CONST[1], TEMP[3]
28: DP4 TEMP[5].x, CONST[2], TEMP[3]
29: MOV TEMP[4].y, TEMP[5].xxxx
30: DP4 TEMP[3].x, CONST[3], TEMP[3]
31: MOV TEMP[4].z, TEMP[3].xxxx
32: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx
33: DP4 TEMP[5].x, CONST[4], TEMP[3]
34: DP4 TEMP[6].x, CONST[5], TEMP[3]
35: MOV TEMP[5].y, TEMP[6].xxxx
36: DP4 TEMP[3].x, CONST[6], TEMP[3]
37: MOV TEMP[5].z, TEMP[3].xxxx
38: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy
39: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx
40: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz
41: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz
42: MAD TEMP[3].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
43: MOV TEMP[3].w, TEMP[2].xxxx
44: MOV TEMP[2].xy, TEMP[2].yzyy
45: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww
46: MOV TEMP[3].z, TEMP[4].xxxx
47: MOV OUT[2], TEMP[3]
48: MOV OUT[1], TEMP[1]
49: MOV OUT[0], TEMP[0]
50: MOV OUT[3], TEMP[2]
51: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%73 = load <16 x i8>, <16 x i8> addrspace(2)* %72, align 16, !tbaa !0
%74 = add i32 %5, %7
%75 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %73, i32 0, i32 %74)
%76 = extractelement <4 x float> %75, i32 0
%77 = extractelement <4 x float> %75, i32 1
%78 = extractelement <4 x float> %75, i32 2
%79 = extractelement <4 x float> %75, i32 3
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = add i32 %5, %7
%90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89)
%91 = extractelement <4 x float> %90, i32 0
%92 = extractelement <4 x float> %90, i32 1
%93 = fmul float %56, %76
%94 = fmul float %57, %76
%95 = fmul float %58, %76
%96 = fmul float %59, %76
%97 = fmul float %60, %77
%98 = fadd float %97, %93
%99 = fmul float %61, %77
%100 = fadd float %99, %94
%101 = fmul float %62, %77
%102 = fadd float %101, %95
%103 = fmul float %63, %77
%104 = fadd float %103, %96
%105 = fmul float %64, %78
%106 = fadd float %105, %98
%107 = fmul float %65, %78
%108 = fadd float %107, %100
%109 = fmul float %66, %78
%110 = fadd float %109, %102
%111 = fmul float %67, %78
%112 = fadd float %111, %104
%113 = fmul float %68, %79
%114 = fadd float %113, %106
%115 = fmul float %69, %79
%116 = fadd float %115, %108
%117 = fmul float %70, %79
%118 = fadd float %117, %110
%119 = fmul float %71, %79
%120 = fadd float %119, %112
%121 = fmul float %114, 5.000000e-01
%122 = fmul float %116, 5.000000e-01
%123 = fmul float %120, 5.000000e-01
%124 = fmul float %122, %13
%125 = fadd float %121, %123
%126 = fadd float %124, %123
%127 = fmul float %41, %84
%128 = fmul float %44, %84
%129 = fmul float %47, %84
%130 = fmul float %42, %85
%131 = fadd float %130, %127
%132 = fmul float %45, %85
%133 = fadd float %132, %128
%134 = fmul float %48, %85
%135 = fadd float %134, %129
%136 = fmul float %43, %86
%137 = fadd float %136, %131
%138 = fmul float %46, %86
%139 = fadd float %138, %133
%140 = fmul float %49, %86
%141 = fadd float %140, %135
%142 = fmul float %137, %137
%143 = fmul float %139, %139
%144 = fadd float %143, %142
%145 = fmul float %141, %141
%146 = fadd float %144, %145
%147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146)
%148 = fmul float %137, %147
%149 = fmul float %139, %147
%150 = fmul float %141, %147
%151 = fmul float %14, %148
%152 = fmul float %15, %149
%153 = fadd float %151, %152
%154 = fmul float %16, %150
%155 = fadd float %153, %154
%156 = fadd float %155, %17
%157 = fmul float %18, %148
%158 = fmul float %19, %149
%159 = fadd float %157, %158
%160 = fmul float %20, %150
%161 = fadd float %159, %160
%162 = fadd float %161, %21
%163 = fmul float %22, %148
%164 = fmul float %23, %149
%165 = fadd float %163, %164
%166 = fmul float %24, %150
%167 = fadd float %165, %166
%168 = fadd float %167, %25
%169 = fmul float %148, %149
%170 = fmul float %149, %150
%171 = fmul float %150, %150
%172 = fmul float %150, %148
%173 = fmul float %26, %169
%174 = fmul float %27, %170
%175 = fadd float %173, %174
%176 = fmul float %28, %171
%177 = fadd float %175, %176
%178 = fmul float %29, %172
%179 = fadd float %177, %178
%180 = fmul float %30, %169
%181 = fmul float %31, %170
%182 = fadd float %180, %181
%183 = fmul float %32, %171
%184 = fadd float %182, %183
%185 = fmul float %33, %172
%186 = fadd float %184, %185
%187 = fmul float %34, %169
%188 = fmul float %35, %170
%189 = fadd float %187, %188
%190 = fmul float %36, %171
%191 = fadd float %189, %190
%192 = fmul float %37, %172
%193 = fadd float %191, %192
%194 = fmul float %149, %149
%195 = fmul float %148, %148
%196 = fsub float %195, %194
%197 = fmul float %38, %196
%198 = fadd float %197, %179
%199 = fmul float %39, %196
%200 = fadd float %199, %186
%201 = fmul float %40, %196
%202 = fadd float %201, %193
%203 = fadd float %198, %156
%204 = fadd float %200, %162
%205 = fadd float %202, %168
%206 = fmul float %91, %52
%207 = fadd float %206, %54
%208 = fmul float %92, %53
%209 = fadd float %208, %55
%210 = fmul float %118, %50
%211 = fadd float %210, %51
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %125, float %126, float %118, float %120)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %207, float %209, float %211, float %203)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %204, float %205, float %205, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %116, float %118, float %120)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[12:15], 0x1c ; C2000D1C
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800
s_buffer_load_dword s1, s[12:15], 0x1d ; C2008D1D
s_buffer_load_dword s2, s[12:15], 0x1e ; C2010D1E
s_buffer_load_dword s5, s[12:15], 0x20 ; C2028D20
s_buffer_load_dword s6, s[12:15], 0x21 ; C2030D21
s_buffer_load_dword s7, s[12:15], 0x22 ; C2038D22
s_buffer_load_dword s8, s[12:15], 0x24 ; C2040D24
s_buffer_load_dword s9, s[12:15], 0x25 ; C2048D25
s_buffer_load_dword s10, s[12:15], 0x26 ; C2050D26
s_buffer_load_dword s11, s[12:15], 0x28 ; C2058D28
s_buffer_load_dword s16, s[12:15], 0x29 ; C2080D29
s_buffer_load_dword s17, s[12:15], 0x2a ; C2088D2A
s_buffer_load_dword s3, s[12:15], 0x32 ; C2018D32
s_buffer_load_dword s4, s[12:15], 0x33 ; C2020D33
s_buffer_load_dword s18, s[12:15], 0x34 ; C2090D34
s_buffer_load_dword s19, s[12:15], 0x35 ; C2098D35
s_buffer_load_dword s20, s[12:15], 0x36 ; C20A0D36
s_buffer_load_dword s21, s[12:15], 0x37 ; C20A8D37
s_buffer_load_dword s22, s[12:15], 0x38 ; C20B0D38
s_buffer_load_dword s23, s[12:15], 0x39 ; C20B8D39
s_buffer_load_dword s24, s[12:15], 0x3a ; C20C0D3A
s_buffer_load_dword s25, s[12:15], 0x3b ; C20C8D3B
s_buffer_load_dword s26, s[12:15], 0x3c ; C20D0D3C
s_buffer_load_dword s27, s[12:15], 0x3d ; C20D8D3D
s_buffer_load_dword s28, s[12:15], 0x3e ; C20E0D3E
s_buffer_load_dword s29, s[12:15], 0x3f ; C20E8D3F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s20 ; 7E000214
s_buffer_load_dword s20, s[12:15], 0x40 ; C20A0D40
v_mov_b32_e32 v10, s21 ; 7E140215
s_buffer_load_dword s21, s[12:15], 0x41 ; C20A8D41
s_buffer_load_dword s30, s[12:15], 0x42 ; C20F0D42
s_buffer_load_dword s31, s[12:15], 0x43 ; C20F8D43
s_buffer_load_dword s32, s[12:15], 0x44 ; C2100D44
s_buffer_load_dword s33, s[12:15], 0x45 ; C2108D45
s_buffer_load_dword s34, s[12:15], 0x46 ; C2110D46
s_buffer_load_dword s35, s[12:15], 0x47 ; C2118D47
v_mul_f32_e32 v11, s22, v1 ; 10160216
v_mul_f32_e32 v12, s5, v5 ; 10180A05
v_mac_f32_e32 v0, s18, v8 ; 3E001012
v_mac_f32_e32 v10, s19, v9 ; 3E141213
v_mac_f32_e32 v12, s6, v6 ; 3E180C06
v_mul_f32_e32 v8, s8, v5 ; 10100A08
v_mac_f32_e32 v8, s9, v6 ; 3E100C09
v_mul_f32_e32 v5, s11, v5 ; 100A0A0B
v_mac_f32_e32 v5, s16, v6 ; 3E0A0C10
v_mac_f32_e32 v12, s7, v7 ; 3E180E07
v_mac_f32_e32 v8, s10, v7 ; 3E100E0A
v_mac_f32_e32 v5, s17, v7 ; 3E0A0E11
v_mac_f32_e32 v11, s26, v2 ; 3E16041A
v_mul_f32_e32 v6, s23, v1 ; 100C0217
v_mac_f32_e32 v6, s27, v2 ; 3E0C041B
v_mul_f32_e32 v7, s24, v1 ; 100E0218
v_mac_f32_e32 v7, s28, v2 ; 3E0E041C
v_mul_f32_e32 v1, s25, v1 ; 10020219
v_mac_f32_e32 v1, s29, v2 ; 3E02041D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v11, s20, v3 ; 3E160614
v_mac_f32_e32 v6, s21, v3 ; 3E0C0615
v_mac_f32_e32 v7, s30, v3 ; 3E0E061E
v_mac_f32_e32 v1, s31, v3 ; 3E02061F
v_mac_f32_e32 v11, s32, v4 ; 3E160820
v_mac_f32_e32 v6, s33, v4 ; 3E0C0821
v_mac_f32_e32 v7, s34, v4 ; 3E0E0822
v_mac_f32_e32 v1, s35, v4 ; 3E020823
s_buffer_load_dword s5, s[12:15], 0x0 ; C2028D00
s_buffer_load_dword s6, s[12:15], 0x4 ; C2030D04
s_buffer_load_dword s7, s[12:15], 0x5 ; C2038D05
s_buffer_load_dword s8, s[12:15], 0x6 ; C2040D06
s_buffer_load_dword s9, s[12:15], 0x7 ; C2048D07
s_buffer_load_dword s10, s[12:15], 0x8 ; C2050D08
s_buffer_load_dword s11, s[12:15], 0x9 ; C2058D09
s_buffer_load_dword s16, s[12:15], 0xa ; C2080D0A
s_buffer_load_dword s17, s[12:15], 0xb ; C2088D0B
s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C
s_buffer_load_dword s19, s[12:15], 0xd ; C2098D0D
s_buffer_load_dword s20, s[12:15], 0xe ; C20A0D0E
s_buffer_load_dword s21, s[12:15], 0xf ; C20A8D0F
s_buffer_load_dword s22, s[12:15], 0x10 ; C20B0D10
s_buffer_load_dword s23, s[12:15], 0x11 ; C20B8D11
s_buffer_load_dword s24, s[12:15], 0x12 ; C20C0D12
s_buffer_load_dword s25, s[12:15], 0x13 ; C20C8D13
s_buffer_load_dword s26, s[12:15], 0x14 ; C20D0D14
s_buffer_load_dword s27, s[12:15], 0x15 ; C20D8D15
s_buffer_load_dword s28, s[12:15], 0x16 ; C20E0D16
s_buffer_load_dword s29, s[12:15], 0x17 ; C20E8D17
s_buffer_load_dword s30, s[12:15], 0x18 ; C20F0D18
s_buffer_load_dword s31, s[12:15], 0x19 ; C20F8D19
s_buffer_load_dword s32, s[12:15], 0x1a ; C2100D1A
s_buffer_load_dword s12, s[12:15], 0x1b ; C2060D1B
v_mul_f32_e32 v2, v12, v12 ; 1004190C
v_mac_f32_e32 v2, v8, v8 ; 3E041108
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mul_f32_e32 v3, 0.5, v6 ; 10060CF0
v_mul_f32_e32 v4, 0.5, v1 ; 100802F0
v_mad_f32 v9, 0.5, v11, v4 ; D2820009 041216F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v4, s5, v3 ; 3E080605
exp 15, 32, 0, 0, 0, v9, v4, v7, v1 ; F800020F 01070409
v_mul_f32_e32 v3, v2, v8 ; 10061102
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v4, v2, v5 ; 10080B02
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v8, s23, v5 ; 10100A17
v_mul_f32_e32 v9, s27, v5 ; 10120A1B
v_mul_f32_e32 v5, s31, v5 ; 100A0A1F
v_mul_f32_e32 v2, v2, v12 ; 10041902
v_mul_f32_e32 v12, v3, v2 ; 10180503
v_mac_f32_e32 v8, s22, v12 ; 3E101816
v_mac_f32_e32 v9, s26, v12 ; 3E12181A
v_mac_f32_e32 v5, s30, v12 ; 3E0A181E
v_mul_f32_e32 v12, v4, v4 ; 10180904
v_mac_f32_e32 v8, s24, v12 ; 3E101818
v_mac_f32_e32 v9, s28, v12 ; 3E12181C
v_mac_f32_e32 v5, s32, v12 ; 3E0A1820
v_mul_f32_e32 v12, s7, v3 ; 10180607
v_mac_f32_e32 v12, s6, v2 ; 3E180406
v_mul_f32_e32 v13, s11, v3 ; 101A060B
v_mac_f32_e32 v13, s10, v2 ; 3E1A040A
v_mul_f32_e32 v14, s19, v3 ; 101C0613
v_mac_f32_e32 v14, s18, v2 ; 3E1C0412
v_mac_f32_e32 v12, s8, v4 ; 3E180808
v_mac_f32_e32 v13, s16, v4 ; 3E1A0810
v_mac_f32_e32 v14, s20, v4 ; 3E1C0814
v_mul_f32_e32 v4, v2, v4 ; 10080902
v_mac_f32_e32 v8, s25, v4 ; 3E100819
v_mac_f32_e32 v9, s29, v4 ; 3E12081D
v_mac_f32_e32 v5, s12, v4 ; 3E0A080C
v_mov_b32_e32 v4, s4 ; 7E080204
v_mac_f32_e32 v4, s3, v7 ; 3E080E03
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mad_f32 v2, v2, v2, -v3 ; D2820002 840E0502
v_add_f32_e32 v3, s9, v12 ; 06061809
v_mac_f32_e32 v8, s0, v2 ; 3E100400
v_mac_f32_e32 v9, s1, v2 ; 3E120401
v_mac_f32_e32 v5, s2, v2 ; 3E0A0402
v_add_f32_e32 v2, v3, v8 ; 06041103
exp 15, 33, 0, 0, 0, v0, v10, v4, v2 ; F800021F 02040A00
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, s17, v13 ; 06001A11
v_add_f32_e32 v2, s21, v14 ; 06041C15
v_add_f32_e32 v0, v0, v9 ; 06001300
v_add_f32_e32 v2, v2, v5 ; 06040B02
v_mov_b32_e32 v3, 0 ; 7E060280
exp 15, 34, 0, 0, 0, v0, v2, v2, v3 ; F800022F 03020200
exp 15, 12, 0, 1, 0, v11, v6, v7, v1 ; F80008CF 0107060B
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 668 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[2]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].yz, IN[2].yxyy
2: MOV TEMP[1].xy, IN[1].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MUL TEMP[1].xyz, TEMP[1], CONST[2]
5: MOV TEMP[2].xy, IN[0].xyyy
6: MOV TEMP[2].w, IN[0].wwww
7: TXP TEMP[2], TEMP[2], SAMP[1], 2D
8: LG2 TEMP[3].x, TEMP[2].xxxx
9: LG2 TEMP[3].y, TEMP[2].yyyy
10: LG2 TEMP[3].z, TEMP[2].zzzz
11: LG2 TEMP[3].w, TEMP[2].wwww
12: MOV TEMP[2].xyz, -TEMP[3]
13: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz
14: MUL TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz
15: MOV_SAT TEMP[1].x, IN[1].zzzz
16: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[0].xyzz
17: MOV TEMP[0].w, IMM[0].xxxx
18: MOV OUT[0], TEMP[0]
19: END
; ModuleID = 'tgsi'
; Fragment shader entry point, the LLVM IR form of the FRAG TGSI program printed above.
; It samples two 2D textures, combines them with interpolated inputs and constants,
; and writes a single colour packed as four half floats.
; Arguments used by the body: %1 (array of 16-byte descriptors, read through
; llvm.SI.load.const), %2 (array of <4 x i32>, one entry per sampler), %3 (array of
; <8 x i32>, one entry per sampler view), %5 and %7 (passed unchanged to every
; llvm.SI.fs.interp call).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Descriptor 0 of %1 is the buffer every llvm.SI.load.const below reads from.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Byte offsets 0/4/8 correspond to CONST[0].xyz (16 bytes per vec4 constant).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
; Byte offsets 32/36/40 correspond to CONST[2].xyz.
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
; Entry 0 of %3 and of %2: the 32-byte and 16-byte operands of the first sample (SAMP[0]).
%30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
; Entry 1 of %3 and of %2: the operands of the second sample (SAMP[1]).
%34 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%35 = bitcast <8 x i32> addrspace(2)* %34 to <32 x i8> addrspace(2)*
%36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0
%37 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%38 = bitcast <4 x i32> addrspace(2)* %37 to <16 x i8> addrspace(2)*
%39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0
; Interpolated inputs. The first operand is the channel, the second the input index:
; %40/%41/%42 = IN[0].x/.y/.w
%40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
; %43..%46 = IN[1].x/.y/.z/.w
%43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
; %47/%48 = IN[2].x/.y
%47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%48 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
; TGSI instruction 3 (TEX): sample the first texture at IN[1].xy.
%49 = bitcast float %43 to i32
%50 = bitcast float %44 to i32
%51 = insertelement <2 x i32> undef, i32 %49, i32 0
%52 = insertelement <2 x i32> %51, i32 %50, i32 1
%53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %31, <16 x i8> %33, i32 2)
%54 = extractelement <4 x float> %53, i32 0
%55 = extractelement <4 x float> %53, i32 1
%56 = extractelement <4 x float> %53, i32 2
; TGSI instruction 4 (MUL): scale the texel's xyz by CONST[2].xyz.
%57 = fmul float %54, %27
%58 = fmul float %55, %28
%59 = fmul float %56, %29
; TGSI instruction 7 (TXP): divide IN[0].xy by IN[0].w, then sample the second texture.
%60 = fdiv float %40, %42
%61 = fdiv float %41, %42
%62 = bitcast float %60 to i32
%63 = bitcast float %61 to i32
%64 = insertelement <2 x i32> undef, i32 %62, i32 0
%65 = insertelement <2 x i32> %64, i32 %63, i32 1
%66 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %65, <32 x i8> %36, <16 x i8> %39, i32 2)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = extractelement <4 x float> %66, i32 2
; TGSI instructions 8-13: take log2 of the second texel's xyz and subtract it from
; (IN[1].w, IN[2].x, IN[2].y). The .w log2 of the TGSI is unused and absent here.
%70 = call float @llvm.log2.f32(float %67)
%71 = call float @llvm.log2.f32(float %68)
%72 = call float @llvm.log2.f32(float %69)
%73 = fsub float %46, %70
%74 = fsub float %47, %71
%75 = fsub float %48, %72
; TGSI instruction 14: multiply by the scaled first texel.
%76 = fmul float %57, %73
%77 = fmul float %58, %74
%78 = fmul float %59, %75
; TGSI instructions 15-16: clamp IN[1].z to [0, 1] and use it as the LRP factor
; between the value computed above and CONST[0].xyz.
%79 = call float @llvm.AMDIL.clamp.(float %45, float 0.000000e+00, float 1.000000e+00)
%80 = call float @llvm.AMDGPU.lrp(float %79, float %76, float %24)
%81 = call float @llvm.AMDGPU.lrp(float %79, float %77, float %25)
%82 = call float @llvm.AMDGPU.lrp(float %79, float %78, float %26)
; Pack (x, y) and (z, 1.0) into two 32-bit values; 1.0 is the w written by TGSI instruction 17.
%83 = call i32 @llvm.SI.packf16(float %80, float %81)
%84 = bitcast i32 %83 to float
%85 = call i32 @llvm.SI.packf16(float %82, float 1.000000e+00)
%86 = bitcast i32 %85 to float
; Single export of the packed colour; the two packed values are repeated in slots 3 and 4.
; NOTE(review): the five leading integers are presumably (enable mask, valid mask, done,
; target, compressed) as in the LLVM SI backend; the dump itself does not name them.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %84, float %86, float %84, float %86)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose LLVM IR is printed above.
; Each existing trailing ';' comment is the instruction's hexadecimal encoding.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 is the implicit [m0] operand of every v_interp instruction below.
s_mov_b32 m0, s9 ; BEFC0309
; s[0:3]: 4-dword descriptor used by the s_buffer_load_dword constant reads further down.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Input 0: v2 = channel 0, v3 = channel 1, v4 = channel 3 (%40, %41, %42 in the IR).
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
; Start of the expansion of the two IR fdivs by v4: compare |v4| against 0x6f800000
; and choose a scale factor v5 of either 0x2f800000 or 1.0 from the result.
v_mov_b32_e32 v5, 0x6f800000 ; 7E0A02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v4|, v5 ; D008016A 00020B04
v_mov_b32_e32 v5, 0x2f800000 ; 7E0A02FF 2F800000
v_cndmask_b32_e32 v5, 1.0, v5 ; 000A0AF2
; Input 1: v6..v9 = channels 0..3 (%43..%46 in the IR).
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
; s[8:11] and s[12:19]: the 4-dword and 8-dword operands of the first image_sample.
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
; Input 2: v10 = channel 0, v0 = channel 1 (%47, %48 in the IR).
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900
; Reciprocal of the scaled divisor.
v_mul_f32_e32 v4, v5, v4 ; 10080905
v_rcp_f32_e32 v4, v4 ; 7E085504
; s[20:23] and s[24:31]: the operands of the second image_sample.
s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901
s_waitcnt lgkmcnt(0) ; BF8C007F
; First texture fetch, coordinates v[6:7], result in v[11:13].
image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[8:11] ; F0800700 00430B06
; Finish the two divisions: multiply by the reciprocal, then by the scale factor again.
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
v_mul_f32_e32 v3, v1, v5 ; 10060B01
v_mul_f32_e32 v4, v2, v5 ; 10080B02
; Constants at dword offsets 8, 9, 10 (byte offsets 32/36/40 in the IR).
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
; Second texture fetch, coordinates v[3:4] (the quotients), result in v[1:3].
image_sample v[1:3], 7, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[20:23] ; F0800700 00A60103
; Constants at dword offsets 0, 1, 2 (byte offsets 0/4/8 in the IR).
s_buffer_load_dword s7, s[0:3], 0x0 ; C2038100
s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101
s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; log2 of the second texel.
v_log_f32_e32 v1, v1 ; 7E024F01
v_log_f32_e32 v2, v2 ; 7E044F02
v_log_f32_e32 v3, v3 ; 7E064F03
; First texel scaled by the constants in s4..s6.
v_mul_f32_e32 v4, s4, v11 ; 10081604
v_mul_f32_e32 v5, s5, v12 ; 100A1805
v_mul_f32_e32 v6, s6, v13 ; 100C1A06
; Reverse subtract: interpolated value minus the log2 result (%73..%75 in the IR).
v_subrev_f32_e32 v1, v1, v9 ; 0A021301
v_subrev_f32_e32 v2, v2, v10 ; 0A041502
v_subrev_f32_e32 v0, v3, v0 ; 0A000103
v_mul_f32_e32 v1, v1, v4 ; 10020901
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mul_f32_e32 v0, v0, v6 ; 10000D00
; v3 = clamped blend factor (IR %79), v4 = 1.0 - v3.
v_add_f32_e64 v3, 0, v8 clamp ; D2060803 00021080
v_sub_f32_e32 v4, 1.0, v3 ; 080806F2
; Each of the three llvm.AMDGPU.lrp calls becomes constant * v4 + value * v3.
v_mul_f32_e32 v5, s7, v4 ; 100A0807
v_mac_f32_e32 v5, v1, v3 ; 3E0A0701
v_mul_f32_e32 v1, s8, v4 ; 10020808
v_mac_f32_e32 v1, v2, v3 ; 3E020702
v_mul_f32_e32 v2, s0, v4 ; 10040800
v_mac_f32_e32 v2, v0, v3 ; 3E040700
; Pack to half floats (the two llvm.SI.packf16 calls), export, and end the program.
v_cvt_pkrtz_f16_f32_e32 v0, v5, v1 ; 5E000305
v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 308 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..15]
DCL CONST[17..20]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
IMM[5] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MUL TEMP[3], CONST[17], IN[0].xxxx
43: MAD TEMP[3], CONST[18], IN[0].yyyy, TEMP[3]
44: MAD TEMP[3], CONST[19], IN[0].zzzz, TEMP[3]
45: MAD TEMP[3], CONST[20], IN[0].wwww, TEMP[3]
46: MOV TEMP[4].x, CONST[12].xxxx
47: MOV TEMP[4].y, CONST[13].xxxx
48: MOV TEMP[4].z, CONST[14].xxxx
49: MOV TEMP[5].x, CONST[12].yyyy
50: MOV TEMP[5].y, CONST[13].yyyy
51: MOV TEMP[5].z, CONST[14].yyyy
52: MOV TEMP[6].x, CONST[12].zzzz
53: MOV TEMP[6].y, CONST[13].zzzz
54: MOV TEMP[6].z, CONST[14].zzzz
55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz
58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
59: RSQ TEMP[4].x, TEMP[4].xxxx
60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
61: MUL TEMP[4].xyw, TEMP[3], IMM[5].xxxx
62: MOV TEMP[5].x, TEMP[4].xxxx
63: MUL TEMP[6].x, TEMP[4].yyyy, CONST[0].xxxx
64: MOV TEMP[5].y, TEMP[6].xxxx
65: ADD TEMP[4].xy, TEMP[5].xyyy, TEMP[4].wwww
66: MOV TEMP[4].zw, TEMP[3].wwzw
67: MOV TEMP[5].w, IMM[2].xxxx
68: MOV TEMP[5].xyz, TEMP[0].xyzx
69: DP4 TEMP[6].x, CONST[1], TEMP[5]
70: DP4 TEMP[7].x, CONST[2], TEMP[5]
71: MOV TEMP[6].y, TEMP[7].xxxx
72: DP4 TEMP[5].x, CONST[3], TEMP[5]
73: MOV TEMP[6].z, TEMP[5].xxxx
74: MUL TEMP[5], TEMP[0].xyzz, TEMP[0].yzzx
75: DP4 TEMP[7].x, CONST[4], TEMP[5]
76: DP4 TEMP[8].x, CONST[5], TEMP[5]
77: MOV TEMP[7].y, TEMP[8].xxxx
78: DP4 TEMP[5].x, CONST[6], TEMP[5]
79: MOV TEMP[7].z, TEMP[5].xxxx
80: MUL TEMP[5].x, TEMP[0].yyyy, TEMP[0].yyyy
81: MAD TEMP[5].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[5].xxxx
82: MAD TEMP[5].xyz, CONST[7].xyzz, TEMP[5].xxxx, TEMP[7].xyzz
83: ADD TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xyzz
84: MOV TEMP[0].yzw, TEMP[0].yxyz
85: MUL TEMP[6], CONST[8], IN[0].xxxx
86: MAD TEMP[6], CONST[9], IN[0].yyyy, TEMP[6]
87: MAD TEMP[6], CONST[10], IN[0].zzzz, TEMP[6]
88: MAD TEMP[6].xyz, CONST[11], IN[0].wwww, TEMP[6]
89: MOV TEMP[6].xyz, TEMP[6].xyzx
90: MOV TEMP[6].w, TEMP[5].xxxx
91: MOV TEMP[5].xy, TEMP[5].yzyy
92: MOV TEMP[0].x, TEMP[3].zzzz
93: MOV OUT[5], TEMP[6]
94: MOV OUT[1], TEMP[2]
95: MOV OUT[2], TEMP[1]
96: MOV OUT[3], TEMP[4]
97: MOV OUT[4], TEMP[0]
98: MOV OUT[0], TEMP[3]
99: MOV OUT[6], TEMP[5]
100: END
; ModuleID = 'tgsi'
; Vertex shader entry point, the LLVM IR form of the VERT TGSI program printed above.
; It reads three vertex inputs, a block of constants and one texture, and issues seven
; exports (targets 32-37 and 12).
; Arguments used by the body: %1 (descriptor array read through llvm.SI.load.const),
; %2 and %3 (operands of the texture sample), %4 (descriptor array for the vertex
; inputs), %5 and %7 (added together to form the index for every input fetch).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor 0 of %1 is the buffer every llvm.SI.load.const below reads from.
; Offsets are in bytes, 16 per vec4 constant, so offset N addresses CONST[N / 16].
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; %13 = CONST[0].x
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
; %14..%25 = CONST[1], CONST[2], CONST[3] (all four components each)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; %26..%37 = CONST[4], CONST[5], CONST[6] (all four components each)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
; %38..%40 = CONST[7].xyz
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
; %41..%52 = CONST[8..11].xyz
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; %53..%61 = CONST[12..14].xyz
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
; %62..%77 = CONST[17..20] (all four components each)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; Entry 0 of %3 and of %2: the operands of the texture sample (SAMP[0]).
%78 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0
%80 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
; Vertex input IN[0] (%86..%89 = x, y, z, w), fetched through descriptor 0 of %4.
; NOTE(review): %5 + %7 is presumably a start offset plus the vertex id - confirm against the driver.
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
; Vertex input IN[1] (%94, %95 = x, y), descriptor 1.
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
; Vertex input IN[2] (%100, %101 = x, y), descriptor 2.
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
; TGSI instructions 0-12: unpack IN[2] into three values.
;   %106 = float(int(IN[2].x * 0.25))
;   %107 = float(int(IN[2].y) / 256)
;   %110 = float(int(IN[2].y) - (int(IN[2].y) / 256) * 256), the remainder of that division
; Each is then offset by -1.0 to form the xyz of the target-32 export.
%102 = fmul float %100, 2.500000e-01
%103 = fptosi float %102 to i32
%104 = fptosi float %101 to i32
%105 = sdiv i32 %104, 256
%106 = sitofp i32 %103 to float
%107 = sitofp i32 %105 to float
%108 = shl nsw i32 %105, 8
%109 = sub i32 %104, %108
%110 = sitofp i32 %109 to float
%111 = fadd float %106, -1.000000e+00
%112 = fadd float %107, -1.000000e+00
%113 = fadd float %110, -1.000000e+00
; TGSI instructions 13-24: %115 is IN[2].y minus its truncated integer value.
; %118 = int(%115 * 10 + IMM[0].w), where IMM[0].w is printed as 0.4999 in the TGSI.
; %118 is then turned into three 1.0/0.0 selectors for the values 0, 1 and 2.
%114 = sitofp i32 %104 to float
%115 = fsub float %101, %114
%116 = fmul float %115, 1.000000e+01
%117 = fadd float %116, 0x3FDFFE5CA0000000
%118 = fptosi float %117 to i32
%119 = icmp eq i32 %118, 0
%120 = select i1 %119, float 1.000000e+00, float 0.000000e+00
%121 = icmp eq i32 %118, 1
%122 = select i1 %121, float 1.000000e+00, float 0.000000e+00
%123 = icmp eq i32 %118, 2
%124 = select i1 %123, float 1.000000e+00, float 0.000000e+00
; TGSI instructions 25-30: DP4 of the selectors with (%106, %107, %110, 0.0). The w term
; is the literal 0.0 added in %130. The result is scaled by IMM[2].w (printed as 0.0039).
%125 = fmul float %120, %106
%126 = fmul float %122, %107
%127 = fadd float %125, %126
%128 = fmul float %124, %110
%129 = fadd float %127, %128
%130 = fadd float %129, 0.000000e+00
%131 = fmul float %130, 0x3F70101020000000
; TGSI instruction 33 (TXL): operand vector is (%131, 0.1, 0). 1036831949 is 0x3DCCCCCD,
; the bit pattern of float 0.1 (IMM[2].z). The 0 comes from IMM[2].y, the TXL level of detail.
%132 = bitcast float %131 to i32
%133 = insertelement <4 x i32> undef, i32 %132, i32 0
%134 = insertelement <4 x i32> %133, i32 1036831949, i32 1
%135 = insertelement <4 x i32> %134, i32 0, i32 2
%136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %135, <32 x i8> %79, <16 x i8> %81, i32 2)
%137 = extractelement <4 x float> %136, i32 0
%138 = extractelement <4 x float> %136, i32 1
; TGSI instructions 34-37: %140 = texel.x * 16 - 8 and %141 = texel.y * 4.
%139 = fmul float %137, 1.600000e+01
%140 = fadd float %139, -8.000000e+00
%141 = fmul float %138, 4.000000e+00
; TGSI instructions 38-41: %145 = IN[2].x - 4 * int(IN[2].x * 0.25) - 2.0.
%142 = shl i32 %103, 2
%143 = sitofp i32 %142 to float
%144 = fsub float %100, %143
%145 = fadd float %144, -2.000000e+00
; TGSI instructions 42-45: OUT[0] = CONST[17]*IN[0].x + CONST[18]*IN[0].y
; + CONST[19]*IN[0].z + CONST[20]*IN[0].w. Final components: %167, %169, %171, %173.
%146 = fmul float %62, %86
%147 = fmul float %63, %86
%148 = fmul float %64, %86
%149 = fmul float %65, %86
%150 = fmul float %66, %87
%151 = fadd float %150, %146
%152 = fmul float %67, %87
%153 = fadd float %152, %147
%154 = fmul float %68, %87
%155 = fadd float %154, %148
%156 = fmul float %69, %87
%157 = fadd float %156, %149
%158 = fmul float %70, %88
%159 = fadd float %158, %151
%160 = fmul float %71, %88
%161 = fadd float %160, %153
%162 = fmul float %72, %88
%163 = fadd float %162, %155
%164 = fmul float %73, %88
%165 = fadd float %164, %157
%166 = fmul float %74, %89
%167 = fadd float %166, %159
%168 = fmul float %75, %89
%169 = fadd float %168, %161
%170 = fmul float %76, %89
%171 = fadd float %170, %163
%172 = fmul float %77, %89
%173 = fadd float %172, %165
; TGSI instructions 46-57: multiply the vector (IN[1].x, IN[1].y, %145) by the
; xyz components of CONST[12..14]. Results: %184, %186, %188.
%174 = fmul float %53, %94
%175 = fmul float %56, %94
%176 = fmul float %59, %94
%177 = fmul float %54, %95
%178 = fadd float %177, %174
%179 = fmul float %57, %95
%180 = fadd float %179, %175
%181 = fmul float %60, %95
%182 = fadd float %181, %176
%183 = fmul float %55, %145
%184 = fadd float %183, %178
%185 = fmul float %58, %145
%186 = fadd float %185, %180
%187 = fmul float %61, %145
%188 = fadd float %187, %182
; TGSI instructions 58-60: scale that vector by the reciprocal square root of its
; squared length. Normalised components: %195, %196, %197.
%189 = fmul float %184, %184
%190 = fmul float %186, %186
%191 = fadd float %190, %189
%192 = fmul float %188, %188
%193 = fadd float %191, %192
%194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193)
%195 = fmul float %184, %194
%196 = fmul float %186, %194
%197 = fmul float %188, %194
; TGSI instructions 61-65: %202 = 0.5 * OUT[0].x + 0.5 * OUT[0].w and
; %203 = 0.5 * OUT[0].y * CONST[0].x + 0.5 * OUT[0].w.
%198 = fmul float %167, 5.000000e-01
%199 = fmul float %169, 5.000000e-01
%200 = fmul float %173, 5.000000e-01
%201 = fmul float %199, %13
%202 = fadd float %198, %200
%203 = fadd float %201, %200
; TGSI instructions 67-73: DP4 of CONST[1], CONST[2], CONST[3] with
; (%195, %196, %197, 1.0). Results: %209, %215, %221.
%204 = fmul float %14, %195
%205 = fmul float %15, %196
%206 = fadd float %204, %205
%207 = fmul float %16, %197
%208 = fadd float %206, %207
%209 = fadd float %208, %17
%210 = fmul float %18, %195
%211 = fmul float %19, %196
%212 = fadd float %210, %211
%213 = fmul float %20, %197
%214 = fadd float %212, %213
%215 = fadd float %214, %21
%216 = fmul float %22, %195
%217 = fmul float %23, %196
%218 = fadd float %216, %217
%219 = fmul float %24, %197
%220 = fadd float %218, %219
%221 = fadd float %220, %25
; TGSI instruction 74: pairwise products of the normalised vector, (x*y, y*z, z*z, z*x).
%222 = fmul float %195, %196
%223 = fmul float %196, %197
%224 = fmul float %197, %197
%225 = fmul float %197, %195
; TGSI instructions 75-79: DP4 of CONST[4], CONST[5], CONST[6] with those products.
; Results: %232, %239, %246.
%226 = fmul float %26, %222
%227 = fmul float %27, %223
%228 = fadd float %226, %227
%229 = fmul float %28, %224
%230 = fadd float %228, %229
%231 = fmul float %29, %225
%232 = fadd float %230, %231
%233 = fmul float %30, %222
%234 = fmul float %31, %223
%235 = fadd float %233, %234
%236 = fmul float %32, %224
%237 = fadd float %235, %236
%238 = fmul float %33, %225
%239 = fadd float %237, %238
%240 = fmul float %34, %222
%241 = fmul float %35, %223
%242 = fadd float %240, %241
%243 = fmul float %36, %224
%244 = fadd float %242, %243
%245 = fmul float %37, %225
%246 = fadd float %244, %245
; TGSI instructions 80-83: %249 = x*x - y*y of the normalised vector. It is scaled by
; CONST[7].xyz and summed with the two DP4 groups above. Results: %256, %257, %258.
; NOTE(review): this resembles a second-order spherical-harmonics evaluation, but the dump does not say so.
%247 = fmul float %196, %196
%248 = fmul float %195, %195
%249 = fsub float %248, %247
%250 = fmul float %38, %249
%251 = fadd float %250, %232
%252 = fmul float %39, %249
%253 = fadd float %252, %239
%254 = fmul float %40, %249
%255 = fadd float %254, %246
%256 = fadd float %251, %209
%257 = fadd float %253, %215
%258 = fadd float %255, %221
; TGSI instructions 85-88: xyz of CONST[8]*IN[0].x + CONST[9]*IN[0].y
; + CONST[10]*IN[0].z + CONST[11]*IN[0].w. Results: %275, %277, %279.
%259 = fmul float %41, %86
%260 = fmul float %42, %86
%261 = fmul float %43, %86
%262 = fmul float %44, %87
%263 = fadd float %262, %259
%264 = fmul float %45, %87
%265 = fadd float %264, %260
%266 = fmul float %46, %87
%267 = fadd float %266, %261
%268 = fmul float %47, %88
%269 = fadd float %268, %263
%270 = fmul float %48, %88
%271 = fadd float %270, %265
%272 = fmul float %49, %88
%273 = fadd float %272, %267
%274 = fmul float %50, %89
%275 = fadd float %274, %269
%276 = fmul float %51, %89
%277 = fadd float %276, %271
%278 = fmul float %52, %89
%279 = fadd float %278, %273
; Exports. Targets 32..37 carry TGSI OUT[1]..OUT[6] in order and target 12 carries
; OUT[0] (POSITION).
; NOTE(review): presumably 32+n is parameter export n and 12 is the position export
; in the hardware numbering - confirm against the ISA manual.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %111, float %112, float %113, float %140)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %122, float %124, float %141)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %202, float %203, float %171, float %173)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %171, float %195, float %196, float %197)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %275, float %277, float %279, float %256)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %257, float %258, float %258, float %225)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %167, float %169, float %171, float %173)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000
v_mov_b32_e32 v5, 0 ; 7E0A0280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[72:75], s[2:3], 0x0 ; C0A40300
s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s70, s[72:75], 0x30 ; C2234930
s_buffer_load_dword s48, s[72:75], 0x31 ; C2184931
buffer_load_format_xyzw v[6:9], v0, s[0:3], 0 idxen ; E00C2000 80000600
buffer_load_format_xyzw v[10:13], v0, s[12:15], 0 idxen ; E00C2000 80030A00
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
s_buffer_load_dword s14, s[72:75], 0x32 ; C2074932
s_buffer_load_dword s71, s[72:75], 0x34 ; C223C934
s_buffer_load_dword s52, s[72:75], 0x35 ; C21A4935
s_buffer_load_dword s16, s[72:75], 0x36 ; C2084936
s_buffer_load_dword s76, s[72:75], 0x38 ; C2264938
s_buffer_load_dword s53, s[72:75], 0x39 ; C21AC939
s_buffer_load_dword s19, s[72:75], 0x3a ; C209C93A
s_buffer_load_dword s77, s[72:75], 0x44 ; C226C944
s_buffer_load_dword s78, s[72:75], 0x45 ; C2274945
s_buffer_load_dword s79, s[72:75], 0x46 ; C227C946
s_buffer_load_dword s80, s[72:75], 0x47 ; C2284947
s_buffer_load_dword s56, s[72:75], 0x48 ; C21C4948
s_buffer_load_dword s57, s[72:75], 0x49 ; C21CC949
s_buffer_load_dword s54, s[72:75], 0x4a ; C21B494A
s_buffer_load_dword s55, s[72:75], 0x4b ; C21BC94B
s_buffer_load_dword s49, s[72:75], 0x4c ; C218C94C
s_buffer_load_dword s50, s[72:75], 0x4d ; C219494D
s_buffer_load_dword s47, s[72:75], 0x4e ; C217C94E
s_buffer_load_dword s2, s[72:75], 0xd ; C201490D
s_buffer_load_dword s1, s[72:75], 0xe ; C200C90E
s_buffer_load_dword s0, s[72:75], 0xf ; C200490F
s_buffer_load_dword s12, s[72:75], 0x10 ; C2064910
s_buffer_load_dword s17, s[72:75], 0x11 ; C208C911
s_buffer_load_dword s51, s[72:75], 0x4f ; C219C94F
s_buffer_load_dword s46, s[72:75], 0x50 ; C2174950
s_buffer_load_dword s45, s[72:75], 0x51 ; C216C951
s_buffer_load_dword s44, s[72:75], 0x52 ; C2164952
s_buffer_load_dword s43, s[72:75], 0x53 ; C215C953
s_buffer_load_dword s37, s[72:75], 0x0 ; C212C900
s_buffer_load_dword s5, s[72:75], 0x4 ; C202C904
s_buffer_load_dword s6, s[72:75], 0x5 ; C2034905
s_buffer_load_dword s4, s[72:75], 0x6 ; C2024906
s_buffer_load_dword s3, s[72:75], 0x7 ; C201C907
s_buffer_load_dword s18, s[72:75], 0x12 ; C2094912
s_buffer_load_dword s13, s[72:75], 0x13 ; C206C913
s_buffer_load_dword s27, s[72:75], 0x14 ; C20DC914
s_buffer_load_dword s38, s[72:75], 0x15 ; C2134915
s_buffer_load_dword s25, s[72:75], 0x16 ; C20CC916
s_buffer_load_dword s15, s[72:75], 0x17 ; C207C917
s_buffer_load_dword s36, s[72:75], 0x18 ; C2124918
s_buffer_load_dword s39, s[72:75], 0x19 ; C213C919
s_buffer_load_dword s26, s[72:75], 0x1a ; C20D491A
s_buffer_load_dword s24, s[72:75], 0x1b ; C20C491B
s_buffer_load_dword s9, s[72:75], 0x8 ; C204C908
s_buffer_load_dword s10, s[72:75], 0x9 ; C2054909
s_buffer_load_dword s8, s[72:75], 0xa ; C204490A
s_buffer_load_dword s7, s[72:75], 0xb ; C203C90B
s_buffer_load_dword s11, s[72:75], 0xc ; C205C90C
s_buffer_load_dword s40, s[72:75], 0x1c ; C214491C
s_buffer_load_dword s41, s[72:75], 0x1d ; C214C91D
s_buffer_load_dword s42, s[72:75], 0x1e ; C215491E
s_buffer_load_dword s69, s[72:75], 0x20 ; C222C920
s_buffer_load_dword s68, s[72:75], 0x21 ; C2224921
s_buffer_load_dword s67, s[72:75], 0x22 ; C221C922
s_buffer_load_dword s66, s[72:75], 0x24 ; C2214924
s_buffer_load_dword s65, s[72:75], 0x25 ; C220C925
s_buffer_load_dword s64, s[72:75], 0x26 ; C2204926
s_buffer_load_dword s63, s[72:75], 0x28 ; C21FC928
s_buffer_load_dword s62, s[72:75], 0x29 ; C21F4929
s_buffer_load_dword s61, s[72:75], 0x2a ; C21EC92A
s_buffer_load_dword s58, s[72:75], 0x2c ; C21D492C
s_buffer_load_dword s59, s[72:75], 0x2d ; C21DC92D
s_buffer_load_dword s60, s[72:75], 0x2e ; C21E492E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s77, v6 ; 10000C4D
v_mul_f32_e32 v14, s78, v6 ; 101C0C4E
v_mul_f32_e32 v15, s79, v6 ; 101E0C4F
v_mul_f32_e32 v16, s80, v6 ; 10200C50
v_mul_f32_e32 v17, s70, v10 ; 10221446
v_mul_f32_e32 v18, s71, v10 ; 10241447
v_mul_f32_e32 v10, s76, v10 ; 1014144C
v_mul_f32_e32 v19, s69, v6 ; 10260C45
v_mul_f32_e32 v20, s68, v6 ; 10280C44
v_mac_f32_e32 v17, s48, v11 ; 3E221630
v_mac_f32_e32 v18, s52, v11 ; 3E241634
v_mac_f32_e32 v10, s53, v11 ; 3E141635
v_mul_f32_e32 v6, s67, v6 ; 100C0C43
v_mac_f32_e32 v0, s56, v7 ; 3E000E38
v_mac_f32_e32 v14, s57, v7 ; 3E1C0E39
v_mac_f32_e32 v15, s54, v7 ; 3E1E0E36
v_mac_f32_e32 v16, s55, v7 ; 3E200E37
v_mac_f32_e32 v19, s66, v7 ; 3E260E42
v_mac_f32_e32 v20, s65, v7 ; 3E280E41
v_mac_f32_e32 v6, s64, v7 ; 3E0C0E40
v_cvt_i32_f32_e32 v3, v13 ; 7E06110D
v_mac_f32_e32 v0, s49, v8 ; 3E001031
v_mac_f32_e32 v14, s50, v8 ; 3E1C1032
v_mac_f32_e32 v15, s47, v8 ; 3E1E102F
v_cvt_f32_i32_e32 v4, v3 ; 7E080B03
v_mac_f32_e32 v16, s51, v8 ; 3E201033
v_mac_f32_e32 v19, s63, v8 ; 3E26103F
v_mac_f32_e32 v20, s62, v8 ; 3E28103E
v_subrev_f32_e32 v4, v4, v13 ; 0A081B04
v_madmk_f32_e32 v1, v4, v1, 0x41200000 ; 40020304 41200000
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_mac_f32_e32 v6, s61, v8 ; 3E0C103D
v_mac_f32_e32 v0, s46, v9 ; 3E00122E
v_mac_f32_e32 v14, s45, v9 ; 3E1C122D
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v7, 0, 1.0, vcc ; D2000007 01A9E480
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cndmask_b32_e64 v8, 0, 1.0, vcc ; D2000008 01A9E480
v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_mac_f32_e32 v15, s44, v9 ; 3E1E122C
v_mac_f32_e32 v16, s43, v9 ; 3E20122B
v_mac_f32_e32 v19, s58, v9 ; 3E26123A
v_mac_f32_e32 v20, s59, v9 ; 3E28123B
v_mac_f32_e32 v6, s60, v9 ; 3E0C123C
v_mul_f32_e32 v4, 0x3e800000, v12 ; 100818FF 3E800000
v_cvt_i32_f32_e32 v4, v4 ; 7E081104
v_ashrrev_i32_e32 v9, 31, v3 ; 3012069F
v_lshrrev_b32_e32 v9, 24, v9 ; 2C121298
v_add_i32_e32 v9, v3, v9 ; 4A121303
v_and_b32_e32 v11, 0xffffff00, v9 ; 361612FF FFFFFF00
v_sub_i32_e32 v3, v3, v11 ; 4C061703
v_lshlrev_b32_e32 v11, 2, v4 ; 34160882
v_cvt_f32_i32_e32 v11, v11 ; 7E160B0B
v_ashrrev_i32_e32 v9, 8, v9 ; 30121288
v_cvt_f32_i32_e32 v9, v9 ; 7E120B09
v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04
v_cvt_f32_i32_e32 v21, v3 ; 7E2A0B03
v_subrev_f32_e32 v11, v11, v12 ; 0A16190B
v_mul_f32_e32 v3, v9, v8 ; 10061109
v_mac_f32_e32 v3, v13, v7 ; 3E060F0D
v_mac_f32_e32 v3, v21, v1 ; 3E060315
v_add_f32_e32 v3, 0, v3 ; 06060680
v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[28:35], s[20:23] ; F0900300 00A70303
v_add_f32_e32 v5, -1.0, v13 ; 060A1AF3
v_add_f32_e32 v9, -1.0, v9 ; 061212F3
v_add_f32_e32 v12, -1.0, v21 ; 06182AF3
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000
exp 15, 32, 0, 0, 0, v5, v9, v12, v2 ; F800020F 020C0905
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v2, 4.0, v4 ; 100408F6
exp 15, 33, 0, 0, 0, v7, v8, v1, v2 ; F800021F 02010807
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v1, -2.0, v11 ; 060216F5
v_mac_f32_e32 v17, s14, v1 ; 3E22020E
v_mac_f32_e32 v18, s16, v1 ; 3E240210
v_mac_f32_e32 v10, s19, v1 ; 3E140213
v_mul_f32_e32 v1, v17, v17 ; 10022311
v_mac_f32_e32 v1, v18, v18 ; 3E022512
v_mac_f32_e32 v1, v10, v10 ; 3E02150A
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v2, 0.5, v14 ; 10041CF0
v_mul_f32_e32 v3, 0.5, v16 ; 100620F0
v_mad_f32 v4, 0.5, v0, v3 ; D2820004 040E00F0
v_mac_f32_e32 v3, s37, v2 ; 3E060425
v_mul_f32_e32 v2, v1, v17 ; 10042301
v_mul_f32_e32 v5, v1, v18 ; 100A2501
v_mul_f32_e32 v1, v1, v10 ; 10021501
v_mul_f32_e32 v7, v1, v5 ; 100E0B01
v_mul_f32_e32 v8, s17, v7 ; 10100E11
v_mul_f32_e32 v9, s38, v7 ; 10120E26
v_mul_f32_e32 v7, s39, v7 ; 100E0E27
v_mul_f32_e32 v10, v5, v2 ; 10140505
v_mac_f32_e32 v8, s12, v10 ; 3E10140C
v_mac_f32_e32 v9, s27, v10 ; 3E12141B
v_mac_f32_e32 v7, s36, v10 ; 3E0E1424
v_mul_f32_e32 v10, v1, v1 ; 10140301
v_mac_f32_e32 v8, s18, v10 ; 3E101412
v_mac_f32_e32 v9, s25, v10 ; 3E121419
v_mac_f32_e32 v7, s26, v10 ; 3E0E141A
v_mul_f32_e32 v10, v2, v1 ; 10140302
v_mac_f32_e32 v8, s13, v10 ; 3E10140D
v_mac_f32_e32 v9, s15, v10 ; 3E12140F
v_mac_f32_e32 v7, s24, v10 ; 3E0E1418
v_mul_f32_e32 v11, v5, v5 ; 10160B05
v_mad_f32 v11, v2, v2, -v11 ; D282000B 842E0502
v_mac_f32_e32 v8, s40, v11 ; 3E101628
v_mac_f32_e32 v9, s41, v11 ; 3E121629
v_mac_f32_e32 v7, s42, v11 ; 3E0E162A
exp 15, 34, 0, 0, 0, v4, v3, v15, v16 ; F800022F 100F0304
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, s6, v5 ; 10060A06
v_mac_f32_e32 v3, s5, v2 ; 3E060405
v_mul_f32_e32 v4, s10, v5 ; 10080A0A
v_mac_f32_e32 v4, s9, v2 ; 3E080409
v_mul_f32_e32 v11, s2, v5 ; 10160A02
v_mac_f32_e32 v11, s11, v2 ; 3E16040B
v_mac_f32_e32 v3, s4, v1 ; 3E060204
v_mac_f32_e32 v4, s8, v1 ; 3E080208
v_mac_f32_e32 v11, s1, v1 ; 3E160201
v_add_f32_e32 v3, s3, v3 ; 06060603
v_add_f32_e32 v3, v3, v8 ; 06061103
exp 15, 35, 0, 0, 0, v15, v2, v5, v1 ; F800023F 0105020F
exp 15, 36, 0, 0, 0, v19, v20, v6, v3 ; F800024F 03061413
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v1, s7, v4 ; 06020807
v_add_f32_e32 v1, v1, v9 ; 06021301
v_add_f32_e32 v2, s0, v11 ; 06041600
v_add_f32_e32 v2, v2, v7 ; 06040F02
exp 15, 37, 0, 0, 0, v1, v2, v2, v10 ; F800025F 0A020201
exp 15, 12, 0, 1, 0, v0, v14, v15, v16 ; F80008CF 100F0E00
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 88
VGPRS: 24
Code Size: 960 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..2]
DCL CONST[8..15]
DCL CONST[17]
DCL TEMP[0..21], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0000, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.6600, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[4].wwww
1: MOV TEMP[0].yz, IN[5].yxyy
2: ABS TEMP[1].xyz, IN[3].yzww
3: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
4: RSQ TEMP[2].x, TEMP[2].xxxx
5: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx, IMM[0].xxxx
6: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[0].yyyy
7: MAX TEMP[1].xyz, TEMP[1].xyzz, IMM[0].zzzz
8: ADD TEMP[2].x, TEMP[1].xxxx, TEMP[1].yyyy
9: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[1].zzzz
10: RCP TEMP[2].xyz, TEMP[2].xxxx
11: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz
12: ADD TEMP[2], IN[0], IMM[0].wwww
13: FLR TEMP[2].xyz, TEMP[2]
14: MOV TEMP[3].x, CONST[8].xxxx
15: MUL TEMP[4].x, TEMP[2].xxxx, CONST[8].xxxx
16: MOV TEMP[5].x, TEMP[4].xxxx
17: FLR TEMP[4].x, TEMP[4].xxxx
18: MUL TEMP[4].x, TEMP[4].xxxx, CONST[8].xxxx
19: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[1].xxxx
20: UIF TEMP[6].xxxx :0
21: MOV TEMP[3].x, CONST[9].xxxx
22: ADD TEMP[6].x, TEMP[2].xxxx, IMM[1].yyyy
23: MUL TEMP[6].x, TEMP[6].xxxx, CONST[9].xxxx
24: MOV TEMP[5].x, TEMP[6].xxxx
25: FLR TEMP[7].x, TEMP[6].xxxx
26: MUL TEMP[7].x, TEMP[7].xxxx, CONST[9].xxxx
27: MOV TEMP[4].x, TEMP[7].xxxx
28: FRC TEMP[6].x, TEMP[6].xxxx
29: FRC TEMP[8].x, TEMP[7].xxxx
30: MOV TEMP[6].y, TEMP[8].xxxx
31: FLR TEMP[7].x, TEMP[7].xxxx
32: ADD TEMP[7].x, TEMP[7].xxxx, IMM[1].zzzz
33: MOV TEMP[6].z, TEMP[7].xxxx
34: MOV TEMP[6].xyz, TEMP[6].xyzx
35: ELSE :0
36: FRC TEMP[5].x, TEMP[5].xxxx
37: FRC TEMP[7].x, TEMP[4].xxxx
38: MOV TEMP[5].y, TEMP[7].xxxx
39: FLR TEMP[4].x, TEMP[4].xxxx
40: MOV TEMP[5].z, TEMP[4].xxxx
41: MOV TEMP[6].xyz, TEMP[5].xyzx
42: ENDIF
43: MOV TEMP[4].x, CONST[8].xxxx
44: MUL TEMP[5].x, TEMP[2].yyyy, CONST[8].xxxx
45: MOV TEMP[7].x, TEMP[5].xxxx
46: FLR TEMP[5].x, TEMP[5].xxxx
47: MUL TEMP[5].x, TEMP[5].xxxx, CONST[8].xxxx
48: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[1].xxxx
49: UIF TEMP[8].xxxx :0
50: MOV TEMP[4].x, CONST[9].xxxx
51: ADD TEMP[8].x, TEMP[2].yyyy, IMM[1].yyyy
52: MUL TEMP[8].x, TEMP[8].xxxx, CONST[9].xxxx
53: MOV TEMP[7].x, TEMP[8].xxxx
54: FLR TEMP[9].x, TEMP[8].xxxx
55: MUL TEMP[9].x, TEMP[9].xxxx, CONST[9].xxxx
56: MOV TEMP[5].x, TEMP[9].xxxx
57: FRC TEMP[8].x, TEMP[8].xxxx
58: FRC TEMP[10].x, TEMP[9].xxxx
59: MOV TEMP[8].y, TEMP[10].xxxx
60: FLR TEMP[9].x, TEMP[9].xxxx
61: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
62: MOV TEMP[8].z, TEMP[9].xxxx
63: MOV TEMP[8].xyz, TEMP[8].xyzx
64: ELSE :0
65: FRC TEMP[7].x, TEMP[7].xxxx
66: FRC TEMP[9].x, TEMP[5].xxxx
67: MOV TEMP[7].y, TEMP[9].xxxx
68: FLR TEMP[5].x, TEMP[5].xxxx
69: MOV TEMP[7].z, TEMP[5].xxxx
70: MOV TEMP[8].xyz, TEMP[7].xyzx
71: ENDIF
72: MOV TEMP[5].x, CONST[8].xxxx
73: MUL TEMP[7].x, TEMP[2].zzzz, CONST[8].xxxx
74: MOV TEMP[9].x, TEMP[7].xxxx
75: FLR TEMP[7].x, TEMP[7].xxxx
76: MUL TEMP[7].x, TEMP[7].xxxx, CONST[8].xxxx
77: FSGE TEMP[10].x, TEMP[2].zzzz, IMM[1].xxxx
78: UIF TEMP[10].xxxx :0
79: MOV TEMP[5].x, CONST[9].xxxx
80: ADD TEMP[2].x, TEMP[2].zzzz, IMM[1].yyyy
81: MUL TEMP[2].x, TEMP[2].xxxx, CONST[9].xxxx
82: MOV TEMP[9].x, TEMP[2].xxxx
83: FLR TEMP[10].x, TEMP[2].xxxx
84: MUL TEMP[10].x, TEMP[10].xxxx, CONST[9].xxxx
85: MOV TEMP[7].x, TEMP[10].xxxx
86: FRC TEMP[2].x, TEMP[2].xxxx
87: FRC TEMP[11].x, TEMP[10].xxxx
88: MOV TEMP[2].y, TEMP[11].xxxx
89: FLR TEMP[10].x, TEMP[10].xxxx
90: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
91: MOV TEMP[2].z, TEMP[10].xxxx
92: MOV TEMP[2].xyz, TEMP[2].xyzx
93: ELSE :0
94: FRC TEMP[9].x, TEMP[9].xxxx
95: FRC TEMP[10].x, TEMP[7].xxxx
96: MOV TEMP[9].y, TEMP[10].xxxx
97: FLR TEMP[7].x, TEMP[7].xxxx
98: MOV TEMP[9].z, TEMP[7].xxxx
99: MOV TEMP[2].xyz, TEMP[9].xyzx
100: ENDIF
101: ADD TEMP[7].xyz, IN[4].xyzz, -CONST[0].xyzz
102: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[7].xyzz
103: MUL TEMP[7].x, CONST[14].xxxx, TEMP[7].xxxx
104: LG2 TEMP[7].x, TEMP[7].xxxx
105: MUL TEMP[7].x, TEMP[7].xxxx, IMM[1].wwww
106: MUL TEMP[7].x, TEMP[7].xxxx, CONST[13].xxxx
107: MOV TEMP[9].xy, IN[4].xyxx
108: MOV TEMP[10].x, IMM[2].xxxx
109: FSNE TEMP[11].x, CONST[8].xxxx, TEMP[3].xxxx
110: UIF TEMP[11].xxxx :0
111: MOV TEMP[10].x, IMM[2].yyyy
112: RCP TEMP[11].x, CONST[11].xxxx
113: MUL TEMP[9].xy, IN[4].xyyy, TEMP[11].xxxx
114: ELSE :0
115: RCP TEMP[11].x, CONST[10].xxxx
116: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx
117: ENDIF
118: FRC TEMP[9].xy, TEMP[9].xyyy
119: MUL TEMP[11].x, CONST[12].xxxx, IMM[2].wwww
120: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[10].xxxx
121: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx
122: MUL TEMP[10].x, TEMP[10].xxxx, CONST[12].xxxx
123: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[11].xxxx, TEMP[10].xxxx
124: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[3].xxxx, TEMP[6].xyyy
125: MOV TEMP[10].xy, TEMP[9].xyyy
126: MOV TEMP[10].w, TEMP[7].xxxx
127: TXL TEMP[10], TEMP[10], SAMP[4], 2D
128: FSEQ TEMP[11].x, TEMP[6].zzzz, IMM[1].zzzz
129: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
130: MOV TEMP[12].xy, TEMP[9].xyyy
131: MOV TEMP[12].w, TEMP[7].xxxx
132: TXL TEMP[12], TEMP[12], SAMP[3], 2D
133: FSEQ TEMP[13].x, TEMP[6].zzzz, IMM[3].xxxx
134: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
135: MOV TEMP[14].xy, TEMP[9].xyyy
136: MOV TEMP[14].w, TEMP[7].xxxx
137: TXL TEMP[14], TEMP[14], SAMP[2], 2D
138: FSEQ TEMP[15].x, TEMP[6].zzzz, IMM[2].wwww
139: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
140: MOV TEMP[16].xy, TEMP[9].xyyy
141: MOV TEMP[16].w, TEMP[7].xxxx
142: TXL TEMP[16], TEMP[16], SAMP[1], 2D
143: FSEQ TEMP[17].x, TEMP[6].zzzz, IMM[2].zzzz
144: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
145: MOV TEMP[9].xy, TEMP[9].xyyy
146: MOV TEMP[9].w, TEMP[7].xxxx
147: TXL TEMP[9], TEMP[9], SAMP[0], 2D
148: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[0].zzzz
149: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
150: MUL TEMP[9], TEMP[9], TEMP[18].xxxx
151: MAD TEMP[9], TEMP[16], TEMP[17].xxxx, TEMP[9]
152: MAD TEMP[9], TEMP[14], TEMP[15].xxxx, TEMP[9]
153: MAD TEMP[9], TEMP[12], TEMP[13].xxxx, TEMP[9]
154: MAD TEMP[9], TEMP[10], TEMP[11].xxxx, TEMP[9]
155: MOV TEMP[10].xy, IN[4].zyzz
156: MOV TEMP[11].x, IMM[2].xxxx
157: FSNE TEMP[12].x, CONST[8].xxxx, TEMP[3].xxxx
158: UIF TEMP[12].xxxx :0
159: MOV TEMP[11].x, IMM[2].yyyy
160: RCP TEMP[12].x, CONST[11].xxxx
161: MUL TEMP[10].xy, IN[4].zyyy, TEMP[12].xxxx
162: ELSE :0
163: RCP TEMP[12].x, CONST[10].xxxx
164: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx
165: ENDIF
166: FRC TEMP[10].xy, TEMP[10].xyyy
167: MUL TEMP[12].x, CONST[12].xxxx, IMM[2].wwww
168: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
169: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
170: MUL TEMP[11].x, TEMP[11].xxxx, CONST[12].xxxx
171: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
172: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[3].xxxx, TEMP[6].xyyy
173: MOV TEMP[11].xy, TEMP[10].xyyy
174: MOV TEMP[11].w, TEMP[7].xxxx
175: TXL TEMP[11], TEMP[11], SAMP[4], 2D
176: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz
177: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
178: MOV TEMP[13].xy, TEMP[10].xyyy
179: MOV TEMP[13].w, TEMP[7].xxxx
180: TXL TEMP[13], TEMP[13], SAMP[3], 2D
181: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx
182: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
183: MOV TEMP[15].xy, TEMP[10].xyyy
184: MOV TEMP[15].w, TEMP[7].xxxx
185: TXL TEMP[15], TEMP[15], SAMP[2], 2D
186: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww
187: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
188: MOV TEMP[17].xy, TEMP[10].xyyy
189: MOV TEMP[17].w, TEMP[7].xxxx
190: TXL TEMP[17], TEMP[17], SAMP[1], 2D
191: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz
192: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
193: MOV TEMP[10].xy, TEMP[10].xyyy
194: MOV TEMP[10].w, TEMP[7].xxxx
195: TXL TEMP[10], TEMP[10], SAMP[0], 2D
196: FSEQ TEMP[19].x, TEMP[6].zzzz, IMM[0].zzzz
197: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
198: MUL TEMP[10], TEMP[10], TEMP[19].xxxx
199: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10]
200: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10]
201: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10]
202: MAD TEMP[10], TEMP[11], TEMP[12].xxxx, TEMP[10]
203: MOV TEMP[11].xy, IN[4].zxzz
204: MOV TEMP[12].x, IMM[2].xxxx
205: FSNE TEMP[13].x, CONST[8].xxxx, TEMP[3].xxxx
206: UIF TEMP[13].xxxx :0
207: MOV TEMP[12].x, IMM[2].yyyy
208: RCP TEMP[13].x, CONST[11].xxxx
209: MUL TEMP[11].xy, IN[4].zxxx, TEMP[13].xxxx
210: ELSE :0
211: RCP TEMP[13].x, CONST[10].xxxx
212: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
213: ENDIF
214: FRC TEMP[11].xy, TEMP[11].xyyy
215: MUL TEMP[13].x, CONST[12].xxxx, IMM[2].wwww
216: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
217: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
218: MUL TEMP[12].x, TEMP[12].xxxx, CONST[12].xxxx
219: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
220: MAD TEMP[3].xy, TEMP[11].xyyy, TEMP[3].xxxx, TEMP[6].xyyy
221: MOV TEMP[11].xy, TEMP[3].xyyy
222: MOV TEMP[11].w, TEMP[7].xxxx
223: TXL TEMP[11], TEMP[11], SAMP[4], 2D
224: FSEQ TEMP[12].x, TEMP[6].zzzz, IMM[1].zzzz
225: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
226: MOV TEMP[13].xy, TEMP[3].xyyy
227: MOV TEMP[13].w, TEMP[7].xxxx
228: TXL TEMP[13], TEMP[13], SAMP[3], 2D
229: FSEQ TEMP[14].x, TEMP[6].zzzz, IMM[3].xxxx
230: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
231: MOV TEMP[15].xy, TEMP[3].xyyy
232: MOV TEMP[15].w, TEMP[7].xxxx
233: TXL TEMP[15], TEMP[15], SAMP[2], 2D
234: FSEQ TEMP[16].x, TEMP[6].zzzz, IMM[2].wwww
235: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
236: MOV TEMP[17].xy, TEMP[3].xyyy
237: MOV TEMP[17].w, TEMP[7].xxxx
238: TXL TEMP[17], TEMP[17], SAMP[1], 2D
239: FSEQ TEMP[18].x, TEMP[6].zzzz, IMM[2].zzzz
240: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
241: MOV TEMP[3].xy, TEMP[3].xyyy
242: MOV TEMP[3].w, TEMP[7].xxxx
243: TXL TEMP[3], TEMP[3], SAMP[0], 2D
244: FSEQ TEMP[6].x, TEMP[6].zzzz, IMM[0].zzzz
245: AND TEMP[6].x, TEMP[6].xxxx, IMM[2].zzzz
246: MUL TEMP[3], TEMP[3], TEMP[6].xxxx
247: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3]
248: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3]
249: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3]
250: MAD TEMP[3], TEMP[11], TEMP[12].xxxx, TEMP[3]
251: MOV TEMP[6].xy, IN[4].xyxx
252: MOV TEMP[11].x, IMM[2].xxxx
253: FSNE TEMP[12].x, CONST[8].xxxx, TEMP[4].xxxx
254: UIF TEMP[12].xxxx :0
255: MOV TEMP[11].x, IMM[2].yyyy
256: RCP TEMP[12].x, CONST[11].xxxx
257: MUL TEMP[6].xy, IN[4].xyyy, TEMP[12].xxxx
258: ELSE :0
259: RCP TEMP[12].x, CONST[10].xxxx
260: MUL TEMP[6].xy, TEMP[6].xyyy, TEMP[12].xxxx
261: ENDIF
262: FRC TEMP[6].xy, TEMP[6].xyyy
263: MUL TEMP[12].x, CONST[12].xxxx, IMM[2].wwww
264: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
265: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
266: MUL TEMP[11].x, TEMP[11].xxxx, CONST[12].xxxx
267: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
268: MAD TEMP[6].xy, TEMP[6].xyyy, TEMP[4].xxxx, TEMP[8].xyyy
269: MOV TEMP[11].xy, TEMP[6].xyyy
270: MOV TEMP[11].w, TEMP[7].xxxx
271: TXL TEMP[11], TEMP[11], SAMP[4], 2D
272: FSEQ TEMP[12].x, TEMP[8].zzzz, IMM[1].zzzz
273: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
274: MOV TEMP[13].xy, TEMP[6].xyyy
275: MOV TEMP[13].w, TEMP[7].xxxx
276: TXL TEMP[13], TEMP[13], SAMP[3], 2D
277: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[3].xxxx
278: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
279: MOV TEMP[15].xy, TEMP[6].xyyy
280: MOV TEMP[15].w, TEMP[7].xxxx
281: TXL TEMP[15], TEMP[15], SAMP[2], 2D
282: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[2].wwww
283: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
284: MOV TEMP[17].xy, TEMP[6].xyyy
285: MOV TEMP[17].w, TEMP[7].xxxx
286: TXL TEMP[17], TEMP[17], SAMP[1], 2D
287: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].zzzz
288: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
289: MOV TEMP[6].xy, TEMP[6].xyyy
290: MOV TEMP[6].w, TEMP[7].xxxx
291: TXL TEMP[6], TEMP[6], SAMP[0], 2D
292: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[0].zzzz
293: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
294: MUL TEMP[6], TEMP[6], TEMP[19].xxxx
295: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6]
296: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6]
297: MAD TEMP[6], TEMP[13], TEMP[14].xxxx, TEMP[6]
298: MAD TEMP[6], TEMP[11], TEMP[12].xxxx, TEMP[6]
299: MOV TEMP[11].xy, IN[4].zyzz
300: MOV TEMP[12].x, IMM[2].xxxx
301: FSNE TEMP[13].x, CONST[8].xxxx, TEMP[4].xxxx
302: UIF TEMP[13].xxxx :0
303: MOV TEMP[12].x, IMM[2].yyyy
304: RCP TEMP[13].x, CONST[11].xxxx
305: MUL TEMP[11].xy, IN[4].zyyy, TEMP[13].xxxx
306: ELSE :0
307: RCP TEMP[13].x, CONST[10].xxxx
308: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
309: ENDIF
310: FRC TEMP[11].xy, TEMP[11].xyyy
311: MUL TEMP[13].x, CONST[12].xxxx, IMM[2].wwww
312: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
313: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
314: MUL TEMP[12].x, TEMP[12].xxxx, CONST[12].xxxx
315: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
316: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[8].xyyy
317: MOV TEMP[12].xy, TEMP[11].xyyy
318: MOV TEMP[12].w, TEMP[7].xxxx
319: TXL TEMP[12], TEMP[12], SAMP[4], 2D
320: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz
321: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
322: MOV TEMP[14].xy, TEMP[11].xyyy
323: MOV TEMP[14].w, TEMP[7].xxxx
324: TXL TEMP[14], TEMP[14], SAMP[3], 2D
325: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx
326: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
327: MOV TEMP[16].xy, TEMP[11].xyyy
328: MOV TEMP[16].w, TEMP[7].xxxx
329: TXL TEMP[16], TEMP[16], SAMP[2], 2D
330: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww
331: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
332: MOV TEMP[18].xy, TEMP[11].xyyy
333: MOV TEMP[18].w, TEMP[7].xxxx
334: TXL TEMP[18], TEMP[18], SAMP[1], 2D
335: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz
336: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
337: MOV TEMP[11].xy, TEMP[11].xyyy
338: MOV TEMP[11].w, TEMP[7].xxxx
339: TXL TEMP[11], TEMP[11], SAMP[0], 2D
340: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[0].zzzz
341: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
342: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
343: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
344: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
345: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
346: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11]
347: MOV TEMP[12].xy, IN[4].zxzz
348: MOV TEMP[13].x, IMM[2].xxxx
349: FSNE TEMP[14].x, CONST[8].xxxx, TEMP[4].xxxx
350: UIF TEMP[14].xxxx :0
351: MOV TEMP[13].x, IMM[2].yyyy
352: RCP TEMP[14].x, CONST[11].xxxx
353: MUL TEMP[12].xy, IN[4].zxxx, TEMP[14].xxxx
354: ELSE :0
355: RCP TEMP[14].x, CONST[10].xxxx
356: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
357: ENDIF
358: FRC TEMP[12].xy, TEMP[12].xyyy
359: MUL TEMP[14].x, CONST[12].xxxx, IMM[2].wwww
360: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
361: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
362: MUL TEMP[13].x, TEMP[13].xxxx, CONST[12].xxxx
363: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
364: MAD TEMP[4].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[8].xyyy
365: MOV TEMP[12].xy, TEMP[4].xyyy
366: MOV TEMP[12].w, TEMP[7].xxxx
367: TXL TEMP[12], TEMP[12], SAMP[4], 2D
368: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz
369: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
370: MOV TEMP[14].xy, TEMP[4].xyyy
371: MOV TEMP[14].w, TEMP[7].xxxx
372: TXL TEMP[14], TEMP[14], SAMP[3], 2D
373: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx
374: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
375: MOV TEMP[16].xy, TEMP[4].xyyy
376: MOV TEMP[16].w, TEMP[7].xxxx
377: TXL TEMP[16], TEMP[16], SAMP[2], 2D
378: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww
379: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
380: MOV TEMP[18].xy, TEMP[4].xyyy
381: MOV TEMP[18].w, TEMP[7].xxxx
382: TXL TEMP[18], TEMP[18], SAMP[1], 2D
383: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz
384: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
385: MOV TEMP[4].xy, TEMP[4].xyyy
386: MOV TEMP[4].w, TEMP[7].xxxx
387: TXL TEMP[4], TEMP[4], SAMP[0], 2D
388: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[0].zzzz
389: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz
390: MUL TEMP[4], TEMP[4], TEMP[8].xxxx
391: MAD TEMP[4], TEMP[18], TEMP[19].xxxx, TEMP[4]
392: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4]
393: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4]
394: MAD TEMP[4], TEMP[12], TEMP[13].xxxx, TEMP[4]
395: MOV TEMP[8].xy, IN[4].xyxx
396: MOV TEMP[12].x, IMM[2].xxxx
397: FSNE TEMP[13].x, CONST[8].xxxx, TEMP[5].xxxx
398: UIF TEMP[13].xxxx :0
399: MOV TEMP[12].x, IMM[2].yyyy
400: RCP TEMP[13].x, CONST[11].xxxx
401: MUL TEMP[8].xy, IN[4].xyyy, TEMP[13].xxxx
402: ELSE :0
403: RCP TEMP[13].x, CONST[10].xxxx
404: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx
405: ENDIF
406: FRC TEMP[8].xy, TEMP[8].xyyy
407: MUL TEMP[13].x, CONST[12].xxxx, IMM[2].wwww
408: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
409: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
410: MUL TEMP[12].x, TEMP[12].xxxx, CONST[12].xxxx
411: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
412: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[5].xxxx, TEMP[2].xyyy
413: MOV TEMP[12].xy, TEMP[8].xyyy
414: MOV TEMP[12].w, TEMP[7].xxxx
415: TXL TEMP[12], TEMP[12], SAMP[4], 2D
416: FSEQ TEMP[13].x, TEMP[2].zzzz, IMM[1].zzzz
417: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
418: MOV TEMP[14].xy, TEMP[8].xyyy
419: MOV TEMP[14].w, TEMP[7].xxxx
420: TXL TEMP[14], TEMP[14], SAMP[3], 2D
421: FSEQ TEMP[15].x, TEMP[2].zzzz, IMM[3].xxxx
422: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
423: MOV TEMP[16].xy, TEMP[8].xyyy
424: MOV TEMP[16].w, TEMP[7].xxxx
425: TXL TEMP[16], TEMP[16], SAMP[2], 2D
426: FSEQ TEMP[17].x, TEMP[2].zzzz, IMM[2].wwww
427: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
428: MOV TEMP[18].xy, TEMP[8].xyyy
429: MOV TEMP[18].w, TEMP[7].xxxx
430: TXL TEMP[18], TEMP[18], SAMP[1], 2D
431: FSEQ TEMP[19].x, TEMP[2].zzzz, IMM[2].zzzz
432: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
433: MOV TEMP[8].xy, TEMP[8].xyyy
434: MOV TEMP[8].w, TEMP[7].xxxx
435: TXL TEMP[8], TEMP[8], SAMP[0], 2D
436: FSEQ TEMP[20].x, TEMP[2].zzzz, IMM[0].zzzz
437: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
438: MUL TEMP[8], TEMP[8], TEMP[20].xxxx
439: MAD TEMP[8], TEMP[18], TEMP[19].xxxx, TEMP[8]
440: MAD TEMP[8], TEMP[16], TEMP[17].xxxx, TEMP[8]
441: MAD TEMP[8], TEMP[14], TEMP[15].xxxx, TEMP[8]
442: MAD TEMP[8], TEMP[12], TEMP[13].xxxx, TEMP[8]
443: MOV TEMP[12].xy, IN[4].zyzz
444: MOV TEMP[13].x, IMM[2].xxxx
445: FSNE TEMP[14].x, CONST[8].xxxx, TEMP[5].xxxx
446: UIF TEMP[14].xxxx :0
447: MOV TEMP[13].x, IMM[2].yyyy
448: RCP TEMP[14].x, CONST[11].xxxx
449: MUL TEMP[12].xy, IN[4].zyyy, TEMP[14].xxxx
450: ELSE :0
451: RCP TEMP[14].x, CONST[10].xxxx
452: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
453: ENDIF
454: FRC TEMP[12].xy, TEMP[12].xyyy
455: MUL TEMP[14].x, CONST[12].xxxx, IMM[2].wwww
456: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
457: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
458: MUL TEMP[13].x, TEMP[13].xxxx, CONST[12].xxxx
459: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
460: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[2].xyyy
461: MOV TEMP[13].xy, TEMP[12].xyyy
462: MOV TEMP[13].w, TEMP[7].xxxx
463: TXL TEMP[13], TEMP[13], SAMP[4], 2D
464: FSEQ TEMP[14].x, TEMP[2].zzzz, IMM[1].zzzz
465: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
466: MOV TEMP[15].xy, TEMP[12].xyyy
467: MOV TEMP[15].w, TEMP[7].xxxx
468: TXL TEMP[15], TEMP[15], SAMP[3], 2D
469: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[3].xxxx
470: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
471: MOV TEMP[17].xy, TEMP[12].xyyy
472: MOV TEMP[17].w, TEMP[7].xxxx
473: TXL TEMP[17], TEMP[17], SAMP[2], 2D
474: FSEQ TEMP[18].x, TEMP[2].zzzz, IMM[2].wwww
475: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
476: MOV TEMP[19].xy, TEMP[12].xyyy
477: MOV TEMP[19].w, TEMP[7].xxxx
478: TXL TEMP[19], TEMP[19], SAMP[1], 2D
479: FSEQ TEMP[20].x, TEMP[2].zzzz, IMM[2].zzzz
480: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
481: MOV TEMP[12].xy, TEMP[12].xyyy
482: MOV TEMP[12].w, TEMP[7].xxxx
483: TXL TEMP[12], TEMP[12], SAMP[0], 2D
484: FSEQ TEMP[21].x, TEMP[2].zzzz, IMM[0].zzzz
485: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
486: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
487: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
488: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
489: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
490: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12]
491: MOV TEMP[13].xy, IN[4].zxzz
492: MOV TEMP[14].x, IMM[2].xxxx
493: FSNE TEMP[15].x, CONST[8].xxxx, TEMP[5].xxxx
494: UIF TEMP[15].xxxx :0
495: MOV TEMP[14].x, IMM[2].yyyy
496: RCP TEMP[15].x, CONST[11].xxxx
497: MUL TEMP[13].xy, IN[4].zxxx, TEMP[15].xxxx
498: ELSE :0
499: RCP TEMP[15].x, CONST[10].xxxx
500: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
501: ENDIF
502: FRC TEMP[13].xy, TEMP[13].xyyy
503: MUL TEMP[15].x, CONST[12].xxxx, IMM[2].wwww
504: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
505: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
506: MUL TEMP[14].x, TEMP[14].xxxx, CONST[12].xxxx
507: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
508: MAD TEMP[5].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[2].xyyy
509: MOV TEMP[13].xy, TEMP[5].xyyy
510: MOV TEMP[13].w, TEMP[7].xxxx
511: TXL TEMP[13], TEMP[13], SAMP[4], 2D
512: FSEQ TEMP[14].x, TEMP[2].zzzz, IMM[1].zzzz
513: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
514: MOV TEMP[15].xy, TEMP[5].xyyy
515: MOV TEMP[15].w, TEMP[7].xxxx
516: TXL TEMP[15], TEMP[15], SAMP[3], 2D
517: FSEQ TEMP[16].x, TEMP[2].zzzz, IMM[3].xxxx
518: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
519: MOV TEMP[17].xy, TEMP[5].xyyy
520: MOV TEMP[17].w, TEMP[7].xxxx
521: TXL TEMP[17], TEMP[17], SAMP[2], 2D
522: FSEQ TEMP[18].x, TEMP[2].zzzz, IMM[2].wwww
523: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
524: MOV TEMP[19].xy, TEMP[5].xyyy
525: MOV TEMP[19].w, TEMP[7].xxxx
526: TXL TEMP[19], TEMP[19], SAMP[1], 2D
527: FSEQ TEMP[20].x, TEMP[2].zzzz, IMM[2].zzzz
528: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
529: MOV TEMP[5].xy, TEMP[5].xyyy
530: MOV TEMP[5].w, TEMP[7].xxxx
531: TXL TEMP[5], TEMP[5], SAMP[0], 2D
532: FSEQ TEMP[2].x, TEMP[2].zzzz, IMM[0].zzzz
533: AND TEMP[2].x, TEMP[2].xxxx, IMM[2].zzzz
534: MUL TEMP[2], TEMP[5], TEMP[2].xxxx
535: MAD TEMP[2], TEMP[19], TEMP[20].xxxx, TEMP[2]
536: MAD TEMP[2], TEMP[17], TEMP[18].xxxx, TEMP[2]
537: MAD TEMP[2], TEMP[15], TEMP[16].xxxx, TEMP[2]
538: MAD TEMP[2], TEMP[13], TEMP[14].xxxx, TEMP[2]
539: MUL TEMP[5], TEMP[8], TEMP[1].zzzz
540: MAD TEMP[5], TEMP[12], TEMP[1].xxxx, TEMP[5]
541: MAD TEMP[2], TEMP[2], TEMP[1].yyyy, TEMP[5]
542: MUL TEMP[5], TEMP[6], TEMP[1].zzzz
543: MAD TEMP[5], TEMP[11], TEMP[1].xxxx, TEMP[5]
544: MAD TEMP[4], TEMP[4], TEMP[1].yyyy, TEMP[5]
545: MUL TEMP[5], TEMP[9], TEMP[1].zzzz
546: MAD TEMP[5], TEMP[10], TEMP[1].xxxx, TEMP[5]
547: MAD TEMP[1], TEMP[3], TEMP[1].yyyy, TEMP[5]
548: MUL TEMP[1], IN[1].xxxx, TEMP[1]
549: MAD TEMP[1], IN[1].yyyy, TEMP[4], TEMP[1]
550: MAD TEMP[1].xyz, IN[1].zzzz, TEMP[2], TEMP[1]
551: MOV TEMP[2].xy, IN[2].xyyy
552: MOV TEMP[2].w, IN[2].wwww
553: TXP TEMP[2], TEMP[2], SAMP[5], 2D
554: LG2 TEMP[3].x, TEMP[2].xxxx
555: LG2 TEMP[3].y, TEMP[2].yyyy
556: LG2 TEMP[3].z, TEMP[2].zzzz
557: LG2 TEMP[3].w, TEMP[2].wwww
558: MOV TEMP[2], -TEMP[3]
559: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz
560: MUL TEMP[3].xyz, TEMP[0].xyzz, IMM[3].yyyy
561: MUL TEMP[4].xyz, CONST[17], IMM[2].wwww
562: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[3].xyzz
563: MUL TEMP[3].xyz, TEMP[4].xyzz, TEMP[3].xyzz
564: MAX TEMP[3].xyz, TEMP[3].xyzz, CONST[15].xyzz
565: MIN TEMP[3].xyz, TEMP[3].xyzz, IMM[2].zzzz
566: MOV_SAT TEMP[2].x, TEMP[2].wwww
567: MUL TEMP[2].x, TEMP[2].xxxx, IN[1].wwww
568: MUL TEMP[4].xyz, TEMP[0].xyzz, IMM[0].wwww
569: MUL TEMP[4].xyz, TEMP[1].xyzz, TEMP[4].xyzz
570: MAD TEMP[2].xyz, TEMP[0].xyzz, TEMP[2].xxxx, TEMP[4].xyzz
571: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz, TEMP[2].xyzz
572: MAD TEMP[1].x, IN[3].xxxx, CONST[2].zzzz, CONST[2].wwww
573: MOV_SAT TEMP[1].x, TEMP[1].xxxx
574: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[1].xyzz
575: MOV TEMP[0].w, IMM[2].zzzz
576: MOV OUT[0], TEMP[0]
577: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%46 = load <8 x i32>, <8 x i32> addrspace(2)* %45, align 32, !tbaa !0
%47 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%48 = load <4 x i32>, <4 x i32> addrspace(2)* %47, align 16, !tbaa !0
%49 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%50 = load <8 x i32>, <8 x i32> addrspace(2)* %49, align 32, !tbaa !0
%51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%52 = load <4 x i32>, <4 x i32> addrspace(2)* %51, align 16, !tbaa !0
%53 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%54 = load <8 x i32>, <8 x i32> addrspace(2)* %53, align 32, !tbaa !0
%55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%56 = load <4 x i32>, <4 x i32> addrspace(2)* %55, align 16, !tbaa !0
%57 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%58 = load <8 x i32>, <8 x i32> addrspace(2)* %57, align 32, !tbaa !0
%59 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%60 = load <4 x i32>, <4 x i32> addrspace(2)* %59, align 16, !tbaa !0
%61 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%62 = load <8 x i32>, <8 x i32> addrspace(2)* %61, align 32, !tbaa !0
%63 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%64 = load <4 x i32>, <4 x i32> addrspace(2)* %63, align 16, !tbaa !0
%65 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%66 = bitcast <8 x i32> addrspace(2)* %65 to <32 x i8> addrspace(2)*
%67 = load <32 x i8>, <32 x i8> addrspace(2)* %66, align 32, !tbaa !0
%68 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%69 = bitcast <4 x i32> addrspace(2)* %68 to <16 x i8> addrspace(2)*
%70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !tbaa !0
%71 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%74 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%75 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%79 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%80 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%81 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%82 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%83 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%84 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%85 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%86 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%87 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%88 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%89 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%90 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%91 = call float @llvm.fabs.f32(float %82)
%92 = call float @llvm.fabs.f32(float %83)
%93 = call float @llvm.fabs.f32(float %84)
%94 = fmul float %91, %91
%95 = fmul float %92, %92
%96 = fadd float %95, %94
%97 = fmul float %93, %93
%98 = fadd float %96, %97
%99 = call float @llvm.AMDGPU.rsq.clamped.f32(float %98)
%100 = fmul float %91, %99
%101 = fadd float %100, 0xBFC99999A0000000
%102 = fmul float %92, %99
%103 = fadd float %102, 0xBFC99999A0000000
%104 = fmul float %93, %99
%105 = fadd float %104, 0xBFC99999A0000000
%106 = fmul float %101, 7.000000e+00
%107 = fmul float %103, 7.000000e+00
%108 = fmul float %105, 7.000000e+00
%109 = call float @llvm.maxnum.f32(float %106, float 0.000000e+00)
%110 = call float @llvm.maxnum.f32(float %107, float 0.000000e+00)
%111 = call float @llvm.maxnum.f32(float %108, float 0.000000e+00)
%112 = fadd float %109, %110
%113 = fadd float %112, %111
%114 = fdiv float 1.000000e+00, %113
%115 = fmul float %109, %114
%116 = fmul float %110, %114
%117 = fmul float %111, %114
%118 = fadd float %71, 5.000000e-01
%119 = fadd float %72, 5.000000e-01
%120 = fadd float %73, 5.000000e-01
%121 = call float @llvm.floor.f32(float %118)
%122 = call float @llvm.floor.f32(float %119)
%123 = call float @llvm.floor.f32(float %120)
%124 = fmul float %121, %32
%125 = call float @llvm.floor.f32(float %124)
%126 = fmul float %125, %32
%127 = fcmp ult float %121, 6.400000e+01
br i1 %127, label %ELSE, label %IF
IF: ; preds = %main_body
%128 = fadd float %121, -6.400000e+01
%129 = fmul float %128, %33
%130 = call float @llvm.floor.f32(float %129)
%131 = fmul float %130, %33
%132 = call float @llvm.floor.f32(float %129)
%133 = fsub float %129, %132
%134 = call float @llvm.floor.f32(float %131)
%135 = fsub float %131, %134
%136 = call float @llvm.floor.f32(float %131)
%137 = fadd float %136, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%138 = call float @llvm.floor.f32(float %124)
%139 = fsub float %124, %138
%140 = call float @llvm.floor.f32(float %126)
%141 = fsub float %126, %140
%142 = call float @llvm.floor.f32(float %126)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp12.0 = phi float [ %33, %IF ], [ %32, %ELSE ]
%temp24.0 = phi float [ %133, %IF ], [ %139, %ELSE ]
%temp25.0 = phi float [ %135, %IF ], [ %141, %ELSE ]
%temp26.0 = phi float [ %137, %IF ], [ %142, %ELSE ]
%143 = fmul float %122, %32
%144 = call float @llvm.floor.f32(float %143)
%145 = fmul float %144, %32
%146 = fcmp ult float %122, 6.400000e+01
br i1 %146, label %ELSE90, label %IF89
IF89: ; preds = %ENDIF
%147 = fadd float %122, -6.400000e+01
%148 = fmul float %147, %33
%149 = call float @llvm.floor.f32(float %148)
%150 = fmul float %149, %33
%151 = call float @llvm.floor.f32(float %148)
%152 = fsub float %148, %151
%153 = call float @llvm.floor.f32(float %150)
%154 = fsub float %150, %153
%155 = call float @llvm.floor.f32(float %150)
%156 = fadd float %155, 4.000000e+00
br label %ENDIF88
ELSE90: ; preds = %ENDIF
%157 = call float @llvm.floor.f32(float %143)
%158 = fsub float %143, %157
%159 = call float @llvm.floor.f32(float %145)
%160 = fsub float %145, %159
%161 = call float @llvm.floor.f32(float %145)
br label %ENDIF88
ENDIF88: ; preds = %ELSE90, %IF89
%temp16.0 = phi float [ %33, %IF89 ], [ %32, %ELSE90 ]
%temp32.0 = phi float [ %152, %IF89 ], [ %158, %ELSE90 ]
%temp33.0 = phi float [ %154, %IF89 ], [ %160, %ELSE90 ]
%temp34.0 = phi float [ %156, %IF89 ], [ %161, %ELSE90 ]
%162 = fmul float %123, %32
%163 = call float @llvm.floor.f32(float %162)
%164 = fmul float %163, %32
%165 = fcmp ult float %123, 6.400000e+01
br i1 %165, label %ELSE93, label %IF92
IF92: ; preds = %ENDIF88
%166 = fadd float %123, -6.400000e+01
%167 = fmul float %166, %33
%168 = call float @llvm.floor.f32(float %167)
%169 = fmul float %168, %33
%170 = call float @llvm.floor.f32(float %167)
%171 = fsub float %167, %170
%172 = call float @llvm.floor.f32(float %169)
%173 = fsub float %169, %172
%174 = call float @llvm.floor.f32(float %169)
%175 = fadd float %174, 4.000000e+00
br label %ENDIF91
ELSE93: ; preds = %ENDIF88
%176 = call float @llvm.floor.f32(float %162)
%177 = fsub float %162, %176
%178 = call float @llvm.floor.f32(float %164)
%179 = fsub float %164, %178
%180 = call float @llvm.floor.f32(float %164)
br label %ENDIF91
ENDIF91: ; preds = %ELSE93, %IF92
%temp8.0 = phi float [ %171, %IF92 ], [ %177, %ELSE93 ]
%temp9.0 = phi float [ %173, %IF92 ], [ %179, %ELSE93 ]
%temp10.0 = phi float [ %175, %IF92 ], [ %180, %ELSE93 ]
%temp20.0 = phi float [ %33, %IF92 ], [ %32, %ELSE93 ]
%181 = fsub float %85, %24
%182 = fsub float %86, %25
%183 = fsub float %87, %26
%184 = fmul float %181, %181
%185 = fmul float %182, %182
%186 = fadd float %185, %184
%187 = fmul float %183, %183
%188 = fadd float %186, %187
%189 = fmul float %38, %188
%190 = call float @llvm.log2.f32(float %189)
%191 = fmul float %190, 0x3FE62E4300000000
%192 = fmul float %191, %37
%193 = fcmp une float %32, %temp12.0
%.sink121 = select i1 %193, float %35, float %34
%temp40.0 = select i1 %193, float 1.953125e-03, float 3.906250e-03
%194 = fdiv float 1.000000e+00, %.sink121
%195 = fmul float %85, %194
%196 = fmul float %86, %194
%197 = call float @llvm.floor.f32(float %195)
%198 = fsub float %195, %197
%199 = call float @llvm.floor.f32(float %196)
%200 = fsub float %196, %199
%201 = fmul float %36, 2.000000e+00
%202 = fmul float %201, %temp40.0
%203 = fsub float 1.000000e+00, %202
%204 = fmul float %temp40.0, %36
%205 = fmul float %198, %203
%206 = fadd float %205, %204
%207 = fmul float %200, %203
%208 = fadd float %207, %204
%209 = fmul float %206, %temp12.0
%210 = fadd float %209, %temp24.0
%211 = fmul float %208, %temp12.0
%212 = fadd float %211, %temp25.0
%213 = bitcast float %210 to i32
%214 = bitcast float %212 to i32
%215 = bitcast float %192 to i32
%216 = insertelement <4 x i32> undef, i32 %213, i32 0
%217 = insertelement <4 x i32> %216, i32 %214, i32 1
%218 = insertelement <4 x i32> %217, i32 %215, i32 2
%219 = bitcast <8 x i32> %62 to <32 x i8>
%220 = bitcast <4 x i32> %64 to <16 x i8>
%221 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %218, <32 x i8> %219, <16 x i8> %220, i32 2)
%222 = extractelement <4 x float> %221, i32 0
%223 = extractelement <4 x float> %221, i32 1
%224 = extractelement <4 x float> %221, i32 2
%225 = fcmp oeq float %temp26.0, 4.000000e+00
%226 = select i1 %225, float 1.000000e+00, float 0.000000e+00
%227 = bitcast float %210 to i32
%228 = bitcast float %212 to i32
%229 = bitcast float %192 to i32
%230 = insertelement <4 x i32> undef, i32 %227, i32 0
%231 = insertelement <4 x i32> %230, i32 %228, i32 1
%232 = insertelement <4 x i32> %231, i32 %229, i32 2
%233 = bitcast <8 x i32> %58 to <32 x i8>
%234 = bitcast <4 x i32> %60 to <16 x i8>
%235 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 2)
%236 = extractelement <4 x float> %235, i32 0
%237 = extractelement <4 x float> %235, i32 1
%238 = extractelement <4 x float> %235, i32 2
%239 = fcmp oeq float %temp26.0, 3.000000e+00
%240 = select i1 %239, float 1.000000e+00, float 0.000000e+00
%241 = bitcast float %210 to i32
%242 = bitcast float %212 to i32
%243 = bitcast float %192 to i32
%244 = insertelement <4 x i32> undef, i32 %241, i32 0
%245 = insertelement <4 x i32> %244, i32 %242, i32 1
%246 = insertelement <4 x i32> %245, i32 %243, i32 2
%247 = bitcast <8 x i32> %54 to <32 x i8>
%248 = bitcast <4 x i32> %56 to <16 x i8>
%249 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2)
%250 = extractelement <4 x float> %249, i32 0
%251 = extractelement <4 x float> %249, i32 1
%252 = extractelement <4 x float> %249, i32 2
%253 = fcmp oeq float %temp26.0, 2.000000e+00
%254 = select i1 %253, float 1.000000e+00, float 0.000000e+00
%255 = bitcast float %210 to i32
%256 = bitcast float %212 to i32
%257 = bitcast float %192 to i32
%258 = insertelement <4 x i32> undef, i32 %255, i32 0
%259 = insertelement <4 x i32> %258, i32 %256, i32 1
%260 = insertelement <4 x i32> %259, i32 %257, i32 2
%261 = bitcast <8 x i32> %50 to <32 x i8>
%262 = bitcast <4 x i32> %52 to <16 x i8>
%263 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %260, <32 x i8> %261, <16 x i8> %262, i32 2)
%264 = extractelement <4 x float> %263, i32 0
%265 = extractelement <4 x float> %263, i32 1
%266 = extractelement <4 x float> %263, i32 2
%267 = fcmp oeq float %temp26.0, 1.000000e+00
%268 = select i1 %267, float 1.000000e+00, float 0.000000e+00
%269 = bitcast float %210 to i32
%270 = bitcast float %212 to i32
%271 = bitcast float %192 to i32
%272 = insertelement <4 x i32> undef, i32 %269, i32 0
%273 = insertelement <4 x i32> %272, i32 %270, i32 1
%274 = insertelement <4 x i32> %273, i32 %271, i32 2
%275 = bitcast <8 x i32> %46 to <32 x i8>
%276 = bitcast <4 x i32> %48 to <16 x i8>
%277 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 2)
%278 = extractelement <4 x float> %277, i32 0
%279 = extractelement <4 x float> %277, i32 1
%280 = extractelement <4 x float> %277, i32 2
%281 = fcmp oeq float %temp26.0, 0.000000e+00
%282 = select i1 %281, float 1.000000e+00, float 0.000000e+00
%283 = fmul float %278, %282
%284 = fmul float %279, %282
%285 = fmul float %280, %282
%286 = fmul float %264, %268
%287 = fadd float %286, %283
%288 = fmul float %265, %268
%289 = fadd float %288, %284
%290 = fmul float %266, %268
%291 = fadd float %290, %285
%292 = fmul float %250, %254
%293 = fadd float %292, %287
%294 = fmul float %251, %254
%295 = fadd float %294, %289
%296 = fmul float %252, %254
%297 = fadd float %296, %291
%298 = fmul float %236, %240
%299 = fadd float %298, %293
%300 = fmul float %237, %240
%301 = fadd float %300, %295
%302 = fmul float %238, %240
%303 = fadd float %302, %297
%304 = fmul float %222, %226
%305 = fadd float %304, %299
%306 = fmul float %223, %226
%307 = fadd float %306, %301
%308 = fmul float %224, %226
%309 = fadd float %308, %303
%310 = fcmp une float %32, %temp12.0
%.sink122 = select i1 %310, float %35, float %34
%temp44.0 = select i1 %310, float 1.953125e-03, float 3.906250e-03
%311 = fdiv float 1.000000e+00, %.sink122
%312 = fmul float %87, %311
%313 = fmul float %86, %311
%314 = call float @llvm.floor.f32(float %312)
%315 = fsub float %312, %314
%316 = call float @llvm.floor.f32(float %313)
%317 = fsub float %313, %316
%318 = fmul float %36, 2.000000e+00
%319 = fmul float %318, %temp44.0
%320 = fsub float 1.000000e+00, %319
%321 = fmul float %temp44.0, %36
%322 = fmul float %315, %320
%323 = fadd float %322, %321
%324 = fmul float %317, %320
%325 = fadd float %324, %321
%326 = fmul float %323, %temp12.0
%327 = fadd float %326, %temp24.0
%328 = fmul float %325, %temp12.0
%329 = fadd float %328, %temp25.0
%330 = bitcast float %327 to i32
%331 = bitcast float %329 to i32
%332 = bitcast float %192 to i32
%333 = insertelement <4 x i32> undef, i32 %330, i32 0
%334 = insertelement <4 x i32> %333, i32 %331, i32 1
%335 = insertelement <4 x i32> %334, i32 %332, i32 2
%336 = bitcast <8 x i32> %62 to <32 x i8>
%337 = bitcast <4 x i32> %64 to <16 x i8>
%338 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %335, <32 x i8> %336, <16 x i8> %337, i32 2)
%339 = extractelement <4 x float> %338, i32 0
%340 = extractelement <4 x float> %338, i32 1
%341 = extractelement <4 x float> %338, i32 2
%342 = fcmp oeq float %temp26.0, 4.000000e+00
%343 = select i1 %342, float 1.000000e+00, float 0.000000e+00
%344 = bitcast float %327 to i32
%345 = bitcast float %329 to i32
%346 = bitcast float %192 to i32
%347 = insertelement <4 x i32> undef, i32 %344, i32 0
%348 = insertelement <4 x i32> %347, i32 %345, i32 1
%349 = insertelement <4 x i32> %348, i32 %346, i32 2
%350 = bitcast <8 x i32> %58 to <32 x i8>
%351 = bitcast <4 x i32> %60 to <16 x i8>
%352 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %349, <32 x i8> %350, <16 x i8> %351, i32 2)
%353 = extractelement <4 x float> %352, i32 0
%354 = extractelement <4 x float> %352, i32 1
%355 = extractelement <4 x float> %352, i32 2
%356 = fcmp oeq float %temp26.0, 3.000000e+00
%357 = select i1 %356, float 1.000000e+00, float 0.000000e+00
%358 = bitcast float %327 to i32
%359 = bitcast float %329 to i32
%360 = bitcast float %192 to i32
%361 = insertelement <4 x i32> undef, i32 %358, i32 0
%362 = insertelement <4 x i32> %361, i32 %359, i32 1
%363 = insertelement <4 x i32> %362, i32 %360, i32 2
%364 = bitcast <8 x i32> %54 to <32 x i8>
%365 = bitcast <4 x i32> %56 to <16 x i8>
%366 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %363, <32 x i8> %364, <16 x i8> %365, i32 2)
%367 = extractelement <4 x float> %366, i32 0
%368 = extractelement <4 x float> %366, i32 1
%369 = extractelement <4 x float> %366, i32 2
%370 = fcmp oeq float %temp26.0, 2.000000e+00
%371 = select i1 %370, float 1.000000e+00, float 0.000000e+00
%372 = bitcast float %327 to i32
%373 = bitcast float %329 to i32
%374 = bitcast float %192 to i32
%375 = insertelement <4 x i32> undef, i32 %372, i32 0
%376 = insertelement <4 x i32> %375, i32 %373, i32 1
%377 = insertelement <4 x i32> %376, i32 %374, i32 2
%378 = bitcast <8 x i32> %50 to <32 x i8>
%379 = bitcast <4 x i32> %52 to <16 x i8>
%380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %377, <32 x i8> %378, <16 x i8> %379, i32 2)
%381 = extractelement <4 x float> %380, i32 0
%382 = extractelement <4 x float> %380, i32 1
%383 = extractelement <4 x float> %380, i32 2
%384 = fcmp oeq float %temp26.0, 1.000000e+00
%385 = select i1 %384, float 1.000000e+00, float 0.000000e+00
%386 = bitcast float %327 to i32
%387 = bitcast float %329 to i32
%388 = bitcast float %192 to i32
%389 = insertelement <4 x i32> undef, i32 %386, i32 0
%390 = insertelement <4 x i32> %389, i32 %387, i32 1
%391 = insertelement <4 x i32> %390, i32 %388, i32 2
%392 = bitcast <8 x i32> %46 to <32 x i8>
%393 = bitcast <4 x i32> %48 to <16 x i8>
%394 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %391, <32 x i8> %392, <16 x i8> %393, i32 2)
%395 = extractelement <4 x float> %394, i32 0
%396 = extractelement <4 x float> %394, i32 1
%397 = extractelement <4 x float> %394, i32 2
%398 = fcmp oeq float %temp26.0, 0.000000e+00
%399 = select i1 %398, float 1.000000e+00, float 0.000000e+00
%400 = fmul float %395, %399
%401 = fmul float %396, %399
%402 = fmul float %397, %399
%403 = fmul float %381, %385
%404 = fadd float %403, %400
%405 = fmul float %382, %385
%406 = fadd float %405, %401
%407 = fmul float %383, %385
%408 = fadd float %407, %402
%409 = fmul float %367, %371
%410 = fadd float %409, %404
%411 = fmul float %368, %371
%412 = fadd float %411, %406
%413 = fmul float %369, %371
%414 = fadd float %413, %408
%415 = fmul float %353, %357
%416 = fadd float %415, %410
%417 = fmul float %354, %357
%418 = fadd float %417, %412
%419 = fmul float %355, %357
%420 = fadd float %419, %414
%421 = fmul float %339, %343
%422 = fadd float %421, %416
%423 = fmul float %340, %343
%424 = fadd float %423, %418
%425 = fmul float %341, %343
%426 = fadd float %425, %420
%427 = fcmp une float %32, %temp12.0
%.sink123 = select i1 %427, float %35, float %34
%temp48.0 = select i1 %427, float 1.953125e-03, float 3.906250e-03
%428 = fdiv float 1.000000e+00, %.sink123
%429 = fmul float %87, %428
%430 = fmul float %85, %428
%431 = call float @llvm.floor.f32(float %429)
%432 = fsub float %429, %431
%433 = call float @llvm.floor.f32(float %430)
%434 = fsub float %430, %433
%435 = fmul float %36, 2.000000e+00
%436 = fmul float %435, %temp48.0
%437 = fsub float 1.000000e+00, %436
%438 = fmul float %temp48.0, %36
%439 = fmul float %432, %437
%440 = fadd float %439, %438
%441 = fmul float %434, %437
%442 = fadd float %441, %438
%443 = fmul float %440, %temp12.0
%444 = fadd float %443, %temp24.0
%445 = fmul float %442, %temp12.0
%446 = fadd float %445, %temp25.0
%447 = bitcast float %444 to i32
%448 = bitcast float %446 to i32
%449 = bitcast float %192 to i32
%450 = insertelement <4 x i32> undef, i32 %447, i32 0
%451 = insertelement <4 x i32> %450, i32 %448, i32 1
%452 = insertelement <4 x i32> %451, i32 %449, i32 2
%453 = bitcast <8 x i32> %62 to <32 x i8>
%454 = bitcast <4 x i32> %64 to <16 x i8>
%455 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %452, <32 x i8> %453, <16 x i8> %454, i32 2)
%456 = extractelement <4 x float> %455, i32 0
%457 = extractelement <4 x float> %455, i32 1
%458 = extractelement <4 x float> %455, i32 2
%459 = fcmp oeq float %temp26.0, 4.000000e+00
%460 = select i1 %459, float 1.000000e+00, float 0.000000e+00
%461 = bitcast float %444 to i32
%462 = bitcast float %446 to i32
%463 = bitcast float %192 to i32
%464 = insertelement <4 x i32> undef, i32 %461, i32 0
%465 = insertelement <4 x i32> %464, i32 %462, i32 1
%466 = insertelement <4 x i32> %465, i32 %463, i32 2
%467 = bitcast <8 x i32> %58 to <32 x i8>
%468 = bitcast <4 x i32> %60 to <16 x i8>
%469 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %466, <32 x i8> %467, <16 x i8> %468, i32 2)
%470 = extractelement <4 x float> %469, i32 0
%471 = extractelement <4 x float> %469, i32 1
%472 = extractelement <4 x float> %469, i32 2
%473 = fcmp oeq float %temp26.0, 3.000000e+00
%474 = select i1 %473, float 1.000000e+00, float 0.000000e+00
%475 = bitcast float %444 to i32
%476 = bitcast float %446 to i32
%477 = bitcast float %192 to i32
%478 = insertelement <4 x i32> undef, i32 %475, i32 0
%479 = insertelement <4 x i32> %478, i32 %476, i32 1
%480 = insertelement <4 x i32> %479, i32 %477, i32 2
%481 = bitcast <8 x i32> %54 to <32 x i8>
%482 = bitcast <4 x i32> %56 to <16 x i8>
%483 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %480, <32 x i8> %481, <16 x i8> %482, i32 2)
%484 = extractelement <4 x float> %483, i32 0
%485 = extractelement <4 x float> %483, i32 1
%486 = extractelement <4 x float> %483, i32 2
%487 = fcmp oeq float %temp26.0, 2.000000e+00
%488 = select i1 %487, float 1.000000e+00, float 0.000000e+00
%489 = bitcast float %444 to i32
%490 = bitcast float %446 to i32
%491 = bitcast float %192 to i32
%492 = insertelement <4 x i32> undef, i32 %489, i32 0
%493 = insertelement <4 x i32> %492, i32 %490, i32 1
%494 = insertelement <4 x i32> %493, i32 %491, i32 2
%495 = bitcast <8 x i32> %50 to <32 x i8>
%496 = bitcast <4 x i32> %52 to <16 x i8>
%497 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %494, <32 x i8> %495, <16 x i8> %496, i32 2)
%498 = extractelement <4 x float> %497, i32 0
%499 = extractelement <4 x float> %497, i32 1
%500 = extractelement <4 x float> %497, i32 2
%501 = fcmp oeq float %temp26.0, 1.000000e+00
%502 = select i1 %501, float 1.000000e+00, float 0.000000e+00
%503 = bitcast float %444 to i32
%504 = bitcast float %446 to i32
%505 = bitcast float %192 to i32
%506 = insertelement <4 x i32> undef, i32 %503, i32 0
%507 = insertelement <4 x i32> %506, i32 %504, i32 1
%508 = insertelement <4 x i32> %507, i32 %505, i32 2
%509 = bitcast <8 x i32> %46 to <32 x i8>
%510 = bitcast <4 x i32> %48 to <16 x i8>
%511 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %508, <32 x i8> %509, <16 x i8> %510, i32 2)
%512 = extractelement <4 x float> %511, i32 0
%513 = extractelement <4 x float> %511, i32 1
%514 = extractelement <4 x float> %511, i32 2
%515 = fcmp oeq float %temp26.0, 0.000000e+00
%516 = select i1 %515, float 1.000000e+00, float 0.000000e+00
%517 = fmul float %512, %516
%518 = fmul float %513, %516
%519 = fmul float %514, %516
%520 = fmul float %498, %502
%521 = fadd float %520, %517
%522 = fmul float %499, %502
%523 = fadd float %522, %518
%524 = fmul float %500, %502
%525 = fadd float %524, %519
%526 = fmul float %484, %488
%527 = fadd float %526, %521
%528 = fmul float %485, %488
%529 = fadd float %528, %523
%530 = fmul float %486, %488
%531 = fadd float %530, %525
%532 = fmul float %470, %474
%533 = fadd float %532, %527
%534 = fmul float %471, %474
%535 = fadd float %534, %529
%536 = fmul float %472, %474
%537 = fadd float %536, %531
%538 = fmul float %456, %460
%539 = fadd float %538, %533
%540 = fmul float %457, %460
%541 = fadd float %540, %535
%542 = fmul float %458, %460
%543 = fadd float %542, %537
%544 = fcmp une float %32, %temp16.0
%.sink124 = select i1 %544, float %35, float %34
%temp44.2 = select i1 %544, float 1.953125e-03, float 3.906250e-03
%545 = fdiv float 1.000000e+00, %.sink124
%546 = fmul float %85, %545
%547 = fmul float %86, %545
%548 = call float @llvm.floor.f32(float %546)
%549 = fsub float %546, %548
%550 = call float @llvm.floor.f32(float %547)
%551 = fsub float %547, %550
%552 = fmul float %36, 2.000000e+00
%553 = fmul float %552, %temp44.2
%554 = fsub float 1.000000e+00, %553
%555 = fmul float %temp44.2, %36
%556 = fmul float %549, %554
%557 = fadd float %556, %555
%558 = fmul float %551, %554
%559 = fadd float %558, %555
%560 = fmul float %557, %temp16.0
%561 = fadd float %560, %temp32.0
%562 = fmul float %559, %temp16.0
%563 = fadd float %562, %temp33.0
%564 = bitcast float %561 to i32
%565 = bitcast float %563 to i32
%566 = bitcast float %192 to i32
%567 = insertelement <4 x i32> undef, i32 %564, i32 0
%568 = insertelement <4 x i32> %567, i32 %565, i32 1
%569 = insertelement <4 x i32> %568, i32 %566, i32 2
%570 = bitcast <8 x i32> %62 to <32 x i8>
%571 = bitcast <4 x i32> %64 to <16 x i8>
%572 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %569, <32 x i8> %570, <16 x i8> %571, i32 2)
%573 = extractelement <4 x float> %572, i32 0
%574 = extractelement <4 x float> %572, i32 1
%575 = extractelement <4 x float> %572, i32 2
%576 = fcmp oeq float %temp34.0, 4.000000e+00
%577 = select i1 %576, float 1.000000e+00, float 0.000000e+00
%578 = bitcast float %561 to i32
%579 = bitcast float %563 to i32
%580 = bitcast float %192 to i32
%581 = insertelement <4 x i32> undef, i32 %578, i32 0
%582 = insertelement <4 x i32> %581, i32 %579, i32 1
%583 = insertelement <4 x i32> %582, i32 %580, i32 2
%584 = bitcast <8 x i32> %58 to <32 x i8>
%585 = bitcast <4 x i32> %60 to <16 x i8>
%586 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %583, <32 x i8> %584, <16 x i8> %585, i32 2)
%587 = extractelement <4 x float> %586, i32 0
%588 = extractelement <4 x float> %586, i32 1
%589 = extractelement <4 x float> %586, i32 2
%590 = fcmp oeq float %temp34.0, 3.000000e+00
%591 = select i1 %590, float 1.000000e+00, float 0.000000e+00
%592 = bitcast float %561 to i32
%593 = bitcast float %563 to i32
%594 = bitcast float %192 to i32
%595 = insertelement <4 x i32> undef, i32 %592, i32 0
%596 = insertelement <4 x i32> %595, i32 %593, i32 1
%597 = insertelement <4 x i32> %596, i32 %594, i32 2
%598 = bitcast <8 x i32> %54 to <32 x i8>
%599 = bitcast <4 x i32> %56 to <16 x i8>
%600 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %597, <32 x i8> %598, <16 x i8> %599, i32 2)
%601 = extractelement <4 x float> %600, i32 0
%602 = extractelement <4 x float> %600, i32 1
%603 = extractelement <4 x float> %600, i32 2
%604 = fcmp oeq float %temp34.0, 2.000000e+00
%605 = select i1 %604, float 1.000000e+00, float 0.000000e+00
%606 = bitcast float %561 to i32
%607 = bitcast float %563 to i32
%608 = bitcast float %192 to i32
%609 = insertelement <4 x i32> undef, i32 %606, i32 0
%610 = insertelement <4 x i32> %609, i32 %607, i32 1
%611 = insertelement <4 x i32> %610, i32 %608, i32 2
%612 = bitcast <8 x i32> %50 to <32 x i8>
%613 = bitcast <4 x i32> %52 to <16 x i8>
%614 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %611, <32 x i8> %612, <16 x i8> %613, i32 2)
%615 = extractelement <4 x float> %614, i32 0
%616 = extractelement <4 x float> %614, i32 1
%617 = extractelement <4 x float> %614, i32 2
%618 = fcmp oeq float %temp34.0, 1.000000e+00
%619 = select i1 %618, float 1.000000e+00, float 0.000000e+00
%620 = bitcast float %561 to i32
%621 = bitcast float %563 to i32
%622 = bitcast float %192 to i32
%623 = insertelement <4 x i32> undef, i32 %620, i32 0
%624 = insertelement <4 x i32> %623, i32 %621, i32 1
%625 = insertelement <4 x i32> %624, i32 %622, i32 2
%626 = bitcast <8 x i32> %46 to <32 x i8>
%627 = bitcast <4 x i32> %48 to <16 x i8>
%628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %625, <32 x i8> %626, <16 x i8> %627, i32 2)
%629 = extractelement <4 x float> %628, i32 0
%630 = extractelement <4 x float> %628, i32 1
%631 = extractelement <4 x float> %628, i32 2
%632 = fcmp oeq float %temp34.0, 0.000000e+00
%633 = select i1 %632, float 1.000000e+00, float 0.000000e+00
%634 = fmul float %629, %633
%635 = fmul float %630, %633
%636 = fmul float %631, %633
%637 = fmul float %615, %619
%638 = fadd float %637, %634
%639 = fmul float %616, %619
%640 = fadd float %639, %635
%641 = fmul float %617, %619
%642 = fadd float %641, %636
%643 = fmul float %601, %605
%644 = fadd float %643, %638
%645 = fmul float %602, %605
%646 = fadd float %645, %640
%647 = fmul float %603, %605
%648 = fadd float %647, %642
%649 = fmul float %587, %591
%650 = fadd float %649, %644
%651 = fmul float %588, %591
%652 = fadd float %651, %646
%653 = fmul float %589, %591
%654 = fadd float %653, %648
%655 = fmul float %573, %577
%656 = fadd float %655, %650
%657 = fmul float %574, %577
%658 = fadd float %657, %652
%659 = fmul float %575, %577
%660 = fadd float %659, %654
%661 = fcmp une float %32, %temp16.0
%.sink125 = select i1 %661, float %35, float %34
%temp48.1 = select i1 %661, float 1.953125e-03, float 3.906250e-03
%662 = fdiv float 1.000000e+00, %.sink125
%663 = fmul float %87, %662
%664 = fmul float %86, %662
%665 = call float @llvm.floor.f32(float %663)
%666 = fsub float %663, %665
%667 = call float @llvm.floor.f32(float %664)
%668 = fsub float %664, %667
%669 = fmul float %36, 2.000000e+00
%670 = fmul float %669, %temp48.1
%671 = fsub float 1.000000e+00, %670
%672 = fmul float %temp48.1, %36
%673 = fmul float %666, %671
%674 = fadd float %673, %672
%675 = fmul float %668, %671
%676 = fadd float %675, %672
%677 = fmul float %674, %temp16.0
%678 = fadd float %677, %temp32.0
%679 = fmul float %676, %temp16.0
%680 = fadd float %679, %temp33.0
%681 = bitcast float %678 to i32
%682 = bitcast float %680 to i32
%683 = bitcast float %192 to i32
%684 = insertelement <4 x i32> undef, i32 %681, i32 0
%685 = insertelement <4 x i32> %684, i32 %682, i32 1
%686 = insertelement <4 x i32> %685, i32 %683, i32 2
%687 = bitcast <8 x i32> %62 to <32 x i8>
%688 = bitcast <4 x i32> %64 to <16 x i8>
%689 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %686, <32 x i8> %687, <16 x i8> %688, i32 2)
%690 = extractelement <4 x float> %689, i32 0
%691 = extractelement <4 x float> %689, i32 1
%692 = extractelement <4 x float> %689, i32 2
%693 = fcmp oeq float %temp34.0, 4.000000e+00
%694 = select i1 %693, float 1.000000e+00, float 0.000000e+00
%695 = bitcast float %678 to i32
%696 = bitcast float %680 to i32
%697 = bitcast float %192 to i32
%698 = insertelement <4 x i32> undef, i32 %695, i32 0
%699 = insertelement <4 x i32> %698, i32 %696, i32 1
%700 = insertelement <4 x i32> %699, i32 %697, i32 2
%701 = bitcast <8 x i32> %58 to <32 x i8>
%702 = bitcast <4 x i32> %60 to <16 x i8>
%703 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %700, <32 x i8> %701, <16 x i8> %702, i32 2)
%704 = extractelement <4 x float> %703, i32 0
%705 = extractelement <4 x float> %703, i32 1
%706 = extractelement <4 x float> %703, i32 2
%707 = fcmp oeq float %temp34.0, 3.000000e+00
%708 = select i1 %707, float 1.000000e+00, float 0.000000e+00
%709 = bitcast float %678 to i32
%710 = bitcast float %680 to i32
%711 = bitcast float %192 to i32
%712 = insertelement <4 x i32> undef, i32 %709, i32 0
%713 = insertelement <4 x i32> %712, i32 %710, i32 1
%714 = insertelement <4 x i32> %713, i32 %711, i32 2
%715 = bitcast <8 x i32> %54 to <32 x i8>
%716 = bitcast <4 x i32> %56 to <16 x i8>
%717 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %714, <32 x i8> %715, <16 x i8> %716, i32 2)
%718 = extractelement <4 x float> %717, i32 0
%719 = extractelement <4 x float> %717, i32 1
%720 = extractelement <4 x float> %717, i32 2
%721 = fcmp oeq float %temp34.0, 2.000000e+00
%722 = select i1 %721, float 1.000000e+00, float 0.000000e+00
%723 = bitcast float %678 to i32
%724 = bitcast float %680 to i32
%725 = bitcast float %192 to i32
%726 = insertelement <4 x i32> undef, i32 %723, i32 0
%727 = insertelement <4 x i32> %726, i32 %724, i32 1
%728 = insertelement <4 x i32> %727, i32 %725, i32 2
%729 = bitcast <8 x i32> %50 to <32 x i8>
%730 = bitcast <4 x i32> %52 to <16 x i8>
%731 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %728, <32 x i8> %729, <16 x i8> %730, i32 2)
%732 = extractelement <4 x float> %731, i32 0
%733 = extractelement <4 x float> %731, i32 1
%734 = extractelement <4 x float> %731, i32 2
%735 = fcmp oeq float %temp34.0, 1.000000e+00
%736 = select i1 %735, float 1.000000e+00, float 0.000000e+00
%737 = bitcast float %678 to i32
%738 = bitcast float %680 to i32
%739 = bitcast float %192 to i32
%740 = insertelement <4 x i32> undef, i32 %737, i32 0
%741 = insertelement <4 x i32> %740, i32 %738, i32 1
%742 = insertelement <4 x i32> %741, i32 %739, i32 2
%743 = bitcast <8 x i32> %46 to <32 x i8>
%744 = bitcast <4 x i32> %48 to <16 x i8>
%745 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %742, <32 x i8> %743, <16 x i8> %744, i32 2)
%746 = extractelement <4 x float> %745, i32 0
%747 = extractelement <4 x float> %745, i32 1
%748 = extractelement <4 x float> %745, i32 2
%749 = fcmp oeq float %temp34.0, 0.000000e+00
%750 = select i1 %749, float 1.000000e+00, float 0.000000e+00
%751 = fmul float %746, %750
%752 = fmul float %747, %750
%753 = fmul float %748, %750
%754 = fmul float %732, %736
%755 = fadd float %754, %751
%756 = fmul float %733, %736
%757 = fadd float %756, %752
%758 = fmul float %734, %736
%759 = fadd float %758, %753
%760 = fmul float %718, %722
%761 = fadd float %760, %755
%762 = fmul float %719, %722
%763 = fadd float %762, %757
%764 = fmul float %720, %722
%765 = fadd float %764, %759
%766 = fmul float %704, %708
%767 = fadd float %766, %761
%768 = fmul float %705, %708
%769 = fadd float %768, %763
%770 = fmul float %706, %708
%771 = fadd float %770, %765
%772 = fmul float %690, %694
%773 = fadd float %772, %767
%774 = fmul float %691, %694
%775 = fadd float %774, %769
%776 = fmul float %692, %694
%777 = fadd float %776, %771
%778 = fcmp une float %32, %temp16.0
%.sink126 = select i1 %778, float %35, float %34
%temp52.0 = select i1 %778, float 1.953125e-03, float 3.906250e-03
%779 = fdiv float 1.000000e+00, %.sink126
%780 = fmul float %87, %779
%781 = fmul float %85, %779
%782 = call float @llvm.floor.f32(float %780)
%783 = fsub float %780, %782
%784 = call float @llvm.floor.f32(float %781)
%785 = fsub float %781, %784
%786 = fmul float %36, 2.000000e+00
%787 = fmul float %786, %temp52.0
%788 = fsub float 1.000000e+00, %787
%789 = fmul float %temp52.0, %36
%790 = fmul float %783, %788
%791 = fadd float %790, %789
%792 = fmul float %785, %788
%793 = fadd float %792, %789
%794 = fmul float %791, %temp16.0
%795 = fadd float %794, %temp32.0
%796 = fmul float %793, %temp16.0
%797 = fadd float %796, %temp33.0
%798 = bitcast float %795 to i32
%799 = bitcast float %797 to i32
%800 = bitcast float %192 to i32
%801 = insertelement <4 x i32> undef, i32 %798, i32 0
%802 = insertelement <4 x i32> %801, i32 %799, i32 1
%803 = insertelement <4 x i32> %802, i32 %800, i32 2
%804 = bitcast <8 x i32> %62 to <32 x i8>
%805 = bitcast <4 x i32> %64 to <16 x i8>
%806 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %803, <32 x i8> %804, <16 x i8> %805, i32 2)
%807 = extractelement <4 x float> %806, i32 0
%808 = extractelement <4 x float> %806, i32 1
%809 = extractelement <4 x float> %806, i32 2
%810 = fcmp oeq float %temp34.0, 4.000000e+00
%811 = select i1 %810, float 1.000000e+00, float 0.000000e+00
%812 = bitcast float %795 to i32
%813 = bitcast float %797 to i32
%814 = bitcast float %192 to i32
%815 = insertelement <4 x i32> undef, i32 %812, i32 0
%816 = insertelement <4 x i32> %815, i32 %813, i32 1
%817 = insertelement <4 x i32> %816, i32 %814, i32 2
%818 = bitcast <8 x i32> %58 to <32 x i8>
%819 = bitcast <4 x i32> %60 to <16 x i8>
%820 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %817, <32 x i8> %818, <16 x i8> %819, i32 2)
%821 = extractelement <4 x float> %820, i32 0
%822 = extractelement <4 x float> %820, i32 1
%823 = extractelement <4 x float> %820, i32 2
%824 = fcmp oeq float %temp34.0, 3.000000e+00
%825 = select i1 %824, float 1.000000e+00, float 0.000000e+00
%826 = bitcast float %795 to i32
%827 = bitcast float %797 to i32
%828 = bitcast float %192 to i32
%829 = insertelement <4 x i32> undef, i32 %826, i32 0
%830 = insertelement <4 x i32> %829, i32 %827, i32 1
%831 = insertelement <4 x i32> %830, i32 %828, i32 2
%832 = bitcast <8 x i32> %54 to <32 x i8>
%833 = bitcast <4 x i32> %56 to <16 x i8>
%834 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %831, <32 x i8> %832, <16 x i8> %833, i32 2)
%835 = extractelement <4 x float> %834, i32 0
%836 = extractelement <4 x float> %834, i32 1
%837 = extractelement <4 x float> %834, i32 2
%838 = fcmp oeq float %temp34.0, 2.000000e+00
%839 = select i1 %838, float 1.000000e+00, float 0.000000e+00
%840 = bitcast float %795 to i32
%841 = bitcast float %797 to i32
%842 = bitcast float %192 to i32
%843 = insertelement <4 x i32> undef, i32 %840, i32 0
%844 = insertelement <4 x i32> %843, i32 %841, i32 1
%845 = insertelement <4 x i32> %844, i32 %842, i32 2
%846 = bitcast <8 x i32> %50 to <32 x i8>
%847 = bitcast <4 x i32> %52 to <16 x i8>
%848 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %845, <32 x i8> %846, <16 x i8> %847, i32 2)
%849 = extractelement <4 x float> %848, i32 0
%850 = extractelement <4 x float> %848, i32 1
%851 = extractelement <4 x float> %848, i32 2
%852 = fcmp oeq float %temp34.0, 1.000000e+00
%853 = select i1 %852, float 1.000000e+00, float 0.000000e+00
%854 = bitcast float %795 to i32
%855 = bitcast float %797 to i32
%856 = bitcast float %192 to i32
%857 = insertelement <4 x i32> undef, i32 %854, i32 0
%858 = insertelement <4 x i32> %857, i32 %855, i32 1
%859 = insertelement <4 x i32> %858, i32 %856, i32 2
%860 = bitcast <8 x i32> %46 to <32 x i8>
%861 = bitcast <4 x i32> %48 to <16 x i8>
%862 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %859, <32 x i8> %860, <16 x i8> %861, i32 2)
%863 = extractelement <4 x float> %862, i32 0
%864 = extractelement <4 x float> %862, i32 1
%865 = extractelement <4 x float> %862, i32 2
%866 = fcmp oeq float %temp34.0, 0.000000e+00
%867 = select i1 %866, float 1.000000e+00, float 0.000000e+00
%868 = fmul float %863, %867
%869 = fmul float %864, %867
%870 = fmul float %865, %867
%871 = fmul float %849, %853
%872 = fadd float %871, %868
%873 = fmul float %850, %853
%874 = fadd float %873, %869
%875 = fmul float %851, %853
%876 = fadd float %875, %870
%877 = fmul float %835, %839
%878 = fadd float %877, %872
%879 = fmul float %836, %839
%880 = fadd float %879, %874
%881 = fmul float %837, %839
%882 = fadd float %881, %876
%883 = fmul float %821, %825
%884 = fadd float %883, %878
%885 = fmul float %822, %825
%886 = fadd float %885, %880
%887 = fmul float %823, %825
%888 = fadd float %887, %882
%889 = fmul float %807, %811
%890 = fadd float %889, %884
%891 = fmul float %808, %811
%892 = fadd float %891, %886
%893 = fmul float %809, %811
%894 = fadd float %893, %888
%895 = fcmp une float %32, %temp20.0
%.sink127 = select i1 %895, float %35, float %34
%temp48.3 = select i1 %895, float 1.953125e-03, float 3.906250e-03
%896 = fdiv float 1.000000e+00, %.sink127
%897 = fmul float %85, %896
%898 = fmul float %86, %896
%899 = call float @llvm.floor.f32(float %897)
%900 = fsub float %897, %899
%901 = call float @llvm.floor.f32(float %898)
%902 = fsub float %898, %901
%903 = fmul float %36, 2.000000e+00
%904 = fmul float %903, %temp48.3
%905 = fsub float 1.000000e+00, %904
%906 = fmul float %temp48.3, %36
%907 = fmul float %900, %905
%908 = fadd float %907, %906
%909 = fmul float %902, %905
%910 = fadd float %909, %906
%911 = fmul float %908, %temp20.0
%912 = fadd float %911, %temp8.0
%913 = fmul float %910, %temp20.0
%914 = fadd float %913, %temp9.0
%915 = bitcast float %912 to i32
%916 = bitcast float %914 to i32
%917 = bitcast float %192 to i32
%918 = insertelement <4 x i32> undef, i32 %915, i32 0
%919 = insertelement <4 x i32> %918, i32 %916, i32 1
%920 = insertelement <4 x i32> %919, i32 %917, i32 2
%921 = bitcast <8 x i32> %62 to <32 x i8>
%922 = bitcast <4 x i32> %64 to <16 x i8>
%923 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %920, <32 x i8> %921, <16 x i8> %922, i32 2)
%924 = extractelement <4 x float> %923, i32 0
%925 = extractelement <4 x float> %923, i32 1
%926 = extractelement <4 x float> %923, i32 2
%927 = fcmp oeq float %temp10.0, 4.000000e+00
%928 = select i1 %927, float 1.000000e+00, float 0.000000e+00
%929 = bitcast float %912 to i32
%930 = bitcast float %914 to i32
%931 = bitcast float %192 to i32
%932 = insertelement <4 x i32> undef, i32 %929, i32 0
%933 = insertelement <4 x i32> %932, i32 %930, i32 1
%934 = insertelement <4 x i32> %933, i32 %931, i32 2
%935 = bitcast <8 x i32> %58 to <32 x i8>
%936 = bitcast <4 x i32> %60 to <16 x i8>
%937 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %934, <32 x i8> %935, <16 x i8> %936, i32 2)
%938 = extractelement <4 x float> %937, i32 0
%939 = extractelement <4 x float> %937, i32 1
%940 = extractelement <4 x float> %937, i32 2
%941 = fcmp oeq float %temp10.0, 3.000000e+00
%942 = select i1 %941, float 1.000000e+00, float 0.000000e+00
%943 = bitcast float %912 to i32
%944 = bitcast float %914 to i32
%945 = bitcast float %192 to i32
%946 = insertelement <4 x i32> undef, i32 %943, i32 0
%947 = insertelement <4 x i32> %946, i32 %944, i32 1
%948 = insertelement <4 x i32> %947, i32 %945, i32 2
%949 = bitcast <8 x i32> %54 to <32 x i8>
%950 = bitcast <4 x i32> %56 to <16 x i8>
%951 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %948, <32 x i8> %949, <16 x i8> %950, i32 2)
%952 = extractelement <4 x float> %951, i32 0
%953 = extractelement <4 x float> %951, i32 1
%954 = extractelement <4 x float> %951, i32 2
%955 = fcmp oeq float %temp10.0, 2.000000e+00
%956 = select i1 %955, float 1.000000e+00, float 0.000000e+00
%957 = bitcast float %912 to i32
%958 = bitcast float %914 to i32
%959 = bitcast float %192 to i32
%960 = insertelement <4 x i32> undef, i32 %957, i32 0
%961 = insertelement <4 x i32> %960, i32 %958, i32 1
%962 = insertelement <4 x i32> %961, i32 %959, i32 2
%963 = bitcast <8 x i32> %50 to <32 x i8>
%964 = bitcast <4 x i32> %52 to <16 x i8>
%965 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %962, <32 x i8> %963, <16 x i8> %964, i32 2)
%966 = extractelement <4 x float> %965, i32 0
%967 = extractelement <4 x float> %965, i32 1
%968 = extractelement <4 x float> %965, i32 2
%969 = fcmp oeq float %temp10.0, 1.000000e+00
%970 = select i1 %969, float 1.000000e+00, float 0.000000e+00
%971 = bitcast float %912 to i32
%972 = bitcast float %914 to i32
%973 = bitcast float %192 to i32
%974 = insertelement <4 x i32> undef, i32 %971, i32 0
%975 = insertelement <4 x i32> %974, i32 %972, i32 1
%976 = insertelement <4 x i32> %975, i32 %973, i32 2
%977 = bitcast <8 x i32> %46 to <32 x i8>
%978 = bitcast <4 x i32> %48 to <16 x i8>
%979 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %976, <32 x i8> %977, <16 x i8> %978, i32 2)
%980 = extractelement <4 x float> %979, i32 0
%981 = extractelement <4 x float> %979, i32 1
%982 = extractelement <4 x float> %979, i32 2
%983 = fcmp oeq float %temp10.0, 0.000000e+00
%984 = select i1 %983, float 1.000000e+00, float 0.000000e+00
%985 = fmul float %980, %984
%986 = fmul float %981, %984
%987 = fmul float %982, %984
%988 = fmul float %966, %970
%989 = fadd float %988, %985
%990 = fmul float %967, %970
%991 = fadd float %990, %986
%992 = fmul float %968, %970
%993 = fadd float %992, %987
%994 = fmul float %952, %956
%995 = fadd float %994, %989
%996 = fmul float %953, %956
%997 = fadd float %996, %991
%998 = fmul float %954, %956
%999 = fadd float %998, %993
%1000 = fmul float %938, %942
%1001 = fadd float %1000, %995
%1002 = fmul float %939, %942
%1003 = fadd float %1002, %997
%1004 = fmul float %940, %942
%1005 = fadd float %1004, %999
%1006 = fmul float %924, %928
%1007 = fadd float %1006, %1001
%1008 = fmul float %925, %928
%1009 = fadd float %1008, %1003
%1010 = fmul float %926, %928
%1011 = fadd float %1010, %1005
%1012 = fcmp une float %32, %temp20.0
%.sink128 = select i1 %1012, float %35, float %34
%temp52.1 = select i1 %1012, float 1.953125e-03, float 3.906250e-03
%1013 = fdiv float 1.000000e+00, %.sink128
%1014 = fmul float %87, %1013
%1015 = fmul float %86, %1013
%1016 = call float @llvm.floor.f32(float %1014)
%1017 = fsub float %1014, %1016
%1018 = call float @llvm.floor.f32(float %1015)
%1019 = fsub float %1015, %1018
%1020 = fmul float %36, 2.000000e+00
%1021 = fmul float %1020, %temp52.1
%1022 = fsub float 1.000000e+00, %1021
%1023 = fmul float %temp52.1, %36
%1024 = fmul float %1017, %1022
%1025 = fadd float %1024, %1023
%1026 = fmul float %1019, %1022
%1027 = fadd float %1026, %1023
%1028 = fmul float %1025, %temp20.0
%1029 = fadd float %1028, %temp8.0
%1030 = fmul float %1027, %temp20.0
%1031 = fadd float %1030, %temp9.0
%1032 = bitcast float %1029 to i32
%1033 = bitcast float %1031 to i32
%1034 = bitcast float %192 to i32
%1035 = insertelement <4 x i32> undef, i32 %1032, i32 0
%1036 = insertelement <4 x i32> %1035, i32 %1033, i32 1
%1037 = insertelement <4 x i32> %1036, i32 %1034, i32 2
%1038 = bitcast <8 x i32> %62 to <32 x i8>
%1039 = bitcast <4 x i32> %64 to <16 x i8>
%1040 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1037, <32 x i8> %1038, <16 x i8> %1039, i32 2)
%1041 = extractelement <4 x float> %1040, i32 0
%1042 = extractelement <4 x float> %1040, i32 1
%1043 = extractelement <4 x float> %1040, i32 2
%1044 = fcmp oeq float %temp10.0, 4.000000e+00
%1045 = select i1 %1044, float 1.000000e+00, float 0.000000e+00
%1046 = bitcast float %1029 to i32
%1047 = bitcast float %1031 to i32
%1048 = bitcast float %192 to i32
%1049 = insertelement <4 x i32> undef, i32 %1046, i32 0
%1050 = insertelement <4 x i32> %1049, i32 %1047, i32 1
%1051 = insertelement <4 x i32> %1050, i32 %1048, i32 2
%1052 = bitcast <8 x i32> %58 to <32 x i8>
%1053 = bitcast <4 x i32> %60 to <16 x i8>
%1054 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1051, <32 x i8> %1052, <16 x i8> %1053, i32 2)
%1055 = extractelement <4 x float> %1054, i32 0
%1056 = extractelement <4 x float> %1054, i32 1
%1057 = extractelement <4 x float> %1054, i32 2
%1058 = fcmp oeq float %temp10.0, 3.000000e+00
%1059 = select i1 %1058, float 1.000000e+00, float 0.000000e+00
%1060 = bitcast float %1029 to i32
%1061 = bitcast float %1031 to i32
%1062 = bitcast float %192 to i32
%1063 = insertelement <4 x i32> undef, i32 %1060, i32 0
%1064 = insertelement <4 x i32> %1063, i32 %1061, i32 1
%1065 = insertelement <4 x i32> %1064, i32 %1062, i32 2
%1066 = bitcast <8 x i32> %54 to <32 x i8>
%1067 = bitcast <4 x i32> %56 to <16 x i8>
%1068 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1065, <32 x i8> %1066, <16 x i8> %1067, i32 2)
%1069 = extractelement <4 x float> %1068, i32 0
%1070 = extractelement <4 x float> %1068, i32 1
%1071 = extractelement <4 x float> %1068, i32 2
%1072 = fcmp oeq float %temp10.0, 2.000000e+00
%1073 = select i1 %1072, float 1.000000e+00, float 0.000000e+00
%1074 = bitcast float %1029 to i32
%1075 = bitcast float %1031 to i32
%1076 = bitcast float %192 to i32
%1077 = insertelement <4 x i32> undef, i32 %1074, i32 0
%1078 = insertelement <4 x i32> %1077, i32 %1075, i32 1
%1079 = insertelement <4 x i32> %1078, i32 %1076, i32 2
%1080 = bitcast <8 x i32> %50 to <32 x i8>
%1081 = bitcast <4 x i32> %52 to <16 x i8>
%1082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1079, <32 x i8> %1080, <16 x i8> %1081, i32 2)
%1083 = extractelement <4 x float> %1082, i32 0
%1084 = extractelement <4 x float> %1082, i32 1
%1085 = extractelement <4 x float> %1082, i32 2
%1086 = fcmp oeq float %temp10.0, 1.000000e+00
%1087 = select i1 %1086, float 1.000000e+00, float 0.000000e+00
%1088 = bitcast float %1029 to i32
%1089 = bitcast float %1031 to i32
%1090 = bitcast float %192 to i32
%1091 = insertelement <4 x i32> undef, i32 %1088, i32 0
%1092 = insertelement <4 x i32> %1091, i32 %1089, i32 1
%1093 = insertelement <4 x i32> %1092, i32 %1090, i32 2
%1094 = bitcast <8 x i32> %46 to <32 x i8>
%1095 = bitcast <4 x i32> %48 to <16 x i8>
%1096 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1093, <32 x i8> %1094, <16 x i8> %1095, i32 2)
%1097 = extractelement <4 x float> %1096, i32 0
%1098 = extractelement <4 x float> %1096, i32 1
%1099 = extractelement <4 x float> %1096, i32 2
%1100 = fcmp oeq float %temp10.0, 0.000000e+00
%1101 = select i1 %1100, float 1.000000e+00, float 0.000000e+00
%1102 = fmul float %1097, %1101
%1103 = fmul float %1098, %1101
%1104 = fmul float %1099, %1101
%1105 = fmul float %1083, %1087
%1106 = fadd float %1105, %1102
%1107 = fmul float %1084, %1087
%1108 = fadd float %1107, %1103
%1109 = fmul float %1085, %1087
%1110 = fadd float %1109, %1104
%1111 = fmul float %1069, %1073
%1112 = fadd float %1111, %1106
%1113 = fmul float %1070, %1073
%1114 = fadd float %1113, %1108
%1115 = fmul float %1071, %1073
%1116 = fadd float %1115, %1110
%1117 = fmul float %1055, %1059
%1118 = fadd float %1117, %1112
%1119 = fmul float %1056, %1059
%1120 = fadd float %1119, %1114
%1121 = fmul float %1057, %1059
%1122 = fadd float %1121, %1116
%1123 = fmul float %1041, %1045
%1124 = fadd float %1123, %1118
%1125 = fmul float %1042, %1045
%1126 = fadd float %1125, %1120
%1127 = fmul float %1043, %1045
%1128 = fadd float %1127, %1122
%1129 = fcmp une float %32, %temp20.0
%.sink129 = select i1 %1129, float %35, float %34
%temp56.0 = select i1 %1129, float 1.953125e-03, float 3.906250e-03
%1130 = fdiv float 1.000000e+00, %.sink129
%1131 = fmul float %87, %1130
%1132 = fmul float %85, %1130
%1133 = call float @llvm.floor.f32(float %1131)
%1134 = fsub float %1131, %1133
%1135 = call float @llvm.floor.f32(float %1132)
%1136 = fsub float %1132, %1135
%1137 = fmul float %36, 2.000000e+00
%1138 = fmul float %1137, %temp56.0
%1139 = fsub float 1.000000e+00, %1138
%1140 = fmul float %temp56.0, %36
%1141 = fmul float %1134, %1139
%1142 = fadd float %1141, %1140
%1143 = fmul float %1136, %1139
%1144 = fadd float %1143, %1140
%1145 = fmul float %1142, %temp20.0
%1146 = fadd float %1145, %temp8.0
%1147 = fmul float %1144, %temp20.0
%1148 = fadd float %1147, %temp9.0
%1149 = bitcast float %1146 to i32
%1150 = bitcast float %1148 to i32
%1151 = bitcast float %192 to i32
%1152 = insertelement <4 x i32> undef, i32 %1149, i32 0
%1153 = insertelement <4 x i32> %1152, i32 %1150, i32 1
%1154 = insertelement <4 x i32> %1153, i32 %1151, i32 2
%1155 = bitcast <8 x i32> %62 to <32 x i8>
%1156 = bitcast <4 x i32> %64 to <16 x i8>
%1157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1154, <32 x i8> %1155, <16 x i8> %1156, i32 2)
%1158 = extractelement <4 x float> %1157, i32 0
%1159 = extractelement <4 x float> %1157, i32 1
%1160 = extractelement <4 x float> %1157, i32 2
%1161 = fcmp oeq float %temp10.0, 4.000000e+00
%1162 = select i1 %1161, float 1.000000e+00, float 0.000000e+00
%1163 = bitcast float %1146 to i32
%1164 = bitcast float %1148 to i32
%1165 = bitcast float %192 to i32
%1166 = insertelement <4 x i32> undef, i32 %1163, i32 0
%1167 = insertelement <4 x i32> %1166, i32 %1164, i32 1
%1168 = insertelement <4 x i32> %1167, i32 %1165, i32 2
%1169 = bitcast <8 x i32> %58 to <32 x i8>
%1170 = bitcast <4 x i32> %60 to <16 x i8>
%1171 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1168, <32 x i8> %1169, <16 x i8> %1170, i32 2)
%1172 = extractelement <4 x float> %1171, i32 0
%1173 = extractelement <4 x float> %1171, i32 1
%1174 = extractelement <4 x float> %1171, i32 2
%1175 = fcmp oeq float %temp10.0, 3.000000e+00
%1176 = select i1 %1175, float 1.000000e+00, float 0.000000e+00
%1177 = bitcast float %1146 to i32
%1178 = bitcast float %1148 to i32
%1179 = bitcast float %192 to i32
%1180 = insertelement <4 x i32> undef, i32 %1177, i32 0
%1181 = insertelement <4 x i32> %1180, i32 %1178, i32 1
%1182 = insertelement <4 x i32> %1181, i32 %1179, i32 2
%1183 = bitcast <8 x i32> %54 to <32 x i8>
%1184 = bitcast <4 x i32> %56 to <16 x i8>
%1185 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1182, <32 x i8> %1183, <16 x i8> %1184, i32 2)
%1186 = extractelement <4 x float> %1185, i32 0
%1187 = extractelement <4 x float> %1185, i32 1
%1188 = extractelement <4 x float> %1185, i32 2
%1189 = fcmp oeq float %temp10.0, 2.000000e+00
%1190 = select i1 %1189, float 1.000000e+00, float 0.000000e+00
%1191 = bitcast float %1146 to i32
%1192 = bitcast float %1148 to i32
%1193 = bitcast float %192 to i32
%1194 = insertelement <4 x i32> undef, i32 %1191, i32 0
%1195 = insertelement <4 x i32> %1194, i32 %1192, i32 1
%1196 = insertelement <4 x i32> %1195, i32 %1193, i32 2
%1197 = bitcast <8 x i32> %50 to <32 x i8>
%1198 = bitcast <4 x i32> %52 to <16 x i8>
%1199 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1196, <32 x i8> %1197, <16 x i8> %1198, i32 2)
%1200 = extractelement <4 x float> %1199, i32 0
%1201 = extractelement <4 x float> %1199, i32 1
%1202 = extractelement <4 x float> %1199, i32 2
%1203 = fcmp oeq float %temp10.0, 1.000000e+00
%1204 = select i1 %1203, float 1.000000e+00, float 0.000000e+00
%1205 = bitcast float %1146 to i32
%1206 = bitcast float %1148 to i32
%1207 = bitcast float %192 to i32
%1208 = insertelement <4 x i32> undef, i32 %1205, i32 0
%1209 = insertelement <4 x i32> %1208, i32 %1206, i32 1
%1210 = insertelement <4 x i32> %1209, i32 %1207, i32 2
%1211 = bitcast <8 x i32> %46 to <32 x i8>
%1212 = bitcast <4 x i32> %48 to <16 x i8>
%1213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1210, <32 x i8> %1211, <16 x i8> %1212, i32 2)
%1214 = extractelement <4 x float> %1213, i32 0
%1215 = extractelement <4 x float> %1213, i32 1
%1216 = extractelement <4 x float> %1213, i32 2
%1217 = fcmp oeq float %temp10.0, 0.000000e+00
%1218 = select i1 %1217, float 1.000000e+00, float 0.000000e+00
%1219 = fmul float %1214, %1218
%1220 = fmul float %1215, %1218
%1221 = fmul float %1216, %1218
%1222 = fmul float %1200, %1204
%1223 = fadd float %1222, %1219
%1224 = fmul float %1201, %1204
%1225 = fadd float %1224, %1220
%1226 = fmul float %1202, %1204
%1227 = fadd float %1226, %1221
%1228 = fmul float %1186, %1190
%1229 = fadd float %1228, %1223
%1230 = fmul float %1187, %1190
%1231 = fadd float %1230, %1225
%1232 = fmul float %1188, %1190
%1233 = fadd float %1232, %1227
%1234 = fmul float %1172, %1176
%1235 = fadd float %1234, %1229
%1236 = fmul float %1173, %1176
%1237 = fadd float %1236, %1231
%1238 = fmul float %1174, %1176
%1239 = fadd float %1238, %1233
%1240 = fmul float %1158, %1162
%1241 = fadd float %1240, %1235
%1242 = fmul float %1159, %1162
%1243 = fadd float %1242, %1237
%1244 = fmul float %1160, %1162
%1245 = fadd float %1244, %1239
%1246 = fmul float %1007, %117
%1247 = fmul float %1009, %117
%1248 = fmul float %1011, %117
%1249 = fmul float %1124, %115
%1250 = fadd float %1249, %1246
%1251 = fmul float %1126, %115
%1252 = fadd float %1251, %1247
%1253 = fmul float %1128, %115
%1254 = fadd float %1253, %1248
%1255 = fmul float %1241, %116
%1256 = fadd float %1255, %1250
%1257 = fmul float %1243, %116
%1258 = fadd float %1257, %1252
%1259 = fmul float %1245, %116
%1260 = fadd float %1259, %1254
%1261 = fmul float %656, %117
%1262 = fmul float %658, %117
%1263 = fmul float %660, %117
%1264 = fmul float %773, %115
%1265 = fadd float %1264, %1261
%1266 = fmul float %775, %115
%1267 = fadd float %1266, %1262
%1268 = fmul float %777, %115
%1269 = fadd float %1268, %1263
%1270 = fmul float %890, %116
%1271 = fadd float %1270, %1265
%1272 = fmul float %892, %116
%1273 = fadd float %1272, %1267
%1274 = fmul float %894, %116
%1275 = fadd float %1274, %1269
%1276 = fmul float %305, %117
%1277 = fmul float %307, %117
%1278 = fmul float %309, %117
%1279 = fmul float %422, %115
%1280 = fadd float %1279, %1276
%1281 = fmul float %424, %115
%1282 = fadd float %1281, %1277
%1283 = fmul float %426, %115
%1284 = fadd float %1283, %1278
%1285 = fmul float %539, %116
%1286 = fadd float %1285, %1280
%1287 = fmul float %541, %116
%1288 = fadd float %1287, %1282
%1289 = fmul float %543, %116
%1290 = fadd float %1289, %1284
%1291 = fmul float %74, %1286
%1292 = fmul float %74, %1288
%1293 = fmul float %74, %1290
%1294 = fmul float %75, %1271
%1295 = fadd float %1294, %1291
%1296 = fmul float %75, %1273
%1297 = fadd float %1296, %1292
%1298 = fmul float %75, %1275
%1299 = fadd float %1298, %1293
%1300 = fmul float %76, %1256
%1301 = fadd float %1300, %1295
%1302 = fmul float %76, %1258
%1303 = fadd float %1302, %1297
%1304 = fmul float %76, %1260
%1305 = fadd float %1304, %1299
%1306 = fdiv float %78, %80
%1307 = fdiv float %79, %80
%1308 = bitcast float %1306 to i32
%1309 = bitcast float %1307 to i32
%1310 = insertelement <2 x i32> undef, i32 %1308, i32 0
%1311 = insertelement <2 x i32> %1310, i32 %1309, i32 1
%1312 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1311, <32 x i8> %67, <16 x i8> %70, i32 2)
%1313 = extractelement <4 x float> %1312, i32 0
%1314 = extractelement <4 x float> %1312, i32 1
%1315 = extractelement <4 x float> %1312, i32 2
%1316 = extractelement <4 x float> %1312, i32 3
%1317 = call float @llvm.log2.f32(float %1313)
%1318 = call float @llvm.log2.f32(float %1314)
%1319 = call float @llvm.log2.f32(float %1315)
%1320 = call float @llvm.log2.f32(float %1316)
%1321 = fsub float -0.000000e+00, %1320
%1322 = fsub float %88, %1317
%1323 = fsub float %89, %1318
%1324 = fsub float %90, %1319
%1325 = fmul float %1322, 0x3FE51EB860000000
%1326 = fmul float %1323, 0x3FE51EB860000000
%1327 = fmul float %1324, 0x3FE51EB860000000
%1328 = fmul float %42, 2.000000e+00
%1329 = fmul float %43, 2.000000e+00
%1330 = fmul float %44, 2.000000e+00
%1331 = fmul float %1325, %1325
%1332 = fmul float %1326, %1326
%1333 = fmul float %1327, %1327
%1334 = fmul float %1328, %1331
%1335 = fmul float %1329, %1332
%1336 = fmul float %1330, %1333
%1337 = call float @llvm.maxnum.f32(float %1334, float %39)
%1338 = call float @llvm.maxnum.f32(float %1335, float %40)
%1339 = call float @llvm.maxnum.f32(float %1336, float %41)
%1340 = call float @llvm.minnum.f32(float %1337, float 1.000000e+00)
%1341 = call float @llvm.minnum.f32(float %1338, float 1.000000e+00)
%1342 = call float @llvm.minnum.f32(float %1339, float 1.000000e+00)
%1343 = call float @llvm.AMDIL.clamp.(float %1321, float 0.000000e+00, float 1.000000e+00)
%1344 = fmul float %1343, %77
%1345 = fmul float %1322, 5.000000e-01
%1346 = fmul float %1323, 5.000000e-01
%1347 = fmul float %1324, 5.000000e-01
%1348 = fmul float %1301, %1345
%1349 = fmul float %1303, %1346
%1350 = fmul float %1305, %1347
%1351 = fmul float %1322, %1344
%1352 = fadd float %1351, %1348
%1353 = fmul float %1323, %1344
%1354 = fadd float %1353, %1349
%1355 = fmul float %1324, %1344
%1356 = fadd float %1355, %1350
%1357 = fmul float %1340, %1301
%1358 = fadd float %1357, %1352
%1359 = fmul float %1341, %1303
%1360 = fadd float %1359, %1354
%1361 = fmul float %1342, %1305
%1362 = fadd float %1361, %1356
%1363 = fmul float %81, %30
%1364 = fadd float %1363, %31
%1365 = call float @llvm.AMDIL.clamp.(float %1364, float 0.000000e+00, float 1.000000e+00)
%1366 = call float @llvm.AMDGPU.lrp(float %1365, float %1358, float %27)
%1367 = call float @llvm.AMDGPU.lrp(float %1365, float %1360, float %28)
%1368 = call float @llvm.AMDGPU.lrp(float %1365, float %1362, float %29)
%1369 = call i32 @llvm.SI.packf16(float %1366, float %1367)
%1370 = bitcast i32 %1369 to float
%1371 = call i32 @llvm.SI.packf16(float %1368, float 1.000000e+00)
%1372 = bitcast i32 %1371 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1370, float %1372, float %1370, float %1372)
ret void
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata for this module.
; Every function below is declaration-only (no body); several of them are
; called from the function body that ends just above. The per-intrinsic notes
; describe only what the signatures and visible call sites show.
; ---------------------------------------------------------------------------

; Takes a <16 x i8> value plus an i32 and returns one float.
; NOTE(review): no call is visible in this part of the dump; presumably used
; earlier in the function to read shader constants - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Takes three i32 values and a <2 x i32>, returns one float.
; NOTE(review): presumably the source of the v_interp_p1/p2 pairs in the
; disassembly that follows - confirm against the start of the function.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Standard LLVM float absolute-value intrinsic.
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; float -> float; by its name a clamped reciprocal square root.
; NOTE(review): exact clamping semantics are not shown here - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Standard LLVM maxnum; the body uses it as the lower-bound half of a
; max-then-min sequence whose upper bound is 1.0.
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Standard LLVM floor; the body subtracts it from its own argument (x - floor(x)).
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Standard LLVM base-2 logarithm; the body applies it to each component
; returned by the llvm.SI.sample.v2i32 call.
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Sampling intrinsic called with a <4 x i32> address vector, a <32 x i8>
; operand, a <16 x i8> operand and a trailing i32 (always 2 in the visible
; calls); returns <4 x float>. The visible callers fill only lanes 0-2 of the
; address vector (lane 3 stays undef) and read only lanes 0-2 of the result.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Sampling intrinsic with a two-component <2 x i32> address; the single
; visible call reads all four result lanes.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Standard LLVM minnum; see the note on llvm.maxnum.f32.
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Three-float intrinsic; every visible call passes 0.0 and 1.0 as the second
; and third arguments. Uses attribute group #2, which lacks nounwind.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Three-float intrinsic; by its name a linear interpolation.
; NOTE(review): operand order/weight convention is not shown here - confirm.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Combines two floats into one i32; the body bitcasts the result back to float
; before handing it to llvm.SI.export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Declared with no attribute group (and so no "Function Attrs" comment),
; unlike the readnone intrinsics above. It is the last call before `ret void`.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0 is not referenced by any declaration in this list.
; NOTE(review): presumably attached to the function definition whose header is
; above this part of the dump. The vertex shader at the start of the dump
; carries "ShaderType"="1", whereas this module carries "0".
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; Attribute group for the intrinsics declared nounwind + readnone.
attributes #1 = { nounwind readnone }
; Attribute group for llvm.AMDIL.clamp. and llvm.AMDGPU.lrp: readnone only.
attributes #2 = { readnone }
; Metadata node named "const".
; NOTE(review): no reference to !0 is visible in this part of the dump; the
; first shader in the dump attaches an identical node to loads via !tbaa.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000
v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001
v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100
v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101
v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200
v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201
v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400
v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401
v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500
v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501
v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600
v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601
v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700
v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701
v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800
v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801
v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900
v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901
v_interp_p1_f32 v10, v0, 3, 2, [m0] ; C8280B00
v_interp_p2_f32 v10, [v10], v1, 3, 2, [m0] ; C8290B01
v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00
v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01
v_interp_p1_f32 v17, v0, 1, 3, [m0] ; C8440D00
v_interp_p2_f32 v17, [v17], v1, 1, 3, [m0] ; C8450D01
v_interp_p1_f32 v18, v0, 2, 3, [m0] ; C8480E00
v_interp_p2_f32 v18, [v18], v1, 2, 3, [m0] ; C8490E01
v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00
v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01
v_interp_p1_f32 v20, v0, 0, 4, [m0] ; C8501000
v_interp_p2_f32 v20, [v20], v1, 0, 4, [m0] ; C8511001
v_interp_p1_f32 v27, v0, 1, 4, [m0] ; C86C1100
v_interp_p2_f32 v27, [v27], v1, 1, 4, [m0] ; C86D1101
v_interp_p1_f32 v13, v0, 2, 4, [m0] ; C8341200
v_interp_p2_f32 v13, [v13], v1, 2, 4, [m0] ; C8351201
v_interp_p1_f32 v8, v0, 3, 4, [m0] ; C8201300
v_interp_p2_f32 v8, [v8], v1, 3, 4, [m0] ; C8211301
v_interp_p1_f32 v9, v0, 0, 5, [m0] ; C8241400
v_interp_p2_f32 v9, [v9], v1, 0, 5, [m0] ; C8251401
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v0, v0, 1, 5, [m0] ; C8001500
v_interp_p2_f32 v0, [v0], v1, 1, 5, [m0] ; C8011501
v_mov_b32_e32 v1, 0x7fffffff ; 7E0202FF 7FFFFFFF
v_and_b32_e32 v16, v17, v1 ; 36200311
v_mul_f32_e64 v17, |v17|, |v17| ; D2100311 00022311
v_mad_f32 v17, |v18|, |v18|, v17 ; D2820311 04462512
v_and_b32_e32 v32, v18, v1 ; 36400312
v_and_b32_e32 v31, v19, v1 ; 363E0313
v_mad_f32 v1, |v19|, |v19|, v17 ; D2820301 04462713
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s1, s[8:11], 0x24 ; C2008924
s_buffer_load_dword s0, s[8:11], 0x20 ; C2000920
v_rsq_clamp_f32_e32 v33, v1 ; 7E425901
v_add_f32_e32 v1, 0.5, v11 ; 060216F0
v_add_f32_e32 v11, 0.5, v12 ; 061618F0
v_add_f32_e32 v12, 0.5, v14 ; 06181CF0
v_floor_f32_e32 v1, v1 ; 7E024901
v_floor_f32_e32 v14, v11 ; 7E1C490B
v_mov_b32_e32 v11, 0x42800000 ; 7E1602FF 42800000
v_cmp_le_f32_e32 vcc, v11, v1 ; 7C06030B
v_floor_f32_e32 v17, v12 ; 7E22490C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v12, s1 ; 7E180201
v_mul_f32_e32 v19, s0, v1 ; 10260200
v_floor_f32_e32 v18, v19 ; 7E244913
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
v_mov_b32_e32 v11, 0xc2800000 ; 7E1602FF C2800000
v_add_f32_e32 v1, v1, v11 ; 06021701
v_mul_f32_e32 v11, s1, v1 ; 10160201
v_floor_f32_e32 v11, v11 ; 7E16490B
v_mul_f32_e32 v21, s1, v11 ; 102A1601
v_mad_f32 v24, v1, s1, -v11 ; D2820018 842C0301
v_floor_f32_e32 v1, v21 ; 7E024915
v_mad_f32 v25, v11, s1, -v1 ; D2820019 8404030B
v_add_f32_e32 v11, 4.0, v1 ; 061602F6
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v1, s0 ; 7E020200
v_mov_b32_e32 v30, v12 ; 7E3C030C
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v11, s0, v18 ; 10162400
v_floor_f32_e32 v21, v19 ; 7E2A4913
v_subrev_f32_e32 v24, v21, v19 ; 0A302715
v_floor_f32_e32 v11, v11 ; 7E16490B
v_mad_f32 v25, v18, s0, -v11 ; D2820019 842C0112
v_mov_b32_e32 v30, v1 ; 7E3C0301
s_or_b64 exec, exec, s[2:3] ; 88FE027E
v_mul_f32_e32 v19, s0, v14 ; 10261C00
v_floor_f32_e32 v18, v19 ; 7E244913
v_mov_b32_e32 v21, 0x42800000 ; 7E2A02FF 42800000
v_cmp_le_f32_e32 vcc, v21, v14 ; 7C061D15
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
v_mov_b32_e32 v21, 0xc2800000 ; 7E2A02FF C2800000
v_add_f32_e32 v14, v14, v21 ; 061C2B0E
v_mul_f32_e32 v21, s1, v14 ; 102A1C01
v_floor_f32_e32 v21, v21 ; 7E2A4915
v_mul_f32_e32 v23, s1, v21 ; 102E2A01
v_mad_f32 v22, v14, s1, -v21 ; D2820016 8454030E
v_floor_f32_e32 v14, v23 ; 7E1C4917
v_mad_f32 v23, v21, s1, -v14 ; D2820017 84380315
v_add_f32_e32 v21, 4.0, v14 ; 062A1CF6
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v28, v12 ; 7E38030C
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v14, s0, v18 ; 101C2400
v_floor_f32_e32 v21, v19 ; 7E2A4913
v_subrev_f32_e32 v22, v21, v19 ; 0A2C2715
v_floor_f32_e32 v21, v14 ; 7E2A490E
v_mad_f32 v23, v18, s0, -v21 ; D2820017 84540112
v_mov_b32_e32 v28, v1 ; 7E380301
s_or_b64 exec, exec, s[2:3] ; 88FE027E
s_buffer_load_dword s13, s[8:11], 0xb ; C206890B
s_buffer_load_dword s14, s[8:11], 0x28 ; C2070928
s_buffer_load_dword s12, s[8:11], 0x2c ; C206092C
v_mul_f32_e32 v35, s0, v17 ; 10462200
v_floor_f32_e32 v34, v35 ; 7E444923
v_mov_b32_e32 v14, 0x42800000 ; 7E1C02FF 42800000
v_cmp_le_f32_e32 vcc, v14, v17 ; 7C06230E
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[2:3], vcc ; BE82246A
s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E
v_mov_b32_e32 v14, 0xc2800000 ; 7E1C02FF C2800000
v_add_f32_e32 v14, v17, v14 ; 061C1D11
v_mul_f32_e32 v17, s1, v14 ; 10221C01
v_floor_f32_e32 v17, v17 ; 7E224911
v_mul_f32_e32 v19, s1, v17 ; 10262201
v_mad_f32 v18, v14, s1, -v17 ; D2820012 8444030E
v_floor_f32_e32 v14, v19 ; 7E1C4913
v_mad_f32 v19, v17, s1, -v14 ; D2820013 84380311
v_add_f32_e32 v14, 4.0, v14 ; 061C1CF6
s_or_saveexec_b64 s[2:3], s[2:3] ; BE822502
v_mov_b32_e32 v17, s13 ; 7E22020D
v_mov_b32_e32 v26, s14 ; 7E34020E
s_buffer_load_dword s43, s[8:11], 0x0 ; C2158900
s_buffer_load_dword s41, s[8:11], 0x1 ; C2148901
s_buffer_load_dword s42, s[8:11], 0x2 ; C2150902
s_buffer_load_dword s32, s[8:11], 0x4 ; C2100904
s_buffer_load_dword s31, s[8:11], 0x5 ; C20F8905
s_buffer_load_dword s30, s[8:11], 0x6 ; C20F0906
s_buffer_load_dword s13, s[8:11], 0xa ; C206890A
s_buffer_load_dword s39, s[8:11], 0x30 ; C2138930
s_buffer_load_dword s40, s[8:11], 0x34 ; C2140934
s_buffer_load_dword s44, s[8:11], 0x38 ; C2160938
s_buffer_load_dword s33, s[8:11], 0x3c ; C210893C
s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D
s_buffer_load_dword s35, s[8:11], 0x3e ; C211893E
s_buffer_load_dword s38, s[8:11], 0x44 ; C2130944
s_buffer_load_dword s37, s[8:11], 0x45 ; C2128945
s_buffer_load_dword s36, s[8:11], 0x46 ; C2120946
v_mov_b32_e32 v29, s12 ; 7E3A020C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[2:3] ; 89FE027E
v_mul_f32_e32 v12, s0, v34 ; 10184400
v_floor_f32_e32 v14, v35 ; 7E1C4923
v_subrev_f32_e32 v18, v14, v35 ; 0A24470E
v_floor_f32_e32 v14, v12 ; 7E1C490C
v_mad_f32 v19, v34, s0, -v14 ; D2820013 84380122
v_mov_b32_e32 v12, v1 ; 7E180301
s_or_b64 exec, exec, s[2:3] ; 88FE027E
v_mac_f32_e32 v17, s13, v15 ; 3E221E0D
v_add_f32_e64 v1, 0, v17 clamp ; D2060801 00022280
v_cmp_neq_f32_e64 s[28:29], s0, v30 ; D01A001C 00023C00
v_cmp_eq_f32_e64 s[22:23], 4.0, v11 ; D0040016 000216F6
v_cmp_eq_f32_e64 s[18:19], 2.0, v11 ; D0040012 000216F4
v_cmp_eq_f32_e64 s[14:15], 1.0, v11 ; D004000E 000216F2
v_cmp_eq_f32_e64 s[16:17], 0, v11 ; D0040010 00021680
v_cmp_neq_f32_e64 s[24:25], s0, v28 ; D01A0018 00023800
v_cmp_neq_f32_e64 s[26:27], s0, v12 ; D01A001A 00021800
v_cmp_eq_f32_e64 s[20:21], 4.0, v21 ; D0040014 00022AF6
v_cmp_eq_f32_e64 s[12:13], 2.0, v21 ; D004000C 00022AF4
v_cmp_eq_f32_e64 s[8:9], 1.0, v21 ; D0040008 00022AF2
v_cmp_eq_f32_e64 s[10:11], 0, v21 ; D004000A 00022A80
v_cmp_eq_f32_e32 vcc, 4.0, v14 ; 7C041CF6
v_subrev_f32_e32 v15, s43, v20 ; 0A1E282B
v_mul_f32_e32 v34, v15, v15 ; 10441F0F
v_cmp_eq_f32_e64 s[0:1], 2.0, v14 ; D0040000 00021CF4
v_mov_b32_e32 v15, 0xbe4ccccd ; 7E1E02FF BE4CCCCD
v_mad_f32 v17, v33, v16, v15 ; D2820011 043E2121
v_mad_f32 v16, v33, v32, v15 ; D2820010 043E4121
v_mac_f32_e32 v15, v33, v31 ; 3E1E3F21
v_subrev_f32_e32 v31, s41, v27 ; 0A3E3629
v_mac_f32_e32 v34, v31, v31 ; 3E443F1F
v_subrev_f32_e32 v31, s42, v13 ; 0A3E1A2A
v_mac_f32_e32 v34, v31, v31 ; 3E443F1F
v_mul_f32_e32 v31, s44, v34 ; 103E442C
v_log_f32_e32 v31, v31 ; 7E3E4F1F
v_cmp_eq_f32_e64 s[2:3], 1.0, v14 ; D0040002 00021CF2
v_cndmask_b32_e64 v32, v26, v29, s[28:29] ; D2000020 00723B1A
v_rcp_f32_e32 v32, v32 ; 7E405520
v_mul_f32_e32 v31, 0x3f317218, v31 ; 103E3EFF 3F317218
v_mov_b32_e32 v33, 0x3b000000 ; 7E4202FF 3B000000
v_mov_b32_e32 v34, 0x3b800000 ; 7E4402FF 3B800000
v_cndmask_b32_e64 v35, v34, v33, s[28:29] ; D2000023 00724322
v_mul_f32_e32 v36, v32, v20 ; 10482920
v_floor_f32_e32 v36, v36 ; 7E484924
v_mad_f32 v36, v20, v32, -v36 ; D2820024 84924114
v_mul_f32_e32 v37, v32, v27 ; 104A3720
v_floor_f32_e32 v37, v37 ; 7E4A4925
v_mad_f32 v37, v27, v32, -v37 ; D2820025 8496411B
v_add_f32_e64 v38, s39, s39 ; D2060026 00004E27
v_mad_f32 v39, -v38, v35, 1.0 ; D2820027 23CA4726
v_mul_f32_e32 v35, s39, v35 ; 10464627
v_mul_f32_e32 v40, v32, v13 ; 10501B20
v_floor_f32_e32 v40, v40 ; 7E504928
v_mad_f32 v32, v13, v32, -v40 ; D2820020 84A2410D
v_mad_f32 v36, v39, v36, v35 ; D2820024 048E4927
v_mad_f32 v37, v39, v37, v35 ; D2820025 048E4B27
v_mac_f32_e32 v35, v39, v32 ; 3E464127
v_mul_f32_e32 v41, s40, v31 ; 10523E28
v_mad_f32 v39, v30, v36, v24 ; D2820027 0462491E
v_mad_f32 v40, v30, v37, v25 ; D2820028 04664B1E
v_cndmask_b32_e64 v31, v26, v29, s[24:25] ; D200001F 00623B1A
v_rcp_f32_e32 v31, v31 ; 7E3E551F
v_mac_f32_e32 v24, v30, v35 ; 3E30471E
v_mov_b32_e32 v42, v24 ; 7E540318
v_mov_b32_e32 v43, v25 ; 7E560319
v_mov_b32_e32 v44, v26 ; 7E58031A
v_mov_b32_e32 v45, v27 ; 7E5A031B
v_mac_f32_e32 v25, v30, v36 ; 3E32491E
v_mul_f32_e32 v30, v31, v20 ; 103C291F
v_floor_f32_e32 v30, v30 ; 7E3C491E
v_mad_f32 v30, v20, v31, -v30 ; D282001E 847A3F14
v_mul_f32_e32 v32, v31, v27 ; 1040371F
v_floor_f32_e32 v32, v32 ; 7E404920
v_mad_f32 v32, v27, v31, -v32 ; D2820020 84823F1B
v_mul_f32_e32 v35, v31, v13 ; 10461B1F
v_floor_f32_e32 v35, v35 ; 7E464923
v_mad_f32 v31, v13, v31, -v35 ; D282001F 848E3F0D
v_cndmask_b32_e64 v29, v26, v29, s[26:27] ; D200001D 006A3B1A
v_cndmask_b32_e64 v26, v34, v33, s[24:25] ; D200001A 00624322
v_cndmask_b32_e64 v33, v34, v33, s[26:27] ; D2000021 006A4322
v_mov_b32_e32 v43, v40 ; 7E560328
s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500
s_load_dwordx4 s[40:43], s[4:5], 0x4 ; C0940504
s_load_dwordx4 s[60:63], s[4:5], 0x8 ; C09E0508
s_load_dwordx4 s[72:75], s[4:5], 0xc ; C0A4050C
s_load_dwordx4 s[84:87], s[4:5], 0x10 ; C0AA0510
s_load_dwordx8 s[88:95], s[6:7], 0x20 ; C0EC0720
s_load_dwordx8 s[76:83], s[6:7], 0x18 ; C0E60718
s_load_dwordx8 s[64:71], s[6:7], 0x10 ; C0E00710
s_load_dwordx8 s[52:59], s[6:7], 0x8 ; C0DA0708
s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700
v_mad_f32 v34, -v38, v26, 1.0 ; D2820022 23CA3526
v_mul_f32_e32 v35, s39, v26 ; 10463427
v_mad_f32 v30, v34, v30, v35 ; D282001E 048E3D22
v_mad_f32 v32, v34, v32, v35 ; D2820020 048E4122
v_mac_f32_e32 v35, v34, v31 ; 3E463F22
v_mov_b32_e32 v44, v41 ; 7E580329
v_mov_b32_e32 v26, v41 ; 7E340329
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[46:48], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[88:95], s[84:87] ; F0900700 02B62E27
image_sample_l v[49:51], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[76:83], s[72:75] ; F0900700 02533127
image_sample_l v[52:54], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[60:63] ; F0900700 01F03427
image_sample_l v[55:57], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[40:43] ; F0900700 014D3727
image_sample_l v[58:60], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[24:27] ; F0900700 00CB3A27
v_mad_f32 v39, v28, v30, v22 ; D2820027 045A3D1C
v_mad_f32 v40, v28, v32, v23 ; D2820028 045E411C
v_rcp_f32_e32 v29, v29 ; 7E3A551D
v_mac_f32_e32 v22, v28, v35 ; 3E2C471C
v_mul_f32_e32 v31, s39, v33 ; 103E4227
v_mov_b32_e32 v34, v22 ; 7E440316
v_mov_b32_e32 v35, v23 ; 7E460317
v_mov_b32_e32 v36, v24 ; 7E480318
v_mov_b32_e32 v37, v25 ; 7E4A0319
v_mac_f32_e32 v23, v28, v30 ; 3E2E3D1C
v_mul_f32_e32 v28, v29, v20 ; 1038291D
v_floor_f32_e32 v28, v28 ; 7E38491C
v_mad_f32 v20, v20, v29, -v28 ; D2820014 84723B14
v_mul_f32_e32 v28, v29, v27 ; 1038371D
v_floor_f32_e32 v28, v28 ; 7E38491C
v_mad_f32 v27, v27, v29, -v28 ; D282001B 84723B1B
v_mul_f32_e32 v28, v29, v13 ; 10381B1D
v_floor_f32_e32 v28, v28 ; 7E38491C
v_mad_f32 v13, v13, v29, -v28 ; D282000D 84723B0D
v_mad_f32 v28, -v38, v33, 1.0 ; D282001C 23CA4326
v_mad_f32 v20, v28, v20, v31 ; D2820014 047E291C
v_mad_f32 v27, v28, v27, v31 ; D282001B 047E371C
v_mac_f32_e32 v31, v28, v13 ; 3E3E1B1C
image_sample_l v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[88:95], s[84:87] ; F0900700 02B61C2A
image_sample_l v[61:63], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[76:83], s[72:75] ; F0900700 02533D2A
image_sample_l v[64:66], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[64:71], s[60:63] ; F0900700 01F0402A
image_sample_l v[67:69], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[52:59], s[40:43] ; F0900700 014D432A
image_sample_l v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[44:51], s[24:27] ; F0900700 00CB2A2A
v_mov_b32_e32 v35, v40 ; 7E460328
image_sample_l v[70:72], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[88:95], s[84:87] ; F0900700 02B64618
image_sample_l v[73:75], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[76:83], s[72:75] ; F0900700 02534918
image_sample_l v[76:78], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[64:71], s[60:63] ; F0900700 01F04C18
image_sample_l v[79:81], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[52:59], s[40:43] ; F0900700 014D4F18
image_sample_l v[82:84], 7, 0, 0, 0, 0, 0, 0, 0, v[24:27], s[44:51], s[24:27] ; F0900700 00CB5218
v_mov_b32_e32 v36, v41 ; 7E480329
v_mov_b32_e32 v24, v41 ; 7E300329
s_waitcnt vmcnt(5) ; BF8C0775
image_sample_l v[85:87], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[88:95], s[84:87] ; F0900700 02B65527
image_sample_l v[88:90], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[76:83], s[72:75] ; F0900700 02535827
image_sample_l v[91:93], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[60:63] ; F0900700 01F05B27
image_sample_l v[94:96], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[40:43] ; F0900700 014D5E27
image_sample_l v[97:99], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[24:27] ; F0900700 00CB6127
v_mad_f32 v39, v12, v20, v18 ; D2820027 044A290C
v_mad_f32 v40, v12, v27, v19 ; D2820028 044E370C
v_mac_f32_e32 v18, v12, v31 ; 3E243F0C
v_mov_b32_e32 v100, v18 ; 7EC80312
v_mov_b32_e32 v101, v19 ; 7ECA0313
v_mov_b32_e32 v102, v20 ; 7ECC0314
v_mov_b32_e32 v103, v21 ; 7ECE0315
v_mac_f32_e32 v19, v12, v20 ; 3E26290C
image_sample_l v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[88:95], s[84:87] ; F0900700 02B61922
image_sample_l v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[76:83], s[72:75] ; F0900700 02531F22
image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[64:71], s[60:63] ; F0900700 01F06822
image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[52:59], s[40:43] ; F0900700 014D6B22
image_sample_l v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[44:51], s[24:27] ; F0900700 00CB2222
v_mov_b32_e32 v101, v40 ; 7ECA0328
s_waitcnt vmcnt(4) ; BF8C0774
image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[88:95], s[84:87] ; F0900700 02B66E16
image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[76:83], s[72:75] ; F0900700 02537116
image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[64:71], s[60:63] ; F0900700 01F07416
image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[52:59], s[40:43] ; F0900700 014D7716
image_sample_l v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[44:51], s[24:27] ; F0900700 00CB1616
v_mov_b32_e32 v102, v41 ; 7ECC0329
v_mov_b32_e32 v20, v41 ; 7E280329
image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[88:95], s[84:87] ; F0900700 02B67A27
image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[76:83], s[72:75] ; F0900700 02537D27
image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[64:71], s[60:63] ; F0900700 01F08027
image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[52:59], s[40:43] ; F0900700 014D8327
image_sample_l v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[24:27] ; F0900700 00CB2527
image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[88:95], s[84:87] ; F0900700 02B68664
image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[76:83], s[72:75] ; F0900700 02538964
image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[64:71], s[60:63] ; F0900700 01F08C64
image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[52:59], s[40:43] ; F0900700 014D8F64
image_sample_l v[100:102], 7, 0, 0, 0, 0, 0, 0, 0, v[100:103], s[44:51], s[24:27] ; F0900700 00CB6464
image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[88:95], s[84:87] ; F0900700 02B69212
image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[76:83], s[72:75] ; F0900700 02539512
image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[64:71], s[60:63] ; F0900700 01F09812
v_cndmask_b32_e64 v12, 0, 1.0, s[22:23] ; D200000C 0059E480
v_mov_b32_e32 v13, 0x40400000 ; 7E1A02FF 40400000
v_cmp_eq_f32_e64 s[22:23], v11, v13 ; D0040016 00021B0B
v_cndmask_b32_e64 v11, 0, 1.0, s[22:23] ; D200000B 0059E480
v_cndmask_b32_e64 v40, 0, 1.0, s[18:19] ; D2000028 0049E480
v_cndmask_b32_e64 v41, 0, 1.0, s[14:15] ; D2000029 0039E480
v_cndmask_b32_e64 v45, 0, 1.0, s[16:17] ; D200002D 0041E480
v_cndmask_b32_e64 v103, 0, 1.0, s[20:21] ; D2000067 0051E480
v_cmp_eq_f32_e64 s[14:15], v21, v13 ; D004000E 00021B15
v_cndmask_b32_e64 v21, 0, 1.0, s[14:15] ; D2000015 0039E480
v_cndmask_b32_e64 v155, 0, 1.0, s[12:13] ; D200009B 0031E480
v_cndmask_b32_e64 v156, 0, 1.0, s[8:9] ; D200009C 0021E480
v_cndmask_b32_e64 v157, 0, 1.0, s[10:11] ; D200009D 0029E480
v_cmp_eq_f32_e64 s[8:9], 0, v14 ; D0040008 00021C80
v_cndmask_b32_e64 v158, 0, 1.0, s[8:9] ; D200009E 0021E480
v_mul_f32_e32 v58, v45, v58 ; 1074752D
v_mul_f32_e32 v59, v45, v59 ; 1076772D
v_mul_f32_e32 v60, v45, v60 ; 1078792D
v_mac_f32_e32 v58, v41, v55 ; 3E746F29
v_mac_f32_e32 v59, v41, v56 ; 3E767129
v_mac_f32_e32 v60, v41, v57 ; 3E787329
v_mul_f32_e32 v42, v45, v42 ; 1054552D
v_mul_f32_e32 v43, v45, v43 ; 1056572D
v_mul_f32_e32 v44, v45, v44 ; 1058592D
v_mac_f32_e32 v42, v41, v67 ; 3E548729
v_mac_f32_e32 v43, v41, v68 ; 3E568929
v_mac_f32_e32 v44, v41, v69 ; 3E588B29
v_mul_f32_e32 v55, v45, v82 ; 106EA52D
v_mul_f32_e32 v56, v45, v83 ; 1070A72D
v_mul_f32_e32 v45, v45, v84 ; 105AA92D
v_mac_f32_e32 v55, v41, v79 ; 3E6E9F29
v_mac_f32_e32 v56, v41, v80 ; 3E70A129
v_mac_f32_e32 v45, v41, v81 ; 3E5AA329
v_mac_f32_e32 v58, v40, v52 ; 3E746928
v_mac_f32_e32 v59, v40, v53 ; 3E766B28
v_mac_f32_e32 v60, v40, v54 ; 3E786D28
v_mac_f32_e32 v42, v40, v64 ; 3E548128
v_mac_f32_e32 v43, v40, v65 ; 3E568328
v_mac_f32_e32 v44, v40, v66 ; 3E588528
v_mac_f32_e32 v55, v40, v76 ; 3E6E9928
v_mac_f32_e32 v56, v40, v77 ; 3E709B28
v_mac_f32_e32 v45, v40, v78 ; 3E5A9D28
v_mac_f32_e32 v58, v11, v49 ; 3E74630B
v_mac_f32_e32 v59, v11, v50 ; 3E76650B
v_mac_f32_e32 v60, v11, v51 ; 3E78670B
v_mac_f32_e32 v42, v11, v61 ; 3E547B0B
v_mac_f32_e32 v43, v11, v62 ; 3E567D0B
v_mac_f32_e32 v44, v11, v63 ; 3E587F0B
v_mac_f32_e32 v55, v11, v73 ; 3E6E930B
v_mac_f32_e32 v56, v11, v74 ; 3E70950B
v_mac_f32_e32 v45, v11, v75 ; 3E5A970B
v_mac_f32_e32 v58, v12, v46 ; 3E745D0C
v_mac_f32_e32 v59, v12, v47 ; 3E765F0C
v_mac_f32_e32 v60, v12, v48 ; 3E78610C
v_mac_f32_e32 v42, v12, v28 ; 3E54390C
v_mac_f32_e32 v43, v12, v29 ; 3E563B0C
v_mac_f32_e32 v44, v12, v30 ; 3E583D0C
v_mac_f32_e32 v55, v12, v70 ; 3E6E8D0C
v_mac_f32_e32 v56, v12, v71 ; 3E708F0C
v_mac_f32_e32 v45, v12, v72 ; 3E5A910C
v_mul_f32_e32 v11, v157, v97 ; 1016C39D
v_mul_f32_e32 v12, v157, v98 ; 1018C59D
v_mul_f32_e32 v28, v157, v99 ; 1038C79D
v_mac_f32_e32 v11, v156, v94 ; 3E16BD9C
v_mac_f32_e32 v12, v156, v95 ; 3E18BF9C
v_mac_f32_e32 v28, v156, v96 ; 3E38C19C
s_waitcnt ; BF8C077F
v_mul_f32_e32 v29, v157, v34 ; 103A459D
v_mul_f32_e32 v30, v157, v35 ; 103C479D
v_mul_f32_e32 v34, v157, v36 ; 1044499D
v_mac_f32_e32 v29, v156, v107 ; 3E3AD79C
v_mac_f32_e32 v30, v156, v108 ; 3E3CD99C
v_mac_f32_e32 v34, v156, v109 ; 3E44DB9C
s_waitcnt vmcnt(13) ; BF8C077D
v_mul_f32_e32 v22, v157, v22 ; 102C2D9D
v_mul_f32_e32 v23, v157, v23 ; 102E2F9D
v_mul_f32_e32 v24, v157, v24 ; 1030319D
v_mac_f32_e32 v22, v156, v119 ; 3E2CEF9C
v_mac_f32_e32 v23, v156, v120 ; 3E2EF19C
v_mac_f32_e32 v24, v156, v121 ; 3E30F39C
v_mac_f32_e32 v11, v155, v91 ; 3E16B79B
v_mac_f32_e32 v12, v155, v92 ; 3E18B99B
v_mac_f32_e32 v28, v155, v93 ; 3E38BB9B
v_mac_f32_e32 v29, v155, v104 ; 3E3AD19B
v_mac_f32_e32 v30, v155, v105 ; 3E3CD39B
v_mac_f32_e32 v34, v155, v106 ; 3E44D59B
v_mac_f32_e32 v22, v155, v116 ; 3E2CE99B
v_mac_f32_e32 v23, v155, v117 ; 3E2EEB9B
v_mac_f32_e32 v24, v155, v118 ; 3E30ED9B
v_mac_f32_e32 v11, v21, v88 ; 3E16B115
v_mac_f32_e32 v12, v21, v89 ; 3E18B315
v_mac_f32_e32 v28, v21, v90 ; 3E38B515
v_mac_f32_e32 v29, v21, v31 ; 3E3A3F15
v_mac_f32_e32 v30, v21, v32 ; 3E3C4115
v_mac_f32_e32 v34, v21, v33 ; 3E444315
v_mac_f32_e32 v22, v21, v113 ; 3E2CE315
v_mac_f32_e32 v23, v21, v114 ; 3E2EE515
v_mac_f32_e32 v24, v21, v115 ; 3E30E715
v_mac_f32_e32 v11, v103, v85 ; 3E16AB67
v_mac_f32_e32 v12, v103, v86 ; 3E18AD67
v_mac_f32_e32 v28, v103, v87 ; 3E38AF67
v_mac_f32_e32 v29, v103, v25 ; 3E3A3367
v_mac_f32_e32 v30, v103, v26 ; 3E3C3567
v_mac_f32_e32 v34, v103, v27 ; 3E443767
v_mac_f32_e32 v22, v103, v110 ; 3E2CDD67
v_mac_f32_e32 v23, v103, v111 ; 3E2EDF67
v_mac_f32_e32 v24, v103, v112 ; 3E30E167
s_waitcnt vmcnt(8) ; BF8C0778
v_mul_f32_e32 v21, v158, v37 ; 102A4B9E
v_mul_f32_e32 v25, v158, v38 ; 10324D9E
v_mul_f32_e32 v26, v158, v39 ; 10344F9E
v_cndmask_b32_e64 v27, 0, 1.0, s[2:3] ; D200001B 0009E480
v_mac_f32_e32 v21, v27, v131 ; 3E2B071B
v_mac_f32_e32 v25, v27, v132 ; 3E33091B
v_mac_f32_e32 v26, v27, v133 ; 3E350B1B
s_waitcnt vmcnt(3) ; BF8C0773
v_mul_f32_e32 v31, v158, v100 ; 103EC99E
v_mul_f32_e32 v32, v158, v101 ; 1040CB9E
v_mul_f32_e32 v33, v158, v102 ; 1042CD9E
v_mac_f32_e32 v31, v27, v143 ; 3E3F1F1B
v_mac_f32_e32 v32, v27, v144 ; 3E41211B
v_mac_f32_e32 v33, v27, v145 ; 3E43231B
image_sample_l v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[52:59], s[40:43] ; F0900700 014D2312
image_sample_l v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:21], s[44:51], s[24:27] ; F0900700 00CB1212
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v18, v158, v18 ; 1024259E
v_mul_f32_e32 v19, v158, v19 ; 1026279E
v_mul_f32_e32 v20, v158, v20 ; 1028299E
v_mac_f32_e32 v18, v27, v35 ; 3E24471B
v_mac_f32_e32 v19, v27, v36 ; 3E26491B
v_mac_f32_e32 v20, v27, v37 ; 3E284B1B
v_cndmask_b32_e64 v27, 0, 1.0, s[0:1] ; D200001B 0001E480
v_mac_f32_e32 v21, v27, v128 ; 3E2B011B
v_mac_f32_e32 v25, v27, v129 ; 3E33031B
v_mac_f32_e32 v26, v27, v130 ; 3E35051B
v_mac_f32_e32 v31, v27, v140 ; 3E3F191B
v_mac_f32_e32 v32, v27, v141 ; 3E411B1B
v_mac_f32_e32 v33, v27, v142 ; 3E431D1B
v_mac_f32_e32 v18, v27, v152 ; 3E25311B
v_mac_f32_e32 v19, v27, v153 ; 3E27331B
v_mac_f32_e32 v20, v27, v154 ; 3E29351B
v_cmp_eq_f32_e64 s[0:1], v14, v13 ; D0040000 00021B0E
v_cndmask_b32_e64 v13, 0, 1.0, s[0:1] ; D200000D 0001E480
v_mac_f32_e32 v21, v13, v125 ; 3E2AFB0D
v_mac_f32_e32 v25, v13, v126 ; 3E32FD0D
v_mac_f32_e32 v26, v13, v127 ; 3E34FF0D
v_mac_f32_e32 v31, v13, v137 ; 3E3F130D
v_mac_f32_e32 v32, v13, v138 ; 3E41150D
v_mac_f32_e32 v33, v13, v139 ; 3E43170D
v_mac_f32_e32 v18, v13, v149 ; 3E252B0D
v_mac_f32_e32 v19, v13, v150 ; 3E272D0D
v_mac_f32_e32 v20, v13, v151 ; 3E292F0D
v_cndmask_b32_e64 v13, 0, 1.0, vcc ; D200000D 01A9E480
v_mac_f32_e32 v21, v13, v122 ; 3E2AF50D
v_mac_f32_e32 v25, v13, v123 ; 3E32F70D
v_mac_f32_e32 v26, v13, v124 ; 3E34F90D
v_mac_f32_e32 v31, v13, v134 ; 3E3F0D0D
v_mac_f32_e32 v32, v13, v135 ; 3E410F0D
v_mac_f32_e32 v33, v13, v136 ; 3E43110D
v_mov_b32_e32 v14, 0x6f800000 ; 7E1C02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v10|, v14 ; D008016A 00021D0A
v_mov_b32_e32 v14, 0x2f800000 ; 7E1C02FF 2F800000
v_cndmask_b32_e32 v14, 1.0, v14 ; 001C1CF2
v_mul_f32_e32 v10, v14, v10 ; 1014150E
v_rcp_f32_e32 v10, v10 ; 7E14550A
v_mac_f32_e32 v18, v13, v146 ; 3E25250D
v_mac_f32_e32 v19, v13, v147 ; 3E27270D
v_mac_f32_e32 v20, v13, v148 ; 3E29290D
v_mul_f32_e32 v6, v10, v6 ; 100C0D0A
v_mul_f32_e32 v7, v10, v7 ; 100E0F0A
v_mul_f32_e32 v35, v6, v14 ; 10461D06
v_mul_f32_e32 v36, v7, v14 ; 10481D07
s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514
s_load_dwordx8 s[4:11], s[6:7], 0x28 ; C0C20728
v_mov_b32_e32 v6, 0x40e00000 ; 7E0C02FF 40E00000
v_mul_f32_e32 v7, v6, v17 ; 100E2306
v_mul_f32_e32 v10, v6, v16 ; 10142106
v_mul_f32_e32 v6, v6, v15 ; 100C1F06
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_max_f32_e32 v10, 0, v10 ; 20141480
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_add_f32_e32 v13, v10, v7 ; 061A0F0A
v_add_f32_e32 v13, v6, v13 ; 061A1B06
v_rcp_f32_e32 v13, v13 ; 7E1A550D
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[35:36], s[4:11], s[0:3] ; F0800F00 00010E23
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v14, v14 ; 7E1C4F0E
v_log_f32_e32 v15, v15 ; 7E1E4F0F
v_log_f32_e32 v16, v16 ; 7E204F10
v_log_f32_e32 v17, v17 ; 7E224F11
v_mul_f32_e32 v7, v13, v7 ; 100E0F0D
v_mul_f32_e32 v6, v13, v6 ; 100C0D0D
v_mul_f32_e32 v21, v6, v21 ; 102A2B06
v_mac_f32_e32 v21, v7, v31 ; 3E2A3F07
v_mul_f32_e32 v25, v6, v25 ; 10323306
v_mac_f32_e32 v25, v7, v32 ; 3E324107
v_mul_f32_e32 v26, v6, v26 ; 10343506
v_mac_f32_e32 v26, v7, v33 ; 3E344307
v_mul_f32_e32 v11, v6, v11 ; 10161706
v_mac_f32_e32 v11, v7, v29 ; 3E163B07
v_mul_f32_e32 v12, v6, v12 ; 10181906
v_mac_f32_e32 v12, v7, v30 ; 3E183D07
v_mul_f32_e32 v27, v6, v28 ; 10363906
v_mac_f32_e32 v27, v7, v34 ; 3E364507
v_mul_f32_e32 v28, v6, v58 ; 10387506
v_mul_f32_e32 v29, v6, v59 ; 103A7706
v_mul_f32_e32 v6, v6, v60 ; 100C7906
v_mac_f32_e32 v28, v7, v42 ; 3E385507
v_mac_f32_e32 v29, v7, v43 ; 3E3A5707
v_mac_f32_e32 v6, v7, v44 ; 3E0C5907
v_mul_f32_e32 v7, v13, v10 ; 100E150D
v_mac_f32_e32 v21, v7, v18 ; 3E2A2507
v_mac_f32_e32 v25, v7, v19 ; 3E322707
v_mac_f32_e32 v26, v7, v20 ; 3E342907
v_mac_f32_e32 v11, v7, v22 ; 3E162D07
v_mac_f32_e32 v12, v7, v23 ; 3E182F07
v_mac_f32_e32 v27, v7, v24 ; 3E363107
v_mac_f32_e32 v28, v7, v55 ; 3E386F07
v_mac_f32_e32 v29, v7, v56 ; 3E3A7107
v_mac_f32_e32 v6, v7, v45 ; 3E0C5B07
v_mul_f32_e32 v7, v28, v3 ; 100E071C
v_mul_f32_e32 v10, v29, v3 ; 1014071D
v_mul_f32_e32 v3, v6, v3 ; 10060706
v_mac_f32_e32 v7, v11, v4 ; 3E0E090B
v_mac_f32_e32 v10, v12, v4 ; 3E14090C
v_mac_f32_e32 v3, v27, v4 ; 3E06091B
v_mac_f32_e32 v7, v21, v5 ; 3E0E0B15
v_mac_f32_e32 v10, v25, v5 ; 3E140B19
v_mac_f32_e32 v3, v26, v5 ; 3E060B1A
v_subrev_f32_e32 v4, v14, v8 ; 0A08110E
v_subrev_f32_e32 v5, v15, v9 ; 0A0A130F
v_subrev_f32_e32 v0, v16, v0 ; 0A000110
v_add_f32_e64 v6, s38, s38 ; D2060006 00004C26
v_mov_b32_e32 v8, 0x3f28f5c3 ; 7E1002FF 3F28F5C3
v_mul_f32_e32 v9, v8, v4 ; 10120908
v_mul_f32_e32 v9, v9, v9 ; 10121309
v_mul_f32_e32 v6, v9, v6 ; 100C0D09
v_add_f32_e64 v9, s37, s37 ; D2060009 00004A25
v_mul_f32_e32 v11, v8, v5 ; 10160B08
v_mul_f32_e32 v11, v11, v11 ; 1016170B
v_mul_f32_e32 v9, v11, v9 ; 1012130B
v_mul_f32_e32 v8, v8, v0 ; 10100108
v_add_f32_e64 v11, s36, s36 ; D206000B 00004824
v_mul_f32_e32 v8, v8, v8 ; 10101108
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_max_f32_e32 v6, s33, v6 ; 200C0C21
v_max_f32_e32 v9, s34, v9 ; 20121222
v_max_f32_e32 v8, s35, v8 ; 20101023
v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2
v_min_f32_e32 v9, 1.0, v9 ; 1E1212F2
v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2
v_add_f32_e64 v11, 0, -v17 clamp ; D206080B 40022280
v_mul_f32_e32 v2, v2, v11 ; 10041702
v_mul_f32_e32 v11, 0.5, v4 ; 101608F0
v_mul_f32_e32 v12, 0.5, v5 ; 10180AF0
v_mul_f32_e32 v13, 0.5, v0 ; 101A00F0
v_mul_f32_e32 v11, v11, v7 ; 10160F0B
v_mul_f32_e32 v12, v12, v10 ; 1018150C
v_mul_f32_e32 v13, v13, v3 ; 101A070D
v_mac_f32_e32 v11, v2, v4 ; 3E160902
v_mac_f32_e32 v12, v2, v5 ; 3E180B02
v_mac_f32_e32 v13, v2, v0 ; 3E1A0102
v_mac_f32_e32 v11, v7, v6 ; 3E160D07
v_mac_f32_e32 v12, v10, v9 ; 3E18130A
v_mac_f32_e32 v13, v3, v8 ; 3E1A1103
v_sub_f32_e32 v0, 1.0, v1 ; 080002F2
v_mul_f32_e32 v2, s32, v0 ; 10040020
v_mac_f32_e32 v2, v11, v1 ; 3E04030B
v_mul_f32_e32 v3, s31, v0 ; 1006001F
v_mac_f32_e32 v3, v12, v1 ; 3E06030C
v_mul_f32_e32 v0, s30, v0 ; 1000001E
v_mac_f32_e32 v0, v13, v1 ; 3E00030D
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 160
Code Size: 3032 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..4]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[1], IN[0].xxxx
1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0]
4: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI program listed above:
; transforms IN[0] by the four constant vectors CONST[1..4] and applies a
; per-component scale/offset from CONST[0] to IN[1].xy, then exports both
; results.
; Arguments used in this body: %1 (table from which the constant-buffer
; descriptor is loaded), %4 (table from which the two vertex-input descriptors
; are loaded), and the integers %5 and %7, whose sum is the index passed to
; llvm.SI.vs.load.input.
; NOTE(review): presumably %5 is a base/start vertex and %7 the vertex id -
; confirm against the radeonsi calling convention.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte descriptor stored in slot 0 of the table passed as %1.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Fetch 20 consecutive floats (byte offsets 0..76) through that descriptor.
; Offsets 0-12 are used below as CONST[0].xyzw of the TGSI program.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
; Offsets 16-28: CONST[1].xyzw.
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
; Offsets 32-44: CONST[2].xyzw.
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
; Offsets 48-60: CONST[3].xyzw.
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Offsets 64-76: CONST[4].xyzw.
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; Vertex input 0 (IN[0]): descriptor from slot 0 of the table passed as %4,
; fetched at index %5 + %7 and split into its four components.
%33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
%35 = add i32 %5, %7
%36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35)
%37 = extractelement <4 x float> %36, i32 0
%38 = extractelement <4 x float> %36, i32 1
%39 = extractelement <4 x float> %36, i32 2
%40 = extractelement <4 x float> %36, i32 3
; Vertex input 1 (IN[1]): descriptor from slot 1 of the same table. The fetch
; index is recomputed (%43 is the same expression as %35). Only components 0
; and 1 are extracted, matching the .xy write mask of TGSI instruction 4.
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
; TGSI 0: TEMP[0] = CONST[1] * IN[0].x
%47 = fmul float %17, %37
%48 = fmul float %18, %37
%49 = fmul float %19, %37
%50 = fmul float %20, %37
; TGSI 1: TEMP[0] = CONST[2] * IN[0].y + TEMP[0]
%51 = fmul float %21, %38
%52 = fadd float %51, %47
%53 = fmul float %22, %38
%54 = fadd float %53, %48
%55 = fmul float %23, %38
%56 = fadd float %55, %49
%57 = fmul float %24, %38
%58 = fadd float %57, %50
; TGSI 2: TEMP[0] = CONST[3] * IN[0].z + TEMP[0]
%59 = fmul float %25, %39
%60 = fadd float %59, %52
%61 = fmul float %26, %39
%62 = fadd float %61, %54
%63 = fmul float %27, %39
%64 = fadd float %63, %56
%65 = fmul float %28, %39
%66 = fadd float %65, %58
; TGSI 3: TEMP[0] = CONST[4] * IN[0].w + TEMP[0]
%67 = fmul float %29, %40
%68 = fadd float %67, %60
%69 = fmul float %30, %40
%70 = fadd float %69, %62
%71 = fmul float %31, %40
%72 = fadd float %71, %64
%73 = fmul float %32, %40
%74 = fadd float %73, %66
; TGSI 4: TEMP[1].xy = IN[1].xy * CONST[0].xy + CONST[0].zw
%75 = fmul float %45, %13
%76 = fadd float %75, %15
%77 = fmul float %46, %14
%78 = fadd float %77, %16
; TGSI 5: export TEMP[1] for OUT[1] (GENERIC[0]); the unwritten z/w lanes are
; emitted as constant 0.0.
; NOTE(review): fourth operand 32 is presumably the first parameter export
; target - confirm against the SI export encoding.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %78, float 0.000000e+00, float 0.000000e+00)
; TGSI 6: export TEMP[0] for OUT[0] (POSITION).
; NOTE(review): fourth operand 12 is presumably the position export target and
; the third operand 1 the "done" flag of the final export - confirm.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listing for the vertex shader @main whose LLVM IR is printed above.
; NOTE(review): the hex words after ';' on each instruction line appear to be the instruction encodings - confirm.
; The result of this add (v0) is the index operand of both buffer_load_format_xyzw instructions below;
; the IR computes the fetch index as %5 + %7.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Three 4-dword loads: s[0:3] is then used by every s_buffer_load_dword, s[4:7] and s[8:11] by the two vertex fetches.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; The listing contains twenty s_buffer_load_dword instructions in total (offsets 0x0-0x13),
; one per llvm.SI.load.const call in the IR (byte offsets 0-76).
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
; v[1:4] receives the first vertex input, v[5:8] the second (IR: %36 and %44).
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102
s_buffer_load_dword s6, s[0:3], 0x3 ; C2030103
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107
s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v0 = s5 + s12 * v5 and v7 = s6 + s4 * v6: the two values %76 and %78 of the IR
; (second input .xy scaled by the constants at offsets 0/1 and biased by those at offsets 2/3).
v_mov_b32_e32 v0, s5 ; 7E000205
v_mov_b32_e32 v7, s6 ; 7E0E0206
v_mac_f32_e32 v0, s12, v5 ; 3E000A0C
v_mac_f32_e32 v7, s4, v6 ; 3E0E0C04
; s4, s5, s6 and s12 are reloaded with further constants now that the instructions above have consumed their previous contents.
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0xd ; C203010D
s_buffer_load_dword s12, s[0:3], 0xe ; C206010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
; Four running sums in v5, v6, v8 and v1 (IR: %68, %70, %72, %74). The terms that only need the
; constants loaded before the reload (offsets 0x4-0xa) are issued ahead of the s_waitcnt below.
v_mul_f32_e32 v5, s7, v1 ; 100A0207
v_mac_f32_e32 v5, s11, v2 ; 3E0A040B
v_mul_f32_e32 v6, s8, v1 ; 100C0208
v_mac_f32_e32 v6, s13, v2 ; 3E0C040D
v_mul_f32_e32 v8, s9, v1 ; 10100209
v_mac_f32_e32 v8, s14, v2 ; 3E10040E
v_mul_f32_e32 v1, s10, v1 ; 1002020A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s4, v2 ; 3E020404
v_mac_f32_e32 v5, s5, v3 ; 3E0A0605
v_mac_f32_e32 v6, s6, v3 ; 3E0C0606
v_mac_f32_e32 v8, s12, v3 ; 3E10060C
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v5, s16, v4 ; 3E0A0810
v_mac_f32_e32 v6, s17, v4 ; 3E0C0811
v_mac_f32_e32 v8, s18, v4 ; 3E100812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; v2 = 0 supplies the two constant 0.0 operands of the first export.
v_mov_b32_e32 v2, 0 ; 7E040280
; The two exp instructions correspond to the two llvm.SI.export calls in the IR, in the same order.
exp 15, 32, 0, 0, 0, v0, v7, v2, v2 ; F800020F 02020700
exp 15, 12, 0, 1, 0, v5, v6, v8, v1 ; F80008CF 01080605
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 228 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3..6]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.2813}
IMM[1] FLT32 { 1.0000, 0.5000, 255.0000, 0.0039}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].x, TEMP[0], SAMP[0], 2D
2: MAD TEMP[0].x, CONST[0].xxxx, TEMP[0].xxxx, CONST[0].yyyy
3: RCP TEMP[0].x, TEMP[0].xxxx
4: MOV TEMP[1].xy, IN[0].xyyy
5: TEX TEMP[1], TEMP[1], SAMP[1], 2D
6: MAD TEMP[1].xyz, TEMP[1], IMM[0].xxxx, IMM[0].yyyy
7: MUL TEMP[2].xyz, CONST[3].xyzz, TEMP[1].xxxx
8: MAD TEMP[2].xyz, CONST[4].xyzz, TEMP[1].yyyy, TEMP[2].xyzz
9: MAD TEMP[1].xyz, CONST[5].xyzz, TEMP[1].zzzz, TEMP[2].xyzz
10: MOV TEMP[2].z, -TEMP[1].zzzz
11: FSLT TEMP[3].x, TEMP[0].xxxx, IMM[0].zzzz
12: UIF TEMP[3].xxxx :0
13: ADD TEMP[2].x, TEMP[2].zzzz, IMM[1].xxxx
14: RCP TEMP[2].x, TEMP[2].xxxx
15: MUL TEMP[1].xy, TEMP[1].xyyy, TEMP[2].xxxx
16: MAD TEMP[1].xy, IMM[0].wwww, TEMP[1].xyyy, IMM[1].yyyy
17: MUL TEMP[0].xy, IMM[1].xzzz, TEMP[0].xxxx
18: FRC TEMP[0].xy, TEMP[0].xyyy
19: MOV TEMP[2].y, TEMP[0].yyyy
20: MUL TEMP[3].x, TEMP[0].yyyy, IMM[1].wwww
21: ADD TEMP[2].x, TEMP[0].xxxx, -TEMP[3].xxxx
22: MOV TEMP[1].zw, TEMP[2].yyxy
23: MOV TEMP[0], TEMP[1]
24: ELSE :0
25: MOV TEMP[0], IMM[1].yyxx
26: ENDIF
27: MOV OUT[0], TEMP[0]
28: END
; ModuleID = 'tgsi'
; LLVM IR for the fragment shader whose TGSI listing (instructions 0-28) precedes this module.
; Summary of the computation, using the TGSI names:
;   d = 1 / (CONST[0].x * tex0.x + CONST[0].y)            (tex0 sampled through SAMP[0])
;   n = tex1.xyz * 2 - 1                                   (tex1 sampled through SAMP[1])
;   if d < threshold:
;       m   = CONST[3].xyz * n.x + CONST[4].xyz * n.y + CONST[5].xyz * n.z
;       s   = 1 / (1 - m.z)
;       out = (m.x * s * IMM[0].w + 0.5, m.y * s * IMM[0].w + 0.5,
;              frac(d) - frac(255 * d) * IMM[1].w, frac(255 * d))
;   else:
;       out = (0.5, 0.5, 1.0, 1.0)
; The four results are packed pairwise with llvm.SI.packf16 and written by a single export.
; The function arguments are unnamed, so the comments below refer to them by position (%0..%21).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of the array behind argument %1 is the source of every llvm.SI.load.const in this function.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Byte offsets 0 and 4: CONST[0].x and CONST[0].y (TGSI instruction 2).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
; Operands for the first sample call: the 32-byte value at the start of argument %3 and the 16-byte value at the start of argument %2.
%26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0
%28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
; Operands for the second sample call: element 1 of the same two arrays.
%30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)*
%32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0
%33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)*
%35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0
; Two interpolated values (first operand 0 and 1), used below as the 2D coordinate of both
; texture lookups; the TGSI reads IN[0].xy for this.
%36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; Pack the coordinate into a <2 x i32> and sample the first texture (TGSI instruction 1); only component 0 is used.
%38 = bitcast float %36 to i32
%39 = bitcast float %37 to i32
%40 = insertelement <2 x i32> undef, i32 %38, i32 0
%41 = insertelement <2 x i32> %40, i32 %39, i32 1
%42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %27, <16 x i8> %29, i32 2)
%43 = extractelement <4 x float> %42, i32 0
; TGSI instructions 2-3: %46 = 1 / (CONST[0].x * tex0.x + CONST[0].y).
%44 = fmul float %24, %43
%45 = fadd float %44, %25
%46 = fdiv float 1.000000e+00, %45
; Same coordinate rebuilt (%47-%50 repeat %38-%41) and the second texture sampled (TGSI instruction 5).
%47 = bitcast float %36 to i32
%48 = bitcast float %37 to i32
%49 = insertelement <2 x i32> undef, i32 %47, i32 0
%50 = insertelement <2 x i32> %49, i32 %48, i32 1
%51 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %32, <16 x i8> %35, i32 2)
%52 = extractelement <4 x float> %51, i32 0
%53 = extractelement <4 x float> %51, i32 1
%54 = extractelement <4 x float> %51, i32 2
; TGSI instruction 6: each of the three components is scaled by 2 and biased by -1 (%56, %58, %60).
%55 = fmul float %52, 2.000000e+00
%56 = fadd float %55, -1.000000e+00
%57 = fmul float %53, 2.000000e+00
%58 = fadd float %57, -1.000000e+00
%59 = fmul float %54, 2.000000e+00
%60 = fadd float %59, -1.000000e+00
; TGSI instructions 11-12: branch on %46 < IMM[0].z. The hex literal is the exact constant, slightly
; below 1.0; the TGSI listing prints it rounded to 1.0000.
%61 = fcmp olt float %46, 0x3FEFFFDFC0000000
br i1 %61, label %IF, label %ENDIF
IF: ; preds = %main_body
; TGSI instructions 7-9 evaluated one component at a time.
; Offsets 88/72/56 are the third floats of CONST[5]/CONST[4]/CONST[3]: %69 is the .z result.
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%63 = fmul float %62, %60
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%65 = fmul float %64, %58
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%67 = fmul float %66, %56
%68 = fadd float %65, %67
%69 = fadd float %63, %68
; Offsets 84/68/52 (second floats): %77 is the .y result.
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%71 = fmul float %70, %60
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%73 = fmul float %72, %58
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%75 = fmul float %74, %56
%76 = fadd float %73, %75
%77 = fadd float %71, %76
; Offsets 80/64/48 (first floats): %85 is the .x result.
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%79 = fmul float %78, %60
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%81 = fmul float %80, %58
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%83 = fmul float %82, %56
%84 = fadd float %81, %83
%85 = fadd float %79, %84
; TGSI instructions 10 and 13-15: %87 = 1 / (1 - z); x and y are multiplied by it.
%86 = fsub float 1.000000e+00, %69
%87 = fdiv float 1.000000e+00, %86
%88 = fmul float %85, %87
%89 = fmul float %77, %87
; TGSI instruction 16: scale by IMM[0].w (printed as 0.2813 in the listing) and add 0.5.
%90 = fmul float %88, 0x3FD20033A0000000
%91 = fadd float %90, 5.000000e-01
%92 = fmul float %89, 0x3FD20033A0000000
%93 = fadd float %92, 5.000000e-01
; TGSI instructions 17-21: %96 = frac(%46), %98 = frac(255 * %46), written as x - floor(x);
; %100 = %96 - %98 * IMM[1].w (printed as 0.0039 in the listing).
%94 = fmul float %46, 2.550000e+02
%95 = call float @llvm.floor.f32(float %46)
%96 = fsub float %46, %95
%97 = call float @llvm.floor.f32(float %94)
%98 = fsub float %94, %97
%99 = fmul float %98, 0x3F70101020000000
%100 = fsub float %96, %99
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Output components x, y, z, w. The values arriving from %main_body (0.5, 0.5, 1.0, 1.0) are the
; ELSE branch of the TGSI (instruction 25: MOV TEMP[0], IMM[1].yyxx).
%temp.0 = phi float [ %91, %IF ], [ 5.000000e-01, %main_body ]
%temp1.0 = phi float [ %93, %IF ], [ 5.000000e-01, %main_body ]
%temp2.0 = phi float [ %100, %IF ], [ 1.000000e+00, %main_body ]
%temp3.0 = phi float [ %98, %IF ], [ 1.000000e+00, %main_body ]
; Pack (x, y) and (z, w) into one i32 each; the export passes each packed value twice.
; NOTE(review): this packed form presumably follows from export_16bpc = 0x3 in the shader key above - confirm.
%101 = call i32 @llvm.SI.packf16(float %temp.0, float %temp1.0)
%102 = bitcast i32 %101 to float
%103 = call i32 @llvm.SI.packf16(float %temp2.0, float %temp3.0)
%104 = bitcast i32 %103 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %102, float %104, float %102, float %104)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main; #1 is attached to the readnone intrinsic declarations above.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
; TBAA node referenced by the !tbaa tags on the loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listing for the fragment shader @main whose LLVM IR is printed above.
; NOTE(review): the hex words after ';' on each instruction line appear to be the instruction encodings - confirm.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; 0x3f7ffefe is the single-precision bit pattern of the IR compare constant 0x3FEFFFDFC0000000 (just below 1.0).
v_mov_b32_e32 v6, 0x3f7ffefe ; 7E0C02FF 3F7FFEFE
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
; v3 = 1.0 and v2 = 0.5 (together with v1 = 0.5 and v6 = 1.0 set further down) are the values the
; ENDIF phis of the IR take when the conditional region is skipped.
v_mov_b32_e32 v3, 1.0 ; 7E0602F2
v_mov_b32_e32 v2, 0.5 ; 7E0402F0
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_waitcnt lgkmcnt(0) ; BF8C007F
; Dword offsets 0x1 and 0x0: the constants %25 and %24 of the IR (byte offsets 4 and 0).
s_buffer_load_dword s20, s[0:3], 0x1 ; C20A0101
s_buffer_load_dword s21, s[0:3], 0x0 ; C20A8100
; v4 and v5 hold the two interpolated coordinates (IR: %36, %37) used by both image_sample instructions.
v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000
v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001
v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100
v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101
; First texture lookup; a single result register (v0) is written, matching the IR which extracts only component 0.
image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800100 00430004
s_waitcnt lgkmcnt(0) ; BF8C007F
; v1 = s20 + s21 * v0, then v0 = rcp(v1): IR values %44-%46.
v_mov_b32_e32 v1, s20 ; 7E020214
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v1, s21, v0 ; 3E020015
v_rcp_f32_e32 v0, v1 ; 7E005501
v_mov_b32_e32 v1, 0.5 ; 7E0202F0
; Compare against the threshold held in v6 (IR: %61), then reuse v6 for the 1.0 default.
v_cmp_gt_f32_e32 vcc, v6, v0 ; 7C080106
v_mov_b32_e32 v6, 1.0 ; 7E0C02F2
; Start of the conditional region (IR block IF): exec is narrowed here and restored by the s_or_b64 below.
s_and_saveexec_b64 s[8:9], vcc ; BE88246A
s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E
s_cbranch_execz BB0_2 ; BF880000
; Nine constants at dword offsets 0xc-0xe, 0x10-0x12 and 0x14-0x16, i.e. the byte offsets 48-88 read in the IR block IF.
s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112
s_buffer_load_dword s11, s[0:3], 0x14 ; C2058114
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
s_buffer_load_dword s13, s[0:3], 0x16 ; C2068116
s_buffer_load_dword s14, s[0:3], 0xc ; C207010C
s_buffer_load_dword s15, s[0:3], 0xd ; C207810D
s_buffer_load_dword s16, s[0:3], 0xe ; C208010E
s_buffer_load_dword s17, s[0:3], 0x10 ; C2088110
s_buffer_load_dword s18, s[0:3], 0x11 ; C2090111
; v1 = bit pattern of the IR constant 0x3FD20033A0000000 (TGSI IMM[0].w); v3 = 0x437f0000, i.e. 255.0.
v_mov_b32_e32 v1, 0x3e90019d ; 7E0202FF 3E90019D
v_mov_b32_e32 v3, 0x437f0000 ; 7E0602FF 437F0000
v_mul_f32_e32 v6, v3, v0 ; 100C0103
; NOTE(review): the second texture fetch and the *2-1 scaling are emitted inside the conditional region here,
; whereas the matching IR instructions (%51-%60) sit in main_body before the branch.
s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[24:31], s[20:23] ; F0800700 00A60704
s_waitcnt vmcnt(0) ; BF8C0770
; v2, v4, v5 = sample * 2 - 1 (IR: %56, %58, %60).
v_mad_f32 v2, 2.0, v7, -1.0 ; D2820002 03CE0EF4
v_mad_f32 v4, 2.0, v8, -1.0 ; D2820004 03CE10F4
v_mad_f32 v5, 2.0, v9, -1.0 ; D2820005 03CE12F4
; v7 accumulates the .z result (IR: %69), v8 the .y result (%77), v2 the .x result (%85);
; v7 is then replaced by 1 / (1 - v7) (IR: %86, %87).
v_mul_f32_e32 v7, s16, v2 ; 100E0410
v_mac_f32_e32 v7, s10, v4 ; 3E0E080A
v_mac_f32_e32 v7, s13, v5 ; 3E0E0A0D
v_mul_f32_e32 v8, s15, v2 ; 1010040F
v_mac_f32_e32 v8, s18, v4 ; 3E100812
v_mul_f32_e32 v2, s14, v2 ; 1004040E
v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2
v_rcp_f32_e32 v7, v7 ; 7E0E5507
v_mac_f32_e32 v2, s17, v4 ; 3E040811
v_mac_f32_e32 v8, s12, v5 ; 3E100A0C
v_mac_f32_e32 v2, s11, v5 ; 3E040A0B
; Multiply x and y by the reciprocal, then scale by v1 and add 0.5 (IR: %88-%93); results land in v2 and v1.
v_mul_f32_e32 v2, v7, v2 ; 10040507
v_mul_f32_e32 v4, v7, v8 ; 10081107
v_mad_f32 v2, v2, v1, 0.5 ; D2820002 03C20302
v_mad_f32 v1, v4, v1, 0.5 ; D2820001 03C20304
; v4 = v0 - floor(v0) and v6 = v0 * 255 - floor(v0 * 255) (IR: %96, %98);
; v3 combines them with the literal 0xbb808081, the negated TGSI IMM[1].w (IR: %100).
v_floor_f32_e32 v4, v0 ; 7E084900
v_subrev_f32_e32 v4, v4, v0 ; 0A080104
v_floor_f32_e32 v5, v6 ; 7E0A4906
v_mad_f32 v6, v0, v3, -v5 ; D2820006 84160700
v_madmk_f32_e32 v3, v6, v4, 0xbb808081 ; 40060906 BB808081
; End of the conditional region: the saved mask in s[8:9] is OR-ed back into exec.
s_or_b64 exec, exec, s[8:9] ; 88FE087E
; Pack (v2, v1) and (v3, v6), matching the two llvm.SI.packf16 calls, and export each packed value twice.
v_cvt_pkrtz_f16_f32_e32 v0, v2, v1 ; 5E000302
v_cvt_pkrtz_f16_f32_e32 v1, v3, v6 ; 5E020D03
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 12
Code Size: 340 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xyz, IN[0].xyzx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; LLVM IR for the vertex shader whose TGSI listing (instructions 0-7) precedes this module.
; In TGSI terms:
;   TEMP[0] = CONST[0] * IN[0].x + CONST[1] * IN[0].y + CONST[2] * IN[0].z + CONST[3] * IN[0].w  -> OUT[0] (POSITION)
;   TEMP[1].xyz = IN[0].xyz                                                                     -> OUT[1] (GENERIC[0])
; The function arguments are unnamed, so the comments below refer to them by position (%0..%10).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte value at index 0 of the array passed as argument %1.
; Every llvm.SI.load.const call below reads one float from it at the given byte offset.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen floats at byte offsets 0-60: CONST[0..3], four floats (16 bytes) per CONST slot.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; The only vertex input (IN[0]): fetched through element 0 of argument %4 at index %5 + %7.
; %33..%36 are its four components.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; TGSI instruction 0: TEMP[0] = CONST[0] * IN[0].x.
%37 = fmul float %13, %33
%38 = fmul float %14, %33
%39 = fmul float %15, %33
%40 = fmul float %16, %33
; TGSI instruction 1: TEMP[0] += CONST[1] * IN[0].y.
%41 = fmul float %17, %34
%42 = fadd float %41, %37
%43 = fmul float %18, %34
%44 = fadd float %43, %38
%45 = fmul float %19, %34
%46 = fadd float %45, %39
%47 = fmul float %20, %34
%48 = fadd float %47, %40
; TGSI instruction 2: TEMP[0] += CONST[2] * IN[0].z.
%49 = fmul float %21, %35
%50 = fadd float %49, %42
%51 = fmul float %22, %35
%52 = fadd float %51, %44
%53 = fmul float %23, %35
%54 = fadd float %53, %46
%55 = fmul float %24, %35
%56 = fadd float %55, %48
; TGSI instruction 3: TEMP[0] += CONST[3] * IN[0].w. %58, %60, %62, %64 are the final components.
%57 = fmul float %25, %36
%58 = fadd float %57, %50
%59 = fmul float %26, %36
%60 = fadd float %59, %52
%61 = fmul float %27, %36
%62 = fadd float %61, %54
%63 = fmul float %28, %36
%64 = fadd float %63, %56
; OUT[1] (GENERIC[0]): the unmodified input x, y, z. The TGSI never writes TEMP[1].w; the export passes 0.0 for it.
; NOTE(review): the fourth i32 operand (32 here, 12 below) looks like the export target selector - confirm against the intrinsic definition.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float 0.000000e+00)
; OUT[0] (POSITION): the four accumulated sums.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main; #1 is attached to the two readnone intrinsic declarations above.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
; TBAA node referenced by the !tbaa tags on the loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code listing for the vertex shader @main whose LLVM IR is printed above.
; NOTE(review): the hex words after ';' on each instruction line appear to be the instruction encodings - confirm.
; The result of this add (v0) is the index operand of the buffer_load_format_xyzw below;
; the IR computes the fetch index as %5 + %7.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
; v1 = 0 supplies the constant 0.0 operand of the first export.
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
; v[2:5] receives the four components of the vertex input (IR: %33..%36).
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
; Sixteen constants at dword offsets 0x0-0xf, one per llvm.SI.load.const call in the IR (byte offsets 0-60).
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(0) ; BF8C0770
; First export (IR: the llvm.SI.export call with fourth operand 32): input x, y, z and the 0 in v1.
; It is issued right after the vertex data wait, before any of the arithmetic below.
exp 15, 32, 0, 0, 0, v2, v3, v4, v1 ; F800020F 01040302
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four running sums in v0, v1, v6 and v2 (IR: %58, %60, %62, %64).
v_mul_f32_e32 v0, s4, v2 ; 10000404
v_mac_f32_e32 v0, s8, v3 ; 3E000608
; NOTE(review): v1 and v2 are operands of the export above and are overwritten below, which is
; presumably why this wait on the export counter precedes those writes - confirm.
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v1, s5, v2 ; 10020405
v_mac_f32_e32 v1, s9, v3 ; 3E020609
v_mul_f32_e32 v6, s6, v2 ; 100C0406
v_mul_f32_e32 v2, s7, v2 ; 10040407
v_mac_f32_e32 v6, s10, v3 ; 3E0C060A
v_mac_f32_e32 v2, s11, v3 ; 3E04060B
v_mac_f32_e32 v0, s12, v4 ; 3E00080C
v_mac_f32_e32 v1, s13, v4 ; 3E02080D
v_mac_f32_e32 v6, s14, v4 ; 3E0C080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v1, s17, v5 ; 3E020A11
v_mac_f32_e32 v6, s18, v5 ; 3E0C0A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
; Second export (IR: the llvm.SI.export call with fourth operand 12): the four accumulated sums.
exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 188 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..10]
DCL CONST[13..17]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 1.0000, 0.0400, 0.0000, 0.7000}
IMM[1] FLT32 { 0.3000, 0.4000, 1.5000, -0.5000}
IMM[2] FLT32 { 16.0000, 4.0000, -2.0000, 3.0000}
IMM[3] FLT32 { 0.5000, -0.7000, 1.1000, 0.1000}
IMM[4] FLT32 { 0.6000, 0.1500, -0.1500, 6.6666}
IMM[5] FLT32 { 6.0000, 0.2500, 1000.0000, -0.3000}
0: DP3 TEMP[0].x, CONST[8].xyzz, CONST[8].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, CONST[8].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[0].xyzz, IN[0].xyzz
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[0].xyzz, TEMP[1].xxxx
6: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[0].xyzz
7: ADD TEMP[3].x, IMM[0].xxxx, CONST[7].xxxx
8: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[2].xyzz
9: SQRT TEMP[2].x, TEMP[2].xxxx
10: MUL TEMP[4].x, CONST[6].xxxx, IMM[0].yyyy
11: RCP TEMP[4].x, TEMP[4].xxxx
12: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
13: ADD TEMP[2].x, TEMP[3].xxxx, -TEMP[2].xxxx
14: RCP TEMP[3].x, CONST[7].xxxx
15: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx
16: MOV_SAT TEMP[2].x, TEMP[2].xxxx
17: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[1].xyzz
19: FSLT TEMP[1].x, IMM[0].zzzz, TEMP[0].xxxx
20: UIF TEMP[1].xxxx :0
21: POW TEMP[0].x, TEMP[0].xxxx, CONST[9].xxxx
22: ELSE :0
23: MOV TEMP[0].x, IMM[0].zzzz
24: ENDIF
25: DP3 TEMP[1].x, IN[0].xyzz, IN[0].xyzz
26: RSQ TEMP[1].x, TEMP[1].xxxx
27: MUL TEMP[1].xyz, IN[0].xyzz, TEMP[1].xxxx
28: DP3 TEMP[3].x, CONST[8].xyzz, CONST[8].xyzz
29: RSQ TEMP[3].x, TEMP[3].xxxx
30: MUL TEMP[3].xyz, CONST[8].xyzz, TEMP[3].xxxx
31: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[1].xyzz
32: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
33: MUL TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww
34: MOV_SAT TEMP[4].x, TEMP[4].xxxx
35: LRP TEMP[4], TEMP[4].xxxx, CONST[1], CONST[2]
36: MUL TEMP[2].xyz, CONST[5].xyzz, TEMP[2].xxxx
37: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx
38: MUL TEMP[0].x, TEMP[0].xxxx, IMM[1].yyyy
39: MOV_SAT TEMP[1].x, TEMP[1].yyyy
40: RCP TEMP[5].x, CONST[3].xxxx
41: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx
42: MOV_SAT TEMP[1].x, TEMP[1].xxxx
43: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx
44: POW TEMP[1].x, TEMP[1].xxxx, IMM[1].zzzz
45: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[4].wwww
46: MOV_SAT TEMP[1].x, TEMP[1].xxxx
47: LRP TEMP[1].xyz, TEMP[1].xxxx, TEMP[4].xyzz, CONST[0].xyzz
48: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz
49: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[0].xyzz
50: MAD TEMP[0].xyz, TEMP[2].xyzz, CONST[10].xxxx, TEMP[0].xyzz
51: MOV TEMP[0].w, CONST[0].wwww
52: DP3 TEMP[1].x, IN[0].xyzz, CONST[15].xyzz
53: DP3 TEMP[2].x, IN[0].xyzz, CONST[16].xyzz
54: MOV TEMP[1].y, TEMP[2].xxxx
55: DP3 TEMP[2].x, IN[0].xyzz, CONST[17].xyzz
56: MOV TEMP[1].z, TEMP[2].xxxx
57: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
58: RSQ TEMP[2].x, TEMP[2].xxxx
59: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
60: MUL TEMP[1].xyz, TEMP[1].xyzz, CONST[13].xxxx
61: FSLT TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz
62: UIF TEMP[2].xxxx :0
63: FLR TEMP[2].x, -TEMP[1].xxxx
64: MOV TEMP[2].x, -TEMP[2].xxxx
65: ELSE :0
66: FLR TEMP[2].x, TEMP[1].xxxx
67: ENDIF
68: FSLT TEMP[3].x, TEMP[1].yyyy, IMM[0].zzzz
69: UIF TEMP[3].xxxx :0
70: FLR TEMP[3].x, -TEMP[1].yyyy
71: MOV TEMP[3].x, -TEMP[3].xxxx
72: ELSE :0
73: FLR TEMP[3].x, TEMP[1].yyyy
74: ENDIF
75: FSLT TEMP[4].x, TEMP[1].zzzz, IMM[0].zzzz
76: UIF TEMP[4].xxxx :0
77: FLR TEMP[4].x, -TEMP[1].zzzz
78: MOV TEMP[4].x, -TEMP[4].xxxx
79: ELSE :0
80: FLR TEMP[4].x, TEMP[1].zzzz
81: ENDIF
82: MOV TEMP[2].x, TEMP[2].xxxx
83: MOV TEMP[2].y, TEMP[3].xxxx
84: MOV TEMP[2].z, TEMP[4].xxxx
85: RCP TEMP[3].x, CONST[13].xxxx
86: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
87: MOV TEMP[3].xy, TEMP[2].xyyy
88: TEX TEMP[3].xyz, TEMP[3], SAMP[1], 2D
89: ADD TEMP[3].xyz, TEMP[3].xyzz, IMM[1].wwww
90: MOV TEMP[4].xy, TEMP[2].yzzz
91: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D
92: ADD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].wwww
93: ADD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz
94: MOV TEMP[2].xy, TEMP[2].xzzz
95: TEX TEMP[2].xyz, TEMP[2], SAMP[1], 2D
96: ADD TEMP[2].xyz, TEMP[2].xyzz, IMM[1].wwww
97: ADD TEMP[2].xyz, TEMP[3].xyzz, TEMP[2].xyzz
98: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
99: RSQ TEMP[3].x, TEMP[3].xxxx
100: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
101: ADD TEMP[3].x, TEMP[2].xxxx, TEMP[2].yyyy
102: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].zzzz
103: MUL TEMP[3].x, TEMP[3].xxxx, IMM[2].xxxx
104: FRC TEMP[3].x, TEMP[3].xxxx
105: MAD TEMP[3].x, TEMP[3].xxxx, IMM[2].yyyy, IMM[2].zzzz
106: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz
107: MIN TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww
108: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx
109: ADD TEMP[4].xyz, TEMP[2].xyzz, IMM[1].wwww
110: FRC TEMP[1].xyz, TEMP[1].xyzz
111: ADD TEMP[1].xyz, TEMP[1].xyzz, IMM[3].yyyy
112: MAD TEMP[1].xyz, TEMP[4].xyzz, IMM[3].xxxx, TEMP[1].xyzz
113: MUL TEMP[4].x, TEMP[0].xxxx, IMM[1].xxxx
114: MAD TEMP[4].x, TEMP[0].yyyy, IMM[4].xxxx, TEMP[4].xxxx
115: MAD TEMP[4].x, TEMP[0].zzzz, IMM[3].wwww, TEMP[4].xxxx
116: ADD TEMP[4].x, IMM[3].zzzz, -TEMP[4].xxxx
117: MUL TEMP[5].x, TEMP[4].xxxx, TEMP[4].xxxx
118: LRP TEMP[2].xyz, CONST[14].xxxx, TEMP[2].xyzz, IMM[0].xxxx
119: MUL TEMP[2].xyz, TEMP[3].xxxx, TEMP[2].xyzz
120: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz
121: SQRT TEMP[1].x, TEMP[1].xxxx
122: ADD TEMP[1].x, TEMP[1].xxxx, IMM[4].zzzz
123: MUL TEMP[1].x, TEMP[1].xxxx, IMM[4].wwww
124: ADD TEMP[1].x, IMM[4].yyyy, -TEMP[1].xxxx
125: MOV_SAT TEMP[1].x, TEMP[1].xxxx
126: MUL TEMP[3].x, TEMP[3].xxxx, IMM[5].yyyy
127: POW TEMP[3].x, TEMP[3].xxxx, IMM[5].zzzz
128: MOV_SAT TEMP[3].x, TEMP[3].xxxx
129: MUL TEMP[3].x, TEMP[3].xxxx, IMM[2].wwww
130: ADD TEMP[3].x, IMM[5].xxxx, -TEMP[3].xxxx
131: POW TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx
132: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xxxx
133: MOV_SAT TEMP[1].xyz, TEMP[1].xyzz
134: MAD TEMP[2].x, TEMP[5].xxxx, TEMP[5].xxxx, IMM[5].wwww
135: MOV_SAT TEMP[2].x, TEMP[2].xxxx
136: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
137: MAD TEMP[0].xyz, TEMP[1].xyzz, CONST[10].xxxx, TEMP[0].xyzz
138: DP3 TEMP[1].x, IN[0].xyzz, IN[0].xyzz
139: RSQ TEMP[1].x, TEMP[1].xxxx
140: MUL TEMP[1].xyz, IN[0].xyzz, TEMP[1].xxxx
141: MUL TEMP[2].xy, TEMP[1].xzzz, IMM[3].xxxx
142: MOV TEMP[2].xy, TEMP[2].xyyy
143: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
144: ABS TEMP[3].x, TEMP[1].yyyy
145: ADD TEMP[3].x, TEMP[3].xxxx, IMM[5].wwww
146: MUL TEMP[5].xy, TEMP[1].yzzz, IMM[3].xxxx
147: MOV TEMP[5].xy, TEMP[5].xyyy
148: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
149: ABS TEMP[6].x, TEMP[1].xxxx
150: ADD TEMP[6].x, TEMP[6].xxxx, IMM[5].wwww
151: MUL TEMP[7].xy, TEMP[1].xyyy, IMM[3].xxxx
152: MOV TEMP[7].xy, TEMP[7].xyyy
153: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D
154: ABS TEMP[1].x, TEMP[1].zzzz
155: ADD TEMP[1].x, TEMP[1].xxxx, IMM[5].wwww
156: MUL TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xxxx
157: MAD TEMP[1].xyz, TEMP[5].xyzz, TEMP[6].xxxx, TEMP[1].xyzz
158: MAD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xxxx, TEMP[1].xyzz
159: ADD TEMP[2].x, TEMP[4].xxxx, IMM[1].wwww
160: MAX TEMP[2].x, TEMP[2].xxxx, IMM[0].zzzz
161: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
162: MAD TEMP[0].xyz, TEMP[1].xyzz, IMM[0].yyyy, TEMP[0].xyzz
163: MOV OUT[0], TEMP[0]
164: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0
%62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0
%64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0
%66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0
%68 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%71 = fmul float %45, %45
%72 = fmul float %46, %46
%73 = fadd float %72, %71
%74 = fmul float %47, %47
%75 = fadd float %73, %74
%76 = call float @llvm.AMDGPU.rsq.clamped.f32(float %75)
%77 = fmul float %45, %76
%78 = fmul float %46, %76
%79 = fmul float %47, %76
%80 = fmul float %68, %68
%81 = fmul float %69, %69
%82 = fadd float %81, %80
%83 = fmul float %70, %70
%84 = fadd float %82, %83
%85 = call float @llvm.AMDGPU.rsq.clamped.f32(float %84)
%86 = fmul float %68, %85
%87 = fmul float %69, %85
%88 = fmul float %70, %85
%89 = fsub float %86, %77
%90 = fsub float %87, %78
%91 = fsub float %88, %79
%92 = fadd float %44, 1.000000e+00
%93 = fmul float %89, %89
%94 = fmul float %90, %90
%95 = fadd float %94, %93
%96 = fmul float %91, %91
%97 = fadd float %95, %96
%98 = call float @llvm.sqrt.f32(float %97)
%99 = fmul float %43, 0x3FA47AE140000000
%100 = fdiv float 1.000000e+00, %99
%101 = fmul float %98, %100
%102 = fsub float %92, %101
%103 = fdiv float 1.000000e+00, %44
%104 = fmul float %102, %103
%105 = call float @llvm.AMDIL.clamp.(float %104, float 0.000000e+00, float 1.000000e+00)
%106 = fmul float %105, %105
%107 = fmul float %77, %86
%108 = fmul float %78, %87
%109 = fadd float %108, %107
%110 = fmul float %79, %88
%111 = fadd float %109, %110
%112 = fcmp ogt float %111, 0.000000e+00
br i1 %112, label %IF, label %ENDIF
IF: ; preds = %main_body
%113 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%114 = call float @llvm.pow.f32(float %111, float %113)
%phitmp = fmul float %114, 0x3FD99999A0000000
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp.0 = phi float [ %phitmp, %IF ], [ 0.000000e+00, %main_body ]
%115 = fmul float %68, %68
%116 = fmul float %69, %69
%117 = fadd float %116, %115
%118 = fmul float %70, %70
%119 = fadd float %117, %118
%120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119)
%121 = fmul float %68, %120
%122 = fmul float %69, %120
%123 = fmul float %70, %120
%124 = fmul float %45, %45
%125 = fmul float %46, %46
%126 = fadd float %125, %124
%127 = fmul float %47, %47
%128 = fadd float %126, %127
%129 = call float @llvm.AMDGPU.rsq.clamped.f32(float %128)
%130 = fmul float %45, %129
%131 = fmul float %46, %129
%132 = fmul float %47, %129
%133 = fmul float %130, %121
%134 = fmul float %131, %122
%135 = fadd float %134, %133
%136 = fmul float %132, %123
%137 = fadd float %135, %136
%138 = fadd float %137, 0x3FE6666660000000
%139 = fmul float %138, 0x3FE6666660000000
%140 = call float @llvm.AMDIL.clamp.(float %139, float 0.000000e+00, float 1.000000e+00)
%141 = call float @llvm.AMDGPU.lrp(float %140, float %28, float %32)
%142 = call float @llvm.AMDGPU.lrp(float %140, float %29, float %33)
%143 = call float @llvm.AMDGPU.lrp(float %140, float %30, float %34)
%144 = call float @llvm.AMDGPU.lrp(float %140, float %31, float %35)
%145 = fmul float %40, %106
%146 = fmul float %41, %106
%147 = fmul float %42, %106
%148 = fmul float %138, 0x3FD3333340000000
%149 = call float @llvm.AMDIL.clamp.(float %122, float 0.000000e+00, float 1.000000e+00)
%150 = fdiv float 1.000000e+00, %36
%151 = fmul float %149, %150
%152 = call float @llvm.AMDIL.clamp.(float %151, float 0.000000e+00, float 1.000000e+00)
%153 = fsub float 1.000000e+00, %152
%154 = call float @llvm.pow.f32(float %153, float 1.500000e+00)
%155 = fmul float %154, %144
%156 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00)
%157 = call float @llvm.AMDGPU.lrp(float %156, float %141, float %24)
%158 = call float @llvm.AMDGPU.lrp(float %156, float %142, float %25)
%159 = call float @llvm.AMDGPU.lrp(float %156, float %143, float %26)
%160 = fmul float %37, %temp.0
%161 = fadd float %160, %157
%162 = fmul float %38, %temp.0
%163 = fadd float %162, %158
%164 = fmul float %39, %temp.0
%165 = fadd float %164, %159
%166 = fmul float %37, %148
%167 = fadd float %166, %161
%168 = fmul float %38, %148
%169 = fadd float %168, %163
%170 = fmul float %39, %148
%171 = fadd float %170, %165
%172 = fmul float %145, %48
%173 = fadd float %172, %167
%174 = fmul float %146, %48
%175 = fadd float %174, %169
%176 = fmul float %147, %48
%177 = fadd float %176, %171
%178 = fmul float %68, %51
%179 = fmul float %69, %52
%180 = fadd float %179, %178
%181 = fmul float %70, %53
%182 = fadd float %180, %181
%183 = fmul float %68, %54
%184 = fmul float %69, %55
%185 = fadd float %184, %183
%186 = fmul float %70, %56
%187 = fadd float %185, %186
%188 = fmul float %68, %57
%189 = fmul float %69, %58
%190 = fadd float %189, %188
%191 = fmul float %70, %59
%192 = fadd float %190, %191
%193 = fmul float %182, %182
%194 = fmul float %187, %187
%195 = fadd float %194, %193
%196 = fmul float %192, %192
%197 = fadd float %195, %196
%198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197)
%199 = fmul float %182, %198
%200 = fmul float %187, %198
%201 = fmul float %192, %198
%202 = fmul float %199, %49
%203 = fmul float %200, %49
%204 = fmul float %201, %49
%205 = fcmp olt float %202, 0.000000e+00
br i1 %205, label %IF33, label %ELSE34
IF33: ; preds = %ENDIF
%206 = fsub float -0.000000e+00, %202
%207 = call float @llvm.floor.f32(float %206)
%208 = fsub float -0.000000e+00, %207
br label %ENDIF32
ELSE34: ; preds = %ENDIF
%209 = call float @llvm.floor.f32(float %202)
br label %ENDIF32
ENDIF32: ; preds = %ELSE34, %IF33
%temp8.0 = phi float [ %208, %IF33 ], [ %209, %ELSE34 ]
%210 = fcmp olt float %203, 0.000000e+00
br i1 %210, label %IF36, label %ELSE37
IF36: ; preds = %ENDIF32
%211 = fsub float -0.000000e+00, %203
%212 = call float @llvm.floor.f32(float %211)
%213 = fsub float -0.000000e+00, %212
br label %ENDIF35
ELSE37: ; preds = %ENDIF32
%214 = call float @llvm.floor.f32(float %203)
br label %ENDIF35
ENDIF35: ; preds = %ELSE37, %IF36
%temp12.0 = phi float [ %213, %IF36 ], [ %214, %ELSE37 ]
%215 = fcmp olt float %204, 0.000000e+00
br i1 %215, label %IF39, label %ELSE40
IF39: ; preds = %ENDIF35
%216 = fsub float -0.000000e+00, %204
%217 = call float @llvm.floor.f32(float %216)
%218 = fsub float -0.000000e+00, %217
br label %ENDIF38
ELSE40: ; preds = %ENDIF35
%219 = call float @llvm.floor.f32(float %204)
br label %ENDIF38
ENDIF38: ; preds = %ELSE40, %IF39
%temp16.0 = phi float [ %218, %IF39 ], [ %219, %ELSE40 ]
%220 = fdiv float 1.000000e+00, %49
%221 = fmul float %temp8.0, %220
%222 = fmul float %temp12.0, %220
%223 = fmul float %temp16.0, %220
%224 = bitcast float %221 to i32
%225 = bitcast float %222 to i32
%226 = insertelement <2 x i32> undef, i32 %224, i32 0
%227 = insertelement <2 x i32> %226, i32 %225, i32 1
%228 = bitcast <8 x i32> %65 to <32 x i8>
%229 = bitcast <4 x i32> %67 to <16 x i8>
%230 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %227, <32 x i8> %228, <16 x i8> %229, i32 2)
%231 = extractelement <4 x float> %230, i32 0
%232 = extractelement <4 x float> %230, i32 1
%233 = extractelement <4 x float> %230, i32 2
%234 = fadd float %231, -5.000000e-01
%235 = fadd float %232, -5.000000e-01
%236 = fadd float %233, -5.000000e-01
%237 = bitcast float %222 to i32
%238 = bitcast float %223 to i32
%239 = insertelement <2 x i32> undef, i32 %237, i32 0
%240 = insertelement <2 x i32> %239, i32 %238, i32 1
%241 = bitcast <8 x i32> %65 to <32 x i8>
%242 = bitcast <4 x i32> %67 to <16 x i8>
%243 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %240, <32 x i8> %241, <16 x i8> %242, i32 2)
%244 = extractelement <4 x float> %243, i32 0
%245 = extractelement <4 x float> %243, i32 1
%246 = extractelement <4 x float> %243, i32 2
%247 = fadd float %244, -5.000000e-01
%248 = fadd float %245, -5.000000e-01
%249 = fadd float %246, -5.000000e-01
%250 = fadd float %234, %247
%251 = fadd float %235, %248
%252 = fadd float %236, %249
%253 = bitcast float %221 to i32
%254 = bitcast float %223 to i32
%255 = insertelement <2 x i32> undef, i32 %253, i32 0
%256 = insertelement <2 x i32> %255, i32 %254, i32 1
%257 = bitcast <8 x i32> %65 to <32 x i8>
%258 = bitcast <4 x i32> %67 to <16 x i8>
%259 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %256, <32 x i8> %257, <16 x i8> %258, i32 2)
%260 = extractelement <4 x float> %259, i32 0
%261 = extractelement <4 x float> %259, i32 1
%262 = extractelement <4 x float> %259, i32 2
%263 = fadd float %260, -5.000000e-01
%264 = fadd float %261, -5.000000e-01
%265 = fadd float %262, -5.000000e-01
%266 = fadd float %250, %263
%267 = fadd float %251, %264
%268 = fadd float %252, %265
%269 = fmul float %266, %266
%270 = fmul float %267, %267
%271 = fadd float %270, %269
%272 = fmul float %268, %268
%273 = fadd float %271, %272
%274 = call float @llvm.AMDGPU.rsq.clamped.f32(float %273)
%275 = fmul float %266, %274
%276 = fmul float %267, %274
%277 = fmul float %268, %274
%278 = fadd float %275, %276
%279 = fadd float %278, %277
%280 = fmul float %279, 1.600000e+01
%281 = call float @llvm.floor.f32(float %280)
%282 = fsub float %280, %281
%283 = fmul float %282, 4.000000e+00
%284 = fadd float %283, -2.000000e+00
%285 = call float @llvm.maxnum.f32(float %284, float 0.000000e+00)
%286 = call float @llvm.minnum.f32(float %285, float 3.000000e+00)
%287 = fmul float %286, %286
%288 = fadd float %275, -5.000000e-01
%289 = fadd float %276, -5.000000e-01
%290 = fadd float %277, -5.000000e-01
%291 = call float @llvm.floor.f32(float %202)
%292 = fsub float %202, %291
%293 = call float @llvm.floor.f32(float %203)
%294 = fsub float %203, %293
%295 = call float @llvm.floor.f32(float %204)
%296 = fsub float %204, %295
%297 = fadd float %292, 0xBFE6666660000000
%298 = fadd float %294, 0xBFE6666660000000
%299 = fadd float %296, 0xBFE6666660000000
%300 = fmul float %288, 5.000000e-01
%301 = fadd float %300, %297
%302 = fmul float %289, 5.000000e-01
%303 = fadd float %302, %298
%304 = fmul float %290, 5.000000e-01
%305 = fadd float %304, %299
%306 = fmul float %173, 0x3FD3333340000000
%307 = fmul float %175, 0x3FE3333340000000
%308 = fadd float %307, %306
%309 = fmul float %177, 0x3FB99999A0000000
%310 = fadd float %309, %308
%311 = fsub float 0x3FF19999A0000000, %310
%312 = fmul float %311, %311
%313 = call float @llvm.AMDGPU.lrp(float %50, float %275, float 1.000000e+00)
%314 = call float @llvm.AMDGPU.lrp(float %50, float %276, float 1.000000e+00)
%315 = call float @llvm.AMDGPU.lrp(float %50, float %277, float 1.000000e+00)
%316 = fmul float %287, %313
%317 = fmul float %287, %314
%318 = fmul float %287, %315
%319 = fmul float %301, %301
%320 = fmul float %303, %303
%321 = fadd float %320, %319
%322 = fmul float %305, %305
%323 = fadd float %321, %322
%324 = call float @llvm.sqrt.f32(float %323)
%325 = fadd float %324, 0xBFC3333340000000
%326 = fmul float %325, 0x401AAA9940000000
%327 = fsub float 0x3FC3333340000000, %326
%328 = call float @llvm.AMDIL.clamp.(float %327, float 0.000000e+00, float 1.000000e+00)
%329 = fmul float %287, 2.500000e-01
%330 = call float @llvm.pow.f32(float %329, float 1.000000e+03)
%331 = call float @llvm.AMDIL.clamp.(float %330, float 0.000000e+00, float 1.000000e+00)
%332 = fmul float %331, 3.000000e+00
%333 = fsub float 6.000000e+00, %332
%334 = call float @llvm.pow.f32(float %328, float %333)
%335 = fmul float %316, %334
%336 = fmul float %317, %334
%337 = fmul float %318, %334
%338 = call float @llvm.AMDIL.clamp.(float %335, float 0.000000e+00, float 1.000000e+00)
%339 = call float @llvm.AMDIL.clamp.(float %336, float 0.000000e+00, float 1.000000e+00)
%340 = call float @llvm.AMDIL.clamp.(float %337, float 0.000000e+00, float 1.000000e+00)
%341 = fmul float %312, %312
%342 = fadd float %341, 0xBFD3333340000000
%343 = call float @llvm.AMDIL.clamp.(float %342, float 0.000000e+00, float 1.000000e+00)
%344 = fmul float %338, %343
%345 = fmul float %339, %343
%346 = fmul float %340, %343
%347 = fmul float %344, %48
%348 = fadd float %347, %173
%349 = fmul float %345, %48
%350 = fadd float %349, %175
%351 = fmul float %346, %48
%352 = fadd float %351, %177
%353 = fmul float %68, %68
%354 = fmul float %69, %69
%355 = fadd float %354, %353
%356 = fmul float %70, %70
%357 = fadd float %355, %356
%358 = call float @llvm.AMDGPU.rsq.clamped.f32(float %357)
%359 = fmul float %68, %358
%360 = fmul float %69, %358
%361 = fmul float %70, %358
%362 = fmul float %359, 5.000000e-01
%363 = fmul float %361, 5.000000e-01
%364 = bitcast float %362 to i32
%365 = bitcast float %363 to i32
%366 = insertelement <2 x i32> undef, i32 %364, i32 0
%367 = insertelement <2 x i32> %366, i32 %365, i32 1
%368 = bitcast <8 x i32> %61 to <32 x i8>
%369 = bitcast <4 x i32> %63 to <16 x i8>
%370 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %367, <32 x i8> %368, <16 x i8> %369, i32 2)
%371 = extractelement <4 x float> %370, i32 0
%372 = extractelement <4 x float> %370, i32 1
%373 = extractelement <4 x float> %370, i32 2
%374 = call float @llvm.fabs.f32(float %360)
%375 = fadd float %374, 0xBFD3333340000000
%376 = fmul float %360, 5.000000e-01
%377 = fmul float %361, 5.000000e-01
%378 = bitcast float %376 to i32
%379 = bitcast float %377 to i32
%380 = insertelement <2 x i32> undef, i32 %378, i32 0
%381 = insertelement <2 x i32> %380, i32 %379, i32 1
%382 = bitcast <8 x i32> %61 to <32 x i8>
%383 = bitcast <4 x i32> %63 to <16 x i8>
%384 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %381, <32 x i8> %382, <16 x i8> %383, i32 2)
%385 = extractelement <4 x float> %384, i32 0
%386 = extractelement <4 x float> %384, i32 1
%387 = extractelement <4 x float> %384, i32 2
%388 = call float @llvm.fabs.f32(float %359)
%389 = fadd float %388, 0xBFD3333340000000
%390 = fmul float %359, 5.000000e-01
%391 = fmul float %360, 5.000000e-01
%392 = bitcast float %390 to i32
%393 = bitcast float %391 to i32
%394 = insertelement <2 x i32> undef, i32 %392, i32 0
%395 = insertelement <2 x i32> %394, i32 %393, i32 1
%396 = bitcast <8 x i32> %61 to <32 x i8>
%397 = bitcast <4 x i32> %63 to <16 x i8>
%398 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %395, <32 x i8> %396, <16 x i8> %397, i32 2)
%399 = extractelement <4 x float> %398, i32 0
%400 = extractelement <4 x float> %398, i32 1
%401 = extractelement <4 x float> %398, i32 2
%402 = call float @llvm.fabs.f32(float %361)
%403 = fadd float %402, 0xBFD3333340000000
%404 = fmul float %399, %403
%405 = fmul float %400, %403
%406 = fmul float %401, %403
%407 = fmul float %385, %389
%408 = fadd float %407, %404
%409 = fmul float %386, %389
%410 = fadd float %409, %405
%411 = fmul float %387, %389
%412 = fadd float %411, %406
%413 = fmul float %371, %375
%414 = fadd float %413, %408
%415 = fmul float %372, %375
%416 = fadd float %415, %410
%417 = fmul float %373, %375
%418 = fadd float %417, %412
%419 = fadd float %311, -5.000000e-01
%420 = call float @llvm.maxnum.f32(float %419, float 0.000000e+00)
%421 = fmul float %414, %420
%422 = fmul float %416, %420
%423 = fmul float %418, %420
%424 = fmul float %421, 0x3FA47AE140000000
%425 = fadd float %424, %348
%426 = fmul float %422, 0x3FA47AE140000000
%427 = fadd float %426, %350
%428 = fmul float %423, 0x3FA47AE140000000
%429 = fadd float %428, %352
%430 = call i32 @llvm.SI.packf16(float %425, float %427)
%431 = bitcast i32 %430 to float
%432 = call i32 @llvm.SI.packf16(float %429, float %27)
%433 = bitcast i32 %432 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %431, float %433, float %431, float %433)
ret void
}
; ---------------------------------------------------------------------------
; External intrinsics referenced by the @main body above, followed by the
; attribute groups and metadata nodes of this module.
; Declarations tagged #1 are `nounwind readnone`; those tagged #2 are only
; `readnone` (see the attribute groups at the end of this list).
; ---------------------------------------------------------------------------
; Reads one float from the buffer described by the <16 x i8> operand at the
; given integer offset.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Fragment-shader input interpolation; the body calls it once per channel
; (first operand 0, 1, 2) of the same input.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Used by the body to normalise 3-component vectors (x * rsq(dot(x, x))).
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; The body always calls this with bounds 0.0 and 1.0.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Texture sample taking a 2-component coordinate (passed as bitcast i32s),
; a <32 x i8> descriptor and a <16 x i8> descriptor; returns four floats.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Packs two floats into one i32; the body uses it to build the two values
; handed to the final export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; The export intrinsic carries no attribute group, so unlike the declarations
; above it is not marked readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is presumably the group attached to this module's @main (its define line
; precedes this excerpt -- TODO confirm). NOTE(review): the VERT module at the
; top of this dump uses "ShaderType"="1", so "0" appears to denote the
; fragment stage.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node referenced as `!tbaa !0` by the descriptor loads in the body.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v0, v0, 2, 0, [m0] ; C8000200
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s1, s[12:15], 0x20 ; C2008D20
s_buffer_load_dword s2, s[12:15], 0x21 ; C2010D21
s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22
v_mul_f32_e32 v4, v2, v2 ; 10080502
v_interp_p2_f32 v0, [v0], v1, 2, 0, [m0] ; C8010201
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v0, v0 ; 3E080100
s_buffer_load_dword s3, s[12:15], 0x34 ; C2018D34
s_buffer_load_dword s22, s[12:15], 0x3c ; C20B0D3C
s_buffer_load_dword s23, s[12:15], 0x3d ; C20B8D3D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e64 v1, s1, s1 ; D2100001 00000201
v_mac_f32_e64 v1, s2, s2 ; D23E0001 00000402
v_mac_f32_e64 v1, s8, s8 ; D23E0001 00001008
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s0, s[12:15], 0x18 ; C2000D18
v_mul_f32_e32 v5, s1, v1 ; 100A0201
v_mad_f32 v6, v2, v4, -v5 ; D2820006 84160902
v_mul_f32_e32 v6, v6, v6 ; 100C0D06
s_buffer_load_dword s33, s[12:15], 0x1c ; C2108D1C
v_mul_f32_e32 v7, s2, v1 ; 100E0202
v_mad_f32 v8, v3, v4, -v7 ; D2820008 841E0903
v_mac_f32_e32 v6, v8, v8 ; 3E0C1108
v_mov_b32_e32 v8, 0xbd23d70a ; 7E1002FF BD23D70A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v8, s0, v8 ; 10101000
v_mul_f32_e32 v9, s8, v1 ; 10120208
v_rcp_f32_e32 v1, v8 ; 7E025508
v_mad_f32 v8, v0, v4, -v9 ; D2820008 84260900
v_mac_f32_e32 v6, v8, v8 ; 3E0C1108
v_sqrt_f32_e32 v6, v6 ; 7E0C6706
v_add_f32_e64 v8, 1.0, s33 ; D2060008 000042F2
v_mac_f32_e32 v8, v1, v6 ; 3E100D01
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v1, v1, v5 ; 10020B01
s_buffer_load_dword s11, s[12:15], 0xb ; C2058D0B
s_buffer_load_dword s25, s[12:15], 0xc ; C20C8D0C
s_buffer_load_dword s10, s[12:15], 0x0 ; C2050D00
s_buffer_load_dword s9, s[12:15], 0x1 ; C2048D01
s_buffer_load_dword s0, s[12:15], 0x2 ; C2000D02
s_buffer_load_dword s20, s[12:15], 0x4 ; C20A0D04
s_buffer_load_dword s18, s[12:15], 0x5 ; C2090D05
s_buffer_load_dword s17, s[12:15], 0x6 ; C2088D06
s_buffer_load_dword s16, s[12:15], 0x7 ; C2080D07
s_buffer_load_dword s24, s[12:15], 0x8 ; C20C0D08
s_buffer_load_dword s21, s[12:15], 0x9 ; C20A8D09
s_buffer_load_dword s19, s[12:15], 0xa ; C2098D0A
s_buffer_load_dword s26, s[12:15], 0x3e ; C20D0D3E
s_buffer_load_dword s27, s[12:15], 0x40 ; C20D8D40
s_buffer_load_dword s28, s[12:15], 0x41 ; C20E0D41
s_buffer_load_dword s29, s[12:15], 0x42 ; C20E8D42
s_buffer_load_dword s30, s[12:15], 0x44 ; C20F0D44
s_buffer_load_dword s31, s[12:15], 0x45 ; C20F8D45
s_buffer_load_dword s32, s[12:15], 0x46 ; C2100D46
v_rcp_f32_e32 v5, s33 ; 7E0A5421
v_mul_f32_e32 v6, v4, v3 ; 100C0704
v_mac_f32_e32 v1, v6, v7 ; 3E020F06
v_mul_f32_e32 v4, v4, v0 ; 10080104
v_mac_f32_e32 v1, v4, v9 ; 3E021304
v_mul_f32_e32 v4, v5, v8 ; 10081105
v_add_f32_e64 v7, 0, v4 clamp ; D2060807 00020880
v_mov_b32_e32 v8, 0 ; 7E100280
v_cmp_lt_f32_e32 vcc, 0, v1 ; 7C020280
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[34:35], vcc ; BEA2246A
s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E
s_buffer_load_dword s33, s[12:15], 0x24 ; C2108D24
v_log_f32_e32 v1, v1 ; 7E024F01
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_legacy_f32_e32 v1, s33, v1 ; 0E020221
v_exp_f32_e32 v1, v1 ; 7E024B01
v_mul_f32_e32 v8, 0x3ecccccd, v1 ; 101002FF 3ECCCCCD
s_or_b64 exec, exec, s[34:35] ; 88FE227E
v_rcp_f32_e32 v1, s25 ; 7E025419
v_mul_f32_e32 v11, s22, v2 ; 10160416
v_mac_f32_e32 v11, s23, v3 ; 3E160617
v_mac_f32_e32 v11, s26, v0 ; 3E16001A
v_mul_f32_e32 v10, s27, v2 ; 1014041B
v_mac_f32_e32 v10, s28, v3 ; 3E14061C
v_mac_f32_e32 v10, s29, v0 ; 3E14001D
v_mul_f32_e32 v6, s30, v2 ; 100C041E
v_mac_f32_e32 v6, s31, v3 ; 3E0C061F
v_mac_f32_e32 v6, s32, v0 ; 3E0C0020
v_mul_f32_e64 v4, s1, s1 ; D2100004 00000201
v_mac_f32_e64 v4, s2, s2 ; D23E0004 00000402
v_mac_f32_e64 v4, s8, s8 ; D23E0004 00001008
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
v_mul_f32_e32 v5, v2, v2 ; 100A0502
v_mac_f32_e32 v5, v3, v3 ; 3E0A0703
v_mac_f32_e32 v5, v0, v0 ; 3E0A0100
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v9, s1, v4 ; 10120801
v_mul_f32_e32 v12, s2, v4 ; 10180802
v_mul_f32_e32 v4, s8, v4 ; 10080808
v_mul_f32_e32 v13, v5, v2 ; 101A0505
v_mul_f32_e32 v9, v13, v9 ; 1012130D
v_mul_f32_e32 v13, v5, v3 ; 101A0705
v_mac_f32_e32 v9, v13, v12 ; 3E12190D
v_mul_f32_e32 v5, v5, v0 ; 100A0105
v_mac_f32_e32 v9, v5, v4 ; 3E120905
v_mov_b32_e32 v4, 0x3f333333 ; 7E0802FF 3F333333
v_add_f32_e32 v9, v4, v9 ; 06121304
v_mul_f32_e32 v4, v4, v9 ; 10081304
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v12, s24, v5 ; 10180A18
v_mac_f32_e32 v12, s20, v4 ; 3E180814
v_mul_f32_e32 v15, s21, v5 ; 101E0A15
v_mac_f32_e32 v15, s18, v4 ; 3E1E0812
v_mul_f32_e32 v16, s19, v5 ; 10200A13
v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80
v_mul_f32_e32 v1, v1, v13 ; 10021B01
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_sub_f32_e32 v1, 1.0, v1 ; 080202F2
v_log_f32_e32 v1, v1 ; 7E024F01
v_mac_f32_e32 v16, s17, v4 ; 3E200811
v_mul_f32_e32 v5, s11, v5 ; 100A0A0B
v_mac_f32_e32 v5, s16, v4 ; 3E0A0810
v_mul_legacy_f32_e32 v1, 0x3fc00000, v1 ; 0E0202FF 3FC00000
v_exp_f32_e32 v1, v1 ; 7E024B01
v_mul_f32_e32 v1, v5, v1 ; 10020305
v_add_f32_e64 v13, 0, v1 clamp ; D206080D 00020280
v_sub_f32_e32 v5, 1.0, v13 ; 080A1AF2
v_mul_f32_e32 v1, s10, v5 ; 10020A0A
v_mac_f32_e32 v1, v12, v13 ; 3E021B0C
v_mul_f32_e32 v4, s9, v5 ; 10080A09
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mac_f32_e32 v12, v10, v10 ; 3E18150A
v_mac_f32_e32 v12, v6, v6 ; 3E180D06
v_rsq_clamp_f32_e32 v14, v12 ; 7E1C590C
v_mac_f32_e32 v4, v15, v13 ; 3E081B0F
v_mul_f32_e32 v5, s0, v5 ; 100A0A00
v_mac_f32_e32 v5, v16, v13 ; 3E0A1B10
v_mul_f32_e32 v11, v14, v11 ; 1016170E
v_mul_f32_e32 v11, s3, v11 ; 10161603
v_cmp_ngt_f32_e32 vcc, 0, v11 ; 7C161680
s_and_saveexec_b64 s[0:1], vcc ; BE80246A
s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E
v_floor_f32_e32 v12, v11 ; 7E18490B
s_or_saveexec_b64 s[0:1], s[0:1] ; BE802500
s_xor_b64 exec, exec, s[0:1] ; 89FE007E
v_floor_f32_e64 v12, -v11 ; D348000C 2000010B
v_xor_b32_e32 v12, 0x80000000, v12 ; 3A1818FF 80000000
s_or_b64 exec, exec, s[0:1] ; 88FE007E
v_mul_f32_e32 v10, v14, v10 ; 1014150E
v_mul_f32_e32 v10, s3, v10 ; 10141403
v_cmp_ngt_f32_e32 vcc, 0, v10 ; 7C161480
s_and_saveexec_b64 s[0:1], vcc ; BE80246A
s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E
v_floor_f32_e32 v13, v10 ; 7E1A490A
s_or_saveexec_b64 s[0:1], s[0:1] ; BE802500
s_xor_b64 exec, exec, s[0:1] ; 89FE007E
v_floor_f32_e64 v13, -v10 ; D348000D 2000010A
v_xor_b32_e32 v13, 0x80000000, v13 ; 3A1A1AFF 80000000
s_or_b64 exec, exec, s[0:1] ; 88FE007E
v_mul_f32_e32 v6, v14, v6 ; 100C0D0E
v_mul_f32_e32 v6, s3, v6 ; 100C0C03
v_cmp_ngt_f32_e32 vcc, 0, v6 ; 7C160C80
s_and_saveexec_b64 s[8:9], vcc ; BE88246A
s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E
v_floor_f32_e32 v14, v6 ; 7E1C4906
s_or_saveexec_b64 s[8:9], s[8:9] ; BE882508
s_buffer_load_dword s0, s[12:15], 0x3 ; C2000D03
s_buffer_load_dword s10, s[12:15], 0x10 ; C2050D10
s_buffer_load_dword s11, s[12:15], 0x11 ; C2058D11
s_buffer_load_dword s16, s[12:15], 0x12 ; C2080D12
s_buffer_load_dword s17, s[12:15], 0x14 ; C2088D14
s_buffer_load_dword s18, s[12:15], 0x15 ; C2090D15
s_buffer_load_dword s19, s[12:15], 0x16 ; C2098D16
s_buffer_load_dword s1, s[12:15], 0x28 ; C2008D28
s_buffer_load_dword s2, s[12:15], 0x38 ; C2010D38
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[8:9] ; 89FE087E
v_floor_f32_e64 v14, -v6 ; D348000E 20000106
v_xor_b32_e32 v14, 0x80000000, v14 ; 3A1C1CFF 80000000
s_or_b64 exec, exec, s[8:9] ; 88FE087E
v_mul_f32_e32 v7, v7, v7 ; 100E0F07
v_mul_f32_e32 v15, s17, v7 ; 101E0E11
v_mul_f32_e32 v16, s18, v7 ; 10200E12
v_mul_f32_e32 v7, s19, v7 ; 100E0E13
v_mac_f32_e32 v1, s10, v8 ; 3E02100A
v_mac_f32_e32 v4, s11, v8 ; 3E08100B
v_mac_f32_e32 v5, s16, v8 ; 3E0A1010
v_rcp_f32_e32 v8, s3 ; 7E105403
v_mov_b32_e32 v17, 0x3e99999a ; 7E2202FF 3E99999A
v_mul_f32_e32 v9, v17, v9 ; 10121311
v_mac_f32_e32 v1, s10, v9 ; 3E02120A
v_mac_f32_e32 v4, s11, v9 ; 3E08120B
v_mac_f32_e32 v5, s16, v9 ; 3E0A1210
v_mac_f32_e32 v1, s1, v15 ; 3E021E01
v_mac_f32_e32 v4, s1, v16 ; 3E082001
v_mac_f32_e32 v5, s1, v7 ; 3E0A0E01
v_mov_b32_e32 v7, 0x41800000 ; 7E0E02FF 41800000
v_mov_b32_e32 v9, 0x40400000 ; 7E1202FF 40400000
v_floor_f32_e32 v15, v11 ; 7E1E490B
v_subrev_f32_e32 v11, v15, v11 ; 0A16170F
v_floor_f32_e32 v15, v10 ; 7E1E490A
v_subrev_f32_e32 v10, v15, v10 ; 0A14150F
v_mul_f32_e32 v15, v2, v2 ; 101E0502
v_mac_f32_e32 v15, v3, v3 ; 3E1E0703
v_mac_f32_e32 v15, v0, v0 ; 3E1E0100
v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F
v_mul_f32_e32 v18, v8, v12 ; 10241908
v_mul_f32_e32 v19, v8, v13 ; 10261B08
v_mul_f32_e32 v20, v8, v14 ; 10281D08
s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504
s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00430C12
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v8, -0.5, v12 ; 061018F1
v_add_f32_e32 v12, -0.5, v13 ; 06181AF1
v_add_f32_e32 v13, -0.5, v14 ; 061A1CF1
image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[12:19], s[8:11] ; F0800700 00431513
v_mov_b32_e32 v19, v20 ; 7E260314
image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00431212
s_waitcnt vmcnt(1) ; BF8C0771
v_add_f32_e32 v14, -0.5, v21 ; 061C2AF1
v_add_f32_e32 v16, -0.5, v22 ; 06202CF1
v_add_f32_e32 v21, -0.5, v23 ; 062A2EF1
s_waitcnt vmcnt(0) ; BF8C0770
v_add_f32_e32 v18, -0.5, v18 ; 062424F1
v_add_f32_e32 v19, -0.5, v19 ; 062626F1
v_add_f32_e32 v20, -0.5, v20 ; 062828F1
v_add_f32_e32 v8, v14, v8 ; 0610110E
v_add_f32_e32 v12, v16, v12 ; 06181910
v_add_f32_e32 v13, v21, v13 ; 061A1B15
v_add_f32_e32 v8, v18, v8 ; 06101112
v_add_f32_e32 v12, v19, v12 ; 06181913
v_add_f32_e32 v13, v20, v13 ; 061A1B14
v_mul_f32_e32 v14, v8, v8 ; 101C1108
v_mac_f32_e32 v14, v12, v12 ; 3E1C190C
v_mac_f32_e32 v14, v13, v13 ; 3E1C1B0D
v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E
v_mul_f32_e32 v16, v14, v12 ; 1020190E
v_mad_f32 v18, v14, v8, v16 ; D2820012 0442110E
v_mac_f32_e32 v18, v14, v13 ; 3E241B0E
v_mul_f32_e32 v19, v7, v18 ; 10262507
v_floor_f32_e32 v19, v19 ; 7E264913
v_mad_f32 v7, v18, v7, -v19 ; D2820007 844E0F12
v_mul_f32_e32 v2, v15, v2 ; 1004050F
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_f32_e32 v0, v15, v0 ; 1000010F
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
v_mad_f32 v7, 4.0, v7, -2.0 ; D2820007 03D60EF6
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_min_f32_e32 v7, v9, v7 ; 1E0E0F09
v_mul_f32_e32 v7, v7, v7 ; 100E0F07
v_mul_f32_e32 v18, 0.5, v2 ; 102404F0
v_mul_f32_e32 v19, 0.5, v0 ; 102600F0
v_mul_f32_e32 v15, 0x3e800000, v7 ; 101E0EFF 3E800000
v_mul_f32_e32 v20, 0.5, v3 ; 102806F0
v_log_f32_e32 v15, v15 ; 7E1E4F0F
v_mov_b32_e32 v21, v19 ; 7E2A0313
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00431612
v_mov_b32_e32 v19, v20 ; 7E260314
v_mov_b32_e32 v25, 0xbe99999a ; 7E3202FF BE99999A
v_add_f32_e64 v0, |v0|, v25 ; D2060100 00023300
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[12:19], s[8:11] ; F0800700 00431A14
image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[12:19], s[8:11] ; F0800700 00431212
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v18, v0, v18 ; 10242500
v_mul_f32_e32 v19, v0, v19 ; 10262700
v_mul_f32_e32 v0, v0, v20 ; 10002900
v_mul_legacy_f32_e32 v15, 0x447a0000, v15 ; 0E1E1EFF 447A0000
v_exp_f32_e32 v15, v15 ; 7E1E4B0F
v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80
v_mov_b32_e32 v20, 0x40c00000 ; 7E2802FF 40C00000
v_mad_f32 v9, -v15, v9, v20 ; D2820009 2452130F
v_mul_f32_e32 v15, v14, v8 ; 101E110E
v_mad_f32 v8, v8, v14, -0.5 ; D2820008 03C61D08
v_mad_f32 v12, v12, v14, -0.5 ; D282000C 03C61D0C
v_mul_f32_e32 v20, v14, v13 ; 10281B0E
v_mad_f32 v13, v13, v14, -0.5 ; D282000D 03C61D0D
v_floor_f32_e32 v14, v6 ; 7E1C4906
v_subrev_f32_e32 v6, v14, v6 ; 0A0C0D0E
v_mov_b32_e32 v14, 0xbf333333 ; 7E1C02FF BF333333
v_add_f32_e32 v11, v14, v11 ; 0616170E
v_add_f32_e32 v10, v14, v10 ; 0614150E
v_add_f32_e32 v6, v14, v6 ; 060C0D0E
v_mac_f32_e32 v11, 0.5, v8 ; 3E1610F0
v_mac_f32_e32 v10, 0.5, v12 ; 3E1418F0
v_mac_f32_e32 v6, 0.5, v13 ; 3E0C1AF0
v_sub_f32_e64 v8, 1.0, s2 ; D2080008 000004F2
v_mad_f32 v12, s2, v15, v8 ; D282000C 04221E02
v_mad_f32 v13, s2, v16, v8 ; D282000D 04222002
v_mac_f32_e32 v8, s2, v20 ; 3E102802
v_mul_f32_e32 v11, v11, v11 ; 1016170B
v_mac_f32_e32 v11, v10, v10 ; 3E16150A
v_mac_f32_e32 v11, v6, v6 ; 3E160D06
v_sqrt_f32_e32 v6, v11 ; 7E0C670B
v_mov_b32_e32 v10, 0xbe19999a ; 7E1402FF BE19999A
v_add_f32_e32 v6, v6, v10 ; 060C1506
v_mov_b32_e32 v10, 0x3e19999a ; 7E1402FF 3E19999A
v_madmk_f32_e32 v6, v6, v10, 0xc0d554ca ; 400C1506 C0D554CA
v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_mul_f32_e32 v10, v12, v7 ; 10140F0C
v_mul_f32_e32 v11, v13, v7 ; 10160F0D
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_legacy_f32_e32 v6, v9, v6 ; 0E0C0D09
v_exp_f32_e32 v6, v6 ; 7E0C4B06
v_mul_f32_e32 v8, v6, v10 ; 10101506
v_mul_f32_e32 v9, v6, v11 ; 10121706
v_mul_f32_e32 v6, v6, v7 ; 100C0F06
v_mul_f32_e32 v7, v17, v1 ; 100E0311
v_madmk_f32_e32 v7, v4, v7, 0x3f19999a ; 400E0F04 3F19999A
v_madmk_f32_e32 v7, v5, v7, 0x3dcccccd ; 400E0F05 3DCCCCCD
v_sub_f32_e32 v7, 0x3f8ccccd, v7 ; 080E0EFF 3F8CCCCD
v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080
v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280
v_add_f32_e64 v6, 0, v6 clamp ; D2060806 00020C80
v_mul_f32_e32 v10, v7, v7 ; 10140F07
v_mad_f32 v10, v10, v10, v25 ; D282000A 0466150A
v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480
v_mul_f32_e32 v8, v10, v8 ; 1010110A
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v6, v10, v6 ; 100C0D0A
v_mac_f32_e32 v1, s1, v8 ; 3E021001
v_mac_f32_e32 v4, s1, v9 ; 3E081201
v_mac_f32_e32 v5, s1, v6 ; 3E0A0C01
v_add_f32_e64 v3, |v3|, v25 ; D2060103 00023303
v_add_f32_e64 v2, |v2|, v25 ; D2060102 00023302
v_mac_f32_e32 v18, v2, v26 ; 3E243502
v_mac_f32_e32 v19, v2, v27 ; 3E263702
v_mac_f32_e32 v0, v2, v28 ; 3E003902
v_mac_f32_e32 v18, v3, v22 ; 3E242D03
v_mac_f32_e32 v19, v3, v23 ; 3E262F03
v_mac_f32_e32 v0, v3, v24 ; 3E003103
v_add_f32_e32 v2, -0.5, v7 ; 06040EF1
v_max_f32_e32 v2, 0, v2 ; 20040480
v_mul_f32_e32 v3, v2, v18 ; 10062502
v_mul_f32_e32 v6, v2, v19 ; 100C2702
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mov_b32_e32 v2, 0x3d23d70a ; 7E0402FF 3D23D70A
v_mac_f32_e32 v1, v2, v3 ; 3E020702
v_mac_f32_e32 v4, v2, v6 ; 3E080D02
v_mac_f32_e32 v5, v2, v0 ; 3E0A0102
v_cvt_pkrtz_f16_f32_e32 v0, v1, v4 ; 5E000901
v_cvt_pkrtz_f16_f32_e64 v1, v5, s0 ; D25E0001 00000105
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 32
Code Size: 1688 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xyz, IN[0].xyzx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI program listed directly
; above this module. It multiplies the 4-component input IN[0] by the four
; vectors CONST[0..3] (TGSI instructions 0-3) and exports
;   - the raw input xyz (OUT[1], GENERIC[0]), and
;   - the transformed vector (OUT[0], POSITION).
;
; Arguments read by the body:
;   %1      array of <16 x i8> descriptors; element 0 is the buffer that
;           supplies the 16 constants.
;   %4      array of <16 x i8> descriptors; element 0 is the buffer passed to
;           llvm.SI.vs.load.input.
;   %5, %7  summed to form the index passed to llvm.SI.vs.load.input
;           (presumably a base offset plus a per-vertex id -- TODO confirm).
; Arguments %0, %2, %3, %6, %8, %9 and %10 are not referenced.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Fetch the descriptor in slot 0 of %1.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen consecutive floats (offsets 0..60 in steps of 4). Judging by how
; they are consumed below: %13-%16 = CONST[0].xyzw, %17-%20 = CONST[1].xyzw,
; %21-%24 = CONST[2].xyzw, %25-%28 = CONST[3].xyzw.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Fetch the descriptor in slot 0 of %4 and load IN[0] through it.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
; Split IN[0] into scalars: %33 = x, %34 = y, %35 = z, %36 = w.
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; TGSI 0: TEMP[0] = CONST[0] * IN[0].xxxx
%37 = fmul float %13, %33
%38 = fmul float %14, %33
%39 = fmul float %15, %33
%40 = fmul float %16, %33
; TGSI 1: TEMP[0] = CONST[1] * IN[0].yyyy + TEMP[0]
%41 = fmul float %17, %34
%42 = fadd float %41, %37
%43 = fmul float %18, %34
%44 = fadd float %43, %38
%45 = fmul float %19, %34
%46 = fadd float %45, %39
%47 = fmul float %20, %34
%48 = fadd float %47, %40
; TGSI 2: TEMP[0] = CONST[2] * IN[0].zzzz + TEMP[0]
%49 = fmul float %21, %35
%50 = fadd float %49, %42
%51 = fmul float %22, %35
%52 = fadd float %51, %44
%53 = fmul float %23, %35
%54 = fadd float %53, %46
%55 = fmul float %24, %35
%56 = fadd float %55, %48
; TGSI 3: TEMP[0] = CONST[3] * IN[0].wwww + TEMP[0]
; The results %58, %60, %62, %64 are the x, y, z, w of the transformed vector.
%57 = fmul float %25, %36
%58 = fadd float %57, %50
%59 = fmul float %26, %36
%60 = fadd float %59, %52
%61 = fmul float %27, %36
%62 = fadd float %61, %54
%63 = fmul float %28, %36
%64 = fadd float %63, %56
; OUT[1] (GENERIC[0]): the untransformed input xyz. TGSI instruction 4 writes
; only TEMP[1].xyz, so w is never defined by the program; the IR passes 0.0.
; This export uses 32 as its fourth operand.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %33, float %34, float %35, float 0.000000e+00)
; OUT[0] (POSITION): the transformed vector. This export uses 12 as its fourth
; operand and sets the third operand to 1.
; NOTE(review): the exact meaning of the integer operands is defined by the
; llvm.SI.export intrinsic and is not visible in this dump.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(0) ; BF8C0770
exp 15, 32, 0, 0, 0, v2, v3, v4, v1 ; F800020F 01040302
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v2 ; 10000404
v_mac_f32_e32 v0, s8, v3 ; 3E000608
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v1, s5, v2 ; 10020405
v_mac_f32_e32 v1, s9, v3 ; 3E020609
v_mul_f32_e32 v6, s6, v2 ; 100C0406
v_mul_f32_e32 v2, s7, v2 ; 10040407
v_mac_f32_e32 v6, s10, v3 ; 3E0C060A
v_mac_f32_e32 v2, s11, v3 ; 3E04060B
v_mac_f32_e32 v0, s12, v4 ; 3E00080C
v_mac_f32_e32 v1, s13, v4 ; 3E02080D
v_mac_f32_e32 v6, s14, v4 ; 3E0C080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v1, s17, v5 ; 3E020A11
v_mac_f32_e32 v6, s18, v5 ; 3E0C0A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 12, 0, 1, 0, v0, v1, v6, v2 ; F80008CF 02060100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 188 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL CONST[0..1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 1.5000, 0.0000, 0.0000}
0: MOV TEMP[0].xyz, CONST[0].xyzx
1: RCP TEMP[1].x, CONST[1].xxxx
2: MUL TEMP[1].x, IN[0].yyyy, TEMP[1].xxxx
3: MOV_SAT TEMP[1].x, TEMP[1].xxxx
4: ADD TEMP[1].x, IMM[0].xxxx, -TEMP[1].xxxx
5: POW TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy
6: MOV TEMP[0].w, TEMP[1].xxxx
7: MOV OUT[0], TEMP[0]
8: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%29 = fdiv float 1.000000e+00, %27
%30 = fmul float %28, %29
%31 = call float @llvm.AMDIL.clamp.(float %30, float 0.000000e+00, float 1.000000e+00)
%32 = fsub float 1.000000e+00, %31
%33 = call float @llvm.pow.f32(float %32, float 1.500000e+00)
%34 = call i32 @llvm.SI.packf16(float %24, float %25)
%35 = bitcast i32 %34 to float
%36 = call i32 @llvm.SI.packf16(float %26, float %33)
%37 = bitcast i32 %36 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %35, float %37, float %35, float %37)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
v_rcp_f32_e32 v2, s4 ; 7E045404
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102
v_interp_p1_f32 v0, v0, 1, 0, [m0] ; C8000100
v_interp_p2_f32 v0, [v0], v1, 1, 0, [m0] ; C8010101
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_log_f32_e32 v0, v0 ; 7E004F00
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v1, s5 ; 7E020205
v_mul_legacy_f32_e32 v0, 0x3fc00000, v0 ; 0E0000FF 3FC00000
v_exp_f32_e32 v0, v0 ; 7E004B00
v_cvt_pkrtz_f16_f32_e32 v1, s4, v1 ; 5E020204
v_cvt_pkrtz_f16_f32_e32 v0, s0, v0 ; 5E000000
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 4
Code Size: 104 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..19]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx
34: DP4 TEMP[6].x, CONST[1], TEMP[5]
35: DP4 TEMP[7].x, CONST[2], TEMP[5]
36: MOV TEMP[6].y, TEMP[7].xxxx
37: DP4 TEMP[5].x, CONST[3], TEMP[5]
38: MOV TEMP[6].z, TEMP[5].xxxx
39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy
40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx
41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz
43: MOV TEMP[5].yzw, TEMP[5].yxyz
44: MOV TEMP[5].x, TEMP[1].zzzz
45: MOV TEMP[0].xyz, TEMP[0].xyzx
46: MOV OUT[5], TEMP[0]
47: MOV OUT[1], TEMP[2]
48: MOV OUT[2], TEMP[4]
49: MOV OUT[3], TEMP[3]
50: MOV OUT[0], TEMP[1]
51: MOV OUT[4], TEMP[5]
52: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
%107 = fmul float %31, %84
%108 = fmul float %32, %84
%109 = fmul float %33, %84
%110 = fmul float %34, %84
%111 = fmul float %35, %85
%112 = fadd float %111, %107
%113 = fmul float %36, %85
%114 = fadd float %113, %108
%115 = fmul float %37, %85
%116 = fadd float %115, %109
%117 = fmul float %38, %85
%118 = fadd float %117, %110
%119 = fmul float %39, %86
%120 = fadd float %119, %112
%121 = fmul float %40, %86
%122 = fadd float %121, %114
%123 = fmul float %41, %86
%124 = fadd float %123, %116
%125 = fmul float %42, %86
%126 = fadd float %125, %118
%127 = fmul float %43, %87
%128 = fadd float %127, %120
%129 = fmul float %44, %87
%130 = fadd float %129, %122
%131 = fmul float %45, %87
%132 = fadd float %131, %124
%133 = fmul float %64, %84
%134 = fmul float %65, %84
%135 = fmul float %66, %84
%136 = fmul float %67, %84
%137 = fmul float %68, %85
%138 = fadd float %137, %133
%139 = fmul float %69, %85
%140 = fadd float %139, %134
%141 = fmul float %70, %85
%142 = fadd float %141, %135
%143 = fmul float %71, %85
%144 = fadd float %143, %136
%145 = fmul float %72, %86
%146 = fadd float %145, %138
%147 = fmul float %73, %86
%148 = fadd float %147, %140
%149 = fmul float %74, %86
%150 = fadd float %149, %142
%151 = fmul float %75, %86
%152 = fadd float %151, %144
%153 = fmul float %76, %87
%154 = fadd float %153, %146
%155 = fmul float %77, %87
%156 = fadd float %155, %148
%157 = fmul float %78, %87
%158 = fadd float %157, %150
%159 = fmul float %79, %87
%160 = fadd float %159, %152
%161 = fmul float %99, %55
%162 = fadd float %161, %57
%163 = fmul float %100, %56
%164 = fadd float %163, %58
%165 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %165, float %99, float %105
%.32 = select i1 %165, float %100, float %106
%166 = fmul float %., %59
%167 = fadd float %166, %61
%168 = fmul float %.32, %60
%169 = fadd float %168, %62
%170 = fmul float %46, %92
%171 = fmul float %49, %92
%172 = fmul float %52, %92
%173 = fmul float %47, %93
%174 = fadd float %173, %170
%175 = fmul float %50, %93
%176 = fadd float %175, %171
%177 = fmul float %53, %93
%178 = fadd float %177, %172
%179 = fmul float %48, %94
%180 = fadd float %179, %174
%181 = fmul float %51, %94
%182 = fadd float %181, %176
%183 = fmul float %54, %94
%184 = fadd float %183, %178
%185 = fmul float %180, %180
%186 = fmul float %182, %182
%187 = fadd float %186, %185
%188 = fmul float %184, %184
%189 = fadd float %187, %188
%190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189)
%191 = fmul float %180, %190
%192 = fmul float %182, %190
%193 = fmul float %184, %190
%194 = fmul float %191, %192
%195 = fmul float %192, %193
%196 = fmul float %193, %193
%197 = fmul float %193, %191
%198 = fmul float %16, %194
%199 = fmul float %17, %195
%200 = fadd float %198, %199
%201 = fmul float %18, %196
%202 = fadd float %200, %201
%203 = fmul float %19, %197
%204 = fadd float %202, %203
%205 = fmul float %20, %194
%206 = fmul float %21, %195
%207 = fadd float %205, %206
%208 = fmul float %22, %196
%209 = fadd float %207, %208
%210 = fmul float %23, %197
%211 = fadd float %209, %210
%212 = fmul float %24, %194
%213 = fmul float %25, %195
%214 = fadd float %212, %213
%215 = fmul float %26, %196
%216 = fadd float %214, %215
%217 = fmul float %27, %197
%218 = fadd float %216, %217
%219 = fmul float %192, %192
%220 = fmul float %191, %191
%221 = fsub float %220, %219
%222 = fmul float %28, %221
%223 = fadd float %222, %204
%224 = fmul float %29, %221
%225 = fadd float %224, %211
%226 = fmul float %30, %221
%227 = fadd float %226, %218
%228 = fsub float %128, %13
%229 = fsub float %130, %14
%230 = fsub float %132, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521
s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522
s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524
s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525
s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526
s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528
s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529
s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A
s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C
s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D
s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E
s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534
s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535
s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505
s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506
s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507
s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508
s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s5 ; 7E000205
s_buffer_load_dword s7, s[20:23], 0xa ; C203950A
s_buffer_load_dword s5, s[20:23], 0xb ; C202950B
s_buffer_load_dword s15, s[20:23], 0xc ; C207950C
s_buffer_load_dword s17, s[20:23], 0xd ; C208950D
s_buffer_load_dword s13, s[20:23], 0xe ; C206950E
s_buffer_load_dword s8, s[20:23], 0xf ; C204150F
s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C
s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540
s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541
s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542
s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543
s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510
s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511
s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512
s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514
s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880
s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516
s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517
s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518
s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519
s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A
s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537
s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538
s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539
s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A
s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B
s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B
s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C
s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D
s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E
s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s47 ; 7E1A022F
s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544
s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545
s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546
v_mov_b32_e32 v14, s50 ; 7E1C0232
s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547
v_mov_b32_e32 v15, s51 ; 7E1E0233
s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548
s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549
s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A
s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B
s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C
s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D
s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E
s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F
v_mul_f32_e32 v16, s41, v2 ; 10200429
v_mac_f32_e32 v0, s34, v9 ; 3E001222
v_mul_f32_e32 v17, s42, v2 ; 1022042A
v_mul_f32_e32 v18, s36, v2 ; 10240424
v_mul_f32_e32 v19, s43, v2 ; 1026042B
v_mac_f32_e32 v13, s35, v10 ; 3E1A1423
v_mul_f32_e32 v20, s25, v6 ; 10280C19
v_mul_f32_e32 v21, s28, v6 ; 102A0C1C
v_mul_f32_e32 v6, s31, v6 ; 100C0C1F
v_mac_f32_e32 v16, s44, v3 ; 3E20062C
v_mac_f32_e32 v17, s45, v3 ; 3E22062D
v_mac_f32_e32 v18, s46, v3 ; 3E24062E
v_mac_f32_e32 v20, s26, v7 ; 3E280E1A
v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20
v_mac_f32_e32 v20, s27, v8 ; 3E28101B
v_mac_f32_e32 v21, s30, v8 ; 3E2A101E
v_mac_f32_e32 v6, s33, v8 ; 3E0C1021
v_mac_f32_e32 v19, s52, v3 ; 3E260634
v_mul_f32_e32 v7, s37, v2 ; 100E0425
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v7, s47, v3 ; 3E0E062F
v_mul_f32_e32 v8, s38, v2 ; 10100426
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s58, v3 ; 3E16063A
v_mul_f32_e32 v2, s40, v2 ; 10040428
v_mac_f32_e32 v2, s50, v3 ; 3E040632
v_mac_f32_e32 v16, s53, v4 ; 3E200835
v_mac_f32_e32 v17, s54, v4 ; 3E220836
v_mac_f32_e32 v18, s55, v4 ; 3E240837
v_mac_f32_e32 v19, s56, v4 ; 3E260838
v_mac_f32_e32 v7, s51, v4 ; 3E0E0833
v_mac_f32_e32 v8, s59, v4 ; 3E10083B
v_mac_f32_e32 v11, s60, v4 ; 3E16083C
v_mac_f32_e32 v2, s61, v4 ; 3E04083D
v_mac_f32_e32 v16, s18, v5 ; 3E200A12
v_mac_f32_e32 v17, s19, v5 ; 3E220A13
v_mac_f32_e32 v18, s24, v5 ; 3E240A18
v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E
v_mac_f32_e32 v8, s63, v5 ; 3E100A3F
v_mac_f32_e32 v11, s64, v5 ; 3E160A40
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mul_f32_e32 v3, v20, v20 ; 10062914
v_mac_f32_e32 v3, v21, v21 ; 3E062B15
v_mac_f32_e32 v3, v6, v6 ; 3E060D06
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mac_f32_e32 v14, s48, v9 ; 3E1C1230
v_mac_f32_e32 v15, s49, v10 ; 3E1E1431
exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v3, v20 ; 10002903
v_mul_f32_e32 v4, v3, v21 ; 10082B03
v_mul_f32_e32 v3, v3, v6 ; 10060D03
v_mul_f32_e32 v5, v3, v4 ; 100A0903
v_mul_f32_e32 v6, s14, v5 ; 100C0A0E
v_mul_f32_e32 v9, s16, v5 ; 10120A10
v_mul_f32_e32 v5, s17, v5 ; 100A0A11
v_mul_f32_e32 v10, v4, v0 ; 10140104
v_mac_f32_e32 v6, s6, v10 ; 3E0C1406
v_mac_f32_e32 v9, s12, v10 ; 3E12140C
v_mac_f32_e32 v5, s15, v10 ; 3E0A140F
v_mul_f32_e32 v10, v3, v3 ; 10140703
v_mac_f32_e32 v6, s4, v10 ; 3E0C1404
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s13, v10 ; 3E0A140D
v_mul_f32_e32 v10, v0, v3 ; 10140700
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s5, v10 ; 3E121405
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100
v_mac_f32_e32 v6, s9, v10 ; 3E0C1409
v_mac_f32_e32 v9, s10, v10 ; 3E12140A
v_mac_f32_e32 v5, s11, v10 ; 3E0A140B
v_subrev_f32_e32 v10, s0, v16 ; 0A142000
v_subrev_f32_e32 v12, s1, v17 ; 0A182201
v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402
exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B
exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110
exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 24
Code Size: 748 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[21..22]
DCL CONST[24..25]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz
10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww
11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
13: MOV TEMP[5].xy, IN[0].xyyy
14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D
15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx
16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx
17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
19: MOV TEMP[7].xyz, IMM[0].yyyy
20: MOV TEMP[8].w, IMM[0].xxxx
21: MOV TEMP[8].xyz, TEMP[0].xyzx
22: DP4 TEMP[9].x, CONST[1], TEMP[8]
23: DP4 TEMP[10].x, CONST[2], TEMP[8]
24: MOV TEMP[9].y, TEMP[10].xxxx
25: DP4 TEMP[8].x, CONST[3], TEMP[8]
26: MOV TEMP[9].z, TEMP[8].xxxx
27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
28: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
29: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
30: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
31: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz
32: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
33: MOV TEMP[10].xyz, TEMP[9].xyzx
34: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww
35: UIF TEMP[11].xxxx :0
36: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
37: RSQ TEMP[11].x, TEMP[11].xxxx
38: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
39: MOV TEMP[12].xyz, -IN[4].xyzx
40: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
41: RCP TEMP[14].x, TEMP[11].xxxx
42: RCP TEMP[14].y, TEMP[11].yyyy
43: RCP TEMP[14].z, TEMP[11].zzzz
44: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
45: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
46: RCP TEMP[14].x, TEMP[11].xxxx
47: RCP TEMP[14].y, TEMP[11].yyyy
48: RCP TEMP[14].z, TEMP[11].zzzz
49: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
50: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz
51: UIF TEMP[14].xxxx :0
52: MOV TEMP[15].x, TEMP[13].xxxx
53: ELSE :0
54: MOV TEMP[15].x, TEMP[12].xxxx
55: ENDIF
56: UIF TEMP[14].yyyy :0
57: MOV TEMP[16].x, TEMP[13].yyyy
58: ELSE :0
59: MOV TEMP[16].x, TEMP[12].yyyy
60: ENDIF
61: UIF TEMP[14].zzzz :0
62: MOV TEMP[13].x, TEMP[13].zzzz
63: ELSE :0
64: MOV TEMP[13].x, TEMP[12].zzzz
65: ENDIF
66: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
67: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
68: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
69: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
70: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
71: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
72: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
73: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
74: ENDIF
75: ADD TEMP[11].x, IMM[0].xxxx, -CONST[22].xxxx
76: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx
77: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
78: MOV TEMP[10].xyz, TEMP[10].xyzz
79: MOV TEMP[10].w, TEMP[11].xxxx
80: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
81: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
82: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
83: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
84: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz
85: UIF TEMP[11].xxxx :0
86: MOV TEMP[11].xyz, TEMP[9].xyzx
87: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww
88: UIF TEMP[12].xxxx :0
89: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
90: RSQ TEMP[12].x, TEMP[12].xxxx
91: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
92: MOV TEMP[12].xyz, -IN[4].xyzx
93: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
94: RCP TEMP[14].x, TEMP[9].xxxx
95: RCP TEMP[14].y, TEMP[9].yyyy
96: RCP TEMP[14].z, TEMP[9].zzzz
97: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
98: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
99: RCP TEMP[14].x, TEMP[9].xxxx
100: RCP TEMP[14].y, TEMP[9].yyyy
101: RCP TEMP[14].z, TEMP[9].zzzz
102: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
103: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz
104: UIF TEMP[14].xxxx :0
105: MOV TEMP[15].x, TEMP[13].xxxx
106: ELSE :0
107: MOV TEMP[15].x, TEMP[12].xxxx
108: ENDIF
109: UIF TEMP[14].yyyy :0
110: MOV TEMP[16].x, TEMP[13].yyyy
111: ELSE :0
112: MOV TEMP[16].x, TEMP[12].yyyy
113: ENDIF
114: UIF TEMP[14].zzzz :0
115: MOV TEMP[13].x, TEMP[13].zzzz
116: ELSE :0
117: MOV TEMP[13].x, TEMP[12].zzzz
118: ENDIF
119: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
120: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
121: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
122: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
123: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
124: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
125: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
126: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
127: ENDIF
128: ADD TEMP[9].x, IMM[0].xxxx, -CONST[22].xxxx
129: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx
130: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
131: MOV TEMP[11].xyz, TEMP[11].xyzz
132: MOV TEMP[11].w, TEMP[9].xxxx
133: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
134: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
135: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
136: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
137: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
138: ELSE :0
139: MOV TEMP[7].xyz, TEMP[10].xyzx
140: ENDIF
141: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
142: MOV TEMP[1].xyz, -TEMP[1].xyzx
143: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx
144: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
145: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
146: RSQ TEMP[10].x, TEMP[10].xxxx
147: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
148: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
149: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
150: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
151: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
152: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
153: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
154: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx
155: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy
156: LG2 TEMP[12].x, TEMP[12].xxxx
157: RCP TEMP[12].x, TEMP[12].xxxx
158: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx
159: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
160: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx
161: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx
162: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx
163: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
164: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww
165: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx
166: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
167: MOV TEMP[16].xy, IN[0].xyyy
168: TEX TEMP[16].xyz, TEMP[16], SAMP[4], 2D
169: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
170: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx
171: MOV_SAT TEMP[4].x, TEMP[4].xxxx
172: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
173: MUL TEMP[18].x, TEMP[15].xxxx, TEMP[15].xxxx
174: MUL TEMP[15].x, TEMP[18].xxxx, TEMP[15].xxxx
175: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
176: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
177: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx
178: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx
179: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz
180: RCP TEMP[1].x, TEMP[1].xxxx
181: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
182: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx
183: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
184: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx
185: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
186: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
187: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
188: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
189: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
190: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
191: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
192: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz
193: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
194: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
195: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
196: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
197: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz
198: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww
199: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
200: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
201: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
202: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
203: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx
204: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
205: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
206: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
207: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
208: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
209: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx
210: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
211: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
212: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
213: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
214: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
215: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
216: MAD TEMP[0].xyz, TEMP[16].xyzz, CONST[25].xyzz, TEMP[1].xyzz
217: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww
218: MOV_SAT TEMP[1].x, TEMP[1].xxxx
219: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
220: MOV TEMP[0].xyz, TEMP[0].xyzx
221: MOV TEMP[0].w, IMM[0].xxxx
222: MOV OUT[0], TEMP[0]
223: END
; ModuleID = 'tgsi'
; LLVM IR for the shader whose TGSI listing ends just above (instructions up to
; "223: END"). It is tagged "ShaderType"="0" via attribute group #0 and reads
; its varyings through llvm.SI.fs.interp, so it is presumably the pixel/fragment
; stage - confirm against the TGSI header, which is outside this excerpt.
;
; Arguments actually used by the body:
;   %1      - array of <16 x i8> descriptors; element 0 (%23) is the buffer that
;             every llvm.SI.load.const call reads from.
;   %2      - array of <4 x i32>; elements 0..4 are passed as the third operand
;             of the sample intrinsics.
;   %3      - array of <8 x i32>; elements 0..4 are passed as the second operand
;             of the sample intrinsics.
;   %5, %7  - forwarded unchanged to every llvm.SI.fs.interp call.
; The remaining arguments are not referenced in this function.
;
; Result: three colour channels plus a constant 1.0 are packed with
; llvm.SI.packf16 and handed to a single llvm.SI.export call; the function
; itself returns void.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; --- Constant loads -----------------------------------------------------------
; All constants come from descriptor %23 at fixed byte offsets. Byte offset
; 16*n + 4*c lines up with TGSI CONST[n] component c in the listing above
; (e.g. offsets 304/308/312 feed the multiply that TGSI instruction 8 performs
; with CONST[19].xyz).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 404)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 408)
; --- Descriptor loads for the five sample sites -------------------------------
; Index i of %3 (as <32 x i8>) and index i of %2 (as <16 x i8>) are used
; together; the pairs line up with TGSI SAMP[0]..SAMP[4]:
;   0: %86/%88   1: %91/%94   2: %97/%100   3: %103/%106   4: %109/%112
%85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0
%87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)*
%97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0
%98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)*
%100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0
%101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)*
%103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0
%104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)*
%106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0
%107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)*
%109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0
%110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)*
%112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0
; --- Interpolated inputs ------------------------------------------------------
; The first two immediates of llvm.SI.fs.interp line up with (component, input
; index) of the TGSI IN[] registers: %113/%114 = IN[0].xy, %115..%117 =
; IN[1].xyz, %118..%120 = IN[2].xyz, %121..%124 = IN[3].xyzw, %125..%127 =
; IN[4].xyz.
%113 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
; Normalise (%115,%116,%117) into (%134,%135,%136): scale by rsq of the squared
; length.
%128 = fmul float %115, %115
%129 = fmul float %116, %116
%130 = fadd float %129, %128
%131 = fmul float %117, %117
%132 = fadd float %130, %131
%133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132)
%134 = fmul float %115, %133
%135 = fmul float %116, %133
%136 = fmul float %117, %133
; Normalise (%122,%123,%124) into (%143,%144,%145) the same way.
%137 = fmul float %122, %122
%138 = fmul float %123, %123
%139 = fadd float %138, %137
%140 = fmul float %124, %124
%141 = fadd float %139, %140
%142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141)
%143 = fmul float %122, %142
%144 = fmul float %123, %142
%145 = fmul float %124, %142
; TGSI 6-12: sample with descriptor pair 2 at (%113,%114), scale the result by
; the constants %76..%78, then derive the lrp'd triple %157..%159 and the
; triple %162..%164 scaled by %161 = %69 - %79*%69.
%146 = bitcast float %113 to i32
%147 = bitcast float %114 to i32
%148 = insertelement <2 x i32> undef, i32 %146, i32 0
%149 = insertelement <2 x i32> %148, i32 %147, i32 1
%150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %97, <16 x i8> %100, i32 2)
%151 = extractelement <4 x float> %150, i32 0
%152 = extractelement <4 x float> %150, i32 1
%153 = extractelement <4 x float> %150, i32 2
%154 = fmul float %76, %151
%155 = fmul float %77, %152
%156 = fmul float %78, %153
%157 = call float @llvm.AMDGPU.lrp(float %79, float %154, float %66)
%158 = call float @llvm.AMDGPU.lrp(float %79, float %155, float %67)
%159 = call float @llvm.AMDGPU.lrp(float %79, float %156, float %68)
%160 = fmul float %79, %69
%161 = fsub float %69, %160
%162 = fmul float %154, %161
%163 = fmul float %155, %161
%164 = fmul float %156, %161
; TGSI 13-16: sample with descriptor pair 3, keep only component 1, and remap it
; to %173 = sample * %81 + (1 - %81).
%165 = bitcast float %113 to i32
%166 = bitcast float %114 to i32
%167 = insertelement <2 x i32> undef, i32 %165, i32 0
%168 = insertelement <2 x i32> %167, i32 %166, i32 1
%169 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %168, <32 x i8> %103, <16 x i8> %106, i32 2)
%170 = extractelement <4 x float> %169, i32 1
%171 = fsub float 1.000000e+00, %81
%172 = fmul float %170, %81
%173 = fadd float %172, %171
; TGSI 17-18: %179 = max(dot((%134,%135,%136), (%24,%25,%26)), 0).
%174 = fmul float %134, %24
%175 = fmul float %135, %25
%176 = fadd float %175, %174
%177 = fmul float %136, %26
%178 = fadd float %176, %177
%179 = call float @llvm.maxnum.f32(float %178, float 0.000000e+00)
; TGSI 20-28: three 4-component dot products of (%134,%135,%136,1) with the
; constant rows at offsets 16, 32 and 48, added to %118..%120 and scaled by
; %173.
%180 = fmul float %27, %134
%181 = fmul float %28, %135
%182 = fadd float %180, %181
%183 = fmul float %29, %136
%184 = fadd float %182, %183
%185 = fadd float %184, %30
%186 = fmul float %31, %134
%187 = fmul float %32, %135
%188 = fadd float %186, %187
%189 = fmul float %33, %136
%190 = fadd float %188, %189
%191 = fadd float %190, %34
%192 = fmul float %35, %134
%193 = fmul float %36, %135
%194 = fadd float %192, %193
%195 = fmul float %37, %136
%196 = fadd float %194, %195
%197 = fadd float %196, %38
%198 = fadd float %118, %185
%199 = fadd float %119, %191
%200 = fadd float %120, %197
%201 = fmul float %198, %173
%202 = fmul float %199, %173
%203 = fmul float %200, %173
; TGSI 29-32: with n = (%134,%135,%136) and v = (%143,%144,%145), compute
; (%215,%216,%217) = v - 2*dot(n, v)*n.
%204 = fmul float %134, %143
%205 = fmul float %135, %144
%206 = fadd float %205, %204
%207 = fmul float %136, %145
%208 = fadd float %206, %207
%209 = fmul float %208, %134
%210 = fmul float %208, %135
%211 = fmul float %208, %136
%212 = fmul float %209, 2.000000e+00
%213 = fmul float %210, 2.000000e+00
%214 = fmul float %211, 2.000000e+00
%215 = fsub float %143, %212
%216 = fsub float %144, %213
%217 = fsub float %145, %214
; TGSI 34-35: the IF block runs only when the constant at offset 172 is > 0.
%218 = fcmp ogt float %51, 0.000000e+00
br i1 %218, label %IF, label %ENDIF
IF: ; preds = %main_body
; TGSI 36-73: adjust the vector (%215,%216,%217) before the first cube lookup.
; The vector is normalised; for each axis one of (%44..%46 - IN[4]) / dir or
; (%47..%49 - IN[4]) / dir is selected depending on whether that dir component
; is > 0; the minimum of the three (%259) scales dir, and offsets built from
; the midpoint of the two constant triples, the constants at offsets
; 160/164/168 and IN[4] are applied.
; NOTE(review): this has the shape of a box-projection correction for a
; cubemap lookup - confirm against the originating shader source.
%219 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%220 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%221 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%222 = fmul float %215, %215
%223 = fmul float %216, %216
%224 = fadd float %223, %222
%225 = fmul float %217, %217
%226 = fadd float %224, %225
%227 = call float @llvm.AMDGPU.rsq.clamped.f32(float %226)
%228 = fmul float %215, %227
%229 = fmul float %216, %227
%230 = fmul float %217, %227
%231 = fsub float %44, %125
%232 = fsub float %45, %126
%233 = fsub float %46, %127
%234 = fdiv float 1.000000e+00, %228
%235 = fdiv float 1.000000e+00, %229
%236 = fdiv float 1.000000e+00, %230
%237 = fmul float %231, %234
%238 = fmul float %232, %235
%239 = fmul float %233, %236
%240 = fsub float %47, %125
%241 = fsub float %48, %126
%242 = fsub float %49, %127
; %243..%245 recompute the same reciprocals as %234..%236 (TGSI 46-48 repeat
; TGSI 41-43).
%243 = fdiv float 1.000000e+00, %228
%244 = fdiv float 1.000000e+00, %229
%245 = fdiv float 1.000000e+00, %230
%246 = fmul float %240, %243
%247 = fmul float %241, %244
%248 = fmul float %242, %245
%249 = fcmp ogt float %228, 0.000000e+00
%250 = fcmp ogt float %229, 0.000000e+00
%251 = fcmp ogt float %230, 0.000000e+00
; The three TGSI UIF/ELSE/ENDIF groups (51-65) appear here as selects.
%. = select i1 %249, float %237, float %246
%temp64.0 = select i1 %250, float %238, float %247
%.100 = select i1 %251, float %239, float %248
%252 = fadd float %44, %47
%253 = fadd float %45, %48
%254 = fadd float %46, %49
%255 = fmul float %252, 5.000000e-01
%256 = fmul float %253, 5.000000e-01
%257 = fmul float %254, 5.000000e-01
%258 = call float @llvm.minnum.f32(float %., float %temp64.0)
%259 = call float @llvm.minnum.f32(float %258, float %.100)
%260 = fsub float %255, %221
%261 = fsub float %256, %220
%262 = fsub float %257, %219
%263 = fadd float %260, %125
%264 = fadd float %261, %126
%265 = fadd float %262, %127
%266 = fmul float %228, %259
%267 = fadd float %266, %263
%268 = fmul float %229, %259
%269 = fadd float %268, %264
%270 = fmul float %230, %259
%271 = fadd float %270, %265
%272 = fsub float %267, %255
%273 = fsub float %269, %256
%274 = fsub float %271, %257
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Lookup vector for the first cube sample: the adjusted vector when the IF block
; ran, otherwise the unmodified (%215,%216,%217).
%temp40.0 = phi float [ %272, %IF ], [ %215, %main_body ]
%temp41.0 = phi float [ %273, %IF ], [ %216, %main_body ]
%temp42.0 = phi float [ %274, %IF ], [ %217, %main_body ]
; TGSI 75-80: %277 = pow(1 - %80, 0.75) * 7.0 goes into the fourth coordinate
; of the TXL. The vector is passed through llvm.AMDGPU.cube, the first two
; results are divided by |third result| and biased by 1.5, and the lookup uses
; descriptor pair 0.
%275 = fsub float 1.000000e+00, %80
%276 = call float @llvm.pow.f32(float %275, float 7.500000e-01)
%277 = fmul float %276, 7.000000e+00
%278 = insertelement <4 x float> undef, float %temp40.0, i32 0
%279 = insertelement <4 x float> %278, float %temp41.0, i32 1
%280 = insertelement <4 x float> %279, float %temp42.0, i32 2
%281 = insertelement <4 x float> %280, float %277, i32 3
%282 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %281)
%283 = extractelement <4 x float> %282, i32 0
%284 = extractelement <4 x float> %282, i32 1
%285 = extractelement <4 x float> %282, i32 2
%286 = extractelement <4 x float> %282, i32 3
%287 = call float @llvm.fabs.f32(float %285)
%288 = fdiv float 1.000000e+00, %287
%289 = fmul float %283, %288
%290 = fadd float %289, 1.500000e+00
%291 = fmul float %284, %288
%292 = fadd float %291, 1.500000e+00
%293 = bitcast float %292 to i32
%294 = bitcast float %290 to i32
%295 = bitcast float %286 to i32
%296 = bitcast float %277 to i32
%297 = insertelement <4 x i32> undef, i32 %293, i32 0
%298 = insertelement <4 x i32> %297, i32 %294, i32 1
%299 = insertelement <4 x i32> %298, i32 %295, i32 2
%300 = insertelement <4 x i32> %299, i32 %296, i32 3
%301 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %300, <32 x i8> %86, <16 x i8> %88, i32 4)
%302 = extractelement <4 x float> %301, i32 0
%303 = extractelement <4 x float> %301, i32 1
%304 = extractelement <4 x float> %301, i32 2
%305 = extractelement <4 x float> %301, i32 3
; TGSI 81-83: scale the sampled xyz by %52 * pow(sampled w, %53).
%306 = call float @llvm.pow.f32(float %305, float %53)
%307 = fmul float %52, %306
%308 = fmul float %307, %302
%309 = fmul float %307, %303
%310 = fmul float %307, %304
; TGSI 84-85: the second cube lookup (IF86/IF89/ENDIF88 below) happens only when
; the constant at offset 156 is below 0x3FEFFFEB00000000, a value just under
; 1.0.
%311 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %311, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
; TGSI 87-88: the vector adjustment for the second lookup is gated on the
; constant at offset 236 being > 0.
%312 = fcmp ogt float %63, 0.000000e+00
br i1 %312, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
; Cube-sample colour used from here on: the lrp of both lookups when the second
; lookup ran (values from ENDIF88), otherwise the first lookup alone.
%temp28.0 = phi float [ %554, %ENDIF88 ], [ %308, %ENDIF ]
%temp29.0 = phi float [ %555, %ENDIF88 ], [ %309, %ENDIF ]
%temp30.0 = phi float [ %556, %ENDIF88 ], [ %310, %ENDIF ]
; TGSI 141: scale by the remapped sample %173.
%313 = fmul float %temp28.0, %173
%314 = fmul float %temp29.0, %173
%315 = fmul float %temp30.0, %173
; TGSI 142-147: %316 = 1 - %80; (%326,%327,%328) is the normalised vector
; (%24,%25,%26) - (%143,%144,%145).
%316 = fsub float 1.000000e+00, %80
%317 = fsub float %24, %143
%318 = fsub float %25, %144
%319 = fsub float %26, %145
%320 = fmul float %317, %317
%321 = fmul float %318, %318
%322 = fadd float %321, %320
%323 = fmul float %319, %319
%324 = fadd float %322, %323
%325 = call float @llvm.AMDGPU.rsq.clamped.f32(float %324)
%326 = fmul float %317, %325
%327 = fmul float %318, %325
%328 = fmul float %319, %325
; TGSI 148-151: %335 = max(-dot((%143..%145), (%134..%136)), 0) and
; %341 = max(dot((%24..%26), (%326..%328)), 0).
%329 = fmul float %143, %134
%330 = fsub float -0.000000e+00, %329
%331 = fmul float %144, %135
%332 = fsub float %330, %331
%333 = fmul float %145, %136
%334 = fsub float %332, %333
%335 = call float @llvm.maxnum.f32(float %334, float 0.000000e+00)
%336 = fmul float %24, %326
%337 = fmul float %25, %327
%338 = fadd float %337, %336
%339 = fmul float %26, %328
%340 = fadd float %338, %339
%341 = call float @llvm.maxnum.f32(float %340, float 0.000000e+00)
; TGSI 152-159: %343 = %316^2 * %75; %350 = (10 / log2(a*(1 - %316) + b))^2 with
; the two hex-float immediates as a and b. %350 is later used as a pow
; exponent.
%342 = fmul float %316, %316
%343 = fmul float %342, %75
%344 = fsub float 1.000000e+00, %316
%345 = fmul float %344, 0x3FEEF9DB20000000
%346 = fadd float %345, 0x3F9EB851E0000000
%347 = call float @llvm.log2.f32(float %346)
%348 = fdiv float 1.000000e+00, %347
%349 = fmul float %348, 1.000000e+01
%350 = fmul float %349, %349
; TGSI 160-166: one-minus terms and %356 = 2 * %341 * (%341 * %316) + 0.5.
%351 = fsub float 1.000000e+00, %179
%352 = fsub float 1.000000e+00, %335
%353 = fmul float %341, 2.000000e+00
%354 = fmul float %341, %316
%355 = fmul float %353, %354
%356 = fadd float %355, 5.000000e-01
%357 = fsub float 1.000000e+00, %341
%358 = fsub float 1.000000e+00, %335
; TGSI 167-168: sample with descriptor pair 4 at (%113,%114); it is scaled by
; %82..%84 and added to the result near the end.
%359 = bitcast float %113 to i32
%360 = bitcast float %114 to i32
%361 = insertelement <2 x i32> undef, i32 %359, i32 0
%362 = insertelement <2 x i32> %361, i32 %360, i32 1
%363 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %362, <32 x i8> %109, <16 x i8> %112, i32 2)
%364 = extractelement <4 x float> %363, i32 0
%365 = extractelement <4 x float> %363, i32 1
%366 = extractelement <4 x float> %363, i32 2
; TGSI 169-176: %369 = clamp(%80 + (1 - %161), 0, 1); %373 = %358^5 (built as
; x^2 * x^3); %374..%376 lrp between %369 and %157..%159 by that fifth power.
%367 = fsub float 1.000000e+00, %161
%368 = fadd float %80, %367
%369 = call float @llvm.AMDIL.clamp.(float %368, float 0.000000e+00, float 1.000000e+00)
%370 = fmul float %358, %358
%371 = fmul float %358, %358
%372 = fmul float %371, %358
%373 = fmul float %370, %372
%374 = call float @llvm.AMDGPU.lrp(float %373, float %369, float %157)
%375 = call float @llvm.AMDGPU.lrp(float %373, float %369, float %158)
%376 = call float @llvm.AMDGPU.lrp(float %373, float %369, float %159)
; TGSI 177-180: %381 = 1 / (lrp(%179, 1, %343) * lrp(%335, 1, %343) + small
; immediate).
%377 = call float @llvm.AMDGPU.lrp(float %179, float 1.000000e+00, float %343)
%378 = call float @llvm.AMDGPU.lrp(float %335, float 1.000000e+00, float %343)
%379 = fmul float %377, %378
%380 = fadd float %379, 0x3F1A36E2E0000000
%381 = fdiv float 1.000000e+00, %380
; TGSI 181-191: %388 = pow(max(dot((%134..%136), (%326..%328)), 0), %350). It is
; scaled by (%350 + 1) * %74, by %381, %179 and %73, clamped below at 0 and
; multiplied into the constants %70..%72.
%382 = fmul float %134, %326
%383 = fmul float %135, %327
%384 = fadd float %383, %382
%385 = fmul float %136, %328
%386 = fadd float %384, %385
%387 = call float @llvm.maxnum.f32(float %386, float 0.000000e+00)
%388 = call float @llvm.pow.f32(float %387, float %350)
%389 = fadd float %350, 1.000000e+00
%390 = fmul float %389, %74
%391 = fmul float %388, %390
%392 = fmul float %381, %391
%393 = fmul float %392, %179
%394 = fmul float %393, %73
%395 = call float @llvm.maxnum.f32(float %394, float 0.000000e+00)
%396 = fmul float %395, %70
%397 = fmul float %395, %71
%398 = fmul float %395, %72
; TGSI 192-197: (%407,%409,%411) = %157..%159 + (1 - %157..%159) * %357^5.
%399 = fsub float 1.000000e+00, %157
%400 = fsub float 1.000000e+00, %158
%401 = fsub float 1.000000e+00, %159
%402 = fmul float %357, %357
%403 = fmul float %357, %357
%404 = fmul float %403, %357
%405 = fmul float %402, %404
%406 = fmul float %399, %405
%407 = fadd float %406, %157
%408 = fmul float %400, %405
%409 = fadd float %408, %158
%410 = fmul float %401, %405
%411 = fadd float %410, %159
; TGSI 198-212: %418 = 1 + (%356 - 1) * %351^5 and %425 = 1 + (%356 - 1) *
; %352^5; their product times %179 scales the constants %70..%72, and the
; result is added to %201..%203.
%412 = fadd float %356, -1.000000e+00
%413 = fmul float %351, %351
%414 = fmul float %351, %351
%415 = fmul float %414, %351
%416 = fmul float %413, %415
%417 = fmul float %412, %416
%418 = fadd float %417, 1.000000e+00
%419 = fadd float %356, -1.000000e+00
%420 = fmul float %352, %352
%421 = fmul float %352, %352
%422 = fmul float %421, %352
%423 = fmul float %420, %422
%424 = fmul float %419, %423
%425 = fadd float %424, 1.000000e+00
%426 = fmul float %418, %425
%427 = fmul float %426, %179
%428 = fmul float %70, %427
%429 = fadd float %428, %201
%430 = fmul float %71, %427
%431 = fadd float %430, %202
%432 = fmul float %72, %427
%433 = fadd float %432, %203
; TGSI 213-216: sum the four contributions per channel: %162..%164 * %429..%433,
; %396..%398 * %407..%411, %313..%315 * %374..%376, and the descriptor-pair-4
; sample * %82..%84.
%434 = fmul float %162, %429
%435 = fmul float %163, %431
%436 = fmul float %164, %433
%437 = fmul float %396, %407
%438 = fadd float %437, %434
%439 = fmul float %397, %409
%440 = fadd float %439, %435
%441 = fmul float %398, %411
%442 = fadd float %441, %436
%443 = fmul float %313, %374
%444 = fadd float %443, %438
%445 = fmul float %314, %375
%446 = fadd float %445, %440
%447 = fmul float %315, %376
%448 = fadd float %447, %442
%449 = fmul float %364, %82
%450 = fadd float %449, %444
%451 = fmul float %365, %83
%452 = fadd float %451, %446
%453 = fmul float %366, %84
%454 = fadd float %453, %448
; TGSI 217-219: %457 = clamp(%121 * %42 + %43, 0, 1) is the lrp factor between
; the summed colour and the constants %39..%41.
%455 = fmul float %121, %42
%456 = fadd float %455, %43
%457 = call float @llvm.AMDIL.clamp.(float %456, float 0.000000e+00, float 1.000000e+00)
%458 = call float @llvm.AMDGPU.lrp(float %457, float %450, float %39)
%459 = call float @llvm.AMDGPU.lrp(float %457, float %452, float %40)
%460 = call float @llvm.AMDGPU.lrp(float %457, float %454, float %41)
; TGSI 220-222: pack (%458,%459) and (%460, 1.0) with llvm.SI.packf16 and export
; them. The two packed words are each passed twice to llvm.SI.export.
%461 = call i32 @llvm.SI.packf16(float %458, float %459)
%462 = bitcast i32 %461 to float
%463 = call i32 @llvm.SI.packf16(float %460, float 1.000000e+00)
%464 = bitcast i32 %463 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %462, float %464, float %462, float %464)
ret void
IF89: ; preds = %IF86
; TGSI 89-126: same vector adjustment as block IF, but driven by the constants
; %54..%56, %57..%59 and %60..%62 (byte offsets 192-232) instead of the
; offset 128-168 set.
%465 = fmul float %215, %215
%466 = fmul float %216, %216
%467 = fadd float %466, %465
%468 = fmul float %217, %217
%469 = fadd float %467, %468
%470 = call float @llvm.AMDGPU.rsq.clamped.f32(float %469)
%471 = fmul float %215, %470
%472 = fmul float %216, %470
%473 = fmul float %217, %470
%474 = fsub float %54, %125
%475 = fsub float %55, %126
%476 = fsub float %56, %127
%477 = fdiv float 1.000000e+00, %471
%478 = fdiv float 1.000000e+00, %472
%479 = fdiv float 1.000000e+00, %473
%480 = fmul float %474, %477
%481 = fmul float %475, %478
%482 = fmul float %476, %479
%483 = fsub float %57, %125
%484 = fsub float %58, %126
%485 = fsub float %59, %127
%486 = fdiv float 1.000000e+00, %471
%487 = fdiv float 1.000000e+00, %472
%488 = fdiv float 1.000000e+00, %473
%489 = fmul float %483, %486
%490 = fmul float %484, %487
%491 = fmul float %485, %488
%492 = fcmp ogt float %471, 0.000000e+00
%493 = fcmp ogt float %472, 0.000000e+00
%494 = fcmp ogt float %473, 0.000000e+00
%.101 = select i1 %492, float %480, float %489
%temp64.1 = select i1 %493, float %481, float %490
%.102 = select i1 %494, float %482, float %491
%495 = fadd float %54, %57
%496 = fadd float %55, %58
%497 = fadd float %56, %59
%498 = fmul float %495, 5.000000e-01
%499 = fmul float %496, 5.000000e-01
%500 = fmul float %497, 5.000000e-01
%501 = call float @llvm.minnum.f32(float %.101, float %temp64.1)
%502 = call float @llvm.minnum.f32(float %501, float %.102)
%503 = fsub float %498, %60
%504 = fsub float %499, %61
%505 = fsub float %500, %62
%506 = fadd float %503, %125
%507 = fadd float %504, %126
%508 = fadd float %505, %127
%509 = fmul float %471, %502
%510 = fadd float %509, %506
%511 = fmul float %472, %502
%512 = fadd float %511, %507
%513 = fmul float %473, %502
%514 = fadd float %513, %508
%515 = fsub float %510, %498
%516 = fsub float %512, %499
%517 = fsub float %514, %500
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
; Lookup vector for the second cube sample: adjusted when IF89 ran, otherwise
; the unmodified (%215,%216,%217).
%temp44.0 = phi float [ %515, %IF89 ], [ %215, %IF86 ]
%temp45.0 = phi float [ %516, %IF89 ], [ %216, %IF86 ]
%temp46.0 = phi float [ %517, %IF89 ], [ %217, %IF86 ]
; TGSI 128-137: same cube-coordinate preparation and fourth-coordinate value as
; the first lookup, but sampling descriptor pair 1 and scaling by
; %64 * pow(sampled w, %65). The result is lrp'd with the first lookup
; (%308..%310) using %50 as the factor.
%518 = fsub float 1.000000e+00, %80
%519 = call float @llvm.pow.f32(float %518, float 7.500000e-01)
%520 = fmul float %519, 7.000000e+00
%521 = insertelement <4 x float> undef, float %temp44.0, i32 0
%522 = insertelement <4 x float> %521, float %temp45.0, i32 1
%523 = insertelement <4 x float> %522, float %temp46.0, i32 2
%524 = insertelement <4 x float> %523, float %520, i32 3
%525 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %524)
%526 = extractelement <4 x float> %525, i32 0
%527 = extractelement <4 x float> %525, i32 1
%528 = extractelement <4 x float> %525, i32 2
%529 = extractelement <4 x float> %525, i32 3
%530 = call float @llvm.fabs.f32(float %528)
%531 = fdiv float 1.000000e+00, %530
%532 = fmul float %526, %531
%533 = fadd float %532, 1.500000e+00
%534 = fmul float %527, %531
%535 = fadd float %534, 1.500000e+00
%536 = bitcast float %535 to i32
%537 = bitcast float %533 to i32
%538 = bitcast float %529 to i32
%539 = bitcast float %520 to i32
%540 = insertelement <4 x i32> undef, i32 %536, i32 0
%541 = insertelement <4 x i32> %540, i32 %537, i32 1
%542 = insertelement <4 x i32> %541, i32 %538, i32 2
%543 = insertelement <4 x i32> %542, i32 %539, i32 3
%544 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %543, <32 x i8> %91, <16 x i8> %94, i32 4)
%545 = extractelement <4 x float> %544, i32 0
%546 = extractelement <4 x float> %544, i32 1
%547 = extractelement <4 x float> %544, i32 2
%548 = extractelement <4 x float> %544, i32 3
%549 = call float @llvm.pow.f32(float %548, float %65)
%550 = fmul float %64, %549
%551 = fmul float %550, %545
%552 = fmul float %550, %546
%553 = fmul float %550, %547
%554 = call float @llvm.AMDGPU.lrp(float %50, float %308, float %551)
%555 = call float @llvm.AMDGPU.lrp(float %50, float %309, float %552)
%556 = call float @llvm.AMDGPU.lrp(float %50, float %310, float %553)
br label %ENDIF85
}
; External declarations for every intrinsic called by @main above, followed by
; the attribute groups and metadata the module refers to. Each declaration is
; tagged with attribute group #1 or #2 except llvm.SI.export, which carries
; none.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; No attribute group: the export call is the only intrinsic here that is not
; declared readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main. "ShaderType" is "0" for this shader, whereas the
; vertex shader dumped at the top of this file uses "1".
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 and #2 differ only in that #2 omits nounwind.
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; !0 is the !tbaa tag attached to the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600
v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601
v_interp_p1_f32 v11, v0, 0, 2, [m0] ; C82C0800
v_interp_p2_f32 v11, [v11], v1, 0, 2, [m0] ; C82D0801
v_interp_p1_f32 v12, v0, 1, 2, [m0] ; C8300900
v_interp_p2_f32 v12, [v12], v1, 1, 2, [m0] ; C8310901
v_interp_p1_f32 v13, v0, 2, 2, [m0] ; C8340A00
v_interp_p2_f32 v13, [v13], v1, 2, 2, [m0] ; C8350A01
v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00
v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
v_mul_f32_e32 v6, v4, v4 ; 100C0904
v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05
v_mac_f32_e32 v6, v9, v9 ; 3E0C1309
v_rsq_clamp_f32_e32 v10, v6 ; 7E145906
v_mul_f32_e32 v6, v15, v15 ; 100C1F0F
v_mac_f32_e32 v6, v16, v16 ; 3E0C2110
v_mac_f32_e32 v6, v17, v17 ; 3E0C2311
v_rsq_clamp_f32_e32 v18, v6 ; 7E245906
v_mul_f32_e32 v7, v10, v4 ; 100E090A
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mul_f32_e32 v5, v10, v9 ; 100A130A
v_mul_f32_e32 v10, v18, v15 ; 10141F12
v_mul_f32_e32 v9, v18, v16 ; 10122112
v_mul_f32_e32 v4, v10, v7 ; 10080F0A
v_mac_f32_e32 v4, v9, v6 ; 3E080D09
v_mul_f32_e32 v14, v18, v17 ; 101C2312
v_mac_f32_e32 v4, v14, v5 ; 3E080B0E
v_mul_f32_e32 v19, v7, v4 ; 10260907
v_mac_f32_e32 v19, v7, v4 ; 3E260907
v_mul_f32_e32 v20, v6, v4 ; 10280906
v_mac_f32_e32 v20, v6, v4 ; 3E280906
v_mad_f32 v22, v15, v18, -v19 ; D2820016 844E250F
v_mad_f32 v23, v16, v18, -v20 ; D2820017 84522510
v_mul_f32_e32 v15, v5, v4 ; 101E0905
v_mac_f32_e32 v15, v5, v4 ; 3E1E0905
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_mad_f32 v24, v17, v18, -v15 ; D2820018 843E2511
v_interp_p1_f32 v26, v0, 0, 4, [m0] ; C8681000
v_interp_p2_f32 v26, [v26], v1, 0, 4, [m0] ; C8691001
s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508
s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710
v_interp_p1_f32 v25, v0, 1, 4, [m0] ; C8641100
v_interp_p2_f32 v25, [v25], v1, 1, 4, [m0] ; C8651101
v_interp_p1_f32 v27, v0, 2, 4, [m0] ; C86C1200
v_interp_p2_f32 v27, [v27], v1, 2, 4, [m0] ; C86D1201
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s20, s[8:11], 0x4c ; C20A094C
s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D
s_buffer_load_dword s22, s[8:11], 0x4e ; C20B094E
s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C
s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718
image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800700 00030F02
s_buffer_load_dword s1, s[8:11], 0x41 ; C2008941
s_buffer_load_dword s2, s[8:11], 0x42 ; C2010942
s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954
s_buffer_load_dword s3, s[8:11], 0x40 ; C2018940
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v19, s20, v15 ; 10261E14
v_mul_f32_e32 v20, s21, v16 ; 10282015
v_mul_f32_e32 v21, s22, v17 ; 102A2216
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C
s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D
s_buffer_load_dword s0, s[8:11], 0x58 ; C2000958
v_sub_f32_e64 v0, 1.0, s28 ; D2080000 000038F2
v_mul_f32_e32 v4, s3, v0 ; 10080003
v_mul_f32_e32 v1, s1, v0 ; 10020001
v_mul_f32_e32 v0, s2, v0 ; 10000002
v_mac_f32_e32 v4, s28, v19 ; 3E08261C
v_mov_b32_e32 v28, v22 ; 7E380316
v_mac_f32_e32 v1, s28, v20 ; 3E02281C
v_mov_b32_e32 v29, v23 ; 7E3A0317
v_mac_f32_e32 v0, s28, v21 ; 3E002A1C
v_mov_b32_e32 v30, v24 ; 7E3C0318
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[24:27] ; F0800F00 00C80F02
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[20:21], s[2:3] ; BE942402
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925
v_mul_f32_e32 v15, v22, v22 ; 101E2D16
v_mac_f32_e32 v15, v23, v23 ; 3E1E2F17
v_mac_f32_e32 v15, v24, v24 ; 3E1E3118
v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F
s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926
s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928
s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929
s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A
v_mul_f32_e32 v17, v15, v22 ; 10222D0F
v_mul_f32_e32 v18, v15, v23 ; 10242F0F
v_mul_f32_e32 v15, v15, v24 ; 101E310F
v_rcp_f32_e32 v28, v17 ; 7E385511
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v29, s1, v26 ; 083A3401
v_sub_f32_e32 v30, s2, v25 ; 083C3202
v_rcp_f32_e32 v31, v18 ; 7E3E5512
v_mul_f32_e32 v29, v28, v29 ; 103A3B1C
v_sub_f32_e32 v32, s13, v26 ; 0840340D
v_mul_f32_e32 v28, v28, v32 ; 1038411C
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v28, v28, v29 ; 00383B1C
v_rcp_f32_e32 v29, v15 ; 7E3A550F
v_mul_f32_e32 v30, v31, v30 ; 103C3D1F
v_sub_f32_e32 v32, s14, v25 ; 0840320E
v_mul_f32_e32 v31, v31, v32 ; 103E411F
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v30, v31, v30 ; 003C3D1F
v_sub_f32_e32 v31, s3, v27 ; 083E3603
v_mul_f32_e32 v31, v29, v31 ; 103E3F1D
v_sub_f32_e32 v32, s15, v27 ; 0840360F
v_mul_f32_e32 v29, v29, v32 ; 103A411D
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D
v_min3_f32 v28, v28, v30, v29 ; D2A2001C 04763D1C
v_mov_b32_e32 v29, s13 ; 7E3A020D
v_add_f32_e32 v29, s1, v29 ; 063A3A01
v_mov_b32_e32 v30, s14 ; 7E3C020E
v_add_f32_e32 v30, s2, v30 ; 063C3C02
v_mov_b32_e32 v31, s15 ; 7E3E020F
v_add_f32_e32 v31, s3, v31 ; 063E3E03
v_mad_f32 v32, 0.5, v29, -s16 ; D2820020 80423AF0
v_add_f32_e32 v32, v26, v32 ; 0640411A
v_mac_f32_e32 v32, v28, v17 ; 3E40231C
v_mad_f32 v17, 0.5, v30, -s17 ; D2820011 80463CF0
v_add_f32_e32 v17, v25, v17 ; 06222319
v_mac_f32_e32 v17, v28, v18 ; 3E22251C
v_mad_f32 v18, 0.5, v31, -s18 ; D2820012 804A3EF0
v_add_f32_e32 v18, v27, v18 ; 0624251B
v_mac_f32_e32 v18, v28, v15 ; 3E241F1C
v_mad_f32 v28, 0.5, -v29, v32 ; D282001C 44823AF0
v_mad_f32 v29, 0.5, -v30, v17 ; D282001D 44463CF0
v_mad_f32 v30, 0.5, -v31, v18 ; D282001E 444A3EF0
s_or_b64 exec, exec, s[20:21] ; 88FE147E
s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917
s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943
s_buffer_load_dword s13, s[8:11], 0x60 ; C2068960
s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900
s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901
s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902
s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904
s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905
s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906
s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907
s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908
s_buffer_load_dword s21, s[8:11], 0x9 ; C20A8909
s_buffer_load_dword s22, s[8:11], 0xa ; C20B090A
s_buffer_load_dword s23, s[8:11], 0xb ; C20B890B
s_buffer_load_dword s24, s[8:11], 0xc ; C20C090C
s_buffer_load_dword s25, s[8:11], 0xd ; C20C890D
s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E
s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F
v_sub_f32_e64 v15, 1.0, s0 ; D208000F 000000F2
v_log_f32_e32 v15, v15 ; 7E1E4F0F
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v15, 0x3f400000, v15 ; 0E1E1EFF 3F400000
v_exp_f32_e32 v15, v15 ; 7E1E4B0F
v_mul_f32_e32 v31, 0x40e00000, v15 ; 103E1EFF 40E00000
v_cubeid_f32 v35, v28, v29, v30 ; D2880023 047A3B1C
v_cubema_f32 v34, v28, v29, v30 ; D28E0022 047A3B1C
v_cubesc_f32 v33, v28, v29, v30 ; D28A0021 047A3B1C
v_cubetc_f32 v32, v28, v29, v30 ; D28C0020 047A3B1C
v_mov_b32_e32 v28, 0x3fc00000 ; 7E3802FF 3FC00000
v_rcp_f32_e64 v15, |v34| ; D354010F 00000122
v_mad_f32 v29, v15, v32, v28 ; D282001D 0472410F
v_mac_f32_e32 v28, v15, v33 ; 3E38430F
v_mov_b32_e32 v30, v35 ; 7E3C0323
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[36:43], s[32:35] ; F0900F00 01091C1C
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v15, v31 ; 7E1E4F1F
v_mul_legacy_f32_e32 v15, s30, v15 ; 0E1E1E1E
v_exp_f32_e32 v15, v15 ; 7E1E4B0F
v_mul_f32_e32 v15, s29, v15 ; 101E1E1D
v_mul_f32_e32 v18, v28, v15 ; 10241F1C
v_mul_f32_e32 v17, v29, v15 ; 10221F1D
v_mul_f32_e32 v15, v30, v15 ; 101E1F1E
v_mov_b32_e32 v28, s28 ; 7E38021C
v_mov_b32_e32 v29, 0x3f7fff58 ; 7E3A02FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v29 ; 7C023A0C
s_and_saveexec_b64 s[28:29], vcc ; BE9C246A
s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B
s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C
s_buffer_load_dword s31, s[8:11], 0x3d ; C20F893D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936
s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938
s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939
s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A
s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930
s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931
s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932
s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934
s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935
v_mul_f32_e32 v29, v22, v22 ; 103A2D16
v_mac_f32_e32 v29, v23, v23 ; 3E3A2F17
v_mac_f32_e32 v29, v24, v24 ; 3E3A3118
v_rsq_clamp_f32_e32 v29, v29 ; 7E3A591D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v30, s34, v27 ; 083C3622
v_mov_b32_e32 v31, s34 ; 7E3E0222
v_sub_f32_e32 v32, s38, v26 ; 08403426
v_sub_f32_e32 v33, s39, v25 ; 08423227
v_add_f32_e32 v31, s40, v31 ; 063E3E28
v_sub_f32_e32 v34, s40, v27 ; 08443628
v_mad_f32 v35, 0.5, v31, -s37 ; D2820023 80963EF0
v_add_f32_e32 v27, v27, v35 ; 0636471B
v_mul_f32_e32 v22, v29, v22 ; 102C2D1D
v_mul_f32_e32 v23, v29, v23 ; 102E2F1D
v_mul_f32_e32 v24, v29, v24 ; 1030311D
v_rcp_f32_e32 v29, v22 ; 7E3A5516
v_rcp_f32_e32 v35, v23 ; 7E465517
v_rcp_f32_e32 v36, v24 ; 7E485518
v_sub_f32_e32 v37, s41, v26 ; 084A3429
v_mov_b32_e32 v38, s41 ; 7E4C0229
v_add_f32_e32 v38, s38, v38 ; 064C4C26
v_mul_f32_e32 v32, v29, v32 ; 1040411D
v_mul_f32_e32 v29, v29, v37 ; 103A4B1D
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v34, v36, v34 ; 10444524
v_mul_f32_e32 v30, v36, v30 ; 103C3D24
v_mad_f32 v36, 0.5, v38, -s35 ; D2820024 808E4CF0
v_add_f32_e32 v26, v26, v36 ; 0634491A
v_sub_f32_e32 v36, s42, v25 ; 0848322A
v_mov_b32_e32 v37, s42 ; 7E4A022A
v_mul_f32_e32 v35, v35, v36 ; 10464923
v_add_f32_e32 v36, s39, v37 ; 06484A27
v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80
v_cndmask_b32_e32 v29, v29, v32 ; 003A411D
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v32, v35, v33 ; 00404323
v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080
v_cndmask_b32_e32 v30, v30, v34 ; 003C451E
v_min3_f32 v29, v29, v32, v30 ; D2A2001D 047A411D
v_mad_f32 v30, 0.5, v36, -s36 ; D282001E 809248F0
v_add_f32_e32 v25, v25, v30 ; 06323D19
v_mac_f32_e32 v26, v29, v22 ; 3E342D1D
v_mac_f32_e32 v25, v29, v23 ; 3E322F1D
v_mac_f32_e32 v27, v29, v24 ; 3E36311D
v_mad_f32 v22, 0.5, -v38, v26 ; D2820016 446A4CF0
v_mad_f32 v23, 0.5, -v36, v25 ; D2820017 446648F0
v_mad_f32 v24, 0.5, -v31, v27 ; D2820018 446E3EF0
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_sub_f32_e64 v25, 1.0, s0 ; D2080019 000000F2
v_log_f32_e32 v25, v25 ; 7E324F19
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v25, 0x3f400000, v25 ; 0E3232FF 3F400000
v_exp_f32_e32 v25, v25 ; 7E324B19
v_mul_f32_e32 v25, 0x40e00000, v25 ; 103232FF 40E00000
v_cubeid_f32 v32, v22, v23, v24 ; D2880020 04622F16
v_cubema_f32 v31, v22, v23, v24 ; D28E001F 04622F16
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v30, v22, v23, v24 ; D28A001E 04622F16
v_cubetc_f32 v29, v22, v23, v24 ; D28C001D 04622F16
v_rcp_f32_e64 v24, |v31| ; D3540118 0000011F
v_mov_b32_e32 v22, 0x3fc00000 ; 7E2C02FF 3FC00000
v_mad_f32 v23, v24, v29, v22 ; D2820017 045A3B18
v_mac_f32_e32 v22, v24, v30 ; 3E2C3D18
v_mov_b32_e32 v24, v32 ; 7E300320
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[36:43], s[32:35] ; F0900F00 01091616
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v25, v25 ; 7E324F19
v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2
v_mul_legacy_f32_e32 v25, s31, v25 ; 0E32321F
v_exp_f32_e32 v25, v25 ; 7E324B19
v_mul_f32_e32 v25, s30, v25 ; 1032321E
v_mul_f32_e32 v22, v22, v25 ; 102C3316
v_mul_f32_e32 v23, v23, v25 ; 102E3317
v_mul_f32_e32 v24, v24, v25 ; 10303318
v_mul_f32_e32 v22, v22, v26 ; 102C3516
v_mul_f32_e32 v23, v23, v26 ; 102E3517
v_mul_f32_e32 v24, v24, v26 ; 10303518
v_mac_f32_e32 v22, s12, v18 ; 3E2C240C
v_mac_f32_e32 v23, s12, v17 ; 3E2E220C
v_mac_f32_e32 v24, s12, v15 ; 3E301E0C
v_mov_b32_e32 v15, v24 ; 7E1E0318
v_mov_b32_e32 v17, v23 ; 7E220317
v_mov_b32_e32 v18, v22 ; 7E240316
s_or_b64 exec, exec, s[28:29] ; 88FE1C7E
v_mad_f32 v24, -v28, s15, s15 ; D2820018 203C1F1C
v_mov_b32_e32 v22, s14 ; 7E2C020E
v_mul_f32_e32 v23, v24, v19 ; 102E2718
v_mul_f32_e32 v20, v24, v20 ; 10282918
v_mul_f32_e32 v19, v24, v21 ; 10262B18
v_mul_f32_e32 v21, s17, v6 ; 102A0C11
v_mac_f32_e32 v21, s16, v7 ; 3E2A0E10
v_mac_f32_e32 v21, s18, v5 ; 3E2A0A12
v_add_f32_e32 v21, s20, v21 ; 062A2A14
v_add_f32_e32 v25, v21, v11 ; 06321715
v_mul_f32_e32 v11, s21, v6 ; 10160C15
v_mac_f32_e32 v11, s19, v7 ; 3E160E13
v_mac_f32_e32 v11, s22, v5 ; 3E160A16
v_add_f32_e32 v11, s23, v11 ; 06161617
v_add_f32_e32 v12, v11, v12 ; 0618190B
v_mul_f32_e32 v11, s25, v6 ; 10160C19
v_mac_f32_e32 v11, s24, v7 ; 3E160E18
v_mac_f32_e32 v11, s26, v5 ; 3E160A1A
v_add_f32_e32 v11, s27, v11 ; 0616161B
v_add_f32_e32 v26, v11, v13 ; 06341B0B
s_buffer_load_dword s15, s[8:11], 0x10 ; C2078910
s_buffer_load_dword s14, s[8:11], 0x11 ; C2070911
s_buffer_load_dword s12, s[8:11], 0x12 ; C2060912
s_buffer_load_dword s36, s[8:11], 0x16 ; C2120916
s_buffer_load_dword s20, s[8:11], 0x44 ; C20A0944
s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945
s_buffer_load_dword s18, s[8:11], 0x46 ; C2090946
s_buffer_load_dword s21, s[8:11], 0x48 ; C20A8948
s_buffer_load_dword s22, s[8:11], 0x49 ; C20B0949
s_buffer_load_dword s23, s[8:11], 0x4b ; C20B894B
s_buffer_load_dword s17, s[8:11], 0x64 ; C2088964
s_buffer_load_dword s16, s[8:11], 0x65 ; C2080965
s_buffer_load_dword s8, s[8:11], 0x66 ; C2040966
s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510
s_load_dwordx8 s[28:35], s[6:7], 0x20 ; C0CE0720
v_sub_f32_e64 v21, 1.0, s13 ; D2080015 00001AF2
v_mac_f32_e32 v21, s13, v16 ; 3E2A200D
v_mul_f32_e32 v11, s1, v7 ; 10160E01
v_mac_f32_e32 v11, s2, v6 ; 3E160C02
v_mac_f32_e32 v11, s3, v5 ; 3E160A03
v_max_f32_e32 v11, 0, v11 ; 20161680
v_mul_f32_e32 v16, v21, v25 ; 10203315
v_mul_f32_e32 v13, v21, v12 ; 101A1915
v_mul_f32_e32 v12, v21, v26 ; 10183515
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v22, s36, v8 ; 3E2C1024
v_mul_f32_e32 v8, v21, v18 ; 10102515
v_mul_f32_e32 v17, v21, v17 ; 10222315
v_mul_f32_e32 v15, v21, v15 ; 101E1F15
v_sub_f32_e32 v18, 1.0, v24 ; 082430F2
v_add_f32_e32 v18, s0, v18 ; 06242400
v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480
v_sub_f32_e32 v21, s1, v10 ; 082A1401
v_sub_f32_e32 v24, s2, v9 ; 08301202
v_mul_f32_e32 v25, v21, v21 ; 10322B15
v_mac_f32_e32 v25, v24, v24 ; 3E323118
v_sub_f32_e32 v26, s3, v14 ; 08341C03
v_mac_f32_e32 v25, v26, v26 ; 3E32351A
v_rsq_clamp_f32_e32 v25, v25 ; 7E325919
v_mul_f32_e32 v21, v25, v21 ; 102A2B19
v_mul_f32_e32 v24, v25, v24 ; 10303119
v_mul_f32_e32 v25, v25, v26 ; 10323519
v_mul_f32_e32 v10, v10, v7 ; 10140F0A
v_mad_f32 v9, -v9, v6, -v10 ; D2820009 A42A0D09
v_mad_f32 v9, -v14, v5, v9 ; D2820009 24260B0E
v_mul_f32_e32 v7, v21, v7 ; 100E0F15
v_mac_f32_e32 v7, v24, v6 ; 3E0E0D18
v_mul_f32_e32 v6, s1, v21 ; 100C2A01
v_mac_f32_e32 v6, s2, v24 ; 3E0C3002
v_mac_f32_e32 v6, s3, v25 ; 3E0C3203
v_mac_f32_e32 v7, v25, v5 ; 3E0E0B19
v_max_f32_e32 v5, 0, v6 ; 200A0C80
v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2
v_mul_f32_e32 v10, v6, v6 ; 10140D06
v_mul_f32_e32 v6, v6, v10 ; 100C1506
v_mul_f32_e32 v6, v6, v10 ; 100C1506
v_max_f32_e32 v9, 0, v9 ; 20121280
v_sub_f32_e32 v10, 1.0, v9 ; 081412F2
v_mul_f32_e32 v14, v10, v10 ; 101C150A
v_mul_f32_e32 v21, v10, v14 ; 102A1D0A
v_mad_f32 v24, -v14, v21, 1.0 ; D2820018 23CA2B0E
v_mul_f32_e32 v25, v4, v24 ; 10323104
v_sub_f32_e32 v26, 1.0, v4 ; 083408F2
v_mac_f32_e32 v4, v6, v26 ; 3E083506
v_mul_f32_e32 v26, v1, v24 ; 10343101
v_sub_f32_e32 v27, 1.0, v1 ; 083602F2
v_mac_f32_e32 v1, v6, v27 ; 3E023706
v_mul_f32_e32 v24, v0, v24 ; 10303100
v_sub_f32_e32 v27, 1.0, v0 ; 083600F2
v_mac_f32_e32 v0, v6, v27 ; 3E003706
v_sub_f32_e64 v6, 1.0, s0 ; D2080006 000000F2
v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2
v_mov_b32_e32 v28, 0x3cf5c28f ; 7E3802FF 3CF5C28F
v_madmk_f32_e32 v27, v27, v28, 0x3f77ced9 ; 4036391B 3F77CED9
v_add_f32_e32 v28, v5, v5 ; 06380B05
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mad_f32 v5, v28, v5, 0.5 ; D2820005 03C20B1C
v_mul_f32_e32 v14, v21, v14 ; 101C1D15
v_mac_f32_e32 v25, v18, v14 ; 3E321D12
v_mac_f32_e32 v26, v18, v14 ; 3E341D12
v_mac_f32_e32 v24, v18, v14 ; 3E301D12
v_mul_f32_e32 v6, v6, v6 ; 100C0D06
v_log_f32_e32 v18, v27 ; 7E244F1B
v_mul_f32_e32 v6, s23, v6 ; 100C0C17
v_mul_f32_e32 v10, v6, v10 ; 10141506
v_mac_f32_e32 v10, 1.0, v9 ; 3E1412F2
v_rcp_f32_e32 v9, v18 ; 7E125512
v_sub_f32_e32 v18, 1.0, v11 ; 082416F2
v_mul_f32_e32 v6, v6, v18 ; 100C2506
v_mac_f32_e32 v6, 1.0, v11 ; 3E0C16F2
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_log_f32_e32 v7, v7 ; 7E0E4F07
v_madak_f32_e32 v6, v6, v10, 0x38d1b717 ; 420C1506 38D1B717
v_mul_f32_e32 v9, 0x41200000, v9 ; 101212FF 41200000
v_mul_f32_e32 v10, v9, v9 ; 10141309
v_mul_legacy_f32_e32 v7, v10, v7 ; 0E0E0F0A
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_mad_f32 v9, v9, v9, 1.0 ; D2820009 03CA1309
v_mul_f32_e32 v9, s22, v9 ; 10121216
v_exp_f32_e32 v7, v7 ; 7E0E4B07
v_mul_f32_e32 v7, v9, v7 ; 100E0F09
v_mul_f32_e32 v6, v7, v6 ; 100C0D07
v_mul_f32_e32 v7, v18, v18 ; 100E2512
v_mul_f32_e32 v9, v18, v7 ; 10120F12
v_mul_f32_e32 v7, v9, v7 ; 100E0F09
v_add_f32_e32 v5, -1.0, v5 ; 060A0AF3
v_mad_f32 v7, v5, v7, 1.0 ; D2820007 03CA0F05
v_mad_f32 v5, v5, v14, 1.0 ; D2820005 03CA1D05
v_mul_f32_e32 v5, v5, v7 ; 100A0F05
image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800700 00C71B02
v_mul_f32_e32 v2, v11, v6 ; 10040D0B
v_mul_f32_e32 v2, s21, v2 ; 10040415
v_mul_f32_e32 v3, v11, v5 ; 10060B0B
v_mac_f32_e32 v16, s20, v3 ; 3E200614
v_mul_f32_e32 v5, v16, v23 ; 100A2F10
v_max_f32_e32 v2, 0, v2 ; 20040480
v_mul_f32_e32 v6, s20, v2 ; 100C0414
v_mac_f32_e32 v5, v4, v6 ; 3E0A0D04
v_mac_f32_e32 v13, s19, v3 ; 3E1A0613
v_mul_f32_e32 v4, v13, v20 ; 1008290D
v_mul_f32_e32 v6, s19, v2 ; 100C0413
v_mac_f32_e32 v4, v1, v6 ; 3E080D01
v_mac_f32_e32 v12, s18, v3 ; 3E180612
v_mul_f32_e32 v1, s18, v2 ; 10020412
v_mul_f32_e32 v2, v12, v19 ; 1004270C
v_mac_f32_e32 v2, v0, v1 ; 3E040300
v_mac_f32_e32 v5, v25, v8 ; 3E0A1119
v_mac_f32_e32 v4, v26, v17 ; 3E08231A
v_mac_f32_e32 v2, v24, v15 ; 3E041F18
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v5, s17, v27 ; 3E0A3611
v_mac_f32_e32 v4, s16, v28 ; 3E083810
v_mac_f32_e32 v2, s8, v29 ; 3E043A08
v_add_f32_e64 v0, 0, v22 clamp ; D2060800 00022C80
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v3, s15, v1 ; 1006020F
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v5, s14, v1 ; 100A020E
v_mac_f32_e32 v5, v4, v0 ; 3E0A0104
v_mul_f32_e32 v1, s12, v1 ; 1002020C
v_mac_f32_e32 v1, v2, v0 ; 3E020102
v_cvt_pkrtz_f16_f32_e32 v0, v3, v5 ; 5E000B03
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 40
Code Size: 2156 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..19]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx
34: DP4 TEMP[6].x, CONST[1], TEMP[5]
35: DP4 TEMP[7].x, CONST[2], TEMP[5]
36: MOV TEMP[6].y, TEMP[7].xxxx
37: DP4 TEMP[5].x, CONST[3], TEMP[5]
38: MOV TEMP[6].z, TEMP[5].xxxx
39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy
40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx
41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz
43: MOV TEMP[5].yzw, TEMP[5].yxyz
44: MOV TEMP[5].x, TEMP[1].zzzz
45: MOV TEMP[0].xyz, TEMP[0].xyzx
46: MOV OUT[5], TEMP[0]
47: MOV OUT[1], TEMP[2]
48: MOV OUT[2], TEMP[4]
49: MOV OUT[3], TEMP[3]
50: MOV OUT[0], TEMP[1]
51: MOV OUT[4], TEMP[5]
52: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
%107 = fmul float %31, %84
%108 = fmul float %32, %84
%109 = fmul float %33, %84
%110 = fmul float %34, %84
%111 = fmul float %35, %85
%112 = fadd float %111, %107
%113 = fmul float %36, %85
%114 = fadd float %113, %108
%115 = fmul float %37, %85
%116 = fadd float %115, %109
%117 = fmul float %38, %85
%118 = fadd float %117, %110
%119 = fmul float %39, %86
%120 = fadd float %119, %112
%121 = fmul float %40, %86
%122 = fadd float %121, %114
%123 = fmul float %41, %86
%124 = fadd float %123, %116
%125 = fmul float %42, %86
%126 = fadd float %125, %118
%127 = fmul float %43, %87
%128 = fadd float %127, %120
%129 = fmul float %44, %87
%130 = fadd float %129, %122
%131 = fmul float %45, %87
%132 = fadd float %131, %124
%133 = fmul float %64, %84
%134 = fmul float %65, %84
%135 = fmul float %66, %84
%136 = fmul float %67, %84
%137 = fmul float %68, %85
%138 = fadd float %137, %133
%139 = fmul float %69, %85
%140 = fadd float %139, %134
%141 = fmul float %70, %85
%142 = fadd float %141, %135
%143 = fmul float %71, %85
%144 = fadd float %143, %136
%145 = fmul float %72, %86
%146 = fadd float %145, %138
%147 = fmul float %73, %86
%148 = fadd float %147, %140
%149 = fmul float %74, %86
%150 = fadd float %149, %142
%151 = fmul float %75, %86
%152 = fadd float %151, %144
%153 = fmul float %76, %87
%154 = fadd float %153, %146
%155 = fmul float %77, %87
%156 = fadd float %155, %148
%157 = fmul float %78, %87
%158 = fadd float %157, %150
%159 = fmul float %79, %87
%160 = fadd float %159, %152
%161 = fmul float %99, %55
%162 = fadd float %161, %57
%163 = fmul float %100, %56
%164 = fadd float %163, %58
%165 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %165, float %99, float %105
%.32 = select i1 %165, float %100, float %106
%166 = fmul float %., %59
%167 = fadd float %166, %61
%168 = fmul float %.32, %60
%169 = fadd float %168, %62
%170 = fmul float %46, %92
%171 = fmul float %49, %92
%172 = fmul float %52, %92
%173 = fmul float %47, %93
%174 = fadd float %173, %170
%175 = fmul float %50, %93
%176 = fadd float %175, %171
%177 = fmul float %53, %93
%178 = fadd float %177, %172
%179 = fmul float %48, %94
%180 = fadd float %179, %174
%181 = fmul float %51, %94
%182 = fadd float %181, %176
%183 = fmul float %54, %94
%184 = fadd float %183, %178
%185 = fmul float %180, %180
%186 = fmul float %182, %182
%187 = fadd float %186, %185
%188 = fmul float %184, %184
%189 = fadd float %187, %188
%190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189)
%191 = fmul float %180, %190
%192 = fmul float %182, %190
%193 = fmul float %184, %190
%194 = fmul float %191, %192
%195 = fmul float %192, %193
%196 = fmul float %193, %193
%197 = fmul float %193, %191
%198 = fmul float %16, %194
%199 = fmul float %17, %195
%200 = fadd float %198, %199
%201 = fmul float %18, %196
%202 = fadd float %200, %201
%203 = fmul float %19, %197
%204 = fadd float %202, %203
%205 = fmul float %20, %194
%206 = fmul float %21, %195
%207 = fadd float %205, %206
%208 = fmul float %22, %196
%209 = fadd float %207, %208
%210 = fmul float %23, %197
%211 = fadd float %209, %210
%212 = fmul float %24, %194
%213 = fmul float %25, %195
%214 = fadd float %212, %213
%215 = fmul float %26, %196
%216 = fadd float %214, %215
%217 = fmul float %27, %197
%218 = fadd float %216, %217
%219 = fmul float %192, %192
%220 = fmul float %191, %191
%221 = fsub float %220, %219
%222 = fmul float %28, %221
%223 = fadd float %222, %204
%224 = fmul float %29, %221
%225 = fadd float %224, %211
%226 = fmul float %30, %221
%227 = fadd float %226, %218
%228 = fsub float %128, %13
%229 = fsub float %130, %14
%230 = fsub float %132, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521
s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522
s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524
s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525
s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526
s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528
s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529
s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A
s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C
s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D
s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E
s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534
s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535
s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505
s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506
s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507
s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508
s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s5 ; 7E000205
s_buffer_load_dword s7, s[20:23], 0xa ; C203950A
s_buffer_load_dword s5, s[20:23], 0xb ; C202950B
s_buffer_load_dword s15, s[20:23], 0xc ; C207950C
s_buffer_load_dword s17, s[20:23], 0xd ; C208950D
s_buffer_load_dword s13, s[20:23], 0xe ; C206950E
s_buffer_load_dword s8, s[20:23], 0xf ; C204150F
s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C
s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540
s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541
s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542
s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543
s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510
s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511
s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512
s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514
s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880
s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516
s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517
s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518
s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519
s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A
s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537
s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538
s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539
s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A
s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B
s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B
s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C
s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D
s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E
s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s47 ; 7E1A022F
s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544
s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545
s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546
v_mov_b32_e32 v14, s50 ; 7E1C0232
s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547
v_mov_b32_e32 v15, s51 ; 7E1E0233
s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548
s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549
s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A
s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B
s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C
s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D
s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E
s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F
v_mul_f32_e32 v16, s41, v2 ; 10200429
v_mac_f32_e32 v0, s34, v9 ; 3E001222
v_mul_f32_e32 v17, s42, v2 ; 1022042A
v_mul_f32_e32 v18, s36, v2 ; 10240424
v_mul_f32_e32 v19, s43, v2 ; 1026042B
v_mac_f32_e32 v13, s35, v10 ; 3E1A1423
v_mul_f32_e32 v20, s25, v6 ; 10280C19
v_mul_f32_e32 v21, s28, v6 ; 102A0C1C
v_mul_f32_e32 v6, s31, v6 ; 100C0C1F
v_mac_f32_e32 v16, s44, v3 ; 3E20062C
v_mac_f32_e32 v17, s45, v3 ; 3E22062D
v_mac_f32_e32 v18, s46, v3 ; 3E24062E
v_mac_f32_e32 v20, s26, v7 ; 3E280E1A
v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20
v_mac_f32_e32 v20, s27, v8 ; 3E28101B
v_mac_f32_e32 v21, s30, v8 ; 3E2A101E
v_mac_f32_e32 v6, s33, v8 ; 3E0C1021
v_mac_f32_e32 v19, s52, v3 ; 3E260634
v_mul_f32_e32 v7, s37, v2 ; 100E0425
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v7, s47, v3 ; 3E0E062F
v_mul_f32_e32 v8, s38, v2 ; 10100426
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s58, v3 ; 3E16063A
v_mul_f32_e32 v2, s40, v2 ; 10040428
v_mac_f32_e32 v2, s50, v3 ; 3E040632
v_mac_f32_e32 v16, s53, v4 ; 3E200835
v_mac_f32_e32 v17, s54, v4 ; 3E220836
v_mac_f32_e32 v18, s55, v4 ; 3E240837
v_mac_f32_e32 v19, s56, v4 ; 3E260838
v_mac_f32_e32 v7, s51, v4 ; 3E0E0833
v_mac_f32_e32 v8, s59, v4 ; 3E10083B
v_mac_f32_e32 v11, s60, v4 ; 3E16083C
v_mac_f32_e32 v2, s61, v4 ; 3E04083D
v_mac_f32_e32 v16, s18, v5 ; 3E200A12
v_mac_f32_e32 v17, s19, v5 ; 3E220A13
v_mac_f32_e32 v18, s24, v5 ; 3E240A18
v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E
v_mac_f32_e32 v8, s63, v5 ; 3E100A3F
v_mac_f32_e32 v11, s64, v5 ; 3E160A40
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mul_f32_e32 v3, v20, v20 ; 10062914
v_mac_f32_e32 v3, v21, v21 ; 3E062B15
v_mac_f32_e32 v3, v6, v6 ; 3E060D06
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mac_f32_e32 v14, s48, v9 ; 3E1C1230
v_mac_f32_e32 v15, s49, v10 ; 3E1E1431
exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v3, v20 ; 10002903
v_mul_f32_e32 v4, v3, v21 ; 10082B03
v_mul_f32_e32 v3, v3, v6 ; 10060D03
v_mul_f32_e32 v5, v3, v4 ; 100A0903
v_mul_f32_e32 v6, s14, v5 ; 100C0A0E
v_mul_f32_e32 v9, s16, v5 ; 10120A10
v_mul_f32_e32 v5, s17, v5 ; 100A0A11
v_mul_f32_e32 v10, v4, v0 ; 10140104
v_mac_f32_e32 v6, s6, v10 ; 3E0C1406
v_mac_f32_e32 v9, s12, v10 ; 3E12140C
v_mac_f32_e32 v5, s15, v10 ; 3E0A140F
v_mul_f32_e32 v10, v3, v3 ; 10140703
v_mac_f32_e32 v6, s4, v10 ; 3E0C1404
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s13, v10 ; 3E0A140D
v_mul_f32_e32 v10, v0, v3 ; 10140700
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s5, v10 ; 3E121405
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100
v_mac_f32_e32 v6, s9, v10 ; 3E0C1409
v_mac_f32_e32 v9, s10, v10 ; 3E12140A
v_mac_f32_e32 v5, s11, v10 ; 3E0A140B
v_subrev_f32_e32 v10, s0, v16 ; 0A142000
v_subrev_f32_e32 v12, s1, v17 ; 0A182201
v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402
exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B
exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110
exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 24
Code Size: 748 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[21..22]
DCL CONST[24]
DCL TEMP[0..17], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz
10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww
11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
13: MOV TEMP[5].xy, IN[0].xyyy
14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D
15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx
16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx
17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
19: MOV TEMP[7].xyz, IMM[0].yyyy
20: MOV TEMP[8].w, IMM[0].xxxx
21: MOV TEMP[8].xyz, TEMP[0].xyzx
22: DP4 TEMP[9].x, CONST[1], TEMP[8]
23: DP4 TEMP[10].x, CONST[2], TEMP[8]
24: MOV TEMP[9].y, TEMP[10].xxxx
25: DP4 TEMP[8].x, CONST[3], TEMP[8]
26: MOV TEMP[9].z, TEMP[8].xxxx
27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
28: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
29: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
30: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
31: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz
32: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
33: MOV TEMP[10].xyz, TEMP[9].xyzx
34: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww
35: UIF TEMP[11].xxxx :0
36: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
37: RSQ TEMP[11].x, TEMP[11].xxxx
38: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
39: MOV TEMP[12].xyz, -IN[4].xyzx
40: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
41: RCP TEMP[14].x, TEMP[11].xxxx
42: RCP TEMP[14].y, TEMP[11].yyyy
43: RCP TEMP[14].z, TEMP[11].zzzz
44: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
45: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
46: RCP TEMP[14].x, TEMP[11].xxxx
47: RCP TEMP[14].y, TEMP[11].yyyy
48: RCP TEMP[14].z, TEMP[11].zzzz
49: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
50: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz
51: UIF TEMP[14].xxxx :0
52: MOV TEMP[15].x, TEMP[13].xxxx
53: ELSE :0
54: MOV TEMP[15].x, TEMP[12].xxxx
55: ENDIF
56: UIF TEMP[14].yyyy :0
57: MOV TEMP[16].x, TEMP[13].yyyy
58: ELSE :0
59: MOV TEMP[16].x, TEMP[12].yyyy
60: ENDIF
61: UIF TEMP[14].zzzz :0
62: MOV TEMP[13].x, TEMP[13].zzzz
63: ELSE :0
64: MOV TEMP[13].x, TEMP[12].zzzz
65: ENDIF
66: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
67: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
68: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
69: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
70: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
71: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
72: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
73: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
74: ENDIF
75: ADD TEMP[11].x, IMM[0].xxxx, -CONST[22].xxxx
76: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx
77: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
78: MOV TEMP[10].xyz, TEMP[10].xyzz
79: MOV TEMP[10].w, TEMP[11].xxxx
80: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
81: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
82: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
83: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
84: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz
85: UIF TEMP[11].xxxx :0
86: MOV TEMP[11].xyz, TEMP[9].xyzx
87: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww
88: UIF TEMP[12].xxxx :0
89: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
90: RSQ TEMP[12].x, TEMP[12].xxxx
91: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
92: MOV TEMP[12].xyz, -IN[4].xyzx
93: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
94: RCP TEMP[14].x, TEMP[9].xxxx
95: RCP TEMP[14].y, TEMP[9].yyyy
96: RCP TEMP[14].z, TEMP[9].zzzz
97: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
98: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
99: RCP TEMP[14].x, TEMP[9].xxxx
100: RCP TEMP[14].y, TEMP[9].yyyy
101: RCP TEMP[14].z, TEMP[9].zzzz
102: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
103: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz
104: UIF TEMP[14].xxxx :0
105: MOV TEMP[15].x, TEMP[13].xxxx
106: ELSE :0
107: MOV TEMP[15].x, TEMP[12].xxxx
108: ENDIF
109: UIF TEMP[14].yyyy :0
110: MOV TEMP[16].x, TEMP[13].yyyy
111: ELSE :0
112: MOV TEMP[16].x, TEMP[12].yyyy
113: ENDIF
114: UIF TEMP[14].zzzz :0
115: MOV TEMP[13].x, TEMP[13].zzzz
116: ELSE :0
117: MOV TEMP[13].x, TEMP[12].zzzz
118: ENDIF
119: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
120: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
121: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
122: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
123: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
124: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
125: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
126: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
127: ENDIF
128: ADD TEMP[9].x, IMM[0].xxxx, -CONST[22].xxxx
129: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx
130: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
131: MOV TEMP[11].xyz, TEMP[11].xyzz
132: MOV TEMP[11].w, TEMP[9].xxxx
133: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
134: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
135: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
136: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
137: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
138: ELSE :0
139: MOV TEMP[7].xyz, TEMP[10].xyzx
140: ENDIF
141: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
142: MOV TEMP[1].xyz, -TEMP[1].xyzx
143: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx
144: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
145: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
146: RSQ TEMP[10].x, TEMP[10].xxxx
147: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
148: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
149: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
150: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
151: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
152: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
153: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
154: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx
155: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy
156: LG2 TEMP[12].x, TEMP[12].xxxx
157: RCP TEMP[12].x, TEMP[12].xxxx
158: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx
159: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
160: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx
161: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx
162: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx
163: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
164: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww
165: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx
166: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
167: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
168: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx
169: MOV_SAT TEMP[4].x, TEMP[4].xxxx
170: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx
171: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
172: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
173: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
174: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
175: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx
176: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx
177: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz
178: RCP TEMP[1].x, TEMP[1].xxxx
179: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
180: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx
181: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
182: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx
183: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
184: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
185: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
186: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
187: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
188: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
189: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
190: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz
191: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
192: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
193: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
194: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
195: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz
196: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww
197: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
198: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
199: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
200: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
201: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx
202: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
203: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
204: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
205: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
206: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
207: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx
208: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
209: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
210: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
211: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
212: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
213: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
214: MOV TEMP[0].xyz, TEMP[0].xyzx
215: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww
216: MOV_SAT TEMP[1].x, TEMP[1].xxxx
217: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
218: MOV TEMP[0].xyz, TEMP[0].xyzx
219: MOV TEMP[0].w, IMM[0].xxxx
220: MOV OUT[0], TEMP[0]
221: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0
%84 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0
%86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)*
%88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0
%89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)*
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)*
%94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0
%95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)*
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%99 = bitcast <8 x i32> addrspace(2)* %98 to <32 x i8> addrspace(2)*
%100 = load <32 x i8>, <32 x i8> addrspace(2)* %99, align 32, !tbaa !0
%101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%102 = bitcast <4 x i32> addrspace(2)* %101 to <16 x i8> addrspace(2)*
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%105 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%106 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%107 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%108 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%109 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%110 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%111 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%119 = fmul float %106, %106
%120 = fmul float %107, %107
%121 = fadd float %120, %119
%122 = fmul float %108, %108
%123 = fadd float %121, %122
%124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123)
%125 = fmul float %106, %124
%126 = fmul float %107, %124
%127 = fmul float %108, %124
%128 = fmul float %113, %113
%129 = fmul float %114, %114
%130 = fadd float %129, %128
%131 = fmul float %115, %115
%132 = fadd float %130, %131
%133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132)
%134 = fmul float %113, %133
%135 = fmul float %114, %133
%136 = fmul float %115, %133
%137 = bitcast float %104 to i32
%138 = bitcast float %105 to i32
%139 = insertelement <2 x i32> undef, i32 %137, i32 0
%140 = insertelement <2 x i32> %139, i32 %138, i32 1
%141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %140, <32 x i8> %94, <16 x i8> %97, i32 2)
%142 = extractelement <4 x float> %141, i32 0
%143 = extractelement <4 x float> %141, i32 1
%144 = extractelement <4 x float> %141, i32 2
%145 = fmul float %76, %142
%146 = fmul float %77, %143
%147 = fmul float %78, %144
%148 = call float @llvm.AMDGPU.lrp(float %79, float %145, float %66)
%149 = call float @llvm.AMDGPU.lrp(float %79, float %146, float %67)
%150 = call float @llvm.AMDGPU.lrp(float %79, float %147, float %68)
%151 = fmul float %79, %69
%152 = fsub float %69, %151
%153 = fmul float %145, %152
%154 = fmul float %146, %152
%155 = fmul float %147, %152
%156 = bitcast float %104 to i32
%157 = bitcast float %105 to i32
%158 = insertelement <2 x i32> undef, i32 %156, i32 0
%159 = insertelement <2 x i32> %158, i32 %157, i32 1
%160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %100, <16 x i8> %103, i32 2)
%161 = extractelement <4 x float> %160, i32 1
%162 = fsub float 1.000000e+00, %81
%163 = fmul float %161, %81
%164 = fadd float %163, %162
%165 = fmul float %125, %24
%166 = fmul float %126, %25
%167 = fadd float %166, %165
%168 = fmul float %127, %26
%169 = fadd float %167, %168
%170 = call float @llvm.maxnum.f32(float %169, float 0.000000e+00)
%171 = fmul float %27, %125
%172 = fmul float %28, %126
%173 = fadd float %171, %172
%174 = fmul float %29, %127
%175 = fadd float %173, %174
%176 = fadd float %175, %30
%177 = fmul float %31, %125
%178 = fmul float %32, %126
%179 = fadd float %177, %178
%180 = fmul float %33, %127
%181 = fadd float %179, %180
%182 = fadd float %181, %34
%183 = fmul float %35, %125
%184 = fmul float %36, %126
%185 = fadd float %183, %184
%186 = fmul float %37, %127
%187 = fadd float %185, %186
%188 = fadd float %187, %38
%189 = fadd float %109, %176
%190 = fadd float %110, %182
%191 = fadd float %111, %188
%192 = fmul float %189, %164
%193 = fmul float %190, %164
%194 = fmul float %191, %164
%195 = fmul float %125, %134
%196 = fmul float %126, %135
%197 = fadd float %196, %195
%198 = fmul float %127, %136
%199 = fadd float %197, %198
%200 = fmul float %199, %125
%201 = fmul float %199, %126
%202 = fmul float %199, %127
%203 = fmul float %200, 2.000000e+00
%204 = fmul float %201, 2.000000e+00
%205 = fmul float %202, 2.000000e+00
%206 = fsub float %134, %203
%207 = fsub float %135, %204
%208 = fsub float %136, %205
%209 = fcmp ogt float %51, 0.000000e+00
br i1 %209, label %IF, label %ENDIF
IF: ; preds = %main_body
%210 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%211 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%212 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%213 = fmul float %206, %206
%214 = fmul float %207, %207
%215 = fadd float %214, %213
%216 = fmul float %208, %208
%217 = fadd float %215, %216
%218 = call float @llvm.AMDGPU.rsq.clamped.f32(float %217)
%219 = fmul float %206, %218
%220 = fmul float %207, %218
%221 = fmul float %208, %218
%222 = fsub float %44, %116
%223 = fsub float %45, %117
%224 = fsub float %46, %118
%225 = fdiv float 1.000000e+00, %219
%226 = fdiv float 1.000000e+00, %220
%227 = fdiv float 1.000000e+00, %221
%228 = fmul float %222, %225
%229 = fmul float %223, %226
%230 = fmul float %224, %227
%231 = fsub float %47, %116
%232 = fsub float %48, %117
%233 = fsub float %49, %118
%234 = fdiv float 1.000000e+00, %219
%235 = fdiv float 1.000000e+00, %220
%236 = fdiv float 1.000000e+00, %221
%237 = fmul float %231, %234
%238 = fmul float %232, %235
%239 = fmul float %233, %236
%240 = fcmp ogt float %219, 0.000000e+00
%241 = fcmp ogt float %220, 0.000000e+00
%242 = fcmp ogt float %221, 0.000000e+00
%. = select i1 %240, float %228, float %237
%temp64.0 = select i1 %241, float %229, float %238
%.96 = select i1 %242, float %230, float %239
%243 = fadd float %44, %47
%244 = fadd float %45, %48
%245 = fadd float %46, %49
%246 = fmul float %243, 5.000000e-01
%247 = fmul float %244, 5.000000e-01
%248 = fmul float %245, 5.000000e-01
%249 = call float @llvm.minnum.f32(float %., float %temp64.0)
%250 = call float @llvm.minnum.f32(float %249, float %.96)
%251 = fsub float %246, %212
%252 = fsub float %247, %211
%253 = fsub float %248, %210
%254 = fadd float %251, %116
%255 = fadd float %252, %117
%256 = fadd float %253, %118
%257 = fmul float %219, %250
%258 = fadd float %257, %254
%259 = fmul float %220, %250
%260 = fadd float %259, %255
%261 = fmul float %221, %250
%262 = fadd float %261, %256
%263 = fsub float %258, %246
%264 = fsub float %260, %247
%265 = fsub float %262, %248
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp40.0 = phi float [ %263, %IF ], [ %206, %main_body ]
%temp41.0 = phi float [ %264, %IF ], [ %207, %main_body ]
%temp42.0 = phi float [ %265, %IF ], [ %208, %main_body ]
%266 = fsub float 1.000000e+00, %80
%267 = call float @llvm.pow.f32(float %266, float 7.500000e-01)
%268 = fmul float %267, 7.000000e+00
%269 = insertelement <4 x float> undef, float %temp40.0, i32 0
%270 = insertelement <4 x float> %269, float %temp41.0, i32 1
%271 = insertelement <4 x float> %270, float %temp42.0, i32 2
%272 = insertelement <4 x float> %271, float %268, i32 3
%273 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %272)
%274 = extractelement <4 x float> %273, i32 0
%275 = extractelement <4 x float> %273, i32 1
%276 = extractelement <4 x float> %273, i32 2
%277 = extractelement <4 x float> %273, i32 3
%278 = call float @llvm.fabs.f32(float %276)
%279 = fdiv float 1.000000e+00, %278
%280 = fmul float %274, %279
%281 = fadd float %280, 1.500000e+00
%282 = fmul float %275, %279
%283 = fadd float %282, 1.500000e+00
%284 = bitcast float %283 to i32
%285 = bitcast float %281 to i32
%286 = bitcast float %277 to i32
%287 = bitcast float %268 to i32
%288 = insertelement <4 x i32> undef, i32 %284, i32 0
%289 = insertelement <4 x i32> %288, i32 %285, i32 1
%290 = insertelement <4 x i32> %289, i32 %286, i32 2
%291 = insertelement <4 x i32> %290, i32 %287, i32 3
%292 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %291, <32 x i8> %83, <16 x i8> %85, i32 4)
%293 = extractelement <4 x float> %292, i32 0
%294 = extractelement <4 x float> %292, i32 1
%295 = extractelement <4 x float> %292, i32 2
%296 = extractelement <4 x float> %292, i32 3
%297 = call float @llvm.pow.f32(float %296, float %53)
%298 = fmul float %52, %297
%299 = fmul float %298, %293
%300 = fmul float %298, %294
%301 = fmul float %298, %295
%302 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %302, label %IF82, label %ENDIF81
IF82: ; preds = %ENDIF
%303 = fcmp ogt float %63, 0.000000e+00
br i1 %303, label %IF85, label %ENDIF84
ENDIF81: ; preds = %ENDIF, %ENDIF84
%temp28.0 = phi float [ %531, %ENDIF84 ], [ %299, %ENDIF ]
%temp29.0 = phi float [ %532, %ENDIF84 ], [ %300, %ENDIF ]
%temp30.0 = phi float [ %533, %ENDIF84 ], [ %301, %ENDIF ]
%304 = fmul float %temp28.0, %164
%305 = fmul float %temp29.0, %164
%306 = fmul float %temp30.0, %164
%307 = fsub float 1.000000e+00, %80
%308 = fsub float %24, %134
%309 = fsub float %25, %135
%310 = fsub float %26, %136
%311 = fmul float %308, %308
%312 = fmul float %309, %309
%313 = fadd float %312, %311
%314 = fmul float %310, %310
%315 = fadd float %313, %314
%316 = call float @llvm.AMDGPU.rsq.clamped.f32(float %315)
%317 = fmul float %308, %316
%318 = fmul float %309, %316
%319 = fmul float %310, %316
%320 = fmul float %134, %125
%321 = fsub float -0.000000e+00, %320
%322 = fmul float %135, %126
%323 = fsub float %321, %322
%324 = fmul float %136, %127
%325 = fsub float %323, %324
%326 = call float @llvm.maxnum.f32(float %325, float 0.000000e+00)
%327 = fmul float %24, %317
%328 = fmul float %25, %318
%329 = fadd float %328, %327
%330 = fmul float %26, %319
%331 = fadd float %329, %330
%332 = call float @llvm.maxnum.f32(float %331, float 0.000000e+00)
%333 = fmul float %307, %307
%334 = fmul float %333, %75
%335 = fsub float 1.000000e+00, %307
%336 = fmul float %335, 0x3FEEF9DB20000000
%337 = fadd float %336, 0x3F9EB851E0000000
%338 = call float @llvm.log2.f32(float %337)
%339 = fdiv float 1.000000e+00, %338
%340 = fmul float %339, 1.000000e+01
%341 = fmul float %340, %340
%342 = fsub float 1.000000e+00, %170
%343 = fsub float 1.000000e+00, %326
%344 = fmul float %332, 2.000000e+00
%345 = fmul float %332, %307
%346 = fmul float %344, %345
%347 = fadd float %346, 5.000000e-01
%348 = fsub float 1.000000e+00, %332
%349 = fsub float 1.000000e+00, %326
%350 = fsub float 1.000000e+00, %152
%351 = fadd float %80, %350
%352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00)
%353 = fmul float %349, %349
%354 = fmul float %349, %349
%355 = fmul float %354, %349
%356 = fmul float %353, %355
%357 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %148)
%358 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %149)
%359 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %150)
%360 = call float @llvm.AMDGPU.lrp(float %170, float 1.000000e+00, float %334)
%361 = call float @llvm.AMDGPU.lrp(float %326, float 1.000000e+00, float %334)
%362 = fmul float %360, %361
%363 = fadd float %362, 0x3F1A36E2E0000000
%364 = fdiv float 1.000000e+00, %363
%365 = fmul float %125, %317
%366 = fmul float %126, %318
%367 = fadd float %366, %365
%368 = fmul float %127, %319
%369 = fadd float %367, %368
%370 = call float @llvm.maxnum.f32(float %369, float 0.000000e+00)
%371 = call float @llvm.pow.f32(float %370, float %341)
%372 = fadd float %341, 1.000000e+00
%373 = fmul float %372, %74
%374 = fmul float %371, %373
%375 = fmul float %364, %374
%376 = fmul float %375, %170
%377 = fmul float %376, %73
%378 = call float @llvm.maxnum.f32(float %377, float 0.000000e+00)
%379 = fmul float %378, %70
%380 = fmul float %378, %71
%381 = fmul float %378, %72
%382 = fsub float 1.000000e+00, %148
%383 = fsub float 1.000000e+00, %149
%384 = fsub float 1.000000e+00, %150
%385 = fmul float %348, %348
%386 = fmul float %348, %348
%387 = fmul float %386, %348
%388 = fmul float %385, %387
%389 = fmul float %382, %388
%390 = fadd float %389, %148
%391 = fmul float %383, %388
%392 = fadd float %391, %149
%393 = fmul float %384, %388
%394 = fadd float %393, %150
%395 = fadd float %347, -1.000000e+00
%396 = fmul float %342, %342
%397 = fmul float %342, %342
%398 = fmul float %397, %342
%399 = fmul float %396, %398
%400 = fmul float %395, %399
%401 = fadd float %400, 1.000000e+00
%402 = fadd float %347, -1.000000e+00
%403 = fmul float %343, %343
%404 = fmul float %343, %343
%405 = fmul float %404, %343
%406 = fmul float %403, %405
%407 = fmul float %402, %406
%408 = fadd float %407, 1.000000e+00
%409 = fmul float %401, %408
%410 = fmul float %409, %170
%411 = fmul float %70, %410
%412 = fadd float %411, %192
%413 = fmul float %71, %410
%414 = fadd float %413, %193
%415 = fmul float %72, %410
%416 = fadd float %415, %194
%417 = fmul float %153, %412
%418 = fmul float %154, %414
%419 = fmul float %155, %416
%420 = fmul float %379, %390
%421 = fadd float %420, %417
%422 = fmul float %380, %392
%423 = fadd float %422, %418
%424 = fmul float %381, %394
%425 = fadd float %424, %419
%426 = fmul float %304, %357
%427 = fadd float %426, %421
%428 = fmul float %305, %358
%429 = fadd float %428, %423
%430 = fmul float %306, %359
%431 = fadd float %430, %425
%432 = fmul float %112, %42
%433 = fadd float %432, %43
%434 = call float @llvm.AMDIL.clamp.(float %433, float 0.000000e+00, float 1.000000e+00)
%435 = call float @llvm.AMDGPU.lrp(float %434, float %427, float %39)
%436 = call float @llvm.AMDGPU.lrp(float %434, float %429, float %40)
%437 = call float @llvm.AMDGPU.lrp(float %434, float %431, float %41)
%438 = call i32 @llvm.SI.packf16(float %435, float %436)
%439 = bitcast i32 %438 to float
%440 = call i32 @llvm.SI.packf16(float %437, float 1.000000e+00)
%441 = bitcast i32 %440 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %439, float %441, float %439, float %441)
ret void
IF85: ; preds = %IF82
%442 = fmul float %206, %206
%443 = fmul float %207, %207
%444 = fadd float %443, %442
%445 = fmul float %208, %208
%446 = fadd float %444, %445
%447 = call float @llvm.AMDGPU.rsq.clamped.f32(float %446)
%448 = fmul float %206, %447
%449 = fmul float %207, %447
%450 = fmul float %208, %447
%451 = fsub float %54, %116
%452 = fsub float %55, %117
%453 = fsub float %56, %118
%454 = fdiv float 1.000000e+00, %448
%455 = fdiv float 1.000000e+00, %449
%456 = fdiv float 1.000000e+00, %450
%457 = fmul float %451, %454
%458 = fmul float %452, %455
%459 = fmul float %453, %456
%460 = fsub float %57, %116
%461 = fsub float %58, %117
%462 = fsub float %59, %118
%463 = fdiv float 1.000000e+00, %448
%464 = fdiv float 1.000000e+00, %449
%465 = fdiv float 1.000000e+00, %450
%466 = fmul float %460, %463
%467 = fmul float %461, %464
%468 = fmul float %462, %465
%469 = fcmp ogt float %448, 0.000000e+00
%470 = fcmp ogt float %449, 0.000000e+00
%471 = fcmp ogt float %450, 0.000000e+00
%.97 = select i1 %469, float %457, float %466
%temp64.1 = select i1 %470, float %458, float %467
%.98 = select i1 %471, float %459, float %468
%472 = fadd float %54, %57
%473 = fadd float %55, %58
%474 = fadd float %56, %59
%475 = fmul float %472, 5.000000e-01
%476 = fmul float %473, 5.000000e-01
%477 = fmul float %474, 5.000000e-01
%478 = call float @llvm.minnum.f32(float %.97, float %temp64.1)
%479 = call float @llvm.minnum.f32(float %478, float %.98)
%480 = fsub float %475, %60
%481 = fsub float %476, %61
%482 = fsub float %477, %62
%483 = fadd float %480, %116
%484 = fadd float %481, %117
%485 = fadd float %482, %118
%486 = fmul float %448, %479
%487 = fadd float %486, %483
%488 = fmul float %449, %479
%489 = fadd float %488, %484
%490 = fmul float %450, %479
%491 = fadd float %490, %485
%492 = fsub float %487, %475
%493 = fsub float %489, %476
%494 = fsub float %491, %477
br label %ENDIF84
ENDIF84: ; preds = %IF82, %IF85
%temp44.0 = phi float [ %492, %IF85 ], [ %206, %IF82 ]
%temp45.0 = phi float [ %493, %IF85 ], [ %207, %IF82 ]
%temp46.0 = phi float [ %494, %IF85 ], [ %208, %IF82 ]
%495 = fsub float 1.000000e+00, %80
%496 = call float @llvm.pow.f32(float %495, float 7.500000e-01)
%497 = fmul float %496, 7.000000e+00
%498 = insertelement <4 x float> undef, float %temp44.0, i32 0
%499 = insertelement <4 x float> %498, float %temp45.0, i32 1
%500 = insertelement <4 x float> %499, float %temp46.0, i32 2
%501 = insertelement <4 x float> %500, float %497, i32 3
%502 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %501)
%503 = extractelement <4 x float> %502, i32 0
%504 = extractelement <4 x float> %502, i32 1
%505 = extractelement <4 x float> %502, i32 2
%506 = extractelement <4 x float> %502, i32 3
%507 = call float @llvm.fabs.f32(float %505)
%508 = fdiv float 1.000000e+00, %507
%509 = fmul float %503, %508
%510 = fadd float %509, 1.500000e+00
%511 = fmul float %504, %508
%512 = fadd float %511, 1.500000e+00
%513 = bitcast float %512 to i32
%514 = bitcast float %510 to i32
%515 = bitcast float %506 to i32
%516 = bitcast float %497 to i32
%517 = insertelement <4 x i32> undef, i32 %513, i32 0
%518 = insertelement <4 x i32> %517, i32 %514, i32 1
%519 = insertelement <4 x i32> %518, i32 %515, i32 2
%520 = insertelement <4 x i32> %519, i32 %516, i32 3
%521 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %520, <32 x i8> %88, <16 x i8> %91, i32 4)
%522 = extractelement <4 x float> %521, i32 0
%523 = extractelement <4 x float> %521, i32 1
%524 = extractelement <4 x float> %521, i32 2
%525 = extractelement <4 x float> %521, i32 3
%526 = call float @llvm.pow.f32(float %525, float %65)
%527 = fmul float %64, %526
%528 = fmul float %527, %522
%529 = fmul float %527, %523
%530 = fmul float %527, %524
%531 = call float @llvm.AMDGPU.lrp(float %50, float %299, float %528)
%532 = call float @llvm.AMDGPU.lrp(float %50, float %300, float %529)
%533 = call float @llvm.AMDGPU.lrp(float %50, float %301, float %530)
br label %ENDIF81
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata for the module whose
; @main ends on the closing brace directly above.  Each "Function Attrs" line
; restates the attribute group (#1 or #2) attached to the declaration under it.
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
; Called in @main with a <16 x i8> value and a constant i32 (160/164/168 in the
; first IF block).  NOTE(review): the i32 looks like a byte offset - the
; matching s_buffer_load_dword instructions in the disassembly use 0x28..0x2a,
; i.e. those values divided by 4 - confirm.
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
; NOTE(review): presumably the source of the v_interp_p1/p2 pairs in the
; disassembly - confirm against the start of @main.
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
; Used in @main on a sum of three squares; the results scale the same three
; components (vector normalisation pattern).
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
; Three-operand blend.  In the disassembly the final lrp calls appear as
; a*x + (1 - a)*y with a as the first argument.
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
; Appears in the disassembly as v_log_f32 / v_mul_legacy_f32 / v_exp_f32.
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
; Takes the 4-component vector built from the direction and the LOD value; its
; four results feed the coordinates of the following samplel call.
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
; Sample with a 4 x i32 coordinate vector whose last element is the bitcast of
; the value computed as pow(1 - x, 0.75) * 7.
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
; Every call in @main passes 0.0 and 1.0 as the second and third arguments.
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
; Packs two floats into one i32; @main bitcasts the result back to float for
; the export call.
declare i32 @llvm.SI.packf16(float, float) #1
; No attribute group: the export call is the only side-effecting call in the
; visible part of @main.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 carries the string attributes "ShaderType" and "enable-no-nans-fp-math".
; NOTE(review): "ShaderType"="0" presumably marks a fragment shader (the
; vertex shader module at the top of this file uses "1") - confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node named "const".  NOTE(review): presumably the target of
; "!tbaa !0" on the descriptor loads, as in the vertex shader module at the top
; of this file - confirm.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; ---------------------------------------------------------------------------
; Prologue of the machine code for @main: attribute interpolation, two vector
; normalisations, a reflection-style vector, two texture samples and the first
; exec-masked branch.  The trailing "; XXXXXXXX" on every line is the
; instruction encoding.  Branch targets (BB0_2, BB0_6, BB0_7) are named by the
; branches but no label lines are printed in this listing.
; ---------------------------------------------------------------------------
; exec is replaced by its whole-quad-mode mask.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 <- s9; every v_interp_* below names [m0] as an operand.
s_mov_b32 m0, s9 ; BEFC0309
; Each p1/p2 pair produces one interpolated value from v0/v1.
; NOTE(review): the two integer operands look like (channel, attribute) - confirm.
; v13, v14: later used as v[13:14], the coordinate operand of both image_sample
; instructions.
v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000
v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001
v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100
v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101
; v2, v3, v6: first 3-component vector (normalised below).
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500
v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
; v10, v11, v12: not touched again until the final shading arithmetic.
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00
v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01
; v7, v15, v16, v17: four values; v15..v17 form the second vector normalised
; below.
v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00
v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
; v8 = rsq_clamp(v2*v2 + v3*v3 + v6*v6)
v_mul_f32_e32 v4, v2, v2 ; 10080502
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v8, v4 ; 7E105904
; v18 = rsq_clamp(v15*v15 + v16*v16 + v17*v17)
v_mul_f32_e32 v4, v15, v15 ; 10081F0F
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mac_f32_e32 v4, v17, v17 ; 3E082311
v_rsq_clamp_f32_e32 v18, v4 ; 7E245904
; (v5, v4, v3) = v8 * (v2, v3, v6): first vector, normalised.
v_mul_f32_e32 v5, v8, v2 ; 100A0508
v_mul_f32_e32 v4, v8, v3 ; 10080708
v_mul_f32_e32 v3, v8, v6 ; 10060D08
; (v9, v8, v6) = v18 * (v15, v16, v17): second vector, normalised (v8 is
; reused here after its last read above).
v_mul_f32_e32 v9, v18, v15 ; 10121F12
v_mul_f32_e32 v8, v18, v16 ; 10102112
; v2 = dot((v9, v8, v6), (v5, v4, v3))
v_mul_f32_e32 v2, v9, v5 ; 10040B09
v_mac_f32_e32 v2, v8, v4 ; 3E040908
v_mul_f32_e32 v6, v18, v17 ; 100C2312
v_mac_f32_e32 v2, v6, v3 ; 3E040706
; (v20, v21, v22) = (v15, v16, v17)*v18 - 2*v2*(v5, v4, v3); this is the
; %199..%208 sequence of the IR (second vector minus twice its projection on
; the first).
v_mul_f32_e32 v19, v5, v2 ; 10260505
v_mac_f32_e32 v19, v5, v2 ; 3E260505
v_mul_f32_e32 v21, v4, v2 ; 102A0504
v_mac_f32_e32 v21, v4, v2 ; 3E2A0504
v_mad_f32 v20, v15, v18, -v19 ; D2820014 844E250F
v_mad_f32 v21, v16, v18, -v21 ; D2820015 84562510
v_mul_f32_e32 v15, v3, v2 ; 101E0503
v_mac_f32_e32 v15, v3, v2 ; 3E1E0503
; s[8:11] is the base operand of every s_buffer_load_dword in this shader.
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_mad_f32 v22, v17, v18, -v15 ; D2820016 843E2511
; v24, v23, v25: three more interpolated values, used inside both conditional
; regions.
v_interp_p1_f32 v24, v0, 0, 4, [m0] ; C8601000
v_interp_p2_f32 v24, [v24], v1, 0, 4, [m0] ; C8611001
; s[0:3] and s[12:19] are the last two operands of the first image_sample.
s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508
s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710
v_interp_p1_f32 v23, v0, 1, 4, [m0] ; C85C1100
v_interp_p2_f32 v23, [v23], v1, 1, 4, [m0] ; C85D1101
v_interp_p1_f32 v25, v0, 2, 4, [m0] ; C8641200
v_interp_p2_f32 v25, [v25], v1, 2, 4, [m0] ; C8651201
s_waitcnt lgkmcnt(0) ; BF8C007F
; Dwords 0x4c..0x4e: per-component multipliers for the first sample result.
s_buffer_load_dword s20, s[8:11], 0x4c ; C20A094C
s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D
s_buffer_load_dword s22, s[8:11], 0x4e ; C20B094E
; s[24:27] and s[32:39] are the last two operands of the second image_sample.
s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C
s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718
; First sample: three result registers v[0:2] (first immediate is 7).
image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[0:3] ; F0800700 0003000D
s_buffer_load_dword s0, s[8:11], 0x41 ; C2000941
s_buffer_load_dword s1, s[8:11], 0x42 ; C2008942
s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954
s_buffer_load_dword s2, s[8:11], 0x40 ; C2010940
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; (v17, v18, v19) = (s20, s21, s22) * (v0, v1, v2)
v_mul_f32_e32 v17, s20, v0 ; 10220014
v_mul_f32_e32 v18, s21, v1 ; 10240215
v_mul_f32_e32 v19, s22, v2 ; 10260416
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C
s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D
s_buffer_load_dword s3, s[8:11], 0x58 ; C2018958
; (v2, v1, v0) = (s2, s0, s1)*(1 - s28) + s28*(v17, v18, v19).
; Interleaved with it: (v26, v27, v28) <- (v20, v21, v22), the value used when
; the conditional region below is skipped (the IR phi takes %206..%208 from
; main_body).
v_sub_f32_e64 v0, 1.0, s28 ; D2080000 000038F2
v_mul_f32_e32 v2, s2, v0 ; 10040002
v_mul_f32_e32 v1, s0, v0 ; 10020000
v_mul_f32_e32 v0, s1, v0 ; 10000001
v_mac_f32_e32 v2, s28, v17 ; 3E04221C
v_mov_b32_e32 v26, v20 ; 7E340314
v_mac_f32_e32 v1, s28, v18 ; 3E02241C
v_mov_b32_e32 v27, v21 ; 7E360315
v_mac_f32_e32 v0, s28, v19 ; 3E00261C
v_mov_b32_e32 v28, v22 ; 7E380316
s_waitcnt lgkmcnt(0) ; BF8C007F
; s[0:1] = (0 < s13), s13 being constant dword 0x2b; IR: fcmp ogt %51, 0.0.
v_cmp_lt_f32_e64 s[0:1], 0, s13 ; D0020000 00001A80
; Second sample: four result registers v[13:16], overwriting its own
; coordinate registers v[13:14].
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[24:27] ; F0800F00 00C80D0D
s_waitcnt vmcnt(0) ; BF8C0770
; Enter the conditional region: exec &= s[0:1], the bits needed to restore
; exec are kept in s[20:21], and the body is skipped if exec became zero.
s_and_saveexec_b64 s[20:21], s[0:1] ; BE942400
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
s_cbranch_execz BB0_2 ; BF880000
; ---------------------------------------------------------------------------
; Body of the first conditional region (the IR block labelled IF).  It replaces
; (v26, v27, v28) with a corrected direction built from the reflection-style
; vector (v20, v21, v22), the interpolated values (v24, v23, v25) and nine
; constants.  exec is restored by the final s_or_b64.
; ---------------------------------------------------------------------------
; Constant dwords 0x20..0x22 -> s0..s2 and 0x24..0x26 -> s13..s15.
s_buffer_load_dword s0, s[8:11], 0x20 ; C2000920
s_buffer_load_dword s1, s[8:11], 0x21 ; C2008921
s_buffer_load_dword s2, s[8:11], 0x22 ; C2010922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925
; v13 = rsq_clamp(v20*v20 + v21*v21 + v22*v22)
v_mul_f32_e32 v13, v20, v20 ; 101A2914
v_mac_f32_e32 v13, v21, v21 ; 3E1A2B15
v_mac_f32_e32 v13, v22, v22 ; 3E1A2D16
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
; Constant dwords 0x28..0x2a -> s16..s18 (the IR loads byte offsets
; 160/164/168 for these).
s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926
s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928
s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929
s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A
; (v15, v16, v13) = normalised (v20, v21, v22).
v_mul_f32_e32 v15, v13, v20 ; 101E290D
v_mul_f32_e32 v16, v13, v21 ; 10202B0D
v_mul_f32_e32 v13, v13, v22 ; 101A2D0D
; Per component: t = (dir > 0) ? (A - p)/dir : (B - p)/dir, with A = s0..s2,
; B = s13..s15 and p = v24, v23, v25.  v_cndmask picks its last operand when
; vcc is set.
v_rcp_f32_e32 v26, v15 ; 7E34550F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v27, s0, v24 ; 08363000
v_sub_f32_e32 v28, s1, v23 ; 08382E01
v_rcp_f32_e32 v29, v16 ; 7E3A5510
v_mul_f32_e32 v27, v26, v27 ; 1036371A
v_sub_f32_e32 v30, s13, v24 ; 083C300D
v_mul_f32_e32 v26, v26, v30 ; 10343D1A
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v26, v26, v27 ; 0034371A
v_rcp_f32_e32 v27, v13 ; 7E36550D
v_mul_f32_e32 v28, v29, v28 ; 1038391D
v_sub_f32_e32 v30, s14, v23 ; 083C2E0E
v_mul_f32_e32 v29, v29, v30 ; 103A3D1D
v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080
v_cndmask_b32_e32 v28, v29, v28 ; 0038391D
v_sub_f32_e32 v29, s2, v25 ; 083A3202
v_mul_f32_e32 v29, v27, v29 ; 103A3B1B
v_sub_f32_e32 v30, s15, v25 ; 083C320F
v_mul_f32_e32 v27, v27, v30 ; 10363D1B
v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80
v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B
; v26 = min of the three per-component values (two llvm.minnum calls in the IR).
v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A
; (v27, v28, v29) = A + B; multiplied by 0.5 in the v_mad_f32 instructions
; below.
v_mov_b32_e32 v27, s13 ; 7E36020D
v_add_f32_e32 v27, s0, v27 ; 06363600
v_mov_b32_e32 v28, s14 ; 7E38020E
v_add_f32_e32 v28, s1, v28 ; 06383801
v_mov_b32_e32 v29, s15 ; 7E3A020F
v_add_f32_e32 v29, s2, v29 ; 063A3A02
; Per component: acc = 0.5*(A + B) - C + p + t*dir, with C = s16..s18.
v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0
v_add_f32_e32 v30, v24, v30 ; 063C3D18
v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A
v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0
v_add_f32_e32 v15, v23, v15 ; 061E1F17
v_mac_f32_e32 v15, v26, v16 ; 3E1E211A
v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0
v_add_f32_e32 v16, v25, v16 ; 06202119
v_mac_f32_e32 v16, v26, v13 ; 3E201B1A
; (v26, v27, v28) = acc - 0.5*(A + B): the IR values %263..%265.
v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0
v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0
v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0
; Leave the conditional region: restore exec from the bits saved in s[20:21].
s_or_b64 exec, exec, s[20:21] ; 88FE147E
; ---------------------------------------------------------------------------
; After the first conditional region (IR block ENDIF): prefetch constants for
; the final shading arithmetic, do the first cube lookup with an explicit LOD,
; scale the result, then test whether the second lookup is needed.
; ---------------------------------------------------------------------------
; Constants consumed much later; issued here ahead of the texture work.
s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917
s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943
s_buffer_load_dword s13, s[8:11], 0x60 ; C2068960
s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900
s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901
s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902
s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904
s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905
s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906
s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907
s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908
s_buffer_load_dword s21, s[8:11], 0x9 ; C20A8909
s_buffer_load_dword s22, s[8:11], 0xa ; C20B090A
s_buffer_load_dword s23, s[8:11], 0xb ; C20B890B
s_buffer_load_dword s24, s[8:11], 0xc ; C20C090C
s_buffer_load_dword s25, s[8:11], 0xd ; C20C890D
s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E
s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F
; v29 = 7.0 * pow(1 - s3, 0.75), computed as exp(0.75 * log(1 - s3)).
; 0x3f400000 is 0.75 and 0x40e00000 is 7.0; s3 is constant dword 0x58.
v_sub_f32_e64 v13, 1.0, s3 ; D208000D 000006F2
v_log_f32_e32 v13, v13 ; 7E1A4F0D
; s[32:35] and s[36:43] are the last two operands of the image_sample_l below.
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000
; Four cube instructions on the direction (v26, v27, v28); together they
; implement the single llvm.AMDGPU.cube call of the IR.
v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A
v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A
v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A
v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A
; Coordinates: v26 = v31/|v32| + 1.5, v27 = v30/|v32| + 1.5, v28 = v33;
; v29 already holds the pow result (0x3fc00000 is 1.5).
v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000
v_rcp_f32_e64 v13, |v32| ; D354010D 00000120
v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D
v_mac_f32_e32 v26, v13, v31 ; 3E343F0D
v_mov_b32_e32 v28, v33 ; 7E380321
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A
s_waitcnt vmcnt(0) ; BF8C0770
; v13 = s29 * pow(v29, s30), v29 being the 4th sample component.
v_log_f32_e32 v13, v29 ; 7E1A4F1D
v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v13, s29, v13 ; 101A1A1D
; (v16, v15, v13) = (v26, v27, v28) * v13: IR values %299..%301.
v_mul_f32_e32 v16, v26, v13 ; 10201B1A
v_mul_f32_e32 v15, v27, v13 ; 101E1B1B
v_mul_f32_e32 v13, v28, v13 ; 101A1B1C
; Keep constant s28 in v26: s[28:29] is overwritten by the exec save below and
; v26 is read again after the region.
v_mov_b32_e32 v26, s28 ; 7E34021C
; vcc = (s12 < 0x3f7fff58); this is the IR compare of %50 against
; 0x3FEFFFEB00000000 (same value, written as a double).
v_mov_b32_e32 v27, 0x3f7fff58 ; 7E3602FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v27 ; 7C02360C
; Enter the second conditional region (IR block IF82); restore bits in s[28:29].
s_and_saveexec_b64 s[28:29], vcc ; BE9C246A
s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E
s_cbranch_execz BB0_6 ; BF880000
; ---------------------------------------------------------------------------
; Start of the second conditional region and the nested region inside it (IR
; blocks IF82 and IF85).  The nested body repeats the direction correction of
; the first region with a different set of constants, and writes its result
; over (v20, v21, v22).  When the nested body is skipped those registers keep
; the uncorrected vector, matching the IR phi that takes %206..%208 from IF82.
; ---------------------------------------------------------------------------
; Dword 0x3b is the nested condition; 0x3c and 0x3d are used after the nested
; region, for the second cube sample.
s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B
s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C
s_buffer_load_dword s31, s[8:11], 0x3d ; C20F893D
s_waitcnt lgkmcnt(0) ; BF8C007F
; s[32:33] = (0 < s32); IR: fcmp ogt %63, 0.0.
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
; Enter the nested region; restore bits are kept in s[32:33].
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_7 ; BF880000
; Constants: A = dwords 0x30..0x32 (s38..s40), B = 0x34..0x36 (s41, s42, s34),
; C = 0x38..0x3a (s35..s37).
s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936
s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938
s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939
s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A
s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930
s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931
s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932
s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934
s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935
; v27 = rsq_clamp(v20*v20 + v21*v21 + v22*v22)
v_mul_f32_e32 v27, v20, v20 ; 10362914
v_mac_f32_e32 v27, v21, v21 ; 3E362B15
v_mac_f32_e32 v27, v22, v22 ; 3E362D16
v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B
s_waitcnt lgkmcnt(0) ; BF8C007F
; Differences A - p and B - p, sums A + B, and p + 0.5*(A + B) - C, with
; p = (v24, v23, v25); the scheduler has interleaved the three components.
v_sub_f32_e32 v28, s34, v25 ; 08383222
v_mov_b32_e32 v29, s34 ; 7E3A0222
v_sub_f32_e32 v30, s38, v24 ; 083C3026
v_sub_f32_e32 v31, s39, v23 ; 083E2E27
v_add_f32_e32 v29, s40, v29 ; 063A3A28
v_sub_f32_e32 v32, s40, v25 ; 08403228
v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0
v_add_f32_e32 v25, v25, v33 ; 06324319
; (v20, v21, v22) normalised in place, then reciprocals in v27, v33, v34.
v_mul_f32_e32 v20, v27, v20 ; 1028291B
v_mul_f32_e32 v21, v27, v21 ; 102A2B1B
v_mul_f32_e32 v22, v27, v22 ; 102C2D1B
v_rcp_f32_e32 v27, v20 ; 7E365514
v_rcp_f32_e32 v33, v21 ; 7E425515
v_rcp_f32_e32 v34, v22 ; 7E445516
v_sub_f32_e32 v35, s41, v24 ; 08463029
v_mov_b32_e32 v36, s41 ; 7E480229
v_add_f32_e32 v36, s38, v36 ; 06484826
v_mul_f32_e32 v30, v27, v30 ; 103C3D1B
v_mul_f32_e32 v27, v27, v35 ; 1036471B
v_mul_f32_e32 v31, v33, v31 ; 103E3F21
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0
v_add_f32_e32 v24, v24, v34 ; 06304518
v_sub_f32_e32 v34, s42, v23 ; 08442E2A
v_mov_b32_e32 v35, s42 ; 7E46022A
v_mul_f32_e32 v33, v33, v34 ; 10424521
v_add_f32_e32 v34, s39, v35 ; 06444627
; Per component: pick the A-based quotient when the direction component is
; positive, else the B-based one; then take the minimum of the three.
v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880
v_cndmask_b32_e32 v27, v27, v30 ; 00363D1B
v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80
v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21
v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80
v_cndmask_b32_e32 v28, v28, v32 ; 0038411C
v_min3_f32 v27, v27, v30, v28 ; D2A2001B 04723D1B
v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0
v_add_f32_e32 v23, v23, v28 ; 062E3917
; Add min * direction, then subtract 0.5*(A + B):
; (v20, v21, v22) = IR values %492..%494.
v_mac_f32_e32 v24, v27, v20 ; 3E30291B
v_mac_f32_e32 v23, v27, v21 ; 3E2E2B1B
v_mac_f32_e32 v25, v27, v22 ; 3E322D1B
v_mad_f32 v20, 0.5, -v36, v24 ; D2820014 446248F0
v_mad_f32 v21, 0.5, -v34, v23 ; D2820015 445E44F0
v_mad_f32 v22, 0.5, -v29, v25 ; D2820016 44663AF0
; Leave the nested region: restore exec from the bits saved in s[32:33].
s_or_b64 exec, exec, s[32:33] ; 88FE207E
; ---------------------------------------------------------------------------
; Rest of the second conditional region (IR block ENDIF84): a second cube
; lookup along (v20, v21, v22), scaled and blended with the first lookup result
; held in (v16, v15, v13).  The final s_or_b64 closes the region.
; ---------------------------------------------------------------------------
; v23 = 7.0 * pow(1 - s3, 0.75), recomputed exactly as for the first lookup
; (0x3f400000 is 0.75, 0x40e00000 is 7.0).
v_sub_f32_e64 v23, 1.0, s3 ; D2080017 000006F2
v_log_f32_e32 v23, v23 ; 7E2E4F17
; s[32:35] is the last operand of the image_sample_l below (s[32:33] is free
; again after the nested region restored exec).
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v23, 0x3f400000, v23 ; 0E2E2EFF 3F400000
v_exp_f32_e32 v23, v23 ; 7E2E4B17
v_mul_f32_e32 v23, 0x40e00000, v23 ; 102E2EFF 40E00000
; Four cube instructions on the direction (v20, v21, v22).
v_cubeid_f32 v30, v20, v21, v22 ; D288001E 045A2B14
v_cubema_f32 v29, v20, v21, v22 ; D28E001D 045A2B14
; s[36:43] is the second-to-last operand of the image_sample_l below.
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v28, v20, v21, v22 ; D28A001C 045A2B14
v_cubetc_f32 v27, v20, v21, v22 ; D28C001B 045A2B14
; Coordinates: v20 = v28/|v29| + 1.5, v21 = v27/|v29| + 1.5, v22 = v30;
; v23 already holds the pow result (0x3fc00000 is 1.5).
v_rcp_f32_e64 v22, |v29| ; D3540116 0000011D
v_mov_b32_e32 v20, 0x3fc00000 ; 7E2802FF 3FC00000
v_mad_f32 v21, v22, v27, v20 ; D2820015 04523716
v_mac_f32_e32 v20, v22, v28 ; 3E283916
v_mov_b32_e32 v22, v30 ; 7E2C031E
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[36:43], s[32:35] ; F0900F00 01091414
s_waitcnt vmcnt(0) ; BF8C0770
; v23 = s30 * pow(v23, s31), v23 being the 4th sample component;
; v24 = 1 - s12.
v_log_f32_e32 v23, v23 ; 7E2E4F17
v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2
v_mul_legacy_f32_e32 v23, s31, v23 ; 0E2E2E1F
v_exp_f32_e32 v23, v23 ; 7E2E4B17
v_mul_f32_e32 v23, s30, v23 ; 102E2E1E
; (v20, v21, v22) *= v23: IR values %528..%530.
v_mul_f32_e32 v20, v20, v23 ; 10282F14
v_mul_f32_e32 v21, v21, v23 ; 102A2F15
v_mul_f32_e32 v22, v22, v23 ; 102C2F16
; Blend: result = s12 * first + (1 - s12) * second, per component
; (the three llvm.AMDGPU.lrp calls with %50 as first argument).
v_mul_f32_e32 v20, v20, v24 ; 10283114
v_mul_f32_e32 v21, v21, v24 ; 102A3115
v_mul_f32_e32 v22, v22, v24 ; 102C3116
v_mac_f32_e32 v20, s12, v16 ; 3E28200C
v_mac_f32_e32 v21, s12, v15 ; 3E2A1E0C
v_mac_f32_e32 v22, s12, v13 ; 3E2C1A0C
; Move the blend into (v16, v15, v13), the registers that hold the result of
; the first lookup on the path that skips this region.
v_mov_b32_e32 v13, v22 ; 7E1A0316
v_mov_b32_e32 v15, v21 ; 7E1E0315
v_mov_b32_e32 v16, v20 ; 7E200314
; Leave the second conditional region: restore exec from s[28:29].
s_or_b64 exec, exec, s[28:29] ; 88FE1C7E
; ---------------------------------------------------------------------------
; Final part of the shader (IR block ENDIF81): combine the earlier results into
; three output components, blend them with three constants, pack to half
; floats and export.  Only the main steps are annotated; the remaining
; arithmetic is the compiler's scheduling of the IR from %304 onward.
; ---------------------------------------------------------------------------
; v22 = s15 * (1 - v26); v26 is the VGPR copy of constant s28 made before
; s[28:29] was used as an exec save slot.
v_mad_f32 v22, -v26, s15, s15 ; D2820016 203C1F1A
; v20 <- s14 now, because s14 is reloaded with a different constant below.
v_mov_b32_e32 v20, s14 ; 7E28020E
; (v21, v18, v17) = v22 * (v17, v18, v19)
v_mul_f32_e32 v21, v22, v17 ; 102A2316
v_mul_f32_e32 v18, v22, v18 ; 10242516
v_mul_f32_e32 v17, v22, v19 ; 10222716
; Three sums of the form a*v5 + b*v4 + c*v3 + d over constants s16..s27, each
; added to one of the interpolated values v10, v11, v12 -> v23, v11, v12.
v_mul_f32_e32 v19, s17, v4 ; 10260811
v_mac_f32_e32 v19, s16, v5 ; 3E260A10
v_mac_f32_e32 v19, s18, v3 ; 3E260612
v_add_f32_e32 v19, s20, v19 ; 06262614
v_add_f32_e32 v23, v19, v10 ; 062E1513
v_mul_f32_e32 v10, s21, v4 ; 10140815
v_mac_f32_e32 v10, s19, v5 ; 3E140A13
v_mac_f32_e32 v10, s22, v3 ; 3E140616
v_add_f32_e32 v10, s23, v10 ; 06141417
v_add_f32_e32 v11, v10, v11 ; 0616170A
v_mul_f32_e32 v10, s25, v4 ; 10140819
v_mac_f32_e32 v10, s24, v5 ; 3E140A18
v_mac_f32_e32 v10, s26, v3 ; 3E14061A
v_add_f32_e32 v10, s27, v10 ; 0614141B
v_add_f32_e32 v12, v10, v12 ; 0618190A
; Last batch of constant loads.  The final one overwrites s8, part of the
; s[8:11] base operand; no s_buffer_load_dword follows it.
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916
s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944
s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945
s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946
s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948
s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
; v19 = (1 - s13) + s13*v14
v_sub_f32_e64 v19, 1.0, s13 ; D2080013 00001AF2
v_mac_f32_e32 v19, s13, v14 ; 3E261C0D
; v10 = max(0, s0*v5 + s1*v4 + s2*v3)
v_mul_f32_e32 v10, s0, v5 ; 10140A00
v_mac_f32_e32 v10, s1, v4 ; 3E140801
v_mac_f32_e32 v10, s2, v3 ; 3E140602
v_max_f32_e32 v10, 0, v10 ; 20141480
v_mul_f32_e32 v14, v19, v23 ; 101C2F13
v_mul_f32_e32 v11, v19, v11 ; 10161713
v_mul_f32_e32 v12, v19, v12 ; 10181913
s_waitcnt lgkmcnt(0) ; BF8C007F
; v20 = (old s14) + s17*v7; clamped to [0, 1] near the end and used as the
; weight of the final blend.
v_mac_f32_e32 v20, s17, v7 ; 3E280E11
; (v7, v15, v13) = v19 * (v16, v15, v13): the cube lookup result scaled by v19.
v_mul_f32_e32 v7, v19, v16 ; 100E2113
v_mul_f32_e32 v15, v19, v15 ; 101E1F13
v_mul_f32_e32 v13, v19, v13 ; 101A1B13
; v16 = clamp(s3 + (1 - v22)); v19 = 1 - s3.
v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2
v_add_f32_e32 v16, s3, v16 ; 06202003
v_sub_f32_e64 v19, 1.0, s3 ; D2080013 000006F2
v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080
; (v22, v23, v24) = normalise((s0, s1, s2) - (v9, v8, v6)).
v_sub_f32_e32 v22, s0, v9 ; 082C1200
v_sub_f32_e32 v23, s1, v8 ; 082E1001
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s2, v6 ; 08320C02
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
; Dot products: v6 = -dot((v9, v8, v6), (v5, v4, v3)),
; v5 = dot((v22, v23, v24), (v5, v4, v3)),
; v4 = dot((s0, s1, s2), (v22, v23, v24)).
v_mul_f32_e32 v9, v9, v5 ; 10120B09
v_mad_f32 v8, -v8, v4, -v9 ; D2820008 A4260908
v_mul_f32_e32 v5, v22, v5 ; 100A0B16
v_mac_f32_e32 v5, v23, v4 ; 3E0A0917
v_mul_f32_e32 v4, s0, v22 ; 10082C00
v_mac_f32_e32 v4, s1, v23 ; 3E082E01
v_mad_f32 v6, -v6, v3, v8 ; D2820006 24220706
v_mac_f32_e32 v4, s2, v24 ; 3E083002
v_mac_f32_e32 v5, v24, v3 ; 3E0A0718
; v3 = max(0, v4); v4 = (1 - v3)^5, built as x*x^2*x^2.
v_max_f32_e32 v3, 0, v4 ; 20060880
v_sub_f32_e32 v4, 1.0, v3 ; 080806F2
v_mul_f32_e32 v8, v4, v4 ; 10100904
v_mul_f32_e32 v4, v4, v8 ; 10081104
v_mul_f32_e32 v4, v4, v8 ; 10081104
; v6 = max(0, v6); v8 = 1 - v6; v9 = v8^2; v22 = v8^3.
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_sub_f32_e32 v8, 1.0, v6 ; 08100CF2
v_mul_f32_e32 v9, v8, v8 ; 10121108
v_mul_f32_e32 v22, v8, v9 ; 102C1308
; v23 = 1 - v8^5
v_mad_f32 v23, -v9, v22, 1.0 ; D2820017 23CA2D09
; Per component c in (v2, v1, v0): the product c*(1 - v8^5) goes to
; v24/v25/v23, then c itself becomes c + (1 - c)*v4.
v_mul_f32_e32 v24, v2, v23 ; 10302F02
v_sub_f32_e32 v25, 1.0, v2 ; 083204F2
v_mac_f32_e32 v2, v4, v25 ; 3E043304
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v4, v26 ; 3E023504
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v4, v26 ; 3E003504
; v4 = (1 - v19)*0x3f77ced9 + 0x3cf5c28f; these are the two constants of the
; IR fmul/fadd pair feeding llvm.log2 (written there as doubles).
v_sub_f32_e32 v4, 1.0, v19 ; 080826F2
v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F
v_madmk_f32_e32 v4, v4, v26, 0x3f77ced9 ; 40083504 3F77CED9
; v3 = (2*v3)*(v19*v3) + 0.5
v_add_f32_e32 v26, v3, v3 ; 06340703
v_mul_f32_e32 v3, v19, v3 ; 10060713
v_mad_f32 v3, v26, v3, 0.5 ; D2820003 03C2071A
; v9 = v8^5; (v24, v25, v23) += v16 * v9: the lrp with weight v8^5.
v_mul_f32_e32 v9, v22, v9 ; 10121316
v_mac_f32_e32 v24, v16, v9 ; 3E301310
v_mac_f32_e32 v25, v16, v9 ; 3E321310
v_mac_f32_e32 v23, v16, v9 ; 3E2E1310
; v16 = s8 * v19^2; then v8 = v16*(1 - v6) + v6 and v16 = v16*(1 - v10) + v10,
; interleaved with the log2/rcp chain on v4.
v_mul_f32_e32 v16, v19, v19 ; 10202713
v_log_f32_e32 v4, v4 ; 7E084F04
v_mul_f32_e32 v16, s8, v16 ; 10202008
v_mul_f32_e32 v8, v16, v8 ; 10101110
v_mac_f32_e32 v8, 1.0, v6 ; 3E100CF2
v_rcp_f32_e32 v4, v4 ; 7E085504
v_sub_f32_e32 v6, 1.0, v10 ; 080C14F2
v_mul_f32_e32 v16, v16, v6 ; 10200D10
v_mac_f32_e32 v16, 1.0, v10 ; 3E2014F2
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_log_f32_e32 v5, v5 ; 7E0A4F05
; v8 = v16*v8 + 0x38d1b717 (the small constant added before the reciprocal in
; the IR).
v_madak_f32_e32 v8, v16, v8, 0x38d1b717 ; 42101110 38D1B717
; v4 = 10.0 / log2(...) (0x41200000 is 10.0); v16 = v4^2 is the exponent.
v_mul_f32_e32 v4, 0x41200000, v4 ; 100808FF 41200000
v_mul_f32_e32 v16, v4, v4 ; 10200904
; v5 = pow(max(0, v5), v16) via log / mul_legacy / exp.
v_mul_legacy_f32_e32 v5, v16, v5 ; 0E0A0B10
v_rcp_f32_e32 v8, v8 ; 7E105508
; v4 = s15 * v10 * (1/v8) * pow * s16 * (exponent + 1)
v_mad_f32 v4, v4, v4, 1.0 ; D2820004 03CA0904
v_mul_f32_e32 v4, s16, v4 ; 10080810
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v4, v4, v8 ; 10081104
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_mul_f32_e32 v4, s15, v4 ; 1008080F
; v5 = (1 - v10)^5; v3 = v10 * (1 + (v3 - 1)*v9) * (1 + (v3 - 1)*v5)
v_mul_f32_e32 v5, v6, v6 ; 100A0D06
v_mul_f32_e32 v6, v6, v5 ; 100C0B06
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_add_f32_e32 v3, -1.0, v3 ; 060606F3
v_mad_f32 v5, v3, v5, 1.0 ; D2820005 03CA0B03
v_mad_f32 v3, v3, v9, 1.0 ; D2820003 03CA1303
v_mul_f32_e32 v3, v3, v5 ; 10060B03
v_mul_f32_e32 v3, v10, v3 ; 1006070A
; Accumulate the three output components in v5, v4, v6: each is a sum of
; three products (the IR fmul/fadd chain %417..%431).
v_mac_f32_e32 v14, s14, v3 ; 3E1C060E
v_mul_f32_e32 v5, v14, v21 ; 100A2B0E
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v6, s14, v4 ; 100C080E
v_mac_f32_e32 v5, v2, v6 ; 3E0A0D02
v_mac_f32_e32 v11, s7, v3 ; 3E160607
v_mac_f32_e32 v12, s12, v3 ; 3E18060C
v_mul_f32_e32 v2, s7, v4 ; 10040807
v_mul_f32_e32 v3, s12, v4 ; 1006080C
v_mul_f32_e32 v4, v11, v18 ; 1008250B
v_mul_f32_e32 v6, v12, v17 ; 100C230C
v_mac_f32_e32 v4, v1, v2 ; 3E080501
v_mac_f32_e32 v6, v0, v3 ; 3E0C0700
v_mac_f32_e32 v5, v24, v7 ; 3E0A0F18
v_mac_f32_e32 v4, v25, v15 ; 3E081F19
v_mac_f32_e32 v6, v23, v13 ; 3E0C1B17
; v0 = clamp(v20) (add of 0 with the clamp modifier); then per component
; out = v0*value + (1 - v0)*constant, with constants s6, s5, s4.
v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v5, v0 ; 3E040105
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v4, v0 ; 3E060104
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v6, v0 ; 3E020106
; Pack (v2, v3) and (v1, 1.0) to half-float pairs; these are the two
; llvm.SI.packf16 calls of the IR.
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
; Export the two packed registers (each listed twice) and end the program.
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 40
Code Size: 2112 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy
34: MOV TEMP[6].x, TEMP[5].xxxx
35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx
36: MOV TEMP[6].y, TEMP[7].xxxx
37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww
38: MOV TEMP[5].zw, TEMP[1].wwzw
39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx
40: DP4 TEMP[7].x, CONST[2], TEMP[6]
41: DP4 TEMP[8].x, CONST[3], TEMP[6]
42: MOV TEMP[7].y, TEMP[8].xxxx
43: DP4 TEMP[6].x, CONST[4], TEMP[6]
44: MOV TEMP[7].z, TEMP[6].xxxx
45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy
46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx
47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz
48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz
49: MOV TEMP[6].yzw, TEMP[6].yxyz
50: MOV TEMP[6].x, TEMP[1].zzzz
51: MOV TEMP[0].xyz, TEMP[0].xyzx
52: MOV OUT[6], TEMP[0]
53: MOV OUT[1], TEMP[2]
54: MOV OUT[2], TEMP[4]
55: MOV OUT[3], TEMP[3]
56: MOV OUT[4], TEMP[5]
57: MOV OUT[0], TEMP[1]
58: MOV OUT[5], TEMP[6]
59: END
; ModuleID = 'tgsi'
; Vertex shader entry point: LLVM IR translation of the 60-instruction TGSI VERT
; program printed directly above this module.
;
; Arguments actually read by the body:
;   %1      - descriptor array; element 0 is the buffer read by llvm.SI.load.const
;             (the TGSI CONST[] file).
;   %4      - descriptor array; elements 0..3 are the buffers fetched by
;             llvm.SI.vs.load.input for IN[0]..IN[3].
;   %5, %7  - summed to form the fetch index for every vertex input
;             (presumably base vertex + vertex id - confirm against the driver ABI).
; The remaining arguments are unused in this function.
;
; load.const offsets are in bytes; CONST[n].{x,y,z,w} lives at 16*n + {0,4,8,12}.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; --- Constant loads ---------------------------------------------------------
; %12 is the descriptor for the constant buffer (element 0 of %1).
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; %13..%15 = CONST[0].xyz
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; %16 = CONST[1].x
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; %17..%28 = CONST[2], CONST[3], CONST[4] (all four components each)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; %29..%31 = CONST[5].xyz
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
; %32..%43 = CONST[6], CONST[7], CONST[8] (xyzw); %44..%46 = CONST[9].xyz
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; %47..%55 = CONST[10].xyz, CONST[11].xyz, CONST[12].xyz
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
; %56..%59 = CONST[14].xyzw; %60..%63 = CONST[15].xyzw; %64 = CONST[16].x
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
; %65..%80 = CONST[17], CONST[18], CONST[19], CONST[20] (xyzw each)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; --- Vertex input fetches ---------------------------------------------------
; Each input uses descriptor element i of %4 and the same index %5 + %7
; (the add is re-emitted per input rather than shared).
; IN[0].xyzw -> %85..%88
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
; IN[1].xyz -> %93..%95 (w is never extracted)
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
; IN[2].xy -> %100, %101
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
; IN[3].xy -> %106, %107
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
; --- TGSI 0-3: TEMP[0] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z
;               (+ CONST[9]*IN[0].w for xyz only) ----------------------------
; Results: xyz = %129, %131, %133; w = %127 (instruction 3 writes only .xyz, so
; the w component stops after the CONST[8] term).
%108 = fmul float %32, %85
%109 = fmul float %33, %85
%110 = fmul float %34, %85
%111 = fmul float %35, %85
%112 = fmul float %36, %86
%113 = fadd float %112, %108
%114 = fmul float %37, %86
%115 = fadd float %114, %109
%116 = fmul float %38, %86
%117 = fadd float %116, %110
%118 = fmul float %39, %86
%119 = fadd float %118, %111
%120 = fmul float %40, %87
%121 = fadd float %120, %113
%122 = fmul float %41, %87
%123 = fadd float %122, %115
%124 = fmul float %42, %87
%125 = fadd float %124, %117
%126 = fmul float %43, %87
%127 = fadd float %126, %119
%128 = fmul float %44, %88
%129 = fadd float %128, %121
%130 = fmul float %45, %88
%131 = fadd float %130, %123
%132 = fmul float %46, %88
%133 = fadd float %132, %125
; --- TGSI 4-7: TEMP[1] = CONST[17..20] combined with IN[0].xyzw --------------
; Results: %155, %157, %159, %161 (x, y, z, w); exported as POSITION below.
%134 = fmul float %65, %85
%135 = fmul float %66, %85
%136 = fmul float %67, %85
%137 = fmul float %68, %85
%138 = fmul float %69, %86
%139 = fadd float %138, %134
%140 = fmul float %70, %86
%141 = fadd float %140, %135
%142 = fmul float %71, %86
%143 = fadd float %142, %136
%144 = fmul float %72, %86
%145 = fadd float %144, %137
%146 = fmul float %73, %87
%147 = fadd float %146, %139
%148 = fmul float %74, %87
%149 = fadd float %148, %141
%150 = fmul float %75, %87
%151 = fadd float %150, %143
%152 = fmul float %76, %87
%153 = fadd float %152, %145
%154 = fmul float %77, %88
%155 = fadd float %154, %147
%156 = fmul float %78, %88
%157 = fadd float %156, %149
%158 = fmul float %79, %88
%159 = fadd float %158, %151
%160 = fmul float %80, %88
%161 = fadd float %160, %153
; --- TGSI 8: TEMP[2].xy = IN[2].xy * CONST[14].xy + CONST[14].zw ------------
%162 = fmul float %100, %56
%163 = fadd float %162, %58
%164 = fmul float %101, %57
%165 = fadd float %164, %59
; --- TGSI 9-16: the UIF/ELSE is lowered to selects: use IN[2].xy when
;     CONST[16].x == 0.0, otherwise IN[3].xy; then scale/offset by CONST[15].
;     Results %168, %170 become TEMP[2].zw. ----------------------------------
%166 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %166, float %100, float %106
%.36 = select i1 %166, float %101, float %107
%167 = fmul float %., %60
%168 = fadd float %167, %62
%169 = fmul float %.36, %61
%170 = fadd float %169, %63
; --- TGSI 17-28: TEMP[3].xyz; component i is dot(CONST[10+i].xyz, IN[1].xyz).
;     Results: %181, %183, %185. ---------------------------------------------
%171 = fmul float %47, %93
%172 = fmul float %50, %93
%173 = fmul float %53, %93
%174 = fmul float %48, %94
%175 = fadd float %174, %171
%176 = fmul float %51, %94
%177 = fadd float %176, %172
%178 = fmul float %54, %94
%179 = fadd float %178, %173
%180 = fmul float %49, %95
%181 = fadd float %180, %175
%182 = fmul float %52, %95
%183 = fadd float %182, %177
%184 = fmul float %55, %95
%185 = fadd float %184, %179
; --- TGSI 29-31: normalize that vector (DP3, RSQ, MUL) -> %192, %193, %194 ---
%186 = fmul float %181, %181
%187 = fmul float %183, %183
%188 = fadd float %187, %186
%189 = fmul float %185, %185
%190 = fadd float %188, %189
%191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190)
%192 = fmul float %181, %191
%193 = fmul float %183, %191
%194 = fmul float %185, %191
; --- TGSI 33-38: TEMP[5].xy from the position:
;     %199 = 0.5*pos.x + 0.5*pos.w
;     %200 = 0.5*pos.y*CONST[1].x + 0.5*pos.w
;     (TEMP[5].zw are pos.z / pos.w, taken directly from %159 / %161.) --------
%195 = fmul float %155, 5.000000e-01
%196 = fmul float %157, 5.000000e-01
%197 = fmul float %161, 5.000000e-01
%198 = fmul float %196, %16
%199 = fadd float %195, %197
%200 = fadd float %198, %197
; --- TGSI 39: TEMP[6] = n.xyzz * n.yzzx, with n = (%192, %193, %194):
;     %201 = x*y, %202 = y*z, %203 = z*z, %204 = z*x -------------------------
%201 = fmul float %192, %193
%202 = fmul float %193, %194
%203 = fmul float %194, %194
%204 = fmul float %194, %192
; --- TGSI 40-44: three DP4s of TEMP[6] against CONST[2], CONST[3], CONST[4]
;     -> %211, %218, %225 ----------------------------------------------------
%205 = fmul float %17, %201
%206 = fmul float %18, %202
%207 = fadd float %205, %206
%208 = fmul float %19, %203
%209 = fadd float %207, %208
%210 = fmul float %20, %204
%211 = fadd float %209, %210
%212 = fmul float %21, %201
%213 = fmul float %22, %202
%214 = fadd float %212, %213
%215 = fmul float %23, %203
%216 = fadd float %214, %215
%217 = fmul float %24, %204
%218 = fadd float %216, %217
%219 = fmul float %25, %201
%220 = fmul float %26, %202
%221 = fadd float %219, %220
%222 = fmul float %27, %203
%223 = fadd float %221, %222
%224 = fmul float %28, %204
%225 = fadd float %223, %224
; --- TGSI 45-47: %228 = n.x*n.x - n.y*n.y; result = CONST[5].xyz * %228 + DP4s
;     -> %230, %232, %234 ----------------------------------------------------
%226 = fmul float %193, %193
%227 = fmul float %192, %192
%228 = fsub float %227, %226
%229 = fmul float %29, %228
%230 = fadd float %229, %211
%231 = fmul float %30, %228
%232 = fadd float %231, %218
%233 = fmul float %31, %228
%234 = fadd float %233, %225
; --- TGSI 48: TEMP[0].xyz - CONST[0].xyz -> %235, %236, %237 ----------------
%235 = fsub float %129, %13
%236 = fsub float %131, %14
%237 = fsub float %133, %15
; --- Exports ----------------------------------------------------------------
; The 4th i32 operand selects the export target. By matching operands with the
; TGSI MOV OUT[...] instructions: 32..37 carry OUT[1]..OUT[6] (GENERIC[0..5])
; and 12 carries OUT[0] (POSITION). Only the final export sets the 3rd i32
; operand to 1 (presumably the "done" flag - confirm against the intrinsic's
; definition).
; OUT[1] = (TEMP[2].xy, TEMP[2].zw)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170)
; OUT[2] = normalized vector, w = 0.0
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00)
; OUT[3] = CONST[5]-weighted result, w = 0.0
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00)
; OUT[4] = TEMP[5]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161)
; OUT[5] = (pos.z, TEMP[0].xyz - CONST[0].xyz)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237)
; OUT[6] = TEMP[0]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127)
; OUT[0] = POSITION = TEMP[1]
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are printed above.
; Register/constant correspondences below were derived by matching load offsets
; and operand order against that IR. The s_buffer_load_dword immediates appear
; to be dword offsets (e.g. 0x23 = byte 140 = CONST[8].w).
;
; v1 = 0.0, used as the w component of the exports to targets 33 and 34.
v_mov_b32_e32 v1, 0 ; 7E020280
; v0 = s10 + v0: the fetch index shared by all four vertex inputs.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[20:23]: descriptor used by every s_buffer_load_dword below (constant buffer).
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
; Four vertex-buffer descriptors from s[8:9]: elements 0, 1, 2, 3.
; Note element 2 lands in s[12:15] and element 3 in s[8:11].
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
; s19 = CONST[8].w
s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523
; v[2:5] = IN[0].xyzw, v[6:9] = IN[1] (only v6..v8 are consumed later).
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
; The next two fetches write register ranges that overlap the previous
; destination (v9, then v11..v12), and each is preceded by a wait.
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v[9:12] = IN[2] (v9, v10 = xy are the components used).
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
; v[11:14] = IN[3] (v11, v12 = xy are the components used).
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
; First batch of scalar constant loads:
;   s24..s26 = CONST[9].xyz     s27..s29 = CONST[10].xyz
;   s30..s32 = CONST[11].xyz    s33..s35 = CONST[12].xyz
;   s36, s37 = CONST[14].xy
s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524
s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525
s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526
s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528
s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529
s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A
s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C
s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D
s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E
s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530
s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531
s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532
s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538
s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539
; Dwords 0x9..0x12: CONST[2].yzw, CONST[3].xyzw, CONST[4].xyz.
s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509
s_buffer_load_dword s5, s[20:23], 0xa ; C202950A
s_buffer_load_dword s3, s[20:23], 0xb ; C201950B
s_buffer_load_dword s9, s[20:23], 0xc ; C204950C
s_buffer_load_dword s12, s[20:23], 0xd ; C206150D
s_buffer_load_dword s7, s[20:23], 0xe ; C203950E
s_buffer_load_dword s4, s[20:23], 0xf ; C202150F
s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510
s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511
s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512
; s0 = CONST[15].w, s1 = CONST[16].x (both registers are reused further down).
s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F
s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540
; s38..s41 = CONST[17].xyzw, s42..s45 = CONST[18].xyzw
s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544
s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545
s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546
s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547
s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548
s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549
s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A
s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v0 = CONST[15].w: accumulator seed for the second texcoord's y (see v_mac below).
v_mov_b32_e32 v0, s0 ; 7E000200
; s0 is now free: reload as CONST[0].x.
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
; vcc = (CONST[16].x == 0.0): the TGSI FSEQ/UIF condition.
v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280
; s1, s2 = CONST[0].yz; s6 = CONST[1].x; s14 = CONST[2].x
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508
; s46, s47 = CONST[14].zw; s48..s50 = CONST[15].xyz
s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A
s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B
s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C
s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D
s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E
; s18 = CONST[4].w; s15..s17 = CONST[5].xyz; s51 = CONST[6].x
s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513
s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514
s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515
s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516
s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518
s_waitcnt lgkmcnt(0) ; BF8C007F
; v13 = CONST[14].z: accumulator seed for the first texcoord's x.
v_mov_b32_e32 v13, s46 ; 7E1A022E
; s46 is reused: s46, s52, s53 = CONST[6].yzw; s54..s57 = CONST[7];
; s58..s60 = CONST[8].xyz; s61..s64 = CONST[19]; s65..s67, s20 = CONST[20].
; The last load overwrites s20, the first dword of the descriptor itself.
s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519
s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A
s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B
s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C
s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D
s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E
s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F
s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520
s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521
s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522
s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C
s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D
s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E
s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F
s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550
s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551
s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552
s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553
; v13 = IN[2].x * CONST[14].x + CONST[14].z   (IR %163)
v_mac_f32_e32 v13, s36, v9 ; 3E1A1224
; v14 seeded with CONST[14].w; completed by the v_mac with s37 below (IR %165).
v_mov_b32_e32 v14, s47 ; 7E1C022F
; v15, v16, v7, v8 accumulate TEMP[0].xyzw (CONST[6..9] with IN[0]).
v_mul_f32_e32 v15, s51, v2 ; 101E0433
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v16, s46, v2 ; 1020042E
; v17, v18, v6 accumulate dot(CONST[10/11/12].xyz, IN[1].xyz): the vector that
; is normalized further down.
v_mul_f32_e32 v17, s27, v6 ; 10220C1B
v_mul_f32_e32 v18, s30, v6 ; 10240C1E
v_mul_f32_e32 v6, s33, v6 ; 100C0C21
v_mac_f32_e32 v17, s28, v7 ; 3E220E1C
v_mac_f32_e32 v18, s31, v7 ; 3E240E1F
v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22
v_mac_f32_e32 v17, s29, v8 ; 3E22101D
v_mac_f32_e32 v18, s32, v8 ; 3E241020
v_mac_f32_e32 v6, s35, v8 ; 3E0C1023
v_mul_f32_e32 v7, s52, v2 ; 100E0434
v_mul_f32_e32 v8, s53, v2 ; 10100435
; v19, v11, v12, v2 accumulate the position TEMP[1].xyzw (CONST[17..20] with IN[0]).
v_mul_f32_e32 v19, s38, v2 ; 10260426
v_mac_f32_e32 v14, s37, v10 ; 3E1C1425
; Select the second texcoord source: IN[2].xy when vcc is set, else IN[3].xy.
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v15, s54, v3 ; 3E1E0636
v_mac_f32_e32 v16, s55, v3 ; 3E200637
v_mac_f32_e32 v7, s56, v3 ; 3E0E0638
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mac_f32_e32 v19, s42, v3 ; 3E26062A
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s43, v3 ; 3E16062B
v_mul_f32_e32 v12, s40, v2 ; 10180428
v_mac_f32_e32 v12, s44, v3 ; 3E18062C
v_mul_f32_e32 v2, s41, v2 ; 10040429
v_mac_f32_e32 v2, s45, v3 ; 3E04062D
v_mac_f32_e32 v15, s58, v4 ; 3E1E083A
v_mac_f32_e32 v16, s59, v4 ; 3E20083B
v_mac_f32_e32 v7, s60, v4 ; 3E0E083C
v_mac_f32_e32 v8, s19, v4 ; 3E100813
v_mac_f32_e32 v19, s61, v4 ; 3E26083D
v_mac_f32_e32 v11, s62, v4 ; 3E16083E
v_mac_f32_e32 v12, s63, v4 ; 3E18083F
v_mac_f32_e32 v2, s64, v4 ; 3E040840
; CONST[9] term is added to TEMP[0].xyz only (v8 = TEMP[0].w gets no term).
v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18
v_mac_f32_e32 v16, s25, v5 ; 3E200A19
v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A
v_mac_f32_e32 v19, s65, v5 ; 3E260A41
v_mac_f32_e32 v11, s66, v5 ; 3E160A42
v_mac_f32_e32 v12, s67, v5 ; 3E180A43
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
; v3 seeded with CONST[15].z for the second texcoord's x.
v_mov_b32_e32 v3, s50 ; 7E060232
; v4 = squared length of (v17, v18, v6), then its reciprocal square root.
v_mul_f32_e32 v4, v17, v17 ; 10082311
v_mac_f32_e32 v4, v18, v18 ; 3E082512
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
; Second texcoord: selected.xy * CONST[15].xy + CONST[15].zw   (IR %168, %170)
v_mac_f32_e32 v3, s48, v9 ; 3E061230
v_mac_f32_e32 v0, s49, v10 ; 3E001431
; Export target 32 = OUT[1] (GENERIC[0]).
exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D
; Wait before overwriting v0/v3, which the export above reads.
s_waitcnt expcnt(0) ; BF8C070F
; (v0, v3, v4) = normalized vector n.
v_mul_f32_e32 v0, v4, v17 ; 10002304
v_mul_f32_e32 v3, v4, v18 ; 10062504
v_mul_f32_e32 v4, v4, v6 ; 10080D04
; v6, v9, v5 accumulate the three DP4s of (n.x*n.y, n.y*n.z, n.z*n.z, n.z*n.x)
; against CONST[2], CONST[3], CONST[4]; v5/v10 hold one product at a time.
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v6, s11, v5 ; 100C0A0B
v_mul_f32_e32 v9, s12, v5 ; 10120A0C
v_mul_f32_e32 v5, s13, v5 ; 100A0A0D
v_mul_f32_e32 v10, v3, v0 ; 10140103
v_mac_f32_e32 v6, s14, v10 ; 3E0C140E
v_mac_f32_e32 v9, s9, v10 ; 3E121409
v_mac_f32_e32 v5, s10, v10 ; 3E0A140A
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mac_f32_e32 v6, s5, v10 ; 3E0C1405
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v0, v4 ; 10140900
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s4, v10 ; 3E121404
v_mac_f32_e32 v5, s18, v10 ; 3E0A1412
; Export target 33 = OUT[2] (GENERIC[1]): n.xyz with w = v1 = 0.0.
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
s_waitcnt expcnt(0) ; BF8C070F
; v0 = n.x*n.x - n.y*n.y, then add CONST[5].xyz * v0 to the DP4 results.
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100
v_mac_f32_e32 v6, s15, v0 ; 3E0C000F
v_mac_f32_e32 v9, s16, v0 ; 3E120010
v_mac_f32_e32 v5, s17, v0 ; 3E0A0011
; v0 = 0.5 * pos.y, v3 = 0.5 * pos.w
v_mul_f32_e32 v0, 0.5, v11 ; 100016F0
v_mul_f32_e32 v3, 0.5, v2 ; 100604F0
; Export target 34 = OUT[3] (GENERIC[2]), w = v1 = 0.0.
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
s_waitcnt expcnt(0) ; BF8C070F
; v1 = 0.5*pos.x + 0.5*pos.w; v3 = 0.5*pos.w + CONST[1].x * (0.5*pos.y)
v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0
v_mac_f32_e32 v3, s6, v0 ; 3E060006
; Export target 35 = OUT[4] (GENERIC[3]).
exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301
; v0, v1, v3 = TEMP[0].xyz - CONST[0].xyz
v_subrev_f32_e32 v0, s0, v15 ; 0A001E00
s_waitcnt expcnt(0) ; BF8C070F
v_subrev_f32_e32 v1, s1, v16 ; 0A022001
v_subrev_f32_e32 v3, s2, v7 ; 0A060E02
; Export target 36 = OUT[5] (GENERIC[4]): (pos.z, TEMP[0].xyz - CONST[0].xyz).
exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C
; Export target 37 = OUT[6] (GENERIC[5]): TEMP[0].
exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F
; Export target 12 = OUT[0] (POSITION); the only export with its 4th operand set.
exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 20
Code Size: 788 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[21..22]
DCL CONST[24..25]
DCL TEMP[0..19], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz
10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww
11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
13: MOV TEMP[5].xy, IN[0].xyyy
14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D
15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx
16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx
17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
19: MOV TEMP[7].xyz, IMM[0].yyyy
20: MOV TEMP[8].w, IMM[0].xxxx
21: MOV TEMP[8].xyz, TEMP[0].xyzx
22: DP4 TEMP[9].x, CONST[1], TEMP[8]
23: DP4 TEMP[10].x, CONST[2], TEMP[8]
24: MOV TEMP[9].y, TEMP[10].xxxx
25: DP4 TEMP[8].x, CONST[3], TEMP[8]
26: MOV TEMP[9].z, TEMP[8].xxxx
27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
28: MOV TEMP[9].xy, IN[3].xyyy
29: MOV TEMP[9].w, IN[3].wwww
30: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D
31: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
33: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
34: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
35: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz
36: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
37: MOV TEMP[11].xyz, TEMP[10].xyzx
38: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww
39: UIF TEMP[12].xxxx :0
40: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
41: RSQ TEMP[12].x, TEMP[12].xxxx
42: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
43: MOV TEMP[13].xyz, -IN[5].xyzx
44: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
45: RCP TEMP[15].x, TEMP[12].xxxx
46: RCP TEMP[15].y, TEMP[12].yyyy
47: RCP TEMP[15].z, TEMP[12].zzzz
48: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
49: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
50: RCP TEMP[15].x, TEMP[12].xxxx
51: RCP TEMP[15].y, TEMP[12].yyyy
52: RCP TEMP[15].z, TEMP[12].zzzz
53: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
54: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz
55: UIF TEMP[15].xxxx :0
56: MOV TEMP[16].x, TEMP[14].xxxx
57: ELSE :0
58: MOV TEMP[16].x, TEMP[13].xxxx
59: ENDIF
60: UIF TEMP[15].yyyy :0
61: MOV TEMP[17].x, TEMP[14].yyyy
62: ELSE :0
63: MOV TEMP[17].x, TEMP[13].yyyy
64: ENDIF
65: UIF TEMP[15].zzzz :0
66: MOV TEMP[14].x, TEMP[14].zzzz
67: ELSE :0
68: MOV TEMP[14].x, TEMP[13].zzzz
69: ENDIF
70: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
71: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
72: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
73: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
74: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
75: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
76: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
77: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
78: ENDIF
79: ADD TEMP[12].x, IMM[0].xxxx, -CONST[22].xxxx
80: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx
81: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
82: MOV TEMP[11].xyz, TEMP[11].xyzz
83: MOV TEMP[11].w, TEMP[12].xxxx
84: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
85: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
86: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
87: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
88: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz
89: UIF TEMP[12].xxxx :0
90: MOV TEMP[12].xyz, TEMP[10].xyzx
91: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww
92: UIF TEMP[13].xxxx :0
93: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
94: RSQ TEMP[13].x, TEMP[13].xxxx
95: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
96: MOV TEMP[13].xyz, -IN[5].xyzx
97: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
98: RCP TEMP[15].x, TEMP[10].xxxx
99: RCP TEMP[15].y, TEMP[10].yyyy
100: RCP TEMP[15].z, TEMP[10].zzzz
101: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
102: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
103: RCP TEMP[15].x, TEMP[10].xxxx
104: RCP TEMP[15].y, TEMP[10].yyyy
105: RCP TEMP[15].z, TEMP[10].zzzz
106: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
107: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz
108: UIF TEMP[15].xxxx :0
109: MOV TEMP[16].x, TEMP[14].xxxx
110: ELSE :0
111: MOV TEMP[16].x, TEMP[13].xxxx
112: ENDIF
113: UIF TEMP[15].yyyy :0
114: MOV TEMP[17].x, TEMP[14].yyyy
115: ELSE :0
116: MOV TEMP[17].x, TEMP[13].yyyy
117: ENDIF
118: UIF TEMP[15].zzzz :0
119: MOV TEMP[14].x, TEMP[14].zzzz
120: ELSE :0
121: MOV TEMP[14].x, TEMP[13].zzzz
122: ENDIF
123: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
124: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
125: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
126: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
127: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
128: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
129: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
130: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
131: ENDIF
132: ADD TEMP[10].x, IMM[0].xxxx, -CONST[22].xxxx
133: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx
134: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
135: MOV TEMP[12].xyz, TEMP[12].xyzz
136: MOV TEMP[12].w, TEMP[10].xxxx
137: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
138: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
139: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
140: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
141: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
142: ELSE :0
143: MOV TEMP[7].xyz, TEMP[11].xyzx
144: ENDIF
145: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
146: MOV TEMP[1].xyz, -TEMP[1].xyzx
147: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx
148: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
149: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
150: RSQ TEMP[11].x, TEMP[11].xxxx
151: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
152: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
153: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
154: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
155: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx
156: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
157: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
158: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx
159: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy
160: LG2 TEMP[13].x, TEMP[13].xxxx
161: RCP TEMP[13].x, TEMP[13].xxxx
162: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx
163: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
164: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx
165: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
166: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx
167: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
168: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww
169: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx
170: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx
171: MOV TEMP[17].xy, IN[0].xyyy
172: TEX TEMP[17].xyz, TEMP[17], SAMP[4], 2D
173: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
174: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx
175: MOV_SAT TEMP[4].x, TEMP[4].xxxx
176: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
177: MUL TEMP[19].x, TEMP[16].xxxx, TEMP[16].xxxx
178: MUL TEMP[16].x, TEMP[19].xxxx, TEMP[16].xxxx
179: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
180: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
181: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx
182: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx
183: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].zzzz
184: RCP TEMP[1].x, TEMP[1].xxxx
185: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
186: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
187: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
188: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx
189: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
190: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
191: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
192: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
193: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
194: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
195: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz
196: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz
197: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
198: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
199: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
200: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
201: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
202: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww
203: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
204: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
205: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
206: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
207: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx
208: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
209: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
210: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
211: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
212: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
213: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx
214: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
215: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
216: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
217: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
218: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
219: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
220: MAD TEMP[0].xyz, TEMP[17].xyzz, CONST[25].xyzz, TEMP[1].xyzz
221: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww
222: MOV_SAT TEMP[1].x, TEMP[1].xxxx
223: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
224: MOV TEMP[0].xyz, TEMP[0].xyzx
225: MOV TEMP[0].w, IMM[0].xxxx
226: MOV OUT[0], TEMP[0]
227: END
; ModuleID = 'tgsi'
; @main: LLVM IR for one shader of this dump (module id 'tgsi'). It reads
; interpolated inputs through llvm.SI.fs.interp and carries attribute group #0,
; so it is presumably the fragment-shader translation of the TGSI listing
; printed directly above it -- confirm against the dump producer.
; Arguments used in the body: %1 (array of <16 x i8>, source of the constant
; descriptor), %2 and %3 (arrays from which the <16 x i8> / <32 x i8> operands
; of the sample intrinsics are loaded), %5 and %7 (passed to every interp call).
; The function returns void; its only visible output is the llvm.SI.export call.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Load element 0 of %1 and read scalar constants through it. The i32 operand
; looks like a byte offset laid out as CONST[n].c -> 16*n + 4*c: e.g. offset 300
; (%75) is used where the TGSI listing uses CONST[18].w, and offsets 400/404/408
; (%82-%84) where it uses CONST[25].xyz. NOTE(review): inferred from usage.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 404)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 408)
; Load six (<32 x i8>, <16 x i8>) pairs from indices 0..5 of %3 and %2. Each pair
; is later passed as the 2nd/3rd operand of a sample intrinsic (presumably the
; texture resource and sampler state for slots 0..5 -- TODO confirm).
%85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0
%87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)*
%97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0
%98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)*
%100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0
%101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)*
%103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0
%104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)*
%106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0
%107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)*
%109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0
%110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)*
%112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0
%113 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%114 = bitcast <8 x i32> addrspace(2)* %113 to <32 x i8> addrspace(2)*
%115 = load <32 x i8>, <32 x i8> addrspace(2)* %114, align 32, !tbaa !0
%116 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%117 = bitcast <4 x i32> addrspace(2)* %116 to <16 x i8> addrspace(2)*
%118 = load <16 x i8>, <16 x i8> addrspace(2)* %117, align 16, !tbaa !0
; Interpolated inputs. The first operand appears to select the component and the
; second the input index (the value later used where TGSI reads IN[4].x is
; interp(0, 4)). Inputs 0..5 are read; not every component of each is needed.
%119 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%134 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%135 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%136 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
; Normalize input 1 (xyz): (%143,%144,%145) = v * rsq(dot(v, v)).
%137 = fmul float %121, %121
%138 = fmul float %122, %122
%139 = fadd float %138, %137
%140 = fmul float %123, %123
%141 = fadd float %139, %140
%142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141)
%143 = fmul float %121, %142
%144 = fmul float %122, %142
%145 = fmul float %123, %142
; Normalize input 4 (components 1..3): (%152,%153,%154).
%146 = fmul float %131, %131
%147 = fmul float %132, %132
%148 = fadd float %147, %146
%149 = fmul float %133, %133
%150 = fadd float %148, %149
%151 = call float @llvm.AMDGPU.rsq.clamped.f32(float %150)
%152 = fmul float %131, %151
%153 = fmul float %132, %151
%154 = fmul float %133, %151
; Sample slot 2 at input 0 (xy), scale the xyz result by constants %76-%78.
%155 = bitcast float %119 to i32
%156 = bitcast float %120 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %97, <16 x i8> %100, i32 2)
%160 = extractelement <4 x float> %159, i32 0
%161 = extractelement <4 x float> %159, i32 1
%162 = extractelement <4 x float> %159, i32 2
%163 = fmul float %76, %160
%164 = fmul float %77, %161
%165 = fmul float %78, %162
; %166-%168: lrp of the scaled sample against constants %66-%68, weight %79.
%166 = call float @llvm.AMDGPU.lrp(float %79, float %163, float %66)
%167 = call float @llvm.AMDGPU.lrp(float %79, float %164, float %67)
%168 = call float @llvm.AMDGPU.lrp(float %79, float %165, float %68)
; %170 = %69 - %79 * %69; %171-%173 = scaled sample * %170.
%169 = fmul float %79, %69
%170 = fsub float %69, %169
%171 = fmul float %163, %170
%172 = fmul float %164, %170
%173 = fmul float %165, %170
; Sample slot 3 at the same coordinates; only component 1 is used.
; %182 = sample.y * %81 + (1 - %81).
%174 = bitcast float %119 to i32
%175 = bitcast float %120 to i32
%176 = insertelement <2 x i32> undef, i32 %174, i32 0
%177 = insertelement <2 x i32> %176, i32 %175, i32 1
%178 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %177, <32 x i8> %103, <16 x i8> %106, i32 2)
%179 = extractelement <4 x float> %178, i32 1
%180 = fsub float 1.000000e+00, %81
%181 = fmul float %179, %81
%182 = fadd float %181, %180
; %188 = max(dot(normalized input 1, constants %24-%26), 0).
%183 = fmul float %143, %24
%184 = fmul float %144, %25
%185 = fadd float %184, %183
%186 = fmul float %145, %26
%187 = fadd float %185, %186
%188 = call float @llvm.maxnum.f32(float %187, float 0.000000e+00)
; Three dot products of normalized input 1 with constant triples, each plus a
; fourth constant (%30, %34, %38): results %194, %200, %206.
%189 = fmul float %27, %143
%190 = fmul float %28, %144
%191 = fadd float %189, %190
%192 = fmul float %29, %145
%193 = fadd float %191, %192
%194 = fadd float %193, %30
%195 = fmul float %31, %143
%196 = fmul float %32, %144
%197 = fadd float %195, %196
%198 = fmul float %33, %145
%199 = fadd float %197, %198
%200 = fadd float %199, %34
%201 = fmul float %35, %143
%202 = fmul float %36, %144
%203 = fadd float %201, %202
%204 = fmul float %37, %145
%205 = fadd float %203, %204
%206 = fadd float %205, %38
; Add input 2 (xyz) to those results.
%207 = fadd float %124, %194
%208 = fadd float %125, %200
%209 = fadd float %126, %206
; Divide input 3 (xy) by its component 3 and sample slot 5 there; only
; component 0 of the result is used.
%210 = fdiv float %127, %129
%211 = fdiv float %128, %129
%212 = bitcast float %210 to i32
%213 = bitcast float %211 to i32
%214 = insertelement <2 x i32> undef, i32 %212, i32 0
%215 = insertelement <2 x i32> %214, i32 %213, i32 1
%216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %115, <16 x i8> %118, i32 2)
%217 = extractelement <4 x float> %216, i32 0
; %218-%220 = constants %70-%72 scaled by that sample; %221-%223 = %207-%209
; scaled by %182.
%218 = fmul float %70, %217
%219 = fmul float %71, %217
%220 = fmul float %72, %217
%221 = fmul float %207, %182
%222 = fmul float %208, %182
%223 = fmul float %209, %182
; (%235,%236,%237) = B - 2 * dot(A, B) * A with A = normalized input 1 and
; B = normalized input 4 (the standard reflection formula).
%224 = fmul float %143, %152
%225 = fmul float %144, %153
%226 = fadd float %225, %224
%227 = fmul float %145, %154
%228 = fadd float %226, %227
%229 = fmul float %228, %143
%230 = fmul float %228, %144
%231 = fmul float %228, %145
%232 = fmul float %229, 2.000000e+00
%233 = fmul float %230, 2.000000e+00
%234 = fmul float %231, 2.000000e+00
%235 = fsub float %152, %232
%236 = fsub float %153, %233
%237 = fsub float %154, %234
; Take the IF block only when constant %51 is greater than zero.
%238 = fcmp ogt float %51, 0.000000e+00
br i1 %238, label %IF, label %ENDIF
IF: ; preds = %main_body
; Adjust the reflected vector using two constant triples (%44-%46, %47-%49) and
; input 5 (xyz): normalize the vector, pick per component one of the two
; (bound - input5) / dir quotients by the sign of dir, take the minimum of the
; three, then form dir * min + (center - constants at 160/164/168 + input5)
; - center, where center is the midpoint of the two triples.
; NOTE(review): resembles a box-projected cube-map lookup -- confirm.
%239 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%240 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%241 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%242 = fmul float %235, %235
%243 = fmul float %236, %236
%244 = fadd float %243, %242
%245 = fmul float %237, %237
%246 = fadd float %244, %245
%247 = call float @llvm.AMDGPU.rsq.clamped.f32(float %246)
%248 = fmul float %235, %247
%249 = fmul float %236, %247
%250 = fmul float %237, %247
%251 = fsub float %44, %134
%252 = fsub float %45, %135
%253 = fsub float %46, %136
%254 = fdiv float 1.000000e+00, %248
%255 = fdiv float 1.000000e+00, %249
%256 = fdiv float 1.000000e+00, %250
%257 = fmul float %251, %254
%258 = fmul float %252, %255
%259 = fmul float %253, %256
%260 = fsub float %47, %134
%261 = fsub float %48, %135
%262 = fsub float %49, %136
%263 = fdiv float 1.000000e+00, %248
%264 = fdiv float 1.000000e+00, %249
%265 = fdiv float 1.000000e+00, %250
%266 = fmul float %260, %263
%267 = fmul float %261, %264
%268 = fmul float %262, %265
%269 = fcmp ogt float %248, 0.000000e+00
%270 = fcmp ogt float %249, 0.000000e+00
%271 = fcmp ogt float %250, 0.000000e+00
%. = select i1 %269, float %257, float %266
%temp68.0 = select i1 %270, float %258, float %267
%.104 = select i1 %271, float %259, float %268
%272 = fadd float %44, %47
%273 = fadd float %45, %48
%274 = fadd float %46, %49
%275 = fmul float %272, 5.000000e-01
%276 = fmul float %273, 5.000000e-01
%277 = fmul float %274, 5.000000e-01
%278 = call float @llvm.minnum.f32(float %., float %temp68.0)
%279 = call float @llvm.minnum.f32(float %278, float %.104)
%280 = fsub float %275, %241
%281 = fsub float %276, %240
%282 = fsub float %277, %239
%283 = fadd float %280, %134
%284 = fadd float %281, %135
%285 = fadd float %282, %136
%286 = fmul float %248, %279
%287 = fadd float %286, %283
%288 = fmul float %249, %279
%289 = fadd float %288, %284
%290 = fmul float %250, %279
%291 = fadd float %290, %285
%292 = fsub float %287, %275
%293 = fsub float %289, %276
%294 = fsub float %291, %277
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Lookup direction: the adjusted vector from IF, or the plain reflected vector.
%temp44.0 = phi float [ %292, %IF ], [ %235, %main_body ]
%temp45.0 = phi float [ %293, %IF ], [ %236, %main_body ]
%temp46.0 = phi float [ %294, %IF ], [ %237, %main_body ]
; %297 = pow(1 - %80, 0.75) * 7; it is placed in lane 3 of the cube input and in
; the last coordinate of the samplel call (presumably the explicit LOD).
%295 = fsub float 1.000000e+00, %80
%296 = call float @llvm.pow.f32(float %295, float 7.500000e-01)
%297 = fmul float %296, 7.000000e+00
%298 = insertelement <4 x float> undef, float %temp44.0, i32 0
%299 = insertelement <4 x float> %298, float %temp45.0, i32 1
%300 = insertelement <4 x float> %299, float %temp46.0, i32 2
%301 = insertelement <4 x float> %300, float %297, i32 3
%302 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %301)
%303 = extractelement <4 x float> %302, i32 0
%304 = extractelement <4 x float> %302, i32 1
%305 = extractelement <4 x float> %302, i32 2
%306 = extractelement <4 x float> %302, i32 3
; Divide lanes 0 and 1 of the cube result by |lane 2| and add 1.5 to each; lane 3
; is forwarded unchanged as the third coordinate.
%307 = call float @llvm.fabs.f32(float %305)
%308 = fdiv float 1.000000e+00, %307
%309 = fmul float %303, %308
%310 = fadd float %309, 1.500000e+00
%311 = fmul float %304, %308
%312 = fadd float %311, 1.500000e+00
%313 = bitcast float %312 to i32
%314 = bitcast float %310 to i32
%315 = bitcast float %306 to i32
%316 = bitcast float %297 to i32
%317 = insertelement <4 x i32> undef, i32 %313, i32 0
%318 = insertelement <4 x i32> %317, i32 %314, i32 1
%319 = insertelement <4 x i32> %318, i32 %315, i32 2
%320 = insertelement <4 x i32> %319, i32 %316, i32 3
; Sample slot 0; rgb is scaled by %52 * pow(alpha, %53).
%321 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %320, <32 x i8> %86, <16 x i8> %88, i32 4)
%322 = extractelement <4 x float> %321, i32 0
%323 = extractelement <4 x float> %321, i32 1
%324 = extractelement <4 x float> %321, i32 2
%325 = extractelement <4 x float> %321, i32 3
%326 = call float @llvm.pow.f32(float %325, float %53)
%327 = fmul float %52, %326
%328 = fmul float %327, %322
%329 = fmul float %327, %323
%330 = fmul float %327, %324
; Enter the second lookup (IF90) only when constant %50 is below the threshold
; 0x3FEFFFEB00000000 (approximately 0.99999).
%331 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %331, label %IF90, label %ENDIF89
IF90: ; preds = %ENDIF
; Apply the IF93 adjustment only when constant %63 is greater than zero.
%332 = fcmp ogt float %63, 0.000000e+00
br i1 %332, label %IF93, label %ENDIF92
ENDIF89: ; preds = %ENDIF, %ENDIF92
; Final combine and export. temp28..30 hold either the blend of both cube
; samples (from ENDIF92) or the first sample alone (from ENDIF).
%temp28.0 = phi float [ %574, %ENDIF92 ], [ %328, %ENDIF ]
%temp29.0 = phi float [ %575, %ENDIF92 ], [ %329, %ENDIF ]
%temp30.0 = phi float [ %576, %ENDIF92 ], [ %330, %ENDIF ]
%333 = fmul float %temp28.0, %182
%334 = fmul float %temp29.0, %182
%335 = fmul float %temp30.0, %182
%336 = fsub float 1.000000e+00, %80
; (%346,%347,%348) = normalize(constants %24-%26 minus normalized input 4).
%337 = fsub float %24, %152
%338 = fsub float %25, %153
%339 = fsub float %26, %154
%340 = fmul float %337, %337
%341 = fmul float %338, %338
%342 = fadd float %341, %340
%343 = fmul float %339, %339
%344 = fadd float %342, %343
%345 = call float @llvm.AMDGPU.rsq.clamped.f32(float %344)
%346 = fmul float %337, %345
%347 = fmul float %338, %345
%348 = fmul float %339, %345
; %355 = max(-dot(normalized input 4, normalized input 1), 0).
%349 = fmul float %152, %143
%350 = fsub float -0.000000e+00, %349
%351 = fmul float %153, %144
%352 = fsub float %350, %351
%353 = fmul float %154, %145
%354 = fsub float %352, %353
%355 = call float @llvm.maxnum.f32(float %354, float 0.000000e+00)
; %361 = max(dot(constants %24-%26, (%346,%347,%348)), 0).
%356 = fmul float %24, %346
%357 = fmul float %25, %347
%358 = fadd float %357, %356
%359 = fmul float %26, %348
%360 = fadd float %358, %359
%361 = call float @llvm.maxnum.f32(float %360, float 0.000000e+00)
; %363 = (1 - %80)^2 * %75 (TGSI 156-157 use CONST[18].w here).
%362 = fmul float %336, %336
%363 = fmul float %362, %75
; %370 = (10 / log2((1 - %336) * ~0.968 + ~0.03))^2; used below as a pow exponent.
%364 = fsub float 1.000000e+00, %336
%365 = fmul float %364, 0x3FEEF9DB20000000
%366 = fadd float %365, 0x3F9EB851E0000000
%367 = call float @llvm.log2.f32(float %366)
%368 = fdiv float 1.000000e+00, %367
%369 = fmul float %368, 1.000000e+01
%370 = fmul float %369, %369
%371 = fsub float 1.000000e+00, %188
%372 = fsub float 1.000000e+00, %355
; %376 = 2 * %361 * (%361 * %336) + 0.5.
%373 = fmul float %361, 2.000000e+00
%374 = fmul float %361, %336
%375 = fmul float %373, %374
%376 = fadd float %375, 5.000000e-01
%377 = fsub float 1.000000e+00, %361
%378 = fsub float 1.000000e+00, %355
; Sample slot 4 at input 0 (xy); xyz is added near the end scaled by %82-%84.
%379 = bitcast float %119 to i32
%380 = bitcast float %120 to i32
%381 = insertelement <2 x i32> undef, i32 %379, i32 0
%382 = insertelement <2 x i32> %381, i32 %380, i32 1
%383 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %382, <32 x i8> %109, <16 x i8> %112, i32 2)
%384 = extractelement <4 x float> %383, i32 0
%385 = extractelement <4 x float> %383, i32 1
%386 = extractelement <4 x float> %383, i32 2
; %389 = clamp(%80 + (1 - %170), 0, 1).
%387 = fsub float 1.000000e+00, %170
%388 = fadd float %80, %387
%389 = call float @llvm.AMDIL.clamp.(float %388, float 0.000000e+00, float 1.000000e+00)
; %393 = %378^5 (square times cube); %394-%396 = lrp(%393, %389, %166..%168).
%390 = fmul float %378, %378
%391 = fmul float %378, %378
%392 = fmul float %391, %378
%393 = fmul float %390, %392
%394 = call float @llvm.AMDGPU.lrp(float %393, float %389, float %166)
%395 = call float @llvm.AMDGPU.lrp(float %393, float %389, float %167)
%396 = call float @llvm.AMDGPU.lrp(float %393, float %389, float %168)
; %401 = 1 / (lrp(%188, 1, %363) * lrp(%355, 1, %363) + ~1e-4).
%397 = call float @llvm.AMDGPU.lrp(float %188, float 1.000000e+00, float %363)
%398 = call float @llvm.AMDGPU.lrp(float %355, float 1.000000e+00, float %363)
%399 = fmul float %397, %398
%400 = fadd float %399, 0x3F1A36E2E0000000
%401 = fdiv float 1.000000e+00, %400
; %408 = pow(max(dot(normalized input 1, (%346,%347,%348)), 0), %370).
%402 = fmul float %143, %346
%403 = fmul float %144, %347
%404 = fadd float %403, %402
%405 = fmul float %145, %348
%406 = fadd float %404, %405
%407 = call float @llvm.maxnum.f32(float %406, float 0.000000e+00)
%408 = call float @llvm.pow.f32(float %407, float %370)
; %415 = max(%401 * %408 * (%370 + 1) * %74 * %188 * %73, 0), then scaled per
; channel by %218-%220.
%409 = fadd float %370, 1.000000e+00
%410 = fmul float %409, %74
%411 = fmul float %408, %410
%412 = fmul float %401, %411
%413 = fmul float %412, %188
%414 = fmul float %413, %73
%415 = call float @llvm.maxnum.f32(float %414, float 0.000000e+00)
%416 = fmul float %415, %218
%417 = fmul float %415, %219
%418 = fmul float %415, %220
; %427/%429/%431 = c + (1 - c) * %377^5 for c = %166, %167, %168.
%419 = fsub float 1.000000e+00, %166
%420 = fsub float 1.000000e+00, %167
%421 = fsub float 1.000000e+00, %168
%422 = fmul float %377, %377
%423 = fmul float %377, %377
%424 = fmul float %423, %377
%425 = fmul float %422, %424
%426 = fmul float %419, %425
%427 = fadd float %426, %166
%428 = fmul float %420, %425
%429 = fadd float %428, %167
%430 = fmul float %421, %425
%431 = fadd float %430, %168
; %447 = (1 + (%376 - 1) * %371^5) * (1 + (%376 - 1) * %372^5) * %188.
%432 = fadd float %376, -1.000000e+00
%433 = fmul float %371, %371
%434 = fmul float %371, %371
%435 = fmul float %434, %371
%436 = fmul float %433, %435
%437 = fmul float %432, %436
%438 = fadd float %437, 1.000000e+00
%439 = fadd float %376, -1.000000e+00
%440 = fmul float %372, %372
%441 = fmul float %372, %372
%442 = fmul float %441, %372
%443 = fmul float %440, %442
%444 = fmul float %439, %443
%445 = fadd float %444, 1.000000e+00
%446 = fmul float %438, %445
%447 = fmul float %446, %188
; Accumulate the colour per channel:
;   %171.. * (%218.. * %447 + %221..) + %416.. * %427.. + %333.. * %394..
;   + slot-4 sample * %82..%84.
%448 = fmul float %218, %447
%449 = fadd float %448, %221
%450 = fmul float %219, %447
%451 = fadd float %450, %222
%452 = fmul float %220, %447
%453 = fadd float %452, %223
%454 = fmul float %171, %449
%455 = fmul float %172, %451
%456 = fmul float %173, %453
%457 = fmul float %416, %427
%458 = fadd float %457, %454
%459 = fmul float %417, %429
%460 = fadd float %459, %455
%461 = fmul float %418, %431
%462 = fadd float %461, %456
%463 = fmul float %333, %394
%464 = fadd float %463, %458
%465 = fmul float %334, %395
%466 = fadd float %465, %460
%467 = fmul float %335, %396
%468 = fadd float %467, %462
%469 = fmul float %384, %82
%470 = fadd float %469, %464
%471 = fmul float %385, %83
%472 = fadd float %471, %466
%473 = fmul float %386, %84
%474 = fadd float %473, %468
; %477 = clamp(input4.x * %42 + %43, 0, 1) (TGSI 221-222); the colour is then
; lrp'ed against constants %39-%41 (TGSI 223, CONST[4].xyz).
%475 = fmul float %130, %42
%476 = fadd float %475, %43
%477 = call float @llvm.AMDIL.clamp.(float %476, float 0.000000e+00, float 1.000000e+00)
%478 = call float @llvm.AMDGPU.lrp(float %477, float %470, float %39)
%479 = call float @llvm.AMDGPU.lrp(float %477, float %472, float %40)
%480 = call float @llvm.AMDGPU.lrp(float %477, float %474, float %41)
; Pack (x, y) and (z, 1.0) with llvm.SI.packf16 and export the two packed values.
%481 = call i32 @llvm.SI.packf16(float %478, float %479)
%482 = bitcast i32 %481 to float
%483 = call i32 @llvm.SI.packf16(float %480, float 1.000000e+00)
%484 = bitcast i32 %483 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %482, float %484, float %482, float %484)
ret void
IF93: ; preds = %IF90
; Same direction adjustment as block IF, but with constant triples %54-%56 and
; %57-%59 and the offset constants %60-%62.
%485 = fmul float %235, %235
%486 = fmul float %236, %236
%487 = fadd float %486, %485
%488 = fmul float %237, %237
%489 = fadd float %487, %488
%490 = call float @llvm.AMDGPU.rsq.clamped.f32(float %489)
%491 = fmul float %235, %490
%492 = fmul float %236, %490
%493 = fmul float %237, %490
%494 = fsub float %54, %134
%495 = fsub float %55, %135
%496 = fsub float %56, %136
%497 = fdiv float 1.000000e+00, %491
%498 = fdiv float 1.000000e+00, %492
%499 = fdiv float 1.000000e+00, %493
%500 = fmul float %494, %497
%501 = fmul float %495, %498
%502 = fmul float %496, %499
%503 = fsub float %57, %134
%504 = fsub float %58, %135
%505 = fsub float %59, %136
%506 = fdiv float 1.000000e+00, %491
%507 = fdiv float 1.000000e+00, %492
%508 = fdiv float 1.000000e+00, %493
%509 = fmul float %503, %506
%510 = fmul float %504, %507
%511 = fmul float %505, %508
%512 = fcmp ogt float %491, 0.000000e+00
%513 = fcmp ogt float %492, 0.000000e+00
%514 = fcmp ogt float %493, 0.000000e+00
%.105 = select i1 %512, float %500, float %509
%temp68.1 = select i1 %513, float %501, float %510
%.106 = select i1 %514, float %502, float %511
%515 = fadd float %54, %57
%516 = fadd float %55, %58
%517 = fadd float %56, %59
%518 = fmul float %515, 5.000000e-01
%519 = fmul float %516, 5.000000e-01
%520 = fmul float %517, 5.000000e-01
%521 = call float @llvm.minnum.f32(float %.105, float %temp68.1)
%522 = call float @llvm.minnum.f32(float %521, float %.106)
%523 = fsub float %518, %60
%524 = fsub float %519, %61
%525 = fsub float %520, %62
%526 = fadd float %523, %134
%527 = fadd float %524, %135
%528 = fadd float %525, %136
%529 = fmul float %491, %522
%530 = fadd float %529, %526
%531 = fmul float %492, %522
%532 = fadd float %531, %527
%533 = fmul float %493, %522
%534 = fadd float %533, %528
%535 = fsub float %530, %518
%536 = fsub float %532, %519
%537 = fsub float %534, %520
br label %ENDIF92
ENDIF92: ; preds = %IF90, %IF93
; Second cube lookup: same sequence as in ENDIF, but sampling slot 1 and using
; constants %64 (scale) and %65 (pow exponent).
%temp48.0 = phi float [ %535, %IF93 ], [ %235, %IF90 ]
%temp49.0 = phi float [ %536, %IF93 ], [ %236, %IF90 ]
%temp50.0 = phi float [ %537, %IF93 ], [ %237, %IF90 ]
%538 = fsub float 1.000000e+00, %80
%539 = call float @llvm.pow.f32(float %538, float 7.500000e-01)
%540 = fmul float %539, 7.000000e+00
%541 = insertelement <4 x float> undef, float %temp48.0, i32 0
%542 = insertelement <4 x float> %541, float %temp49.0, i32 1
%543 = insertelement <4 x float> %542, float %temp50.0, i32 2
%544 = insertelement <4 x float> %543, float %540, i32 3
%545 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %544)
%546 = extractelement <4 x float> %545, i32 0
%547 = extractelement <4 x float> %545, i32 1
%548 = extractelement <4 x float> %545, i32 2
%549 = extractelement <4 x float> %545, i32 3
%550 = call float @llvm.fabs.f32(float %548)
%551 = fdiv float 1.000000e+00, %550
%552 = fmul float %546, %551
%553 = fadd float %552, 1.500000e+00
%554 = fmul float %547, %551
%555 = fadd float %554, 1.500000e+00
%556 = bitcast float %555 to i32
%557 = bitcast float %553 to i32
%558 = bitcast float %549 to i32
%559 = bitcast float %540 to i32
%560 = insertelement <4 x i32> undef, i32 %556, i32 0
%561 = insertelement <4 x i32> %560, i32 %557, i32 1
%562 = insertelement <4 x i32> %561, i32 %558, i32 2
%563 = insertelement <4 x i32> %562, i32 %559, i32 3
%564 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %563, <32 x i8> %91, <16 x i8> %94, i32 4)
%565 = extractelement <4 x float> %564, i32 0
%566 = extractelement <4 x float> %564, i32 1
%567 = extractelement <4 x float> %564, i32 2
%568 = extractelement <4 x float> %564, i32 3
%569 = call float @llvm.pow.f32(float %568, float %65)
%570 = fmul float %64, %569
%571 = fmul float %570, %565
%572 = fmul float %570, %566
%573 = fmul float %570, %567
; Blend the first lookup (%328-%330) with this one (%571-%573), weight %50.
%574 = call float @llvm.AMDGPU.lrp(float %50, float %328, float %571)
%575 = call float @llvm.AMDGPU.lrp(float %50, float %329, float %572)
%576 = call float @llvm.AMDGPU.lrp(float %50, float %330, float %573)
br label %ENDIF89
}
; External declarations for every intrinsic called by @main above, followed by
; the attribute groups and metadata node they reference.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; llvm.SI.export carries no attribute group, unlike the declarations above.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is the group attached to @main; #1 and #2 are used by the declarations.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; !0 is the node referenced as !tbaa on the loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800
v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801
v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900
v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901
v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00
v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01
v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00
v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01
v_interp_p1_f32 v16, v0, 1, 3, [m0] ; C8400D00
v_interp_p2_f32 v16, [v16], v1, 1, 3, [m0] ; C8410D01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
v_interp_p1_f32 v11, v0, 0, 4, [m0] ; C82C1000
v_interp_p2_f32 v11, [v11], v1, 0, 4, [m0] ; C82D1001
v_interp_p1_f32 v18, v0, 1, 4, [m0] ; C8481100
v_interp_p2_f32 v18, [v18], v1, 1, 4, [m0] ; C8491101
v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200
v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201
v_interp_p1_f32 v20, v0, 3, 4, [m0] ; C8501300
v_interp_p2_f32 v20, [v20], v1, 3, 4, [m0] ; C8511301
v_mul_f32_e32 v6, v4, v4 ; 100C0904
v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05
v_mac_f32_e32 v6, v8, v8 ; 3E0C1108
v_rsq_clamp_f32_e32 v12, v6 ; 7E185906
v_mul_f32_e32 v6, v18, v18 ; 100C2512
v_mac_f32_e32 v6, v19, v19 ; 3E0C2713
v_mac_f32_e32 v6, v20, v20 ; 3E0C2914
v_rsq_clamp_f32_e32 v21, v6 ; 7E2A5906
v_mul_f32_e32 v6, v12, v4 ; 100C090C
v_mul_f32_e32 v5, v12, v5 ; 100A0B0C
v_mul_f32_e32 v4, v12, v8 ; 1008110C
v_mul_f32_e32 v13, v21, v18 ; 101A2515
v_mul_f32_e32 v12, v21, v19 ; 10182715
v_mul_f32_e32 v8, v13, v6 ; 10100D0D
v_mac_f32_e32 v8, v12, v5 ; 3E100B0C
v_mul_f32_e32 v14, v21, v20 ; 101C2915
v_mac_f32_e32 v8, v14, v4 ; 3E10090E
v_mul_f32_e32 v22, v6, v8 ; 102C1106
v_mac_f32_e32 v22, v6, v8 ; 3E2C1106
v_mul_f32_e32 v23, v5, v8 ; 102E1105
v_mac_f32_e32 v23, v5, v8 ; 3E2E1105
v_mad_f32 v27, v18, v21, -v22 ; D282001B 845A2B12
v_mad_f32 v28, v19, v21, -v23 ; D282001C 845E2B13
v_interp_p1_f32 v30, v0, 0, 5, [m0] ; C8781400
v_interp_p2_f32 v30, [v30], v1, 0, 5, [m0] ; C8791401
v_interp_p1_f32 v26, v0, 1, 5, [m0] ; C8681500
v_interp_p2_f32 v26, [v26], v1, 1, 5, [m0] ; C8691501
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v31, v0, 2, 5, [m0] ; C87C1600
v_interp_p2_f32 v31, [v31], v1, 2, 5, [m0] ; C87D1601
v_mul_f32_e32 v0, v4, v8 ; 10001104
v_mac_f32_e32 v0, v4, v8 ; 3E001104
s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C
s_load_dwordx4 s[36:39], s[4:5], 0x14 ; C0920514
s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718
s_load_dwordx8 s[48:55], s[6:7], 0x28 ; C0D80728
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[12:15] ; F0800700 00642002
s_buffer_load_dword s0, s[8:11], 0x4c ; C200094C
s_buffer_load_dword s1, s[8:11], 0x4d ; C200894D
s_buffer_load_dword s2, s[8:11], 0x4e ; C201094E
v_mad_f32 v29, v20, v21, -v0 ; D282001D 84022B14
v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000
v_cmp_gt_f32_e64 vcc, |v17|, v0 ; D008016A 00020111
v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000
v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2
v_mul_f32_e32 v1, v0, v17 ; 10022300
v_rcp_f32_e32 v1, v1 ; 7E025501
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v23, s0, v32 ; 102E4000
v_mul_f32_e32 v24, s1, v33 ; 10304201
v_mul_f32_e32 v25, s2, v34 ; 10324402
v_mul_f32_e32 v8, v1, v15 ; 10101F01
v_mul_f32_e32 v1, v1, v16 ; 10022101
s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940
s_buffer_load_dword s31, s[8:11], 0x54 ; C20F8954
s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941
s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942
v_mul_f32_e32 v15, v8, v0 ; 101E0108
v_mul_f32_e32 v16, v1, v0 ; 10200101
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C
s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D
s_buffer_load_dword s0, s[8:11], 0x58 ; C2000958
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e64 v0, 1.0, s31 ; D2080000 00003EF2
v_mul_f32_e32 v8, s1, v0 ; 10100001
v_mul_f32_e32 v1, s2, v0 ; 10020002
v_mul_f32_e32 v0, s3, v0 ; 10000003
v_mac_f32_e32 v8, s31, v23 ; 3E102E1F
v_mov_b32_e32 v32, v27 ; 7E40031B
v_mac_f32_e32 v1, s31, v24 ; 3E02301F
v_mov_b32_e32 v33, v28 ; 7E42031C
v_mac_f32_e32 v0, s31, v25 ; 3E00321F
v_mov_b32_e32 v34, v29 ; 7E44031D
v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80
image_sample v[19:22], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[40:47], s[24:27] ; F0800F00 00CA1302
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[48:55], s[36:39] ; F0800F00 012C0F0F
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402
s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925
v_mul_f32_e32 v16, v27, v27 ; 1020371B
v_mac_f32_e32 v16, v28, v28 ; 3E20391C
v_mac_f32_e32 v16, v29, v29 ; 3E203B1D
v_rsq_clamp_f32_e32 v16, v16 ; 7E205910
s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926
s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928
s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929
s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A
v_mul_f32_e32 v17, v16, v27 ; 10223710
v_mul_f32_e32 v18, v16, v28 ; 10243910
v_mul_f32_e32 v16, v16, v29 ; 10203B10
v_rcp_f32_e32 v19, v17 ; 7E265511
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v21, s1, v30 ; 082A3C01
v_sub_f32_e32 v22, s2, v26 ; 082C3402
v_rcp_f32_e32 v32, v18 ; 7E405512
v_mul_f32_e32 v21, v19, v21 ; 102A2B13
v_sub_f32_e32 v33, s13, v30 ; 08423C0D
v_mul_f32_e32 v19, v19, v33 ; 10264313
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v19, v19, v21 ; 00262B13
v_rcp_f32_e32 v21, v16 ; 7E2A5510
v_mul_f32_e32 v22, v32, v22 ; 102C2D20
v_sub_f32_e32 v33, s16, v26 ; 08423410
v_mul_f32_e32 v32, v32, v33 ; 10404320
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v22, v32, v22 ; 002C2D20
v_sub_f32_e32 v32, s3, v31 ; 08403E03
v_mul_f32_e32 v32, v21, v32 ; 10404115
v_sub_f32_e32 v33, s17, v31 ; 08423E11
v_mul_f32_e32 v21, v21, v33 ; 102A4315
v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080
v_cndmask_b32_e32 v21, v21, v32 ; 002A4115
v_min3_f32 v19, v19, v22, v21 ; D2A20013 04562D13
v_mov_b32_e32 v21, s13 ; 7E2A020D
v_add_f32_e32 v21, s1, v21 ; 062A2A01
v_mov_b32_e32 v22, s16 ; 7E2C0210
v_add_f32_e32 v22, s2, v22 ; 062C2C02
v_mov_b32_e32 v32, s17 ; 7E400211
v_add_f32_e32 v34, s3, v32 ; 06444003
v_mad_f32 v32, 0.5, v21, -s18 ; D2820020 804A2AF0
v_add_f32_e32 v32, v30, v32 ; 0640411E
v_mac_f32_e32 v32, v19, v17 ; 3E402313
v_mad_f32 v17, 0.5, v22, -s19 ; D2820011 804E2CF0
v_add_f32_e32 v17, v26, v17 ; 0622231A
v_mac_f32_e32 v17, v19, v18 ; 3E222513
v_mad_f32 v18, 0.5, v34, -s20 ; D2820012 805244F0
v_add_f32_e32 v18, v31, v18 ; 0624251F
v_mac_f32_e32 v18, v19, v16 ; 3E242113
v_mad_f32 v32, 0.5, -v21, v32 ; D2820020 44822AF0
v_mad_f32 v33, 0.5, -v22, v17 ; D2820021 44462CF0
v_mad_f32 v34, 0.5, -v34, v18 ; D2820022 444A44F0
s_or_b64 exec, exec, s[14:15] ; 88FE0E7E
s_buffer_load_dword s28, s[8:11], 0x17 ; C20E0917
s_buffer_load_dword s29, s[8:11], 0x43 ; C20E8943
s_buffer_load_dword s26, s[8:11], 0x44 ; C20D0944
s_buffer_load_dword s23, s[8:11], 0x45 ; C20B8945
s_buffer_load_dword s22, s[8:11], 0x46 ; C20B0946
s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900
s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901
s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902
s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904
s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905
s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906
s_buffer_load_dword s16, s[8:11], 0x7 ; C2080907
s_buffer_load_dword s17, s[8:11], 0x8 ; C2088908
s_buffer_load_dword s18, s[8:11], 0x9 ; C2090909
s_buffer_load_dword s19, s[8:11], 0xa ; C209890A
s_buffer_load_dword s20, s[8:11], 0xb ; C20A090B
s_buffer_load_dword s21, s[8:11], 0xc ; C20A890C
s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D
s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E
v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2
v_log_f32_e32 v16, v16 ; 7E204F10
v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v35, 0x40e00000, v16 ; 104620FF 40E00000
v_cubeid_f32 v19, v32, v33, v34 ; D2880013 048A4320
v_cubema_f32 v18, v32, v33, v34 ; D28E0012 048A4320
s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500
s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700
v_cubesc_f32 v17, v32, v33, v34 ; D28A0011 048A4320
v_cubetc_f32 v16, v32, v33, v34 ; D28C0010 048A4320
v_rcp_f32_e64 v18, |v18| ; D3540112 00000112
v_mov_b32_e32 v32, 0x3fc00000 ; 7E4002FF 3FC00000
v_mad_f32 v33, v18, v16, v32 ; D2820021 04822112
v_mac_f32_e32 v32, v18, v17 ; 3E402312
v_mov_b32_e32 v34, v19 ; 7E440313
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[40:47], s[36:39] ; F0900F00 012A2020
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v16, v35 ; 7E204F23
s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F
s_buffer_load_dword s30, s[8:11], 0x60 ; C20F0960
v_mul_legacy_f32_e32 v16, s33, v16 ; 0E202021
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v16, s32, v16 ; 10202020
v_mul_f32_e32 v19, v32, v16 ; 10262120
v_mul_f32_e32 v18, v33, v16 ; 10242121
v_mul_f32_e32 v17, v34, v16 ; 10222122
v_mov_b32_e32 v16, s31 ; 7E20021F
v_mov_b32_e32 v21, 0x3f7fff58 ; 7E2A02FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v21 ; 7C022A0C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B
s_buffer_load_dword s31, s[8:11], 0x3c ; C20F893C
s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936
s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938
s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939
s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A
s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930
s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931
s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932
s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934
s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935
v_mul_f32_e32 v21, v27, v27 ; 102A371B
v_mac_f32_e32 v21, v28, v28 ; 3E2A391C
v_mac_f32_e32 v21, v29, v29 ; 3E2A3B1D
v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v22, s35, v31 ; 082C3E23
v_mov_b32_e32 v32, s35 ; 7E400223
v_sub_f32_e32 v33, s41, v30 ; 08423C29
v_sub_f32_e32 v34, s42, v26 ; 0844342A
v_add_f32_e32 v32, s43, v32 ; 0640402B
v_sub_f32_e32 v35, s43, v31 ; 08463E2B
v_mad_f32 v36, 0.5, v32, -s40 ; D2820024 80A240F0
v_add_f32_e32 v31, v31, v36 ; 063E491F
v_mul_f32_e32 v27, v21, v27 ; 10363715
v_mul_f32_e32 v28, v21, v28 ; 10383915
v_mul_f32_e32 v21, v21, v29 ; 102A3B15
v_rcp_f32_e32 v29, v27 ; 7E3A551B
v_rcp_f32_e32 v36, v28 ; 7E48551C
v_rcp_f32_e32 v37, v21 ; 7E4A5515
v_sub_f32_e32 v38, s44, v30 ; 084C3C2C
v_mov_b32_e32 v39, s44 ; 7E4E022C
v_add_f32_e32 v39, s41, v39 ; 064E4E29
v_mul_f32_e32 v33, v29, v33 ; 1042431D
v_mul_f32_e32 v29, v29, v38 ; 103A4D1D
v_mul_f32_e32 v34, v36, v34 ; 10444524
v_mul_f32_e32 v35, v37, v35 ; 10464725
v_mul_f32_e32 v22, v37, v22 ; 102C2D25
v_mad_f32 v37, 0.5, v39, -s38 ; D2820025 809A4EF0
v_add_f32_e32 v30, v30, v37 ; 063C4B1E
v_sub_f32_e32 v37, s45, v26 ; 084A342D
v_mov_b32_e32 v38, s45 ; 7E4C022D
v_mul_f32_e32 v36, v36, v37 ; 10484B24
v_add_f32_e32 v37, s42, v38 ; 064A4C2A
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v33 ; 003A431D
v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880
v_cndmask_b32_e32 v33, v36, v34 ; 00424524
v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80
v_cndmask_b32_e32 v22, v22, v35 ; 002C4716
v_min3_f32 v22, v29, v33, v22 ; D2A20016 045A431D
v_mad_f32 v29, 0.5, v37, -s39 ; D282001D 809E4AF0
v_add_f32_e32 v26, v26, v29 ; 06343B1A
v_mac_f32_e32 v30, v22, v27 ; 3E3C3716
v_mac_f32_e32 v26, v22, v28 ; 3E343916
v_mac_f32_e32 v31, v22, v21 ; 3E3E2B16
v_mad_f32 v27, 0.5, -v39, v30 ; D282001B 447A4EF0
v_mad_f32 v28, 0.5, -v37, v26 ; D282001C 446A4AF0
v_mad_f32 v29, 0.5, -v32, v31 ; D282001D 447E40F0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v21, 1.0, s0 ; D2080015 000000F2
v_log_f32_e32 v21, v21 ; 7E2A4F15
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v21, 0x3f400000, v21 ; 0E2A2AFF 3F400000
v_exp_f32_e32 v21, v21 ; 7E2A4B15
v_mul_f32_e32 v30, 0x40e00000, v21 ; 103C2AFF 40E00000
v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B
v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B
v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B
v_rcp_f32_e64 v21, |v33| ; D3540115 00000121
v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000
v_mad_f32 v28, v21, v31, v27 ; D282001C 046E3F15
v_mac_f32_e32 v27, v21, v32 ; 3E364115
v_mov_b32_e32 v29, v34 ; 7E3A0322
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A1A1B
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v21, v29 ; 7E2A4F1D
v_sub_f32_e64 v22, 1.0, s12 ; D2080016 000018F2
v_mul_legacy_f32_e32 v21, s34, v21 ; 0E2A2A22
v_exp_f32_e32 v21, v21 ; 7E2A4B15
v_mul_f32_e32 v21, s31, v21 ; 102A2A1F
v_mul_f32_e32 v26, v26, v21 ; 10342B1A
v_mul_f32_e32 v27, v27, v21 ; 10362B1B
v_mul_f32_e32 v21, v28, v21 ; 102A2B1C
v_mul_f32_e32 v26, v26, v22 ; 10342D1A
v_mul_f32_e32 v27, v27, v22 ; 10362D1B
v_mul_f32_e32 v21, v21, v22 ; 102A2D15
v_mac_f32_e32 v26, s12, v19 ; 3E34260C
v_mac_f32_e32 v27, s12, v18 ; 3E36240C
v_mac_f32_e32 v21, s12, v17 ; 3E2A220C
v_mov_b32_e32 v17, v21 ; 7E220315
v_mov_b32_e32 v18, v27 ; 7E24031B
v_mov_b32_e32 v19, v26 ; 7E26031A
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mad_f32 v27, -v16, s29, s29 ; D282001B 20743B10
v_mov_b32_e32 v16, s28 ; 7E20021C
v_mul_f32_e32 v26, v27, v23 ; 10342F1B
v_mul_f32_e32 v22, v27, v24 ; 102C311B
v_mul_f32_e32 v21, v27, v25 ; 102A331B
v_mul_f32_e32 v23, s26, v15 ; 102E1E1A
v_sub_f32_e64 v25, 1.0, s30 ; D2080019 00003CF2
v_mac_f32_e32 v25, s30, v20 ; 3E32281E
v_mul_f32_e32 v20, s23, v15 ; 10281E17
v_mul_f32_e32 v15, s22, v15 ; 101E1E16
s_buffer_load_dword s23, s[8:11], 0x10 ; C20B8910
s_buffer_load_dword s22, s[8:11], 0x11 ; C20B0911
s_buffer_load_dword s12, s[8:11], 0x12 ; C2060912
s_buffer_load_dword s40, s[8:11], 0x16 ; C2140916
s_buffer_load_dword s29, s[8:11], 0x48 ; C20E8948
s_buffer_load_dword s30, s[8:11], 0x49 ; C20F0949
s_buffer_load_dword s31, s[8:11], 0x4b ; C20F894B
s_buffer_load_dword s28, s[8:11], 0x64 ; C20E0964
s_buffer_load_dword s26, s[8:11], 0x65 ; C20D0965
s_buffer_load_dword s8, s[8:11], 0x66 ; C2040966
v_mul_f32_e32 v24, s14, v5 ; 10300A0E
v_mac_f32_e32 v24, s13, v6 ; 3E300C0D
v_mac_f32_e32 v24, s15, v4 ; 3E30080F
v_add_f32_e32 v24, s16, v24 ; 06303010
v_mul_f32_e32 v28, s18, v5 ; 10380A12
v_mac_f32_e32 v28, s17, v6 ; 3E380C11
v_mac_f32_e32 v28, s19, v4 ; 3E380813
v_add_f32_e32 v28, s20, v28 ; 06383814
v_mul_f32_e32 v29, s24, v5 ; 103A0A18
v_mac_f32_e32 v29, s21, v6 ; 3E3A0C15
v_mac_f32_e32 v29, s25, v4 ; 3E3A0819
v_add_f32_e32 v29, s27, v29 ; 063A3A1B
v_add_f32_e32 v7, v24, v7 ; 060E0F18
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720
v_add_f32_e32 v9, v28, v9 ; 0612131C
v_add_f32_e32 v28, v29, v10 ; 0638151D
v_mul_f32_e32 v10, s2, v6 ; 10140C02
v_mac_f32_e32 v10, s3, v5 ; 3E140A03
v_mac_f32_e32 v10, s1, v4 ; 3E140801
v_max_f32_e32 v24, 0, v10 ; 20301480
v_mul_f32_e32 v10, v25, v7 ; 10140F19
v_mul_f32_e32 v9, v25, v9 ; 10121319
v_mul_f32_e32 v7, v25, v28 ; 100E3919
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v16, s40, v11 ; 3E201628
v_mul_f32_e32 v11, v25, v19 ; 10162719
v_mul_f32_e32 v18, v25, v18 ; 10242519
v_mul_f32_e32 v17, v25, v17 ; 10222319
v_sub_f32_e32 v19, 1.0, v27 ; 082636F2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v25, s2, v13 ; 08321A02
v_sub_f32_e32 v27, s3, v12 ; 08361803
v_mul_f32_e32 v28, v25, v25 ; 10383319
v_mac_f32_e32 v28, v27, v27 ; 3E38371B
v_sub_f32_e32 v29, s1, v14 ; 083A1C01
v_mac_f32_e32 v28, v29, v29 ; 3E383B1D
v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C
v_mul_f32_e32 v25, v28, v25 ; 1032331C
v_mul_f32_e32 v27, v28, v27 ; 1036371C
v_mul_f32_e32 v28, v28, v29 ; 10383B1C
v_mul_f32_e32 v13, v13, v6 ; 101A0D0D
v_mad_f32 v12, -v12, v5, -v13 ; D282000C A4360B0C
v_mad_f32 v12, -v14, v4, v12 ; D282000C 2432090E
v_mul_f32_e32 v6, v25, v6 ; 100C0D19
v_mac_f32_e32 v6, v27, v5 ; 3E0C0B1B
v_mul_f32_e32 v5, s2, v25 ; 100A3202
v_mac_f32_e32 v5, s3, v27 ; 3E0A3603
v_mac_f32_e32 v6, v28, v4 ; 3E0C091C
v_mac_f32_e32 v5, s1, v28 ; 3E0A3801
v_max_f32_e32 v4, 0, v5 ; 20080A80
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v13, v5, v5 ; 101A0B05
v_mul_f32_e32 v5, v5, v13 ; 100A1B05
v_mul_f32_e32 v5, v5, v13 ; 100A1B05
v_max_f32_e32 v12, 0, v12 ; 20181880
v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2
v_mul_f32_e32 v14, v13, v13 ; 101C1B0D
v_mul_f32_e32 v25, v13, v14 ; 10321D0D
v_mad_f32 v27, -v14, v25, 1.0 ; D282001B 23CA330E
v_mul_f32_e32 v28, v8, v27 ; 10383708
v_sub_f32_e32 v29, 1.0, v8 ; 083A10F2
v_mac_f32_e32 v8, v5, v29 ; 3E103B05
v_mul_f32_e32 v29, v1, v27 ; 103A3701
v_sub_f32_e32 v30, 1.0, v1 ; 083C02F2
v_mac_f32_e32 v1, v5, v30 ; 3E023D05
v_mul_f32_e32 v27, v0, v27 ; 10363700
v_sub_f32_e32 v30, 1.0, v0 ; 083C00F2
v_mac_f32_e32 v0, v5, v30 ; 3E003D05
v_sub_f32_e64 v5, 1.0, s0 ; D2080005 000000F2
v_sub_f32_e32 v30, 1.0, v5 ; 083C0AF2
v_mov_b32_e32 v31, 0x3cf5c28f ; 7E3E02FF 3CF5C28F
v_madmk_f32_e32 v30, v30, v31, 0x3f77ced9 ; 403C3F1E 3F77CED9
v_add_f32_e32 v31, v4, v4 ; 063E0904
v_mul_f32_e32 v4, v5, v4 ; 10080905
v_mad_f32 v4, v31, v4, 0.5 ; D2820004 03C2091F
v_mul_f32_e32 v14, v25, v14 ; 101C1D19
v_mac_f32_e32 v28, v19, v14 ; 3E381D13
v_mac_f32_e32 v29, v19, v14 ; 3E3A1D13
v_mac_f32_e32 v27, v19, v14 ; 3E361D13
v_mul_f32_e32 v5, v5, v5 ; 100A0B05
v_log_f32_e32 v19, v30 ; 7E264F1E
v_mul_f32_e32 v5, s31, v5 ; 100A0A1F
v_mul_f32_e32 v13, v5, v13 ; 101A1B05
v_mac_f32_e32 v13, 1.0, v12 ; 3E1A18F2
v_rcp_f32_e32 v12, v19 ; 7E185513
v_sub_f32_e32 v19, 1.0, v24 ; 082630F2
v_mul_f32_e32 v5, v5, v19 ; 100A2705
v_mac_f32_e32 v5, 1.0, v24 ; 3E0A30F2
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_madak_f32_e32 v5, v5, v13, 0x38d1b717 ; 420A1B05 38D1B717
v_mul_f32_e32 v12, 0x41200000, v12 ; 101818FF 41200000
v_mul_f32_e32 v13, v12, v12 ; 101A190C
v_mul_legacy_f32_e32 v6, v13, v6 ; 0E0C0D0D
v_rcp_f32_e32 v5, v5 ; 7E0A5505
v_mad_f32 v12, v12, v12, 1.0 ; D282000C 03CA190C
v_mul_f32_e32 v12, s30, v12 ; 1018181E
v_exp_f32_e32 v6, v6 ; 7E0C4B06
v_mul_f32_e32 v6, v12, v6 ; 100C0D0C
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v6, v19, v19 ; 100C2713
v_mul_f32_e32 v12, v19, v6 ; 10180D13
v_mul_f32_e32 v6, v12, v6 ; 100C0D0C
v_add_f32_e32 v4, -1.0, v4 ; 060808F3
v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04
v_mad_f32 v4, v4, v14, 1.0 ; D2820004 03CA1D04
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v5, v24, v5 ; 100A0B18
v_mul_f32_e32 v4, v24, v4 ; 10080918
v_mac_f32_e32 v10, v4, v23 ; 3E142F04
v_mul_f32_e32 v6, v10, v26 ; 100C350A
v_mul_f32_e32 v5, s29, v5 ; 100A0A1D
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mul_f32_e32 v10, v23, v5 ; 10140B17
v_mac_f32_e32 v6, v8, v10 ; 3E0C1508
image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[32:39], s[16:19] ; F0800700 00880C02
v_mac_f32_e32 v9, v4, v20 ; 3E122904
v_mul_f32_e32 v2, v9, v22 ; 10042D09
v_mul_f32_e32 v3, v20, v5 ; 10060B14
v_mac_f32_e32 v2, v1, v3 ; 3E040701
v_mac_f32_e32 v7, v4, v15 ; 3E0E1F04
v_mul_f32_e32 v1, v15, v5 ; 10020B0F
v_mul_f32_e32 v3, v7, v21 ; 10062B07
v_mac_f32_e32 v3, v0, v1 ; 3E060300
v_mac_f32_e32 v6, v28, v11 ; 3E0C171C
v_mac_f32_e32 v2, v29, v18 ; 3E04251D
v_mac_f32_e32 v3, v27, v17 ; 3E06231B
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v6, s28, v12 ; 3E0C181C
v_mac_f32_e32 v2, s26, v13 ; 3E041A1A
v_mac_f32_e32 v3, s8, v14 ; 3E061C08
v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v4, s23, v1 ; 10080217
v_mac_f32_e32 v4, v6, v0 ; 3E080106
v_mul_f32_e32 v5, s22, v1 ; 100A0216
v_mac_f32_e32 v5, v2, v0 ; 3E0A0102
v_mul_f32_e32 v1, s12, v1 ; 1002020C
v_mac_f32_e32 v1, v3, v0 ; 3E020103
v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 40
Code Size: 2264 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy
34: MOV TEMP[6].x, TEMP[5].xxxx
35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx
36: MOV TEMP[6].y, TEMP[7].xxxx
37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww
38: MOV TEMP[5].zw, TEMP[1].wwzw
39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx
40: DP4 TEMP[7].x, CONST[2], TEMP[6]
41: DP4 TEMP[8].x, CONST[3], TEMP[6]
42: MOV TEMP[7].y, TEMP[8].xxxx
43: DP4 TEMP[6].x, CONST[4], TEMP[6]
44: MOV TEMP[7].z, TEMP[6].xxxx
45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy
46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx
47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz
48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz
49: MOV TEMP[6].yzw, TEMP[6].yxyz
50: MOV TEMP[6].x, TEMP[1].zzzz
51: MOV TEMP[0].xyz, TEMP[0].xyzx
52: MOV OUT[6], TEMP[0]
53: MOV OUT[1], TEMP[2]
54: MOV OUT[2], TEMP[4]
55: MOV OUT[3], TEMP[3]
56: MOV OUT[4], TEMP[5]
57: MOV OUT[0], TEMP[1]
58: MOV OUT[5], TEMP[6]
59: END
; ModuleID = 'tgsi'
; Vertex shader entry point ("ShaderType"="1") generated from the TGSI listing
; that precedes this module in the dump.
;   %1      - array of <16 x i8> descriptors; element 0 is used as the constant
;             buffer passed to llvm.SI.load.const.
;   %4      - array of <16 x i8> descriptors; elements 0..3 are the vertex
;             buffers for IN[0]..IN[3].
;   %5, %7  - summed to form the index given to llvm.SI.vs.load.input
;             (presumably base vertex + vertex id - TODO confirm).
; The remaining arguments are not referenced in this body.
; The function returns nothing; results leave through llvm.SI.export calls.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Fetch constant-buffer descriptor 0, then read every constant component the
; shader uses. The second operand of llvm.SI.load.const is a byte offset:
; CONST[n].{x,y,z,w} lives at n*16 + {0,4,8,12}.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; CONST[0].xyz
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; CONST[1].x
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; CONST[2], CONST[3], CONST[4] (all four components each)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; CONST[5].xyz
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
; CONST[6], CONST[7], CONST[8] (all four components each)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
; CONST[9].xyz
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; CONST[10].xyz, CONST[11].xyz, CONST[12].xyz
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
; CONST[14], CONST[15] (all four components each)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
; CONST[16].x
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
; CONST[17], CONST[18], CONST[19], CONST[20] (all four components each)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; Vertex input fetches. Each input uses descriptor i of %4 and index %5 + %7.
; IN[0]: all four components are used (%85..%88).
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
; IN[1]: only xyz are used (%93..%95).
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
; IN[2]: only xy are used (%100, %101).
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
; IN[3]: only xy are used (%106, %107).
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
; TGSI 0-3: TEMP[0] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z,
; then CONST[9]*IN[0].w is added to xyz only. Final xyz = %129, %131, %133;
; w stays at %127 because the last MAD writes only .xyz.
%108 = fmul float %32, %85
%109 = fmul float %33, %85
%110 = fmul float %34, %85
%111 = fmul float %35, %85
%112 = fmul float %36, %86
%113 = fadd float %112, %108
%114 = fmul float %37, %86
%115 = fadd float %114, %109
%116 = fmul float %38, %86
%117 = fadd float %116, %110
%118 = fmul float %39, %86
%119 = fadd float %118, %111
%120 = fmul float %40, %87
%121 = fadd float %120, %113
%122 = fmul float %41, %87
%123 = fadd float %122, %115
%124 = fmul float %42, %87
%125 = fadd float %124, %117
%126 = fmul float %43, %87
%127 = fadd float %126, %119
%128 = fmul float %44, %88
%129 = fadd float %128, %121
%130 = fmul float %45, %88
%131 = fadd float %130, %123
%132 = fmul float %46, %88
%133 = fadd float %132, %125
; TGSI 4-7: TEMP[1] = CONST[17]*IN[0].x + CONST[18]*IN[0].y
;                   + CONST[19]*IN[0].z + CONST[20]*IN[0].w.
; Final xyzw = %155, %157, %159, %161; this is what OUT[0] (POSITION) receives.
%134 = fmul float %65, %85
%135 = fmul float %66, %85
%136 = fmul float %67, %85
%137 = fmul float %68, %85
%138 = fmul float %69, %86
%139 = fadd float %138, %134
%140 = fmul float %70, %86
%141 = fadd float %140, %135
%142 = fmul float %71, %86
%143 = fadd float %142, %136
%144 = fmul float %72, %86
%145 = fadd float %144, %137
%146 = fmul float %73, %87
%147 = fadd float %146, %139
%148 = fmul float %74, %87
%149 = fadd float %148, %141
%150 = fmul float %75, %87
%151 = fadd float %150, %143
%152 = fmul float %76, %87
%153 = fadd float %152, %145
%154 = fmul float %77, %88
%155 = fadd float %154, %147
%156 = fmul float %78, %88
%157 = fadd float %156, %149
%158 = fmul float %79, %88
%159 = fadd float %158, %151
%160 = fmul float %80, %88
%161 = fadd float %160, %153
; TGSI 8: TEMP[2].xy = IN[2].xy * CONST[14].xy + CONST[14].zw.
%162 = fmul float %100, %56
%163 = fadd float %162, %58
%164 = fmul float %101, %57
%165 = fadd float %164, %59
; TGSI 9-14: the UIF/ELSE has been flattened into two selects.
; When CONST[16].x == 0.0 the IN[2].xy pair is chosen, otherwise IN[3].xy.
%166 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %166, float %100, float %106
%.36 = select i1 %166, float %101, float %107
; TGSI 15-16: TEMP[2].zw = selected.xy * CONST[15].xy + CONST[15].zw.
%167 = fmul float %., %60
%168 = fadd float %167, %62
%169 = fmul float %.36, %61
%170 = fadd float %169, %63
; TGSI 17-28: 3-component transform of IN[1].xyz. Result component i is
; dot(CONST[10+i].xyz, IN[1].xyz), giving %181, %183, %185.
%171 = fmul float %47, %93
%172 = fmul float %50, %93
%173 = fmul float %53, %93
%174 = fmul float %48, %94
%175 = fadd float %174, %171
%176 = fmul float %51, %94
%177 = fadd float %176, %172
%178 = fmul float %54, %94
%179 = fadd float %178, %173
%180 = fmul float %49, %95
%181 = fadd float %180, %175
%182 = fmul float %52, %95
%183 = fadd float %182, %177
%184 = fmul float %55, %95
%185 = fadd float %184, %179
; TGSI 29-31: normalize that vector. %190 is its squared length and %191 the
; clamped reciprocal square root; the unit vector n = (%192, %193, %194).
%186 = fmul float %181, %181
%187 = fmul float %183, %183
%188 = fadd float %187, %186
%189 = fmul float %185, %185
%190 = fadd float %188, %189
%191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190)
%192 = fmul float %181, %191
%193 = fmul float %183, %191
%194 = fmul float %185, %191
; TGSI 33-37: TEMP[5].xy built from TEMP[1] scaled by 0.5:
;   x = 0.5*TEMP[1].x + 0.5*TEMP[1].w                 (%199)
;   y = 0.5*TEMP[1].y*CONST[1].x + 0.5*TEMP[1].w      (%200)
%195 = fmul float %155, 5.000000e-01
%196 = fmul float %157, 5.000000e-01
%197 = fmul float %161, 5.000000e-01
%198 = fmul float %196, %16
%199 = fadd float %195, %197
%200 = fadd float %198, %197
; TGSI 39: pairwise products of n: (n.x*n.y, n.y*n.z, n.z*n.z, n.z*n.x).
%201 = fmul float %192, %193
%202 = fmul float %193, %194
%203 = fmul float %194, %194
%204 = fmul float %194, %192
; TGSI 40-44: three DP4s of that product vector against CONST[2], CONST[3]
; and CONST[4], giving %211, %218 and %225.
%205 = fmul float %17, %201
%206 = fmul float %18, %202
%207 = fadd float %205, %206
%208 = fmul float %19, %203
%209 = fadd float %207, %208
%210 = fmul float %20, %204
%211 = fadd float %209, %210
%212 = fmul float %21, %201
%213 = fmul float %22, %202
%214 = fadd float %212, %213
%215 = fmul float %23, %203
%216 = fadd float %214, %215
%217 = fmul float %24, %204
%218 = fadd float %216, %217
%219 = fmul float %25, %201
%220 = fmul float %26, %202
%221 = fadd float %219, %220
%222 = fmul float %27, %203
%223 = fadd float %221, %222
%224 = fmul float %28, %204
%225 = fadd float %223, %224
; TGSI 45-47: TEMP[3].xyz = CONST[5].xyz * (n.x*n.x - n.y*n.y) + DP4 results.
%226 = fmul float %193, %193
%227 = fmul float %192, %192
%228 = fsub float %227, %226
%229 = fmul float %29, %228
%230 = fadd float %229, %211
%231 = fmul float %30, %228
%232 = fadd float %231, %218
%233 = fmul float %31, %228
%234 = fadd float %233, %225
; TGSI 48: TEMP[0].xyz - CONST[0].xyz.
%235 = fsub float %129, %13
%236 = fsub float %131, %14
%237 = fsub float %133, %15
; Exports. The fourth i32 operand is the export target: targets 32..37 carry
; OUT[1]..OUT[6] (GENERIC[0..5]) and target 12 carries OUT[0] (POSITION).
; Only the final export has its third i32 operand set to 1 (presumably the
; "done" flag marking the last export - TODO confirm).
; OUT[1] = TEMP[2]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170)
; OUT[2] = normalized vector n; w is exported as constant 0.0
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00)
; OUT[3] = TEMP[3].xyz; w is exported as constant 0.0
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00)
; OUT[4] = (TEMP[5].x, TEMP[5].y, TEMP[1].z, TEMP[1].w)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161)
; OUT[5] = (TEMP[1].z, TEMP[0].xyz - CONST[0].xyz)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237)
; OUT[6] = TEMP[0]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127)
; OUT[0] (POSITION) = TEMP[1]
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524
s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525
s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526
s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528
s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529
s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A
s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C
s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D
s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E
s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530
s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531
s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532
s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538
s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539
s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509
s_buffer_load_dword s5, s[20:23], 0xa ; C202950A
s_buffer_load_dword s3, s[20:23], 0xb ; C201950B
s_buffer_load_dword s9, s[20:23], 0xc ; C204950C
s_buffer_load_dword s12, s[20:23], 0xd ; C206150D
s_buffer_load_dword s7, s[20:23], 0xe ; C203950E
s_buffer_load_dword s4, s[20:23], 0xf ; C202150F
s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510
s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511
s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512
s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F
s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540
s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544
s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545
s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546
s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547
s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548
s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549
s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A
s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s0 ; 7E000200
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508
s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A
s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B
s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C
s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D
s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E
s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513
s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514
s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515
s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516
s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s46 ; 7E1A022E
s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519
s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A
s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B
s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C
s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D
s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E
s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F
s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520
s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521
s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522
s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C
s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D
s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E
s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F
s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550
s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551
s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552
s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553
v_mac_f32_e32 v13, s36, v9 ; 3E1A1224
v_mov_b32_e32 v14, s47 ; 7E1C022F
v_mul_f32_e32 v15, s51, v2 ; 101E0433
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v16, s46, v2 ; 1020042E
v_mul_f32_e32 v17, s27, v6 ; 10220C1B
v_mul_f32_e32 v18, s30, v6 ; 10240C1E
v_mul_f32_e32 v6, s33, v6 ; 100C0C21
v_mac_f32_e32 v17, s28, v7 ; 3E220E1C
v_mac_f32_e32 v18, s31, v7 ; 3E240E1F
v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22
v_mac_f32_e32 v17, s29, v8 ; 3E22101D
v_mac_f32_e32 v18, s32, v8 ; 3E241020
v_mac_f32_e32 v6, s35, v8 ; 3E0C1023
v_mul_f32_e32 v7, s52, v2 ; 100E0434
v_mul_f32_e32 v8, s53, v2 ; 10100435
v_mul_f32_e32 v19, s38, v2 ; 10260426
v_mac_f32_e32 v14, s37, v10 ; 3E1C1425
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v15, s54, v3 ; 3E1E0636
v_mac_f32_e32 v16, s55, v3 ; 3E200637
v_mac_f32_e32 v7, s56, v3 ; 3E0E0638
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mac_f32_e32 v19, s42, v3 ; 3E26062A
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s43, v3 ; 3E16062B
v_mul_f32_e32 v12, s40, v2 ; 10180428
v_mac_f32_e32 v12, s44, v3 ; 3E18062C
v_mul_f32_e32 v2, s41, v2 ; 10040429
v_mac_f32_e32 v2, s45, v3 ; 3E04062D
v_mac_f32_e32 v15, s58, v4 ; 3E1E083A
v_mac_f32_e32 v16, s59, v4 ; 3E20083B
v_mac_f32_e32 v7, s60, v4 ; 3E0E083C
v_mac_f32_e32 v8, s19, v4 ; 3E100813
v_mac_f32_e32 v19, s61, v4 ; 3E26083D
v_mac_f32_e32 v11, s62, v4 ; 3E16083E
v_mac_f32_e32 v12, s63, v4 ; 3E18083F
v_mac_f32_e32 v2, s64, v4 ; 3E040840
v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18
v_mac_f32_e32 v16, s25, v5 ; 3E200A19
v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A
v_mac_f32_e32 v19, s65, v5 ; 3E260A41
v_mac_f32_e32 v11, s66, v5 ; 3E160A42
v_mac_f32_e32 v12, s67, v5 ; 3E180A43
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mov_b32_e32 v3, s50 ; 7E060232
v_mul_f32_e32 v4, v17, v17 ; 10082311
v_mac_f32_e32 v4, v18, v18 ; 3E082512
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
v_mac_f32_e32 v3, s48, v9 ; 3E061230
v_mac_f32_e32 v0, s49, v10 ; 3E001431
exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v4, v17 ; 10002304
v_mul_f32_e32 v3, v4, v18 ; 10062504
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v6, s11, v5 ; 100C0A0B
v_mul_f32_e32 v9, s12, v5 ; 10120A0C
v_mul_f32_e32 v5, s13, v5 ; 100A0A0D
v_mul_f32_e32 v10, v3, v0 ; 10140103
v_mac_f32_e32 v6, s14, v10 ; 3E0C140E
v_mac_f32_e32 v9, s9, v10 ; 3E121409
v_mac_f32_e32 v5, s10, v10 ; 3E0A140A
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mac_f32_e32 v6, s5, v10 ; 3E0C1405
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v0, v4 ; 10140900
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s4, v10 ; 3E121404
v_mac_f32_e32 v5, s18, v10 ; 3E0A1412
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100
v_mac_f32_e32 v6, s15, v0 ; 3E0C000F
v_mac_f32_e32 v9, s16, v0 ; 3E120010
v_mac_f32_e32 v5, s17, v0 ; 3E0A0011
v_mul_f32_e32 v0, 0.5, v11 ; 100016F0
v_mul_f32_e32 v3, 0.5, v2 ; 100604F0
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
s_waitcnt expcnt(0) ; BF8C070F
v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0
v_mac_f32_e32 v3, s6, v0 ; 3E060006
exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301
v_subrev_f32_e32 v0, s0, v15 ; 0A001E00
s_waitcnt expcnt(0) ; BF8C070F
v_subrev_f32_e32 v1, s1, v16 ; 0A022001
v_subrev_f32_e32 v3, s2, v7 ; 0A060E02
exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C
exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F
exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 20
Code Size: 788 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[21..22]
DCL CONST[24]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz
10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww
11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
13: MOV TEMP[5].xy, IN[0].xyyy
14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D
15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx
16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx
17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
19: MOV TEMP[7].xyz, IMM[0].yyyy
20: MOV TEMP[8].w, IMM[0].xxxx
21: MOV TEMP[8].xyz, TEMP[0].xyzx
22: DP4 TEMP[9].x, CONST[1], TEMP[8]
23: DP4 TEMP[10].x, CONST[2], TEMP[8]
24: MOV TEMP[9].y, TEMP[10].xxxx
25: DP4 TEMP[8].x, CONST[3], TEMP[8]
26: MOV TEMP[9].z, TEMP[8].xxxx
27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
28: MOV TEMP[9].xy, IN[3].xyyy
29: MOV TEMP[9].w, IN[3].wwww
30: TXP TEMP[9].x, TEMP[9], SAMP[4], 2D
31: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
32: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
33: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
34: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
35: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz
36: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
37: MOV TEMP[11].xyz, TEMP[10].xyzx
38: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww
39: UIF TEMP[12].xxxx :0
40: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
41: RSQ TEMP[12].x, TEMP[12].xxxx
42: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
43: MOV TEMP[13].xyz, -IN[5].xyzx
44: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
45: RCP TEMP[15].x, TEMP[12].xxxx
46: RCP TEMP[15].y, TEMP[12].yyyy
47: RCP TEMP[15].z, TEMP[12].zzzz
48: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
49: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
50: RCP TEMP[15].x, TEMP[12].xxxx
51: RCP TEMP[15].y, TEMP[12].yyyy
52: RCP TEMP[15].z, TEMP[12].zzzz
53: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
54: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz
55: UIF TEMP[15].xxxx :0
56: MOV TEMP[16].x, TEMP[14].xxxx
57: ELSE :0
58: MOV TEMP[16].x, TEMP[13].xxxx
59: ENDIF
60: UIF TEMP[15].yyyy :0
61: MOV TEMP[17].x, TEMP[14].yyyy
62: ELSE :0
63: MOV TEMP[17].x, TEMP[13].yyyy
64: ENDIF
65: UIF TEMP[15].zzzz :0
66: MOV TEMP[14].x, TEMP[14].zzzz
67: ELSE :0
68: MOV TEMP[14].x, TEMP[13].zzzz
69: ENDIF
70: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
71: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
72: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
73: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
74: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
75: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
76: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
77: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
78: ENDIF
79: ADD TEMP[12].x, IMM[0].xxxx, -CONST[22].xxxx
80: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx
81: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
82: MOV TEMP[11].xyz, TEMP[11].xyzz
83: MOV TEMP[11].w, TEMP[12].xxxx
84: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
85: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
86: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
87: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
88: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz
89: UIF TEMP[12].xxxx :0
90: MOV TEMP[12].xyz, TEMP[10].xyzx
91: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww
92: UIF TEMP[13].xxxx :0
93: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
94: RSQ TEMP[13].x, TEMP[13].xxxx
95: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
96: MOV TEMP[13].xyz, -IN[5].xyzx
97: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
98: RCP TEMP[15].x, TEMP[10].xxxx
99: RCP TEMP[15].y, TEMP[10].yyyy
100: RCP TEMP[15].z, TEMP[10].zzzz
101: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
102: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
103: RCP TEMP[15].x, TEMP[10].xxxx
104: RCP TEMP[15].y, TEMP[10].yyyy
105: RCP TEMP[15].z, TEMP[10].zzzz
106: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
107: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz
108: UIF TEMP[15].xxxx :0
109: MOV TEMP[16].x, TEMP[14].xxxx
110: ELSE :0
111: MOV TEMP[16].x, TEMP[13].xxxx
112: ENDIF
113: UIF TEMP[15].yyyy :0
114: MOV TEMP[17].x, TEMP[14].yyyy
115: ELSE :0
116: MOV TEMP[17].x, TEMP[13].yyyy
117: ENDIF
118: UIF TEMP[15].zzzz :0
119: MOV TEMP[14].x, TEMP[14].zzzz
120: ELSE :0
121: MOV TEMP[14].x, TEMP[13].zzzz
122: ENDIF
123: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
124: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
125: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
126: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
127: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
128: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
129: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
130: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
131: ENDIF
132: ADD TEMP[10].x, IMM[0].xxxx, -CONST[22].xxxx
133: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx
134: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
135: MOV TEMP[12].xyz, TEMP[12].xyzz
136: MOV TEMP[12].w, TEMP[10].xxxx
137: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
138: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
139: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
140: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
141: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
142: ELSE :0
143: MOV TEMP[7].xyz, TEMP[11].xyzx
144: ENDIF
145: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
146: MOV TEMP[1].xyz, -TEMP[1].xyzx
147: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx
148: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
149: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
150: RSQ TEMP[11].x, TEMP[11].xxxx
151: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
152: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
153: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
154: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
155: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx
156: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
157: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
158: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx
159: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy
160: LG2 TEMP[13].x, TEMP[13].xxxx
161: RCP TEMP[13].x, TEMP[13].xxxx
162: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx
163: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
164: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx
165: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
166: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx
167: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
168: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww
169: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx
170: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx
171: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
172: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx
173: MOV_SAT TEMP[4].x, TEMP[4].xxxx
174: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
175: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
176: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
177: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
178: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
179: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx
180: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx
181: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].zzzz
182: RCP TEMP[1].x, TEMP[1].xxxx
183: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
184: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
185: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
186: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx
187: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
188: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
189: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
190: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
191: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
192: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
193: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz
194: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz
195: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
196: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
197: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
198: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
199: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
200: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww
201: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
202: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
203: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
204: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
205: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx
206: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
207: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
208: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
209: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
210: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
211: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx
212: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
213: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
214: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
215: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
216: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
217: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
218: MOV TEMP[0].xyz, TEMP[0].xyzx
219: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww
220: MOV_SAT TEMP[1].x, TEMP[1].xxxx
221: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
222: MOV TEMP[0].xyz, TEMP[0].xyzx
223: MOV TEMP[0].w, IMM[0].xxxx
224: MOV OUT[0], TEMP[0]
225: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0
%84 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0
%86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)*
%88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0
%89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)*
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)*
%94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0
%95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)*
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%99 = bitcast <8 x i32> addrspace(2)* %98 to <32 x i8> addrspace(2)*
%100 = load <32 x i8>, <32 x i8> addrspace(2)* %99, align 32, !tbaa !0
%101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%102 = bitcast <4 x i32> addrspace(2)* %101 to <16 x i8> addrspace(2)*
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%105 = bitcast <8 x i32> addrspace(2)* %104 to <32 x i8> addrspace(2)*
%106 = load <32 x i8>, <32 x i8> addrspace(2)* %105, align 32, !tbaa !0
%107 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%108 = bitcast <4 x i32> addrspace(2)* %107 to <16 x i8> addrspace(2)*
%109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0
%110 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%111 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%128 = fmul float %112, %112
%129 = fmul float %113, %113
%130 = fadd float %129, %128
%131 = fmul float %114, %114
%132 = fadd float %130, %131
%133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132)
%134 = fmul float %112, %133
%135 = fmul float %113, %133
%136 = fmul float %114, %133
%137 = fmul float %122, %122
%138 = fmul float %123, %123
%139 = fadd float %138, %137
%140 = fmul float %124, %124
%141 = fadd float %139, %140
%142 = call float @llvm.AMDGPU.rsq.clamped.f32(float %141)
%143 = fmul float %122, %142
%144 = fmul float %123, %142
%145 = fmul float %124, %142
%146 = bitcast float %110 to i32
%147 = bitcast float %111 to i32
%148 = insertelement <2 x i32> undef, i32 %146, i32 0
%149 = insertelement <2 x i32> %148, i32 %147, i32 1
%150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %94, <16 x i8> %97, i32 2)
%151 = extractelement <4 x float> %150, i32 0
%152 = extractelement <4 x float> %150, i32 1
%153 = extractelement <4 x float> %150, i32 2
%154 = fmul float %76, %151
%155 = fmul float %77, %152
%156 = fmul float %78, %153
%157 = call float @llvm.AMDGPU.lrp(float %79, float %154, float %66)
%158 = call float @llvm.AMDGPU.lrp(float %79, float %155, float %67)
%159 = call float @llvm.AMDGPU.lrp(float %79, float %156, float %68)
%160 = fmul float %79, %69
%161 = fsub float %69, %160
%162 = fmul float %154, %161
%163 = fmul float %155, %161
%164 = fmul float %156, %161
%165 = bitcast float %110 to i32
%166 = bitcast float %111 to i32
%167 = insertelement <2 x i32> undef, i32 %165, i32 0
%168 = insertelement <2 x i32> %167, i32 %166, i32 1
%169 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %168, <32 x i8> %100, <16 x i8> %103, i32 2)
%170 = extractelement <4 x float> %169, i32 1
%171 = fsub float 1.000000e+00, %81
%172 = fmul float %170, %81
%173 = fadd float %172, %171
%174 = fmul float %134, %24
%175 = fmul float %135, %25
%176 = fadd float %175, %174
%177 = fmul float %136, %26
%178 = fadd float %176, %177
%179 = call float @llvm.maxnum.f32(float %178, float 0.000000e+00)
%180 = fmul float %27, %134
%181 = fmul float %28, %135
%182 = fadd float %180, %181
%183 = fmul float %29, %136
%184 = fadd float %182, %183
%185 = fadd float %184, %30
%186 = fmul float %31, %134
%187 = fmul float %32, %135
%188 = fadd float %186, %187
%189 = fmul float %33, %136
%190 = fadd float %188, %189
%191 = fadd float %190, %34
%192 = fmul float %35, %134
%193 = fmul float %36, %135
%194 = fadd float %192, %193
%195 = fmul float %37, %136
%196 = fadd float %194, %195
%197 = fadd float %196, %38
%198 = fadd float %115, %185
%199 = fadd float %116, %191
%200 = fadd float %117, %197
%201 = fdiv float %118, %120
%202 = fdiv float %119, %120
%203 = bitcast float %201 to i32
%204 = bitcast float %202 to i32
%205 = insertelement <2 x i32> undef, i32 %203, i32 0
%206 = insertelement <2 x i32> %205, i32 %204, i32 1
%207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %106, <16 x i8> %109, i32 2)
%208 = extractelement <4 x float> %207, i32 0
%209 = fmul float %70, %208
%210 = fmul float %71, %208
%211 = fmul float %72, %208
%212 = fmul float %198, %173
%213 = fmul float %199, %173
%214 = fmul float %200, %173
%215 = fmul float %134, %143
%216 = fmul float %135, %144
%217 = fadd float %216, %215
%218 = fmul float %136, %145
%219 = fadd float %217, %218
%220 = fmul float %219, %134
%221 = fmul float %219, %135
%222 = fmul float %219, %136
%223 = fmul float %220, 2.000000e+00
%224 = fmul float %221, 2.000000e+00
%225 = fmul float %222, 2.000000e+00
%226 = fsub float %143, %223
%227 = fsub float %144, %224
%228 = fsub float %145, %225
%229 = fcmp ogt float %51, 0.000000e+00
br i1 %229, label %IF, label %ENDIF
IF: ; preds = %main_body
%230 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%231 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%232 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%233 = fmul float %226, %226
%234 = fmul float %227, %227
%235 = fadd float %234, %233
%236 = fmul float %228, %228
%237 = fadd float %235, %236
%238 = call float @llvm.AMDGPU.rsq.clamped.f32(float %237)
%239 = fmul float %226, %238
%240 = fmul float %227, %238
%241 = fmul float %228, %238
%242 = fsub float %44, %125
%243 = fsub float %45, %126
%244 = fsub float %46, %127
%245 = fdiv float 1.000000e+00, %239
%246 = fdiv float 1.000000e+00, %240
%247 = fdiv float 1.000000e+00, %241
%248 = fmul float %242, %245
%249 = fmul float %243, %246
%250 = fmul float %244, %247
%251 = fsub float %47, %125
%252 = fsub float %48, %126
%253 = fsub float %49, %127
%254 = fdiv float 1.000000e+00, %239
%255 = fdiv float 1.000000e+00, %240
%256 = fdiv float 1.000000e+00, %241
%257 = fmul float %251, %254
%258 = fmul float %252, %255
%259 = fmul float %253, %256
%260 = fcmp ogt float %239, 0.000000e+00
%261 = fcmp ogt float %240, 0.000000e+00
%262 = fcmp ogt float %241, 0.000000e+00
%. = select i1 %260, float %248, float %257
%temp68.0 = select i1 %261, float %249, float %258
%.100 = select i1 %262, float %250, float %259
%263 = fadd float %44, %47
%264 = fadd float %45, %48
%265 = fadd float %46, %49
%266 = fmul float %263, 5.000000e-01
%267 = fmul float %264, 5.000000e-01
%268 = fmul float %265, 5.000000e-01
%269 = call float @llvm.minnum.f32(float %., float %temp68.0)
%270 = call float @llvm.minnum.f32(float %269, float %.100)
%271 = fsub float %266, %232
%272 = fsub float %267, %231
%273 = fsub float %268, %230
%274 = fadd float %271, %125
%275 = fadd float %272, %126
%276 = fadd float %273, %127
%277 = fmul float %239, %270
%278 = fadd float %277, %274
%279 = fmul float %240, %270
%280 = fadd float %279, %275
%281 = fmul float %241, %270
%282 = fadd float %281, %276
%283 = fsub float %278, %266
%284 = fsub float %280, %267
%285 = fsub float %282, %268
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp44.0 = phi float [ %283, %IF ], [ %226, %main_body ]
%temp45.0 = phi float [ %284, %IF ], [ %227, %main_body ]
%temp46.0 = phi float [ %285, %IF ], [ %228, %main_body ]
%286 = fsub float 1.000000e+00, %80
%287 = call float @llvm.pow.f32(float %286, float 7.500000e-01)
%288 = fmul float %287, 7.000000e+00
%289 = insertelement <4 x float> undef, float %temp44.0, i32 0
%290 = insertelement <4 x float> %289, float %temp45.0, i32 1
%291 = insertelement <4 x float> %290, float %temp46.0, i32 2
%292 = insertelement <4 x float> %291, float %288, i32 3
%293 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %292)
%294 = extractelement <4 x float> %293, i32 0
%295 = extractelement <4 x float> %293, i32 1
%296 = extractelement <4 x float> %293, i32 2
%297 = extractelement <4 x float> %293, i32 3
%298 = call float @llvm.fabs.f32(float %296)
%299 = fdiv float 1.000000e+00, %298
%300 = fmul float %294, %299
%301 = fadd float %300, 1.500000e+00
%302 = fmul float %295, %299
%303 = fadd float %302, 1.500000e+00
%304 = bitcast float %303 to i32
%305 = bitcast float %301 to i32
%306 = bitcast float %297 to i32
%307 = bitcast float %288 to i32
%308 = insertelement <4 x i32> undef, i32 %304, i32 0
%309 = insertelement <4 x i32> %308, i32 %305, i32 1
%310 = insertelement <4 x i32> %309, i32 %306, i32 2
%311 = insertelement <4 x i32> %310, i32 %307, i32 3
%312 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %311, <32 x i8> %83, <16 x i8> %85, i32 4)
%313 = extractelement <4 x float> %312, i32 0
%314 = extractelement <4 x float> %312, i32 1
%315 = extractelement <4 x float> %312, i32 2
%316 = extractelement <4 x float> %312, i32 3
%317 = call float @llvm.pow.f32(float %316, float %53)
%318 = fmul float %52, %317
%319 = fmul float %318, %313
%320 = fmul float %318, %314
%321 = fmul float %318, %315
%322 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %322, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
%323 = fcmp ogt float %63, 0.000000e+00
br i1 %323, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
%temp28.0 = phi float [ %551, %ENDIF88 ], [ %319, %ENDIF ]
%temp29.0 = phi float [ %552, %ENDIF88 ], [ %320, %ENDIF ]
%temp30.0 = phi float [ %553, %ENDIF88 ], [ %321, %ENDIF ]
%324 = fmul float %temp28.0, %173
%325 = fmul float %temp29.0, %173
%326 = fmul float %temp30.0, %173
%327 = fsub float 1.000000e+00, %80
%328 = fsub float %24, %143
%329 = fsub float %25, %144
%330 = fsub float %26, %145
%331 = fmul float %328, %328
%332 = fmul float %329, %329
%333 = fadd float %332, %331
%334 = fmul float %330, %330
%335 = fadd float %333, %334
%336 = call float @llvm.AMDGPU.rsq.clamped.f32(float %335)
%337 = fmul float %328, %336
%338 = fmul float %329, %336
%339 = fmul float %330, %336
%340 = fmul float %143, %134
%341 = fsub float -0.000000e+00, %340
%342 = fmul float %144, %135
%343 = fsub float %341, %342
%344 = fmul float %145, %136
%345 = fsub float %343, %344
%346 = call float @llvm.maxnum.f32(float %345, float 0.000000e+00)
%347 = fmul float %24, %337
%348 = fmul float %25, %338
%349 = fadd float %348, %347
%350 = fmul float %26, %339
%351 = fadd float %349, %350
%352 = call float @llvm.maxnum.f32(float %351, float 0.000000e+00)
%353 = fmul float %327, %327
%354 = fmul float %353, %75
%355 = fsub float 1.000000e+00, %327
%356 = fmul float %355, 0x3FEEF9DB20000000
%357 = fadd float %356, 0x3F9EB851E0000000
%358 = call float @llvm.log2.f32(float %357)
%359 = fdiv float 1.000000e+00, %358
%360 = fmul float %359, 1.000000e+01
%361 = fmul float %360, %360
%362 = fsub float 1.000000e+00, %179
%363 = fsub float 1.000000e+00, %346
%364 = fmul float %352, 2.000000e+00
%365 = fmul float %352, %327
%366 = fmul float %364, %365
%367 = fadd float %366, 5.000000e-01
%368 = fsub float 1.000000e+00, %352
%369 = fsub float 1.000000e+00, %346
%370 = fsub float 1.000000e+00, %161
%371 = fadd float %80, %370
%372 = call float @llvm.AMDIL.clamp.(float %371, float 0.000000e+00, float 1.000000e+00)
%373 = fmul float %369, %369
%374 = fmul float %369, %369
%375 = fmul float %374, %369
%376 = fmul float %373, %375
%377 = call float @llvm.AMDGPU.lrp(float %376, float %372, float %157)
%378 = call float @llvm.AMDGPU.lrp(float %376, float %372, float %158)
%379 = call float @llvm.AMDGPU.lrp(float %376, float %372, float %159)
%380 = call float @llvm.AMDGPU.lrp(float %179, float 1.000000e+00, float %354)
%381 = call float @llvm.AMDGPU.lrp(float %346, float 1.000000e+00, float %354)
%382 = fmul float %380, %381
%383 = fadd float %382, 0x3F1A36E2E0000000
%384 = fdiv float 1.000000e+00, %383
%385 = fmul float %134, %337
%386 = fmul float %135, %338
%387 = fadd float %386, %385
%388 = fmul float %136, %339
%389 = fadd float %387, %388
%390 = call float @llvm.maxnum.f32(float %389, float 0.000000e+00)
%391 = call float @llvm.pow.f32(float %390, float %361)
%392 = fadd float %361, 1.000000e+00
%393 = fmul float %392, %74
%394 = fmul float %391, %393
%395 = fmul float %384, %394
%396 = fmul float %395, %179
%397 = fmul float %396, %73
%398 = call float @llvm.maxnum.f32(float %397, float 0.000000e+00)
%399 = fmul float %398, %209
%400 = fmul float %398, %210
%401 = fmul float %398, %211
%402 = fsub float 1.000000e+00, %157
%403 = fsub float 1.000000e+00, %158
%404 = fsub float 1.000000e+00, %159
%405 = fmul float %368, %368
%406 = fmul float %368, %368
%407 = fmul float %406, %368
%408 = fmul float %405, %407
%409 = fmul float %402, %408
%410 = fadd float %409, %157
%411 = fmul float %403, %408
%412 = fadd float %411, %158
%413 = fmul float %404, %408
%414 = fadd float %413, %159
%415 = fadd float %367, -1.000000e+00
%416 = fmul float %362, %362
%417 = fmul float %362, %362
%418 = fmul float %417, %362
%419 = fmul float %416, %418
%420 = fmul float %415, %419
%421 = fadd float %420, 1.000000e+00
%422 = fadd float %367, -1.000000e+00
%423 = fmul float %363, %363
%424 = fmul float %363, %363
%425 = fmul float %424, %363
%426 = fmul float %423, %425
%427 = fmul float %422, %426
%428 = fadd float %427, 1.000000e+00
%429 = fmul float %421, %428
%430 = fmul float %429, %179
%431 = fmul float %209, %430
%432 = fadd float %431, %212
%433 = fmul float %210, %430
%434 = fadd float %433, %213
%435 = fmul float %211, %430
%436 = fadd float %435, %214
%437 = fmul float %162, %432
%438 = fmul float %163, %434
%439 = fmul float %164, %436
%440 = fmul float %399, %410
%441 = fadd float %440, %437
%442 = fmul float %400, %412
%443 = fadd float %442, %438
%444 = fmul float %401, %414
%445 = fadd float %444, %439
%446 = fmul float %324, %377
%447 = fadd float %446, %441
%448 = fmul float %325, %378
%449 = fadd float %448, %443
%450 = fmul float %326, %379
%451 = fadd float %450, %445
%452 = fmul float %121, %42
%453 = fadd float %452, %43
%454 = call float @llvm.AMDIL.clamp.(float %453, float 0.000000e+00, float 1.000000e+00)
%455 = call float @llvm.AMDGPU.lrp(float %454, float %447, float %39)
%456 = call float @llvm.AMDGPU.lrp(float %454, float %449, float %40)
%457 = call float @llvm.AMDGPU.lrp(float %454, float %451, float %41)
%458 = call i32 @llvm.SI.packf16(float %455, float %456)
%459 = bitcast i32 %458 to float
%460 = call i32 @llvm.SI.packf16(float %457, float 1.000000e+00)
%461 = bitcast i32 %460 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %459, float %461, float %459, float %461)
ret void
IF89: ; preds = %IF86
%462 = fmul float %226, %226
%463 = fmul float %227, %227
%464 = fadd float %463, %462
%465 = fmul float %228, %228
%466 = fadd float %464, %465
%467 = call float @llvm.AMDGPU.rsq.clamped.f32(float %466)
%468 = fmul float %226, %467
%469 = fmul float %227, %467
%470 = fmul float %228, %467
%471 = fsub float %54, %125
%472 = fsub float %55, %126
%473 = fsub float %56, %127
%474 = fdiv float 1.000000e+00, %468
%475 = fdiv float 1.000000e+00, %469
%476 = fdiv float 1.000000e+00, %470
%477 = fmul float %471, %474
%478 = fmul float %472, %475
%479 = fmul float %473, %476
%480 = fsub float %57, %125
%481 = fsub float %58, %126
%482 = fsub float %59, %127
%483 = fdiv float 1.000000e+00, %468
%484 = fdiv float 1.000000e+00, %469
%485 = fdiv float 1.000000e+00, %470
%486 = fmul float %480, %483
%487 = fmul float %481, %484
%488 = fmul float %482, %485
%489 = fcmp ogt float %468, 0.000000e+00
%490 = fcmp ogt float %469, 0.000000e+00
%491 = fcmp ogt float %470, 0.000000e+00
%.101 = select i1 %489, float %477, float %486
%temp68.1 = select i1 %490, float %478, float %487
%.102 = select i1 %491, float %479, float %488
%492 = fadd float %54, %57
%493 = fadd float %55, %58
%494 = fadd float %56, %59
%495 = fmul float %492, 5.000000e-01
%496 = fmul float %493, 5.000000e-01
%497 = fmul float %494, 5.000000e-01
%498 = call float @llvm.minnum.f32(float %.101, float %temp68.1)
%499 = call float @llvm.minnum.f32(float %498, float %.102)
%500 = fsub float %495, %60
%501 = fsub float %496, %61
%502 = fsub float %497, %62
%503 = fadd float %500, %125
%504 = fadd float %501, %126
%505 = fadd float %502, %127
%506 = fmul float %468, %499
%507 = fadd float %506, %503
%508 = fmul float %469, %499
%509 = fadd float %508, %504
%510 = fmul float %470, %499
%511 = fadd float %510, %505
%512 = fsub float %507, %495
%513 = fsub float %509, %496
%514 = fsub float %511, %497
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
%temp48.0 = phi float [ %512, %IF89 ], [ %226, %IF86 ]
%temp49.0 = phi float [ %513, %IF89 ], [ %227, %IF86 ]
%temp50.0 = phi float [ %514, %IF89 ], [ %228, %IF86 ]
%515 = fsub float 1.000000e+00, %80
%516 = call float @llvm.pow.f32(float %515, float 7.500000e-01)
%517 = fmul float %516, 7.000000e+00
%518 = insertelement <4 x float> undef, float %temp48.0, i32 0
%519 = insertelement <4 x float> %518, float %temp49.0, i32 1
%520 = insertelement <4 x float> %519, float %temp50.0, i32 2
%521 = insertelement <4 x float> %520, float %517, i32 3
%522 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %521)
%523 = extractelement <4 x float> %522, i32 0
%524 = extractelement <4 x float> %522, i32 1
%525 = extractelement <4 x float> %522, i32 2
%526 = extractelement <4 x float> %522, i32 3
%527 = call float @llvm.fabs.f32(float %525)
%528 = fdiv float 1.000000e+00, %527
%529 = fmul float %523, %528
%530 = fadd float %529, 1.500000e+00
%531 = fmul float %524, %528
%532 = fadd float %531, 1.500000e+00
%533 = bitcast float %532 to i32
%534 = bitcast float %530 to i32
%535 = bitcast float %526 to i32
%536 = bitcast float %517 to i32
%537 = insertelement <4 x i32> undef, i32 %533, i32 0
%538 = insertelement <4 x i32> %537, i32 %534, i32 1
%539 = insertelement <4 x i32> %538, i32 %535, i32 2
%540 = insertelement <4 x i32> %539, i32 %536, i32 3
%541 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %540, <32 x i8> %88, <16 x i8> %91, i32 4)
%542 = extractelement <4 x float> %541, i32 0
%543 = extractelement <4 x float> %541, i32 1
%544 = extractelement <4 x float> %541, i32 2
%545 = extractelement <4 x float> %541, i32 3
%546 = call float @llvm.pow.f32(float %545, float %65)
%547 = fmul float %64, %546
%548 = fmul float %547, %542
%549 = fmul float %547, %543
%550 = fmul float %547, %544
%551 = call float @llvm.AMDGPU.lrp(float %50, float %319, float %548)
%552 = call float @llvm.AMDGPU.lrp(float %50, float %320, float %549)
%553 = call float @llvm.AMDGPU.lrp(float %50, float %321, float %550)
br label %ENDIF85
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000
v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001
v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100
v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500
v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800
v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801
v_interp_p1_f32 v6, v0, 1, 2, [m0] ; C8180900
v_interp_p2_f32 v6, [v6], v1, 1, 2, [m0] ; C8190901
v_interp_p1_f32 v7, v0, 2, 2, [m0] ; C81C0A00
v_interp_p2_f32 v7, [v7], v1, 2, 2, [m0] ; C81D0A01
v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00
v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01
v_interp_p1_f32 v16, v0, 1, 3, [m0] ; C8400D00
v_interp_p2_f32 v16, [v16], v1, 1, 3, [m0] ; C8410D01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000
v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001
v_interp_p1_f32 v18, v0, 1, 4, [m0] ; C8481100
v_interp_p2_f32 v18, [v18], v1, 1, 4, [m0] ; C8491101
v_interp_p1_f32 v19, v0, 2, 4, [m0] ; C84C1200
v_interp_p2_f32 v19, [v19], v1, 2, 4, [m0] ; C84D1201
v_interp_p1_f32 v20, v0, 3, 4, [m0] ; C8501300
v_interp_p2_f32 v20, [v20], v1, 3, 4, [m0] ; C8511301
v_mul_f32_e32 v4, v2, v2 ; 10080502
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v8, v8 ; 3E081108
v_rsq_clamp_f32_e32 v10, v4 ; 7E145904
v_mul_f32_e32 v4, v18, v18 ; 10082512
v_mac_f32_e32 v4, v19, v19 ; 3E082713
v_mac_f32_e32 v4, v20, v20 ; 3E082914
v_rsq_clamp_f32_e32 v21, v4 ; 7E2A5904
v_mul_f32_e32 v4, v10, v2 ; 1008050A
v_mul_f32_e32 v3, v10, v3 ; 1006070A
v_mul_f32_e32 v2, v10, v8 ; 1004110A
v_mul_f32_e32 v11, v21, v18 ; 10162515
v_mul_f32_e32 v10, v21, v19 ; 10142715
v_mul_f32_e32 v8, v11, v4 ; 1010090B
v_mac_f32_e32 v8, v10, v3 ; 3E10070A
v_mul_f32_e32 v12, v21, v20 ; 10182915
v_mac_f32_e32 v8, v12, v2 ; 3E10050C
v_mul_f32_e32 v22, v4, v8 ; 102C1104
v_mac_f32_e32 v22, v4, v8 ; 3E2C1104
v_mul_f32_e32 v23, v3, v8 ; 102E1103
v_mac_f32_e32 v23, v3, v8 ; 3E2E1103
v_mad_f32 v25, v18, v21, -v22 ; D2820019 845A2B12
v_mad_f32 v26, v19, v21, -v23 ; D282001A 845E2B13
v_interp_p1_f32 v28, v0, 0, 5, [m0] ; C8701400
v_interp_p2_f32 v28, [v28], v1, 0, 5, [m0] ; C8711401
v_interp_p1_f32 v24, v0, 1, 5, [m0] ; C8601500
v_interp_p2_f32 v24, [v24], v1, 1, 5, [m0] ; C8611501
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v29, v0, 2, 5, [m0] ; C8741600
v_interp_p2_f32 v29, [v29], v1, 2, 5, [m0] ; C8751601
v_mul_f32_e32 v0, v2, v8 ; 10001102
v_mac_f32_e32 v0, v2, v8 ; 3E001102
s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C
s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510
s_load_dwordx8 s[40:47], s[6:7], 0x18 ; C0D40718
s_load_dwordx8 s[48:55], s[6:7], 0x20 ; C0D80720
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[16:23], s[12:15] ; F0800700 00641E0D
s_buffer_load_dword s0, s[8:11], 0x4c ; C200094C
s_buffer_load_dword s1, s[8:11], 0x4d ; C200894D
s_buffer_load_dword s2, s[8:11], 0x4e ; C201094E
v_mad_f32 v27, v20, v21, -v0 ; D282001B 84022B14
v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000
v_cmp_gt_f32_e64 vcc, |v17|, v0 ; D008016A 00020111
v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000
v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2
v_mul_f32_e32 v1, v0, v17 ; 10022300
v_rcp_f32_e32 v1, v1 ; 7E025501
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v21, s0, v30 ; 102A3C00
v_mul_f32_e32 v22, s1, v31 ; 102C3E01
v_mul_f32_e32 v23, s2, v32 ; 102E4002
v_mul_f32_e32 v8, v1, v15 ; 10101F01
v_mul_f32_e32 v1, v1, v16 ; 10022101
s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940
s_buffer_load_dword s31, s[8:11], 0x54 ; C20F8954
s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941
s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942
v_mul_f32_e32 v15, v8, v0 ; 101E0108
v_mul_f32_e32 v16, v1, v0 ; 10200101
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C
s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D
s_buffer_load_dword s0, s[8:11], 0x58 ; C2000958
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e64 v0, 1.0, s31 ; D2080000 00003EF2
v_mul_f32_e32 v8, s1, v0 ; 10100001
v_mul_f32_e32 v1, s2, v0 ; 10020002
v_mul_f32_e32 v0, s3, v0 ; 10000003
v_mac_f32_e32 v8, s31, v21 ; 3E102A1F
v_mov_b32_e32 v30, v25 ; 7E3C0319
v_mac_f32_e32 v1, s31, v22 ; 3E022C1F
v_mov_b32_e32 v31, v26 ; 7E3E031A
v_mac_f32_e32 v0, s31, v23 ; 3E002E1F
v_mov_b32_e32 v32, v27 ; 7E40031B
v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[40:47], s[24:27] ; F0800F00 00CA110D
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[48:55], s[36:39] ; F0800F00 012C0D0F
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402
s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925
v_mul_f32_e32 v14, v25, v25 ; 101C3319
v_mac_f32_e32 v14, v26, v26 ; 3E1C351A
v_mac_f32_e32 v14, v27, v27 ; 3E1C371B
v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E
s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926
s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928
s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929
s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A
v_mul_f32_e32 v15, v14, v25 ; 101E330E
v_mul_f32_e32 v16, v14, v26 ; 1020350E
v_mul_f32_e32 v14, v14, v27 ; 101C370E
v_rcp_f32_e32 v17, v15 ; 7E22550F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v19, s1, v28 ; 08263801
v_sub_f32_e32 v20, s2, v24 ; 08283002
v_rcp_f32_e32 v30, v16 ; 7E3C5510
v_mul_f32_e32 v19, v17, v19 ; 10262711
v_sub_f32_e32 v31, s13, v28 ; 083E380D
v_mul_f32_e32 v17, v17, v31 ; 10223F11
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v17, v17, v19 ; 00222711
v_rcp_f32_e32 v19, v14 ; 7E26550E
v_mul_f32_e32 v20, v30, v20 ; 1028291E
v_sub_f32_e32 v31, s16, v24 ; 083E3010
v_mul_f32_e32 v30, v30, v31 ; 103C3F1E
v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080
v_cndmask_b32_e32 v20, v30, v20 ; 0028291E
v_sub_f32_e32 v30, s3, v29 ; 083C3A03
v_mul_f32_e32 v30, v19, v30 ; 103C3D13
v_sub_f32_e32 v31, s17, v29 ; 083E3A11
v_mul_f32_e32 v19, v19, v31 ; 10263F13
v_cmp_lt_f32_e32 vcc, 0, v14 ; 7C021C80
v_cndmask_b32_e32 v19, v19, v30 ; 00263D13
v_min3_f32 v17, v17, v20, v19 ; D2A20011 044E2911
v_mov_b32_e32 v19, s13 ; 7E26020D
v_add_f32_e32 v19, s1, v19 ; 06262601
v_mov_b32_e32 v20, s16 ; 7E280210
v_add_f32_e32 v20, s2, v20 ; 06282802
v_mov_b32_e32 v30, s17 ; 7E3C0211
v_add_f32_e32 v32, s3, v30 ; 06403C03
v_mad_f32 v30, 0.5, v19, -s18 ; D282001E 804A26F0
v_add_f32_e32 v30, v28, v30 ; 063C3D1C
v_mac_f32_e32 v30, v17, v15 ; 3E3C1F11
v_mad_f32 v15, 0.5, v20, -s19 ; D282000F 804E28F0
v_add_f32_e32 v15, v24, v15 ; 061E1F18
v_mac_f32_e32 v15, v17, v16 ; 3E1E2111
v_mad_f32 v16, 0.5, v32, -s20 ; D2820010 805240F0
v_add_f32_e32 v16, v29, v16 ; 0620211D
v_mac_f32_e32 v16, v17, v14 ; 3E201D11
v_mad_f32 v30, 0.5, -v19, v30 ; D282001E 447A26F0
v_mad_f32 v31, 0.5, -v20, v15 ; D282001F 443E28F0
v_mad_f32 v32, 0.5, -v32, v16 ; D2820020 444240F0
s_or_b64 exec, exec, s[14:15] ; 88FE0E7E
s_buffer_load_dword s27, s[8:11], 0x17 ; C20D8917
s_buffer_load_dword s28, s[8:11], 0x43 ; C20E0943
s_buffer_load_dword s26, s[8:11], 0x44 ; C20D0944
s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945
s_buffer_load_dword s18, s[8:11], 0x46 ; C2090946
s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900
s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901
s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902
s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904
s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905
s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906
s_buffer_load_dword s16, s[8:11], 0x7 ; C2080907
s_buffer_load_dword s17, s[8:11], 0x8 ; C2088908
s_buffer_load_dword s20, s[8:11], 0x9 ; C20A0909
s_buffer_load_dword s21, s[8:11], 0xa ; C20A890A
s_buffer_load_dword s22, s[8:11], 0xb ; C20B090B
s_buffer_load_dword s23, s[8:11], 0xc ; C20B890C
s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D
s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E
v_sub_f32_e64 v14, 1.0, s0 ; D208000E 000000F2
v_log_f32_e32 v14, v14 ; 7E1C4F0E
v_mul_legacy_f32_e32 v14, 0x3f400000, v14 ; 0E1C1CFF 3F400000
v_exp_f32_e32 v14, v14 ; 7E1C4B0E
v_mul_f32_e32 v33, 0x40e00000, v14 ; 10421CFF 40E00000
v_cubeid_f32 v17, v30, v31, v32 ; D2880011 04823F1E
v_cubema_f32 v16, v30, v31, v32 ; D28E0010 04823F1E
s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500
s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700
v_cubesc_f32 v15, v30, v31, v32 ; D28A000F 04823F1E
v_cubetc_f32 v14, v30, v31, v32 ; D28C000E 04823F1E
v_rcp_f32_e64 v16, |v16| ; D3540110 00000110
v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000
v_mad_f32 v31, v16, v14, v30 ; D282001F 047A1D10
v_mac_f32_e32 v30, v16, v15 ; 3E3C1F10
v_mov_b32_e32 v32, v17 ; 7E400311
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v14, v33 ; 7E1C4F21
s_buffer_load_dword s29, s[8:11], 0xf ; C20E890F
s_buffer_load_dword s30, s[8:11], 0x60 ; C20F0960
v_mul_legacy_f32_e32 v14, s33, v14 ; 0E1C1C21
v_exp_f32_e32 v14, v14 ; 7E1C4B0E
v_mul_f32_e32 v14, s32, v14 ; 101C1C20
v_mul_f32_e32 v16, v30, v14 ; 10201D1E
v_mul_f32_e32 v15, v31, v14 ; 101E1D1F
v_mul_f32_e32 v14, v32, v14 ; 101C1D20
v_mov_b32_e32 v17, s31 ; 7E22021F
v_mov_b32_e32 v19, 0x3f7fff58 ; 7E2602FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v19 ; 7C02260C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B
s_buffer_load_dword s31, s[8:11], 0x3c ; C20F893C
s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936
s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938
s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939
s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A
s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930
s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931
s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932
s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934
s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935
v_mul_f32_e32 v19, v25, v25 ; 10263319
v_mac_f32_e32 v19, v26, v26 ; 3E26351A
v_mac_f32_e32 v19, v27, v27 ; 3E26371B
v_rsq_clamp_f32_e32 v19, v19 ; 7E265913
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v20, s35, v29 ; 08283A23
v_mov_b32_e32 v30, s35 ; 7E3C0223
v_sub_f32_e32 v31, s41, v28 ; 083E3829
v_sub_f32_e32 v32, s42, v24 ; 0840302A
v_add_f32_e32 v30, s43, v30 ; 063C3C2B
v_sub_f32_e32 v33, s43, v29 ; 08423A2B
v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0
v_add_f32_e32 v29, v29, v34 ; 063A451D
v_mul_f32_e32 v25, v19, v25 ; 10323313
v_mul_f32_e32 v26, v19, v26 ; 10343513
v_mul_f32_e32 v19, v19, v27 ; 10263713
v_rcp_f32_e32 v27, v25 ; 7E365519
v_rcp_f32_e32 v34, v26 ; 7E44551A
v_rcp_f32_e32 v35, v19 ; 7E465513
v_sub_f32_e32 v36, s44, v28 ; 0848382C
v_mov_b32_e32 v37, s44 ; 7E4A022C
v_add_f32_e32 v37, s41, v37 ; 064A4A29
v_mul_f32_e32 v31, v27, v31 ; 103E3F1B
v_mul_f32_e32 v27, v27, v36 ; 1036491B
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v20, v35, v20 ; 10282923
v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0
v_add_f32_e32 v28, v28, v35 ; 0638471C
v_sub_f32_e32 v35, s45, v24 ; 0846302D
v_mov_b32_e32 v36, s45 ; 7E48022D
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s42, v36 ; 0646482A
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v27, v27, v31 ; 00363F1B
v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v20, v20, v33 ; 00284314
v_min3_f32 v20, v27, v31, v20 ; D2A20014 04523F1B
v_mad_f32 v27, 0.5, v35, -s39 ; D282001B 809E46F0
v_add_f32_e32 v24, v24, v27 ; 06303718
v_mac_f32_e32 v28, v20, v25 ; 3E383314
v_mac_f32_e32 v24, v20, v26 ; 3E303514
v_mac_f32_e32 v29, v20, v19 ; 3E3A2714
v_mad_f32 v25, 0.5, -v37, v28 ; D2820019 44724AF0
v_mad_f32 v26, 0.5, -v35, v24 ; D282001A 446246F0
v_mad_f32 v27, 0.5, -v30, v29 ; D282001B 44763CF0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v19, 1.0, s0 ; D2080013 000000F2
v_log_f32_e32 v19, v19 ; 7E264F13
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v19, 0x3f400000, v19 ; 0E2626FF 3F400000
v_exp_f32_e32 v19, v19 ; 7E264B13
v_mul_f32_e32 v28, 0x40e00000, v19 ; 103826FF 40E00000
v_cubeid_f32 v32, v25, v26, v27 ; D2880020 046E3519
v_cubema_f32 v31, v25, v26, v27 ; D28E001F 046E3519
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v30, v25, v26, v27 ; D28A001E 046E3519
v_cubetc_f32 v29, v25, v26, v27 ; D28C001D 046E3519
v_rcp_f32_e64 v19, |v31| ; D3540113 0000011F
v_mov_b32_e32 v25, 0x3fc00000 ; 7E3202FF 3FC00000
v_mad_f32 v26, v19, v29, v25 ; D282001A 04663B13
v_mac_f32_e32 v25, v19, v30 ; 3E323D13
v_mov_b32_e32 v27, v32 ; 7E360320
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[40:47], s[36:39] ; F0900F00 012A1819
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v19, v27 ; 7E264F1B
v_sub_f32_e64 v20, 1.0, s12 ; D2080014 000018F2
v_mul_legacy_f32_e32 v19, s34, v19 ; 0E262622
v_exp_f32_e32 v19, v19 ; 7E264B13
v_mul_f32_e32 v19, s31, v19 ; 1026261F
v_mul_f32_e32 v24, v24, v19 ; 10302718
v_mul_f32_e32 v25, v25, v19 ; 10322719
v_mul_f32_e32 v19, v26, v19 ; 1026271A
v_mul_f32_e32 v24, v24, v20 ; 10302918
v_mul_f32_e32 v25, v25, v20 ; 10322919
v_mul_f32_e32 v19, v19, v20 ; 10262913
v_mac_f32_e32 v24, s12, v16 ; 3E30200C
v_mac_f32_e32 v25, s12, v15 ; 3E321E0C
v_mac_f32_e32 v19, s12, v14 ; 3E261C0C
v_mov_b32_e32 v14, v19 ; 7E1C0313
v_mov_b32_e32 v15, v25 ; 7E1E0319
v_mov_b32_e32 v16, v24 ; 7E200318
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mad_f32 v24, -v17, s28, s28 ; D2820018 20703911
v_mov_b32_e32 v17, s27 ; 7E22021B
v_mul_f32_e32 v21, v24, v21 ; 102A2B18
v_mul_f32_e32 v20, v24, v22 ; 10282D18
v_mul_f32_e32 v19, v24, v23 ; 10262F18
v_mul_f32_e32 v22, s26, v13 ; 102C1A1A
v_sub_f32_e64 v25, 1.0, s30 ; D2080019 00003CF2
v_mac_f32_e32 v25, s30, v18 ; 3E32241E
v_mul_f32_e32 v18, s19, v13 ; 10241A13
v_mul_f32_e32 v13, s18, v13 ; 101A1A12
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s18, s[8:11], 0x16 ; C2090916
s_buffer_load_dword s7, s[8:11], 0x48 ; C2038948
s_buffer_load_dword s12, s[8:11], 0x49 ; C2060949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_mul_f32_e32 v23, s14, v3 ; 102E060E
v_mac_f32_e32 v23, s13, v4 ; 3E2E080D
v_mac_f32_e32 v23, s15, v2 ; 3E2E040F
v_add_f32_e32 v23, s16, v23 ; 062E2E10
v_mul_f32_e32 v26, s20, v3 ; 10340614
v_mac_f32_e32 v26, s17, v4 ; 3E340811
v_mac_f32_e32 v26, s21, v2 ; 3E340415
v_add_f32_e32 v26, s22, v26 ; 06343416
v_mul_f32_e32 v27, s24, v3 ; 10360618
v_mac_f32_e32 v27, s23, v4 ; 3E360817
v_mac_f32_e32 v27, s25, v2 ; 3E360419
v_add_f32_e32 v27, s29, v27 ; 0636361D
v_add_f32_e32 v5, v23, v5 ; 060A0B17
v_add_f32_e32 v6, v26, v6 ; 060C0D1A
v_add_f32_e32 v26, v27, v7 ; 06340F1B
v_mul_f32_e32 v7, s2, v4 ; 100E0802
v_mac_f32_e32 v7, s3, v3 ; 3E0E0603
v_mac_f32_e32 v7, s1, v2 ; 3E0E0401
v_max_f32_e32 v23, 0, v7 ; 202E0E80
v_mul_f32_e32 v7, v25, v5 ; 100E0B19
v_mul_f32_e32 v5, v25, v6 ; 100A0D19
v_mul_f32_e32 v6, v25, v26 ; 100C3519
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v17, s18, v9 ; 3E221212
v_mul_f32_e32 v9, v25, v16 ; 10122119
v_mul_f32_e32 v15, v25, v15 ; 101E1F19
v_mul_f32_e32 v14, v25, v14 ; 101C1D19
v_sub_f32_e32 v16, 1.0, v24 ; 082030F2
v_add_f32_e32 v16, s0, v16 ; 06202000
v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080
v_sub_f32_e32 v24, s2, v11 ; 08301602
v_sub_f32_e32 v25, s3, v10 ; 08321403
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_sub_f32_e32 v27, s1, v12 ; 08361801
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_mul_f32_e32 v26, v26, v27 ; 1034371A
v_mul_f32_e32 v11, v11, v4 ; 1016090B
v_mad_f32 v10, -v10, v3, -v11 ; D282000A A42E070A
v_mad_f32 v10, -v12, v2, v10 ; D282000A 242A050C
v_mul_f32_e32 v4, v24, v4 ; 10080918
v_mac_f32_e32 v4, v25, v3 ; 3E080719
v_mul_f32_e32 v3, s2, v24 ; 10063002
v_mac_f32_e32 v3, s3, v25 ; 3E063203
v_mac_f32_e32 v4, v26, v2 ; 3E08051A
v_mac_f32_e32 v3, s1, v26 ; 3E063401
v_max_f32_e32 v2, 0, v3 ; 20040680
v_sub_f32_e32 v3, 1.0, v2 ; 080604F2
v_mul_f32_e32 v11, v3, v3 ; 10160703
v_mul_f32_e32 v3, v3, v11 ; 10061703
v_mul_f32_e32 v3, v3, v11 ; 10061703
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v24, v11, v12 ; 1030190B
v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C
v_mul_f32_e32 v26, v8, v25 ; 10343308
v_sub_f32_e32 v27, 1.0, v8 ; 083610F2
v_mac_f32_e32 v8, v3, v27 ; 3E103703
v_mul_f32_e32 v27, v1, v25 ; 10363301
v_sub_f32_e32 v28, 1.0, v1 ; 083802F2
v_mac_f32_e32 v1, v3, v28 ; 3E023903
v_mul_f32_e32 v25, v0, v25 ; 10323300
v_sub_f32_e32 v28, 1.0, v0 ; 083800F2
v_mac_f32_e32 v0, v3, v28 ; 3E003903
v_sub_f32_e64 v3, 1.0, s0 ; D2080003 000000F2
v_sub_f32_e32 v28, 1.0, v3 ; 083806F2
v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F
v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9
v_add_f32_e32 v29, v2, v2 ; 063A0502
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_mad_f32 v2, v29, v2, 0.5 ; D2820002 03C2051D
v_mul_f32_e32 v12, v24, v12 ; 10181918
v_mac_f32_e32 v26, v16, v12 ; 3E341910
v_mac_f32_e32 v27, v16, v12 ; 3E361910
v_mac_f32_e32 v25, v16, v12 ; 3E321910
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_log_f32_e32 v16, v28 ; 7E204F1C
v_mul_f32_e32 v3, s8, v3 ; 10060608
v_mul_f32_e32 v11, v3, v11 ; 10161703
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v16 ; 7E145510
v_sub_f32_e32 v16, 1.0, v23 ; 08202EF2
v_mul_f32_e32 v3, v3, v16 ; 10062103
v_mac_f32_e32 v3, 1.0, v23 ; 3E062EF2
v_max_f32_e32 v4, 0, v4 ; 20080880
v_log_f32_e32 v4, v4 ; 7E084F04
v_madak_f32_e32 v3, v3, v11, 0x38d1b717 ; 42061703 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v4, v11, v4 ; 0E08090B
v_rcp_f32_e32 v3, v3 ; 7E065503
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s12, v10 ; 1014140C
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v4, v16, v16 ; 10082110
v_mul_f32_e32 v10, v16, v4 ; 10140910
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_add_f32_e32 v2, -1.0, v2 ; 060404F3
v_mad_f32 v4, v2, v4, 1.0 ; D2820004 03CA0902
v_mad_f32 v2, v2, v12, 1.0 ; D2820002 03CA1902
v_mul_f32_e32 v2, v2, v4 ; 10040902
v_mul_f32_e32 v3, v23, v3 ; 10060717
v_mul_f32_e32 v3, s7, v3 ; 10060607
v_mul_f32_e32 v2, v23, v2 ; 10040517
v_mac_f32_e32 v7, v2, v22 ; 3E0E2D02
v_mul_f32_e32 v4, v7, v21 ; 10082B07
v_max_f32_e32 v3, 0, v3 ; 20060680
v_mul_f32_e32 v7, v22, v3 ; 100E0716
v_mac_f32_e32 v4, v8, v7 ; 3E080F08
v_mac_f32_e32 v5, v2, v18 ; 3E0A2502
v_mac_f32_e32 v6, v2, v13 ; 3E0C1B02
v_mul_f32_e32 v2, v18, v3 ; 10040712
v_mul_f32_e32 v3, v13, v3 ; 1006070D
v_mul_f32_e32 v5, v5, v20 ; 100A2905
v_mul_f32_e32 v6, v6, v19 ; 100C2706
v_mac_f32_e32 v5, v1, v2 ; 3E0A0501
v_mac_f32_e32 v6, v0, v3 ; 3E0C0700
v_mac_f32_e32 v4, v26, v9 ; 3E08131A
v_mac_f32_e32 v5, v27, v15 ; 3E0A1F1B
v_mac_f32_e32 v6, v25, v14 ; 3E0C1D19
v_add_f32_e64 v0, 0, v17 clamp ; D2060800 00022280
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v4, v0 ; 3E040104
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v6, v0 ; 3E020106
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 40
Code Size: 2220 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL OUT[8], GENERIC[7]
DCL CONST[0..20]
DCL TEMP[0..10], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[6].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[8].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7].xyw, TEMP[1], IMM[0].yyyy
45: MOV TEMP[8].x, TEMP[7].xxxx
46: MUL TEMP[9].x, TEMP[7].yyyy, CONST[1].xxxx
47: MOV TEMP[8].y, TEMP[9].xxxx
48: ADD TEMP[7].xy, TEMP[8].xyyy, TEMP[7].wwww
49: MOV TEMP[7].zw, TEMP[1].wwzw
50: MUL TEMP[8], TEMP[3].xyzz, TEMP[3].yzzx
51: DP4 TEMP[9].x, CONST[2], TEMP[8]
52: DP4 TEMP[10].x, CONST[3], TEMP[8]
53: MOV TEMP[9].y, TEMP[10].xxxx
54: DP4 TEMP[8].x, CONST[4], TEMP[8]
55: MOV TEMP[9].z, TEMP[8].xxxx
56: MUL TEMP[8].x, TEMP[3].yyyy, TEMP[3].yyyy
57: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[8].xxxx
58: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[9].xyzz
59: ADD TEMP[8].xyz, TEMP[0].xyzz, -CONST[0].xyzz
60: MOV TEMP[8].yzw, TEMP[8].yxyz
61: MOV TEMP[8].x, TEMP[1].zzzz
62: MOV TEMP[0].xyz, TEMP[0].xyzx
63: MOV OUT[8], TEMP[0]
64: MOV OUT[1], TEMP[2]
65: MOV OUT[3], TEMP[5]
66: MOV OUT[2], TEMP[4]
67: MOV OUT[4], TEMP[6]
68: MOV OUT[5], TEMP[3]
69: MOV OUT[6], TEMP[7]
70: MOV OUT[0], TEMP[1]
71: MOV OUT[7], TEMP[8]
72: END
; ModuleID = 'tgsi'
; Vertex-shader entry point generated from the TGSI VERT listing printed above.
; The eleven parameters are unnamed, so they are implicitly %0..%10. In this
; body only %1 (source of the descriptor fed to llvm.SI.load.const), %4 (source
; of the descriptors fed to llvm.SI.vs.load.input) and %5/%7 (added together to
; form the fetch index) are read.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; --- Constant fetches -------------------------------------------------------
; Element 0 of %1 is the descriptor used for every llvm.SI.load.const below.
; The i32 operand is a byte offset; matching uses against the TGSI listing shows
; CONST[n].{x,y,z,w} living at 16*n + {0,4,8,12}. Only components that are
; actually used are fetched.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; CONST[0].xyz (subtracted from TEMP[0].xyz, TGSI 59)
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; CONST[1].x (TGSI 46)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; CONST[2], CONST[3], CONST[4] (DP4 operands, TGSI 51-54)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; CONST[5].xyz (TGSI 58)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
; CONST[6], CONST[7], CONST[8] (all four components) and CONST[9].xyz
; (TGSI 0-3, and the xyz parts again in TGSI 32-34)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; CONST[10].xyz, CONST[11].xyz, CONST[12].xyz (TGSI 17-25)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
; CONST[14] and CONST[15] (scale/offset pairs, TGSI 8 and 15)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
; CONST[16].x (selector compared against 0.0, TGSI 9)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
; CONST[17] .. CONST[20] (TGSI 4-7)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; --- Vertex input fetches ---------------------------------------------------
; Each input uses descriptor element k of %4 and the index %5 + %7 (the same
; sum is recomputed for every fetch). Only the components the shader reads are
; extracted.
; IN[0].xyzw -> %85..%88
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
; IN[1].xyz -> %93..%95
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
; IN[2].xy -> %100, %101
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
; IN[3].xy -> %106, %107
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
; IN[4].xyzw -> %112..%115
%108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0
%110 = add i32 %5, %7
%111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
%114 = extractelement <4 x float> %111, i32 2
%115 = extractelement <4 x float> %111, i32 3
; --- TGSI 0-3: TEMP[0] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z
;               (+ CONST[9]*IN[0].w for xyz only) ----------------------------
; Results: x = %137, y = %139, z = %141. The w lane stops at %135 because
; TGSI 3 writes only .xyz.
%116 = fmul float %32, %85
%117 = fmul float %33, %85
%118 = fmul float %34, %85
%119 = fmul float %35, %85
%120 = fmul float %36, %86
%121 = fadd float %120, %116
%122 = fmul float %37, %86
%123 = fadd float %122, %117
%124 = fmul float %38, %86
%125 = fadd float %124, %118
%126 = fmul float %39, %86
%127 = fadd float %126, %119
%128 = fmul float %40, %87
%129 = fadd float %128, %121
%130 = fmul float %41, %87
%131 = fadd float %130, %123
%132 = fmul float %42, %87
%133 = fadd float %132, %125
%134 = fmul float %43, %87
%135 = fadd float %134, %127
%136 = fmul float %44, %88
%137 = fadd float %136, %129
%138 = fmul float %45, %88
%139 = fadd float %138, %131
%140 = fmul float %46, %88
%141 = fadd float %140, %133
; --- TGSI 4-7: TEMP[1] = CONST[17..20] combined with IN[0].xyzw -------------
; Results: x = %163, y = %165, z = %167, w = %169 (exported as POSITION below).
%142 = fmul float %65, %85
%143 = fmul float %66, %85
%144 = fmul float %67, %85
%145 = fmul float %68, %85
%146 = fmul float %69, %86
%147 = fadd float %146, %142
%148 = fmul float %70, %86
%149 = fadd float %148, %143
%150 = fmul float %71, %86
%151 = fadd float %150, %144
%152 = fmul float %72, %86
%153 = fadd float %152, %145
%154 = fmul float %73, %87
%155 = fadd float %154, %147
%156 = fmul float %74, %87
%157 = fadd float %156, %149
%158 = fmul float %75, %87
%159 = fadd float %158, %151
%160 = fmul float %76, %87
%161 = fadd float %160, %153
%162 = fmul float %77, %88
%163 = fadd float %162, %155
%164 = fmul float %78, %88
%165 = fadd float %164, %157
%166 = fmul float %79, %88
%167 = fadd float %166, %159
%168 = fmul float %80, %88
%169 = fadd float %168, %161
; --- TGSI 8: TEMP[2].xy = IN[2].xy * CONST[14].xy + CONST[14].zw ------------
%170 = fmul float %100, %56
%171 = fadd float %170, %58
%172 = fmul float %101, %57
%173 = fadd float %172, %59
; --- TGSI 9-14: the UIF/ELSE became two selects: take IN[2].xy when
;     CONST[16].x == 0.0, otherwise IN[3].xy --------------------------------
%174 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %174, float %100, float %106
%.44 = select i1 %174, float %101, float %107
; --- TGSI 15-16: selected.xy * CONST[15].xy + CONST[15].zw -> TEMP[2].zw ----
%175 = fmul float %., %60
%176 = fadd float %175, %62
%177 = fmul float %.44, %61
%178 = fadd float %177, %63
; --- TGSI 17-28: TEMP[3].xyz = (CONST[10].xyz, CONST[11].xyz, CONST[12].xyz)
;     each dotted with IN[1].xyz --------------------------------------------
%179 = fmul float %47, %93
%180 = fmul float %50, %93
%181 = fmul float %53, %93
%182 = fmul float %48, %94
%183 = fadd float %182, %179
%184 = fmul float %51, %94
%185 = fadd float %184, %180
%186 = fmul float %54, %94
%187 = fadd float %186, %181
%188 = fmul float %49, %95
%189 = fadd float %188, %183
%190 = fmul float %52, %95
%191 = fadd float %190, %185
%192 = fmul float %55, %95
%193 = fadd float %192, %187
; --- TGSI 29-31: normalise TEMP[3].xyz (DP3, RSQ, MUL) -> %200, %201, %202 --
%194 = fmul float %189, %189
%195 = fmul float %191, %191
%196 = fadd float %195, %194
%197 = fmul float %193, %193
%198 = fadd float %196, %197
%199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198)
%200 = fmul float %189, %199
%201 = fmul float %191, %199
%202 = fmul float %193, %199
; --- TGSI 32-34: TEMP[4].xyz = CONST[6..8].xyz combined with IN[4].xyz ------
%203 = fmul float %32, %112
%204 = fmul float %33, %112
%205 = fmul float %34, %112
%206 = fmul float %36, %113
%207 = fadd float %206, %203
%208 = fmul float %37, %113
%209 = fadd float %208, %204
%210 = fmul float %38, %113
%211 = fadd float %210, %205
%212 = fmul float %40, %114
%213 = fadd float %212, %207
%214 = fmul float %41, %114
%215 = fadd float %214, %209
%216 = fmul float %42, %114
%217 = fadd float %216, %211
; --- TGSI 35-37: normalise TEMP[4].xyz -> %224, %225, %226 ------------------
%218 = fmul float %213, %213
%219 = fmul float %215, %215
%220 = fadd float %219, %218
%221 = fmul float %217, %217
%222 = fadd float %220, %221
%223 = call float @llvm.AMDGPU.rsq.clamped.f32(float %222)
%224 = fmul float %213, %223
%225 = fmul float %215, %223
%226 = fmul float %217, %223
; --- TGSI 38-39: TEMP[5].xyz = TEMP[3].yzx*TEMP[4].zxy - TEMP[3].zxy*TEMP[4].yzx
;     (cross-product form of the two normalised vectors) ---------------------
%227 = fmul float %202, %225
%228 = fmul float %200, %226
%229 = fmul float %201, %224
%230 = fmul float %201, %226
%231 = fsub float %230, %227
%232 = fmul float %202, %224
%233 = fsub float %232, %228
%234 = fmul float %200, %225
%235 = fsub float %234, %229
; --- TGSI 40: scale TEMP[5].xyz by IN[4].w ----------------------------------
%236 = fmul float %231, %115
%237 = fmul float %233, %115
%238 = fmul float %235, %115
; --- TGSI 44-48: TEMP[7].xy from TEMP[1]: (x*0.5 + w*0.5,
;     y*0.5*CONST[1].x + w*0.5) ---------------------------------------------
%239 = fmul float %163, 5.000000e-01
%240 = fmul float %165, 5.000000e-01
%241 = fmul float %169, 5.000000e-01
%242 = fmul float %240, %16
%243 = fadd float %239, %241
%244 = fadd float %242, %241
; --- TGSI 50: TEMP[8] = TEMP[3].xyzz * TEMP[3].yzzx = (x*y, y*z, z*z, z*x) --
%245 = fmul float %200, %201
%246 = fmul float %201, %202
%247 = fmul float %202, %202
%248 = fmul float %202, %200
; --- TGSI 51-55: three DP4s of that vector with CONST[2], CONST[3], CONST[4]
;     -> %255, %262, %269 ---------------------------------------------------
%249 = fmul float %17, %245
%250 = fmul float %18, %246
%251 = fadd float %249, %250
%252 = fmul float %19, %247
%253 = fadd float %251, %252
%254 = fmul float %20, %248
%255 = fadd float %253, %254
%256 = fmul float %21, %245
%257 = fmul float %22, %246
%258 = fadd float %256, %257
%259 = fmul float %23, %247
%260 = fadd float %258, %259
%261 = fmul float %24, %248
%262 = fadd float %260, %261
%263 = fmul float %25, %245
%264 = fmul float %26, %246
%265 = fadd float %263, %264
%266 = fmul float %27, %247
%267 = fadd float %265, %266
%268 = fmul float %28, %248
%269 = fadd float %267, %268
; --- TGSI 56-58: (x*x - y*y) * CONST[5].xyz + DP4 results -> %274, %276, %278
%270 = fmul float %201, %201
%271 = fmul float %200, %200
%272 = fsub float %271, %270
%273 = fmul float %29, %272
%274 = fadd float %273, %255
%275 = fmul float %30, %272
%276 = fadd float %275, %262
%277 = fmul float %31, %272
%278 = fadd float %277, %269
; --- TGSI 59: TEMP[0].xyz - CONST[0].xyz ------------------------------------
%279 = fsub float %137, %13
%280 = fsub float %139, %14
%281 = fsub float %141, %15
; --- Exports ----------------------------------------------------------------
; The fourth i32 operand is the export target. Comparing with the TGSI MOV OUT
; list: 32..39 carry OUT[1]..OUT[8] (GENERIC[0]..GENERIC[7]) and 12 carries
; OUT[0] (POSITION). Targets 33-36 export a literal 0.0 in the w lane.
; OUT[1] = TEMP[2]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %171, float %173, float %176, float %178)
; OUT[2] = TEMP[4] (normalised vector from IN[4])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %224, float %225, float %226, float 0.000000e+00)
; OUT[3] = TEMP[5] (cross-product result scaled by IN[4].w)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %236, float %237, float %238, float 0.000000e+00)
; OUT[4] = TEMP[6] (copy of the normalised vector from IN[1])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %200, float %201, float %202, float 0.000000e+00)
; OUT[5] = TEMP[3] (result of TGSI 58)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %274, float %276, float %278, float 0.000000e+00)
; OUT[6] = TEMP[7]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %243, float %244, float %167, float %169)
; OUT[7] = TEMP[8] = (TEMP[1].z, TEMP[0].xyz - CONST[0].xyz)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %167, float %279, float %280, float %281)
; OUT[8] = TEMP[0]
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %137, float %139, float %141, float %135)
; OUT[0] (POSITION) = TEMP[1]; this is the only export with the third operand
; set to 1 (presumably the "done" flag -- confirm against the intrinsic docs).
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %163, float %165, float %167, float %169)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are printed above.
; Comments relate registers back to the TGSI names by matching constant offsets
; and operand order against the IR; the trailing hex on each line is the
; instruction encoding emitted by the disassembler.
;
; v1 = 0.0 is kept as the w lane for exports 33-36. v0 = s10 + v0 is the index
; used by every buffer_load_format below (the IR computes it as %5 + %7).
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[28:31]: descriptor used by all s_buffer_load_dword constant fetches.
; s[8:9] + {0x0,0x4,0x8,0xc,0x10}: the five descriptors used for the vertex
; input fetches (the last load overwrites s[8:9] itself).
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s32, s[28:31], 0x23 ; C2101D23
; Vertex inputs. The destination ranges overlap on purpose: only the leading
; components of IN[1]/IN[2]/IN[3] are used, so the next fetch reuses the
; trailing registers after an s_waitcnt.
;   v[2:5]   = IN[0].xyzw     v[6:8]   = IN[1].xyz     v[9:10] = IN[2].xy
;   v[11:12] = IN[3].xy       v[13:16] = IN[4].xyzw
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
; Scalar constant fetches. The immediate appears to be a dword index: the range
; 0x0..0x53 lines up with the IR's byte offsets 0..332 divided by four, so
; CONST[n].{x,y,z,w} is dword 4*n + {0,1,2,3}.
s_buffer_load_dword s33, s[28:31], 0x24 ; C2109D24
s_buffer_load_dword s34, s[28:31], 0x25 ; C2111D25
s_buffer_load_dword s35, s[28:31], 0x26 ; C2119D26
s_buffer_load_dword s36, s[28:31], 0x28 ; C2121D28
s_buffer_load_dword s3, s[28:31], 0x13 ; C2019D13
s_buffer_load_dword s2, s[28:31], 0x14 ; C2011D14
s_buffer_load_dword s0, s[28:31], 0x15 ; C2001D15
s_buffer_load_dword s1, s[28:31], 0x16 ; C2009D16
s_buffer_load_dword s18, s[28:31], 0x18 ; C2091D18
s_buffer_load_dword s37, s[28:31], 0x29 ; C2129D29
s_buffer_load_dword s38, s[28:31], 0x2a ; C2131D2A
s_buffer_load_dword s39, s[28:31], 0x2c ; C2139D2C
s_buffer_load_dword s40, s[28:31], 0x2d ; C2141D2D
s_buffer_load_dword s41, s[28:31], 0x2e ; C2149D2E
s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19
s_buffer_load_dword s19, s[28:31], 0x1a ; C2099D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s22, s[28:31], 0x1d ; C20B1D1D
s_buffer_load_dword s43, s[28:31], 0x30 ; C2159D30
s_buffer_load_dword s44, s[28:31], 0x31 ; C2161D31
s_buffer_load_dword s45, s[28:31], 0x32 ; C2169D32
s_buffer_load_dword s46, s[28:31], 0x38 ; C2171D38
s_buffer_load_dword s47, s[28:31], 0x39 ; C2179D39
s_buffer_load_dword s24, s[28:31], 0x1e ; C20C1D1E
s_buffer_load_dword s48, s[28:31], 0x1f ; C2181D1F
s_buffer_load_dword s26, s[28:31], 0x20 ; C20D1D20
s_buffer_load_dword s27, s[28:31], 0x21 ; C20D9D21
s_buffer_load_dword s25, s[28:31], 0x22 ; C20C9D22
s_buffer_load_dword s4, s[28:31], 0x3f ; C2021D3F
s_buffer_load_dword s5, s[28:31], 0x40 ; C2029D40
s_buffer_load_dword s49, s[28:31], 0x44 ; C2189D44
s_buffer_load_dword s50, s[28:31], 0x45 ; C2191D45
s_buffer_load_dword s51, s[28:31], 0x46 ; C2199D46
s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A
s_buffer_load_dword s8, s[28:31], 0x3b ; C2041D3B
s_buffer_load_dword s52, s[28:31], 0x3c ; C21A1D3C
s_buffer_load_dword s53, s[28:31], 0x3d ; C21A9D3D
s_buffer_load_dword s14, s[28:31], 0x3e ; C2071D3E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; s4/s5/s6/s8/s14 are consumed here (copied to VGPR accumulators or compared)
; and then immediately reused as destinations for further constant loads.
;   v0  = CONST[15].w   v17 = CONST[14].z   v18 = CONST[14].w   v19 = CONST[15].z
;   vcc = (CONST[16].x == 0)
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s13, s[28:31], 0x9 ; C2069D09
v_cmp_eq_f32_e64 vcc, 0, s5 ; D004006A 00000A80
s_buffer_load_dword s9, s[28:31], 0xa ; C2049D0A
s_buffer_load_dword s7, s[28:31], 0xb ; C2039D0B
s_buffer_load_dword s12, s[28:31], 0xc ; C2061D0C
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s16, s[28:31], 0xd ; C2081D0D
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s11, s[28:31], 0xe ; C2059D0E
s_buffer_load_dword s10, s[28:31], 0xf ; C2051D0F
s_buffer_load_dword s15, s[28:31], 0x10 ; C2079D10
v_mov_b32_e32 v19, s14 ; 7E26020E
s_buffer_load_dword s17, s[28:31], 0x11 ; C2089D11
s_buffer_load_dword s14, s[28:31], 0x12 ; C2071D12
s_buffer_load_dword s54, s[28:31], 0x47 ; C21B1D47
s_buffer_load_dword s55, s[28:31], 0x48 ; C21B9D48
s_buffer_load_dword s56, s[28:31], 0x49 ; C21C1D49
s_buffer_load_dword s57, s[28:31], 0x4a ; C21C9D4A
s_buffer_load_dword s58, s[28:31], 0x4b ; C21D1D4B
s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00
s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01
s_buffer_load_dword s6, s[28:31], 0x2 ; C2031D02
s_buffer_load_dword s8, s[28:31], 0x4 ; C2041D04
s_buffer_load_dword s21, s[28:31], 0x8 ; C20A9D08
s_buffer_load_dword s59, s[28:31], 0x4c ; C21D9D4C
s_buffer_load_dword s60, s[28:31], 0x4d ; C21E1D4D
s_buffer_load_dword s61, s[28:31], 0x4e ; C21E9D4E
s_buffer_load_dword s62, s[28:31], 0x4f ; C21F1D4F
s_buffer_load_dword s63, s[28:31], 0x50 ; C21F9D50
s_buffer_load_dword s64, s[28:31], 0x51 ; C2201D51
s_buffer_load_dword s65, s[28:31], 0x52 ; C2209D52
s_buffer_load_dword s28, s[28:31], 0x53 ; C20E1D53
; v20 = TEMP[0].w = CONST[6].w*IN[0].x + CONST[7].w*IN[0].y + CONST[8].w*IN[0].z
v_mul_f32_e32 v20, s42, v2 ; 1028042A
v_mac_f32_e32 v20, s48, v3 ; 3E280630
v_mac_f32_e32 v20, s32, v4 ; 3E280820
; v17/v18 = OUT[1].xy = IN[2].xy * CONST[14].xy + CONST[14].zw
v_mac_f32_e32 v17, s46, v9 ; 3E22122E
v_mac_f32_e32 v18, s47, v10 ; 3E24142F
; v21..v24 = TEMP[1].xyzw (CONST[17..20] applied to IN[0]); the wait in the
; middle covers the constants issued in the second batch above.
v_mul_f32_e32 v21, s49, v2 ; 102A0431
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s55, v3 ; 3E2A0637
v_mac_f32_e32 v21, s59, v4 ; 3E2A083B
v_mac_f32_e32 v21, s63, v5 ; 3E2A0A3F
v_mul_f32_e32 v22, s50, v2 ; 102C0432
v_mac_f32_e32 v22, s56, v3 ; 3E2C0638
v_mac_f32_e32 v22, s60, v4 ; 3E2C083C
v_mac_f32_e32 v22, s64, v5 ; 3E2C0A40
v_mul_f32_e32 v23, s51, v2 ; 102E0433
v_mac_f32_e32 v23, s57, v3 ; 3E2E0639
v_mac_f32_e32 v23, s61, v4 ; 3E2E083D
v_mac_f32_e32 v23, s65, v5 ; 3E2E0A41
v_mul_f32_e32 v24, s54, v2 ; 10300436
v_mac_f32_e32 v24, s58, v3 ; 3E30063A
v_mac_f32_e32 v24, s62, v4 ; 3E30083E
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
; Select matching the IR's two `select` instructions: v9/v10 keep IN[2].xy when
; vcc is set, otherwise take IN[3].xy from v11/v12.
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
; v11, v12, v6 = un-normalised TEMP[3].xyz (CONST[10..12].xyz applied to IN[1])
v_mul_f32_e32 v11, s36, v6 ; 10160C24
v_mac_f32_e32 v11, s37, v7 ; 3E160E25
v_mul_f32_e32 v12, s39, v6 ; 10180C27
v_mac_f32_e32 v12, s40, v7 ; 3E180E28
v_mul_f32_e32 v6, s43, v6 ; 100C0C2B
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s38, v8 ; 3E161026
v_mac_f32_e32 v12, s41, v8 ; 3E181029
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
; v7, v8, v2 = TEMP[0].xyz (CONST[6..9] applied to IN[0])
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s23, v3 ; 3E0E0617
v_mac_f32_e32 v7, s26, v4 ; 3E0E081A
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v8, s20, v2 ; 10100414
v_mac_f32_e32 v8, s22, v3 ; 3E100616
v_mac_f32_e32 v8, s27, v4 ; 3E10081B
v_mac_f32_e32 v8, s34, v5 ; 3E100A22
v_mul_f32_e32 v2, s19, v2 ; 10040413
v_mac_f32_e32 v2, s24, v3 ; 3E040618
v_mac_f32_e32 v2, s25, v4 ; 3E040819
v_mac_f32_e32 v2, s35, v5 ; 3E040A23
; v19/v0 = OUT[1].zw = selected.xy * CONST[15].xy + CONST[15].zw
v_mac_f32_e32 v19, s52, v9 ; 3E261234
v_mac_f32_e32 v0, s53, v10 ; 3E001435
; Export target 32 (OUT[1]); the expcnt wait lets v0 be reused afterwards.
exp 15, 32, 0, 0, 0, v17, v18, v19, v0 ; F800020F 00131211
s_waitcnt expcnt(0) ; BF8C070F
; v0, v3, v4 = un-normalised TEMP[4].xyz (CONST[6..8].xyz applied to IN[4].xyz)
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s23, v14 ; 3E001C17
v_mul_f32_e32 v3, s20, v13 ; 10061A14
v_mac_f32_e32 v3, s22, v14 ; 3E061C16
v_mul_f32_e32 v4, s19, v13 ; 10081A13
v_mac_f32_e32 v4, s24, v14 ; 3E081C18
v_mac_f32_e32 v0, s26, v15 ; 3E001E1A
v_mac_f32_e32 v3, s27, v15 ; 3E061E1B
v_mac_f32_e32 v4, s25, v15 ; 3E081E19
; v5 = rsq_clamp(dot(TEMP[3], TEMP[3]))
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
; v9 = rsq_clamp(dot(TEMP[4], TEMP[4]))
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Normalised TEMP[3] -> (v10, v11, v5); normalised TEMP[4] -> (v0, v3, v4)
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
; Cross-product terms (TGSI 38-39) -> (v6, v9, v12), then scaled by IN[4].w (v16)
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
; Export targets 33 (OUT[2]) and 34 (OUT[3]); w lane is v1 = 0.
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
s_waitcnt expcnt(0) ; BF8C070F
; Three DP4s (TGSI 50-55) accumulated in v3 / v4 / v0, one product term of
; (x*y, y*z, z*z, z*x) at a time against CONST[2] / CONST[3] / CONST[4].
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s13, v0 ; 1006000D
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s17, v0 ; 10000011
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s21, v6 ; 3E060C15
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s11, v6 ; 3E080C0B
v_mac_f32_e32 v0, s14, v6 ; 3E000C0E
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s3, v6 ; 3E000C03
; Export target 35 (OUT[4]): the normalised TEMP[3] vector.
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
s_waitcnt expcnt(0) ; BF8C070F
; v5 = x*x - y*y, then added into the DP4 results scaled by CONST[5].xyz (TGSI 56-58)
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mad_f32 v5, v10, v10, -v5 ; D2820005 8416150A
v_mac_f32_e32 v3, s2, v5 ; 3E060A02
v_mac_f32_e32 v4, s0, v5 ; 3E080A00
v_mac_f32_e32 v0, s1, v5 ; 3E000A01
; v5 = 0.5 * TEMP[1].y, v6 = 0.5 * TEMP[1].w (TGSI 44)
v_mul_f32_e32 v5, 0.5, v22 ; 100A2CF0
v_mul_f32_e32 v6, 0.5, v24 ; 100C30F0
; Export target 36 (OUT[5]).
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
s_waitcnt expcnt(0) ; BF8C070F
; v0 = 0.5*TEMP[1].x + 0.5*TEMP[1].w ; v6 = 0.5*TEMP[1].w + CONST[1].x * 0.5*TEMP[1].y
v_mad_f32 v0, 0.5, v21, v6 ; D2820000 041A2AF0
v_mac_f32_e32 v6, s8, v5 ; 3E0C0A08
; Export target 37 (OUT[6]).
exp 15, 37, 0, 0, 0, v0, v6, v23, v24 ; F800025F 18170600
s_waitcnt expcnt(0) ; BF8C070F
; TEMP[0].xyz - CONST[0].xyz (TGSI 59). v1 is overwritten here; its zero value
; is no longer needed once export 36 has been issued and waited on.
v_subrev_f32_e32 v0, s4, v7 ; 0A000E04
v_subrev_f32_e32 v1, s5, v8 ; 0A021005
v_subrev_f32_e32 v3, s6, v2 ; 0A060406
; Export targets 38 (OUT[7]), 39 (OUT[8]) and 12 (OUT[0], POSITION). The
; position export is the only one with the fourth field set to 1 (presumably
; the "done" bit -- confirm against the ISA manual).
exp 15, 38, 0, 0, 0, v23, v0, v1, v3 ; F800026F 03010017
exp 15, 39, 0, 0, 0, v7, v8, v2, v20 ; F800027F 14020807
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 932 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL IN[7], GENERIC[7], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[22..24]
DCL CONST[26]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D
11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx
13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
14: MOV_SAT TEMP[4].x, TEMP[4].xxxx
15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
16: SQRT TEMP[4].x, TEMP[4].xxxx
17: MOV TEMP[3].z, TEMP[4].xxxx
18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
20: MOV TEMP[0].y, TEMP[1].xxxx
21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz
22: MOV TEMP[0].z, TEMP[1].xxxx
23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
24: RSQ TEMP[1].x, TEMP[1].xxxx
25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
26: DP3 TEMP[1].x, IN[6].yzww, IN[6].yzww
27: RSQ TEMP[1].x, TEMP[1].xxxx
28: MUL TEMP[1].xyz, IN[6].yzww, TEMP[1].xxxx
29: MOV TEMP[2].xy, IN[0].xyyy
30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz
33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww
34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
36: MOV TEMP[5].xy, IN[0].xyyy
37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx
39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx
40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
42: MOV TEMP[7].xyz, IMM[0].wwww
43: MOV TEMP[8].w, IMM[0].zzzz
44: MOV TEMP[8].xyz, TEMP[0].xyzx
45: DP4 TEMP[9].x, CONST[1], TEMP[8]
46: DP4 TEMP[10].x, CONST[2], TEMP[8]
47: MOV TEMP[9].y, TEMP[10].xxxx
48: DP4 TEMP[8].x, CONST[3], TEMP[8]
49: MOV TEMP[9].z, TEMP[8].xxxx
50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz
51: MOV TEMP[9].xy, IN[5].xyyy
52: MOV TEMP[9].w, IN[5].wwww
53: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D
54: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
55: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
56: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
57: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
58: MUL TEMP[10].xyz, IMM[0].xxxx, TEMP[10].xyzz
59: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
60: MOV TEMP[11].xyz, TEMP[10].xyzx
61: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww
62: UIF TEMP[12].xxxx :0
63: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
64: RSQ TEMP[12].x, TEMP[12].xxxx
65: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
66: MOV TEMP[13].xyz, -IN[7].xyzx
67: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
68: RCP TEMP[15].x, TEMP[12].xxxx
69: RCP TEMP[15].y, TEMP[12].yyyy
70: RCP TEMP[15].z, TEMP[12].zzzz
71: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
72: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
73: RCP TEMP[15].x, TEMP[12].xxxx
74: RCP TEMP[15].y, TEMP[12].yyyy
75: RCP TEMP[15].z, TEMP[12].zzzz
76: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
77: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz
78: UIF TEMP[15].xxxx :0
79: MOV TEMP[16].x, TEMP[14].xxxx
80: ELSE :0
81: MOV TEMP[16].x, TEMP[13].xxxx
82: ENDIF
83: UIF TEMP[15].yyyy :0
84: MOV TEMP[17].x, TEMP[14].yyyy
85: ELSE :0
86: MOV TEMP[17].x, TEMP[13].yyyy
87: ENDIF
88: UIF TEMP[15].zzzz :0
89: MOV TEMP[14].x, TEMP[14].zzzz
90: ELSE :0
91: MOV TEMP[14].x, TEMP[13].zzzz
92: ENDIF
93: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
94: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
95: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
96: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
97: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
98: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz
99: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
100: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
101: ENDIF
102: ADD TEMP[12].x, IMM[0].zzzz, -CONST[24].xxxx
103: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
104: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
105: MOV TEMP[11].xyz, TEMP[11].xyzz
106: MOV TEMP[11].w, TEMP[12].xxxx
107: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
108: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
109: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
110: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
111: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww
112: UIF TEMP[12].xxxx :0
113: MOV TEMP[12].xyz, TEMP[10].xyzx
114: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww
115: UIF TEMP[13].xxxx :0
116: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
117: RSQ TEMP[13].x, TEMP[13].xxxx
118: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
119: MOV TEMP[13].xyz, -IN[7].xyzx
120: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
121: RCP TEMP[15].x, TEMP[10].xxxx
122: RCP TEMP[15].y, TEMP[10].yyyy
123: RCP TEMP[15].z, TEMP[10].zzzz
124: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
125: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
126: RCP TEMP[15].x, TEMP[10].xxxx
127: RCP TEMP[15].y, TEMP[10].yyyy
128: RCP TEMP[15].z, TEMP[10].zzzz
129: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
130: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz
131: UIF TEMP[15].xxxx :0
132: MOV TEMP[16].x, TEMP[14].xxxx
133: ELSE :0
134: MOV TEMP[16].x, TEMP[13].xxxx
135: ENDIF
136: UIF TEMP[15].yyyy :0
137: MOV TEMP[17].x, TEMP[14].yyyy
138: ELSE :0
139: MOV TEMP[17].x, TEMP[13].yyyy
140: ENDIF
141: UIF TEMP[15].zzzz :0
142: MOV TEMP[14].x, TEMP[14].zzzz
143: ELSE :0
144: MOV TEMP[14].x, TEMP[13].zzzz
145: ENDIF
146: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
147: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
148: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
149: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
150: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
151: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz
152: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
153: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
154: ENDIF
155: ADD TEMP[10].x, IMM[0].zzzz, -CONST[24].xxxx
156: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
157: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
158: MOV TEMP[12].xyz, TEMP[12].xyzz
159: MOV TEMP[12].w, TEMP[10].xxxx
160: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
161: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
162: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
163: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
164: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
165: ELSE :0
166: MOV TEMP[7].xyz, TEMP[11].xyzx
167: ENDIF
168: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
169: MOV TEMP[1].xyz, -TEMP[1].xyzx
170: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx
171: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
172: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
173: RSQ TEMP[11].x, TEMP[11].xxxx
174: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
176: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
177: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
178: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx
179: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
180: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
181: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[5].xxxx
182: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz
183: LG2 TEMP[13].x, TEMP[13].xxxx
184: RCP TEMP[13].x, TEMP[13].xxxx
185: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx
186: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
187: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[6].xxxx
188: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx
189: MUL TEMP[16].x, IMM[0].xxxx, TEMP[11].xxxx
190: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
191: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[1].xxxx
192: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx
193: ADD TEMP[16].x, IMM[0].zzzz, -TEMP[1].xxxx
194: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
195: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx
196: MOV_SAT TEMP[4].x, TEMP[4].xxxx
197: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
198: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
199: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
200: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
201: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
202: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[12].xxxx
203: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[12].xxxx
204: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww
205: RCP TEMP[1].x, TEMP[1].xxxx
206: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
207: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
208: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
209: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].zzzz
210: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
211: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
212: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
213: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
214: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
215: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
216: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz
217: ADD TEMP[10].xyz, IMM[0].zzzz, -TEMP[3].xyzz
218: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
219: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
220: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
221: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
222: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
223: ADD TEMP[10].x, TEMP[5].xxxx, IMM[0].yyyy
224: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
225: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
226: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
227: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
228: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].zzzz
229: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
230: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
231: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
232: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
233: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
234: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].zzzz
235: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
236: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
237: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
238: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
239: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
240: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
241: MOV TEMP[0].xyz, TEMP[0].xyzx
242: MAD TEMP[1].x, IN[6].xxxx, CONST[5].zzzz, CONST[5].wwww
243: MOV_SAT TEMP[1].x, TEMP[1].xxxx
244: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
245: MOV TEMP[0].xyz, TEMP[0].xyzx
246: MOV TEMP[0].w, IMM[0].zzzz
247: MOV OUT[0], TEMP[0]
248: END
; ModuleID = 'tgsi'
; Shader entry point: the LLVM IR generated for the TGSI listing that ends
; directly above (the "TGSI nn" notes below are instruction numbers in that
; listing). It uses llvm.SI.fs.interp, so this is presumably a fragment
; shader -- TODO confirm against attribute group #0.
;
; Arguments referenced in this body:
;   %1      array of 16-byte descriptors; element 0 feeds llvm.SI.load.const
;   %2, %3  arrays of 16-byte / 32-byte descriptors passed to llvm.SI.sample*
;   %5, %7  passed unchanged to every llvm.SI.fs.interp call
; The remaining arguments are not referenced here.
;
; Note that the basic blocks are not in execution order: ENDIF85 (the final
; shading and export) is printed before IF89/ENDIF88, which branch back to it.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Fetch the constant-buffer descriptor (element 0 of %1) and read the scalars
; the shader needs. The second operand of llvm.SI.load.const is a byte offset;
; matched against the TGSI listing, offset = 16*N + 4*component for CONST[N]
; (e.g. 384 -> CONST[24].x, 156 -> CONST[9].w).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; CONST[0].xyz
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
; CONST[1].xyzw, CONST[2].xyzw, CONST[3].xyzw
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
; CONST[4].xyz, CONST[5].zw
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
; CONST[8].xyz, CONST[9].xyzw, CONST[10].w
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
; CONST[11].xy
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
; CONST[12].xyz, CONST[13].xyz, CONST[14].xyzw, CONST[15].xy
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
; CONST[16].xyzw, CONST[17].xyz
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
; CONST[18].xyw, CONST[19].xyz
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
; CONST[22].x, CONST[23].x, CONST[24].x, CONST[26].x
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
; For slots 0..5, load one <32 x i8> descriptor from %3 and one <16 x i8>
; descriptor from %2. Each pair is later handed to a llvm.SI.sample* call
; (presumably texture resource + sampler state -- TODO confirm).
; Slot 0: %84 / %86
%83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0
%85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0
; Slot 1: %89 / %92
%87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)*
%89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0
%90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)*
%92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0
; Slot 2: %95 / %98
%93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)*
%95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0
%96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)*
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
; Slot 3: %101 / %104
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
; Slot 4: %107 / %110
%105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)*
%107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0
%108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)*
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
; Slot 5: %113 / %116
%111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%112 = bitcast <8 x i32> addrspace(2)* %111 to <32 x i8> addrspace(2)*
%113 = load <32 x i8>, <32 x i8> addrspace(2)* %112, align 32, !tbaa !0
%114 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%115 = bitcast <4 x i32> addrspace(2)* %114 to <16 x i8> addrspace(2)*
%116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !0
; Interpolate the shader inputs. Matched against the TGSI listing, the first
; operand selects the component (0..3 = x..w) and the second the input index:
; e.g. %131/%132/%133 are IN[5].x/.y/.w (TGSI 51-53) and %134 is IN[6].x
; (TGSI 242).
%117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%134 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%135 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%136 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
%137 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7)
%138 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7)
%139 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7)
%140 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7)
; Sample slot 3 at IN[0].xy and keep components 3 and 1. Each is remapped with
; x*2-1 and scaled by CONST[22].x; a third component is rebuilt as
; sqrt(1 - clamp(a*a + b*b, 0, 1)).
; NOTE(review): looks like a two-channel normal-map decode -- confirm.
%141 = bitcast float %117 to i32
%142 = bitcast float %118 to i32
%143 = insertelement <2 x i32> undef, i32 %141, i32 0
%144 = insertelement <2 x i32> %143, i32 %142, i32 1
%145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %101, <16 x i8> %104, i32 2)
%146 = extractelement <4 x float> %145, i32 1
%147 = extractelement <4 x float> %145, i32 3
%148 = fmul float %147, 2.000000e+00
%149 = fadd float %148, -1.000000e+00
%150 = fmul float %146, 2.000000e+00
%151 = fadd float %150, -1.000000e+00
%152 = fmul float %149, %79
%153 = fmul float %151, %79
%154 = fmul float %152, %152
%155 = fmul float %153, %153
%156 = fadd float %154, %155
%157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00)
%158 = fsub float 1.000000e+00, %157
%159 = call float @llvm.sqrt.f32(float %158)
; Combine the three decoded components with IN[1], IN[2] and IN[3] as weights
; (%152*IN[1] + %153*IN[2] + %159*IN[3]) and normalise the result.
; %181..%183 is the unit vector "n" used by the rest of the shader.
%160 = fmul float %152, %119
%161 = fmul float %153, %122
%162 = fadd float %161, %160
%163 = fmul float %159, %125
%164 = fadd float %162, %163
%165 = fmul float %152, %120
%166 = fmul float %153, %123
%167 = fadd float %166, %165
%168 = fmul float %159, %126
%169 = fadd float %167, %168
%170 = fmul float %152, %121
%171 = fmul float %153, %124
%172 = fadd float %171, %170
%173 = fmul float %159, %127
%174 = fadd float %172, %173
%175 = fmul float %164, %164
%176 = fmul float %169, %169
%177 = fadd float %176, %175
%178 = fmul float %174, %174
%179 = fadd float %177, %178
%180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179)
%181 = fmul float %164, %180
%182 = fmul float %169, %180
%183 = fmul float %174, %180
; Normalise IN[6].yzw; %190..%192 is the unit vector "v" below.
%184 = fmul float %135, %135
%185 = fmul float %136, %136
%186 = fadd float %185, %184
%187 = fmul float %137, %137
%188 = fadd float %186, %187
%189 = call float @llvm.AMDGPU.rsq.clamped.f32(float %188)
%190 = fmul float %135, %189
%191 = fmul float %136, %189
%192 = fmul float %137, %189
; Sample slot 2 at IN[0].xy and multiply its xyz by CONST[19].xyz (%201..%203).
; %204..%206 = lrp(CONST[23].x, %201..%203, CONST[16].xyz).
; %208 = CONST[16].w - CONST[23].x*CONST[16].w, and %209..%211 = %201..%203 * %208.
%193 = bitcast float %117 to i32
%194 = bitcast float %118 to i32
%195 = insertelement <2 x i32> undef, i32 %193, i32 0
%196 = insertelement <2 x i32> %195, i32 %194, i32 1
%197 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %196, <32 x i8> %95, <16 x i8> %98, i32 2)
%198 = extractelement <4 x float> %197, i32 0
%199 = extractelement <4 x float> %197, i32 1
%200 = extractelement <4 x float> %197, i32 2
%201 = fmul float %76, %198
%202 = fmul float %77, %199
%203 = fmul float %78, %200
%204 = call float @llvm.AMDGPU.lrp(float %80, float %201, float %66)
%205 = call float @llvm.AMDGPU.lrp(float %80, float %202, float %67)
%206 = call float @llvm.AMDGPU.lrp(float %80, float %203, float %68)
%207 = fmul float %80, %69
%208 = fsub float %69, %207
%209 = fmul float %201, %208
%210 = fmul float %202, %208
%211 = fmul float %203, %208
; Sample slot 4 at IN[0].xy; %220 = sample.y*CONST[26].x + (1 - CONST[26].x).
; %220 later scales both %245..%247 and the cube-map result.
%212 = bitcast float %117 to i32
%213 = bitcast float %118 to i32
%214 = insertelement <2 x i32> undef, i32 %212, i32 0
%215 = insertelement <2 x i32> %214, i32 %213, i32 1
%216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %107, <16 x i8> %110, i32 2)
%217 = extractelement <4 x float> %216, i32 1
%218 = fsub float 1.000000e+00, %82
%219 = fmul float %217, %82
%220 = fadd float %219, %218
; %226 = max(dot(n, CONST[0].xyz), 0).
%221 = fmul float %181, %24
%222 = fmul float %182, %25
%223 = fadd float %222, %221
%224 = fmul float %183, %26
%225 = fadd float %223, %224
%226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00)
; Three 4-component dot products of (n, 1) with CONST[1], CONST[2], CONST[3]
; (the DP4s at TGSI 46/48 are the last two), then add IN[4].xyz (TGSI 50).
%227 = fmul float %27, %181
%228 = fmul float %28, %182
%229 = fadd float %227, %228
%230 = fmul float %29, %183
%231 = fadd float %229, %230
%232 = fadd float %231, %30
%233 = fmul float %31, %181
%234 = fmul float %32, %182
%235 = fadd float %233, %234
%236 = fmul float %33, %183
%237 = fadd float %235, %236
%238 = fadd float %237, %34
%239 = fmul float %35, %181
%240 = fmul float %36, %182
%241 = fadd float %239, %240
%242 = fmul float %37, %183
%243 = fadd float %241, %242
%244 = fadd float %243, %38
%245 = fadd float %128, %232
%246 = fadd float %129, %238
%247 = fadd float %130, %244
; Projective lookup (TGSI 51-54): divide IN[5].xy by IN[5].w, sample slot 5,
; and scale CONST[17].xyz by the sample's x component (%256..%258).
%248 = fdiv float %131, %133
%249 = fdiv float %132, %133
%250 = bitcast float %248 to i32
%251 = bitcast float %249 to i32
%252 = insertelement <2 x i32> undef, i32 %250, i32 0
%253 = insertelement <2 x i32> %252, i32 %251, i32 1
%254 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %113, <16 x i8> %116, i32 2)
%255 = extractelement <4 x float> %254, i32 0
%256 = fmul float %70, %255
%257 = fmul float %71, %255
%258 = fmul float %72, %255
; TGSI 55: scale %245..%247 by %220.
%259 = fmul float %245, %220
%260 = fmul float %246, %220
%261 = fmul float %247, %220
; TGSI 56-59: r = v - 2*dot(n, v)*n, stored in %273..%275.
%262 = fmul float %181, %190
%263 = fmul float %182, %191
%264 = fadd float %263, %262
%265 = fmul float %183, %192
%266 = fadd float %264, %265
%267 = fmul float %266, %181
%268 = fmul float %266, %182
%269 = fmul float %266, %183
%270 = fmul float %267, 2.000000e+00
%271 = fmul float %268, 2.000000e+00
%272 = fmul float %269, 2.000000e+00
%273 = fsub float %190, %270
%274 = fsub float %191, %271
%275 = fsub float %192, %272
; TGSI 61-62: run the IF block only when CONST[10].w > 0.
%276 = fcmp ogt float %51, 0.000000e+00
br i1 %276, label %IF, label %ENDIF
IF: ; preds = %main_body
; TGSI 63-100. Normalise r (%286..%288). Per axis, compute (CONST[8] - IN[7])/r
; and (CONST[9] - IN[7])/r and keep the first when that axis of r is positive,
; else the second. With t = min of the three picks and
; centre = (CONST[8] + CONST[9]) * 0.5, the result is
; r*t + (centre - CONST[10].xyz + IN[7]) - centre.
; NOTE(review): this has the shape of a box-projected cube-map lookup
; correction -- confirm against the originating shader source.
%277 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%278 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%279 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%280 = fmul float %273, %273
%281 = fmul float %274, %274
%282 = fadd float %281, %280
%283 = fmul float %275, %275
%284 = fadd float %282, %283
%285 = call float @llvm.AMDGPU.rsq.clamped.f32(float %284)
%286 = fmul float %273, %285
%287 = fmul float %274, %285
%288 = fmul float %275, %285
%289 = fsub float %44, %138
%290 = fsub float %45, %139
%291 = fsub float %46, %140
%292 = fdiv float 1.000000e+00, %286
%293 = fdiv float 1.000000e+00, %287
%294 = fdiv float 1.000000e+00, %288
%295 = fmul float %289, %292
%296 = fmul float %290, %293
%297 = fmul float %291, %294
%298 = fsub float %47, %138
%299 = fsub float %48, %139
%300 = fsub float %49, %140
; The reciprocals are recomputed here because the TGSI repeats its RCPs
; (TGSI 68-70 and 73-75).
%301 = fdiv float 1.000000e+00, %286
%302 = fdiv float 1.000000e+00, %287
%303 = fdiv float 1.000000e+00, %288
%304 = fmul float %298, %301
%305 = fmul float %299, %302
%306 = fmul float %300, %303
%307 = fcmp ogt float %286, 0.000000e+00
%308 = fcmp ogt float %287, 0.000000e+00
%309 = fcmp ogt float %288, 0.000000e+00
; The three TGSI UIF/ELSE/ENDIF groups (TGSI 78-92) appear here as selects.
%. = select i1 %307, float %295, float %304
%temp68.0 = select i1 %308, float %296, float %305
%.100 = select i1 %309, float %297, float %306
%310 = fadd float %44, %47
%311 = fadd float %45, %48
%312 = fadd float %46, %49
%313 = fmul float %310, 5.000000e-01
%314 = fmul float %311, 5.000000e-01
%315 = fmul float %312, 5.000000e-01
%316 = call float @llvm.minnum.f32(float %., float %temp68.0)
%317 = call float @llvm.minnum.f32(float %316, float %.100)
%318 = fsub float %313, %279
%319 = fsub float %314, %278
%320 = fsub float %315, %277
%321 = fadd float %318, %138
%322 = fadd float %319, %139
%323 = fadd float %320, %140
%324 = fmul float %286, %317
%325 = fadd float %324, %321
%326 = fmul float %287, %317
%327 = fadd float %326, %322
%328 = fmul float %288, %317
%329 = fadd float %328, %323
%330 = fsub float %325, %313
%331 = fsub float %327, %314
%332 = fsub float %329, %315
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Lookup direction: the adjusted vector from IF, or the raw r otherwise.
%temp44.0 = phi float [ %330, %IF ], [ %273, %main_body ]
%temp45.0 = phi float [ %331, %IF ], [ %274, %main_body ]
%temp46.0 = phi float [ %332, %IF ], [ %275, %main_body ]
; TGSI 102-107: %335 = pow(1 - CONST[24].x, 0.75) * 7 is the fourth operand
; of the TXL (CUBE) on slot 0. The direction goes through llvm.AMDGPU.cube;
; its components 0 and 1 are divided by |component 2| and offset by 1.5
; before the llvm.SI.samplel call.
%333 = fsub float 1.000000e+00, %81
%334 = call float @llvm.pow.f32(float %333, float 7.500000e-01)
%335 = fmul float %334, 7.000000e+00
%336 = insertelement <4 x float> undef, float %temp44.0, i32 0
%337 = insertelement <4 x float> %336, float %temp45.0, i32 1
%338 = insertelement <4 x float> %337, float %temp46.0, i32 2
%339 = insertelement <4 x float> %338, float %335, i32 3
%340 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %339)
%341 = extractelement <4 x float> %340, i32 0
%342 = extractelement <4 x float> %340, i32 1
%343 = extractelement <4 x float> %340, i32 2
%344 = extractelement <4 x float> %340, i32 3
%345 = call float @llvm.fabs.f32(float %343)
%346 = fdiv float 1.000000e+00, %345
%347 = fmul float %341, %346
%348 = fadd float %347, 1.500000e+00
%349 = fmul float %342, %346
%350 = fadd float %349, 1.500000e+00
%351 = bitcast float %350 to i32
%352 = bitcast float %348 to i32
%353 = bitcast float %344 to i32
%354 = bitcast float %335 to i32
%355 = insertelement <4 x i32> undef, i32 %351, i32 0
%356 = insertelement <4 x i32> %355, i32 %352, i32 1
%357 = insertelement <4 x i32> %356, i32 %353, i32 2
%358 = insertelement <4 x i32> %357, i32 %354, i32 3
%359 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %358, <32 x i8> %84, <16 x i8> %86, i32 4)
%360 = extractelement <4 x float> %359, i32 0
%361 = extractelement <4 x float> %359, i32 1
%362 = extractelement <4 x float> %359, i32 2
%363 = extractelement <4 x float> %359, i32 3
; TGSI 108-110: scale the sampled rgb by CONST[11].x * pow(sample.w, CONST[11].y).
%364 = call float @llvm.pow.f32(float %363, float %53)
%365 = fmul float %52, %364
%366 = fmul float %365, %360
%367 = fmul float %365, %361
%368 = fmul float %365, %362
; TGSI 111-112: when CONST[9].w is below the immediate (just under 1.0),
; take a second cube-map sample (IF86 -> ENDIF88) and mix it in.
%369 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %369, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
; TGSI 114-115: the second lookup adjusts its direction only if CONST[14].w > 0.
%370 = fcmp ogt float %63, 0.000000e+00
br i1 %370, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
; Final shading (TGSI 168-248). Incoming colour: the mixed result from ENDIF88,
; or the single slot-0 sample when the second lookup was skipped.
%temp28.0 = phi float [ %598, %ENDIF88 ], [ %366, %ENDIF ]
%temp29.0 = phi float [ %599, %ENDIF88 ], [ %367, %ENDIF ]
%temp30.0 = phi float [ %600, %ENDIF88 ], [ %368, %ENDIF ]
; TGSI 168: scale by %220.
%371 = fmul float %temp28.0, %220
%372 = fmul float %temp29.0, %220
%373 = fmul float %temp30.0, %220
; %374 = 1 - CONST[24].x. h (%384..%386) = normalize(CONST[0].xyz - v).
%374 = fsub float 1.000000e+00, %81
%375 = fsub float %24, %190
%376 = fsub float %25, %191
%377 = fsub float %26, %192
%378 = fmul float %375, %375
%379 = fmul float %376, %376
%380 = fadd float %379, %378
%381 = fmul float %377, %377
%382 = fadd float %380, %381
%383 = call float @llvm.AMDGPU.rsq.clamped.f32(float %382)
%384 = fmul float %375, %383
%385 = fmul float %376, %383
%386 = fmul float %377, %383
; %393 = max(dot(n, -v), 0); %399 = max(dot(CONST[0].xyz, h), 0).
%387 = fmul float %190, %181
%388 = fsub float -0.000000e+00, %387
%389 = fmul float %191, %182
%390 = fsub float %388, %389
%391 = fmul float %192, %183
%392 = fsub float %390, %391
%393 = call float @llvm.maxnum.f32(float %392, float 0.000000e+00)
%394 = fmul float %24, %384
%395 = fmul float %25, %385
%396 = fadd float %395, %394
%397 = fmul float %26, %386
%398 = fadd float %396, %397
%399 = call float @llvm.maxnum.f32(float %398, float 0.000000e+00)
; %401 = %374^2 * CONST[18].w.
; %408 = (10 / log2((1 - %374)*imm + imm))^2 is used as the pow exponent below.
%400 = fmul float %374, %374
%401 = fmul float %400, %75
%402 = fsub float 1.000000e+00, %374
%403 = fmul float %402, 0x3FEEF9DB20000000
%404 = fadd float %403, 0x3F9EB851E0000000
%405 = call float @llvm.log2.f32(float %404)
%406 = fdiv float 1.000000e+00, %405
%407 = fmul float %406, 1.000000e+01
%408 = fmul float %407, %407
%409 = fsub float 1.000000e+00, %226
%410 = fsub float 1.000000e+00, %393
; %414 = 2*%399 * (%399*%374) + 0.5.
%411 = fmul float %399, 2.000000e+00
%412 = fmul float %399, %374
%413 = fmul float %411, %412
%414 = fadd float %413, 5.000000e-01
%415 = fsub float 1.000000e+00, %399
%416 = fsub float 1.000000e+00, %393
; %419 = clamp(CONST[24].x + (1 - %208), 0, 1)   (TGSI 194-196).
%417 = fsub float 1.000000e+00, %208
%418 = fadd float %81, %417
%419 = call float @llvm.AMDIL.clamp.(float %418, float 0.000000e+00, float 1.000000e+00)
; %423 = %416^5, built as (x*x) * (x*x*x); it weights the lrp between %419
; and %204..%206 (TGSI 197-201).
%420 = fmul float %416, %416
%421 = fmul float %416, %416
%422 = fmul float %421, %416
%423 = fmul float %420, %422
%424 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %204)
%425 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %205)
%426 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %206)
; %431 = 1 / (lrp(%226, 1, %401) * lrp(%393, 1, %401) + small immediate)
; (TGSI 202-205).
%427 = call float @llvm.AMDGPU.lrp(float %226, float 1.000000e+00, float %401)
%428 = call float @llvm.AMDGPU.lrp(float %393, float 1.000000e+00, float %401)
%429 = fmul float %427, %428
%430 = fadd float %429, 0x3F1A36E2E0000000
%431 = fdiv float 1.000000e+00, %430
; %438 = pow(max(dot(n, h), 0), %408). It is then scaled by
; (%408 + 1)*CONST[18].y, %431, %226 and CONST[18].x and clamped at 0
; (TGSI 206-215).
%432 = fmul float %181, %384
%433 = fmul float %182, %385
%434 = fadd float %433, %432
%435 = fmul float %183, %386
%436 = fadd float %434, %435
%437 = call float @llvm.maxnum.f32(float %436, float 0.000000e+00)
%438 = call float @llvm.pow.f32(float %437, float %408)
%439 = fadd float %408, 1.000000e+00
%440 = fmul float %439, %74
%441 = fmul float %438, %440
%442 = fmul float %431, %441
%443 = fmul float %442, %226
%444 = fmul float %443, %73
%445 = call float @llvm.maxnum.f32(float %444, float 0.000000e+00)
; TGSI 216: multiply by the slot-5 term %256..%258.
%446 = fmul float %445, %256
%447 = fmul float %445, %257
%448 = fmul float %445, %258
; TGSI 217-222: %457/%459/%461 = %204..%206 + (1 - %204..%206) * %415^5.
%449 = fsub float 1.000000e+00, %204
%450 = fsub float 1.000000e+00, %205
%451 = fsub float 1.000000e+00, %206
%452 = fmul float %415, %415
%453 = fmul float %415, %415
%454 = fmul float %453, %415
%455 = fmul float %452, %454
%456 = fmul float %449, %455
%457 = fadd float %456, %204
%458 = fmul float %450, %455
%459 = fadd float %458, %205
%460 = fmul float %451, %455
%461 = fadd float %460, %206
; TGSI 223-236:
; %477 = (1 + (%414 - 1)*%409^5) * (1 + (%414 - 1)*%410^5) * %226.
%462 = fadd float %414, -1.000000e+00
%463 = fmul float %409, %409
%464 = fmul float %409, %409
%465 = fmul float %464, %409
%466 = fmul float %463, %465
%467 = fmul float %462, %466
%468 = fadd float %467, 1.000000e+00
%469 = fadd float %414, -1.000000e+00
%470 = fmul float %410, %410
%471 = fmul float %410, %410
%472 = fmul float %471, %410
%473 = fmul float %470, %472
%474 = fmul float %469, %473
%475 = fadd float %474, 1.000000e+00
%476 = fmul float %468, %475
%477 = fmul float %476, %226
; TGSI 237-240: sum the three contributions:
;   %209..%211 * (%256..%258 * %477 + %259..%261)
; + %446..%448 * %457/%459/%461
; + %371..%373 * %424..%426
%478 = fmul float %256, %477
%479 = fadd float %478, %259
%480 = fmul float %257, %477
%481 = fadd float %480, %260
%482 = fmul float %258, %477
%483 = fadd float %482, %261
%484 = fmul float %209, %479
%485 = fmul float %210, %481
%486 = fmul float %211, %483
%487 = fmul float %446, %457
%488 = fadd float %487, %484
%489 = fmul float %447, %459
%490 = fadd float %489, %485
%491 = fmul float %448, %461
%492 = fadd float %491, %486
%493 = fmul float %371, %424
%494 = fadd float %493, %488
%495 = fmul float %372, %425
%496 = fadd float %495, %490
%497 = fmul float %373, %426
%498 = fadd float %497, %492
; TGSI 242-244: weight = clamp(IN[6].x*CONST[5].z + CONST[5].w, 0, 1), then
; lrp(weight, colour, CONST[4].xyz).
; NOTE(review): presumably distance fog -- confirm.
%499 = fmul float %134, %42
%500 = fadd float %499, %43
%501 = call float @llvm.AMDIL.clamp.(float %500, float 0.000000e+00, float 1.000000e+00)
%502 = call float @llvm.AMDGPU.lrp(float %501, float %494, float %39)
%503 = call float @llvm.AMDGPU.lrp(float %501, float %496, float %40)
%504 = call float @llvm.AMDGPU.lrp(float %501, float %498, float %41)
; TGSI 246-247: the fourth output component is the constant 1.0. Pack the four
; values into two f16 pairs and export them.
%505 = call i32 @llvm.SI.packf16(float %502, float %503)
%506 = bitcast i32 %505 to float
%507 = call i32 @llvm.SI.packf16(float %504, float 1.000000e+00)
%508 = bitcast i32 %507 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %506, float %508, float %506, float %508)
ret void
IF89: ; preds = %IF86
; TGSI 116-153: the same direction adjustment as block IF, using CONST[12] and
; CONST[13] in place of CONST[8] and CONST[9], and CONST[14].xyz in place of
; CONST[10].xyz.
%509 = fmul float %273, %273
%510 = fmul float %274, %274
%511 = fadd float %510, %509
%512 = fmul float %275, %275
%513 = fadd float %511, %512
%514 = call float @llvm.AMDGPU.rsq.clamped.f32(float %513)
%515 = fmul float %273, %514
%516 = fmul float %274, %514
%517 = fmul float %275, %514
%518 = fsub float %54, %138
%519 = fsub float %55, %139
%520 = fsub float %56, %140
%521 = fdiv float 1.000000e+00, %515
%522 = fdiv float 1.000000e+00, %516
%523 = fdiv float 1.000000e+00, %517
%524 = fmul float %518, %521
%525 = fmul float %519, %522
%526 = fmul float %520, %523
%527 = fsub float %57, %138
%528 = fsub float %58, %139
%529 = fsub float %59, %140
%530 = fdiv float 1.000000e+00, %515
%531 = fdiv float 1.000000e+00, %516
%532 = fdiv float 1.000000e+00, %517
%533 = fmul float %527, %530
%534 = fmul float %528, %531
%535 = fmul float %529, %532
%536 = fcmp ogt float %515, 0.000000e+00
%537 = fcmp ogt float %516, 0.000000e+00
%538 = fcmp ogt float %517, 0.000000e+00
%.101 = select i1 %536, float %524, float %533
%temp68.1 = select i1 %537, float %525, float %534
%.102 = select i1 %538, float %526, float %535
%539 = fadd float %54, %57
%540 = fadd float %55, %58
%541 = fadd float %56, %59
%542 = fmul float %539, 5.000000e-01
%543 = fmul float %540, 5.000000e-01
%544 = fmul float %541, 5.000000e-01
%545 = call float @llvm.minnum.f32(float %.101, float %temp68.1)
%546 = call float @llvm.minnum.f32(float %545, float %.102)
%547 = fsub float %542, %60
%548 = fsub float %543, %61
%549 = fsub float %544, %62
%550 = fadd float %547, %138
%551 = fadd float %548, %139
%552 = fadd float %549, %140
%553 = fmul float %515, %546
%554 = fadd float %553, %550
%555 = fmul float %516, %546
%556 = fadd float %555, %551
%557 = fmul float %517, %546
%558 = fadd float %557, %552
%559 = fsub float %554, %542
%560 = fsub float %556, %543
%561 = fsub float %558, %544
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
; Second lookup direction: adjusted by IF89, or the raw r otherwise.
%temp48.0 = phi float [ %559, %IF89 ], [ %273, %IF86 ]
%temp49.0 = phi float [ %560, %IF89 ], [ %274, %IF86 ]
%temp50.0 = phi float [ %561, %IF89 ], [ %275, %IF86 ]
; TGSI 155-163: the same sampling sequence as block ENDIF, on slot 1 with
; CONST[15].xy as the scale/exponent pair.
%562 = fsub float 1.000000e+00, %81
%563 = call float @llvm.pow.f32(float %562, float 7.500000e-01)
%564 = fmul float %563, 7.000000e+00
%565 = insertelement <4 x float> undef, float %temp48.0, i32 0
%566 = insertelement <4 x float> %565, float %temp49.0, i32 1
%567 = insertelement <4 x float> %566, float %temp50.0, i32 2
%568 = insertelement <4 x float> %567, float %564, i32 3
%569 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %568)
%570 = extractelement <4 x float> %569, i32 0
%571 = extractelement <4 x float> %569, i32 1
%572 = extractelement <4 x float> %569, i32 2
%573 = extractelement <4 x float> %569, i32 3
%574 = call float @llvm.fabs.f32(float %572)
%575 = fdiv float 1.000000e+00, %574
%576 = fmul float %570, %575
%577 = fadd float %576, 1.500000e+00
%578 = fmul float %571, %575
%579 = fadd float %578, 1.500000e+00
%580 = bitcast float %579 to i32
%581 = bitcast float %577 to i32
%582 = bitcast float %573 to i32
%583 = bitcast float %564 to i32
%584 = insertelement <4 x i32> undef, i32 %580, i32 0
%585 = insertelement <4 x i32> %584, i32 %581, i32 1
%586 = insertelement <4 x i32> %585, i32 %582, i32 2
%587 = insertelement <4 x i32> %586, i32 %583, i32 3
%588 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %587, <32 x i8> %89, <16 x i8> %92, i32 4)
%589 = extractelement <4 x float> %588, i32 0
%590 = extractelement <4 x float> %588, i32 1
%591 = extractelement <4 x float> %588, i32 2
%592 = extractelement <4 x float> %588, i32 3
%593 = call float @llvm.pow.f32(float %592, float %65)
%594 = fmul float %64, %593
%595 = fmul float %594, %589
%596 = fmul float %594, %590
%597 = fmul float %594, %591
; TGSI 164: lrp with CONST[9].w as the weight, the slot-0 colour (%366..%368)
; as the second operand and the slot-1 colour (%595..%597) as the third.
%598 = call float @llvm.AMDGPU.lrp(float %50, float %366, float %595)
%599 = call float @llvm.AMDGPU.lrp(float %50, float %367, float %596)
%600 = call float @llvm.AMDGPU.lrp(float %50, float %368, float %597)
br label %ENDIF85
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000
v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001
v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100
v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s2, s[12:15], 0x58 ; C2010D58
s_buffer_load_dword s1, s[12:15], 0x5c ; C2008D5C
s_buffer_load_dword s0, s[12:15], 0x60 ; C2000D60
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900
v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00
v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01
v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00
v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01
v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00
v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01
v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000
v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001
v_interp_p1_f32 v3, v0, 1, 4, [m0] ; C80C1100
v_interp_p2_f32 v3, [v3], v1, 1, 4, [m0] ; C80D1101
v_interp_p1_f32 v5, v0, 2, 4, [m0] ; C8141200
v_interp_p2_f32 v5, [v5], v1, 2, 4, [m0] ; C8151201
v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400
v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401
v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500
v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501
v_interp_p1_f32 v22, v0, 3, 5, [m0] ; C8581700
v_interp_p2_f32 v22, [v22], v1, 3, 5, [m0] ; C8591701
v_interp_p1_f32 v4, v0, 0, 6, [m0] ; C8101800
v_interp_p2_f32 v4, [v4], v1, 0, 6, [m0] ; C8111801
v_interp_p1_f32 v23, v0, 1, 6, [m0] ; C85C1900
v_interp_p2_f32 v23, [v23], v1, 1, 6, [m0] ; C85D1901
v_interp_p1_f32 v24, v0, 2, 6, [m0] ; C8601A00
v_interp_p2_f32 v24, [v24], v1, 2, 6, [m0] ; C8611A01
s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514
s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710
v_interp_p1_f32 v25, v0, 3, 6, [m0] ; C8641B00
v_interp_p2_f32 v25, [v25], v1, 3, 6, [m0] ; C8651B01
v_interp_p1_f32 v14, v0, 0, 7, [m0] ; C8381C00
v_interp_p2_f32 v14, [v14], v1, 0, 7, [m0] ; C8391C01
v_interp_p1_f32 v12, v0, 1, 7, [m0] ; C8301D00
v_interp_p2_f32 v12, [v12], v1, 1, 7, [m0] ; C8311D01
v_interp_p1_f32 v15, v0, 2, 7, [m0] ; C83C1E00
v_interp_p2_f32 v15, [v15], v1, 2, 7, [m0] ; C83D1E01
s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718
s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720
s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[52:59], s[8:11] ; F0800A00 004D0010
image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[44:51], s[40:43] ; F0800700 014B1E10
s_waitcnt vmcnt(1) ; BF8C0771
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
v_mul_f32_e32 v1, s2, v1 ; 10020202
v_mul_f32_e32 v0, s2, v0 ; 10000002
v_mul_f32_e32 v6, v6, v1 ; 100C0306
v_mac_f32_e32 v6, v9, v0 ; 3E0C0109
v_mul_f32_e32 v7, v7, v1 ; 100E0307
v_mac_f32_e32 v7, v10, v0 ; 3E0E010A
v_mul_f32_e32 v10, v8, v1 ; 10140308
v_mac_f32_e32 v10, v11, v0 ; 3E14010B
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v6, v13, v0 ; 3E0C010D
v_mac_f32_e32 v7, v18, v0 ; 3E0E0112
v_mac_f32_e32 v10, v19, v0 ; 3E140113
v_mul_f32_e32 v0, v6, v6 ; 10000D06
v_mac_f32_e32 v0, v7, v7 ; 3E000F07
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v23, v23 ; 10022F17
v_mac_f32_e32 v1, v24, v24 ; 3E023118
v_mac_f32_e32 v1, v25, v25 ; 3E023319
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v9, v0, v6 ; 10120D00
v_mul_f32_e32 v8, v0, v7 ; 10100F00
v_mul_f32_e32 v7, v0, v10 ; 100E1500
v_mul_f32_e32 v11, v1, v23 ; 10162F01
v_mul_f32_e32 v10, v1, v24 ; 10143101
v_mul_f32_e32 v0, v11, v9 ; 1000130B
v_mac_f32_e32 v0, v10, v8 ; 3E00110A
v_mul_f32_e32 v13, v1, v25 ; 101A3301
v_mac_f32_e32 v0, v13, v7 ; 3E000F0D
v_mul_f32_e32 v6, v9, v0 ; 100C0109
v_mac_f32_e32 v6, v9, v0 ; 3E0C0109
v_mul_f32_e32 v18, v8, v0 ; 10240108
v_mac_f32_e32 v18, v8, v0 ; 3E240108
v_mad_f32 v27, v23, v1, -v6 ; D282001B 841A0317
v_mad_f32 v28, v24, v1, -v18 ; D282001C 844A0318
v_mul_f32_e32 v6, v7, v0 ; 100C0107
v_mac_f32_e32 v6, v7, v0 ; 3E0C0107
s_buffer_load_dword s2, s[12:15], 0x4c ; C2010D4C
s_buffer_load_dword s3, s[12:15], 0x4d ; C2018D4D
s_buffer_load_dword s8, s[12:15], 0x4e ; C2040D4E
v_mad_f32 v29, v25, v1, -v6 ; D282001D 841A0319
v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000
v_cmp_gt_f32_e64 vcc, |v22|, v0 ; D008016A 00020116
v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000
v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2
v_mul_f32_e32 v1, v0, v22 ; 10022D00
v_rcp_f32_e32 v1, v1 ; 7E025501
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v24, s2, v30 ; 10303C02
v_mul_f32_e32 v25, s3, v31 ; 10323E03
v_mul_f32_e32 v26, s8, v32 ; 10344008
v_mul_f32_e32 v6, v1, v20 ; 100C2901
v_mul_f32_e32 v1, v1, v21 ; 10022B01
s_buffer_load_dword s2, s[12:15], 0x40 ; C2010D40
s_buffer_load_dword s3, s[12:15], 0x41 ; C2018D41
s_buffer_load_dword s8, s[12:15], 0x42 ; C2040D42
v_mul_f32_e32 v18, v6, v0 ; 10240106
v_mul_f32_e32 v19, v1, v0 ; 10260101
s_buffer_load_dword s9, s[12:15], 0x27 ; C2048D27
s_buffer_load_dword s10, s[12:15], 0x2b ; C2050D2B
s_buffer_load_dword s40, s[12:15], 0x2c ; C2140D2C
s_buffer_load_dword s41, s[12:15], 0x2d ; C2148D2D
v_sub_f32_e64 v0, 1.0, s1 ; D2080000 000002F2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s2, v0 ; 100C0002
v_mul_f32_e32 v1, s3, v0 ; 10020003
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_mac_f32_e32 v6, s1, v24 ; 3E0C3001
v_mov_b32_e32 v30, v27 ; 7E3C031B
v_mac_f32_e32 v1, s1, v25 ; 3E023201
v_mov_b32_e32 v31, v28 ; 7E3E031C
v_mac_f32_e32 v0, s1, v26 ; 3E003401
v_mov_b32_e32 v32, v29 ; 7E40031D
v_cmp_lt_f32_e64 s[2:3], 0, s10 ; D0020002 00001480
image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[16:19] ; F0800F00 00861410
image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[32:39], s[20:23] ; F0800F00 00A81012
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[18:19], s[2:3] ; BE922402
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s2, s[12:15], 0x20 ; C2010D20
s_buffer_load_dword s3, s[12:15], 0x21 ; C2018D21
s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22
s_buffer_load_dword s10, s[12:15], 0x24 ; C2050D24
s_buffer_load_dword s11, s[12:15], 0x25 ; C2058D25
v_mul_f32_e32 v17, v27, v27 ; 1022371B
v_mac_f32_e32 v17, v28, v28 ; 3E22391C
v_mac_f32_e32 v17, v29, v29 ; 3E223B1D
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
s_buffer_load_dword s16, s[12:15], 0x26 ; C2080D26
s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28
s_buffer_load_dword s20, s[12:15], 0x29 ; C20A0D29
s_buffer_load_dword s21, s[12:15], 0x2a ; C20A8D2A
v_mul_f32_e32 v18, v17, v27 ; 10243711
v_mul_f32_e32 v19, v17, v28 ; 10263911
v_mul_f32_e32 v17, v17, v29 ; 10223B11
v_rcp_f32_e32 v20, v18 ; 7E285512
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v22, s2, v14 ; 082C1C02
v_sub_f32_e32 v23, s3, v12 ; 082E1803
v_rcp_f32_e32 v30, v19 ; 7E3C5513
v_mul_f32_e32 v22, v20, v22 ; 102C2D14
v_sub_f32_e32 v31, s10, v14 ; 083E1C0A
v_mul_f32_e32 v20, v20, v31 ; 10283F14
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v20, v20, v22 ; 00282D14
v_rcp_f32_e32 v22, v17 ; 7E2C5511
v_mul_f32_e32 v23, v30, v23 ; 102E2F1E
v_sub_f32_e32 v31, s11, v12 ; 083E180B
v_mul_f32_e32 v30, v30, v31 ; 103C3F1E
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v23, v30, v23 ; 002E2F1E
v_sub_f32_e32 v30, s8, v15 ; 083C1E08
v_mul_f32_e32 v30, v22, v30 ; 103C3D16
v_sub_f32_e32 v31, s16, v15 ; 083E1E10
v_mul_f32_e32 v22, v22, v31 ; 102C3F16
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v22, v22, v30 ; 002C3D16
v_min3_f32 v20, v20, v23, v22 ; D2A20014 045A2F14
v_mov_b32_e32 v22, s10 ; 7E2C020A
v_add_f32_e32 v22, s2, v22 ; 062C2C02
v_mov_b32_e32 v23, s11 ; 7E2E020B
v_add_f32_e32 v23, s3, v23 ; 062E2E03
v_mov_b32_e32 v30, s16 ; 7E3C0210
v_add_f32_e32 v32, s8, v30 ; 06403C08
v_mad_f32 v30, 0.5, v22, -s17 ; D282001E 80462CF0
v_add_f32_e32 v30, v14, v30 ; 063C3D0E
v_mac_f32_e32 v30, v20, v18 ; 3E3C2514
v_mad_f32 v18, 0.5, v23, -s20 ; D2820012 80522EF0
v_add_f32_e32 v18, v12, v18 ; 0624250C
v_mac_f32_e32 v18, v20, v19 ; 3E242714
v_mad_f32 v19, 0.5, v32, -s21 ; D2820013 805640F0
v_add_f32_e32 v19, v15, v19 ; 0626270F
v_mac_f32_e32 v19, v20, v17 ; 3E262314
v_mad_f32 v30, 0.5, -v22, v30 ; D282001E 447A2CF0
v_mad_f32 v31, 0.5, -v23, v18 ; D282001F 444A2EF0
v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0
s_or_b64 exec, exec, s[18:19] ; 88FE127E
s_buffer_load_dword s28, s[12:15], 0x17 ; C20E0D17
s_buffer_load_dword s29, s[12:15], 0x43 ; C20E8D43
s_buffer_load_dword s27, s[12:15], 0x44 ; C20D8D44
s_buffer_load_dword s20, s[12:15], 0x45 ; C20A0D45
s_buffer_load_dword s17, s[12:15], 0x46 ; C2088D46
s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00
s_buffer_load_dword s8, s[12:15], 0x1 ; C2040D01
s_buffer_load_dword s2, s[12:15], 0x2 ; C2010D02
s_buffer_load_dword s10, s[12:15], 0x4 ; C2050D04
s_buffer_load_dword s11, s[12:15], 0x5 ; C2058D05
s_buffer_load_dword s16, s[12:15], 0x6 ; C2080D06
s_buffer_load_dword s18, s[12:15], 0x7 ; C2090D07
s_buffer_load_dword s19, s[12:15], 0x8 ; C2098D08
s_buffer_load_dword s21, s[12:15], 0x9 ; C20A8D09
s_buffer_load_dword s22, s[12:15], 0xa ; C20B0D0A
s_buffer_load_dword s23, s[12:15], 0xb ; C20B8D0B
s_buffer_load_dword s24, s[12:15], 0xc ; C20C0D0C
s_buffer_load_dword s25, s[12:15], 0xd ; C20C8D0D
s_buffer_load_dword s26, s[12:15], 0xe ; C20D0D0E
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000
v_cubeid_f32 v20, v30, v31, v32 ; D2880014 04823F1E
v_cubema_f32 v19, v30, v31, v32 ; D28E0013 04823F1E
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700
v_cubesc_f32 v18, v30, v31, v32 ; D28A0012 04823F1E
v_cubetc_f32 v17, v30, v31, v32 ; D28C0011 04823F1E
v_rcp_f32_e64 v19, |v19| ; D3540113 00000113
v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000
v_mad_f32 v31, v19, v17, v30 ; D282001F 047A2313
v_mac_f32_e32 v30, v19, v18 ; 3E3C2513
v_mov_b32_e32 v32, v20 ; 7E400314
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[44:51], s[32:35] ; F0900F00 010B1E1E
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v33 ; 7E224F21
s_buffer_load_dword s30, s[12:15], 0xf ; C20F0D0F
s_buffer_load_dword s31, s[12:15], 0x68 ; C20F8D68
v_mul_legacy_f32_e32 v17, s41, v17 ; 0E222229
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s40, v17 ; 10222228
v_mul_f32_e32 v19, v30, v17 ; 1026231E
v_mul_f32_e32 v18, v31, v17 ; 1024231F
v_mul_f32_e32 v17, v32, v17 ; 10222320
v_mov_b32_e32 v20, s1 ; 7E280201
v_mov_b32_e32 v22, 0x3f7fff58 ; 7E2C02FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s9, v22 ; 7C022C09
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s35, s[12:15], 0x3b ; C2118D3B
s_buffer_load_dword s1, s[12:15], 0x3c ; C2008D3C
s_buffer_load_dword s34, s[12:15], 0x3d ; C2110D3D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s35, s[12:15], 0x36 ; C2118D36
s_buffer_load_dword s38, s[12:15], 0x38 ; C2130D38
s_buffer_load_dword s39, s[12:15], 0x39 ; C2138D39
s_buffer_load_dword s40, s[12:15], 0x3a ; C2140D3A
s_buffer_load_dword s41, s[12:15], 0x30 ; C2148D30
s_buffer_load_dword s42, s[12:15], 0x31 ; C2150D31
s_buffer_load_dword s43, s[12:15], 0x32 ; C2158D32
s_buffer_load_dword s44, s[12:15], 0x34 ; C2160D34
s_buffer_load_dword s45, s[12:15], 0x35 ; C2168D35
v_mul_f32_e32 v22, v27, v27 ; 102C371B
v_mac_f32_e32 v22, v28, v28 ; 3E2C391C
v_mac_f32_e32 v22, v29, v29 ; 3E2C3B1D
v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v23, s35, v15 ; 082E1E23
v_mov_b32_e32 v30, s35 ; 7E3C0223
v_sub_f32_e32 v31, s41, v14 ; 083E1C29
v_sub_f32_e32 v32, s42, v12 ; 0840182A
v_add_f32_e32 v30, s43, v30 ; 063C3C2B
v_sub_f32_e32 v33, s43, v15 ; 08421E2B
v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0
v_add_f32_e32 v15, v15, v34 ; 061E450F
v_mul_f32_e32 v27, v22, v27 ; 10363716
v_mul_f32_e32 v28, v22, v28 ; 10383916
v_mul_f32_e32 v22, v22, v29 ; 102C3B16
v_rcp_f32_e32 v29, v27 ; 7E3A551B
v_rcp_f32_e32 v34, v28 ; 7E44551C
v_rcp_f32_e32 v35, v22 ; 7E465516
v_sub_f32_e32 v36, s44, v14 ; 08481C2C
v_mov_b32_e32 v37, s44 ; 7E4A022C
v_add_f32_e32 v37, s41, v37 ; 064A4A29
v_mul_f32_e32 v31, v29, v31 ; 103E3F1D
v_mul_f32_e32 v29, v29, v36 ; 103A491D
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v23, v35, v23 ; 102E2F23
v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0
v_add_f32_e32 v14, v14, v35 ; 061C470E
v_sub_f32_e32 v35, s45, v12 ; 0846182D
v_mov_b32_e32 v36, s45 ; 7E48022D
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s42, v36 ; 0646482A
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D
v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80
v_cndmask_b32_e32 v23, v23, v33 ; 002E4317
v_min3_f32 v23, v29, v31, v23 ; D2A20017 045E3F1D
v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0
v_add_f32_e32 v12, v12, v29 ; 06183B0C
v_mac_f32_e32 v14, v23, v27 ; 3E1C3717
v_mac_f32_e32 v12, v23, v28 ; 3E183917
v_mac_f32_e32 v15, v23, v22 ; 3E1E2D17
v_mad_f32 v27, 0.5, -v37, v14 ; D282001B 443A4AF0
v_mad_f32 v28, 0.5, -v35, v12 ; D282001C 443246F0
v_mad_f32 v29, 0.5, -v30, v15 ; D282001D 443E3CF0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v12, 1.0, s0 ; D208000C 000000F2
v_log_f32_e32 v12, v12 ; 7E184F0C
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v12, 0x3f400000, v12 ; 0E1818FF 3F400000
v_exp_f32_e32 v12, v12 ; 7E184B0C
v_mul_f32_e32 v30, 0x40e00000, v12 ; 103C18FF 40E00000
v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B
v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B
v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B
v_rcp_f32_e64 v12, |v33| ; D354010C 00000121
v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000
v_mad_f32 v28, v12, v31, v27 ; D282001C 046E3F0C
v_mac_f32_e32 v27, v12, v32 ; 3E36410C
v_mov_b32_e32 v29, v34 ; 7E3A0322
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A1B1B
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v12, v30 ; 7E184F1E
v_sub_f32_e64 v14, 1.0, s9 ; D208000E 000012F2
v_mul_legacy_f32_e32 v12, s34, v12 ; 0E181822
v_exp_f32_e32 v12, v12 ; 7E184B0C
v_mul_f32_e32 v12, s1, v12 ; 10181801
v_mul_f32_e32 v15, v27, v12 ; 101E191B
v_mul_f32_e32 v22, v28, v12 ; 102C191C
v_mul_f32_e32 v12, v29, v12 ; 1018191D
v_mul_f32_e32 v15, v15, v14 ; 101E1D0F
v_mul_f32_e32 v22, v22, v14 ; 102C1D16
v_mul_f32_e32 v12, v12, v14 ; 10181D0C
v_mac_f32_e32 v15, s9, v19 ; 3E1E2609
v_mac_f32_e32 v22, s9, v18 ; 3E2C2409
v_mac_f32_e32 v12, s9, v17 ; 3E182209
v_mov_b32_e32 v17, v12 ; 7E22030C
v_mov_b32_e32 v18, v22 ; 7E240316
v_mov_b32_e32 v19, v15 ; 7E26030F
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mad_f32 v27, -v20, s29, s29 ; D282001B 20743B14
v_mov_b32_e32 v12, s28 ; 7E18021C
v_mul_f32_e32 v20, v27, v24 ; 1028311B
v_mul_f32_e32 v15, v27, v25 ; 101E331B
v_mul_f32_e32 v14, v27, v26 ; 101C351B
v_mul_f32_e32 v22, s27, v16 ; 102C201B
v_sub_f32_e64 v24, 1.0, s31 ; D2080018 00003EF2
v_mac_f32_e32 v24, s31, v21 ; 3E302A1F
v_mul_f32_e32 v21, s20, v16 ; 102A2014
v_mul_f32_e32 v16, s17, v16 ; 10202011
s_buffer_load_dword s5, s[12:15], 0x10 ; C2028D10
s_buffer_load_dword s4, s[12:15], 0x11 ; C2020D11
s_buffer_load_dword s1, s[12:15], 0x12 ; C2008D12
s_buffer_load_dword s17, s[12:15], 0x16 ; C2088D16
s_buffer_load_dword s6, s[12:15], 0x48 ; C2030D48
s_buffer_load_dword s7, s[12:15], 0x49 ; C2038D49
s_buffer_load_dword s9, s[12:15], 0x4b ; C2048D4B
v_mul_f32_e32 v23, s11, v8 ; 102E100B
v_mac_f32_e32 v23, s10, v9 ; 3E2E120A
v_mac_f32_e32 v23, s16, v7 ; 3E2E0E10
v_add_f32_e32 v23, s18, v23 ; 062E2E12
v_mul_f32_e32 v25, s21, v8 ; 10321015
v_mac_f32_e32 v25, s19, v9 ; 3E321213
v_mac_f32_e32 v25, s22, v7 ; 3E320E16
v_add_f32_e32 v25, s23, v25 ; 06323217
v_mul_f32_e32 v26, s25, v8 ; 10341019
v_mac_f32_e32 v26, s24, v9 ; 3E341218
v_mac_f32_e32 v26, s26, v7 ; 3E340E1A
v_add_f32_e32 v26, s30, v26 ; 0634341E
v_add_f32_e32 v2, v23, v2 ; 06040517
v_add_f32_e32 v3, v25, v3 ; 06060719
v_add_f32_e32 v25, v26, v5 ; 06320B1A
v_mul_f32_e32 v5, s3, v9 ; 100A1203
v_mac_f32_e32 v5, s8, v8 ; 3E0A1008
v_mac_f32_e32 v5, s2, v7 ; 3E0A0E02
v_max_f32_e32 v23, 0, v5 ; 202E0A80
v_mul_f32_e32 v5, v24, v2 ; 100A0518
v_mul_f32_e32 v2, v24, v3 ; 10040718
v_mul_f32_e32 v3, v24, v25 ; 10063318
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v12, s17, v4 ; 3E180811
v_mul_f32_e32 v4, v24, v19 ; 10082718
v_mul_f32_e32 v18, v24, v18 ; 10242518
v_mul_f32_e32 v17, v24, v17 ; 10222318
v_sub_f32_e32 v19, 1.0, v27 ; 082636F2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v24, s3, v11 ; 08301603
v_sub_f32_e32 v25, s8, v10 ; 08321408
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_sub_f32_e32 v27, s2, v13 ; 08361A02
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_mul_f32_e32 v26, v26, v27 ; 1034371A
v_mul_f32_e32 v11, v11, v9 ; 1016130B
v_mad_f32 v10, -v10, v8, -v11 ; D282000A A42E110A
v_mad_f32 v10, -v13, v7, v10 ; D282000A 242A0F0D
v_mul_f32_e32 v9, v24, v9 ; 10121318
v_mac_f32_e32 v9, v25, v8 ; 3E121119
v_mul_f32_e32 v8, s3, v24 ; 10103003
v_mac_f32_e32 v8, s8, v25 ; 3E103208
v_mac_f32_e32 v9, v26, v7 ; 3E120F1A
v_mac_f32_e32 v8, s2, v26 ; 3E103402
v_max_f32_e32 v7, 0, v8 ; 200E1080
v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2
v_mul_f32_e32 v11, v8, v8 ; 10161108
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v13, v11, v11 ; 101A170B
v_mul_f32_e32 v24, v11, v13 ; 10301B0B
v_mad_f32 v25, -v13, v24, 1.0 ; D2820019 23CA310D
v_mul_f32_e32 v26, v6, v25 ; 10343306
v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2
v_mac_f32_e32 v6, v8, v27 ; 3E0C3708
v_mul_f32_e32 v27, v1, v25 ; 10363301
v_sub_f32_e32 v28, 1.0, v1 ; 083802F2
v_mac_f32_e32 v1, v8, v28 ; 3E023908
v_mul_f32_e32 v25, v0, v25 ; 10323300
v_sub_f32_e32 v28, 1.0, v0 ; 083800F2
v_mac_f32_e32 v0, v8, v28 ; 3E003908
v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2
v_sub_f32_e32 v28, 1.0, v8 ; 083810F2
v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F
v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9
v_add_f32_e32 v29, v7, v7 ; 063A0F07
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mad_f32 v7, v29, v7, 0.5 ; D2820007 03C20F1D
v_mul_f32_e32 v13, v24, v13 ; 101A1B18
v_mac_f32_e32 v26, v19, v13 ; 3E341B13
v_mac_f32_e32 v27, v19, v13 ; 3E361B13
v_mac_f32_e32 v25, v19, v13 ; 3E321B13
v_mul_f32_e32 v8, v8, v8 ; 10101108
v_log_f32_e32 v19, v28 ; 7E264F1C
v_mul_f32_e32 v8, s9, v8 ; 10101009
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v19 ; 7E145513
v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2
v_mul_f32_e32 v8, v8, v19 ; 10102708
v_mac_f32_e32 v8, 1.0, v23 ; 3E102EF2
v_max_f32_e32 v9, 0, v9 ; 20121280
v_log_f32_e32 v9, v9 ; 7E124F09
v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B
v_rcp_f32_e32 v8, v8 ; 7E105508
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s7, v10 ; 10141407
v_exp_f32_e32 v9, v9 ; 7E124B09
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v8, v9, v8 ; 10101109
v_mul_f32_e32 v9, v19, v19 ; 10122713
v_mul_f32_e32 v10, v19, v9 ; 10141313
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307
v_mad_f32 v7, v7, v13, 1.0 ; D2820007 03CA1B07
v_mul_f32_e32 v7, v7, v9 ; 100E1307
v_mul_f32_e32 v8, v23, v8 ; 10101117
v_mul_f32_e32 v8, s6, v8 ; 10101006
v_mul_f32_e32 v7, v23, v7 ; 100E0F17
v_mac_f32_e32 v5, v7, v22 ; 3E0A2D07
v_mul_f32_e32 v5, v5, v20 ; 100A2905
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v9, v22, v8 ; 10121116
v_mac_f32_e32 v5, v6, v9 ; 3E0A1306
v_mac_f32_e32 v2, v7, v21 ; 3E042B07
v_mac_f32_e32 v3, v7, v16 ; 3E062107
v_mul_f32_e32 v6, v21, v8 ; 100C1115
v_mul_f32_e32 v7, v16, v8 ; 100E1110
v_mul_f32_e32 v2, v2, v15 ; 10041F02
v_mul_f32_e32 v3, v3, v14 ; 10061D03
v_mac_f32_e32 v2, v1, v6 ; 3E040D01
v_mac_f32_e32 v3, v0, v7 ; 3E060F00
v_mac_f32_e32 v5, v26, v4 ; 3E0A091A
v_mac_f32_e32 v2, v27, v18 ; 3E04251B
v_mac_f32_e32 v3, v25, v17 ; 3E062319
v_add_f32_e64 v0, 0, v12 clamp ; D2060800 00021880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v4, s5, v1 ; 10080205
v_mac_f32_e32 v4, v5, v0 ; 3E080105
v_mul_f32_e32 v5, s4, v1 ; 100A0204
v_mac_f32_e32 v5, v2, v0 ; 3E0A0102
v_mul_f32_e32 v1, s1, v1 ; 10020201
v_mac_f32_e32 v1, v3, v0 ; 3E020103
v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 40
Code Size: 2380 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL CONST[0..19]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx
45: DP4 TEMP[8].x, CONST[1], TEMP[7]
46: DP4 TEMP[9].x, CONST[2], TEMP[7]
47: MOV TEMP[8].y, TEMP[9].xxxx
48: DP4 TEMP[7].x, CONST[3], TEMP[7]
49: MOV TEMP[8].z, TEMP[7].xxxx
50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy
51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx
52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz
53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[7].yzw, TEMP[7].yxyz
55: MOV TEMP[7].x, TEMP[1].zzzz
56: MOV TEMP[0].xyz, TEMP[0].xyzx
57: MOV OUT[7], TEMP[0]
58: MOV OUT[1], TEMP[2]
59: MOV OUT[3], TEMP[5]
60: MOV OUT[2], TEMP[4]
61: MOV OUT[4], TEMP[6]
62: MOV OUT[5], TEMP[3]
63: MOV OUT[0], TEMP[1]
64: MOV OUT[6], TEMP[7]
65: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
%107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0
%109 = add i32 %5, %7
%110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109)
%111 = extractelement <4 x float> %110, i32 0
%112 = extractelement <4 x float> %110, i32 1
%113 = extractelement <4 x float> %110, i32 2
%114 = extractelement <4 x float> %110, i32 3
%115 = fmul float %31, %84
%116 = fmul float %32, %84
%117 = fmul float %33, %84
%118 = fmul float %34, %84
%119 = fmul float %35, %85
%120 = fadd float %119, %115
%121 = fmul float %36, %85
%122 = fadd float %121, %116
%123 = fmul float %37, %85
%124 = fadd float %123, %117
%125 = fmul float %38, %85
%126 = fadd float %125, %118
%127 = fmul float %39, %86
%128 = fadd float %127, %120
%129 = fmul float %40, %86
%130 = fadd float %129, %122
%131 = fmul float %41, %86
%132 = fadd float %131, %124
%133 = fmul float %42, %86
%134 = fadd float %133, %126
%135 = fmul float %43, %87
%136 = fadd float %135, %128
%137 = fmul float %44, %87
%138 = fadd float %137, %130
%139 = fmul float %45, %87
%140 = fadd float %139, %132
%141 = fmul float %64, %84
%142 = fmul float %65, %84
%143 = fmul float %66, %84
%144 = fmul float %67, %84
%145 = fmul float %68, %85
%146 = fadd float %145, %141
%147 = fmul float %69, %85
%148 = fadd float %147, %142
%149 = fmul float %70, %85
%150 = fadd float %149, %143
%151 = fmul float %71, %85
%152 = fadd float %151, %144
%153 = fmul float %72, %86
%154 = fadd float %153, %146
%155 = fmul float %73, %86
%156 = fadd float %155, %148
%157 = fmul float %74, %86
%158 = fadd float %157, %150
%159 = fmul float %75, %86
%160 = fadd float %159, %152
%161 = fmul float %76, %87
%162 = fadd float %161, %154
%163 = fmul float %77, %87
%164 = fadd float %163, %156
%165 = fmul float %78, %87
%166 = fadd float %165, %158
%167 = fmul float %79, %87
%168 = fadd float %167, %160
%169 = fmul float %99, %55
%170 = fadd float %169, %57
%171 = fmul float %100, %56
%172 = fadd float %171, %58
%173 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %173, float %99, float %105
%.40 = select i1 %173, float %100, float %106
%174 = fmul float %., %59
%175 = fadd float %174, %61
%176 = fmul float %.40, %60
%177 = fadd float %176, %62
%178 = fmul float %46, %92
%179 = fmul float %49, %92
%180 = fmul float %52, %92
%181 = fmul float %47, %93
%182 = fadd float %181, %178
%183 = fmul float %50, %93
%184 = fadd float %183, %179
%185 = fmul float %53, %93
%186 = fadd float %185, %180
%187 = fmul float %48, %94
%188 = fadd float %187, %182
%189 = fmul float %51, %94
%190 = fadd float %189, %184
%191 = fmul float %54, %94
%192 = fadd float %191, %186
%193 = fmul float %188, %188
%194 = fmul float %190, %190
%195 = fadd float %194, %193
%196 = fmul float %192, %192
%197 = fadd float %195, %196
%198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197)
%199 = fmul float %188, %198
%200 = fmul float %190, %198
%201 = fmul float %192, %198
%202 = fmul float %31, %111
%203 = fmul float %32, %111
%204 = fmul float %33, %111
%205 = fmul float %35, %112
%206 = fadd float %205, %202
%207 = fmul float %36, %112
%208 = fadd float %207, %203
%209 = fmul float %37, %112
%210 = fadd float %209, %204
%211 = fmul float %39, %113
%212 = fadd float %211, %206
%213 = fmul float %40, %113
%214 = fadd float %213, %208
%215 = fmul float %41, %113
%216 = fadd float %215, %210
%217 = fmul float %212, %212
%218 = fmul float %214, %214
%219 = fadd float %218, %217
%220 = fmul float %216, %216
%221 = fadd float %219, %220
%222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221)
%223 = fmul float %212, %222
%224 = fmul float %214, %222
%225 = fmul float %216, %222
%226 = fmul float %201, %224
%227 = fmul float %199, %225
%228 = fmul float %200, %223
%229 = fmul float %200, %225
%230 = fsub float %229, %226
%231 = fmul float %201, %223
%232 = fsub float %231, %227
%233 = fmul float %199, %224
%234 = fsub float %233, %228
%235 = fmul float %230, %114
%236 = fmul float %232, %114
%237 = fmul float %234, %114
%238 = fmul float %199, %200
%239 = fmul float %200, %201
%240 = fmul float %201, %201
%241 = fmul float %201, %199
%242 = fmul float %16, %238
%243 = fmul float %17, %239
%244 = fadd float %242, %243
%245 = fmul float %18, %240
%246 = fadd float %244, %245
%247 = fmul float %19, %241
%248 = fadd float %246, %247
%249 = fmul float %20, %238
%250 = fmul float %21, %239
%251 = fadd float %249, %250
%252 = fmul float %22, %240
%253 = fadd float %251, %252
%254 = fmul float %23, %241
%255 = fadd float %253, %254
%256 = fmul float %24, %238
%257 = fmul float %25, %239
%258 = fadd float %256, %257
%259 = fmul float %26, %240
%260 = fadd float %258, %259
%261 = fmul float %27, %241
%262 = fadd float %260, %261
%263 = fmul float %200, %200
%264 = fmul float %199, %199
%265 = fsub float %264, %263
%266 = fmul float %28, %265
%267 = fadd float %266, %248
%268 = fmul float %29, %265
%269 = fadd float %268, %255
%270 = fmul float %30, %265
%271 = fadd float %270, %262
%272 = fsub float %136, %13
%273 = fsub float %138, %14
%274 = fsub float %140, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168)
ret void
}
; Intrinsic declarations, attribute groups and metadata for the module above.
;
; llvm.SI.load.const: returns one float from a <16 x i8> resource operand.
; Every call in the function body above passes a constant i32 offset that is a
; multiple of 4.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; llvm.SI.vs.load.input: returns a <4 x float>. The body above always passes
; i32 0 as the second operand and the result of `add i32 %5, %7` as the third.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; llvm.AMDGPU.rsq.clamped.f32: applied above to the two sum-of-squares values
; (%197, %221); the result scales each 3-vector, i.e. it is used to normalise.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; llvm.SI.export: five i32 operands followed by four float channels. No
; readnone attribute group is attached, unlike the three declarations above.
; In the body the fourth operand takes the values 32..38 and 12; only the
; target-12 call sets the third operand to 1.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main on its `define` line (outside this excerpt).
; NOTE(review): "ShaderType"="1" presumably selects the vertex stage - confirm.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
; TBAA node referenced as !tbaa !0 by the descriptor loads in the body.
!0 = !{!"const", null, i32 1}
; Machine code generated for the vertex shader whose LLVM IR ends just above.
; The trailing "; XXXXXXXX" field on each line is the instruction encoding.
; "IR %N" in the notes below refers to values of that LLVM function.
Shader Disassembly:
; v1 = 0: supplies the constant 0.0 fourth channel of exports 33..36.
v_mov_b32_e32 v1, 0 ; 7E020280
; v0 = s10 + v0: index operand of the idxen loads below
; (presumably the IR's `add i32 %5, %7` - confirm against the ABI).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[28:31]: resource read by every s_buffer_load_dword in this shader.
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
; Five 4-dword descriptors at offsets 0x0,0x4,0x8,0xc,0x10 from s[8:9];
; the IR loads elements 0..4 of the array %4 for its five vertex inputs.
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20
; Vertex input fetches. Destination ranges overlap (v[6:9], v[9:12], v[11:14],
; v[13:16]): each later load overwrites lanes the IR never extracts
; (input 1 uses xyz only, inputs 2 and 3 use xy only).
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
; Scalar constant loads. The offset operand equals the IR load.const offset
; divided by 4 (e.g. 0x20 here <-> i32 128 in the IR, 0x4f <-> i32 316).
s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21
s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22
s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24
s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25
s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10
s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11
s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12
s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14
s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15
s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26
s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28
s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29
s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A
s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C
s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16
s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17
s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18
s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19
s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D
s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E
s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F
s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D
s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E
s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34
s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35
s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36
s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C
s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40
s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41
s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42
s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43
s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37
s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38
s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39
s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; The v_mov instructions interleaved below copy s3/s6/s8/s10 (the additive
; terms of IR %170/%172/%175/%177) into v0/v17/v18/v19 before those scalar
; registers are reused as destinations of further constant loads.
v_mov_b32_e32 v0, s3 ; 7E000203
s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B
; vcc = (constant dword 0x3c == 0.0), i.e. IR %173.
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00
s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01
s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02
s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05
s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06
s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v19, s10 ; 7E26020A
s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09
s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A
s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B
s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C
s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D
s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E
s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F
s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44
s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45
s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46
s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47
s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48
s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49
s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A
s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B
s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C
s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D
s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E
s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F
; v20 = IR %134: three-term sum over input 0 xyz (the IR has no w term here).
v_mul_f32_e32 v20, s41, v2 ; 10280429
v_mac_f32_e32 v20, s42, v3 ; 3E28062A
v_mac_f32_e32 v20, s43, v4 ; 3E28082B
; v0 / v17 = IR %170 / %172 (input 2 x,y scaled and offset).
v_mac_f32_e32 v0, s46, v9 ; 3E00122E
v_mac_f32_e32 v17, s47, v10 ; 3E22142F
; v21..v24 = IR %162/%164/%166/%168: four 4-term sums over input 0 xyzw,
; exported as the position at the end of the shader.
v_mul_f32_e32 v21, s48, v2 ; 102A0430
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s54, v3 ; 3E2A0636
v_mac_f32_e32 v21, s58, v4 ; 3E2A083A
v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E
v_mul_f32_e32 v22, s49, v2 ; 102C0431
v_mac_f32_e32 v22, s55, v3 ; 3E2C0637
v_mac_f32_e32 v22, s59, v4 ; 3E2C083B
v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F
v_mul_f32_e32 v23, s50, v2 ; 102E0432
v_mac_f32_e32 v23, s56, v3 ; 3E2E0638
v_mac_f32_e32 v23, s60, v4 ; 3E2E083C
v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40
v_mul_f32_e32 v24, s51, v2 ; 10300433
v_mac_f32_e32 v24, s57, v3 ; 3E300639
v_mac_f32_e32 v24, s61, v4 ; 3E30083D
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
; IR selects `%.` / `%.40`: keep input 2 x,y (v9,v10) when vcc is set,
; otherwise take input 3 x,y (v11,v12).
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
; v11 / v12 / v6 = IR %188 / %190 / %192: 3x3 transform of input 1 xyz
; (v6..v8); the third terms are added a few lines further down.
v_mul_f32_e32 v11, s34, v6 ; 10160C22
v_mac_f32_e32 v11, s35, v7 ; 3E160E23
v_mul_f32_e32 v12, s37, v6 ; 10180C25
v_mac_f32_e32 v12, s38, v7 ; 3E180E26
v_mul_f32_e32 v6, s40, v6 ; 100C0C28
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s36, v8 ; 3E161024
v_mac_f32_e32 v12, s39, v8 ; 3E181027
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
; v7 / v8 / v2 = IR %136 / %138 / %140: 4-term sums over input 0 xyzw.
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s26, v3 ; 3E0E061A
v_mac_f32_e32 v7, s23, v4 ; 3E0E0817
v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B
v_mul_f32_e32 v8, s17, v2 ; 10100411
v_mac_f32_e32 v8, s25, v3 ; 3E100619
v_mac_f32_e32 v8, s24, v4 ; 3E100818
v_mac_f32_e32 v8, s32, v5 ; 3E100A20
v_mul_f32_e32 v2, s20, v2 ; 10040414
v_mac_f32_e32 v2, s21, v3 ; 3E040615
v_mac_f32_e32 v2, s22, v4 ; 3E040816
v_mac_f32_e32 v2, s33, v5 ; 3E040A21
; v18 / v19 = IR %175 / %177, computed from the selected x,y pair.
v_mac_f32_e32 v18, s52, v9 ; 3E241234
v_mac_f32_e32 v19, s53, v10 ; 3E261435
; Export target 32 = IR %170, %172, %175, %177.
exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100
; v0, a source of the export above, is overwritten by the next instruction.
s_waitcnt expcnt(0) ; BF8C070F
; v0 / v3 / v4 = IR %212 / %214 / %216: the same 3x3 constants as v7/v8/v2,
; applied to input 4 xyz (v13..v15).
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s26, v14 ; 3E001C1A
v_mul_f32_e32 v3, s17, v13 ; 10061A11
v_mac_f32_e32 v3, s25, v14 ; 3E061C19
v_mul_f32_e32 v4, s20, v13 ; 10081A14
v_mac_f32_e32 v4, s21, v14 ; 3E081C15
v_mac_f32_e32 v0, s23, v15 ; 3E001E17
v_mac_f32_e32 v3, s24, v15 ; 3E061E18
v_mac_f32_e32 v4, s22, v15 ; 3E081E16
; v5 = rsq of the squared length of (v11,v12,v6)  -> IR %198.
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
; v9 = rsq of the squared length of (v0,v3,v4)  -> IR %222.
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Normalised vectors: (v10,v11,v5) = IR %199..%201, (v0,v3,v4) = IR %223..%225.
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
; Cross product of the two normalised vectors: (v6,v9,v12) = IR %230/%232/%234.
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
; Scale the cross product by v16 (input 4 w, IR %114) -> IR %235..%237.
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
; v0 and v6, sources of the exports above, are overwritten below.
s_waitcnt expcnt(0) ; BF8C070F
; (v3,v4,v0) = IR %267/%269/%271: each is a sum of five constants weighted by
; products of the normalised vector (v10,v11,v5): y*z, x*y, z*z, z*x and
; x*x - y*y, in the order they are accumulated here.
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s14, v0 ; 1006000E
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s19, v0 ; 10000013
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s13, v6 ; 3E000C0D
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s6, v6 ; 3E060C06
v_mac_f32_e32 v4, s8, v6 ; 3E080C08
v_mac_f32_e32 v0, s11, v6 ; 3E000C0B
v_mul_f32_e32 v6, v11, v11 ; 100C170B
v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A
v_mac_f32_e32 v3, s1, v6 ; 3E060C01
v_mac_f32_e32 v4, s2, v6 ; 3E080C02
v_mac_f32_e32 v0, s0, v6 ; 3E000C00
; (v6,v9,v12) = (v7,v8,v2) minus constant dwords 0..2 -> IR %272..%274.
v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03
v_subrev_f32_e32 v9, s4, v8 ; 0A121004
v_subrev_f32_e32 v12, s5, v2 ; 0A180405
; Remaining exports, in the same order as the IR's llvm.SI.export calls.
; The last one (target 12, third operand 1) carries v21..v24 and matches the
; IR's only export whose third operand is 1.
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617
exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 892 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[22..24]
DCL CONST[26]
DCL TEMP[0..17], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D
11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx
13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
14: MOV_SAT TEMP[4].x, TEMP[4].xxxx
15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
16: SQRT TEMP[4].x, TEMP[4].xxxx
17: MOV TEMP[3].z, TEMP[4].xxxx
18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
20: MOV TEMP[0].y, TEMP[1].xxxx
21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz
22: MOV TEMP[0].z, TEMP[1].xxxx
23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
24: RSQ TEMP[1].x, TEMP[1].xxxx
25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
26: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww
27: RSQ TEMP[1].x, TEMP[1].xxxx
28: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx
29: MOV TEMP[2].xy, IN[0].xyyy
30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz
33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww
34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
36: MOV TEMP[5].xy, IN[0].xyyy
37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx
39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx
40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
42: MOV TEMP[7].xyz, IMM[0].wwww
43: MOV TEMP[8].w, IMM[0].zzzz
44: MOV TEMP[8].xyz, TEMP[0].xyzx
45: DP4 TEMP[9].x, CONST[1], TEMP[8]
46: DP4 TEMP[10].x, CONST[2], TEMP[8]
47: MOV TEMP[9].y, TEMP[10].xxxx
48: DP4 TEMP[8].x, CONST[3], TEMP[8]
49: MOV TEMP[9].z, TEMP[8].xxxx
50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz
51: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
52: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
53: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
54: MUL TEMP[9].xyz, IMM[0].xxxx, TEMP[9].xyzz
55: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
56: MOV TEMP[10].xyz, TEMP[9].xyzx
57: FSLT TEMP[11].x, IMM[0].wwww, CONST[10].wwww
58: UIF TEMP[11].xxxx :0
59: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
60: RSQ TEMP[11].x, TEMP[11].xxxx
61: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
62: MOV TEMP[12].xyz, -IN[6].xyzx
63: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
64: RCP TEMP[14].x, TEMP[11].xxxx
65: RCP TEMP[14].y, TEMP[11].yyyy
66: RCP TEMP[14].z, TEMP[11].zzzz
67: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
68: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
69: RCP TEMP[14].x, TEMP[11].xxxx
70: RCP TEMP[14].y, TEMP[11].yyyy
71: RCP TEMP[14].z, TEMP[11].zzzz
72: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
73: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[11].xyzz
74: UIF TEMP[14].xxxx :0
75: MOV TEMP[15].x, TEMP[13].xxxx
76: ELSE :0
77: MOV TEMP[15].x, TEMP[12].xxxx
78: ENDIF
79: UIF TEMP[14].yyyy :0
80: MOV TEMP[16].x, TEMP[13].yyyy
81: ELSE :0
82: MOV TEMP[16].x, TEMP[12].yyyy
83: ENDIF
84: UIF TEMP[14].zzzz :0
85: MOV TEMP[13].x, TEMP[13].zzzz
86: ELSE :0
87: MOV TEMP[13].x, TEMP[12].zzzz
88: ENDIF
89: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
90: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx
91: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
92: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
93: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
94: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz
95: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
96: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
97: ENDIF
98: ADD TEMP[11].x, IMM[0].zzzz, -CONST[24].xxxx
99: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
100: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
101: MOV TEMP[10].xyz, TEMP[10].xyzz
102: MOV TEMP[10].w, TEMP[11].xxxx
103: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
104: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
105: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
106: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
107: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].wwww
108: UIF TEMP[11].xxxx :0
109: MOV TEMP[11].xyz, TEMP[9].xyzx
110: FSLT TEMP[12].x, IMM[0].wwww, CONST[14].wwww
111: UIF TEMP[12].xxxx :0
112: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
113: RSQ TEMP[12].x, TEMP[12].xxxx
114: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
115: MOV TEMP[12].xyz, -IN[6].xyzx
116: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
117: RCP TEMP[14].x, TEMP[9].xxxx
118: RCP TEMP[14].y, TEMP[9].yyyy
119: RCP TEMP[14].z, TEMP[9].zzzz
120: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
121: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
122: RCP TEMP[14].x, TEMP[9].xxxx
123: RCP TEMP[14].y, TEMP[9].yyyy
124: RCP TEMP[14].z, TEMP[9].zzzz
125: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
126: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[9].xyzz
127: UIF TEMP[14].xxxx :0
128: MOV TEMP[15].x, TEMP[13].xxxx
129: ELSE :0
130: MOV TEMP[15].x, TEMP[12].xxxx
131: ENDIF
132: UIF TEMP[14].yyyy :0
133: MOV TEMP[16].x, TEMP[13].yyyy
134: ELSE :0
135: MOV TEMP[16].x, TEMP[12].yyyy
136: ENDIF
137: UIF TEMP[14].zzzz :0
138: MOV TEMP[13].x, TEMP[13].zzzz
139: ELSE :0
140: MOV TEMP[13].x, TEMP[12].zzzz
141: ENDIF
142: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
143: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx
144: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
145: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
146: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
147: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz
148: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
149: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
150: ENDIF
151: ADD TEMP[9].x, IMM[0].zzzz, -CONST[24].xxxx
152: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
153: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
154: MOV TEMP[11].xyz, TEMP[11].xyzz
155: MOV TEMP[11].w, TEMP[9].xxxx
156: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
157: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
158: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
159: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
160: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
161: ELSE :0
162: MOV TEMP[7].xyz, TEMP[10].xyzx
163: ENDIF
164: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
165: MOV TEMP[1].xyz, -TEMP[1].xyzx
166: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx
167: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
168: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
169: RSQ TEMP[10].x, TEMP[10].xxxx
170: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
171: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
172: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
173: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
174: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
175: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
176: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
177: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[5].xxxx
178: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy, IMM[2].zzzz
179: LG2 TEMP[12].x, TEMP[12].xxxx
180: RCP TEMP[12].x, TEMP[12].xxxx
181: MUL TEMP[12].x, IMM[2].xxxx, TEMP[12].xxxx
182: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
183: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[6].xxxx
184: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[1].xxxx
185: MUL TEMP[15].x, IMM[0].xxxx, TEMP[10].xxxx
186: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
187: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[1].xxxx
188: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[10].xxxx
189: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx
190: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
191: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx
192: MOV_SAT TEMP[4].x, TEMP[4].xxxx
193: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx
194: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
195: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
196: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
197: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
198: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[11].xxxx
199: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[11].xxxx
200: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].wwww
201: RCP TEMP[1].x, TEMP[1].xxxx
202: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
203: MAX TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx
204: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
205: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].zzzz
206: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
207: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
208: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
209: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
210: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
211: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
212: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
213: ADD TEMP[9].xyz, IMM[0].zzzz, -TEMP[3].xyzz
214: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
215: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
216: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
217: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
218: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz
219: ADD TEMP[9].x, TEMP[5].xxxx, IMM[0].yyyy
220: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
221: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
222: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
223: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
224: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].zzzz
225: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
226: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
227: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
228: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
229: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
230: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].zzzz
231: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
232: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
233: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
234: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
235: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
236: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
237: MOV TEMP[0].xyz, TEMP[0].xyzx
238: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww
239: MOV_SAT TEMP[1].x, TEMP[1].xxxx
240: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
241: MOV TEMP[0].xyz, TEMP[0].xyzx
242: MOV TEMP[0].w, IMM[0].zzzz
243: MOV OUT[0], TEMP[0]
244: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
%83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0
%85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0
%87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)*
%89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0
%90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)*
%92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0
%93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)*
%95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0
%96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)*
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)*
%107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0
%108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)*
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
%132 = bitcast float %111 to i32
%133 = bitcast float %112 to i32
%134 = insertelement <2 x i32> undef, i32 %132, i32 0
%135 = insertelement <2 x i32> %134, i32 %133, i32 1
%136 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %101, <16 x i8> %104, i32 2)
%137 = extractelement <4 x float> %136, i32 1
%138 = extractelement <4 x float> %136, i32 3
%139 = fmul float %138, 2.000000e+00
%140 = fadd float %139, -1.000000e+00
%141 = fmul float %137, 2.000000e+00
%142 = fadd float %141, -1.000000e+00
%143 = fmul float %140, %79
%144 = fmul float %142, %79
%145 = fmul float %143, %143
%146 = fmul float %144, %144
%147 = fadd float %145, %146
%148 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00)
%149 = fsub float 1.000000e+00, %148
%150 = call float @llvm.sqrt.f32(float %149)
%151 = fmul float %143, %113
%152 = fmul float %144, %116
%153 = fadd float %152, %151
%154 = fmul float %150, %119
%155 = fadd float %153, %154
%156 = fmul float %143, %114
%157 = fmul float %144, %117
%158 = fadd float %157, %156
%159 = fmul float %150, %120
%160 = fadd float %158, %159
%161 = fmul float %143, %115
%162 = fmul float %144, %118
%163 = fadd float %162, %161
%164 = fmul float %150, %121
%165 = fadd float %163, %164
%166 = fmul float %155, %155
%167 = fmul float %160, %160
%168 = fadd float %167, %166
%169 = fmul float %165, %165
%170 = fadd float %168, %169
%171 = call float @llvm.AMDGPU.rsq.clamped.f32(float %170)
%172 = fmul float %155, %171
%173 = fmul float %160, %171
%174 = fmul float %165, %171
%175 = fmul float %126, %126
%176 = fmul float %127, %127
%177 = fadd float %176, %175
%178 = fmul float %128, %128
%179 = fadd float %177, %178
%180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179)
%181 = fmul float %126, %180
%182 = fmul float %127, %180
%183 = fmul float %128, %180
%184 = bitcast float %111 to i32
%185 = bitcast float %112 to i32
%186 = insertelement <2 x i32> undef, i32 %184, i32 0
%187 = insertelement <2 x i32> %186, i32 %185, i32 1
%188 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %187, <32 x i8> %95, <16 x i8> %98, i32 2)
%189 = extractelement <4 x float> %188, i32 0
%190 = extractelement <4 x float> %188, i32 1
%191 = extractelement <4 x float> %188, i32 2
%192 = fmul float %76, %189
%193 = fmul float %77, %190
%194 = fmul float %78, %191
%195 = call float @llvm.AMDGPU.lrp(float %80, float %192, float %66)
%196 = call float @llvm.AMDGPU.lrp(float %80, float %193, float %67)
%197 = call float @llvm.AMDGPU.lrp(float %80, float %194, float %68)
%198 = fmul float %80, %69
%199 = fsub float %69, %198
%200 = fmul float %192, %199
%201 = fmul float %193, %199
%202 = fmul float %194, %199
%203 = bitcast float %111 to i32
%204 = bitcast float %112 to i32
%205 = insertelement <2 x i32> undef, i32 %203, i32 0
%206 = insertelement <2 x i32> %205, i32 %204, i32 1
%207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %107, <16 x i8> %110, i32 2)
%208 = extractelement <4 x float> %207, i32 1
%209 = fsub float 1.000000e+00, %82
%210 = fmul float %208, %82
%211 = fadd float %210, %209
%212 = fmul float %172, %24
%213 = fmul float %173, %25
%214 = fadd float %213, %212
%215 = fmul float %174, %26
%216 = fadd float %214, %215
%217 = call float @llvm.maxnum.f32(float %216, float 0.000000e+00)
%218 = fmul float %27, %172
%219 = fmul float %28, %173
%220 = fadd float %218, %219
%221 = fmul float %29, %174
%222 = fadd float %220, %221
%223 = fadd float %222, %30
%224 = fmul float %31, %172
%225 = fmul float %32, %173
%226 = fadd float %224, %225
%227 = fmul float %33, %174
%228 = fadd float %226, %227
%229 = fadd float %228, %34
%230 = fmul float %35, %172
%231 = fmul float %36, %173
%232 = fadd float %230, %231
%233 = fmul float %37, %174
%234 = fadd float %232, %233
%235 = fadd float %234, %38
%236 = fadd float %122, %223
%237 = fadd float %123, %229
%238 = fadd float %124, %235
%239 = fmul float %236, %211
%240 = fmul float %237, %211
%241 = fmul float %238, %211
%242 = fmul float %172, %181
%243 = fmul float %173, %182
%244 = fadd float %243, %242
%245 = fmul float %174, %183
%246 = fadd float %244, %245
%247 = fmul float %246, %172
%248 = fmul float %246, %173
%249 = fmul float %246, %174
%250 = fmul float %247, 2.000000e+00
%251 = fmul float %248, 2.000000e+00
%252 = fmul float %249, 2.000000e+00
%253 = fsub float %181, %250
%254 = fsub float %182, %251
%255 = fsub float %183, %252
%256 = fcmp ogt float %51, 0.000000e+00
br i1 %256, label %IF, label %ENDIF
IF: ; preds = %main_body
%257 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%258 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%259 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%260 = fmul float %253, %253
%261 = fmul float %254, %254
%262 = fadd float %261, %260
%263 = fmul float %255, %255
%264 = fadd float %262, %263
%265 = call float @llvm.AMDGPU.rsq.clamped.f32(float %264)
%266 = fmul float %253, %265
%267 = fmul float %254, %265
%268 = fmul float %255, %265
%269 = fsub float %44, %129
%270 = fsub float %45, %130
%271 = fsub float %46, %131
%272 = fdiv float 1.000000e+00, %266
%273 = fdiv float 1.000000e+00, %267
%274 = fdiv float 1.000000e+00, %268
%275 = fmul float %269, %272
%276 = fmul float %270, %273
%277 = fmul float %271, %274
%278 = fsub float %47, %129
%279 = fsub float %48, %130
%280 = fsub float %49, %131
%281 = fdiv float 1.000000e+00, %266
%282 = fdiv float 1.000000e+00, %267
%283 = fdiv float 1.000000e+00, %268
%284 = fmul float %278, %281
%285 = fmul float %279, %282
%286 = fmul float %280, %283
%287 = fcmp ogt float %266, 0.000000e+00
%288 = fcmp ogt float %267, 0.000000e+00
%289 = fcmp ogt float %268, 0.000000e+00
%. = select i1 %287, float %275, float %284
%temp64.0 = select i1 %288, float %276, float %285
%.96 = select i1 %289, float %277, float %286
%290 = fadd float %44, %47
%291 = fadd float %45, %48
%292 = fadd float %46, %49
%293 = fmul float %290, 5.000000e-01
%294 = fmul float %291, 5.000000e-01
%295 = fmul float %292, 5.000000e-01
%296 = call float @llvm.minnum.f32(float %., float %temp64.0)
%297 = call float @llvm.minnum.f32(float %296, float %.96)
%298 = fsub float %293, %259
%299 = fsub float %294, %258
%300 = fsub float %295, %257
%301 = fadd float %298, %129
%302 = fadd float %299, %130
%303 = fadd float %300, %131
%304 = fmul float %266, %297
%305 = fadd float %304, %301
%306 = fmul float %267, %297
%307 = fadd float %306, %302
%308 = fmul float %268, %297
%309 = fadd float %308, %303
%310 = fsub float %305, %293
%311 = fsub float %307, %294
%312 = fsub float %309, %295
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp40.0 = phi float [ %310, %IF ], [ %253, %main_body ]
%temp41.0 = phi float [ %311, %IF ], [ %254, %main_body ]
%temp42.0 = phi float [ %312, %IF ], [ %255, %main_body ]
%313 = fsub float 1.000000e+00, %81
%314 = call float @llvm.pow.f32(float %313, float 7.500000e-01)
%315 = fmul float %314, 7.000000e+00
%316 = insertelement <4 x float> undef, float %temp40.0, i32 0
%317 = insertelement <4 x float> %316, float %temp41.0, i32 1
%318 = insertelement <4 x float> %317, float %temp42.0, i32 2
%319 = insertelement <4 x float> %318, float %315, i32 3
%320 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %319)
%321 = extractelement <4 x float> %320, i32 0
%322 = extractelement <4 x float> %320, i32 1
%323 = extractelement <4 x float> %320, i32 2
%324 = extractelement <4 x float> %320, i32 3
%325 = call float @llvm.fabs.f32(float %323)
%326 = fdiv float 1.000000e+00, %325
%327 = fmul float %321, %326
%328 = fadd float %327, 1.500000e+00
%329 = fmul float %322, %326
%330 = fadd float %329, 1.500000e+00
%331 = bitcast float %330 to i32
%332 = bitcast float %328 to i32
%333 = bitcast float %324 to i32
%334 = bitcast float %315 to i32
%335 = insertelement <4 x i32> undef, i32 %331, i32 0
%336 = insertelement <4 x i32> %335, i32 %332, i32 1
%337 = insertelement <4 x i32> %336, i32 %333, i32 2
%338 = insertelement <4 x i32> %337, i32 %334, i32 3
%339 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %338, <32 x i8> %84, <16 x i8> %86, i32 4)
%340 = extractelement <4 x float> %339, i32 0
%341 = extractelement <4 x float> %339, i32 1
%342 = extractelement <4 x float> %339, i32 2
%343 = extractelement <4 x float> %339, i32 3
%344 = call float @llvm.pow.f32(float %343, float %53)
%345 = fmul float %52, %344
%346 = fmul float %345, %340
%347 = fmul float %345, %341
%348 = fmul float %345, %342
%349 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %349, label %IF82, label %ENDIF81
IF82: ; preds = %ENDIF
%350 = fcmp ogt float %63, 0.000000e+00
br i1 %350, label %IF85, label %ENDIF84
ENDIF81: ; preds = %ENDIF, %ENDIF84
%temp28.0 = phi float [ %578, %ENDIF84 ], [ %346, %ENDIF ]
%temp29.0 = phi float [ %579, %ENDIF84 ], [ %347, %ENDIF ]
%temp30.0 = phi float [ %580, %ENDIF84 ], [ %348, %ENDIF ]
%351 = fmul float %temp28.0, %211
%352 = fmul float %temp29.0, %211
%353 = fmul float %temp30.0, %211
%354 = fsub float 1.000000e+00, %81
%355 = fsub float %24, %181
%356 = fsub float %25, %182
%357 = fsub float %26, %183
%358 = fmul float %355, %355
%359 = fmul float %356, %356
%360 = fadd float %359, %358
%361 = fmul float %357, %357
%362 = fadd float %360, %361
%363 = call float @llvm.AMDGPU.rsq.clamped.f32(float %362)
%364 = fmul float %355, %363
%365 = fmul float %356, %363
%366 = fmul float %357, %363
%367 = fmul float %181, %172
%368 = fsub float -0.000000e+00, %367
%369 = fmul float %182, %173
%370 = fsub float %368, %369
%371 = fmul float %183, %174
%372 = fsub float %370, %371
%373 = call float @llvm.maxnum.f32(float %372, float 0.000000e+00)
%374 = fmul float %24, %364
%375 = fmul float %25, %365
%376 = fadd float %375, %374
%377 = fmul float %26, %366
%378 = fadd float %376, %377
%379 = call float @llvm.maxnum.f32(float %378, float 0.000000e+00)
%380 = fmul float %354, %354
%381 = fmul float %380, %75
%382 = fsub float 1.000000e+00, %354
%383 = fmul float %382, 0x3FEEF9DB20000000
%384 = fadd float %383, 0x3F9EB851E0000000
%385 = call float @llvm.log2.f32(float %384)
%386 = fdiv float 1.000000e+00, %385
%387 = fmul float %386, 1.000000e+01
%388 = fmul float %387, %387
%389 = fsub float 1.000000e+00, %217
%390 = fsub float 1.000000e+00, %373
%391 = fmul float %379, 2.000000e+00
%392 = fmul float %379, %354
%393 = fmul float %391, %392
%394 = fadd float %393, 5.000000e-01
%395 = fsub float 1.000000e+00, %379
%396 = fsub float 1.000000e+00, %373
%397 = fsub float 1.000000e+00, %199
%398 = fadd float %81, %397
%399 = call float @llvm.AMDIL.clamp.(float %398, float 0.000000e+00, float 1.000000e+00)
%400 = fmul float %396, %396
%401 = fmul float %396, %396
%402 = fmul float %401, %396
%403 = fmul float %400, %402
%404 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %195)
%405 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %196)
%406 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %197)
%407 = call float @llvm.AMDGPU.lrp(float %217, float 1.000000e+00, float %381)
%408 = call float @llvm.AMDGPU.lrp(float %373, float 1.000000e+00, float %381)
%409 = fmul float %407, %408
%410 = fadd float %409, 0x3F1A36E2E0000000
%411 = fdiv float 1.000000e+00, %410
%412 = fmul float %172, %364
%413 = fmul float %173, %365
%414 = fadd float %413, %412
%415 = fmul float %174, %366
%416 = fadd float %414, %415
%417 = call float @llvm.maxnum.f32(float %416, float 0.000000e+00)
%418 = call float @llvm.pow.f32(float %417, float %388)
%419 = fadd float %388, 1.000000e+00
%420 = fmul float %419, %74
%421 = fmul float %418, %420
%422 = fmul float %411, %421
%423 = fmul float %422, %217
%424 = fmul float %423, %73
%425 = call float @llvm.maxnum.f32(float %424, float 0.000000e+00)
%426 = fmul float %425, %70
%427 = fmul float %425, %71
%428 = fmul float %425, %72
%429 = fsub float 1.000000e+00, %195
%430 = fsub float 1.000000e+00, %196
%431 = fsub float 1.000000e+00, %197
%432 = fmul float %395, %395
%433 = fmul float %395, %395
%434 = fmul float %433, %395
%435 = fmul float %432, %434
%436 = fmul float %429, %435
%437 = fadd float %436, %195
%438 = fmul float %430, %435
%439 = fadd float %438, %196
%440 = fmul float %431, %435
%441 = fadd float %440, %197
%442 = fadd float %394, -1.000000e+00
%443 = fmul float %389, %389
%444 = fmul float %389, %389
%445 = fmul float %444, %389
%446 = fmul float %443, %445
%447 = fmul float %442, %446
%448 = fadd float %447, 1.000000e+00
%449 = fadd float %394, -1.000000e+00
%450 = fmul float %390, %390
%451 = fmul float %390, %390
%452 = fmul float %451, %390
%453 = fmul float %450, %452
%454 = fmul float %449, %453
%455 = fadd float %454, 1.000000e+00
%456 = fmul float %448, %455
%457 = fmul float %456, %217
%458 = fmul float %70, %457
%459 = fadd float %458, %239
%460 = fmul float %71, %457
%461 = fadd float %460, %240
%462 = fmul float %72, %457
%463 = fadd float %462, %241
%464 = fmul float %200, %459
%465 = fmul float %201, %461
%466 = fmul float %202, %463
%467 = fmul float %426, %437
%468 = fadd float %467, %464
%469 = fmul float %427, %439
%470 = fadd float %469, %465
%471 = fmul float %428, %441
%472 = fadd float %471, %466
%473 = fmul float %351, %404
%474 = fadd float %473, %468
%475 = fmul float %352, %405
%476 = fadd float %475, %470
%477 = fmul float %353, %406
%478 = fadd float %477, %472
%479 = fmul float %125, %42
%480 = fadd float %479, %43
%481 = call float @llvm.AMDIL.clamp.(float %480, float 0.000000e+00, float 1.000000e+00)
%482 = call float @llvm.AMDGPU.lrp(float %481, float %474, float %39)
%483 = call float @llvm.AMDGPU.lrp(float %481, float %476, float %40)
%484 = call float @llvm.AMDGPU.lrp(float %481, float %478, float %41)
%485 = call i32 @llvm.SI.packf16(float %482, float %483)
%486 = bitcast i32 %485 to float
%487 = call i32 @llvm.SI.packf16(float %484, float 1.000000e+00)
%488 = bitcast i32 %487 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %486, float %488, float %486, float %488)
ret void
IF85: ; preds = %IF82
%489 = fmul float %253, %253
%490 = fmul float %254, %254
%491 = fadd float %490, %489
%492 = fmul float %255, %255
%493 = fadd float %491, %492
%494 = call float @llvm.AMDGPU.rsq.clamped.f32(float %493)
%495 = fmul float %253, %494
%496 = fmul float %254, %494
%497 = fmul float %255, %494
%498 = fsub float %54, %129
%499 = fsub float %55, %130
%500 = fsub float %56, %131
%501 = fdiv float 1.000000e+00, %495
%502 = fdiv float 1.000000e+00, %496
%503 = fdiv float 1.000000e+00, %497
%504 = fmul float %498, %501
%505 = fmul float %499, %502
%506 = fmul float %500, %503
%507 = fsub float %57, %129
%508 = fsub float %58, %130
%509 = fsub float %59, %131
%510 = fdiv float 1.000000e+00, %495
%511 = fdiv float 1.000000e+00, %496
%512 = fdiv float 1.000000e+00, %497
%513 = fmul float %507, %510
%514 = fmul float %508, %511
%515 = fmul float %509, %512
%516 = fcmp ogt float %495, 0.000000e+00
%517 = fcmp ogt float %496, 0.000000e+00
%518 = fcmp ogt float %497, 0.000000e+00
%.97 = select i1 %516, float %504, float %513
%temp64.1 = select i1 %517, float %505, float %514
%.98 = select i1 %518, float %506, float %515
%519 = fadd float %54, %57
%520 = fadd float %55, %58
%521 = fadd float %56, %59
%522 = fmul float %519, 5.000000e-01
%523 = fmul float %520, 5.000000e-01
%524 = fmul float %521, 5.000000e-01
%525 = call float @llvm.minnum.f32(float %.97, float %temp64.1)
%526 = call float @llvm.minnum.f32(float %525, float %.98)
%527 = fsub float %522, %60
%528 = fsub float %523, %61
%529 = fsub float %524, %62
%530 = fadd float %527, %129
%531 = fadd float %528, %130
%532 = fadd float %529, %131
%533 = fmul float %495, %526
%534 = fadd float %533, %530
%535 = fmul float %496, %526
%536 = fadd float %535, %531
%537 = fmul float %497, %526
%538 = fadd float %537, %532
%539 = fsub float %534, %522
%540 = fsub float %536, %523
%541 = fsub float %538, %524
br label %ENDIF84
ENDIF84: ; preds = %IF82, %IF85
%temp44.0 = phi float [ %539, %IF85 ], [ %253, %IF82 ]
%temp45.0 = phi float [ %540, %IF85 ], [ %254, %IF82 ]
%temp46.0 = phi float [ %541, %IF85 ], [ %255, %IF82 ]
%542 = fsub float 1.000000e+00, %81
%543 = call float @llvm.pow.f32(float %542, float 7.500000e-01)
%544 = fmul float %543, 7.000000e+00
%545 = insertelement <4 x float> undef, float %temp44.0, i32 0
%546 = insertelement <4 x float> %545, float %temp45.0, i32 1
%547 = insertelement <4 x float> %546, float %temp46.0, i32 2
%548 = insertelement <4 x float> %547, float %544, i32 3
%549 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %548)
%550 = extractelement <4 x float> %549, i32 0
%551 = extractelement <4 x float> %549, i32 1
%552 = extractelement <4 x float> %549, i32 2
%553 = extractelement <4 x float> %549, i32 3
%554 = call float @llvm.fabs.f32(float %552)
%555 = fdiv float 1.000000e+00, %554
%556 = fmul float %550, %555
%557 = fadd float %556, 1.500000e+00
%558 = fmul float %551, %555
%559 = fadd float %558, 1.500000e+00
%560 = bitcast float %559 to i32
%561 = bitcast float %557 to i32
%562 = bitcast float %553 to i32
%563 = bitcast float %544 to i32
%564 = insertelement <4 x i32> undef, i32 %560, i32 0
%565 = insertelement <4 x i32> %564, i32 %561, i32 1
%566 = insertelement <4 x i32> %565, i32 %562, i32 2
%567 = insertelement <4 x i32> %566, i32 %563, i32 3
%568 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %567, <32 x i8> %89, <16 x i8> %92, i32 4)
%569 = extractelement <4 x float> %568, i32 0
%570 = extractelement <4 x float> %568, i32 1
%571 = extractelement <4 x float> %568, i32 2
%572 = extractelement <4 x float> %568, i32 3
%573 = call float @llvm.pow.f32(float %572, float %65)
%574 = fmul float %64, %573
%575 = fmul float %574, %569
%576 = fmul float %574, %570
%577 = fmul float %574, %571
%578 = call float @llvm.AMDGPU.lrp(float %50, float %346, float %575)
%579 = call float @llvm.AMDGPU.lrp(float %50, float %347, float %576)
%580 = call float @llvm.AMDGPU.lrp(float %50, float %348, float %577)
br label %ENDIF81
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata used by the shader
; function body that ends just above. None of these have a body in this module.
; ---------------------------------------------------------------------------
; Scalar float load: called above with a <16 x i8> value and a constant i32
; offset (32, 36, 40, ...).
; NOTE(review): the offsets look like byte offsets (IR offset 352 appears as
; s_buffer_load_dword offset 0x58 in the disassembly) - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Fragment input interpolation: called above with two constant i32 indices,
; then i32 %5 and <2 x i32> %7.
; NOTE(review): the two constants presumably select channel and attribute
; (they match the operands of the v_interp_p1/p2 pairs) - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Texture sample taking a <2 x i32> coordinate vector (floats bitcast to i32
; at the call sites), a <32 x i8> and a <16 x i8> descriptor, and a trailing
; i32 that is 2 at every call site above; returns <4 x float>.
; NOTE(review): meaning of the trailing i32 is not visible here - confirm.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Three-operand clamp; every call site above passes 0.0 and 1.0 as the
; second and third operands.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Used above on sums of squares; the result multiplies each component.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Three-operand blend.
; NOTE(review): the disassembly appears to lower lrp(a, b, c) as
; a*b + (1 - a)*c - confirm against the intrinsic definition.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Takes a <4 x float> and returns a <4 x float>; the call sites above extract
; all four result elements before building the samplel coordinate vector.
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Texture sample taking a <4 x i32> coordinate vector; the trailing i32 is 4
; at every call site above.
; NOTE(review): meaning of the trailing i32 is not visible here - confirm.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Packs two floats into one i32; the results above are bitcast back to float
; and passed to llvm.SI.export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Export call: five i32 operands followed by four float values. Declared
; without an attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0 is not referenced by any declaration in this section.
; NOTE(review): presumably attached to the shader function's define line,
; which is outside this excerpt - confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 is attached to the intrinsics declared "nounwind readnone" above.
attributes #1 = { nounwind readnone }
; #2 is attached to AMDIL.clamp., AMDGPU.lrp and AMDGPU.cube (readnone only).
attributes #2 = { readnone }
; Metadata node referenced as !tbaa !0 by the descriptor loads in the body.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000
v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001
v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100
v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500
v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501
v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600
v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601
v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800
v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801
v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900
v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v8, v0, 0, 4, [m0] ; C8201000
v_interp_p2_f32 v8, [v8], v1, 0, 4, [m0] ; C8211001
v_interp_p1_f32 v9, v0, 1, 4, [m0] ; C8241100
v_interp_p2_f32 v9, [v9], v1, 1, 4, [m0] ; C8251101
v_interp_p1_f32 v10, v0, 2, 4, [m0] ; C8281200
v_interp_p2_f32 v10, [v10], v1, 2, 4, [m0] ; C8291201
v_interp_p1_f32 v3, v0, 0, 5, [m0] ; C80C1400
v_interp_p2_f32 v3, [v3], v1, 0, 5, [m0] ; C80D1401
v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500
v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501
v_interp_p1_f32 v19, v0, 2, 5, [m0] ; C84C1600
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p2_f32 v19, [v19], v1, 2, 5, [m0] ; C84D1601
v_interp_p1_f32 v20, v0, 3, 5, [m0] ; C8501700
v_interp_p2_f32 v20, [v20], v1, 3, 5, [m0] ; C8511701
s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C
s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718
v_interp_p1_f32 v21, v0, 0, 6, [m0] ; C8541800
v_interp_p2_f32 v21, [v21], v1, 0, 6, [m0] ; C8551801
v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900
v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[8:11], 0x58 ; C2060958
v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00
v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[0:3] ; F0800A00 0005000D
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
s_buffer_load_dword s21, s[8:11], 0x5c ; C20A895C
s_buffer_load_dword s0, s[8:11], 0x60 ; C2000960
v_mul_f32_e32 v1, s12, v1 ; 1002020C
v_mul_f32_e32 v0, s12, v0 ; 1000000C
v_mul_f32_e32 v2, v2, v1 ; 10040302
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v4, v4, v1 ; 10080304
v_mac_f32_e32 v4, v7, v0 ; 3E080107
v_mul_f32_e32 v7, v5, v1 ; 100E0305
v_mac_f32_e32 v7, v11, v0 ; 3E0E010B
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v2, v12, v0 ; 3E04010C
v_mac_f32_e32 v4, v15, v0 ; 3E08010F
v_mac_f32_e32 v7, v16, v0 ; 3E0E0110
v_mul_f32_e32 v0, v2, v2 ; 10000502
v_mac_f32_e32 v0, v4, v4 ; 3E000904
v_mac_f32_e32 v0, v7, v7 ; 3E000F07
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v18, v18 ; 10022512
v_mac_f32_e32 v1, v19, v19 ; 3E022713
v_mac_f32_e32 v1, v20, v20 ; 3E022914
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v6, v0, v2 ; 100C0500
v_mul_f32_e32 v5, v0, v4 ; 100A0900
v_mul_f32_e32 v4, v0, v7 ; 10080F00
v_mul_f32_e32 v12, v1, v18 ; 10182501
v_mul_f32_e32 v11, v1, v19 ; 10162701
v_mul_f32_e32 v0, v12, v6 ; 10000D0C
v_mac_f32_e32 v0, v11, v5 ; 3E000B0B
v_mul_f32_e32 v7, v1, v20 ; 100E2901
v_mac_f32_e32 v0, v7, v4 ; 3E000907
v_mul_f32_e32 v2, v6, v0 ; 10040106
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v15, v5, v0 ; 101E0105
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
v_mac_f32_e32 v15, v5, v0 ; 3E1E0105
v_mad_f32 v23, v18, v1, -v2 ; D2820017 840A0312
v_mad_f32 v24, v19, v1, -v15 ; D2820018 843E0313
s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C
s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D
s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E
v_mul_f32_e32 v2, v4, v0 ; 10040104
v_mac_f32_e32 v2, v4, v0 ; 3E040104
v_mad_f32 v25, v20, v1, -v2 ; D2820019 840A0314
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[12:15] ; F0800700 0066000D
s_buffer_load_dword s13, s[8:11], 0x40 ; C2068940
s_buffer_load_dword s14, s[8:11], 0x41 ; C2070941
s_buffer_load_dword s15, s[8:11], 0x42 ; C2078942
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v18, s1, v0 ; 10240001
v_mul_f32_e32 v19, s2, v1 ; 10260202
v_mul_f32_e32 v20, s3, v2 ; 10280403
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s1, s[8:11], 0x2b ; C200892B
s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C
s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D
v_sub_f32_e64 v0, 1.0, s21 ; D2080000 00002AF2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s13, v0 ; 1004000D
v_mul_f32_e32 v1, s14, v0 ; 1002000E
v_mul_f32_e32 v0, s15, v0 ; 1000000F
v_mac_f32_e32 v2, s21, v18 ; 3E042415
v_mov_b32_e32 v26, v23 ; 7E340317
v_mac_f32_e32 v1, s21, v19 ; 3E022615
v_mov_b32_e32 v27, v24 ; 7E360318
v_mac_f32_e32 v0, s21, v20 ; 3E002815
v_mov_b32_e32 v28, v25 ; 7E380319
v_cmp_lt_f32_e64 s[2:3], 0, s1 ; D0020002 00000280
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[16:19] ; F0800F00 00880D0D
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[22:23], s[2:3] ; BE962402
s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925
v_mul_f32_e32 v13, v23, v23 ; 101A2F17
v_mac_f32_e32 v13, v24, v24 ; 3E1A3118
v_mac_f32_e32 v13, v25, v25 ; 3E1A3319
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926
s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928
s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929
s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A
v_mul_f32_e32 v15, v13, v23 ; 101E2F0D
v_mul_f32_e32 v16, v13, v24 ; 1020310D
v_mul_f32_e32 v13, v13, v25 ; 101A330D
v_rcp_f32_e32 v26, v15 ; 7E34550F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v27, s1, v21 ; 08362A01
v_sub_f32_e32 v28, s2, v17 ; 08382202
v_rcp_f32_e32 v29, v16 ; 7E3A5510
v_mul_f32_e32 v27, v26, v27 ; 1036371A
v_sub_f32_e32 v30, s13, v21 ; 083C2A0D
v_mul_f32_e32 v26, v26, v30 ; 10343D1A
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v26, v26, v27 ; 0034371A
v_rcp_f32_e32 v27, v13 ; 7E36550D
v_mul_f32_e32 v28, v29, v28 ; 1038391D
v_sub_f32_e32 v30, s14, v17 ; 083C220E
v_mul_f32_e32 v29, v29, v30 ; 103A3D1D
v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080
v_cndmask_b32_e32 v28, v29, v28 ; 0038391D
v_sub_f32_e32 v29, s3, v22 ; 083A2C03
v_mul_f32_e32 v29, v27, v29 ; 103A3B1B
v_sub_f32_e32 v30, s15, v22 ; 083C2C0F
v_mul_f32_e32 v27, v27, v30 ; 10363D1B
v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80
v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B
v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A
v_mov_b32_e32 v27, s13 ; 7E36020D
v_add_f32_e32 v27, s1, v27 ; 06363601
v_mov_b32_e32 v28, s14 ; 7E38020E
v_add_f32_e32 v28, s2, v28 ; 06383802
v_mov_b32_e32 v29, s15 ; 7E3A020F
v_add_f32_e32 v29, s3, v29 ; 063A3A03
v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0
v_add_f32_e32 v30, v21, v30 ; 063C3D15
v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A
v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0
v_add_f32_e32 v15, v17, v15 ; 061E1F11
v_mac_f32_e32 v15, v26, v16 ; 3E1E211A
v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0
v_add_f32_e32 v16, v22, v16 ; 06202116
v_mac_f32_e32 v16, v26, v13 ; 3E201B1A
v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0
v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0
v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0
s_or_b64 exec, exec, s[22:23] ; 88FE167E
s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917
s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943
s_buffer_load_dword s13, s[8:11], 0x68 ; C2068968
s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900
s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901
s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902
s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904
s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905
s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906
s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907
s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908
s_buffer_load_dword s22, s[8:11], 0x9 ; C20B0909
s_buffer_load_dword s23, s[8:11], 0xa ; C20B890A
s_buffer_load_dword s24, s[8:11], 0xb ; C20C090B
s_buffer_load_dword s25, s[8:11], 0xc ; C20C890C
s_buffer_load_dword s26, s[8:11], 0xd ; C20D090D
s_buffer_load_dword s27, s[8:11], 0xe ; C20D890E
s_buffer_load_dword s28, s[8:11], 0xf ; C20E090F
v_sub_f32_e64 v13, 1.0, s0 ; D208000D 000000F2
v_log_f32_e32 v13, v13 ; 7E1A4F0D
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000
v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A
v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A
v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A
v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A
v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000
v_rcp_f32_e64 v13, |v32| ; D354010D 00000120
v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D
v_mac_f32_e32 v26, v13, v31 ; 3E343F0D
v_mov_b32_e32 v28, v33 ; 7E380321
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v13, v29 ; 7E1A4F1D
v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v13, s29, v13 ; 101A1A1D
v_mul_f32_e32 v16, v26, v13 ; 10201B1A
v_mul_f32_e32 v15, v27, v13 ; 101E1B1B
v_mul_f32_e32 v13, v28, v13 ; 101A1B1C
v_mov_b32_e32 v27, s21 ; 7E360215
v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C
s_and_saveexec_b64 s[30:31], vcc ; BE9E246A
s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B
s_buffer_load_dword s21, s[8:11], 0x3c ; C20A893C
s_buffer_load_dword s29, s[8:11], 0x3d ; C20E893D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936
s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938
s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939
s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A
s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930
s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931
s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932
s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934
s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935
v_mul_f32_e32 v26, v23, v23 ; 10342F17
v_mac_f32_e32 v26, v24, v24 ; 3E343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v28, s34, v22 ; 08382C22
v_mov_b32_e32 v29, s34 ; 7E3A0222
v_sub_f32_e32 v30, s38, v21 ; 083C2A26
v_sub_f32_e32 v31, s39, v17 ; 083E2227
v_add_f32_e32 v29, s40, v29 ; 063A3A28
v_sub_f32_e32 v32, s40, v22 ; 08402C28
v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0
v_add_f32_e32 v22, v22, v33 ; 062C4316
v_mul_f32_e32 v23, v26, v23 ; 102E2F1A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_rcp_f32_e32 v26, v23 ; 7E345517
v_rcp_f32_e32 v33, v24 ; 7E425518
v_rcp_f32_e32 v34, v25 ; 7E445519
v_sub_f32_e32 v35, s41, v21 ; 08462A29
v_mov_b32_e32 v36, s41 ; 7E480229
v_add_f32_e32 v36, s38, v36 ; 06484826
v_mul_f32_e32 v30, v26, v30 ; 103C3D1A
v_mul_f32_e32 v26, v26, v35 ; 1034471A
v_mul_f32_e32 v31, v33, v31 ; 103E3F21
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0
v_add_f32_e32 v21, v21, v34 ; 062A4515
v_sub_f32_e32 v34, s42, v17 ; 0844222A
v_mov_b32_e32 v35, s42 ; 7E46022A
v_mul_f32_e32 v33, v33, v34 ; 10424521
v_add_f32_e32 v34, s39, v35 ; 06444627
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A
v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080
v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v28, v28, v32 ; 0038411C
v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A
v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0
v_add_f32_e32 v17, v17, v28 ; 06223911
v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A
v_mac_f32_e32 v17, v26, v24 ; 3E22311A
v_mac_f32_e32 v22, v26, v25 ; 3E2C331A
v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0
v_mad_f32 v24, 0.5, -v34, v17 ; D2820018 444644F0
v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v26, 0x40e00000, v17 ; 103422FF 40E00000
v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117
v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117
v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117
v_rcp_f32_e64 v17, |v30| ; D3540111 0000011E
v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000
v_mad_f32 v24, v17, v28, v23 ; D2820018 045E3911
v_mac_f32_e32 v23, v17, v29 ; 3E2E3B11
v_mov_b32_e32 v25, v31 ; 7E32031F
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v24 ; 7E224F18
v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2
v_mul_legacy_f32_e32 v17, s29, v17 ; 0E22221D
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s21, v17 ; 10222215
v_mul_f32_e32 v21, v21, v17 ; 102A2315
v_mul_f32_e32 v22, v22, v17 ; 102C2316
v_mul_f32_e32 v17, v23, v17 ; 10222317
v_mul_f32_e32 v21, v21, v24 ; 102A3115
v_mul_f32_e32 v22, v22, v24 ; 102C3116
v_mul_f32_e32 v17, v17, v24 ; 10223111
v_mac_f32_e32 v21, s12, v16 ; 3E2A200C
v_mac_f32_e32 v22, s12, v15 ; 3E2C1E0C
v_mac_f32_e32 v17, s12, v13 ; 3E221A0C
v_mov_b32_e32 v13, v17 ; 7E1A0311
v_mov_b32_e32 v15, v22 ; 7E1E0316
v_mov_b32_e32 v16, v21 ; 7E200315
s_or_b64 exec, exec, s[30:31] ; 88FE1E7E
v_mad_f32 v22, -v27, s15, s15 ; D2820016 203C1F1B
v_mov_b32_e32 v17, s14 ; 7E22020E
v_mul_f32_e32 v21, v22, v18 ; 102A2516
v_mul_f32_e32 v19, v22, v19 ; 10262716
v_mul_f32_e32 v18, v22, v20 ; 10242916
v_mul_f32_e32 v20, s17, v5 ; 10280A11
v_mac_f32_e32 v20, s16, v6 ; 3E280C10
v_mac_f32_e32 v20, s18, v4 ; 3E280812
v_add_f32_e32 v20, s20, v20 ; 06282814
v_add_f32_e32 v23, v20, v8 ; 062E1114
v_mul_f32_e32 v8, s22, v5 ; 10100A16
v_mac_f32_e32 v8, s19, v6 ; 3E100C13
v_mac_f32_e32 v8, s23, v4 ; 3E100817
v_add_f32_e32 v8, s24, v8 ; 06101018
v_add_f32_e32 v9, v8, v9 ; 06121308
v_mul_f32_e32 v8, s26, v5 ; 10100A1A
v_mac_f32_e32 v8, s25, v6 ; 3E100C19
v_mac_f32_e32 v8, s27, v4 ; 3E10081B
v_add_f32_e32 v8, s28, v8 ; 0610101C
v_add_f32_e32 v10, v8, v10 ; 06141508
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916
s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944
s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945
s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946
s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948
s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_sub_f32_e64 v20, 1.0, s13 ; D2080014 00001AF2
v_mac_f32_e32 v20, s13, v14 ; 3E281C0D
v_mul_f32_e32 v8, s1, v6 ; 10100C01
v_mac_f32_e32 v8, s2, v5 ; 3E100A02
v_mac_f32_e32 v8, s3, v4 ; 3E100803
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v14, v20, v23 ; 101C2F14
v_mul_f32_e32 v9, v20, v9 ; 10121314
v_mul_f32_e32 v10, v20, v10 ; 10141514
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v17, s17, v3 ; 3E220611
v_mul_f32_e32 v3, v20, v16 ; 10062114
v_mul_f32_e32 v15, v20, v15 ; 101E1F14
v_mul_f32_e32 v13, v20, v13 ; 101A1B14
v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2
v_add_f32_e32 v16, s0, v16 ; 06202000
v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2
v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080
v_sub_f32_e32 v22, s1, v12 ; 082C1801
v_sub_f32_e32 v23, s2, v11 ; 082E1602
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s3, v7 ; 08320E03
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
v_mul_f32_e32 v12, v12, v6 ; 10180D0C
v_mad_f32 v11, -v11, v5, -v12 ; D282000B A4320B0B
v_mul_f32_e32 v6, v22, v6 ; 100C0D16
v_mac_f32_e32 v6, v23, v5 ; 3E0C0B17
v_mul_f32_e32 v5, s1, v22 ; 100A2C01
v_mac_f32_e32 v5, s2, v23 ; 3E0A2E02
v_mad_f32 v7, -v7, v4, v11 ; D2820007 242E0907
v_mac_f32_e32 v5, s3, v24 ; 3E0A3003
v_mac_f32_e32 v6, v24, v4 ; 3E0C0918
v_max_f32_e32 v4, 0, v5 ; 20080A80
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v11, v5, v5 ; 10160B05
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_sub_f32_e32 v11, 1.0, v7 ; 08160EF2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v22, v11, v12 ; 102C190B
v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C
v_mul_f32_e32 v24, v2, v23 ; 10302F02
v_sub_f32_e32 v25, 1.0, v2 ; 083204F2
v_mac_f32_e32 v2, v5, v25 ; 3E043305
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v5, v26 ; 3E023505
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v5, v26 ; 3E003505
v_sub_f32_e32 v5, 1.0, v20 ; 080A28F2
v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F
v_madmk_f32_e32 v5, v5, v26, 0x3f77ced9 ; 400A3505 3F77CED9
v_add_f32_e32 v26, v4, v4 ; 06340904
v_mul_f32_e32 v4, v20, v4 ; 10080914
v_mad_f32 v4, v26, v4, 0.5 ; D2820004 03C2091A
v_mul_f32_e32 v12, v22, v12 ; 10181916
v_mac_f32_e32 v24, v16, v12 ; 3E301910
v_mac_f32_e32 v25, v16, v12 ; 3E321910
v_mac_f32_e32 v23, v16, v12 ; 3E2E1910
v_mul_f32_e32 v16, v20, v20 ; 10202914
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_mul_f32_e32 v16, s8, v16 ; 10202008
v_mul_f32_e32 v11, v16, v11 ; 10161710
v_mac_f32_e32 v11, 1.0, v7 ; 3E160EF2
v_rcp_f32_e32 v5, v5 ; 7E0A5505
v_sub_f32_e32 v7, 1.0, v8 ; 080E10F2
v_mul_f32_e32 v16, v16, v7 ; 10200F10
v_mac_f32_e32 v16, 1.0, v8 ; 3E2010F2
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_madak_f32_e32 v11, v16, v11, 0x38d1b717 ; 42161710 38D1B717
v_mul_f32_e32 v5, 0x41200000, v5 ; 100A0AFF 41200000
v_mul_f32_e32 v16, v5, v5 ; 10200B05
v_mul_legacy_f32_e32 v6, v16, v6 ; 0E0C0D10
v_rcp_f32_e32 v11, v11 ; 7E16550B
v_mad_f32 v5, v5, v5, 1.0 ; D2820005 03CA0B05
v_mul_f32_e32 v5, s16, v5 ; 100A0A10
v_exp_f32_e32 v6, v6 ; 7E0C4B06
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mul_f32_e32 v5, v8, v5 ; 100A0B08
v_mul_f32_e32 v5, s15, v5 ; 100A0A0F
v_mul_f32_e32 v6, v7, v7 ; 100C0F07
v_mul_f32_e32 v7, v7, v6 ; 100E0D07
v_mul_f32_e32 v6, v7, v6 ; 100C0D07
v_add_f32_e32 v4, -1.0, v4 ; 060808F3
v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04
v_mad_f32 v4, v4, v12, 1.0 ; D2820004 03CA1904
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v4, v8, v4 ; 10080908
v_mac_f32_e32 v14, s14, v4 ; 3E1C080E
v_mul_f32_e32 v6, v14, v21 ; 100C2B0E
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mul_f32_e32 v7, s14, v5 ; 100E0A0E
v_mac_f32_e32 v6, v2, v7 ; 3E0C0F02
v_mac_f32_e32 v9, s7, v4 ; 3E120807
v_mac_f32_e32 v10, s12, v4 ; 3E14080C
v_mul_f32_e32 v2, s7, v5 ; 10040A07
v_mul_f32_e32 v4, s12, v5 ; 10080A0C
v_mul_f32_e32 v5, v9, v19 ; 100A2709
v_mul_f32_e32 v7, v10, v18 ; 100E250A
v_mac_f32_e32 v5, v1, v2 ; 3E0A0501
v_mac_f32_e32 v7, v0, v4 ; 3E0E0900
v_mac_f32_e32 v6, v24, v3 ; 3E0C0718
v_mac_f32_e32 v5, v25, v15 ; 3E0A1F19
v_mac_f32_e32 v7, v23, v13 ; 3E0E1B17
v_add_f32_e64 v0, 0, v17 clamp ; D2060800 00022280
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v7, v0 ; 3E020107
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 40
Code Size: 2272 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww
9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[6].xxxx
18: MOV TEMP[3].y, CONST[7].xxxx
19: MOV TEMP[3].z, CONST[8].xxxx
20: MOV TEMP[4].x, CONST[6].yyyy
21: MOV TEMP[4].y, CONST[7].yyyy
22: MOV TEMP[4].z, CONST[8].yyyy
23: MOV TEMP[5].x, CONST[6].zzzz
24: MOV TEMP[5].y, CONST[7].zzzz
25: MOV TEMP[5].z, CONST[8].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[3].xyz, TEMP[3].xyzx
44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww
45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz
46: MOV TEMP[4].w, TEMP[6].xxxx
47: MOV TEMP[5].w, TEMP[6].yyyy
48: MOV TEMP[3].w, TEMP[6].zzzz
49: MUL TEMP[6], CONST[2], IN[0].xxxx
50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6]
51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6]
52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6]
53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[0].yzw, TEMP[0].yxyz
55: MUL TEMP[7], CONST[13], TEMP[6].xxxx
56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7]
57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7]
58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7]
59: MOV TEMP[6].xyz, TEMP[6].xyzx
60: MOV TEMP[0].x, TEMP[1].zzzz
61: MOV OUT[1], TEMP[2]
62: MOV OUT[3], TEMP[5]
63: MOV OUT[2], TEMP[4]
64: MOV OUT[4], TEMP[3]
65: MOV OUT[0], TEMP[1]
66: MOV OUT[6], TEMP[6]
67: MOV OUT[5], TEMP[0]
68: END
; ModuleID = 'tgsi'
; Vertex-shader entry point: the LLVM translation of the TGSI VERT program printed
; directly above (inputs IN[0..4], outputs POSITION + GENERIC[0..5], CONST[0..20]).
; Visible uses of the arguments:
;   %1      - array from which the constant-buffer descriptor (element 0) is loaded
;   %4      - array from which the five vertex-buffer descriptors (elements 0..4) are loaded
;   %5 + %7 - summed to form the index operand of every llvm.SI.vs.load.input call
; NOTE(review): the remaining arguments are unused in this body; their meaning is not
; visible in this dump.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; --- Constant loads -------------------------------------------------------------
; All scalar constants come from descriptor %12. Matching the uses below against the
; TGSI listing, byte offset n*16 + c*4 corresponds to CONST[n] component c
; (e.g. offset 192 feeds the compare that implements "FSEQ ..., CONST[12].xxxx").
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; CONST[0].xyz
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; CONST[1].xyzw
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
; CONST[2].xyzw
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
; CONST[3].xyzw
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; CONST[4].xyzw
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; CONST[5].xyzw
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; CONST[6].xyz
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
; CONST[7].xyz
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
; CONST[8].xyz
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; CONST[10].xyzw
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
; CONST[11].xyzw
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
; CONST[12].x
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
; CONST[13].xyz
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
; CONST[14].xyz
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
; CONST[15].xyz
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
; CONST[16].xyz
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
; CONST[17].xyzw
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
; CONST[18].xyzw
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
; CONST[19].xyzw
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
; CONST[20].xyzw
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; --- Vertex input fetches -------------------------------------------------------
; Each input uses descriptor element k of %4 and the same index %5 + %7 (the add is
; re-emitted for every fetch). Only the components the TGSI program reads are extracted.
; IN[0].xyzw -> %86..%89
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
; IN[1].xyz -> %94..%96
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
%96 = extractelement <4 x float> %93, i32 2
; IN[2].xy -> %101, %102
%97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = add i32 %5, %7
%100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99)
%101 = extractelement <4 x float> %100, i32 0
%102 = extractelement <4 x float> %100, i32 1
; IN[3].xy -> %107, %108
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 0
%108 = extractelement <4 x float> %106, i32 1
; IN[4].xyzw -> %113..%116
%109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = add i32 %5, %7
%112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111)
%113 = extractelement <4 x float> %112, i32 0
%114 = extractelement <4 x float> %112, i32 1
%115 = extractelement <4 x float> %112, i32 2
%116 = extractelement <4 x float> %112, i32 3
; --- TGSI 0-3: TEMP[0].xyz = CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z
;               + CONST[5]*IN[0].w; results are %133, %135, %137.
%117 = fmul float %20, %86
%118 = fmul float %21, %86
%119 = fmul float %22, %86
%120 = fmul float %24, %87
%121 = fadd float %120, %117
%122 = fmul float %25, %87
%123 = fadd float %122, %118
%124 = fmul float %26, %87
%125 = fadd float %124, %119
%126 = fmul float %28, %88
%127 = fadd float %126, %121
%128 = fmul float %29, %88
%129 = fadd float %128, %123
%130 = fmul float %30, %88
%131 = fadd float %130, %125
%132 = fmul float %32, %89
%133 = fadd float %132, %127
%134 = fmul float %33, %89
%135 = fadd float %134, %129
%136 = fmul float %34, %89
%137 = fadd float %136, %131
; --- TGSI 4-7: TEMP[1] = the same 4-term sum using CONST[17..20]; all four components
;               are kept (%159, %161, %163, %165) and later exported as the position.
%138 = fmul float %66, %86
%139 = fmul float %67, %86
%140 = fmul float %68, %86
%141 = fmul float %69, %86
%142 = fmul float %70, %87
%143 = fadd float %142, %138
%144 = fmul float %71, %87
%145 = fadd float %144, %139
%146 = fmul float %72, %87
%147 = fadd float %146, %140
%148 = fmul float %73, %87
%149 = fadd float %148, %141
%150 = fmul float %74, %88
%151 = fadd float %150, %143
%152 = fmul float %75, %88
%153 = fadd float %152, %145
%154 = fmul float %76, %88
%155 = fadd float %154, %147
%156 = fmul float %77, %88
%157 = fadd float %156, %149
%158 = fmul float %78, %89
%159 = fadd float %158, %151
%160 = fmul float %79, %89
%161 = fadd float %160, %153
%162 = fmul float %80, %89
%163 = fadd float %162, %155
%164 = fmul float %81, %89
%165 = fadd float %164, %157
; --- TGSI 8: TEMP[2].xy = IN[2].xy * CONST[10].xy + CONST[10].zw -> %167, %169.
%166 = fmul float %101, %45
%167 = fadd float %166, %47
%168 = fmul float %102, %46
%169 = fadd float %168, %48
; --- TGSI 9-16: the UIF/ELSE on (CONST[12].x == 0) is lowered to two selects that pick
;                IN[2].xy when the compare is true and IN[3].xy otherwise; the chosen
;                pair is then scaled/offset by CONST[11] -> %172, %174 (TEMP[2].zw).
%170 = fcmp oeq float %53, 0.000000e+00
%. = select i1 %170, float %101, float %107
%.32 = select i1 %170, float %102, float %108
%171 = fmul float %., %49
%172 = fadd float %171, %51
%173 = fmul float %.32, %50
%174 = fadd float %173, %52
; --- TGSI 17-28: TEMP[3].xyz = 3-term sum of IN[1].xyz against columns gathered from
;                 CONST[6..8] (x components, then y, then z) -> %185, %187, %189.
%175 = fmul float %36, %94
%176 = fmul float %39, %94
%177 = fmul float %42, %94
%178 = fmul float %37, %95
%179 = fadd float %178, %175
%180 = fmul float %40, %95
%181 = fadd float %180, %176
%182 = fmul float %43, %95
%183 = fadd float %182, %177
%184 = fmul float %38, %96
%185 = fadd float %184, %179
%186 = fmul float %41, %96
%187 = fadd float %186, %181
%188 = fmul float %44, %96
%189 = fadd float %188, %183
; --- TGSI 29-31: scale TEMP[3].xyz by rsq(dot3(TEMP[3], TEMP[3])) -> %196..%198.
%190 = fmul float %185, %185
%191 = fmul float %187, %187
%192 = fadd float %191, %190
%193 = fmul float %189, %189
%194 = fadd float %192, %193
%195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194)
%196 = fmul float %185, %195
%197 = fmul float %187, %195
%198 = fmul float %189, %195
; --- TGSI 32-34: TEMP[4].xyz = CONST[2..4].xyz applied to IN[4].xyz -> %209, %211, %213.
%199 = fmul float %20, %113
%200 = fmul float %21, %113
%201 = fmul float %22, %113
%202 = fmul float %24, %114
%203 = fadd float %202, %199
%204 = fmul float %25, %114
%205 = fadd float %204, %200
%206 = fmul float %26, %114
%207 = fadd float %206, %201
%208 = fmul float %28, %115
%209 = fadd float %208, %203
%210 = fmul float %29, %115
%211 = fadd float %210, %205
%212 = fmul float %30, %115
%213 = fadd float %212, %207
; --- TGSI 35-37: scale TEMP[4].xyz by rsq(dot3(TEMP[4], TEMP[4])) -> %220..%222.
%214 = fmul float %209, %209
%215 = fmul float %211, %211
%216 = fadd float %215, %214
%217 = fmul float %213, %213
%218 = fadd float %216, %217
%219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218)
%220 = fmul float %209, %219
%221 = fmul float %211, %219
%222 = fmul float %213, %219
; --- TGSI 38-39: TEMP[5].xyz = TEMP[3].yzx * TEMP[4].zxy - TEMP[3].zxy * TEMP[4].yzx
;                 (cross product of the two scaled vectors) -> %227, %229, %231.
%223 = fmul float %198, %221
%224 = fmul float %196, %222
%225 = fmul float %197, %220
%226 = fmul float %197, %222
%227 = fsub float %226, %223
%228 = fmul float %198, %220
%229 = fsub float %228, %224
%230 = fmul float %196, %221
%231 = fsub float %230, %225
; --- TGSI 40: multiply the cross product by IN[4].w -> %232..%234.
%232 = fmul float %227, %116
%233 = fmul float %229, %116
%234 = fmul float %231, %116
; --- TGSI 44-45: TEMP[6].xyz = CONST[1].xyz - TEMP[0].xyz * CONST[1].w -> %238..%240.
;                 These become the .w components of OUT[2], OUT[3] and OUT[4].
%235 = fmul float %133, %19
%236 = fmul float %135, %19
%237 = fmul float %137, %19
%238 = fsub float %16, %235
%239 = fsub float %17, %236
%240 = fsub float %18, %237
; --- TGSI 49-52: TEMP[6] = CONST[2..5] applied to IN[0], all four components
;                 -> %262, %264, %266, %268. The xyz chains repeat the arithmetic of
;                 %117..%137 because the TGSI program recomputes the product.
%241 = fmul float %20, %86
%242 = fmul float %21, %86
%243 = fmul float %22, %86
%244 = fmul float %23, %86
%245 = fmul float %24, %87
%246 = fadd float %245, %241
%247 = fmul float %25, %87
%248 = fadd float %247, %242
%249 = fmul float %26, %87
%250 = fadd float %249, %243
%251 = fmul float %27, %87
%252 = fadd float %251, %244
%253 = fmul float %28, %88
%254 = fadd float %253, %246
%255 = fmul float %29, %88
%256 = fadd float %255, %248
%257 = fmul float %30, %88
%258 = fadd float %257, %250
%259 = fmul float %31, %88
%260 = fadd float %259, %252
%261 = fmul float %32, %89
%262 = fadd float %261, %254
%263 = fmul float %33, %89
%264 = fadd float %263, %256
%265 = fmul float %34, %89
%266 = fadd float %265, %258
%267 = fmul float %35, %89
%268 = fadd float %267, %260
; --- TGSI 53: TEMP[0].xyz - CONST[0].xyz -> %269..%271 (become OUT[5].yzw).
%269 = fsub float %133, %13
%270 = fsub float %135, %14
%271 = fsub float %137, %15
; --- TGSI 55-58: xyz of CONST[13..16] applied to TEMP[6] -> %288, %290, %292.
%272 = fmul float %54, %262
%273 = fmul float %55, %262
%274 = fmul float %56, %262
%275 = fmul float %57, %264
%276 = fadd float %275, %272
%277 = fmul float %58, %264
%278 = fadd float %277, %273
%279 = fmul float %59, %264
%280 = fadd float %279, %274
%281 = fmul float %60, %266
%282 = fadd float %281, %276
%283 = fmul float %61, %266
%284 = fadd float %283, %278
%285 = fmul float %62, %266
%286 = fadd float %285, %280
%287 = fmul float %63, %268
%288 = fadd float %287, %282
%289 = fmul float %64, %268
%290 = fadd float %289, %284
%291 = fmul float %65, %268
%292 = fadd float %291, %286
; --- Exports ----------------------------------------------------------------------
; Targets 32..37 carry OUT[1]..OUT[6] (GENERIC[0..5]) in order; target 12 carries
; OUT[0] (POSITION = TEMP[1]). The position export is issued last and is the only one
; whose third operand is 1.
; NOTE(review): that operand is presumably the export "done" flag - confirm against the
; llvm.SI.export definition.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240)
; OUT[5].x is TEMP[1].z (%163), per TGSI 60.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130
s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A
s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128
s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134
s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135
s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136
s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
v_cndmask_b32_e32 v0, v10, v8 ; 0000110A
v_cndmask_b32_e32 v10, v11, v9 ; 0014130B
v_mov_b32_e32 v11, s5 ; 7E160205
s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129
s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B
v_mac_f32_e32 v11, s6, v8 ; 3E161006
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119
s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C
s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C
s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D
s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E
s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v8, s5 ; 7E100205
s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121
v_mac_f32_e32 v8, s4, v9 ; 3E101204
v_mul_f32_e32 v9, s6, v5 ; 10120A06
v_mac_f32_e32 v9, s11, v6 ; 3E120C0B
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E
v_mul_f32_e32 v16, s12, v5 ; 10200A0C
v_mac_f32_e32 v16, s13, v6 ; 3E200C0D
v_mul_f32_e32 v5, s14, v5 ; 100A0A0E
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05
s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144
s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148
s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145
s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149
v_mac_f32_e32 v9, s4, v7 ; 3E120E04
s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146
s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A
s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147
v_mac_f32_e32 v16, s6, v7 ; 3E200E06
v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s13, v1 ; 100E020D
v_mac_f32_e32 v7, s14, v2 ; 3E0E040E
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
v_mul_f32_e32 v17, s4, v1 ; 10220204
v_mac_f32_e32 v17, s19, v2 ; 3E220413
v_mul_f32_e32 v18, s20, v1 ; 10240214
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s6, s[0:3], 0xf ; C203010F
s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C
s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D
s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E
s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v18, s5, v2 ; 3E240405
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s19, s[0:3], 0xd ; C209810D
s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E
v_mul_f32_e32 v19, s4, v1 ; 10260204
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
v_mac_f32_e32 v19, s6, v2 ; 3E260406
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
v_mac_f32_e32 v7, s12, v3 ; 3E0E060C
v_mac_f32_e32 v17, s13, v3 ; 3E22060D
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114
v_mac_f32_e32 v18, s14, v3 ; 3E24060E
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v19, s4, v3 ; 3E260604
s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
v_mul_f32_e32 v20, s6, v1 ; 10280206
v_mac_f32_e32 v20, s5, v2 ; 3E280405
s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112
v_mac_f32_e32 v20, s11, v3 ; 3E28060B
v_mac_f32_e32 v20, s12, v4 ; 3E28080C
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
v_mul_f32_e32 v21, s13, v1 ; 102A020D
v_mac_f32_e32 v21, s19, v2 ; 3E2A0413
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s4, v3 ; 3E2A0604
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151
s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152
s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153
s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117
v_mac_f32_e32 v21, s12, v4 ; 3E2A080C
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mac_f32_e32 v1, s20, v2 ; 3E020414
v_mac_f32_e32 v1, s21, v3 ; 3E020615
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s22, v4 ; 3E020816
v_mac_f32_e32 v6, s23, v4 ; 3E0C0817
v_mac_f32_e32 v7, s24, v4 ; 3E0E0818
v_mac_f32_e32 v17, s25, v4 ; 3E220819
v_mac_f32_e32 v18, s26, v4 ; 3E24081A
v_mac_f32_e32 v19, s27, v4 ; 3E26081B
v_mov_b32_e32 v2, s17 ; 7E040211
v_mac_f32_e32 v2, s15, v0 ; 3E04000F
v_mov_b32_e32 v0, s18 ; 7E000212
v_mac_f32_e32 v0, s16, v10 ; 3E001410
exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s6, v12 ; 10001806
v_mac_f32_e32 v0, s5, v13 ; 3E001A05
v_mul_f32_e32 v2, s13, v12 ; 1004180D
v_mac_f32_e32 v2, s19, v13 ; 3E041A13
v_mul_f32_e32 v3, s14, v12 ; 1006180E
v_mac_f32_e32 v3, s20, v13 ; 3E061A14
v_mac_f32_e32 v0, s11, v14 ; 3E001C0B
v_mac_f32_e32 v2, s4, v14 ; 3E041C04
v_mac_f32_e32 v3, s21, v14 ; 3E061C15
v_mul_f32_e32 v4, v9, v9 ; 10081309
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v4, v5, v5 ; 3E080B05
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mul_f32_e32 v10, v4, v16 ; 10142104
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mul_f32_e32 v3, v8, v3 ; 10060708
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v5, s4 ; 7E0A0204
v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14
exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v5, v2, v4 ; 100A0902
v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A
v_mul_f32_e32 v3, v3, v9 ; 10061303
v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104
v_mul_f32_e32 v0, v0, v10 ; 10001500
v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509
v_mul_f32_e32 v2, v15, v5 ; 10040B0F
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_f32_e32 v0, v15, v0 ; 1000010F
v_mov_b32_e32 v5, s6 ; 7E0A0206
v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15
exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140
s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139
s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A
s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C
s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D
s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mov_b32_e32 v0, s4 ; 7E000204
v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01
v_subrev_f32_e32 v2, s6, v20 ; 0A042806
v_mul_f32_e32 v3, s7, v20 ; 10062807
v_mul_f32_e32 v5, s8, v20 ; 100A2808
v_mul_f32_e32 v8, s9, v20 ; 10102809
v_mac_f32_e32 v3, s10, v21 ; 3E062A0A
v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10
v_mac_f32_e32 v8, s17, v21 ; 3E102A11
v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v5, s19, v1 ; 3E0A0213
v_mac_f32_e32 v8, s0, v1 ; 3E100200
v_subrev_f32_e32 v1, s12, v1 ; 0A02020C
v_mac_f32_e32 v3, s13, v19 ; 3E06260D
v_mac_f32_e32 v5, s14, v19 ; 3E0A260E
v_mac_f32_e32 v8, s15, v19 ; 3E10260F
exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09
exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211
exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503
exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 24
Code Size: 920 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[6..7]
DCL TEMP[0..13], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 10.0000, 0.9680}
IMM[1] FLT32 { 0.0300, 2.0000, 0.5000, 0.0001}
IMM[2] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000}
0: DP3 TEMP[0].x, IN[3].xyzz, IN[3].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[3].xyzz, TEMP[0].xxxx
3: MOV TEMP[1].xy, IN[0].xyyy
4: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
5: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[1].xyzz
6: LRP TEMP[2].xyz, CONST[6].xxxx, TEMP[1].xyzz, CONST[1].xyzz
7: MOV TEMP[3].x, IN[1].wwww
8: MOV TEMP[3].y, IN[2].wwww
9: MOV TEMP[3].z, IN[3].wwww
10: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
11: RSQ TEMP[4].x, TEMP[4].xxxx
12: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
13: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz
14: MAX TEMP[4].x, IMM[0].xxxx, TEMP[4].xxxx
15: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz
16: MOV TEMP[5].xy, TEMP[5].xxxx
17: TEX TEMP[5].w, TEMP[5], SAMP[1], 2D
18: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww
19: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww
20: RSQ TEMP[6].x, TEMP[6].xxxx
21: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx
22: MOV TEMP[6].xyz, -TEMP[6].xyzx
23: ADD TEMP[7].x, IMM[0].yyyy, -CONST[7].xxxx
24: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz
25: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz
26: RSQ TEMP[9].x, TEMP[9].xxxx
27: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx
28: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz
29: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx
30: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz
31: MAX TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx
32: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx
33: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww
34: ADD TEMP[10].x, IMM[0].yyyy, -TEMP[7].xxxx
35: MAD TEMP[10].x, TEMP[10].xxxx, IMM[0].wwww, IMM[1].xxxx
36: LG2 TEMP[10].x, TEMP[10].xxxx
37: RCP TEMP[10].x, TEMP[10].xxxx
38: MUL TEMP[10].x, IMM[0].zzzz, TEMP[10].xxxx
39: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx
40: ADD TEMP[11].x, IMM[0].yyyy, -TEMP[4].xxxx
41: ADD TEMP[12].x, IMM[0].yyyy, -TEMP[6].xxxx
42: MUL TEMP[13].x, IMM[1].yyyy, TEMP[3].xxxx
43: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx
44: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].zzzz
45: ADD TEMP[3].x, IMM[0].yyyy, -TEMP[3].xxxx
46: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].yyyy, TEMP[9].xxxx
47: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy, TEMP[9].xxxx
48: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[1].wwww
49: RCP TEMP[6].x, TEMP[6].xxxx
50: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz
51: MAX TEMP[8].x, IMM[0].xxxx, TEMP[8].xxxx
52: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx
53: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].yyyy
54: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy
55: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
56: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx
57: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx
58: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx
59: MAX TEMP[6].x, IMM[0].xxxx, TEMP[6].xxxx
60: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz
61: ADD TEMP[8].xyz, IMM[0].yyyy, -TEMP[2].xyzz
62: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx
63: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx
64: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx
65: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx
66: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz
67: MUL TEMP[3].x, CONST[6].xxxx, CONST[1].wwww
68: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx
69: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
70: ADD TEMP[3].x, TEMP[7].xxxx, IMM[2].xxxx
71: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx
72: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx
73: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
74: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
75: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].yyyy
76: ADD TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx
77: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx
78: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx
79: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
80: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
81: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].yyyy
82: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx
83: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
84: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx
85: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
86: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
87: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww
88: MOV_SAT TEMP[1].x, TEMP[1].xxxx
89: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
90: MOV TEMP[0].xyz, TEMP[0].xyzx
91: MOV TEMP[0].w, IMM[0].yyyy
92: MOV OUT[0], TEMP[0]
93: END
; ModuleID = 'tgsi'
; Fragment shader entry point; the body implements the TGSI FRAG listing that
; precedes this module in the dump (TGSI instruction numbers are quoted below).
; Of the byval pointer arguments, %1 supplies the constant-buffer descriptor and
; %2 / %3 supply the <16 x i8> and <32 x i8> descriptor operands handed to
; llvm.SI.sample. %5 and %7 are passed unchanged to every llvm.SI.fs.interp call.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant fetches from the descriptor at index 0 of %1. The offsets line up
; with the TGSI CONST[] usage as byte offsets (16 bytes per vec4):
;   %24,%25 = CONST[0].z,.w     %26-%28 = CONST[1].xyz     %29 = CONST[1].w
;   %30-%32 = CONST[2].xyz      %33,%34,%35 = CONST[3].x,.y,.w
;   %36-%38 = CONST[4].xyz      %39 = CONST[6].x           %40 = CONST[7].x
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
; Descriptor pairs for the two texture fetches: %42/%44 come from index 0 of
; %3/%2 (used by the SAMP[0] fetch), %47/%50 from index 1 (SAMP[1] fetch).
%41 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0
%43 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%46 = bitcast <8 x i32> addrspace(2)* %45 to <32 x i8> addrspace(2)*
%47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0
%48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%49 = bitcast <4 x i32> addrspace(2)* %48 to <16 x i8> addrspace(2)*
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
; Interpolated inputs; the first two operands are (channel, input index):
;   %51,%52 = IN[0].xy    %53 = IN[1].w    %54 = IN[2].w
;   %55-%58 = IN[3].xyzw  %59-%62 = IN[4].xyzw  %63-%65 = IN[5].xyz
%51 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%52 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%54 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%55 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%56 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%57 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%58 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
; TGSI 0-2: normalize IN[3].xyz -> TEMP[0].xyz = (%72, %73, %74).
%66 = fmul float %55, %55
%67 = fmul float %56, %56
%68 = fadd float %67, %66
%69 = fmul float %57, %57
%70 = fadd float %68, %69
%71 = call float @llvm.AMDGPU.rsq.clamped.f32(float %70)
%72 = fmul float %55, %71
%73 = fmul float %56, %71
%74 = fmul float %57, %71
; TGSI 3-4: sample SAMP[0] at IN[0].xy; only x, y, z of the result are used.
%75 = bitcast float %51 to i32
%76 = bitcast float %52 to i32
%77 = insertelement <2 x i32> undef, i32 %75, i32 0
%78 = insertelement <2 x i32> %77, i32 %76, i32 1
%79 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %78, <32 x i8> %42, <16 x i8> %44, i32 2)
%80 = extractelement <4 x float> %79, i32 0
%81 = extractelement <4 x float> %79, i32 1
%82 = extractelement <4 x float> %79, i32 2
; TGSI 5: TEMP[1].xyz = CONST[4].xyz * texel.
%83 = fmul float %36, %80
%84 = fmul float %37, %81
%85 = fmul float %38, %82
; TGSI 6: TEMP[2].xyz = LRP(CONST[6].x, TEMP[1].xyz, CONST[1].xyz).
%86 = call float @llvm.AMDGPU.lrp(float %39, float %83, float %26)
%87 = call float @llvm.AMDGPU.lrp(float %39, float %84, float %27)
%88 = call float @llvm.AMDGPU.lrp(float %39, float %85, float %28)
; TGSI 7-12: build the vector (IN[1].w, IN[2].w, IN[3].w) and normalize it
; -> TEMP[3].xyz = (%95, %96, %97).
%89 = fmul float %53, %53
%90 = fmul float %54, %54
%91 = fadd float %90, %89
%92 = fmul float %58, %58
%93 = fadd float %91, %92
%94 = call float @llvm.AMDGPU.rsq.clamped.f32(float %93)
%95 = fmul float %53, %94
%96 = fmul float %54, %94
%97 = fmul float %58, %94
; TGSI 13-14: TEMP[4].x = max(0, dot(TEMP[0], TEMP[3])) = %103.
%98 = fmul float %72, %95
%99 = fmul float %73, %96
%100 = fadd float %99, %98
%101 = fmul float %74, %97
%102 = fadd float %100, %101
%103 = call float @llvm.maxnum.f32(float %102, float 0.000000e+00)
; TGSI 15-17: dot(IN[5].xyz, IN[5].xyz) is used as both coordinates of the
; SAMP[1] fetch; only the w component (%114) of the result is kept.
%104 = fmul float %63, %63
%105 = fmul float %64, %64
%106 = fadd float %105, %104
%107 = fmul float %65, %65
%108 = fadd float %106, %107
%109 = bitcast float %108 to i32
%110 = bitcast float %108 to i32
%111 = insertelement <2 x i32> undef, i32 %109, i32 0
%112 = insertelement <2 x i32> %111, i32 %110, i32 1
%113 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %47, <16 x i8> %50, i32 2)
%114 = extractelement <4 x float> %113, i32 3
; TGSI 18: TEMP[5].xyz = CONST[2].xyz * %114.
%115 = fmul float %30, %114
%116 = fmul float %31, %114
%117 = fmul float %32, %114
; TGSI 19-21: normalize IN[4].yzw -> (%124, %125, %126). The negation of
; TGSI 22 is not materialized; it is folded into the fsub instructions below.
%118 = fmul float %60, %60
%119 = fmul float %61, %61
%120 = fadd float %119, %118
%121 = fmul float %62, %62
%122 = fadd float %120, %121
%123 = call float @llvm.AMDGPU.rsq.clamped.f32(float %122)
%124 = fmul float %60, %123
%125 = fmul float %61, %123
%126 = fmul float %62, %123
; TGSI 23: TEMP[7].x = 1 - CONST[7].x.
%127 = fsub float 1.000000e+00, %40
; TGSI 24-27: TEMP[8].xyz = normalize(TEMP[3] - normalized IN[4].yzw)
; = (%137, %138, %139).
%128 = fsub float %95, %124
%129 = fsub float %96, %125
%130 = fsub float %97, %126
%131 = fmul float %128, %128
%132 = fmul float %129, %129
%133 = fadd float %132, %131
%134 = fmul float %130, %130
%135 = fadd float %133, %134
%136 = call float @llvm.AMDGPU.rsq.clamped.f32(float %135)
%137 = fmul float %128, %136
%138 = fmul float %129, %136
%139 = fmul float %130, %136
; TGSI 28-29: TEMP[6].x = max(0, dot(TEMP[0], -normalized IN[4].yzw)) = %146,
; computed as the negated sum of products.
%140 = fmul float %124, %72
%141 = fsub float -0.000000e+00, %140
%142 = fmul float %125, %73
%143 = fsub float %141, %142
%144 = fmul float %126, %74
%145 = fsub float %143, %144
%146 = call float @llvm.maxnum.f32(float %145, float 0.000000e+00)
; TGSI 30-31: TEMP[3].x = max(0, dot(TEMP[3], TEMP[8])) = %152.
%147 = fmul float %95, %137
%148 = fmul float %96, %138
%149 = fadd float %148, %147
%150 = fmul float %97, %139
%151 = fadd float %149, %150
%152 = call float @llvm.maxnum.f32(float %151, float 0.000000e+00)
; TGSI 32-33: TEMP[9].x = TEMP[7].x^2 * CONST[3].w = %154.
%153 = fmul float %127, %127
%154 = fmul float %153, %35
; TGSI 34-39: TEMP[10].x = (10 / log2((1 - TEMP[7].x) * 0.968 + 0.03))^2 = %161.
; The two hex literals are the IMM values 0.968 and 0.03.
%155 = fsub float 1.000000e+00, %127
%156 = fmul float %155, 0x3FEEF9DB20000000
%157 = fadd float %156, 0x3F9EB851E0000000
%158 = call float @llvm.log2.f32(float %157)
%159 = fdiv float 1.000000e+00, %158
%160 = fmul float %159, 1.000000e+01
%161 = fmul float %160, %160
; TGSI 40-41: TEMP[11].x = 1 - %103, TEMP[12].x = 1 - %146.
%162 = fsub float 1.000000e+00, %103
%163 = fsub float 1.000000e+00, %146
; TGSI 42-44: TEMP[7].x = (2 * %152) * (%152 * %127) + 0.5 = %167.
%164 = fmul float %152, 2.000000e+00
%165 = fmul float %152, %127
%166 = fmul float %164, %165
%167 = fadd float %166, 5.000000e-01
; TGSI 45: TEMP[3].x = 1 - %152.
%168 = fsub float 1.000000e+00, %152
; TGSI 46-49: two LRPs towards 1.0 from %154, multiplied together, plus the
; IMM value 0.0001 (hex literal), then reciprocal -> %173.
%169 = call float @llvm.AMDGPU.lrp(float %103, float 1.000000e+00, float %154)
%170 = call float @llvm.AMDGPU.lrp(float %146, float 1.000000e+00, float %154)
%171 = fmul float %169, %170
%172 = fadd float %171, 0x3F1A36E2E0000000
%173 = fdiv float 1.000000e+00, %172
; TGSI 50-52: pow(max(0, dot(TEMP[0], TEMP[8])), %161) = %180.
%174 = fmul float %72, %137
%175 = fmul float %73, %138
%176 = fadd float %175, %174
%177 = fmul float %74, %139
%178 = fadd float %176, %177
%179 = call float @llvm.maxnum.f32(float %178, float 0.000000e+00)
%180 = call float @llvm.pow.f32(float %179, float %161)
; TGSI 53-59: scale by (%161 + 1) * CONST[3].y, by %173, by %103 and by
; CONST[3].x, then clamp below at 0 -> %187.
%181 = fadd float %161, 1.000000e+00
%182 = fmul float %181, %34
%183 = fmul float %180, %182
%184 = fmul float %173, %183
%185 = fmul float %184, %103
%186 = fmul float %185, %33
%187 = call float @llvm.maxnum.f32(float %186, float 0.000000e+00)
; TGSI 60: TEMP[6].xyz = %187 * TEMP[5].xyz.
%188 = fmul float %187, %115
%189 = fmul float %187, %116
%190 = fmul float %187, %117
; TGSI 61-66: TEMP[2].xyz += (1 - TEMP[2].xyz) * (%168)^5. %194 and %195 are
; the same product, mirroring the duplicated MULs in the TGSI.
%191 = fsub float 1.000000e+00, %86
%192 = fsub float 1.000000e+00, %87
%193 = fsub float 1.000000e+00, %88
%194 = fmul float %168, %168
%195 = fmul float %168, %168
%196 = fmul float %195, %168
%197 = fmul float %194, %196
%198 = fmul float %191, %197
%199 = fadd float %198, %86
%200 = fmul float %192, %197
%201 = fadd float %200, %87
%202 = fmul float %193, %197
%203 = fadd float %202, %88
; TGSI 67-69: TEMP[1].xyz *= CONST[1].w - CONST[6].x * CONST[1].w.
%204 = fmul float %39, %29
%205 = fsub float %29, %204
%206 = fmul float %83, %205
%207 = fmul float %84, %205
%208 = fmul float %85, %205
; TGSI 70-75: 1 + (%167 - 1) * (%162)^5 = %215.
%209 = fadd float %167, -1.000000e+00
%210 = fmul float %162, %162
%211 = fmul float %162, %162
%212 = fmul float %211, %162
%213 = fmul float %210, %212
%214 = fmul float %209, %213
%215 = fadd float %214, 1.000000e+00
; TGSI 76-81: 1 + (%167 - 1) * (%163)^5 = %222.
%216 = fadd float %167, -1.000000e+00
%217 = fmul float %163, %163
%218 = fmul float %163, %163
%219 = fmul float %218, %163
%220 = fmul float %217, %219
%221 = fmul float %216, %220
%222 = fadd float %221, 1.000000e+00
; TGSI 82-85: TEMP[1].xyz *= TEMP[5].xyz * (%215 * %222 * %103).
%223 = fmul float %215, %222
%224 = fmul float %223, %103
%225 = fmul float %115, %224
%226 = fmul float %116, %224
%227 = fmul float %117, %224
%228 = fmul float %206, %225
%229 = fmul float %207, %226
%230 = fmul float %208, %227
; TGSI 86: TEMP[0].xyz = TEMP[6].xyz * TEMP[2].xyz + TEMP[1].xyz.
%231 = fmul float %188, %199
%232 = fadd float %231, %228
%233 = fmul float %189, %201
%234 = fadd float %233, %229
%235 = fmul float %190, %203
%236 = fadd float %235, %230
; TGSI 87-89: scale the colour by clamp(IN[4].x * CONST[0].z + CONST[0].w, 0, 1).
%237 = fmul float %59, %24
%238 = fadd float %237, %25
%239 = call float @llvm.AMDIL.clamp.(float %238, float 0.000000e+00, float 1.000000e+00)
%240 = fmul float %232, %239
%241 = fmul float %234, %239
%242 = fmul float %236, %239
; TGSI 90-92: output (r, g, b, 1.0). The four floats are packed pairwise with
; llvm.SI.packf16 and the two packed words are passed twice to the export.
%243 = call i32 @llvm.SI.packf16(float %240, float %241)
%244 = bitcast i32 %243 to float
%245 = call i32 @llvm.SI.packf16(float %242, float 1.000000e+00)
%246 = bitcast i32 %245 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %244, float %246, float %244, float %246)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment-shader IR printed above in this dump.
; s_wqm_b64 rewrites exec in whole-quad mode before any per-pixel work.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 is set from s9 because every v_interp instruction below takes [m0].
s_mov_b32 m0, s9 ; BEFC0309
; Each input channel is interpolated with a p1/p2 pair sourced from v0/v1.
; The two immediates are (channel, attribute), in the same order as the
; llvm.SI.fs.interp calls in the IR:
;   v2,v3 = attr0.xy   v4 = attr1.w   v5 = attr2.w   v6-v9 = attr3.xyzw
;   v10-v13 = attr4.xyzw   v14 = attr5.x
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 1, [m0] ; C8100700
v_interp_p2_f32 v4, [v4], v1, 3, 1, [m0] ; C8110701
v_interp_p1_f32 v5, v0, 3, 2, [m0] ; C8140B00
v_interp_p2_f32 v5, [v5], v1, 3, 2, [m0] ; C8150B01
v_interp_p1_f32 v6, v0, 0, 3, [m0] ; C8180C00
v_interp_p2_f32 v6, [v6], v1, 0, 3, [m0] ; C8190C01
v_interp_p1_f32 v7, v0, 1, 3, [m0] ; C81C0D00
v_interp_p2_f32 v7, [v7], v1, 1, 3, [m0] ; C81D0D01
v_interp_p1_f32 v8, v0, 2, 3, [m0] ; C8200E00
v_interp_p2_f32 v8, [v8], v1, 2, 3, [m0] ; C8210E01
v_interp_p1_f32 v9, v0, 3, 3, [m0] ; C8240F00
v_interp_p2_f32 v9, [v9], v1, 3, 3, [m0] ; C8250F01
v_interp_p1_f32 v10, v0, 0, 4, [m0] ; C8281000
v_interp_p2_f32 v10, [v10], v1, 0, 4, [m0] ; C8291001
v_interp_p1_f32 v11, v0, 1, 4, [m0] ; C82C1100
v_interp_p2_f32 v11, [v11], v1, 1, 4, [m0] ; C82D1101
v_interp_p1_f32 v12, v0, 2, 4, [m0] ; C8301200
v_interp_p2_f32 v12, [v12], v1, 2, 4, [m0] ; C8311201
v_interp_p1_f32 v13, v0, 3, 4, [m0] ; C8341300
v_interp_p2_f32 v13, [v13], v1, 3, 4, [m0] ; C8351301
v_interp_p1_f32 v14, v0, 0, 5, [m0] ; C8381400
v_interp_p2_f32 v14, [v14], v1, 0, 5, [m0] ; C8391401
; Scalar loads of the descriptors consumed further down: s[24:31] + s[20:23]
; feed the first image_sample, s[12:19] + s[8:11] the second, and s[0:3] is
; the base of every s_buffer_load_dword. They are issued here, ahead of the
; last interpolations, and waited on at the s_waitcnt lgkmcnt(0) below.
s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500
s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504
s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700
s_load_dwordx8 s[12:19], s[6:7], 0x8 ; C0C60708
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Remaining inputs: v15 = attr5.y, v0 = attr5.z (v0 is overwritten here, after
; its last use as an interpolation source).
v_interp_p1_f32 v15, v0, 1, 5, [m0] ; C83C1500
v_interp_p2_f32 v15, [v15], v1, 1, 5, [m0] ; C83D1501
v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600
v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601
; v16 = dot(attr5.xyz, attr5.xyz); copied to v17 below so that v[16:17] holds
; the same value in both coordinates of the second sample.
v_mul_f32_e32 v16, v14, v14 ; 10201D0E
v_mac_f32_e32 v16, v15, v15 ; 3E201F0F
v_mac_f32_e32 v16, v0, v0 ; 3E200100
s_waitcnt lgkmcnt(0) ; BF8C007F
; First texture fetch: coordinates v[2:3] (attr0.xy), three results in v[0:2].
image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[20:23] ; F0800700 00A60002
v_mov_b32_e32 v17, v16 ; 7E220310
; Second texture fetch: a single result register, v3.
image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[12:19], s[8:11] ; F0800800 00430310
; Shader constants. These 17 loads correspond to the 17 llvm.SI.load.const
; calls in the IR; each offset here equals the IR byte offset divided by 4.
; The descriptor registers s[4:19] loaded above are reused as destinations.
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0xd ; C204810D
s_buffer_load_dword s10, s[0:3], 0xf ; C205010F
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x11 ; C2060111
s_buffer_load_dword s13, s[0:3], 0x12 ; C2068112
s_buffer_load_dword s14, s[0:3], 0x7 ; C2070107
s_buffer_load_dword s15, s[0:3], 0x8 ; C2078108
s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109
s_buffer_load_dword s17, s[0:3], 0xa ; C208810A
s_buffer_load_dword s18, s[0:3], 0xc ; C209010C
s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118
s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C
; Wait for both the image samples (vmcnt) and the constant loads (lgkmcnt).
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Texel from the first sample scaled by the constants at dwords 0x10-0x12.
v_mul_f32_e32 v0, s11, v0 ; 1000000B
v_mul_f32_e32 v1, s12, v1 ; 1002020C
v_mul_f32_e32 v2, s13, v2 ; 1004040D
; Three reciprocal-square-root lengths: v14 for attr3.xyz, v15 for attr4.yzw,
; v16 for (attr1.w, attr2.w, attr3.w).
v_mul_f32_e32 v14, v6, v6 ; 101C0D06
v_mac_f32_e32 v14, v7, v7 ; 3E1C0F07
v_mac_f32_e32 v14, v8, v8 ; 3E1C1108
v_rsq_clamp_f32_e32 v14, v14 ; 7E1C590E
v_mul_f32_e32 v15, v11, v11 ; 101E170B
v_mac_f32_e32 v15, v12, v12 ; 3E1E190C
v_mac_f32_e32 v15, v13, v13 ; 3E1E1B0D
v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F
v_mul_f32_e32 v16, v4, v4 ; 10200904
v_mac_f32_e32 v16, v5, v5 ; 3E200B05
v_mac_f32_e32 v16, v9, v9 ; 3E201309
v_rsq_clamp_f32_e32 v16, v16 ; 7E205910
; Scaled vectors, their difference (v4, v12, v18), a fourth rsq in v17 for
; that difference, and the dot products built from them.
v_mul_f32_e32 v11, v15, v11 ; 1016170F
v_mul_f32_e32 v12, v15, v12 ; 1018190F
v_mul_f32_e32 v13, v15, v13 ; 101A1B0F
v_mul_f32_e32 v6, v14, v6 ; 100C0D0E
v_mul_f32_e32 v7, v14, v7 ; 100E0F0E
v_mul_f32_e32 v15, v16, v4 ; 101E0910
v_mad_f32 v4, v4, v16, -v11 ; D2820004 842E2104
v_mul_f32_e32 v11, v6, v11 ; 10161706
v_mad_f32 v11, -v12, v7, -v11 ; D282000B A42E0F0C
v_mad_f32 v12, v5, v16, -v12 ; D282000C 84322105
v_mul_f32_e32 v17, v4, v4 ; 10220904
v_mac_f32_e32 v17, v12, v12 ; 3E22190C
v_mad_f32 v18, v9, v16, -v13 ; D2820012 84362109
v_mac_f32_e32 v17, v18, v18 ; 3E222512
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
v_mul_f32_e32 v5, v16, v5 ; 100A0B10
v_mul_f32_e32 v19, v15, v6 ; 10260D0F
v_mac_f32_e32 v19, v5, v7 ; 3E260F05
v_mul_f32_e32 v4, v17, v4 ; 10080911
v_mul_f32_e32 v12, v17, v12 ; 10181911
v_mul_f32_e32 v15, v4, v15 ; 101E1F04
v_mac_f32_e32 v15, v12, v5 ; 3E1E0B0C
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mac_f32_e32 v4, v12, v7 ; 3E080F0C
v_mul_f32_e32 v5, v14, v8 ; 100A110E
v_mul_f32_e32 v6, v16, v9 ; 100C1310
v_mul_f32_e32 v7, v17, v18 ; 100E2511
v_mad_f32 v8, -v13, v5, v11 ; D2820008 242E0B0D
v_mac_f32_e32 v19, v6, v5 ; 3E260B06
v_mac_f32_e32 v15, v7, v6 ; 3E1E0D07
v_mac_f32_e32 v4, v7, v5 ; 3E080B07
; v5 = max(0, v15); v6 = (1 - v5)^5 built from three multiplies.
v_max_f32_e32 v5, 0, v15 ; 200A1E80
v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2
v_mul_f32_e32 v7, v6, v6 ; 100E0D06
v_mul_f32_e32 v6, v6, v7 ; 100C0F06
v_mul_f32_e32 v6, v6, v7 ; 100C0F06
; Blend terms using s19 (dword 0x18) against s6-s8 (dwords 0x4-0x6) and the
; scaled texel in v0-v2; results accumulate in v9, v12 and v14.
v_sub_f32_e64 v7, 1.0, s19 ; D2080007 000026F2
v_mul_f32_e32 v9, s6, v7 ; 10120E06
v_mad_f32 v11, -v7, s6, 1.0 ; D282000B 23C80D07
v_mul_f32_e32 v12, s7, v7 ; 10180E07
v_mad_f32 v13, -v7, s7, 1.0 ; D282000D 23C80F07
v_mul_f32_e32 v14, s8, v7 ; 101C0E08
v_mad_f32 v7, -v7, s8, 1.0 ; D2820007 23C81107
v_mac_f32_e32 v9, s19, v0 ; 3E120013
v_mad_f32 v11, -s19, v0, v11 ; D282000B 242E0013
v_mac_f32_e32 v9, v6, v11 ; 3E121706
v_mac_f32_e32 v12, s19, v1 ; 3E180213
v_mad_f32 v11, -s19, v1, v13 ; D282000B 24360213
v_mac_f32_e32 v12, v6, v11 ; 3E181706
v_mac_f32_e32 v14, s19, v2 ; 3E1C0413
v_mad_f32 v7, -s19, v2, v7 ; D2820007 241E0413
v_mac_f32_e32 v14, v6, v7 ; 3E1C0F06
; v6 = 1 - s0 (dword 0x1c). The literals 0x3cf5c28f and 0x3f77ced9 are the
; float encodings of 0.03 and 0.968 that feed the log2 argument in the IR.
v_sub_f32_e64 v6, 1.0, s0 ; D2080006 000000F2
v_mul_f32_e32 v7, v6, v6 ; 100E0D06
v_mul_f32_e32 v7, s10, v7 ; 100E0E0A
v_sub_f32_e32 v11, 1.0, v6 ; 08160CF2
v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F
v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9
v_mul_f32_e32 v6, v6, v5 ; 100C0B06
v_add_f32_e32 v5, v5, v5 ; 060A0B05
v_mad_f32 v5, v5, v6, 0.5 ; D2820005 03C20D05
v_max_f32_e32 v6, 0, v8 ; 200C1080
v_log_f32_e32 v8, v11 ; 7E104F0B
v_sub_f32_e32 v11, 1.0, v6 ; 08160CF2
v_mul_f32_e32 v13, v7, v11 ; 101A1707
v_mac_f32_e32 v13, 1.0, v6 ; 3E1A0CF2
v_max_f32_e32 v6, 0, v19 ; 200C2680
v_rcp_f32_e32 v8, v8 ; 7E105508
v_sub_f32_e32 v15, 1.0, v6 ; 081E0CF2
v_mul_f32_e32 v7, v7, v15 ; 100E1F07
v_mac_f32_e32 v7, 1.0, v6 ; 3E0E0CF2
v_max_f32_e32 v4, 0, v4 ; 20080880
; The IR's llvm.pow.f32 appears here as v_log_f32, v_mul_legacy_f32 and
; v_exp_f32 (the v_exp is a few lines further down). 0x38d1b717 is 0.0001
; and 0x41200000 is 10.0.
v_log_f32_e32 v4, v4 ; 7E084F04
v_madak_f32_e32 v7, v7, v13, 0x38d1b717 ; 420E1B07 38D1B717
v_mul_f32_e32 v8, 0x41200000, v8 ; 101010FF 41200000
v_mul_f32_e32 v13, v8, v8 ; 101A1108
v_mul_legacy_f32_e32 v4, v13, v4 ; 0E08090D
v_rcp_f32_e32 v7, v7 ; 7E0E5507
v_mad_f32 v8, v8, v8, 1.0 ; D2820008 03CA1108
v_mul_f32_e32 v8, s9, v8 ; 10101009
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_f32_e32 v4, v8, v4 ; 10080908
v_mul_f32_e32 v4, v4, v7 ; 10080F04
; Fifth powers of v15 and v11, each formed as x^2, x^3, x^5.
v_mul_f32_e32 v7, v15, v15 ; 100E1F0F
v_mul_f32_e32 v8, v15, v7 ; 10100F0F
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v8, v11, v11 ; 1010170B
v_mul_f32_e32 v11, v11, v8 ; 1016110B
v_mul_f32_e32 v8, v11, v8 ; 1010110B
v_add_f32_e32 v5, -1.0, v5 ; 060A0AF3
v_mad_f32 v7, v5, v7, 1.0 ; D2820007 03CA0F05
v_mad_f32 v5, v5, v8, 1.0 ; D2820005 03CA1105
; v8 = s14 - s19 * s14. s19 is first copied into v8 rather than being used
; as a second scalar operand of the v_mad.
v_mov_b32_e32 v8, s19 ; 7E100213
v_mad_f32 v8, -v8, s14, s14 ; D2820008 20381D08
v_mul_f32_e32 v5, v5, v7 ; 100A0F05
; Final combination per colour channel; v3 (second sample result) is scaled
; by s15, s16 and s17 for the three channels, which end up in v0, v1 and v2.
v_mul_f32_e32 v7, s15, v3 ; 100E060F
v_mul_f32_e32 v4, v6, v4 ; 10080906
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v6, v5, v7 ; 100C0F05
v_mul_f32_e32 v0, v6, v0 ; 10000106
v_mul_f32_e32 v4, s18, v4 ; 10080812
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v6, v7, v4 ; 100C0907
v_mac_f32_e32 v0, v9, v6 ; 3E000D09
v_mul_f32_e32 v6, s16, v3 ; 100C0610
v_mul_f32_e32 v1, v8, v1 ; 10020308
v_mul_f32_e32 v7, v5, v6 ; 100E0D05
v_mul_f32_e32 v1, v7, v1 ; 10020307
v_mul_f32_e32 v6, v6, v4 ; 100C0906
v_mac_f32_e32 v1, v12, v6 ; 3E020D0C
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mul_f32_e32 v3, s17, v3 ; 10060611
v_mul_f32_e32 v4, v3, v4 ; 10080903
v_mul_f32_e32 v3, v5, v3 ; 10060705
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_mac_f32_e32 v2, v14, v4 ; 3E04090E
; v3 = clamp(s4 * attr4.x + s5); the clamp modifier on the add-with-zero
; corresponds to the llvm.AMDIL.clamp call in the IR.
v_mov_b32_e32 v3, s5 ; 7E060205
v_mac_f32_e32 v3, s4, v10 ; 3E061404
v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_mul_f32_e32 v2, v3, v2 ; 10040503
; Pack (v0, v1) and (v2, 1.0) into half-float pairs and export them.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e64 v1, v2, 1.0 ; D25E0001 0001E502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 20
Code Size: 924 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww
9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[6].xxxx
18: MOV TEMP[3].y, CONST[7].xxxx
19: MOV TEMP[3].z, CONST[8].xxxx
20: MOV TEMP[4].x, CONST[6].yyyy
21: MOV TEMP[4].y, CONST[7].yyyy
22: MOV TEMP[4].z, CONST[8].yyyy
23: MOV TEMP[5].x, CONST[6].zzzz
24: MOV TEMP[5].y, CONST[7].zzzz
25: MOV TEMP[5].z, CONST[8].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[3].xyz, TEMP[3].xyzx
44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww
45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz
46: MOV TEMP[4].w, TEMP[6].xxxx
47: MOV TEMP[5].w, TEMP[6].yyyy
48: MOV TEMP[3].w, TEMP[6].zzzz
49: MUL TEMP[6], CONST[2], IN[0].xxxx
50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6]
51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6]
52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6]
53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[0].yzw, TEMP[0].yxyz
55: MUL TEMP[7], CONST[13], TEMP[6].xxxx
56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7]
57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7]
58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7]
59: MOV TEMP[6].xyz, TEMP[6].xyzx
60: MOV TEMP[0].x, TEMP[1].zzzz
61: MOV OUT[1], TEMP[2]
62: MOV OUT[3], TEMP[5]
63: MOV OUT[2], TEMP[4]
64: MOV OUT[4], TEMP[3]
65: MOV OUT[0], TEMP[1]
66: MOV OUT[6], TEMP[6]
67: MOV OUT[5], TEMP[0]
68: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
%96 = extractelement <4 x float> %93, i32 2
%97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = add i32 %5, %7
%100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99)
%101 = extractelement <4 x float> %100, i32 0
%102 = extractelement <4 x float> %100, i32 1
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 0
%108 = extractelement <4 x float> %106, i32 1
%109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = add i32 %5, %7
%112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111)
%113 = extractelement <4 x float> %112, i32 0
%114 = extractelement <4 x float> %112, i32 1
%115 = extractelement <4 x float> %112, i32 2
%116 = extractelement <4 x float> %112, i32 3
%117 = fmul float %20, %86
%118 = fmul float %21, %86
%119 = fmul float %22, %86
%120 = fmul float %24, %87
%121 = fadd float %120, %117
%122 = fmul float %25, %87
%123 = fadd float %122, %118
%124 = fmul float %26, %87
%125 = fadd float %124, %119
%126 = fmul float %28, %88
%127 = fadd float %126, %121
%128 = fmul float %29, %88
%129 = fadd float %128, %123
%130 = fmul float %30, %88
%131 = fadd float %130, %125
%132 = fmul float %32, %89
%133 = fadd float %132, %127
%134 = fmul float %33, %89
%135 = fadd float %134, %129
%136 = fmul float %34, %89
%137 = fadd float %136, %131
%138 = fmul float %66, %86
%139 = fmul float %67, %86
%140 = fmul float %68, %86
%141 = fmul float %69, %86
%142 = fmul float %70, %87
%143 = fadd float %142, %138
%144 = fmul float %71, %87
%145 = fadd float %144, %139
%146 = fmul float %72, %87
%147 = fadd float %146, %140
%148 = fmul float %73, %87
%149 = fadd float %148, %141
%150 = fmul float %74, %88
%151 = fadd float %150, %143
%152 = fmul float %75, %88
%153 = fadd float %152, %145
%154 = fmul float %76, %88
%155 = fadd float %154, %147
%156 = fmul float %77, %88
%157 = fadd float %156, %149
%158 = fmul float %78, %89
%159 = fadd float %158, %151
%160 = fmul float %79, %89
%161 = fadd float %160, %153
%162 = fmul float %80, %89
%163 = fadd float %162, %155
%164 = fmul float %81, %89
%165 = fadd float %164, %157
%166 = fmul float %101, %45
%167 = fadd float %166, %47
%168 = fmul float %102, %46
%169 = fadd float %168, %48
%170 = fcmp oeq float %53, 0.000000e+00
%. = select i1 %170, float %101, float %107
%.32 = select i1 %170, float %102, float %108
%171 = fmul float %., %49
%172 = fadd float %171, %51
%173 = fmul float %.32, %50
%174 = fadd float %173, %52
%175 = fmul float %36, %94
%176 = fmul float %39, %94
%177 = fmul float %42, %94
%178 = fmul float %37, %95
%179 = fadd float %178, %175
%180 = fmul float %40, %95
%181 = fadd float %180, %176
%182 = fmul float %43, %95
%183 = fadd float %182, %177
%184 = fmul float %38, %96
%185 = fadd float %184, %179
%186 = fmul float %41, %96
%187 = fadd float %186, %181
%188 = fmul float %44, %96
%189 = fadd float %188, %183
%190 = fmul float %185, %185
%191 = fmul float %187, %187
%192 = fadd float %191, %190
%193 = fmul float %189, %189
%194 = fadd float %192, %193
%195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194)
%196 = fmul float %185, %195
%197 = fmul float %187, %195
%198 = fmul float %189, %195
%199 = fmul float %20, %113
%200 = fmul float %21, %113
%201 = fmul float %22, %113
%202 = fmul float %24, %114
%203 = fadd float %202, %199
%204 = fmul float %25, %114
%205 = fadd float %204, %200
%206 = fmul float %26, %114
%207 = fadd float %206, %201
%208 = fmul float %28, %115
%209 = fadd float %208, %203
%210 = fmul float %29, %115
%211 = fadd float %210, %205
%212 = fmul float %30, %115
%213 = fadd float %212, %207
%214 = fmul float %209, %209
%215 = fmul float %211, %211
%216 = fadd float %215, %214
%217 = fmul float %213, %213
%218 = fadd float %216, %217
%219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218)
%220 = fmul float %209, %219
%221 = fmul float %211, %219
%222 = fmul float %213, %219
%223 = fmul float %198, %221
%224 = fmul float %196, %222
%225 = fmul float %197, %220
%226 = fmul float %197, %222
%227 = fsub float %226, %223
%228 = fmul float %198, %220
%229 = fsub float %228, %224
%230 = fmul float %196, %221
%231 = fsub float %230, %225
%232 = fmul float %227, %116
%233 = fmul float %229, %116
%234 = fmul float %231, %116
%235 = fmul float %133, %19
%236 = fmul float %135, %19
%237 = fmul float %137, %19
%238 = fsub float %16, %235
%239 = fsub float %17, %236
%240 = fsub float %18, %237
%241 = fmul float %20, %86
%242 = fmul float %21, %86
%243 = fmul float %22, %86
%244 = fmul float %23, %86
%245 = fmul float %24, %87
%246 = fadd float %245, %241
%247 = fmul float %25, %87
%248 = fadd float %247, %242
%249 = fmul float %26, %87
%250 = fadd float %249, %243
%251 = fmul float %27, %87
%252 = fadd float %251, %244
%253 = fmul float %28, %88
%254 = fadd float %253, %246
%255 = fmul float %29, %88
%256 = fadd float %255, %248
%257 = fmul float %30, %88
%258 = fadd float %257, %250
%259 = fmul float %31, %88
%260 = fadd float %259, %252
%261 = fmul float %32, %89
%262 = fadd float %261, %254
%263 = fmul float %33, %89
%264 = fadd float %263, %256
%265 = fmul float %34, %89
%266 = fadd float %265, %258
%267 = fmul float %35, %89
%268 = fadd float %267, %260
%269 = fsub float %133, %13
%270 = fsub float %135, %14
%271 = fsub float %137, %15
%272 = fmul float %54, %262
%273 = fmul float %55, %262
%274 = fmul float %56, %262
%275 = fmul float %57, %264
%276 = fadd float %275, %272
%277 = fmul float %58, %264
%278 = fadd float %277, %273
%279 = fmul float %59, %264
%280 = fadd float %279, %274
%281 = fmul float %60, %266
%282 = fadd float %281, %276
%283 = fmul float %61, %266
%284 = fadd float %283, %278
%285 = fmul float %62, %266
%286 = fadd float %285, %280
%287 = fmul float %63, %268
%288 = fadd float %287, %282
%289 = fmul float %64, %268
%290 = fadd float %289, %284
%291 = fmul float %65, %268
%292 = fadd float %291, %286
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130
s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A
s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128
s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134
s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135
s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136
s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
v_cndmask_b32_e32 v0, v10, v8 ; 0000110A
v_cndmask_b32_e32 v10, v11, v9 ; 0014130B
v_mov_b32_e32 v11, s5 ; 7E160205
s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129
s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B
v_mac_f32_e32 v11, s6, v8 ; 3E161006
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119
s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C
s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C
s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D
s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E
s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v8, s5 ; 7E100205
s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121
v_mac_f32_e32 v8, s4, v9 ; 3E101204
v_mul_f32_e32 v9, s6, v5 ; 10120A06
v_mac_f32_e32 v9, s11, v6 ; 3E120C0B
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E
v_mul_f32_e32 v16, s12, v5 ; 10200A0C
v_mac_f32_e32 v16, s13, v6 ; 3E200C0D
v_mul_f32_e32 v5, s14, v5 ; 100A0A0E
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05
s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144
s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148
s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145
s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149
v_mac_f32_e32 v9, s4, v7 ; 3E120E04
s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146
s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A
s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147
v_mac_f32_e32 v16, s6, v7 ; 3E200E06
v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s13, v1 ; 100E020D
v_mac_f32_e32 v7, s14, v2 ; 3E0E040E
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
v_mul_f32_e32 v17, s4, v1 ; 10220204
v_mac_f32_e32 v17, s19, v2 ; 3E220413
v_mul_f32_e32 v18, s20, v1 ; 10240214
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s6, s[0:3], 0xf ; C203010F
s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C
s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D
s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E
s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v18, s5, v2 ; 3E240405
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s19, s[0:3], 0xd ; C209810D
s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E
v_mul_f32_e32 v19, s4, v1 ; 10260204
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
v_mac_f32_e32 v19, s6, v2 ; 3E260406
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
v_mac_f32_e32 v7, s12, v3 ; 3E0E060C
v_mac_f32_e32 v17, s13, v3 ; 3E22060D
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114
v_mac_f32_e32 v18, s14, v3 ; 3E24060E
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v19, s4, v3 ; 3E260604
s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
v_mul_f32_e32 v20, s6, v1 ; 10280206
v_mac_f32_e32 v20, s5, v2 ; 3E280405
s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112
v_mac_f32_e32 v20, s11, v3 ; 3E28060B
v_mac_f32_e32 v20, s12, v4 ; 3E28080C
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
v_mul_f32_e32 v21, s13, v1 ; 102A020D
v_mac_f32_e32 v21, s19, v2 ; 3E2A0413
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s4, v3 ; 3E2A0604
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151
s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152
s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153
s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117
v_mac_f32_e32 v21, s12, v4 ; 3E2A080C
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mac_f32_e32 v1, s20, v2 ; 3E020414
v_mac_f32_e32 v1, s21, v3 ; 3E020615
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s22, v4 ; 3E020816
v_mac_f32_e32 v6, s23, v4 ; 3E0C0817
v_mac_f32_e32 v7, s24, v4 ; 3E0E0818
v_mac_f32_e32 v17, s25, v4 ; 3E220819
v_mac_f32_e32 v18, s26, v4 ; 3E24081A
v_mac_f32_e32 v19, s27, v4 ; 3E26081B
v_mov_b32_e32 v2, s17 ; 7E040211
v_mac_f32_e32 v2, s15, v0 ; 3E04000F
v_mov_b32_e32 v0, s18 ; 7E000212
v_mac_f32_e32 v0, s16, v10 ; 3E001410
exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s6, v12 ; 10001806
v_mac_f32_e32 v0, s5, v13 ; 3E001A05
v_mul_f32_e32 v2, s13, v12 ; 1004180D
v_mac_f32_e32 v2, s19, v13 ; 3E041A13
v_mul_f32_e32 v3, s14, v12 ; 1006180E
v_mac_f32_e32 v3, s20, v13 ; 3E061A14
v_mac_f32_e32 v0, s11, v14 ; 3E001C0B
v_mac_f32_e32 v2, s4, v14 ; 3E041C04
v_mac_f32_e32 v3, s21, v14 ; 3E061C15
v_mul_f32_e32 v4, v9, v9 ; 10081309
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v4, v5, v5 ; 3E080B05
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mul_f32_e32 v10, v4, v16 ; 10142104
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mul_f32_e32 v3, v8, v3 ; 10060708
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v5, s4 ; 7E0A0204
v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14
exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v5, v2, v4 ; 100A0902
v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A
v_mul_f32_e32 v3, v3, v9 ; 10061303
v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104
v_mul_f32_e32 v0, v0, v10 ; 10001500
v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509
v_mul_f32_e32 v2, v15, v5 ; 10040B0F
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_f32_e32 v0, v15, v0 ; 1000010F
v_mov_b32_e32 v5, s6 ; 7E0A0206
v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15
exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140
s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139
s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A
s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C
s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D
s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mov_b32_e32 v0, s4 ; 7E000204
v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01
v_subrev_f32_e32 v2, s6, v20 ; 0A042806
v_mul_f32_e32 v3, s7, v20 ; 10062807
v_mul_f32_e32 v5, s8, v20 ; 100A2808
v_mul_f32_e32 v8, s9, v20 ; 10102809
v_mac_f32_e32 v3, s10, v21 ; 3E062A0A
v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10
v_mac_f32_e32 v8, s17, v21 ; 3E102A11
v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v5, s19, v1 ; 3E0A0213
v_mac_f32_e32 v8, s0, v1 ; 3E100200
v_subrev_f32_e32 v1, s12, v1 ; 0A02020C
v_mac_f32_e32 v3, s13, v19 ; 3E06260D
v_mac_f32_e32 v5, s14, v19 ; 3E0A260E
v_mac_f32_e32 v8, s15, v19 ; 3E10260F
exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09
exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211
exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503
exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 24
Code Size: 920 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[7..9]
DCL TEMP[0..13], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 10.0000, 0.9680, 0.0300, 0.5000}
IMM[2] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D
11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[7].xxxx
13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
14: MOV_SAT TEMP[4].x, TEMP[4].xxxx
15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
16: SQRT TEMP[4].x, TEMP[4].xxxx
17: MOV TEMP[3].z, TEMP[4].xxxx
18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
20: MOV TEMP[0].y, TEMP[1].xxxx
21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz
22: MOV TEMP[0].z, TEMP[1].xxxx
23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
24: RSQ TEMP[1].x, TEMP[1].xxxx
25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
26: MOV TEMP[1].xy, IN[0].xyyy
27: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
28: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[1].xyzz
29: LRP TEMP[2].xyz, CONST[8].xxxx, TEMP[1].xyzz, CONST[1].xyzz
30: MOV TEMP[3].x, IN[1].wwww
31: MOV TEMP[3].y, IN[2].wwww
32: MOV TEMP[3].z, IN[3].wwww
33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
34: RSQ TEMP[4].x, TEMP[4].xxxx
35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
36: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz
37: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
38: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz
39: MOV TEMP[5].xy, TEMP[5].xxxx
40: TEX TEMP[5].w, TEMP[5], SAMP[2], 2D
41: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww
42: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww
43: RSQ TEMP[6].x, TEMP[6].xxxx
44: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx
45: MOV TEMP[6].xyz, -TEMP[6].xyzx
46: ADD TEMP[7].x, IMM[0].zzzz, -CONST[9].xxxx
47: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz
48: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz
49: RSQ TEMP[9].x, TEMP[9].xxxx
50: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx
51: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz
52: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
53: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz
54: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx
55: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx
56: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww
57: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[7].xxxx
58: MAD TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy, IMM[1].zzzz
59: LG2 TEMP[10].x, TEMP[10].xxxx
60: RCP TEMP[10].x, TEMP[10].xxxx
61: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx
62: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx
63: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[4].xxxx
64: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[6].xxxx
65: MUL TEMP[13].x, IMM[0].xxxx, TEMP[3].xxxx
66: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx
67: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].wwww
68: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx
69: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[9].xxxx
70: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[9].xxxx
71: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[2].xxxx
72: RCP TEMP[6].x, TEMP[6].xxxx
73: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz
74: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx
75: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx
76: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].zzzz
77: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy
78: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
79: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx
80: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx
81: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx
82: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
83: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz
84: ADD TEMP[8].xyz, IMM[0].zzzz, -TEMP[2].xyzz
85: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx
86: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx
87: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx
88: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx
89: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz
90: MUL TEMP[3].x, CONST[8].xxxx, CONST[1].wwww
91: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx
92: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
93: ADD TEMP[3].x, TEMP[7].xxxx, IMM[0].yyyy
94: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx
95: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx
96: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
97: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
98: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].zzzz
99: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy
100: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx
101: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx
102: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
103: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
104: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].zzzz
105: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx
106: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
107: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx
108: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
109: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
110: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww
111: MOV_SAT TEMP[1].x, TEMP[1].xxxx
112: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
113: MOV TEMP[0].xyz, TEMP[0].xyzx
114: MOV TEMP[0].w, IMM[0].zzzz
115: MOV OUT[0], TEMP[0]
116: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%42 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0
%44 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%47 = bitcast <8 x i32> addrspace(2)* %46 to <32 x i8> addrspace(2)*
%48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0
%49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%50 = bitcast <4 x i32> addrspace(2)* %49 to <16 x i8> addrspace(2)*
%51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0
%52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%53 = bitcast <8 x i32> addrspace(2)* %52 to <32 x i8> addrspace(2)*
%54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0
%55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%56 = bitcast <4 x i32> addrspace(2)* %55 to <16 x i8> addrspace(2)*
%57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0
%58 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%74 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%75 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%79 = bitcast float %58 to i32
%80 = bitcast float %59 to i32
%81 = insertelement <2 x i32> undef, i32 %79, i32 0
%82 = insertelement <2 x i32> %81, i32 %80, i32 1
%83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %48, <16 x i8> %51, i32 2)
%84 = extractelement <4 x float> %83, i32 1
%85 = extractelement <4 x float> %83, i32 3
%86 = fmul float %85, 2.000000e+00
%87 = fadd float %86, -1.000000e+00
%88 = fmul float %84, 2.000000e+00
%89 = fadd float %88, -1.000000e+00
%90 = fmul float %87, %39
%91 = fmul float %89, %39
%92 = fmul float %90, %90
%93 = fmul float %91, %91
%94 = fadd float %92, %93
%95 = call float @llvm.AMDIL.clamp.(float %94, float 0.000000e+00, float 1.000000e+00)
%96 = fsub float 1.000000e+00, %95
%97 = call float @llvm.sqrt.f32(float %96)
%98 = fmul float %90, %60
%99 = fmul float %91, %64
%100 = fadd float %99, %98
%101 = fmul float %97, %68
%102 = fadd float %100, %101
%103 = fmul float %90, %61
%104 = fmul float %91, %65
%105 = fadd float %104, %103
%106 = fmul float %97, %69
%107 = fadd float %105, %106
%108 = fmul float %90, %62
%109 = fmul float %91, %66
%110 = fadd float %109, %108
%111 = fmul float %97, %70
%112 = fadd float %110, %111
%113 = fmul float %102, %102
%114 = fmul float %107, %107
%115 = fadd float %114, %113
%116 = fmul float %112, %112
%117 = fadd float %115, %116
%118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117)
%119 = fmul float %102, %118
%120 = fmul float %107, %118
%121 = fmul float %112, %118
%122 = bitcast float %58 to i32
%123 = bitcast float %59 to i32
%124 = insertelement <2 x i32> undef, i32 %122, i32 0
%125 = insertelement <2 x i32> %124, i32 %123, i32 1
%126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %43, <16 x i8> %45, i32 2)
%127 = extractelement <4 x float> %126, i32 0
%128 = extractelement <4 x float> %126, i32 1
%129 = extractelement <4 x float> %126, i32 2
%130 = fmul float %36, %127
%131 = fmul float %37, %128
%132 = fmul float %38, %129
%133 = call float @llvm.AMDGPU.lrp(float %40, float %130, float %26)
%134 = call float @llvm.AMDGPU.lrp(float %40, float %131, float %27)
%135 = call float @llvm.AMDGPU.lrp(float %40, float %132, float %28)
%136 = fmul float %63, %63
%137 = fmul float %67, %67
%138 = fadd float %137, %136
%139 = fmul float %71, %71
%140 = fadd float %138, %139
%141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140)
%142 = fmul float %63, %141
%143 = fmul float %67, %141
%144 = fmul float %71, %141
%145 = fmul float %119, %142
%146 = fmul float %120, %143
%147 = fadd float %146, %145
%148 = fmul float %121, %144
%149 = fadd float %147, %148
%150 = call float @llvm.maxnum.f32(float %149, float 0.000000e+00)
%151 = fmul float %76, %76
%152 = fmul float %77, %77
%153 = fadd float %152, %151
%154 = fmul float %78, %78
%155 = fadd float %153, %154
%156 = bitcast float %155 to i32
%157 = bitcast float %155 to i32
%158 = insertelement <2 x i32> undef, i32 %156, i32 0
%159 = insertelement <2 x i32> %158, i32 %157, i32 1
%160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %54, <16 x i8> %57, i32 2)
%161 = extractelement <4 x float> %160, i32 3
%162 = fmul float %30, %161
%163 = fmul float %31, %161
%164 = fmul float %32, %161
%165 = fmul float %73, %73
%166 = fmul float %74, %74
%167 = fadd float %166, %165
%168 = fmul float %75, %75
%169 = fadd float %167, %168
%170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169)
%171 = fmul float %73, %170
%172 = fmul float %74, %170
%173 = fmul float %75, %170
%174 = fsub float 1.000000e+00, %41
%175 = fsub float %142, %171
%176 = fsub float %143, %172
%177 = fsub float %144, %173
%178 = fmul float %175, %175
%179 = fmul float %176, %176
%180 = fadd float %179, %178
%181 = fmul float %177, %177
%182 = fadd float %180, %181
%183 = call float @llvm.AMDGPU.rsq.clamped.f32(float %182)
%184 = fmul float %175, %183
%185 = fmul float %176, %183
%186 = fmul float %177, %183
%187 = fmul float %171, %119
%188 = fsub float -0.000000e+00, %187
%189 = fmul float %172, %120
%190 = fsub float %188, %189
%191 = fmul float %173, %121
%192 = fsub float %190, %191
%193 = call float @llvm.maxnum.f32(float %192, float 0.000000e+00)
%194 = fmul float %142, %184
%195 = fmul float %143, %185
%196 = fadd float %195, %194
%197 = fmul float %144, %186
%198 = fadd float %196, %197
%199 = call float @llvm.maxnum.f32(float %198, float 0.000000e+00)
%200 = fmul float %174, %174
%201 = fmul float %200, %35
%202 = fsub float 1.000000e+00, %174
%203 = fmul float %202, 0x3FEEF9DB20000000
%204 = fadd float %203, 0x3F9EB851E0000000
%205 = call float @llvm.log2.f32(float %204)
%206 = fdiv float 1.000000e+00, %205
%207 = fmul float %206, 1.000000e+01
%208 = fmul float %207, %207
%209 = fsub float 1.000000e+00, %150
%210 = fsub float 1.000000e+00, %193
%211 = fmul float %199, 2.000000e+00
%212 = fmul float %199, %174
%213 = fmul float %211, %212
%214 = fadd float %213, 5.000000e-01
%215 = fsub float 1.000000e+00, %199
%216 = call float @llvm.AMDGPU.lrp(float %150, float 1.000000e+00, float %201)
%217 = call float @llvm.AMDGPU.lrp(float %193, float 1.000000e+00, float %201)
%218 = fmul float %216, %217
%219 = fadd float %218, 0x3F1A36E2E0000000
%220 = fdiv float 1.000000e+00, %219
%221 = fmul float %119, %184
%222 = fmul float %120, %185
%223 = fadd float %222, %221
%224 = fmul float %121, %186
%225 = fadd float %223, %224
%226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00)
%227 = call float @llvm.pow.f32(float %226, float %208)
%228 = fadd float %208, 1.000000e+00
%229 = fmul float %228, %34
%230 = fmul float %227, %229
%231 = fmul float %220, %230
%232 = fmul float %231, %150
%233 = fmul float %232, %33
%234 = call float @llvm.maxnum.f32(float %233, float 0.000000e+00)
%235 = fmul float %234, %162
%236 = fmul float %234, %163
%237 = fmul float %234, %164
%238 = fsub float 1.000000e+00, %133
%239 = fsub float 1.000000e+00, %134
%240 = fsub float 1.000000e+00, %135
%241 = fmul float %215, %215
%242 = fmul float %215, %215
%243 = fmul float %242, %215
%244 = fmul float %241, %243
%245 = fmul float %238, %244
%246 = fadd float %245, %133
%247 = fmul float %239, %244
%248 = fadd float %247, %134
%249 = fmul float %240, %244
%250 = fadd float %249, %135
%251 = fmul float %40, %29
%252 = fsub float %29, %251
%253 = fmul float %130, %252
%254 = fmul float %131, %252
%255 = fmul float %132, %252
%256 = fadd float %214, -1.000000e+00
%257 = fmul float %209, %209
%258 = fmul float %209, %209
%259 = fmul float %258, %209
%260 = fmul float %257, %259
%261 = fmul float %256, %260
%262 = fadd float %261, 1.000000e+00
%263 = fadd float %214, -1.000000e+00
%264 = fmul float %210, %210
%265 = fmul float %210, %210
%266 = fmul float %265, %210
%267 = fmul float %264, %266
%268 = fmul float %263, %267
%269 = fadd float %268, 1.000000e+00
%270 = fmul float %262, %269
%271 = fmul float %270, %150
%272 = fmul float %162, %271
%273 = fmul float %163, %271
%274 = fmul float %164, %271
%275 = fmul float %253, %272
%276 = fmul float %254, %273
%277 = fmul float %255, %274
%278 = fmul float %235, %246
%279 = fadd float %278, %275
%280 = fmul float %236, %248
%281 = fadd float %280, %276
%282 = fmul float %237, %250
%283 = fadd float %282, %277
%284 = fmul float %72, %24
%285 = fadd float %284, %25
%286 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00)
%287 = fmul float %279, %286
%288 = fmul float %281, %286
%289 = fmul float %283, %286
%290 = call i32 @llvm.SI.packf16(float %287, float %288)
%291 = bitcast i32 %290 to float
%292 = call i32 @llvm.SI.packf16(float %289, float 1.000000e+00)
%293 = bitcast i32 %292 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %291, float %293, float %291, float %293)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700
v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701
v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800
v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801
v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900
v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901
v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00
v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01
v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00
v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00
v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01
v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00
v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01
v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00
v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01
v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000
v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001
v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100
v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101
v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201
v_interp_p1_f32 v19, v0, 3, 4, [m0] ; C84C1300
v_interp_p2_f32 v19, [v19], v1, 3, 4, [m0] ; C84D1301
v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400
v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401
v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500
v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501
v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600
v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s32, s[0:3], 0x10 ; C2100110
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
s_buffer_load_dword s33, s[0:3], 0x11 ; C2108111
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_buffer_load_dword s34, s[0:3], 0x12 ; C2110112
v_mul_f32_e32 v22, v20, v20 ; 102C2914
s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508
s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710
v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15
v_mac_f32_e32 v22, v0, v0 ; 3E2C0100
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800A00 00660002
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00441702
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v2, s32, v23 ; 10042E20
v_mul_f32_e32 v3, s33, v24 ; 10063021
v_mul_f32_e32 v20, s34, v25 ; 10283222
v_mov_b32_e32 v23, v22 ; 7E2E0316
image_sample v21, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[40:47], s[36:39] ; F0800800 012A1516
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
v_mov_b32_e32 v22, s5 ; 7E2C0205
v_mac_f32_e32 v22, s4, v16 ; 3E2C2004
s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C
s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107
s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108
s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109
s_buffer_load_dword s13, s[0:3], 0xa ; C206810A
s_buffer_load_dword s14, s[0:3], 0xc ; C207010C
s_buffer_load_dword s15, s[0:3], 0x20 ; C2078120
s_buffer_load_dword s0, s[0:3], 0x24 ; C2000124
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mul_f32_e32 v0, s4, v0 ; 10000004
v_mul_f32_e32 v4, v4, v1 ; 10080304
v_mac_f32_e32 v4, v8, v0 ; 3E080108
v_mul_f32_e32 v5, v5, v1 ; 100A0305
v_mac_f32_e32 v5, v9, v0 ; 3E0A0109
v_mul_f32_e32 v6, v6, v1 ; 100C0306
v_mac_f32_e32 v6, v10, v0 ; 3E0C010A
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v4, v12, v0 ; 3E08010C
v_mac_f32_e32 v5, v13, v0 ; 3E0A010D
v_mac_f32_e32 v6, v14, v0 ; 3E0C010E
v_mul_f32_e32 v0, v4, v4 ; 10000904
v_mac_f32_e32 v0, v5, v5 ; 3E000B05
v_mac_f32_e32 v0, v6, v6 ; 3E000D06
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v17, v17 ; 10022311
v_mac_f32_e32 v1, v18, v18 ; 3E022512
v_mac_f32_e32 v1, v19, v19 ; 3E022713
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v8, v7, v7 ; 10100F07
v_mac_f32_e32 v8, v11, v11 ; 3E10170B
v_mac_f32_e32 v8, v15, v15 ; 3E101F0F
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v1, v17 ; 10122301
v_mul_f32_e32 v10, v1, v18 ; 10142501
v_mul_f32_e32 v1, v1, v19 ; 10022701
v_mul_f32_e32 v4, v0, v4 ; 10080900
v_mul_f32_e32 v5, v0, v5 ; 100A0B00
v_mul_f32_e32 v12, v8, v7 ; 10180F08
v_mad_f32 v7, v7, v8, -v9 ; D2820007 84261107
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mad_f32 v9, -v10, v5, -v9 ; D2820009 A4260B0A
v_mad_f32 v10, v11, v8, -v10 ; D282000A 842A110B
v_mul_f32_e32 v13, v7, v7 ; 101A0F07
v_mac_f32_e32 v13, v10, v10 ; 3E1A150A
v_mad_f32 v14, v15, v8, -v1 ; D282000E 8406110F
v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v16, v12, v4 ; 1020090C
v_mac_f32_e32 v16, v11, v5 ; 3E200B0B
v_mul_f32_e32 v7, v13, v7 ; 100E0F0D
v_mul_f32_e32 v10, v13, v10 ; 1014150D
v_mul_f32_e32 v12, v7, v12 ; 10181907
v_mac_f32_e32 v12, v10, v11 ; 3E18170A
v_mul_f32_e32 v4, v7, v4 ; 10080907
v_mac_f32_e32 v4, v10, v5 ; 3E080B0A
v_mul_f32_e32 v0, v0, v6 ; 10000D00
v_mul_f32_e32 v5, v8, v15 ; 100A1F08
v_mul_f32_e32 v6, v13, v14 ; 100C1D0D
v_mad_f32 v1, -v1, v0, v9 ; D2820001 24260101
v_mac_f32_e32 v16, v5, v0 ; 3E200105
v_mac_f32_e32 v12, v6, v5 ; 3E180B06
v_mac_f32_e32 v4, v6, v0 ; 3E080106
v_sub_f32_e64 v0, 1.0, s15 ; D2080000 00001EF2
v_mul_f32_e32 v5, s6, v0 ; 100A0006
v_mad_f32 v6, -v0, s6, 1.0 ; D2820006 23C80D00
v_mul_f32_e32 v7, s7, v0 ; 100E0007
v_mad_f32 v8, -v0, s7, 1.0 ; D2820008 23C80F00
v_mul_f32_e32 v9, s8, v0 ; 10120008
v_mad_f32 v0, -v0, s8, 1.0 ; D2820000 23C81100
v_sub_f32_e64 v10, 1.0, s0 ; D208000A 000000F2
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F
v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9
v_max_f32_e32 v12, 0, v12 ; 20181880
v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2
v_mul_f32_e32 v14, v13, v13 ; 101C1B0D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
v_mac_f32_e32 v5, s15, v2 ; 3E0A040F
v_mad_f32 v6, -s15, v2, v6 ; D2820006 241A040F
v_mac_f32_e32 v5, v13, v6 ; 3E0A0D0D
v_mac_f32_e32 v7, s15, v3 ; 3E0E060F
v_mad_f32 v6, -s15, v3, v8 ; D2820006 2422060F
v_mac_f32_e32 v7, v13, v6 ; 3E0E0D0D
v_mac_f32_e32 v9, s15, v20 ; 3E12280F
v_log_f32_e32 v6, v11 ; 7E0C4F0B
v_mad_f32 v0, -s15, v20, v0 ; D2820000 2402280F
v_mac_f32_e32 v9, v13, v0 ; 3E12010D
v_mul_f32_e32 v0, s11, v21 ; 10002A0B
v_mul_f32_e32 v8, s12, v21 ; 10102A0C
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_mul_f32_e32 v11, s13, v21 ; 10162A0D
v_mul_f32_e32 v13, v10, v10 ; 101A150A
v_mul_f32_e32 v13, s9, v13 ; 101A1A09
v_mul_f32_e32 v6, 0x41200000, v6 ; 100C0CFF 41200000
v_mad_f32 v14, v6, v6, 1.0 ; D282000E 03CA0D06
v_mul_f32_e32 v14, s5, v14 ; 101C1C05
v_max_f32_e32 v1, 0, v1 ; 20020280
v_sub_f32_e32 v15, 1.0, v1 ; 081E02F2
v_mul_f32_e32 v17, v13, v15 ; 10221F0D
v_mac_f32_e32 v17, 1.0, v1 ; 3E2202F2
v_max_f32_e32 v1, 0, v16 ; 20022080
v_sub_f32_e32 v16, 1.0, v1 ; 082002F2
v_mul_f32_e32 v13, v13, v16 ; 101A210D
v_mac_f32_e32 v13, 1.0, v1 ; 3E1A02F2
v_max_f32_e32 v4, 0, v4 ; 20080880
v_log_f32_e32 v4, v4 ; 7E084F04
v_madak_f32_e32 v13, v13, v17, 0x38d1b717 ; 421A230D 38D1B717
v_mul_f32_e32 v6, v6, v6 ; 100C0D06
v_rcp_f32_e32 v13, v13 ; 7E1A550D
v_mul_legacy_f32_e32 v4, v6, v4 ; 0E080906
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_f32_e32 v4, v14, v4 ; 1008090E
v_mul_f32_e32 v4, v4, v13 ; 10081B04
v_mul_f32_e32 v4, v1, v4 ; 10080901
v_mul_f32_e32 v4, s14, v4 ; 1008080E
v_mov_b32_e32 v6, s15 ; 7E0C020F
v_mad_f32 v6, -v6, s10, s10 ; D2820006 20281506
v_mul_f32_e32 v10, v10, v12 ; 1014190A
v_add_f32_e32 v12, v12, v12 ; 0618190C
v_mad_f32 v10, v12, v10, 0.5 ; D282000A 03C2150C
v_mul_f32_e32 v12, v16, v16 ; 10182110
v_mul_f32_e32 v13, v16, v12 ; 101A1910
v_mul_f32_e32 v12, v13, v12 ; 1018190D
v_mul_f32_e32 v13, v15, v15 ; 101A1F0F
v_mul_f32_e32 v14, v15, v13 ; 101C1B0F
v_mul_f32_e32 v13, v14, v13 ; 101A1B0E
v_add_f32_e32 v10, -1.0, v10 ; 061414F3
v_mad_f32 v12, v10, v12, 1.0 ; D282000C 03CA190A
v_mad_f32 v10, v10, v13, 1.0 ; D282000A 03CA1B0A
v_mul_f32_e32 v10, v10, v12 ; 1014190A
v_mul_f32_e32 v2, v6, v2 ; 10040506
v_mul_f32_e32 v1, v1, v10 ; 10021501
v_mul_f32_e32 v10, v1, v0 ; 10140101
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_mac_f32_e32 v2, v5, v0 ; 3E040105
v_mul_f32_e32 v0, v6, v3 ; 10000706
v_mul_f32_e32 v3, v1, v8 ; 10061101
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v3, v8, v4 ; 10060908
v_mac_f32_e32 v0, v7, v3 ; 3E000707
v_mul_f32_e32 v3, v6, v20 ; 10062906
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mul_f32_e32 v1, v1, v11 ; 10021701
v_mul_f32_e32 v1, v1, v3 ; 10020701
v_mac_f32_e32 v1, v9, v4 ; 3E020909
v_add_f32_e64 v3, 0, v22 clamp ; D2060803 00022C80
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 28
Code Size: 1084 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..7]
DCL TEMP[0..2], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[1].xyxx
1: MOV TEMP[0].z, IMM[0].xxxx
2: MUL TEMP[1], CONST[0], IN[0].xxxx
3: MAD TEMP[1], CONST[1], IN[0].yyyy, TEMP[1]
4: MAD TEMP[1], CONST[2], IN[0].zzzz, TEMP[1]
5: MAD TEMP[1].xyz, CONST[3], IN[0].wwww, TEMP[1]
6: MUL TEMP[2], CONST[4], IN[0].xxxx
7: MAD TEMP[2], CONST[5], IN[0].yyyy, TEMP[2]
8: MAD TEMP[2], CONST[6], IN[0].zzzz, TEMP[2]
9: MAD TEMP[2], CONST[7], IN[0].wwww, TEMP[2]
10: MOV TEMP[0].xyz, TEMP[0].xyzx
11: MOV TEMP[0].w, TEMP[1].xxxx
12: MOV TEMP[1].xy, TEMP[1].yzyy
13: MOV OUT[2], TEMP[1]
14: MOV OUT[1], TEMP[0]
15: MOV OUT[0], TEMP[2]
16: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = add i32 %5, %7
%47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46)
%48 = extractelement <4 x float> %47, i32 0
%49 = extractelement <4 x float> %47, i32 1
%50 = extractelement <4 x float> %47, i32 2
%51 = extractelement <4 x float> %47, i32 3
%52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0
%54 = add i32 %5, %7
%55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54)
%56 = extractelement <4 x float> %55, i32 0
%57 = extractelement <4 x float> %55, i32 1
%58 = fmul float %13, %48
%59 = fmul float %14, %48
%60 = fmul float %15, %48
%61 = fmul float %16, %48
%62 = fmul float %17, %49
%63 = fadd float %62, %58
%64 = fmul float %18, %49
%65 = fadd float %64, %59
%66 = fmul float %19, %49
%67 = fadd float %66, %60
%68 = fmul float %20, %49
%69 = fadd float %68, %61
%70 = fmul float %21, %50
%71 = fadd float %70, %63
%72 = fmul float %22, %50
%73 = fadd float %72, %65
%74 = fmul float %23, %50
%75 = fadd float %74, %67
%76 = fmul float %24, %50
%77 = fadd float %76, %69
%78 = fmul float %25, %51
%79 = fadd float %78, %71
%80 = fmul float %26, %51
%81 = fadd float %80, %73
%82 = fmul float %27, %51
%83 = fadd float %82, %75
%84 = fmul float %28, %48
%85 = fmul float %29, %48
%86 = fmul float %30, %48
%87 = fmul float %31, %48
%88 = fmul float %32, %49
%89 = fadd float %88, %84
%90 = fmul float %33, %49
%91 = fadd float %90, %85
%92 = fmul float %34, %49
%93 = fadd float %92, %86
%94 = fmul float %35, %49
%95 = fadd float %94, %87
%96 = fmul float %36, %50
%97 = fadd float %96, %89
%98 = fmul float %37, %50
%99 = fadd float %98, %91
%100 = fmul float %38, %50
%101 = fadd float %100, %93
%102 = fmul float %39, %50
%103 = fadd float %102, %95
%104 = fmul float %40, %51
%105 = fadd float %104, %97
%106 = fmul float %41, %51
%107 = fadd float %106, %99
%108 = fmul float %42, %51
%109 = fadd float %108, %101
%110 = fmul float %43, %51
%111 = fadd float %110, %103
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float 0.000000e+00, float %79)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %81, float %83, float %83, float %77)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %105, float %107, float %109, float %111)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0xc ; C203010C
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s8, v1 ; 10000208
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s4, v2 ; 3E000404
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
v_mac_f32_e32 v0, s5, v3 ; 3E000605
s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
s_buffer_load_dword s8, s[0:3], 0xe ; C204010E
v_mac_f32_e32 v0, s6, v4 ; 3E000806
v_mov_b32_e32 v7, 0 ; 7E0E0280
exp 15, 32, 0, 0, 0, v5, v6, v7, v0 ; F800020F 00070605
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_buffer_load_dword s11, s[0:3], 0x6 ; C2058106
s_buffer_load_dword s12, s[0:3], 0x7 ; C2060107
s_buffer_load_dword s13, s[0:3], 0xa ; C206810A
s_buffer_load_dword s14, s[0:3], 0xb ; C207010B
s_buffer_load_dword s15, s[0:3], 0x10 ; C2078110
s_buffer_load_dword s16, s[0:3], 0x11 ; C2080111
s_buffer_load_dword s17, s[0:3], 0x12 ; C2088112
s_buffer_load_dword s18, s[0:3], 0x13 ; C2090113
s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114
s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115
s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116
s_buffer_load_dword s22, s[0:3], 0x17 ; C20B0117
s_buffer_load_dword s23, s[0:3], 0x18 ; C20B8118
s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119
s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A
s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B
s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C
s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D
s_buffer_load_dword s29, s[0:3], 0x1e ; C20E811E
s_buffer_load_dword s0, s[0:3], 0x1f ; C200011F
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mul_f32_e32 v0, s9, v1 ; 10000209
v_mac_f32_e32 v0, s6, v2 ; 3E000406
v_mul_f32_e32 v5, s10, v1 ; 100A020A
v_mac_f32_e32 v5, s11, v2 ; 3E0A040B
v_mul_f32_e32 v6, s7, v1 ; 100C0207
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s15, v1 ; 100E020F
v_mac_f32_e32 v7, s19, v2 ; 3E0E0413
v_mul_f32_e32 v8, s16, v1 ; 10100210
v_mac_f32_e32 v8, s20, v2 ; 3E100414
v_mul_f32_e32 v9, s17, v1 ; 10120211
v_mac_f32_e32 v9, s21, v2 ; 3E120415
v_mul_f32_e32 v1, s18, v1 ; 10020212
v_mac_f32_e32 v1, s22, v2 ; 3E020416
v_mac_f32_e32 v0, s4, v3 ; 3E000604
v_mac_f32_e32 v5, s13, v3 ; 3E0A060D
v_mac_f32_e32 v6, s14, v3 ; 3E0C060E
v_mac_f32_e32 v7, s23, v3 ; 3E0E0617
v_mac_f32_e32 v8, s24, v3 ; 3E100618
v_mac_f32_e32 v9, s25, v3 ; 3E120619
v_mac_f32_e32 v1, s26, v3 ; 3E02061A
v_mac_f32_e32 v0, s5, v4 ; 3E000805
v_mac_f32_e32 v5, s8, v4 ; 3E0A0808
v_mac_f32_e32 v7, s27, v4 ; 3E0E081B
v_mac_f32_e32 v8, s28, v4 ; 3E10081C
v_mac_f32_e32 v9, s29, v4 ; 3E12081D
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 33, 0, 0, 0, v0, v5, v5, v6 ; F800021F 06050500
exp 15, 12, 0, 1, 0, v7, v8, v9, v1 ; F80008CF 01090807
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 12
Code Size: 328 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0]
DCL CONST[2..14]
DCL TEMP[0..14], LOCAL
IMM[0] FLT32 { 0.1000, 0.0039, 0.0020, 0.0000}
IMM[1] FLT32 { 4.0000, -1.0000, -0.0000, 0.2000}
IMM[2] FLT32 { 1.1000, 1.0000, 0.1900, 3.3000}
IMM[3] FLT32 { 0.3030, 0.2100, 0.2300, 0.2500}
IMM[4] FLT32 { 0.5000, -0.2000, 0.1140, 0.5870}
IMM[5] FLT32 { 0.2990, 1.5000, -0.5000, 0.0000}
0: MOV TEMP[0].x, IN[0].wwww
1: MOV TEMP[0].yz, IN[1].yxyy
2: MAD TEMP[1].xyz, IN[0].xyzz, CONST[2].xxxx, CONST[12].xyzz
3: MOV TEMP[2].z, TEMP[1].zzzz
4: MUL TEMP[3].xyz, TEMP[1].xyzz, IMM[0].xxxx
5: FLR TEMP[4].xyz, TEMP[3].xyzz
6: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz
7: FRC TEMP[3].xyz, TEMP[3].xyzz
8: MOV TEMP[5].xy, TEMP[4].xyyy
9: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D
10: MOV TEMP[6].x, TEMP[5].wwww
11: MOV TEMP[6].y, TEMP[4].zzzz
12: MOV TEMP[5].x, TEMP[5].wwww
13: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy
14: MOV TEMP[5].y, TEMP[7].xxxx
15: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy
16: MOV TEMP[7].xy, TEMP[7].xyyy
17: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D
18: MOV TEMP[8].x, TEMP[7].wwww
19: MOV TEMP[8].y, TEMP[4].zzzz
20: MOV TEMP[7].x, TEMP[7].wwww
21: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy
22: MOV TEMP[7].y, TEMP[9].xxxx
23: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww
24: MOV TEMP[9].xy, TEMP[9].xyyy
25: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D
26: MOV TEMP[10].x, TEMP[9].wwww
27: MOV TEMP[10].y, TEMP[4].zzzz
28: MOV TEMP[9].x, TEMP[9].wwww
29: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy
30: MOV TEMP[9].y, TEMP[11].xxxx
31: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy
32: MOV TEMP[11].xy, TEMP[11].xyyy
33: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D
34: MOV TEMP[12].x, TEMP[11].wwww
35: MOV TEMP[12].y, TEMP[4].zzzz
36: MOV TEMP[11].x, TEMP[11].wwww
37: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy
38: MOV TEMP[11].y, TEMP[4].xxxx
39: MOV TEMP[4].xy, TEMP[6].xyyy
40: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
41: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
42: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz
43: MOV TEMP[5].xy, TEMP[5].xyyy
44: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
45: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
46: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy
47: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
48: MOV TEMP[4].y, TEMP[5].xxxx
49: MOV TEMP[5].xy, TEMP[8].xyyy
50: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
51: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
52: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz
53: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
54: MOV TEMP[4].z, TEMP[5].xxxx
55: MOV TEMP[5].xy, TEMP[7].xyyy
56: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
57: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
58: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy
59: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
60: MOV TEMP[4].w, TEMP[5].xxxx
61: MOV TEMP[5].xy, TEMP[10].xyyy
62: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
63: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
64: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz
65: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
66: MOV TEMP[6].xy, TEMP[9].xyyy
67: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
68: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
69: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy
70: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
71: MOV TEMP[5].y, TEMP[6].xxxx
72: MOV TEMP[6].xy, TEMP[12].xyyy
73: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
74: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
75: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz
76: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
77: MOV TEMP[5].z, TEMP[6].xxxx
78: MOV TEMP[6].xy, TEMP[11].xyyy
79: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
80: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
81: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy
82: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
83: MOV TEMP[5].w, TEMP[6].xxxx
84: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4]
85: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy
86: MUL TEMP[5].xyz, TEMP[1].yxzz, IMM[0].xxxx
87: FLR TEMP[6].xyz, TEMP[5].xyzz
88: MAD TEMP[6].xyz, IMM[0].yyyy, TEMP[6].xyzz, IMM[0].zzzz
89: FRC TEMP[5].xyz, TEMP[5].xyzz
90: MOV TEMP[7].xy, TEMP[6].xyyy
91: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D
92: MOV TEMP[8].x, TEMP[7].wwww
93: MOV TEMP[8].y, TEMP[6].zzzz
94: MOV TEMP[7].x, TEMP[7].wwww
95: ADD TEMP[9].x, TEMP[6].zzzz, IMM[0].yyyy
96: MOV TEMP[7].y, TEMP[9].xxxx
97: ADD TEMP[9].xy, TEMP[6].xyyy, IMM[0].wyyy
98: MOV TEMP[9].xy, TEMP[9].xyyy
99: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D
100: MOV TEMP[10].x, TEMP[9].wwww
101: MOV TEMP[10].y, TEMP[6].zzzz
102: MOV TEMP[9].x, TEMP[9].wwww
103: ADD TEMP[11].x, TEMP[6].zzzz, IMM[0].yyyy
104: MOV TEMP[9].y, TEMP[11].xxxx
105: ADD TEMP[11].xy, TEMP[6].xyyy, IMM[0].ywww
106: MOV TEMP[11].xy, TEMP[11].xyyy
107: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D
108: MOV TEMP[12].x, TEMP[11].wwww
109: MOV TEMP[12].y, TEMP[6].zzzz
110: MOV TEMP[11].x, TEMP[11].wwww
111: ADD TEMP[13].x, TEMP[6].zzzz, IMM[0].yyyy
112: MOV TEMP[11].y, TEMP[13].xxxx
113: ADD TEMP[13].xy, TEMP[6].xyyy, IMM[0].yyyy
114: MOV TEMP[13].xy, TEMP[13].xyyy
115: TEX TEMP[13].w, TEMP[13], SAMP[0], 2D
116: MOV TEMP[14].x, TEMP[13].wwww
117: MOV TEMP[14].y, TEMP[6].zzzz
118: MOV TEMP[13].x, TEMP[13].wwww
119: ADD TEMP[6].x, TEMP[6].zzzz, IMM[0].yyyy
120: MOV TEMP[13].y, TEMP[6].xxxx
121: MOV TEMP[6].xy, TEMP[8].xyyy
122: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
123: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
124: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[5].xyzz
125: MOV TEMP[7].xy, TEMP[7].xyyy
126: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D
127: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy
128: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].zzyy
129: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz
130: MOV TEMP[6].y, TEMP[7].xxxx
131: MOV TEMP[7].xy, TEMP[10].xyyy
132: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D
133: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy
134: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].zyzz
135: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz
136: MOV TEMP[6].z, TEMP[7].xxxx
137: MOV TEMP[7].xy, TEMP[9].xyyy
138: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D
139: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy
140: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].zyyy
141: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz
142: MOV TEMP[6].w, TEMP[7].xxxx
143: MOV TEMP[7].xy, TEMP[12].xyyy
144: TEX TEMP[7].xyz, TEMP[7], SAMP[0], 2D
145: MAD TEMP[7].xyz, TEMP[7].xyzz, IMM[1].xxxx, IMM[1].yyyy
146: ADD TEMP[8].xyz, TEMP[5].xyzz, IMM[1].yzzz
147: DP3 TEMP[7].x, TEMP[7].xyzz, TEMP[8].xyzz
148: MOV TEMP[8].xy, TEMP[11].xyyy
149: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D
150: MAD TEMP[8].xyz, TEMP[8].xyzz, IMM[1].xxxx, IMM[1].yyyy
151: ADD TEMP[9].xyz, TEMP[5].xyzz, IMM[1].yzyy
152: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[9].xyzz
153: MOV TEMP[7].y, TEMP[8].xxxx
154: MOV TEMP[8].xy, TEMP[14].xyyy
155: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D
156: MAD TEMP[8].xyz, TEMP[8].xyzz, IMM[1].xxxx, IMM[1].yyyy
157: ADD TEMP[9].xyz, TEMP[5].xyzz, IMM[1].yyzz
158: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[9].xyzz
159: MOV TEMP[7].z, TEMP[8].xxxx
160: MOV TEMP[8].xy, TEMP[13].xyyy
161: TEX TEMP[8].xyz, TEMP[8], SAMP[0], 2D
162: MAD TEMP[8].xyz, TEMP[8].xyzz, IMM[1].xxxx, IMM[1].yyyy
163: ADD TEMP[9].xyz, TEMP[5].xyzz, IMM[1].yyyy
164: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[9].xyzz
165: MOV TEMP[7].w, TEMP[8].xxxx
166: LRP TEMP[6], TEMP[5].xxxx, TEMP[7], TEMP[6]
167: LRP TEMP[6].xy, TEMP[5].yyyy, TEMP[6].zwww, TEMP[6].xyyy
168: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx
169: LRP TEMP[4].x, TEMP[5].zzzz, TEMP[6].yyyy, TEMP[6].xxxx
170: MOV TEMP[3].y, TEMP[4].xxxx
171: ADD TEMP[2].xy, TEMP[1].xyyy, TEMP[3].xyyy
172: ADD TEMP[1].xyz, TEMP[2].xyzz, IMM[1].wwww
173: FLR TEMP[3].xyz, TEMP[1].xyzz
174: MAD TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz, IMM[0].zzzz
175: FRC TEMP[1].xyz, TEMP[1].xyzz
176: MOV TEMP[4].xy, TEMP[3].xyyy
177: TEX TEMP[4].w, TEMP[4], SAMP[0], 2D
178: MOV TEMP[5].x, TEMP[4].wwww
179: MOV TEMP[5].y, TEMP[3].zzzz
180: MOV TEMP[4].x, TEMP[4].wwww
181: ADD TEMP[6].x, TEMP[3].zzzz, IMM[0].yyyy
182: MOV TEMP[4].y, TEMP[6].xxxx
183: ADD TEMP[6].xy, TEMP[3].xyyy, IMM[0].wyyy
184: MOV TEMP[6].xy, TEMP[6].xyyy
185: TEX TEMP[6].w, TEMP[6], SAMP[0], 2D
186: MOV TEMP[7].x, TEMP[6].wwww
187: MOV TEMP[7].y, TEMP[3].zzzz
188: MOV TEMP[6].x, TEMP[6].wwww
189: ADD TEMP[8].x, TEMP[3].zzzz, IMM[0].yyyy
190: MOV TEMP[6].y, TEMP[8].xxxx
191: ADD TEMP[8].xy, TEMP[3].xyyy, IMM[0].ywww
192: MOV TEMP[8].xy, TEMP[8].xyyy
193: TEX TEMP[8].w, TEMP[8], SAMP[0], 2D
194: MOV TEMP[9].x, TEMP[8].wwww
195: MOV TEMP[9].y, TEMP[3].zzzz
196: MOV TEMP[8].x, TEMP[8].wwww
197: ADD TEMP[10].x, TEMP[3].zzzz, IMM[0].yyyy
198: MOV TEMP[8].y, TEMP[10].xxxx
199: ADD TEMP[10].xy, TEMP[3].xyyy, IMM[0].yyyy
200: MOV TEMP[10].xy, TEMP[10].xyyy
201: TEX TEMP[10].w, TEMP[10], SAMP[0], 2D
202: MOV TEMP[11].x, TEMP[10].wwww
203: MOV TEMP[11].y, TEMP[3].zzzz
204: MOV TEMP[10].x, TEMP[10].wwww
205: ADD TEMP[3].x, TEMP[3].zzzz, IMM[0].yyyy
206: MOV TEMP[10].y, TEMP[3].xxxx
207: MOV TEMP[3].xy, TEMP[5].xyyy
208: TEX TEMP[3].xyz, TEMP[3], SAMP[0], 2D
209: MAD TEMP[3].xyz, TEMP[3].xyzz, IMM[1].xxxx, IMM[1].yyyy
210: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[1].xyzz
211: MOV TEMP[4].xy, TEMP[4].xyyy
212: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
213: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
214: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].zzyy
215: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
216: MOV TEMP[3].y, TEMP[4].xxxx
217: MOV TEMP[4].xy, TEMP[7].xyyy
218: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
219: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
220: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].zyzz
221: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
222: MOV TEMP[3].z, TEMP[4].xxxx
223: MOV TEMP[4].xy, TEMP[6].xyyy
224: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
225: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
226: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].zyyy
227: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
228: MOV TEMP[3].w, TEMP[4].xxxx
229: MOV TEMP[4].xy, TEMP[9].xyyy
230: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
231: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
232: ADD TEMP[5].xyz, TEMP[1].xyzz, IMM[1].yzzz
233: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
234: MOV TEMP[5].xy, TEMP[8].xyyy
235: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
236: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
237: ADD TEMP[6].xyz, TEMP[1].xyzz, IMM[1].yzyy
238: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
239: MOV TEMP[4].y, TEMP[5].xxxx
240: MOV TEMP[5].xy, TEMP[11].xyyy
241: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
242: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
243: ADD TEMP[6].xyz, TEMP[1].xyzz, IMM[1].yyzz
244: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
245: MOV TEMP[4].z, TEMP[5].xxxx
246: MOV TEMP[5].xy, TEMP[10].xyyy
247: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
248: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
249: ADD TEMP[6].xyz, TEMP[1].xyzz, IMM[1].yyyy
250: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
251: MOV TEMP[4].w, TEMP[5].xxxx
252: LRP TEMP[3], TEMP[1].xxxx, TEMP[4], TEMP[3]
253: LRP TEMP[3].xy, TEMP[1].yyyy, TEMP[3].zwww, TEMP[3].xyyy
254: LRP TEMP[1].x, TEMP[1].zzzz, TEMP[3].yyyy, TEMP[3].xxxx
255: MAD TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx, IMM[2].yyyy
256: MAD TEMP[3].xyz, TEMP[1].xxxx, IMM[2].zzzz, TEMP[2].xyzz
257: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[2].wwww
258: FLR TEMP[4].xyz, TEMP[3].xyzz
259: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz
260: FRC TEMP[3].xyz, TEMP[3].xyzz
261: MOV TEMP[5].xy, TEMP[4].xyyy
262: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D
263: MOV TEMP[6].x, TEMP[5].wwww
264: MOV TEMP[6].y, TEMP[4].zzzz
265: MOV TEMP[5].x, TEMP[5].wwww
266: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy
267: MOV TEMP[5].y, TEMP[7].xxxx
268: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy
269: MOV TEMP[7].xy, TEMP[7].xyyy
270: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D
271: MOV TEMP[8].x, TEMP[7].wwww
272: MOV TEMP[8].y, TEMP[4].zzzz
273: MOV TEMP[7].x, TEMP[7].wwww
274: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy
275: MOV TEMP[7].y, TEMP[9].xxxx
276: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww
277: MOV TEMP[9].xy, TEMP[9].xyyy
278: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D
279: MOV TEMP[10].x, TEMP[9].wwww
280: MOV TEMP[10].y, TEMP[4].zzzz
281: MOV TEMP[9].x, TEMP[9].wwww
282: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy
283: MOV TEMP[9].y, TEMP[11].xxxx
284: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy
285: MOV TEMP[11].xy, TEMP[11].xyyy
286: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D
287: MOV TEMP[12].x, TEMP[11].wwww
288: MOV TEMP[12].y, TEMP[4].zzzz
289: MOV TEMP[11].x, TEMP[11].wwww
290: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy
291: MOV TEMP[11].y, TEMP[4].xxxx
292: MOV TEMP[4].xy, TEMP[6].xyyy
293: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
294: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
295: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz
296: MOV TEMP[5].xy, TEMP[5].xyyy
297: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
298: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
299: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy
300: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
301: MOV TEMP[4].y, TEMP[5].xxxx
302: MOV TEMP[5].xy, TEMP[8].xyyy
303: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
304: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
305: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz
306: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
307: MOV TEMP[4].z, TEMP[5].xxxx
308: MOV TEMP[5].xy, TEMP[7].xyyy
309: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
310: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
311: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy
312: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
313: MOV TEMP[4].w, TEMP[5].xxxx
314: MOV TEMP[5].xy, TEMP[10].xyyy
315: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
316: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
317: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz
318: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
319: MOV TEMP[6].xy, TEMP[9].xyyy
320: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
321: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
322: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy
323: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
324: MOV TEMP[5].y, TEMP[6].xxxx
325: MOV TEMP[6].xy, TEMP[12].xyyy
326: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
327: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
328: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz
329: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
330: MOV TEMP[5].z, TEMP[6].xxxx
331: MOV TEMP[6].xy, TEMP[11].xyyy
332: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
333: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
334: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy
335: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
336: MOV TEMP[5].w, TEMP[6].xxxx
337: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4]
338: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy
339: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx
340: ABS TEMP[3].x, TEMP[3].xxxx
341: MAD TEMP[1].x, TEMP[3].xxxx, IMM[3].xxxx, TEMP[1].xxxx
342: MAD TEMP[3].xyz, TEMP[1].xxxx, IMM[3].yyyy, TEMP[2].xyzz
343: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[13].xxxx
344: FLR TEMP[4].xyz, TEMP[3].xyzz
345: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz
346: FRC TEMP[3].xyz, TEMP[3].xyzz
347: MOV TEMP[5].xy, TEMP[4].xyyy
348: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D
349: MOV TEMP[6].x, TEMP[5].wwww
350: MOV TEMP[6].y, TEMP[4].zzzz
351: MOV TEMP[5].x, TEMP[5].wwww
352: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy
353: MOV TEMP[5].y, TEMP[7].xxxx
354: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy
355: MOV TEMP[7].xy, TEMP[7].xyyy
356: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D
357: MOV TEMP[8].x, TEMP[7].wwww
358: MOV TEMP[8].y, TEMP[4].zzzz
359: MOV TEMP[7].x, TEMP[7].wwww
360: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy
361: MOV TEMP[7].y, TEMP[9].xxxx
362: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww
363: MOV TEMP[9].xy, TEMP[9].xyyy
364: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D
365: MOV TEMP[10].x, TEMP[9].wwww
366: MOV TEMP[10].y, TEMP[4].zzzz
367: MOV TEMP[9].x, TEMP[9].wwww
368: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy
369: MOV TEMP[9].y, TEMP[11].xxxx
370: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy
371: MOV TEMP[11].xy, TEMP[11].xyyy
372: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D
373: MOV TEMP[12].x, TEMP[11].wwww
374: MOV TEMP[12].y, TEMP[4].zzzz
375: MOV TEMP[11].x, TEMP[11].wwww
376: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy
377: MOV TEMP[11].y, TEMP[4].xxxx
378: MOV TEMP[4].xy, TEMP[6].xyyy
379: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
380: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
381: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz
382: MOV TEMP[5].xy, TEMP[5].xyyy
383: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
384: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
385: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy
386: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
387: MOV TEMP[4].y, TEMP[5].xxxx
388: MOV TEMP[5].xy, TEMP[8].xyyy
389: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
390: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
391: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz
392: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
393: MOV TEMP[4].z, TEMP[5].xxxx
394: MOV TEMP[5].xy, TEMP[7].xyyy
395: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
396: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
397: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy
398: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
399: MOV TEMP[4].w, TEMP[5].xxxx
400: MOV TEMP[5].xy, TEMP[10].xyyy
401: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
402: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
403: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz
404: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
405: MOV TEMP[6].xy, TEMP[9].xyyy
406: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
407: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
408: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy
409: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
410: MOV TEMP[5].y, TEMP[6].xxxx
411: MOV TEMP[6].xy, TEMP[12].xyyy
412: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
413: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
414: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz
415: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
416: MOV TEMP[5].z, TEMP[6].xxxx
417: MOV TEMP[6].xy, TEMP[11].xyyy
418: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
419: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
420: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy
421: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
422: MOV TEMP[5].w, TEMP[6].xxxx
423: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4]
424: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy
425: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx
426: ABS TEMP[3].x, TEMP[3].xxxx
427: RCP TEMP[4].x, CONST[14].xxxx
428: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx
429: MAD TEMP[3].xyz, TEMP[1].xxxx, IMM[3].zzzz, TEMP[2].xyzz
430: MUL TEMP[3].xyz, TEMP[3].xyzz, CONST[13].yyyy
431: FLR TEMP[4].xyz, TEMP[3].xyzz
432: MAD TEMP[4].xyz, IMM[0].yyyy, TEMP[4].xyzz, IMM[0].zzzz
433: FRC TEMP[3].xyz, TEMP[3].xyzz
434: MOV TEMP[5].xy, TEMP[4].xyyy
435: TEX TEMP[5].w, TEMP[5], SAMP[0], 2D
436: MOV TEMP[6].x, TEMP[5].wwww
437: MOV TEMP[6].y, TEMP[4].zzzz
438: MOV TEMP[5].x, TEMP[5].wwww
439: ADD TEMP[7].x, TEMP[4].zzzz, IMM[0].yyyy
440: MOV TEMP[5].y, TEMP[7].xxxx
441: ADD TEMP[7].xy, TEMP[4].xyyy, IMM[0].wyyy
442: MOV TEMP[7].xy, TEMP[7].xyyy
443: TEX TEMP[7].w, TEMP[7], SAMP[0], 2D
444: MOV TEMP[8].x, TEMP[7].wwww
445: MOV TEMP[8].y, TEMP[4].zzzz
446: MOV TEMP[7].x, TEMP[7].wwww
447: ADD TEMP[9].x, TEMP[4].zzzz, IMM[0].yyyy
448: MOV TEMP[7].y, TEMP[9].xxxx
449: ADD TEMP[9].xy, TEMP[4].xyyy, IMM[0].ywww
450: MOV TEMP[9].xy, TEMP[9].xyyy
451: TEX TEMP[9].w, TEMP[9], SAMP[0], 2D
452: MOV TEMP[10].x, TEMP[9].wwww
453: MOV TEMP[10].y, TEMP[4].zzzz
454: MOV TEMP[9].x, TEMP[9].wwww
455: ADD TEMP[11].x, TEMP[4].zzzz, IMM[0].yyyy
456: MOV TEMP[9].y, TEMP[11].xxxx
457: ADD TEMP[11].xy, TEMP[4].xyyy, IMM[0].yyyy
458: MOV TEMP[11].xy, TEMP[11].xyyy
459: TEX TEMP[11].w, TEMP[11], SAMP[0], 2D
460: MOV TEMP[12].x, TEMP[11].wwww
461: MOV TEMP[12].y, TEMP[4].zzzz
462: MOV TEMP[11].x, TEMP[11].wwww
463: ADD TEMP[4].x, TEMP[4].zzzz, IMM[0].yyyy
464: MOV TEMP[11].y, TEMP[4].xxxx
465: MOV TEMP[4].xy, TEMP[6].xyyy
466: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
467: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
468: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[3].xyzz
469: MOV TEMP[5].xy, TEMP[5].xyyy
470: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
471: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
472: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zzyy
473: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
474: MOV TEMP[4].y, TEMP[5].xxxx
475: MOV TEMP[5].xy, TEMP[8].xyyy
476: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
477: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
478: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyzz
479: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
480: MOV TEMP[4].z, TEMP[5].xxxx
481: MOV TEMP[5].xy, TEMP[7].xyyy
482: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
483: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
484: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].zyyy
485: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
486: MOV TEMP[4].w, TEMP[5].xxxx
487: MOV TEMP[5].xy, TEMP[10].xyyy
488: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
489: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
490: ADD TEMP[6].xyz, TEMP[3].xyzz, IMM[1].yzzz
491: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
492: MOV TEMP[6].xy, TEMP[9].xyyy
493: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
494: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
495: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yzyy
496: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
497: MOV TEMP[5].y, TEMP[6].xxxx
498: MOV TEMP[6].xy, TEMP[12].xyyy
499: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
500: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
501: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyzz
502: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
503: MOV TEMP[5].z, TEMP[6].xxxx
504: MOV TEMP[6].xy, TEMP[11].xyyy
505: TEX TEMP[6].xyz, TEMP[6], SAMP[0], 2D
506: MAD TEMP[6].xyz, TEMP[6].xyzz, IMM[1].xxxx, IMM[1].yyyy
507: ADD TEMP[7].xyz, TEMP[3].xyzz, IMM[1].yyyy
508: DP3 TEMP[6].x, TEMP[6].xyzz, TEMP[7].xyzz
509: MOV TEMP[5].w, TEMP[6].xxxx
510: LRP TEMP[4], TEMP[3].xxxx, TEMP[5], TEMP[4]
511: LRP TEMP[4].xy, TEMP[3].yyyy, TEMP[4].zwww, TEMP[4].xyyy
512: LRP TEMP[3].x, TEMP[3].zzzz, TEMP[4].yyyy, TEMP[4].xxxx
513: ABS TEMP[3].x, TEMP[3].xxxx
514: RCP TEMP[4].x, CONST[14].yyyy
515: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx
516: MAD TEMP[2].xyz, TEMP[1].xxxx, IMM[3].wwww, TEMP[2].xyzz
517: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[13].zzzz
518: FLR TEMP[3].xyz, TEMP[2].xyzz
519: MAD TEMP[3].xyz, IMM[0].yyyy, TEMP[3].xyzz, IMM[0].zzzz
520: FRC TEMP[2].xyz, TEMP[2].xyzz
521: MOV TEMP[4].xy, TEMP[3].xyyy
522: TEX TEMP[4].w, TEMP[4], SAMP[0], 2D
523: MOV TEMP[5].x, TEMP[4].wwww
524: MOV TEMP[5].y, TEMP[3].zzzz
525: MOV TEMP[4].x, TEMP[4].wwww
526: ADD TEMP[6].x, TEMP[3].zzzz, IMM[0].yyyy
527: MOV TEMP[4].y, TEMP[6].xxxx
528: ADD TEMP[6].xy, TEMP[3].xyyy, IMM[0].wyyy
529: MOV TEMP[6].xy, TEMP[6].xyyy
530: TEX TEMP[6].w, TEMP[6], SAMP[0], 2D
531: MOV TEMP[7].x, TEMP[6].wwww
532: MOV TEMP[7].y, TEMP[3].zzzz
533: MOV TEMP[6].x, TEMP[6].wwww
534: ADD TEMP[8].x, TEMP[3].zzzz, IMM[0].yyyy
535: MOV TEMP[6].y, TEMP[8].xxxx
536: ADD TEMP[8].xy, TEMP[3].xyyy, IMM[0].ywww
537: MOV TEMP[8].xy, TEMP[8].xyyy
538: TEX TEMP[8].w, TEMP[8], SAMP[0], 2D
539: MOV TEMP[9].x, TEMP[8].wwww
540: MOV TEMP[9].y, TEMP[3].zzzz
541: MOV TEMP[8].x, TEMP[8].wwww
542: ADD TEMP[10].x, TEMP[3].zzzz, IMM[0].yyyy
543: MOV TEMP[8].y, TEMP[10].xxxx
544: ADD TEMP[10].xy, TEMP[3].xyyy, IMM[0].yyyy
545: MOV TEMP[10].xy, TEMP[10].xyyy
546: TEX TEMP[10].w, TEMP[10], SAMP[0], 2D
547: MOV TEMP[11].x, TEMP[10].wwww
548: MOV TEMP[11].y, TEMP[3].zzzz
549: MOV TEMP[10].x, TEMP[10].wwww
550: ADD TEMP[3].x, TEMP[3].zzzz, IMM[0].yyyy
551: MOV TEMP[10].y, TEMP[3].xxxx
552: MOV TEMP[3].xy, TEMP[5].xyyy
553: TEX TEMP[3].xyz, TEMP[3], SAMP[0], 2D
554: MAD TEMP[3].xyz, TEMP[3].xyzz, IMM[1].xxxx, IMM[1].yyyy
555: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[2].xyzz
556: MOV TEMP[4].xy, TEMP[4].xyyy
557: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
558: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
559: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].zzyy
560: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
561: MOV TEMP[3].y, TEMP[4].xxxx
562: MOV TEMP[4].xy, TEMP[7].xyyy
563: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
564: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
565: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].zyzz
566: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
567: MOV TEMP[3].z, TEMP[4].xxxx
568: MOV TEMP[4].xy, TEMP[6].xyyy
569: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
570: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
571: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].zyyy
572: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
573: MOV TEMP[3].w, TEMP[4].xxxx
574: MOV TEMP[4].xy, TEMP[9].xyyy
575: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
576: MAD TEMP[4].xyz, TEMP[4].xyzz, IMM[1].xxxx, IMM[1].yyyy
577: ADD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].yzzz
578: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[5].xyzz
579: MOV TEMP[5].xy, TEMP[8].xyyy
580: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
581: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
582: ADD TEMP[6].xyz, TEMP[2].xyzz, IMM[1].yzyy
583: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
584: MOV TEMP[4].y, TEMP[5].xxxx
585: MOV TEMP[5].xy, TEMP[11].xyyy
586: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
587: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
588: ADD TEMP[6].xyz, TEMP[2].xyzz, IMM[1].yyzz
589: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
590: MOV TEMP[4].z, TEMP[5].xxxx
591: MOV TEMP[5].xy, TEMP[10].xyyy
592: TEX TEMP[5].xyz, TEMP[5], SAMP[0], 2D
593: MAD TEMP[5].xyz, TEMP[5].xyzz, IMM[1].xxxx, IMM[1].yyyy
594: ADD TEMP[6].xyz, TEMP[2].xyzz, IMM[1].yyyy
595: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[6].xyzz
596: MOV TEMP[4].w, TEMP[5].xxxx
597: LRP TEMP[3], TEMP[2].xxxx, TEMP[4], TEMP[3]
598: LRP TEMP[3].xy, TEMP[2].yyyy, TEMP[3].zwww, TEMP[3].xyyy
599: LRP TEMP[2].x, TEMP[2].zzzz, TEMP[3].yyyy, TEMP[3].xxxx
600: ABS TEMP[2].x, TEMP[2].xxxx
601: RCP TEMP[3].x, CONST[14].zzzz
602: MAD TEMP[1].x, TEMP[2].xxxx, TEMP[3].xxxx, TEMP[1].xxxx
603: ADD TEMP[1].x, TEMP[1].xxxx, -CONST[3].xxxx
604: ADD TEMP[2].xyz, TEMP[0].xyzz, -CONST[0].xyzz
605: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
606: RSQ TEMP[3].x, TEMP[3].xxxx
607: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
608: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[2].xxxx
609: DP3 TEMP[3].x, CONST[9].xyzz, CONST[9].xyzz
610: RSQ TEMP[3].x, TEMP[3].xxxx
611: MUL TEMP[3].xyz, CONST[9].xyzz, TEMP[3].xxxx
612: DP3 TEMP[2].x, TEMP[2].xyzz, TEMP[3].xyzz
613: MAD TEMP[2].x, TEMP[2].xxxx, IMM[4].xxxx, IMM[4].xxxx
614: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
615: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
616: ADD TEMP[3].x, TEMP[1].xxxx, IMM[4].yyyy
617: ABS TEMP[3].x, TEMP[3].xxxx
618: ADD TEMP[3].x, IMM[2].yyyy, -TEMP[3].xxxx
619: MOV_SAT TEMP[3].x, TEMP[3].xxxx
620: MOV_SAT TEMP[4].x, TEMP[2].xxxx
621: LRP TEMP[4], TEMP[4].xxxx, CONST[5], CONST[7]
622: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx
623: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
624: MOV_SAT TEMP[2].x, TEMP[2].xxxx
625: LRP TEMP[2], TEMP[2].xxxx, CONST[6], CONST[8]
626: MAD TEMP[2].xyz, TEMP[4], TEMP[3].xxxx, TEMP[2]
627: MUL TEMP[3].x, TEMP[2].xxxx, IMM[5].xxxx
628: MAD TEMP[3].x, TEMP[2].yyyy, IMM[4].wwww, TEMP[3].xxxx
629: MAD TEMP[3].x, TEMP[2].zzzz, IMM[4].zzzz, TEMP[3].xxxx
630: POW TEMP[4].x, CONST[10].xxxx, IMM[5].yyyy
631: LRP TEMP[0].xyz, TEMP[4].xxxx, TEMP[2].xyzz, TEMP[3].xxxx
632: MUL TEMP[0].xyz, TEMP[0].xyzz, CONST[10].xxxx
633: MUL TEMP[1].x, TEMP[1].xxxx, CONST[4].xxxx
634: ADD TEMP[2].xy, IN[0].xyyy, IMM[5].zzzz
635: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy
636: SQRT TEMP[2].x, TEMP[2].xxxx
637: RCP TEMP[3].x, CONST[11].xxxx
638: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx
639: ADD TEMP[2].x, IMM[2].yyyy, -TEMP[2].xxxx
640: MOV_SAT TEMP[2].x, TEMP[2].xxxx
641: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[2].xxxx
642: MOV_SAT TEMP[1].x, TEMP[1].xxxx
643: MOV TEMP[0].w, TEMP[1].xxxx
644: MOV OUT[0], TEMP[0]
645: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%60 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%61 = load <8 x i32>, <8 x i32> addrspace(2)* %60, align 32, !tbaa !0
%62 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%63 = load <4 x i32>, <4 x i32> addrspace(2)* %62, align 16, !tbaa !0
%64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%70 = fmul float %64, %27
%71 = fadd float %70, %51
%72 = fmul float %65, %27
%73 = fadd float %72, %52
%74 = fmul float %66, %27
%75 = fadd float %74, %53
%76 = fmul float %71, 0x3FB99999A0000000
%77 = fmul float %73, 0x3FB99999A0000000
%78 = fmul float %75, 0x3FB99999A0000000
%79 = call float @llvm.floor.f32(float %76)
%80 = call float @llvm.floor.f32(float %77)
%81 = call float @llvm.floor.f32(float %78)
%82 = fmul float %79, 3.906250e-03
%83 = fadd float %82, 1.953125e-03
%84 = fmul float %80, 3.906250e-03
%85 = fadd float %84, 1.953125e-03
%86 = fmul float %81, 3.906250e-03
%87 = fadd float %86, 1.953125e-03
%88 = call float @llvm.floor.f32(float %76)
%89 = fsub float %76, %88
%90 = call float @llvm.floor.f32(float %77)
%91 = fsub float %77, %90
%92 = call float @llvm.floor.f32(float %78)
%93 = fsub float %78, %92
%94 = bitcast float %83 to i32
%95 = bitcast float %85 to i32
%96 = insertelement <2 x i32> undef, i32 %94, i32 0
%97 = insertelement <2 x i32> %96, i32 %95, i32 1
%98 = bitcast <8 x i32> %61 to <32 x i8>
%99 = bitcast <4 x i32> %63 to <16 x i8>
%100 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %97, <32 x i8> %98, <16 x i8> %99, i32 2)
%101 = extractelement <4 x float> %100, i32 3
%102 = fadd float %87, 3.906250e-03
%103 = fadd float %83, 0.000000e+00
%104 = fadd float %85, 3.906250e-03
%105 = bitcast float %103 to i32
%106 = bitcast float %104 to i32
%107 = insertelement <2 x i32> undef, i32 %105, i32 0
%108 = insertelement <2 x i32> %107, i32 %106, i32 1
%109 = bitcast <8 x i32> %61 to <32 x i8>
%110 = bitcast <4 x i32> %63 to <16 x i8>
%111 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %108, <32 x i8> %109, <16 x i8> %110, i32 2)
%112 = extractelement <4 x float> %111, i32 3
%113 = fadd float %87, 3.906250e-03
%114 = fadd float %83, 3.906250e-03
%115 = fadd float %85, 0.000000e+00
%116 = bitcast float %114 to i32
%117 = bitcast float %115 to i32
%118 = insertelement <2 x i32> undef, i32 %116, i32 0
%119 = insertelement <2 x i32> %118, i32 %117, i32 1
%120 = bitcast <8 x i32> %61 to <32 x i8>
%121 = bitcast <4 x i32> %63 to <16 x i8>
%122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2)
%123 = extractelement <4 x float> %122, i32 3
%124 = fadd float %87, 3.906250e-03
%125 = fadd float %83, 3.906250e-03
%126 = fadd float %85, 3.906250e-03
%127 = bitcast float %125 to i32
%128 = bitcast float %126 to i32
%129 = insertelement <2 x i32> undef, i32 %127, i32 0
%130 = insertelement <2 x i32> %129, i32 %128, i32 1
%131 = bitcast <8 x i32> %61 to <32 x i8>
%132 = bitcast <4 x i32> %63 to <16 x i8>
%133 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %130, <32 x i8> %131, <16 x i8> %132, i32 2)
%134 = extractelement <4 x float> %133, i32 3
%135 = fadd float %87, 3.906250e-03
%136 = bitcast float %101 to i32
%137 = bitcast float %87 to i32
%138 = insertelement <2 x i32> undef, i32 %136, i32 0
%139 = insertelement <2 x i32> %138, i32 %137, i32 1
%140 = bitcast <8 x i32> %61 to <32 x i8>
%141 = bitcast <4 x i32> %63 to <16 x i8>
%142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2)
%143 = extractelement <4 x float> %142, i32 0
%144 = extractelement <4 x float> %142, i32 1
%145 = extractelement <4 x float> %142, i32 2
%146 = fmul float %143, 4.000000e+00
%147 = fadd float %146, -1.000000e+00
%148 = fmul float %144, 4.000000e+00
%149 = fadd float %148, -1.000000e+00
%150 = fmul float %145, 4.000000e+00
%151 = fadd float %150, -1.000000e+00
%152 = fmul float %147, %89
%153 = fmul float %149, %91
%154 = fadd float %153, %152
%155 = fmul float %151, %93
%156 = fadd float %154, %155
%157 = bitcast float %101 to i32
%158 = bitcast float %102 to i32
%159 = insertelement <2 x i32> undef, i32 %157, i32 0
%160 = insertelement <2 x i32> %159, i32 %158, i32 1
%161 = bitcast <8 x i32> %61 to <32 x i8>
%162 = bitcast <4 x i32> %63 to <16 x i8>
%163 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %160, <32 x i8> %161, <16 x i8> %162, i32 2)
%164 = extractelement <4 x float> %163, i32 0
%165 = extractelement <4 x float> %163, i32 1
%166 = extractelement <4 x float> %163, i32 2
%167 = fmul float %164, 4.000000e+00
%168 = fadd float %167, -1.000000e+00
%169 = fmul float %165, 4.000000e+00
%170 = fadd float %169, -1.000000e+00
%171 = fmul float %166, 4.000000e+00
%172 = fadd float %171, -1.000000e+00
%173 = fadd float %93, -1.000000e+00
%174 = fmul float %168, %89
%175 = fmul float %170, %91
%176 = fadd float %175, %174
%177 = fmul float %172, %173
%178 = fadd float %176, %177
%179 = bitcast float %112 to i32
%180 = bitcast float %87 to i32
%181 = insertelement <2 x i32> undef, i32 %179, i32 0
%182 = insertelement <2 x i32> %181, i32 %180, i32 1
%183 = bitcast <8 x i32> %61 to <32 x i8>
%184 = bitcast <4 x i32> %63 to <16 x i8>
%185 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %182, <32 x i8> %183, <16 x i8> %184, i32 2)
%186 = extractelement <4 x float> %185, i32 0
%187 = extractelement <4 x float> %185, i32 1
%188 = extractelement <4 x float> %185, i32 2
%189 = fmul float %186, 4.000000e+00
%190 = fadd float %189, -1.000000e+00
%191 = fmul float %187, 4.000000e+00
%192 = fadd float %191, -1.000000e+00
%193 = fmul float %188, 4.000000e+00
%194 = fadd float %193, -1.000000e+00
%195 = fadd float %91, -1.000000e+00
%196 = fmul float %190, %89
%197 = fmul float %192, %195
%198 = fadd float %197, %196
%199 = fmul float %194, %93
%200 = fadd float %198, %199
%201 = bitcast float %112 to i32
%202 = bitcast float %113 to i32
%203 = insertelement <2 x i32> undef, i32 %201, i32 0
%204 = insertelement <2 x i32> %203, i32 %202, i32 1
%205 = bitcast <8 x i32> %61 to <32 x i8>
%206 = bitcast <4 x i32> %63 to <16 x i8>
%207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 2)
%208 = extractelement <4 x float> %207, i32 0
%209 = extractelement <4 x float> %207, i32 1
%210 = extractelement <4 x float> %207, i32 2
%211 = fmul float %208, 4.000000e+00
%212 = fadd float %211, -1.000000e+00
%213 = fmul float %209, 4.000000e+00
%214 = fadd float %213, -1.000000e+00
%215 = fmul float %210, 4.000000e+00
%216 = fadd float %215, -1.000000e+00
%217 = fadd float %91, -1.000000e+00
%218 = fadd float %93, -1.000000e+00
%219 = fmul float %212, %89
%220 = fmul float %214, %217
%221 = fadd float %220, %219
%222 = fmul float %216, %218
%223 = fadd float %221, %222
%224 = bitcast float %123 to i32
%225 = bitcast float %87 to i32
%226 = insertelement <2 x i32> undef, i32 %224, i32 0
%227 = insertelement <2 x i32> %226, i32 %225, i32 1
%228 = bitcast <8 x i32> %61 to <32 x i8>
%229 = bitcast <4 x i32> %63 to <16 x i8>
%230 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %227, <32 x i8> %228, <16 x i8> %229, i32 2)
%231 = extractelement <4 x float> %230, i32 0
%232 = extractelement <4 x float> %230, i32 1
%233 = extractelement <4 x float> %230, i32 2
%234 = fmul float %231, 4.000000e+00
%235 = fadd float %234, -1.000000e+00
%236 = fmul float %232, 4.000000e+00
%237 = fadd float %236, -1.000000e+00
%238 = fmul float %233, 4.000000e+00
%239 = fadd float %238, -1.000000e+00
%240 = fadd float %89, -1.000000e+00
%241 = fmul float %235, %240
%242 = fmul float %237, %91
%243 = fadd float %242, %241
%244 = fmul float %239, %93
%245 = fadd float %243, %244
%246 = bitcast float %123 to i32
%247 = bitcast float %124 to i32
%248 = insertelement <2 x i32> undef, i32 %246, i32 0
%249 = insertelement <2 x i32> %248, i32 %247, i32 1
%250 = bitcast <8 x i32> %61 to <32 x i8>
%251 = bitcast <4 x i32> %63 to <16 x i8>
%252 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %249, <32 x i8> %250, <16 x i8> %251, i32 2)
%253 = extractelement <4 x float> %252, i32 0
%254 = extractelement <4 x float> %252, i32 1
%255 = extractelement <4 x float> %252, i32 2
%256 = fmul float %253, 4.000000e+00
%257 = fadd float %256, -1.000000e+00
%258 = fmul float %254, 4.000000e+00
%259 = fadd float %258, -1.000000e+00
%260 = fmul float %255, 4.000000e+00
%261 = fadd float %260, -1.000000e+00
%262 = fadd float %89, -1.000000e+00
%263 = fadd float %93, -1.000000e+00
%264 = fmul float %257, %262
%265 = fmul float %259, %91
%266 = fadd float %265, %264
%267 = fmul float %261, %263
%268 = fadd float %266, %267
%269 = bitcast float %134 to i32
%270 = bitcast float %87 to i32
%271 = insertelement <2 x i32> undef, i32 %269, i32 0
%272 = insertelement <2 x i32> %271, i32 %270, i32 1
%273 = bitcast <8 x i32> %61 to <32 x i8>
%274 = bitcast <4 x i32> %63 to <16 x i8>
%275 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %272, <32 x i8> %273, <16 x i8> %274, i32 2)
%276 = extractelement <4 x float> %275, i32 0
%277 = extractelement <4 x float> %275, i32 1
%278 = extractelement <4 x float> %275, i32 2
%279 = fmul float %276, 4.000000e+00
%280 = fadd float %279, -1.000000e+00
%281 = fmul float %277, 4.000000e+00
%282 = fadd float %281, -1.000000e+00
%283 = fmul float %278, 4.000000e+00
%284 = fadd float %283, -1.000000e+00
%285 = fadd float %89, -1.000000e+00
%286 = fadd float %91, -1.000000e+00
%287 = fmul float %280, %285
%288 = fmul float %282, %286
%289 = fadd float %288, %287
%290 = fmul float %284, %93
%291 = fadd float %289, %290
%292 = bitcast float %134 to i32
%293 = bitcast float %135 to i32
%294 = insertelement <2 x i32> undef, i32 %292, i32 0
%295 = insertelement <2 x i32> %294, i32 %293, i32 1
%296 = bitcast <8 x i32> %61 to <32 x i8>
%297 = bitcast <4 x i32> %63 to <16 x i8>
%298 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %295, <32 x i8> %296, <16 x i8> %297, i32 2)
%299 = extractelement <4 x float> %298, i32 0
%300 = extractelement <4 x float> %298, i32 1
%301 = extractelement <4 x float> %298, i32 2
%302 = fmul float %299, 4.000000e+00
%303 = fadd float %302, -1.000000e+00
%304 = fmul float %300, 4.000000e+00
%305 = fadd float %304, -1.000000e+00
%306 = fmul float %301, 4.000000e+00
%307 = fadd float %306, -1.000000e+00
%308 = fadd float %89, -1.000000e+00
%309 = fadd float %91, -1.000000e+00
%310 = fadd float %93, -1.000000e+00
%311 = fmul float %303, %308
%312 = fmul float %305, %309
%313 = fadd float %312, %311
%314 = fmul float %307, %310
%315 = fadd float %313, %314
%316 = call float @llvm.AMDGPU.lrp(float %89, float %245, float %156)
%317 = call float @llvm.AMDGPU.lrp(float %89, float %268, float %178)
%318 = call float @llvm.AMDGPU.lrp(float %89, float %291, float %200)
%319 = call float @llvm.AMDGPU.lrp(float %89, float %315, float %223)
%320 = call float @llvm.AMDGPU.lrp(float %91, float %318, float %316)
%321 = call float @llvm.AMDGPU.lrp(float %91, float %319, float %317)
%322 = fmul float %73, 0x3FB99999A0000000
%323 = fmul float %71, 0x3FB99999A0000000
%324 = fmul float %75, 0x3FB99999A0000000
%325 = call float @llvm.floor.f32(float %322)
%326 = call float @llvm.floor.f32(float %323)
%327 = call float @llvm.floor.f32(float %324)
%328 = fmul float %325, 3.906250e-03
%329 = fadd float %328, 1.953125e-03
%330 = fmul float %326, 3.906250e-03
%331 = fadd float %330, 1.953125e-03
%332 = fmul float %327, 3.906250e-03
%333 = fadd float %332, 1.953125e-03
%334 = call float @llvm.floor.f32(float %322)
%335 = fsub float %322, %334
%336 = call float @llvm.floor.f32(float %323)
%337 = fsub float %323, %336
%338 = call float @llvm.floor.f32(float %324)
%339 = fsub float %324, %338
%340 = bitcast float %329 to i32
%341 = bitcast float %331 to i32
%342 = insertelement <2 x i32> undef, i32 %340, i32 0
%343 = insertelement <2 x i32> %342, i32 %341, i32 1
%344 = bitcast <8 x i32> %61 to <32 x i8>
%345 = bitcast <4 x i32> %63 to <16 x i8>
%346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %343, <32 x i8> %344, <16 x i8> %345, i32 2)
%347 = extractelement <4 x float> %346, i32 3
%348 = fadd float %333, 3.906250e-03
%349 = fadd float %329, 0.000000e+00
%350 = fadd float %331, 3.906250e-03
%351 = bitcast float %349 to i32
%352 = bitcast float %350 to i32
%353 = insertelement <2 x i32> undef, i32 %351, i32 0
%354 = insertelement <2 x i32> %353, i32 %352, i32 1
%355 = bitcast <8 x i32> %61 to <32 x i8>
%356 = bitcast <4 x i32> %63 to <16 x i8>
%357 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %354, <32 x i8> %355, <16 x i8> %356, i32 2)
%358 = extractelement <4 x float> %357, i32 3
%359 = fadd float %333, 3.906250e-03
%360 = fadd float %329, 3.906250e-03
%361 = fadd float %331, 0.000000e+00
%362 = bitcast float %360 to i32
%363 = bitcast float %361 to i32
%364 = insertelement <2 x i32> undef, i32 %362, i32 0
%365 = insertelement <2 x i32> %364, i32 %363, i32 1
%366 = bitcast <8 x i32> %61 to <32 x i8>
%367 = bitcast <4 x i32> %63 to <16 x i8>
%368 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %365, <32 x i8> %366, <16 x i8> %367, i32 2)
%369 = extractelement <4 x float> %368, i32 3
%370 = fadd float %333, 3.906250e-03
%371 = fadd float %329, 3.906250e-03
%372 = fadd float %331, 3.906250e-03
%373 = bitcast float %371 to i32
%374 = bitcast float %372 to i32
%375 = insertelement <2 x i32> undef, i32 %373, i32 0
%376 = insertelement <2 x i32> %375, i32 %374, i32 1
%377 = bitcast <8 x i32> %61 to <32 x i8>
%378 = bitcast <4 x i32> %63 to <16 x i8>
%379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2)
%380 = extractelement <4 x float> %379, i32 3
%381 = fadd float %333, 3.906250e-03
%382 = bitcast float %347 to i32
%383 = bitcast float %333 to i32
%384 = insertelement <2 x i32> undef, i32 %382, i32 0
%385 = insertelement <2 x i32> %384, i32 %383, i32 1
%386 = bitcast <8 x i32> %61 to <32 x i8>
%387 = bitcast <4 x i32> %63 to <16 x i8>
%388 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %385, <32 x i8> %386, <16 x i8> %387, i32 2)
%389 = extractelement <4 x float> %388, i32 0
%390 = extractelement <4 x float> %388, i32 1
%391 = extractelement <4 x float> %388, i32 2
%392 = fmul float %389, 4.000000e+00
%393 = fadd float %392, -1.000000e+00
%394 = fmul float %390, 4.000000e+00
%395 = fadd float %394, -1.000000e+00
%396 = fmul float %391, 4.000000e+00
%397 = fadd float %396, -1.000000e+00
%398 = fmul float %393, %335
%399 = fmul float %395, %337
%400 = fadd float %399, %398
%401 = fmul float %397, %339
%402 = fadd float %400, %401
%403 = bitcast float %347 to i32
%404 = bitcast float %348 to i32
%405 = insertelement <2 x i32> undef, i32 %403, i32 0
%406 = insertelement <2 x i32> %405, i32 %404, i32 1
%407 = bitcast <8 x i32> %61 to <32 x i8>
%408 = bitcast <4 x i32> %63 to <16 x i8>
%409 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %406, <32 x i8> %407, <16 x i8> %408, i32 2)
%410 = extractelement <4 x float> %409, i32 0
%411 = extractelement <4 x float> %409, i32 1
%412 = extractelement <4 x float> %409, i32 2
%413 = fmul float %410, 4.000000e+00
%414 = fadd float %413, -1.000000e+00
%415 = fmul float %411, 4.000000e+00
%416 = fadd float %415, -1.000000e+00
%417 = fmul float %412, 4.000000e+00
%418 = fadd float %417, -1.000000e+00
%419 = fadd float %339, -1.000000e+00
%420 = fmul float %414, %335
%421 = fmul float %416, %337
%422 = fadd float %421, %420
%423 = fmul float %418, %419
%424 = fadd float %422, %423
%425 = bitcast float %358 to i32
%426 = bitcast float %333 to i32
%427 = insertelement <2 x i32> undef, i32 %425, i32 0
%428 = insertelement <2 x i32> %427, i32 %426, i32 1
%429 = bitcast <8 x i32> %61 to <32 x i8>
%430 = bitcast <4 x i32> %63 to <16 x i8>
%431 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %428, <32 x i8> %429, <16 x i8> %430, i32 2)
%432 = extractelement <4 x float> %431, i32 0
%433 = extractelement <4 x float> %431, i32 1
%434 = extractelement <4 x float> %431, i32 2
%435 = fmul float %432, 4.000000e+00
%436 = fadd float %435, -1.000000e+00
%437 = fmul float %433, 4.000000e+00
%438 = fadd float %437, -1.000000e+00
%439 = fmul float %434, 4.000000e+00
%440 = fadd float %439, -1.000000e+00
%441 = fadd float %337, -1.000000e+00
%442 = fmul float %436, %335
%443 = fmul float %438, %441
%444 = fadd float %443, %442
%445 = fmul float %440, %339
%446 = fadd float %444, %445
%447 = bitcast float %358 to i32
%448 = bitcast float %359 to i32
%449 = insertelement <2 x i32> undef, i32 %447, i32 0
%450 = insertelement <2 x i32> %449, i32 %448, i32 1
%451 = bitcast <8 x i32> %61 to <32 x i8>
%452 = bitcast <4 x i32> %63 to <16 x i8>
%453 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %450, <32 x i8> %451, <16 x i8> %452, i32 2)
%454 = extractelement <4 x float> %453, i32 0
%455 = extractelement <4 x float> %453, i32 1
%456 = extractelement <4 x float> %453, i32 2
%457 = fmul float %454, 4.000000e+00
%458 = fadd float %457, -1.000000e+00
%459 = fmul float %455, 4.000000e+00
%460 = fadd float %459, -1.000000e+00
%461 = fmul float %456, 4.000000e+00
%462 = fadd float %461, -1.000000e+00
%463 = fadd float %337, -1.000000e+00
%464 = fadd float %339, -1.000000e+00
%465 = fmul float %458, %335
%466 = fmul float %460, %463
%467 = fadd float %466, %465
%468 = fmul float %462, %464
%469 = fadd float %467, %468
%470 = bitcast float %369 to i32
%471 = bitcast float %333 to i32
%472 = insertelement <2 x i32> undef, i32 %470, i32 0
%473 = insertelement <2 x i32> %472, i32 %471, i32 1
%474 = bitcast <8 x i32> %61 to <32 x i8>
%475 = bitcast <4 x i32> %63 to <16 x i8>
%476 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %473, <32 x i8> %474, <16 x i8> %475, i32 2)
%477 = extractelement <4 x float> %476, i32 0
%478 = extractelement <4 x float> %476, i32 1
%479 = extractelement <4 x float> %476, i32 2
%480 = fmul float %477, 4.000000e+00
%481 = fadd float %480, -1.000000e+00
%482 = fmul float %478, 4.000000e+00
%483 = fadd float %482, -1.000000e+00
%484 = fmul float %479, 4.000000e+00
%485 = fadd float %484, -1.000000e+00
%486 = fadd float %335, -1.000000e+00
%487 = fmul float %481, %486
%488 = fmul float %483, %337
%489 = fadd float %488, %487
%490 = fmul float %485, %339
%491 = fadd float %489, %490
%492 = bitcast float %369 to i32
%493 = bitcast float %370 to i32
%494 = insertelement <2 x i32> undef, i32 %492, i32 0
%495 = insertelement <2 x i32> %494, i32 %493, i32 1
%496 = bitcast <8 x i32> %61 to <32 x i8>
%497 = bitcast <4 x i32> %63 to <16 x i8>
%498 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %495, <32 x i8> %496, <16 x i8> %497, i32 2)
%499 = extractelement <4 x float> %498, i32 0
%500 = extractelement <4 x float> %498, i32 1
%501 = extractelement <4 x float> %498, i32 2
%502 = fmul float %499, 4.000000e+00
%503 = fadd float %502, -1.000000e+00
%504 = fmul float %500, 4.000000e+00
%505 = fadd float %504, -1.000000e+00
%506 = fmul float %501, 4.000000e+00
%507 = fadd float %506, -1.000000e+00
%508 = fadd float %335, -1.000000e+00
%509 = fadd float %339, -1.000000e+00
%510 = fmul float %503, %508
%511 = fmul float %505, %337
%512 = fadd float %511, %510
%513 = fmul float %507, %509
%514 = fadd float %512, %513
%515 = bitcast float %380 to i32
%516 = bitcast float %333 to i32
%517 = insertelement <2 x i32> undef, i32 %515, i32 0
%518 = insertelement <2 x i32> %517, i32 %516, i32 1
%519 = bitcast <8 x i32> %61 to <32 x i8>
%520 = bitcast <4 x i32> %63 to <16 x i8>
%521 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %518, <32 x i8> %519, <16 x i8> %520, i32 2)
%522 = extractelement <4 x float> %521, i32 0
%523 = extractelement <4 x float> %521, i32 1
%524 = extractelement <4 x float> %521, i32 2
%525 = fmul float %522, 4.000000e+00
%526 = fadd float %525, -1.000000e+00
%527 = fmul float %523, 4.000000e+00
%528 = fadd float %527, -1.000000e+00
%529 = fmul float %524, 4.000000e+00
%530 = fadd float %529, -1.000000e+00
%531 = fadd float %335, -1.000000e+00
%532 = fadd float %337, -1.000000e+00
%533 = fmul float %526, %531
%534 = fmul float %528, %532
%535 = fadd float %534, %533
%536 = fmul float %530, %339
%537 = fadd float %535, %536
%538 = bitcast float %380 to i32
%539 = bitcast float %381 to i32
%540 = insertelement <2 x i32> undef, i32 %538, i32 0
%541 = insertelement <2 x i32> %540, i32 %539, i32 1
%542 = bitcast <8 x i32> %61 to <32 x i8>
%543 = bitcast <4 x i32> %63 to <16 x i8>
%544 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %541, <32 x i8> %542, <16 x i8> %543, i32 2)
%545 = extractelement <4 x float> %544, i32 0
%546 = extractelement <4 x float> %544, i32 1
%547 = extractelement <4 x float> %544, i32 2
%548 = fmul float %545, 4.000000e+00
%549 = fadd float %548, -1.000000e+00
%550 = fmul float %546, 4.000000e+00
%551 = fadd float %550, -1.000000e+00
%552 = fmul float %547, 4.000000e+00
%553 = fadd float %552, -1.000000e+00
%554 = fadd float %335, -1.000000e+00
%555 = fadd float %337, -1.000000e+00
%556 = fadd float %339, -1.000000e+00
%557 = fmul float %549, %554
%558 = fmul float %551, %555
%559 = fadd float %558, %557
%560 = fmul float %553, %556
%561 = fadd float %559, %560
%562 = call float @llvm.AMDGPU.lrp(float %335, float %491, float %402)
%563 = call float @llvm.AMDGPU.lrp(float %335, float %514, float %424)
%564 = call float @llvm.AMDGPU.lrp(float %335, float %537, float %446)
%565 = call float @llvm.AMDGPU.lrp(float %335, float %561, float %469)
%566 = call float @llvm.AMDGPU.lrp(float %337, float %564, float %562)
%567 = call float @llvm.AMDGPU.lrp(float %337, float %565, float %563)
%568 = call float @llvm.AMDGPU.lrp(float %93, float %321, float %320)
%569 = call float @llvm.AMDGPU.lrp(float %339, float %567, float %566)
%570 = fadd float %71, %568
%571 = fadd float %73, %569
%572 = fadd float %570, 0x3FC99999A0000000
%573 = fadd float %571, 0x3FC99999A0000000
%574 = fadd float %75, 0x3FC99999A0000000
%575 = call float @llvm.floor.f32(float %572)
%576 = call float @llvm.floor.f32(float %573)
%577 = call float @llvm.floor.f32(float %574)
%578 = fmul float %575, 3.906250e-03
%579 = fadd float %578, 1.953125e-03
%580 = fmul float %576, 3.906250e-03
%581 = fadd float %580, 1.953125e-03
%582 = fmul float %577, 3.906250e-03
%583 = fadd float %582, 1.953125e-03
%584 = call float @llvm.floor.f32(float %572)
%585 = fsub float %572, %584
%586 = call float @llvm.floor.f32(float %573)
%587 = fsub float %573, %586
%588 = call float @llvm.floor.f32(float %574)
%589 = fsub float %574, %588
%590 = bitcast float %579 to i32
%591 = bitcast float %581 to i32
%592 = insertelement <2 x i32> undef, i32 %590, i32 0
%593 = insertelement <2 x i32> %592, i32 %591, i32 1
%594 = bitcast <8 x i32> %61 to <32 x i8>
%595 = bitcast <4 x i32> %63 to <16 x i8>
%596 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %593, <32 x i8> %594, <16 x i8> %595, i32 2)
%597 = extractelement <4 x float> %596, i32 3
%598 = fadd float %583, 3.906250e-03
%599 = fadd float %579, 0.000000e+00
%600 = fadd float %581, 3.906250e-03
%601 = bitcast float %599 to i32
%602 = bitcast float %600 to i32
%603 = insertelement <2 x i32> undef, i32 %601, i32 0
%604 = insertelement <2 x i32> %603, i32 %602, i32 1
%605 = bitcast <8 x i32> %61 to <32 x i8>
%606 = bitcast <4 x i32> %63 to <16 x i8>
%607 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %604, <32 x i8> %605, <16 x i8> %606, i32 2)
%608 = extractelement <4 x float> %607, i32 3
%609 = fadd float %583, 3.906250e-03
%610 = fadd float %579, 3.906250e-03
%611 = fadd float %581, 0.000000e+00
%612 = bitcast float %610 to i32
%613 = bitcast float %611 to i32
%614 = insertelement <2 x i32> undef, i32 %612, i32 0
%615 = insertelement <2 x i32> %614, i32 %613, i32 1
%616 = bitcast <8 x i32> %61 to <32 x i8>
%617 = bitcast <4 x i32> %63 to <16 x i8>
%618 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %615, <32 x i8> %616, <16 x i8> %617, i32 2)
%619 = extractelement <4 x float> %618, i32 3
%620 = fadd float %583, 3.906250e-03
%621 = fadd float %579, 3.906250e-03
%622 = fadd float %581, 3.906250e-03
%623 = bitcast float %621 to i32
%624 = bitcast float %622 to i32
%625 = insertelement <2 x i32> undef, i32 %623, i32 0
%626 = insertelement <2 x i32> %625, i32 %624, i32 1
%627 = bitcast <8 x i32> %61 to <32 x i8>
%628 = bitcast <4 x i32> %63 to <16 x i8>
%629 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %626, <32 x i8> %627, <16 x i8> %628, i32 2)
%630 = extractelement <4 x float> %629, i32 3
%631 = fadd float %583, 3.906250e-03
%632 = bitcast float %597 to i32
%633 = bitcast float %583 to i32
%634 = insertelement <2 x i32> undef, i32 %632, i32 0
%635 = insertelement <2 x i32> %634, i32 %633, i32 1
%636 = bitcast <8 x i32> %61 to <32 x i8>
%637 = bitcast <4 x i32> %63 to <16 x i8>
%638 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %635, <32 x i8> %636, <16 x i8> %637, i32 2)
%639 = extractelement <4 x float> %638, i32 0
%640 = extractelement <4 x float> %638, i32 1
%641 = extractelement <4 x float> %638, i32 2
%642 = fmul float %639, 4.000000e+00
%643 = fadd float %642, -1.000000e+00
%644 = fmul float %640, 4.000000e+00
%645 = fadd float %644, -1.000000e+00
%646 = fmul float %641, 4.000000e+00
%647 = fadd float %646, -1.000000e+00
%648 = fmul float %643, %585
%649 = fmul float %645, %587
%650 = fadd float %649, %648
%651 = fmul float %647, %589
%652 = fadd float %650, %651
%653 = bitcast float %597 to i32
%654 = bitcast float %598 to i32
%655 = insertelement <2 x i32> undef, i32 %653, i32 0
%656 = insertelement <2 x i32> %655, i32 %654, i32 1
%657 = bitcast <8 x i32> %61 to <32 x i8>
%658 = bitcast <4 x i32> %63 to <16 x i8>
%659 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %656, <32 x i8> %657, <16 x i8> %658, i32 2)
%660 = extractelement <4 x float> %659, i32 0
%661 = extractelement <4 x float> %659, i32 1
%662 = extractelement <4 x float> %659, i32 2
%663 = fmul float %660, 4.000000e+00
%664 = fadd float %663, -1.000000e+00
%665 = fmul float %661, 4.000000e+00
%666 = fadd float %665, -1.000000e+00
%667 = fmul float %662, 4.000000e+00
%668 = fadd float %667, -1.000000e+00
%669 = fadd float %589, -1.000000e+00
%670 = fmul float %664, %585
%671 = fmul float %666, %587
%672 = fadd float %671, %670
%673 = fmul float %668, %669
%674 = fadd float %672, %673
%675 = bitcast float %608 to i32
%676 = bitcast float %583 to i32
%677 = insertelement <2 x i32> undef, i32 %675, i32 0
%678 = insertelement <2 x i32> %677, i32 %676, i32 1
%679 = bitcast <8 x i32> %61 to <32 x i8>
%680 = bitcast <4 x i32> %63 to <16 x i8>
%681 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %678, <32 x i8> %679, <16 x i8> %680, i32 2)
%682 = extractelement <4 x float> %681, i32 0
%683 = extractelement <4 x float> %681, i32 1
%684 = extractelement <4 x float> %681, i32 2
%685 = fmul float %682, 4.000000e+00
%686 = fadd float %685, -1.000000e+00
%687 = fmul float %683, 4.000000e+00
%688 = fadd float %687, -1.000000e+00
%689 = fmul float %684, 4.000000e+00
%690 = fadd float %689, -1.000000e+00
%691 = fadd float %587, -1.000000e+00
%692 = fmul float %686, %585
%693 = fmul float %688, %691
%694 = fadd float %693, %692
%695 = fmul float %690, %589
%696 = fadd float %694, %695
%697 = bitcast float %608 to i32
%698 = bitcast float %609 to i32
%699 = insertelement <2 x i32> undef, i32 %697, i32 0
%700 = insertelement <2 x i32> %699, i32 %698, i32 1
%701 = bitcast <8 x i32> %61 to <32 x i8>
%702 = bitcast <4 x i32> %63 to <16 x i8>
%703 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %700, <32 x i8> %701, <16 x i8> %702, i32 2)
%704 = extractelement <4 x float> %703, i32 0
%705 = extractelement <4 x float> %703, i32 1
%706 = extractelement <4 x float> %703, i32 2
%707 = fmul float %704, 4.000000e+00
%708 = fadd float %707, -1.000000e+00
%709 = fmul float %705, 4.000000e+00
%710 = fadd float %709, -1.000000e+00
%711 = fmul float %706, 4.000000e+00
%712 = fadd float %711, -1.000000e+00
%713 = fadd float %587, -1.000000e+00
%714 = fadd float %589, -1.000000e+00
%715 = fmul float %708, %585
%716 = fmul float %710, %713
%717 = fadd float %716, %715
%718 = fmul float %712, %714
%719 = fadd float %717, %718
%720 = bitcast float %619 to i32
%721 = bitcast float %583 to i32
%722 = insertelement <2 x i32> undef, i32 %720, i32 0
%723 = insertelement <2 x i32> %722, i32 %721, i32 1
%724 = bitcast <8 x i32> %61 to <32 x i8>
%725 = bitcast <4 x i32> %63 to <16 x i8>
%726 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %723, <32 x i8> %724, <16 x i8> %725, i32 2)
%727 = extractelement <4 x float> %726, i32 0
%728 = extractelement <4 x float> %726, i32 1
%729 = extractelement <4 x float> %726, i32 2
%730 = fmul float %727, 4.000000e+00
%731 = fadd float %730, -1.000000e+00
%732 = fmul float %728, 4.000000e+00
%733 = fadd float %732, -1.000000e+00
%734 = fmul float %729, 4.000000e+00
%735 = fadd float %734, -1.000000e+00
%736 = fadd float %585, -1.000000e+00
%737 = fmul float %731, %736
%738 = fmul float %733, %587
%739 = fadd float %738, %737
%740 = fmul float %735, %589
%741 = fadd float %739, %740
%742 = bitcast float %619 to i32
%743 = bitcast float %620 to i32
%744 = insertelement <2 x i32> undef, i32 %742, i32 0
%745 = insertelement <2 x i32> %744, i32 %743, i32 1
%746 = bitcast <8 x i32> %61 to <32 x i8>
%747 = bitcast <4 x i32> %63 to <16 x i8>
%748 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %745, <32 x i8> %746, <16 x i8> %747, i32 2)
%749 = extractelement <4 x float> %748, i32 0
%750 = extractelement <4 x float> %748, i32 1
%751 = extractelement <4 x float> %748, i32 2
%752 = fmul float %749, 4.000000e+00
%753 = fadd float %752, -1.000000e+00
%754 = fmul float %750, 4.000000e+00
%755 = fadd float %754, -1.000000e+00
%756 = fmul float %751, 4.000000e+00
%757 = fadd float %756, -1.000000e+00
%758 = fadd float %585, -1.000000e+00
%759 = fadd float %589, -1.000000e+00
%760 = fmul float %753, %758
%761 = fmul float %755, %587
%762 = fadd float %761, %760
%763 = fmul float %757, %759
%764 = fadd float %762, %763
%765 = bitcast float %630 to i32
%766 = bitcast float %583 to i32
%767 = insertelement <2 x i32> undef, i32 %765, i32 0
%768 = insertelement <2 x i32> %767, i32 %766, i32 1
%769 = bitcast <8 x i32> %61 to <32 x i8>
%770 = bitcast <4 x i32> %63 to <16 x i8>
%771 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %768, <32 x i8> %769, <16 x i8> %770, i32 2)
%772 = extractelement <4 x float> %771, i32 0
%773 = extractelement <4 x float> %771, i32 1
%774 = extractelement <4 x float> %771, i32 2
%775 = fmul float %772, 4.000000e+00
%776 = fadd float %775, -1.000000e+00
%777 = fmul float %773, 4.000000e+00
%778 = fadd float %777, -1.000000e+00
%779 = fmul float %774, 4.000000e+00
%780 = fadd float %779, -1.000000e+00
%781 = fadd float %585, -1.000000e+00
%782 = fadd float %587, -1.000000e+00
%783 = fmul float %776, %781
%784 = fmul float %778, %782
%785 = fadd float %784, %783
%786 = fmul float %780, %589
%787 = fadd float %785, %786
%788 = bitcast float %630 to i32
%789 = bitcast float %631 to i32
%790 = insertelement <2 x i32> undef, i32 %788, i32 0
%791 = insertelement <2 x i32> %790, i32 %789, i32 1
%792 = bitcast <8 x i32> %61 to <32 x i8>
%793 = bitcast <4 x i32> %63 to <16 x i8>
%794 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %791, <32 x i8> %792, <16 x i8> %793, i32 2)
%795 = extractelement <4 x float> %794, i32 0
%796 = extractelement <4 x float> %794, i32 1
%797 = extractelement <4 x float> %794, i32 2
%798 = fmul float %795, 4.000000e+00
%799 = fadd float %798, -1.000000e+00
%800 = fmul float %796, 4.000000e+00
%801 = fadd float %800, -1.000000e+00
%802 = fmul float %797, 4.000000e+00
%803 = fadd float %802, -1.000000e+00
%804 = fadd float %585, -1.000000e+00
%805 = fadd float %587, -1.000000e+00
%806 = fadd float %589, -1.000000e+00
%807 = fmul float %799, %804
%808 = fmul float %801, %805
%809 = fadd float %808, %807
%810 = fmul float %803, %806
%811 = fadd float %809, %810
%812 = call float @llvm.AMDGPU.lrp(float %585, float %741, float %652)
%813 = call float @llvm.AMDGPU.lrp(float %585, float %764, float %674)
%814 = call float @llvm.AMDGPU.lrp(float %585, float %787, float %696)
%815 = call float @llvm.AMDGPU.lrp(float %585, float %811, float %719)
%816 = call float @llvm.AMDGPU.lrp(float %587, float %814, float %812)
%817 = call float @llvm.AMDGPU.lrp(float %587, float %815, float %813)
%818 = call float @llvm.AMDGPU.lrp(float %589, float %817, float %816)
%819 = fmul float %818, 0x3FF19999A0000000
%820 = fadd float %819, 1.000000e+00
%821 = fmul float %820, 0x3FC851EB80000000
%822 = fadd float %821, %570
%823 = fmul float %820, 0x3FC851EB80000000
%824 = fadd float %823, %571
%825 = fmul float %820, 0x3FC851EB80000000
%826 = fadd float %825, %75
%827 = fmul float %822, 0x400A666660000000
%828 = fmul float %824, 0x400A666660000000
%829 = fmul float %826, 0x400A666660000000
%830 = call float @llvm.floor.f32(float %827)
%831 = call float @llvm.floor.f32(float %828)
%832 = call float @llvm.floor.f32(float %829)
%833 = fmul float %830, 3.906250e-03
%834 = fadd float %833, 1.953125e-03
%835 = fmul float %831, 3.906250e-03
%836 = fadd float %835, 1.953125e-03
%837 = fmul float %832, 3.906250e-03
%838 = fadd float %837, 1.953125e-03
%839 = call float @llvm.floor.f32(float %827)
%840 = fsub float %827, %839
%841 = call float @llvm.floor.f32(float %828)
%842 = fsub float %828, %841
%843 = call float @llvm.floor.f32(float %829)
%844 = fsub float %829, %843
%845 = bitcast float %834 to i32
%846 = bitcast float %836 to i32
%847 = insertelement <2 x i32> undef, i32 %845, i32 0
%848 = insertelement <2 x i32> %847, i32 %846, i32 1
%849 = bitcast <8 x i32> %61 to <32 x i8>
%850 = bitcast <4 x i32> %63 to <16 x i8>
%851 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %848, <32 x i8> %849, <16 x i8> %850, i32 2)
%852 = extractelement <4 x float> %851, i32 3
%853 = fadd float %838, 3.906250e-03
%854 = fadd float %834, 0.000000e+00
%855 = fadd float %836, 3.906250e-03
%856 = bitcast float %854 to i32
%857 = bitcast float %855 to i32
%858 = insertelement <2 x i32> undef, i32 %856, i32 0
%859 = insertelement <2 x i32> %858, i32 %857, i32 1
%860 = bitcast <8 x i32> %61 to <32 x i8>
%861 = bitcast <4 x i32> %63 to <16 x i8>
%862 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %859, <32 x i8> %860, <16 x i8> %861, i32 2)
%863 = extractelement <4 x float> %862, i32 3
%864 = fadd float %838, 3.906250e-03
%865 = fadd float %834, 3.906250e-03
%866 = fadd float %836, 0.000000e+00
%867 = bitcast float %865 to i32
%868 = bitcast float %866 to i32
%869 = insertelement <2 x i32> undef, i32 %867, i32 0
%870 = insertelement <2 x i32> %869, i32 %868, i32 1
%871 = bitcast <8 x i32> %61 to <32 x i8>
%872 = bitcast <4 x i32> %63 to <16 x i8>
%873 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %870, <32 x i8> %871, <16 x i8> %872, i32 2)
%874 = extractelement <4 x float> %873, i32 3
%875 = fadd float %838, 3.906250e-03
%876 = fadd float %834, 3.906250e-03
%877 = fadd float %836, 3.906250e-03
%878 = bitcast float %876 to i32
%879 = bitcast float %877 to i32
%880 = insertelement <2 x i32> undef, i32 %878, i32 0
%881 = insertelement <2 x i32> %880, i32 %879, i32 1
%882 = bitcast <8 x i32> %61 to <32 x i8>
%883 = bitcast <4 x i32> %63 to <16 x i8>
%884 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %881, <32 x i8> %882, <16 x i8> %883, i32 2)
%885 = extractelement <4 x float> %884, i32 3
%886 = fadd float %838, 3.906250e-03
%887 = bitcast float %852 to i32
%888 = bitcast float %838 to i32
%889 = insertelement <2 x i32> undef, i32 %887, i32 0
%890 = insertelement <2 x i32> %889, i32 %888, i32 1
%891 = bitcast <8 x i32> %61 to <32 x i8>
%892 = bitcast <4 x i32> %63 to <16 x i8>
%893 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %890, <32 x i8> %891, <16 x i8> %892, i32 2)
%894 = extractelement <4 x float> %893, i32 0
%895 = extractelement <4 x float> %893, i32 1
%896 = extractelement <4 x float> %893, i32 2
%897 = fmul float %894, 4.000000e+00
%898 = fadd float %897, -1.000000e+00
%899 = fmul float %895, 4.000000e+00
%900 = fadd float %899, -1.000000e+00
%901 = fmul float %896, 4.000000e+00
%902 = fadd float %901, -1.000000e+00
%903 = fmul float %898, %840
%904 = fmul float %900, %842
%905 = fadd float %904, %903
%906 = fmul float %902, %844
%907 = fadd float %905, %906
%908 = bitcast float %852 to i32
%909 = bitcast float %853 to i32
%910 = insertelement <2 x i32> undef, i32 %908, i32 0
%911 = insertelement <2 x i32> %910, i32 %909, i32 1
%912 = bitcast <8 x i32> %61 to <32 x i8>
%913 = bitcast <4 x i32> %63 to <16 x i8>
%914 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %911, <32 x i8> %912, <16 x i8> %913, i32 2)
%915 = extractelement <4 x float> %914, i32 0
%916 = extractelement <4 x float> %914, i32 1
%917 = extractelement <4 x float> %914, i32 2
%918 = fmul float %915, 4.000000e+00
%919 = fadd float %918, -1.000000e+00
%920 = fmul float %916, 4.000000e+00
%921 = fadd float %920, -1.000000e+00
%922 = fmul float %917, 4.000000e+00
%923 = fadd float %922, -1.000000e+00
%924 = fadd float %844, -1.000000e+00
%925 = fmul float %919, %840
%926 = fmul float %921, %842
%927 = fadd float %926, %925
%928 = fmul float %923, %924
%929 = fadd float %927, %928
%930 = bitcast float %863 to i32
%931 = bitcast float %838 to i32
%932 = insertelement <2 x i32> undef, i32 %930, i32 0
%933 = insertelement <2 x i32> %932, i32 %931, i32 1
%934 = bitcast <8 x i32> %61 to <32 x i8>
%935 = bitcast <4 x i32> %63 to <16 x i8>
%936 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %933, <32 x i8> %934, <16 x i8> %935, i32 2)
%937 = extractelement <4 x float> %936, i32 0
%938 = extractelement <4 x float> %936, i32 1
%939 = extractelement <4 x float> %936, i32 2
%940 = fmul float %937, 4.000000e+00
%941 = fadd float %940, -1.000000e+00
%942 = fmul float %938, 4.000000e+00
%943 = fadd float %942, -1.000000e+00
%944 = fmul float %939, 4.000000e+00
%945 = fadd float %944, -1.000000e+00
%946 = fadd float %842, -1.000000e+00
%947 = fmul float %941, %840
%948 = fmul float %943, %946
%949 = fadd float %948, %947
%950 = fmul float %945, %844
%951 = fadd float %949, %950
%952 = bitcast float %863 to i32
%953 = bitcast float %864 to i32
%954 = insertelement <2 x i32> undef, i32 %952, i32 0
%955 = insertelement <2 x i32> %954, i32 %953, i32 1
%956 = bitcast <8 x i32> %61 to <32 x i8>
%957 = bitcast <4 x i32> %63 to <16 x i8>
%958 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %955, <32 x i8> %956, <16 x i8> %957, i32 2)
%959 = extractelement <4 x float> %958, i32 0
%960 = extractelement <4 x float> %958, i32 1
%961 = extractelement <4 x float> %958, i32 2
%962 = fmul float %959, 4.000000e+00
%963 = fadd float %962, -1.000000e+00
%964 = fmul float %960, 4.000000e+00
%965 = fadd float %964, -1.000000e+00
%966 = fmul float %961, 4.000000e+00
%967 = fadd float %966, -1.000000e+00
%968 = fadd float %842, -1.000000e+00
%969 = fadd float %844, -1.000000e+00
%970 = fmul float %963, %840
%971 = fmul float %965, %968
%972 = fadd float %971, %970
%973 = fmul float %967, %969
%974 = fadd float %972, %973
%975 = bitcast float %874 to i32
%976 = bitcast float %838 to i32
%977 = insertelement <2 x i32> undef, i32 %975, i32 0
%978 = insertelement <2 x i32> %977, i32 %976, i32 1
%979 = bitcast <8 x i32> %61 to <32 x i8>
%980 = bitcast <4 x i32> %63 to <16 x i8>
%981 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %978, <32 x i8> %979, <16 x i8> %980, i32 2)
%982 = extractelement <4 x float> %981, i32 0
%983 = extractelement <4 x float> %981, i32 1
%984 = extractelement <4 x float> %981, i32 2
%985 = fmul float %982, 4.000000e+00
%986 = fadd float %985, -1.000000e+00
%987 = fmul float %983, 4.000000e+00
%988 = fadd float %987, -1.000000e+00
%989 = fmul float %984, 4.000000e+00
%990 = fadd float %989, -1.000000e+00
%991 = fadd float %840, -1.000000e+00
%992 = fmul float %986, %991
%993 = fmul float %988, %842
%994 = fadd float %993, %992
%995 = fmul float %990, %844
%996 = fadd float %994, %995
%997 = bitcast float %874 to i32
%998 = bitcast float %875 to i32
%999 = insertelement <2 x i32> undef, i32 %997, i32 0
%1000 = insertelement <2 x i32> %999, i32 %998, i32 1
%1001 = bitcast <8 x i32> %61 to <32 x i8>
%1002 = bitcast <4 x i32> %63 to <16 x i8>
%1003 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1000, <32 x i8> %1001, <16 x i8> %1002, i32 2)
%1004 = extractelement <4 x float> %1003, i32 0
%1005 = extractelement <4 x float> %1003, i32 1
%1006 = extractelement <4 x float> %1003, i32 2
%1007 = fmul float %1004, 4.000000e+00
%1008 = fadd float %1007, -1.000000e+00
%1009 = fmul float %1005, 4.000000e+00
%1010 = fadd float %1009, -1.000000e+00
%1011 = fmul float %1006, 4.000000e+00
%1012 = fadd float %1011, -1.000000e+00
%1013 = fadd float %840, -1.000000e+00
%1014 = fadd float %844, -1.000000e+00
%1015 = fmul float %1008, %1013
%1016 = fmul float %1010, %842
%1017 = fadd float %1016, %1015
%1018 = fmul float %1012, %1014
%1019 = fadd float %1017, %1018
%1020 = bitcast float %885 to i32
%1021 = bitcast float %838 to i32
%1022 = insertelement <2 x i32> undef, i32 %1020, i32 0
%1023 = insertelement <2 x i32> %1022, i32 %1021, i32 1
%1024 = bitcast <8 x i32> %61 to <32 x i8>
%1025 = bitcast <4 x i32> %63 to <16 x i8>
%1026 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1023, <32 x i8> %1024, <16 x i8> %1025, i32 2)
%1027 = extractelement <4 x float> %1026, i32 0
%1028 = extractelement <4 x float> %1026, i32 1
%1029 = extractelement <4 x float> %1026, i32 2
%1030 = fmul float %1027, 4.000000e+00
%1031 = fadd float %1030, -1.000000e+00
%1032 = fmul float %1028, 4.000000e+00
%1033 = fadd float %1032, -1.000000e+00
%1034 = fmul float %1029, 4.000000e+00
%1035 = fadd float %1034, -1.000000e+00
%1036 = fadd float %840, -1.000000e+00
%1037 = fadd float %842, -1.000000e+00
%1038 = fmul float %1031, %1036
%1039 = fmul float %1033, %1037
%1040 = fadd float %1039, %1038
%1041 = fmul float %1035, %844
%1042 = fadd float %1040, %1041
%1043 = bitcast float %885 to i32
%1044 = bitcast float %886 to i32
%1045 = insertelement <2 x i32> undef, i32 %1043, i32 0
%1046 = insertelement <2 x i32> %1045, i32 %1044, i32 1
%1047 = bitcast <8 x i32> %61 to <32 x i8>
%1048 = bitcast <4 x i32> %63 to <16 x i8>
%1049 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1046, <32 x i8> %1047, <16 x i8> %1048, i32 2)
%1050 = extractelement <4 x float> %1049, i32 0
%1051 = extractelement <4 x float> %1049, i32 1
%1052 = extractelement <4 x float> %1049, i32 2
%1053 = fmul float %1050, 4.000000e+00
%1054 = fadd float %1053, -1.000000e+00
%1055 = fmul float %1051, 4.000000e+00
%1056 = fadd float %1055, -1.000000e+00
%1057 = fmul float %1052, 4.000000e+00
%1058 = fadd float %1057, -1.000000e+00
%1059 = fadd float %840, -1.000000e+00
%1060 = fadd float %842, -1.000000e+00
%1061 = fadd float %844, -1.000000e+00
%1062 = fmul float %1054, %1059
%1063 = fmul float %1056, %1060
%1064 = fadd float %1063, %1062
%1065 = fmul float %1058, %1061
%1066 = fadd float %1064, %1065
%1067 = call float @llvm.AMDGPU.lrp(float %840, float %996, float %907)
%1068 = call float @llvm.AMDGPU.lrp(float %840, float %1019, float %929)
%1069 = call float @llvm.AMDGPU.lrp(float %840, float %1042, float %951)
%1070 = call float @llvm.AMDGPU.lrp(float %840, float %1066, float %974)
%1071 = call float @llvm.AMDGPU.lrp(float %842, float %1069, float %1067)
%1072 = call float @llvm.AMDGPU.lrp(float %842, float %1070, float %1068)
%1073 = call float @llvm.AMDGPU.lrp(float %844, float %1072, float %1071)
%1074 = call float @llvm.fabs.f32(float %1073)
%1075 = fmul float %1074, 0x3FD364D940000000
%1076 = fadd float %1075, %820
%1077 = fmul float %1076, 0x3FCAE147A0000000
%1078 = fadd float %1077, %570
%1079 = fmul float %1076, 0x3FCAE147A0000000
%1080 = fadd float %1079, %571
%1081 = fmul float %1076, 0x3FCAE147A0000000
%1082 = fadd float %1081, %75
%1083 = fmul float %1078, %54
%1084 = fmul float %1080, %54
%1085 = fmul float %1082, %54
%1086 = call float @llvm.floor.f32(float %1083)
%1087 = call float @llvm.floor.f32(float %1084)
%1088 = call float @llvm.floor.f32(float %1085)
%1089 = fmul float %1086, 3.906250e-03
%1090 = fadd float %1089, 1.953125e-03
%1091 = fmul float %1087, 3.906250e-03
%1092 = fadd float %1091, 1.953125e-03
%1093 = fmul float %1088, 3.906250e-03
%1094 = fadd float %1093, 1.953125e-03
%1095 = call float @llvm.floor.f32(float %1083)
%1096 = fsub float %1083, %1095
%1097 = call float @llvm.floor.f32(float %1084)
%1098 = fsub float %1084, %1097
%1099 = call float @llvm.floor.f32(float %1085)
%1100 = fsub float %1085, %1099
%1101 = bitcast float %1090 to i32
%1102 = bitcast float %1092 to i32
%1103 = insertelement <2 x i32> undef, i32 %1101, i32 0
%1104 = insertelement <2 x i32> %1103, i32 %1102, i32 1
%1105 = bitcast <8 x i32> %61 to <32 x i8>
%1106 = bitcast <4 x i32> %63 to <16 x i8>
%1107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1104, <32 x i8> %1105, <16 x i8> %1106, i32 2)
%1108 = extractelement <4 x float> %1107, i32 3
%1109 = fadd float %1094, 3.906250e-03
%1110 = fadd float %1090, 0.000000e+00
%1111 = fadd float %1092, 3.906250e-03
%1112 = bitcast float %1110 to i32
%1113 = bitcast float %1111 to i32
%1114 = insertelement <2 x i32> undef, i32 %1112, i32 0
%1115 = insertelement <2 x i32> %1114, i32 %1113, i32 1
%1116 = bitcast <8 x i32> %61 to <32 x i8>
%1117 = bitcast <4 x i32> %63 to <16 x i8>
%1118 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1115, <32 x i8> %1116, <16 x i8> %1117, i32 2)
%1119 = extractelement <4 x float> %1118, i32 3
%1120 = fadd float %1094, 3.906250e-03
%1121 = fadd float %1090, 3.906250e-03
%1122 = fadd float %1092, 0.000000e+00
%1123 = bitcast float %1121 to i32
%1124 = bitcast float %1122 to i32
%1125 = insertelement <2 x i32> undef, i32 %1123, i32 0
%1126 = insertelement <2 x i32> %1125, i32 %1124, i32 1
%1127 = bitcast <8 x i32> %61 to <32 x i8>
%1128 = bitcast <4 x i32> %63 to <16 x i8>
%1129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1126, <32 x i8> %1127, <16 x i8> %1128, i32 2)
%1130 = extractelement <4 x float> %1129, i32 3
%1131 = fadd float %1094, 3.906250e-03
%1132 = fadd float %1090, 3.906250e-03
%1133 = fadd float %1092, 3.906250e-03
%1134 = bitcast float %1132 to i32
%1135 = bitcast float %1133 to i32
%1136 = insertelement <2 x i32> undef, i32 %1134, i32 0
%1137 = insertelement <2 x i32> %1136, i32 %1135, i32 1
%1138 = bitcast <8 x i32> %61 to <32 x i8>
%1139 = bitcast <4 x i32> %63 to <16 x i8>
%1140 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1137, <32 x i8> %1138, <16 x i8> %1139, i32 2)
%1141 = extractelement <4 x float> %1140, i32 3
%1142 = fadd float %1094, 3.906250e-03
%1143 = bitcast float %1108 to i32
%1144 = bitcast float %1094 to i32
%1145 = insertelement <2 x i32> undef, i32 %1143, i32 0
%1146 = insertelement <2 x i32> %1145, i32 %1144, i32 1
%1147 = bitcast <8 x i32> %61 to <32 x i8>
%1148 = bitcast <4 x i32> %63 to <16 x i8>
%1149 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1146, <32 x i8> %1147, <16 x i8> %1148, i32 2)
%1150 = extractelement <4 x float> %1149, i32 0
%1151 = extractelement <4 x float> %1149, i32 1
%1152 = extractelement <4 x float> %1149, i32 2
%1153 = fmul float %1150, 4.000000e+00
%1154 = fadd float %1153, -1.000000e+00
%1155 = fmul float %1151, 4.000000e+00
%1156 = fadd float %1155, -1.000000e+00
%1157 = fmul float %1152, 4.000000e+00
%1158 = fadd float %1157, -1.000000e+00
%1159 = fmul float %1154, %1096
%1160 = fmul float %1156, %1098
%1161 = fadd float %1160, %1159
%1162 = fmul float %1158, %1100
%1163 = fadd float %1161, %1162
%1164 = bitcast float %1108 to i32
%1165 = bitcast float %1109 to i32
%1166 = insertelement <2 x i32> undef, i32 %1164, i32 0
%1167 = insertelement <2 x i32> %1166, i32 %1165, i32 1
%1168 = bitcast <8 x i32> %61 to <32 x i8>
%1169 = bitcast <4 x i32> %63 to <16 x i8>
%1170 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1167, <32 x i8> %1168, <16 x i8> %1169, i32 2)
%1171 = extractelement <4 x float> %1170, i32 0
%1172 = extractelement <4 x float> %1170, i32 1
%1173 = extractelement <4 x float> %1170, i32 2
%1174 = fmul float %1171, 4.000000e+00
%1175 = fadd float %1174, -1.000000e+00
%1176 = fmul float %1172, 4.000000e+00
%1177 = fadd float %1176, -1.000000e+00
%1178 = fmul float %1173, 4.000000e+00
%1179 = fadd float %1178, -1.000000e+00
%1180 = fadd float %1100, -1.000000e+00
%1181 = fmul float %1175, %1096
%1182 = fmul float %1177, %1098
%1183 = fadd float %1182, %1181
%1184 = fmul float %1179, %1180
%1185 = fadd float %1183, %1184
%1186 = bitcast float %1119 to i32
%1187 = bitcast float %1094 to i32
%1188 = insertelement <2 x i32> undef, i32 %1186, i32 0
%1189 = insertelement <2 x i32> %1188, i32 %1187, i32 1
%1190 = bitcast <8 x i32> %61 to <32 x i8>
%1191 = bitcast <4 x i32> %63 to <16 x i8>
%1192 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1189, <32 x i8> %1190, <16 x i8> %1191, i32 2)
%1193 = extractelement <4 x float> %1192, i32 0
%1194 = extractelement <4 x float> %1192, i32 1
%1195 = extractelement <4 x float> %1192, i32 2
%1196 = fmul float %1193, 4.000000e+00
%1197 = fadd float %1196, -1.000000e+00
%1198 = fmul float %1194, 4.000000e+00
%1199 = fadd float %1198, -1.000000e+00
%1200 = fmul float %1195, 4.000000e+00
%1201 = fadd float %1200, -1.000000e+00
%1202 = fadd float %1098, -1.000000e+00
%1203 = fmul float %1197, %1096
%1204 = fmul float %1199, %1202
%1205 = fadd float %1204, %1203
%1206 = fmul float %1201, %1100
%1207 = fadd float %1205, %1206
%1208 = bitcast float %1119 to i32
%1209 = bitcast float %1120 to i32
%1210 = insertelement <2 x i32> undef, i32 %1208, i32 0
%1211 = insertelement <2 x i32> %1210, i32 %1209, i32 1
%1212 = bitcast <8 x i32> %61 to <32 x i8>
%1213 = bitcast <4 x i32> %63 to <16 x i8>
%1214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1211, <32 x i8> %1212, <16 x i8> %1213, i32 2)
%1215 = extractelement <4 x float> %1214, i32 0
%1216 = extractelement <4 x float> %1214, i32 1
%1217 = extractelement <4 x float> %1214, i32 2
%1218 = fmul float %1215, 4.000000e+00
%1219 = fadd float %1218, -1.000000e+00
%1220 = fmul float %1216, 4.000000e+00
%1221 = fadd float %1220, -1.000000e+00
%1222 = fmul float %1217, 4.000000e+00
%1223 = fadd float %1222, -1.000000e+00
%1224 = fadd float %1098, -1.000000e+00
%1225 = fadd float %1100, -1.000000e+00
%1226 = fmul float %1219, %1096
%1227 = fmul float %1221, %1224
%1228 = fadd float %1227, %1226
%1229 = fmul float %1223, %1225
%1230 = fadd float %1228, %1229
%1231 = bitcast float %1130 to i32
%1232 = bitcast float %1094 to i32
%1233 = insertelement <2 x i32> undef, i32 %1231, i32 0
%1234 = insertelement <2 x i32> %1233, i32 %1232, i32 1
%1235 = bitcast <8 x i32> %61 to <32 x i8>
%1236 = bitcast <4 x i32> %63 to <16 x i8>
%1237 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1234, <32 x i8> %1235, <16 x i8> %1236, i32 2)
%1238 = extractelement <4 x float> %1237, i32 0
%1239 = extractelement <4 x float> %1237, i32 1
%1240 = extractelement <4 x float> %1237, i32 2
%1241 = fmul float %1238, 4.000000e+00
%1242 = fadd float %1241, -1.000000e+00
%1243 = fmul float %1239, 4.000000e+00
%1244 = fadd float %1243, -1.000000e+00
%1245 = fmul float %1240, 4.000000e+00
%1246 = fadd float %1245, -1.000000e+00
%1247 = fadd float %1096, -1.000000e+00
%1248 = fmul float %1242, %1247
%1249 = fmul float %1244, %1098
%1250 = fadd float %1249, %1248
%1251 = fmul float %1246, %1100
%1252 = fadd float %1250, %1251
%1253 = bitcast float %1130 to i32
%1254 = bitcast float %1131 to i32
%1255 = insertelement <2 x i32> undef, i32 %1253, i32 0
%1256 = insertelement <2 x i32> %1255, i32 %1254, i32 1
%1257 = bitcast <8 x i32> %61 to <32 x i8>
%1258 = bitcast <4 x i32> %63 to <16 x i8>
%1259 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1256, <32 x i8> %1257, <16 x i8> %1258, i32 2)
%1260 = extractelement <4 x float> %1259, i32 0
%1261 = extractelement <4 x float> %1259, i32 1
%1262 = extractelement <4 x float> %1259, i32 2
%1263 = fmul float %1260, 4.000000e+00
%1264 = fadd float %1263, -1.000000e+00
%1265 = fmul float %1261, 4.000000e+00
%1266 = fadd float %1265, -1.000000e+00
%1267 = fmul float %1262, 4.000000e+00
%1268 = fadd float %1267, -1.000000e+00
%1269 = fadd float %1096, -1.000000e+00
%1270 = fadd float %1100, -1.000000e+00
%1271 = fmul float %1264, %1269
%1272 = fmul float %1266, %1098
%1273 = fadd float %1272, %1271
%1274 = fmul float %1268, %1270
%1275 = fadd float %1273, %1274
%1276 = bitcast float %1141 to i32
%1277 = bitcast float %1094 to i32
%1278 = insertelement <2 x i32> undef, i32 %1276, i32 0
%1279 = insertelement <2 x i32> %1278, i32 %1277, i32 1
%1280 = bitcast <8 x i32> %61 to <32 x i8>
%1281 = bitcast <4 x i32> %63 to <16 x i8>
%1282 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1279, <32 x i8> %1280, <16 x i8> %1281, i32 2)
%1283 = extractelement <4 x float> %1282, i32 0
%1284 = extractelement <4 x float> %1282, i32 1
%1285 = extractelement <4 x float> %1282, i32 2
%1286 = fmul float %1283, 4.000000e+00
%1287 = fadd float %1286, -1.000000e+00
%1288 = fmul float %1284, 4.000000e+00
%1289 = fadd float %1288, -1.000000e+00
%1290 = fmul float %1285, 4.000000e+00
%1291 = fadd float %1290, -1.000000e+00
%1292 = fadd float %1096, -1.000000e+00
%1293 = fadd float %1098, -1.000000e+00
%1294 = fmul float %1287, %1292
%1295 = fmul float %1289, %1293
%1296 = fadd float %1295, %1294
%1297 = fmul float %1291, %1100
%1298 = fadd float %1296, %1297
%1299 = bitcast float %1141 to i32
%1300 = bitcast float %1142 to i32
%1301 = insertelement <2 x i32> undef, i32 %1299, i32 0
%1302 = insertelement <2 x i32> %1301, i32 %1300, i32 1
%1303 = bitcast <8 x i32> %61 to <32 x i8>
%1304 = bitcast <4 x i32> %63 to <16 x i8>
%1305 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1302, <32 x i8> %1303, <16 x i8> %1304, i32 2)
%1306 = extractelement <4 x float> %1305, i32 0
%1307 = extractelement <4 x float> %1305, i32 1
%1308 = extractelement <4 x float> %1305, i32 2
%1309 = fmul float %1306, 4.000000e+00
%1310 = fadd float %1309, -1.000000e+00
%1311 = fmul float %1307, 4.000000e+00
%1312 = fadd float %1311, -1.000000e+00
%1313 = fmul float %1308, 4.000000e+00
%1314 = fadd float %1313, -1.000000e+00
%1315 = fadd float %1096, -1.000000e+00
%1316 = fadd float %1098, -1.000000e+00
%1317 = fadd float %1100, -1.000000e+00
%1318 = fmul float %1310, %1315
%1319 = fmul float %1312, %1316
%1320 = fadd float %1319, %1318
%1321 = fmul float %1314, %1317
%1322 = fadd float %1320, %1321
%1323 = call float @llvm.AMDGPU.lrp(float %1096, float %1252, float %1163)
%1324 = call float @llvm.AMDGPU.lrp(float %1096, float %1275, float %1185)
%1325 = call float @llvm.AMDGPU.lrp(float %1096, float %1298, float %1207)
%1326 = call float @llvm.AMDGPU.lrp(float %1096, float %1322, float %1230)
%1327 = call float @llvm.AMDGPU.lrp(float %1098, float %1325, float %1323)
%1328 = call float @llvm.AMDGPU.lrp(float %1098, float %1326, float %1324)
%1329 = call float @llvm.AMDGPU.lrp(float %1100, float %1328, float %1327)
%1330 = call float @llvm.fabs.f32(float %1329)
%1331 = fdiv float 1.000000e+00, %57
%1332 = fmul float %1330, %1331
%1333 = fadd float %1332, %1076
%1334 = fmul float %1333, 0x3FCD70A3E0000000
%1335 = fadd float %1334, %570
%1336 = fmul float %1333, 0x3FCD70A3E0000000
%1337 = fadd float %1336, %571
%1338 = fmul float %1333, 0x3FCD70A3E0000000
%1339 = fadd float %1338, %75
%1340 = fmul float %1335, %55
%1341 = fmul float %1337, %55
%1342 = fmul float %1339, %55
%1343 = call float @llvm.floor.f32(float %1340)
%1344 = call float @llvm.floor.f32(float %1341)
%1345 = call float @llvm.floor.f32(float %1342)
%1346 = fmul float %1343, 3.906250e-03
%1347 = fadd float %1346, 1.953125e-03
%1348 = fmul float %1344, 3.906250e-03
%1349 = fadd float %1348, 1.953125e-03
%1350 = fmul float %1345, 3.906250e-03
%1351 = fadd float %1350, 1.953125e-03
%1352 = call float @llvm.floor.f32(float %1340)
%1353 = fsub float %1340, %1352
%1354 = call float @llvm.floor.f32(float %1341)
%1355 = fsub float %1341, %1354
%1356 = call float @llvm.floor.f32(float %1342)
%1357 = fsub float %1342, %1356
%1358 = bitcast float %1347 to i32
%1359 = bitcast float %1349 to i32
%1360 = insertelement <2 x i32> undef, i32 %1358, i32 0
%1361 = insertelement <2 x i32> %1360, i32 %1359, i32 1
%1362 = bitcast <8 x i32> %61 to <32 x i8>
%1363 = bitcast <4 x i32> %63 to <16 x i8>
%1364 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1361, <32 x i8> %1362, <16 x i8> %1363, i32 2)
%1365 = extractelement <4 x float> %1364, i32 3
%1366 = fadd float %1351, 3.906250e-03
%1367 = fadd float %1347, 0.000000e+00
%1368 = fadd float %1349, 3.906250e-03
%1369 = bitcast float %1367 to i32
%1370 = bitcast float %1368 to i32
%1371 = insertelement <2 x i32> undef, i32 %1369, i32 0
%1372 = insertelement <2 x i32> %1371, i32 %1370, i32 1
%1373 = bitcast <8 x i32> %61 to <32 x i8>
%1374 = bitcast <4 x i32> %63 to <16 x i8>
%1375 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1372, <32 x i8> %1373, <16 x i8> %1374, i32 2)
%1376 = extractelement <4 x float> %1375, i32 3
%1377 = fadd float %1351, 3.906250e-03
%1378 = fadd float %1347, 3.906250e-03
%1379 = fadd float %1349, 0.000000e+00
%1380 = bitcast float %1378 to i32
%1381 = bitcast float %1379 to i32
%1382 = insertelement <2 x i32> undef, i32 %1380, i32 0
%1383 = insertelement <2 x i32> %1382, i32 %1381, i32 1
%1384 = bitcast <8 x i32> %61 to <32 x i8>
%1385 = bitcast <4 x i32> %63 to <16 x i8>
%1386 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1383, <32 x i8> %1384, <16 x i8> %1385, i32 2)
%1387 = extractelement <4 x float> %1386, i32 3
%1388 = fadd float %1351, 3.906250e-03
%1389 = fadd float %1347, 3.906250e-03
%1390 = fadd float %1349, 3.906250e-03
%1391 = bitcast float %1389 to i32
%1392 = bitcast float %1390 to i32
%1393 = insertelement <2 x i32> undef, i32 %1391, i32 0
%1394 = insertelement <2 x i32> %1393, i32 %1392, i32 1
%1395 = bitcast <8 x i32> %61 to <32 x i8>
%1396 = bitcast <4 x i32> %63 to <16 x i8>
%1397 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1394, <32 x i8> %1395, <16 x i8> %1396, i32 2)
%1398 = extractelement <4 x float> %1397, i32 3
%1399 = fadd float %1351, 3.906250e-03
%1400 = bitcast float %1365 to i32
%1401 = bitcast float %1351 to i32
%1402 = insertelement <2 x i32> undef, i32 %1400, i32 0
%1403 = insertelement <2 x i32> %1402, i32 %1401, i32 1
%1404 = bitcast <8 x i32> %61 to <32 x i8>
%1405 = bitcast <4 x i32> %63 to <16 x i8>
%1406 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2)
%1407 = extractelement <4 x float> %1406, i32 0
%1408 = extractelement <4 x float> %1406, i32 1
%1409 = extractelement <4 x float> %1406, i32 2
%1410 = fmul float %1407, 4.000000e+00
%1411 = fadd float %1410, -1.000000e+00
%1412 = fmul float %1408, 4.000000e+00
%1413 = fadd float %1412, -1.000000e+00
%1414 = fmul float %1409, 4.000000e+00
%1415 = fadd float %1414, -1.000000e+00
%1416 = fmul float %1411, %1353
%1417 = fmul float %1413, %1355
%1418 = fadd float %1417, %1416
%1419 = fmul float %1415, %1357
%1420 = fadd float %1418, %1419
%1421 = bitcast float %1365 to i32
%1422 = bitcast float %1366 to i32
%1423 = insertelement <2 x i32> undef, i32 %1421, i32 0
%1424 = insertelement <2 x i32> %1423, i32 %1422, i32 1
%1425 = bitcast <8 x i32> %61 to <32 x i8>
%1426 = bitcast <4 x i32> %63 to <16 x i8>
%1427 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1424, <32 x i8> %1425, <16 x i8> %1426, i32 2)
%1428 = extractelement <4 x float> %1427, i32 0
%1429 = extractelement <4 x float> %1427, i32 1
%1430 = extractelement <4 x float> %1427, i32 2
%1431 = fmul float %1428, 4.000000e+00
%1432 = fadd float %1431, -1.000000e+00
%1433 = fmul float %1429, 4.000000e+00
%1434 = fadd float %1433, -1.000000e+00
%1435 = fmul float %1430, 4.000000e+00
%1436 = fadd float %1435, -1.000000e+00
%1437 = fadd float %1357, -1.000000e+00
%1438 = fmul float %1432, %1353
%1439 = fmul float %1434, %1355
%1440 = fadd float %1439, %1438
%1441 = fmul float %1436, %1437
%1442 = fadd float %1440, %1441
%1443 = bitcast float %1376 to i32
%1444 = bitcast float %1351 to i32
%1445 = insertelement <2 x i32> undef, i32 %1443, i32 0
%1446 = insertelement <2 x i32> %1445, i32 %1444, i32 1
%1447 = bitcast <8 x i32> %61 to <32 x i8>
%1448 = bitcast <4 x i32> %63 to <16 x i8>
%1449 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1446, <32 x i8> %1447, <16 x i8> %1448, i32 2)
%1450 = extractelement <4 x float> %1449, i32 0
%1451 = extractelement <4 x float> %1449, i32 1
%1452 = extractelement <4 x float> %1449, i32 2
%1453 = fmul float %1450, 4.000000e+00
%1454 = fadd float %1453, -1.000000e+00
%1455 = fmul float %1451, 4.000000e+00
%1456 = fadd float %1455, -1.000000e+00
%1457 = fmul float %1452, 4.000000e+00
%1458 = fadd float %1457, -1.000000e+00
%1459 = fadd float %1355, -1.000000e+00
%1460 = fmul float %1454, %1353
%1461 = fmul float %1456, %1459
%1462 = fadd float %1461, %1460
%1463 = fmul float %1458, %1357
%1464 = fadd float %1462, %1463
%1465 = bitcast float %1376 to i32
%1466 = bitcast float %1377 to i32
%1467 = insertelement <2 x i32> undef, i32 %1465, i32 0
%1468 = insertelement <2 x i32> %1467, i32 %1466, i32 1
%1469 = bitcast <8 x i32> %61 to <32 x i8>
%1470 = bitcast <4 x i32> %63 to <16 x i8>
%1471 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1468, <32 x i8> %1469, <16 x i8> %1470, i32 2)
%1472 = extractelement <4 x float> %1471, i32 0
%1473 = extractelement <4 x float> %1471, i32 1
%1474 = extractelement <4 x float> %1471, i32 2
%1475 = fmul float %1472, 4.000000e+00
%1476 = fadd float %1475, -1.000000e+00
%1477 = fmul float %1473, 4.000000e+00
%1478 = fadd float %1477, -1.000000e+00
%1479 = fmul float %1474, 4.000000e+00
%1480 = fadd float %1479, -1.000000e+00
%1481 = fadd float %1355, -1.000000e+00
%1482 = fadd float %1357, -1.000000e+00
%1483 = fmul float %1476, %1353
%1484 = fmul float %1478, %1481
%1485 = fadd float %1484, %1483
%1486 = fmul float %1480, %1482
%1487 = fadd float %1485, %1486
%1488 = bitcast float %1387 to i32
%1489 = bitcast float %1351 to i32
%1490 = insertelement <2 x i32> undef, i32 %1488, i32 0
%1491 = insertelement <2 x i32> %1490, i32 %1489, i32 1
%1492 = bitcast <8 x i32> %61 to <32 x i8>
%1493 = bitcast <4 x i32> %63 to <16 x i8>
%1494 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1491, <32 x i8> %1492, <16 x i8> %1493, i32 2)
%1495 = extractelement <4 x float> %1494, i32 0
%1496 = extractelement <4 x float> %1494, i32 1
%1497 = extractelement <4 x float> %1494, i32 2
%1498 = fmul float %1495, 4.000000e+00
%1499 = fadd float %1498, -1.000000e+00
%1500 = fmul float %1496, 4.000000e+00
%1501 = fadd float %1500, -1.000000e+00
%1502 = fmul float %1497, 4.000000e+00
%1503 = fadd float %1502, -1.000000e+00
%1504 = fadd float %1353, -1.000000e+00
%1505 = fmul float %1499, %1504
%1506 = fmul float %1501, %1355
%1507 = fadd float %1506, %1505
%1508 = fmul float %1503, %1357
%1509 = fadd float %1507, %1508
%1510 = bitcast float %1387 to i32
%1511 = bitcast float %1388 to i32
%1512 = insertelement <2 x i32> undef, i32 %1510, i32 0
%1513 = insertelement <2 x i32> %1512, i32 %1511, i32 1
%1514 = bitcast <8 x i32> %61 to <32 x i8>
%1515 = bitcast <4 x i32> %63 to <16 x i8>
%1516 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1513, <32 x i8> %1514, <16 x i8> %1515, i32 2)
%1517 = extractelement <4 x float> %1516, i32 0
%1518 = extractelement <4 x float> %1516, i32 1
%1519 = extractelement <4 x float> %1516, i32 2
%1520 = fmul float %1517, 4.000000e+00
%1521 = fadd float %1520, -1.000000e+00
%1522 = fmul float %1518, 4.000000e+00
%1523 = fadd float %1522, -1.000000e+00
%1524 = fmul float %1519, 4.000000e+00
%1525 = fadd float %1524, -1.000000e+00
%1526 = fadd float %1353, -1.000000e+00
%1527 = fadd float %1357, -1.000000e+00
%1528 = fmul float %1521, %1526
%1529 = fmul float %1523, %1355
%1530 = fadd float %1529, %1528
%1531 = fmul float %1525, %1527
%1532 = fadd float %1530, %1531
%1533 = bitcast float %1398 to i32
%1534 = bitcast float %1351 to i32
%1535 = insertelement <2 x i32> undef, i32 %1533, i32 0
%1536 = insertelement <2 x i32> %1535, i32 %1534, i32 1
%1537 = bitcast <8 x i32> %61 to <32 x i8>
%1538 = bitcast <4 x i32> %63 to <16 x i8>
%1539 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1536, <32 x i8> %1537, <16 x i8> %1538, i32 2)
%1540 = extractelement <4 x float> %1539, i32 0
%1541 = extractelement <4 x float> %1539, i32 1
%1542 = extractelement <4 x float> %1539, i32 2
%1543 = fmul float %1540, 4.000000e+00
%1544 = fadd float %1543, -1.000000e+00
%1545 = fmul float %1541, 4.000000e+00
%1546 = fadd float %1545, -1.000000e+00
%1547 = fmul float %1542, 4.000000e+00
%1548 = fadd float %1547, -1.000000e+00
%1549 = fadd float %1353, -1.000000e+00
%1550 = fadd float %1355, -1.000000e+00
%1551 = fmul float %1544, %1549
%1552 = fmul float %1546, %1550
%1553 = fadd float %1552, %1551
%1554 = fmul float %1548, %1357
%1555 = fadd float %1553, %1554
%1556 = bitcast float %1398 to i32
%1557 = bitcast float %1399 to i32
%1558 = insertelement <2 x i32> undef, i32 %1556, i32 0
%1559 = insertelement <2 x i32> %1558, i32 %1557, i32 1
%1560 = bitcast <8 x i32> %61 to <32 x i8>
%1561 = bitcast <4 x i32> %63 to <16 x i8>
%1562 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1559, <32 x i8> %1560, <16 x i8> %1561, i32 2)
%1563 = extractelement <4 x float> %1562, i32 0
%1564 = extractelement <4 x float> %1562, i32 1
%1565 = extractelement <4 x float> %1562, i32 2
%1566 = fmul float %1563, 4.000000e+00
%1567 = fadd float %1566, -1.000000e+00
%1568 = fmul float %1564, 4.000000e+00
%1569 = fadd float %1568, -1.000000e+00
%1570 = fmul float %1565, 4.000000e+00
%1571 = fadd float %1570, -1.000000e+00
%1572 = fadd float %1353, -1.000000e+00
%1573 = fadd float %1355, -1.000000e+00
%1574 = fadd float %1357, -1.000000e+00
%1575 = fmul float %1567, %1572
%1576 = fmul float %1569, %1573
%1577 = fadd float %1576, %1575
%1578 = fmul float %1571, %1574
%1579 = fadd float %1577, %1578
%1580 = call float @llvm.AMDGPU.lrp(float %1353, float %1509, float %1420)
%1581 = call float @llvm.AMDGPU.lrp(float %1353, float %1532, float %1442)
%1582 = call float @llvm.AMDGPU.lrp(float %1353, float %1555, float %1464)
%1583 = call float @llvm.AMDGPU.lrp(float %1353, float %1579, float %1487)
%1584 = call float @llvm.AMDGPU.lrp(float %1355, float %1582, float %1580)
%1585 = call float @llvm.AMDGPU.lrp(float %1355, float %1583, float %1581)
%1586 = call float @llvm.AMDGPU.lrp(float %1357, float %1585, float %1584)
%1587 = call float @llvm.fabs.f32(float %1586)
%1588 = fdiv float 1.000000e+00, %58
%1589 = fmul float %1587, %1588
%1590 = fadd float %1589, %1333
%1591 = fmul float %1590, 2.500000e-01
%1592 = fadd float %1591, %570
%1593 = fmul float %1590, 2.500000e-01
%1594 = fadd float %1593, %571
%1595 = fmul float %1590, 2.500000e-01
%1596 = fadd float %1595, %75
%1597 = fmul float %1592, %56
%1598 = fmul float %1594, %56
%1599 = fmul float %1596, %56
%1600 = call float @llvm.floor.f32(float %1597)
%1601 = call float @llvm.floor.f32(float %1598)
%1602 = call float @llvm.floor.f32(float %1599)
%1603 = fmul float %1600, 3.906250e-03
%1604 = fadd float %1603, 1.953125e-03
%1605 = fmul float %1601, 3.906250e-03
%1606 = fadd float %1605, 1.953125e-03
%1607 = fmul float %1602, 3.906250e-03
%1608 = fadd float %1607, 1.953125e-03
%1609 = call float @llvm.floor.f32(float %1597)
%1610 = fsub float %1597, %1609
%1611 = call float @llvm.floor.f32(float %1598)
%1612 = fsub float %1598, %1611
%1613 = call float @llvm.floor.f32(float %1599)
%1614 = fsub float %1599, %1613
%1615 = bitcast float %1604 to i32
%1616 = bitcast float %1606 to i32
%1617 = insertelement <2 x i32> undef, i32 %1615, i32 0
%1618 = insertelement <2 x i32> %1617, i32 %1616, i32 1
%1619 = bitcast <8 x i32> %61 to <32 x i8>
%1620 = bitcast <4 x i32> %63 to <16 x i8>
%1621 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1618, <32 x i8> %1619, <16 x i8> %1620, i32 2)
%1622 = extractelement <4 x float> %1621, i32 3
%1623 = fadd float %1608, 3.906250e-03
%1624 = fadd float %1604, 0.000000e+00
%1625 = fadd float %1606, 3.906250e-03
%1626 = bitcast float %1624 to i32
%1627 = bitcast float %1625 to i32
%1628 = insertelement <2 x i32> undef, i32 %1626, i32 0
%1629 = insertelement <2 x i32> %1628, i32 %1627, i32 1
%1630 = bitcast <8 x i32> %61 to <32 x i8>
%1631 = bitcast <4 x i32> %63 to <16 x i8>
%1632 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1629, <32 x i8> %1630, <16 x i8> %1631, i32 2)
%1633 = extractelement <4 x float> %1632, i32 3
%1634 = fadd float %1608, 3.906250e-03
%1635 = fadd float %1604, 3.906250e-03
%1636 = fadd float %1606, 0.000000e+00
%1637 = bitcast float %1635 to i32
%1638 = bitcast float %1636 to i32
%1639 = insertelement <2 x i32> undef, i32 %1637, i32 0
%1640 = insertelement <2 x i32> %1639, i32 %1638, i32 1
%1641 = bitcast <8 x i32> %61 to <32 x i8>
%1642 = bitcast <4 x i32> %63 to <16 x i8>
%1643 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1640, <32 x i8> %1641, <16 x i8> %1642, i32 2)
%1644 = extractelement <4 x float> %1643, i32 3
%1645 = fadd float %1608, 3.906250e-03
%1646 = fadd float %1604, 3.906250e-03
%1647 = fadd float %1606, 3.906250e-03
%1648 = bitcast float %1646 to i32
%1649 = bitcast float %1647 to i32
%1650 = insertelement <2 x i32> undef, i32 %1648, i32 0
%1651 = insertelement <2 x i32> %1650, i32 %1649, i32 1
%1652 = bitcast <8 x i32> %61 to <32 x i8>
%1653 = bitcast <4 x i32> %63 to <16 x i8>
%1654 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1651, <32 x i8> %1652, <16 x i8> %1653, i32 2)
%1655 = extractelement <4 x float> %1654, i32 3
%1656 = fadd float %1608, 3.906250e-03
%1657 = bitcast float %1622 to i32
%1658 = bitcast float %1608 to i32
%1659 = insertelement <2 x i32> undef, i32 %1657, i32 0
%1660 = insertelement <2 x i32> %1659, i32 %1658, i32 1
%1661 = bitcast <8 x i32> %61 to <32 x i8>
%1662 = bitcast <4 x i32> %63 to <16 x i8>
%1663 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1660, <32 x i8> %1661, <16 x i8> %1662, i32 2)
%1664 = extractelement <4 x float> %1663, i32 0
%1665 = extractelement <4 x float> %1663, i32 1
%1666 = extractelement <4 x float> %1663, i32 2
%1667 = fmul float %1664, 4.000000e+00
%1668 = fadd float %1667, -1.000000e+00
%1669 = fmul float %1665, 4.000000e+00
%1670 = fadd float %1669, -1.000000e+00
%1671 = fmul float %1666, 4.000000e+00
%1672 = fadd float %1671, -1.000000e+00
%1673 = fmul float %1668, %1610
%1674 = fmul float %1670, %1612
%1675 = fadd float %1674, %1673
%1676 = fmul float %1672, %1614
%1677 = fadd float %1675, %1676
%1678 = bitcast float %1622 to i32
%1679 = bitcast float %1623 to i32
%1680 = insertelement <2 x i32> undef, i32 %1678, i32 0
%1681 = insertelement <2 x i32> %1680, i32 %1679, i32 1
%1682 = bitcast <8 x i32> %61 to <32 x i8>
%1683 = bitcast <4 x i32> %63 to <16 x i8>
%1684 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1681, <32 x i8> %1682, <16 x i8> %1683, i32 2)
%1685 = extractelement <4 x float> %1684, i32 0
%1686 = extractelement <4 x float> %1684, i32 1
%1687 = extractelement <4 x float> %1684, i32 2
%1688 = fmul float %1685, 4.000000e+00
%1689 = fadd float %1688, -1.000000e+00
%1690 = fmul float %1686, 4.000000e+00
%1691 = fadd float %1690, -1.000000e+00
%1692 = fmul float %1687, 4.000000e+00
%1693 = fadd float %1692, -1.000000e+00
%1694 = fadd float %1614, -1.000000e+00
%1695 = fmul float %1689, %1610
%1696 = fmul float %1691, %1612
%1697 = fadd float %1696, %1695
%1698 = fmul float %1693, %1694
%1699 = fadd float %1697, %1698
%1700 = bitcast float %1633 to i32
%1701 = bitcast float %1608 to i32
%1702 = insertelement <2 x i32> undef, i32 %1700, i32 0
%1703 = insertelement <2 x i32> %1702, i32 %1701, i32 1
%1704 = bitcast <8 x i32> %61 to <32 x i8>
%1705 = bitcast <4 x i32> %63 to <16 x i8>
%1706 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1703, <32 x i8> %1704, <16 x i8> %1705, i32 2)
%1707 = extractelement <4 x float> %1706, i32 0
%1708 = extractelement <4 x float> %1706, i32 1
%1709 = extractelement <4 x float> %1706, i32 2
%1710 = fmul float %1707, 4.000000e+00
%1711 = fadd float %1710, -1.000000e+00
%1712 = fmul float %1708, 4.000000e+00
%1713 = fadd float %1712, -1.000000e+00
%1714 = fmul float %1709, 4.000000e+00
%1715 = fadd float %1714, -1.000000e+00
%1716 = fadd float %1612, -1.000000e+00
%1717 = fmul float %1711, %1610
%1718 = fmul float %1713, %1716
%1719 = fadd float %1718, %1717
%1720 = fmul float %1715, %1614
%1721 = fadd float %1719, %1720
%1722 = bitcast float %1633 to i32
%1723 = bitcast float %1634 to i32
%1724 = insertelement <2 x i32> undef, i32 %1722, i32 0
%1725 = insertelement <2 x i32> %1724, i32 %1723, i32 1
%1726 = bitcast <8 x i32> %61 to <32 x i8>
%1727 = bitcast <4 x i32> %63 to <16 x i8>
%1728 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2)
%1729 = extractelement <4 x float> %1728, i32 0
%1730 = extractelement <4 x float> %1728, i32 1
%1731 = extractelement <4 x float> %1728, i32 2
%1732 = fmul float %1729, 4.000000e+00
%1733 = fadd float %1732, -1.000000e+00
%1734 = fmul float %1730, 4.000000e+00
%1735 = fadd float %1734, -1.000000e+00
%1736 = fmul float %1731, 4.000000e+00
%1737 = fadd float %1736, -1.000000e+00
%1738 = fadd float %1612, -1.000000e+00
%1739 = fadd float %1614, -1.000000e+00
%1740 = fmul float %1733, %1610
%1741 = fmul float %1735, %1738
%1742 = fadd float %1741, %1740
%1743 = fmul float %1737, %1739
%1744 = fadd float %1742, %1743
%1745 = bitcast float %1644 to i32
%1746 = bitcast float %1608 to i32
%1747 = insertelement <2 x i32> undef, i32 %1745, i32 0
%1748 = insertelement <2 x i32> %1747, i32 %1746, i32 1
%1749 = bitcast <8 x i32> %61 to <32 x i8>
%1750 = bitcast <4 x i32> %63 to <16 x i8>
%1751 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1748, <32 x i8> %1749, <16 x i8> %1750, i32 2)
%1752 = extractelement <4 x float> %1751, i32 0
%1753 = extractelement <4 x float> %1751, i32 1
%1754 = extractelement <4 x float> %1751, i32 2
%1755 = fmul float %1752, 4.000000e+00
%1756 = fadd float %1755, -1.000000e+00
%1757 = fmul float %1753, 4.000000e+00
%1758 = fadd float %1757, -1.000000e+00
%1759 = fmul float %1754, 4.000000e+00
%1760 = fadd float %1759, -1.000000e+00
%1761 = fadd float %1610, -1.000000e+00
%1762 = fmul float %1756, %1761
%1763 = fmul float %1758, %1612
%1764 = fadd float %1763, %1762
%1765 = fmul float %1760, %1614
%1766 = fadd float %1764, %1765
%1767 = bitcast float %1644 to i32
%1768 = bitcast float %1645 to i32
%1769 = insertelement <2 x i32> undef, i32 %1767, i32 0
%1770 = insertelement <2 x i32> %1769, i32 %1768, i32 1
%1771 = bitcast <8 x i32> %61 to <32 x i8>
%1772 = bitcast <4 x i32> %63 to <16 x i8>
%1773 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1770, <32 x i8> %1771, <16 x i8> %1772, i32 2)
%1774 = extractelement <4 x float> %1773, i32 0
%1775 = extractelement <4 x float> %1773, i32 1
%1776 = extractelement <4 x float> %1773, i32 2
%1777 = fmul float %1774, 4.000000e+00
%1778 = fadd float %1777, -1.000000e+00
%1779 = fmul float %1775, 4.000000e+00
%1780 = fadd float %1779, -1.000000e+00
%1781 = fmul float %1776, 4.000000e+00
%1782 = fadd float %1781, -1.000000e+00
%1783 = fadd float %1610, -1.000000e+00
%1784 = fadd float %1614, -1.000000e+00
%1785 = fmul float %1778, %1783
%1786 = fmul float %1780, %1612
%1787 = fadd float %1786, %1785
%1788 = fmul float %1782, %1784
%1789 = fadd float %1787, %1788
%1790 = bitcast float %1655 to i32
%1791 = bitcast float %1608 to i32
%1792 = insertelement <2 x i32> undef, i32 %1790, i32 0
%1793 = insertelement <2 x i32> %1792, i32 %1791, i32 1
%1794 = bitcast <8 x i32> %61 to <32 x i8>
%1795 = bitcast <4 x i32> %63 to <16 x i8>
%1796 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1793, <32 x i8> %1794, <16 x i8> %1795, i32 2)
%1797 = extractelement <4 x float> %1796, i32 0
%1798 = extractelement <4 x float> %1796, i32 1
%1799 = extractelement <4 x float> %1796, i32 2
%1800 = fmul float %1797, 4.000000e+00
%1801 = fadd float %1800, -1.000000e+00
%1802 = fmul float %1798, 4.000000e+00
%1803 = fadd float %1802, -1.000000e+00
%1804 = fmul float %1799, 4.000000e+00
%1805 = fadd float %1804, -1.000000e+00
%1806 = fadd float %1610, -1.000000e+00
%1807 = fadd float %1612, -1.000000e+00
%1808 = fmul float %1801, %1806
%1809 = fmul float %1803, %1807
%1810 = fadd float %1809, %1808
%1811 = fmul float %1805, %1614
%1812 = fadd float %1810, %1811
%1813 = bitcast float %1655 to i32
%1814 = bitcast float %1656 to i32
%1815 = insertelement <2 x i32> undef, i32 %1813, i32 0
%1816 = insertelement <2 x i32> %1815, i32 %1814, i32 1
%1817 = bitcast <8 x i32> %61 to <32 x i8>
%1818 = bitcast <4 x i32> %63 to <16 x i8>
%1819 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1816, <32 x i8> %1817, <16 x i8> %1818, i32 2)
%1820 = extractelement <4 x float> %1819, i32 0
%1821 = extractelement <4 x float> %1819, i32 1
%1822 = extractelement <4 x float> %1819, i32 2
%1823 = fmul float %1820, 4.000000e+00
%1824 = fadd float %1823, -1.000000e+00
%1825 = fmul float %1821, 4.000000e+00
%1826 = fadd float %1825, -1.000000e+00
%1827 = fmul float %1822, 4.000000e+00
%1828 = fadd float %1827, -1.000000e+00
%1829 = fadd float %1610, -1.000000e+00
%1830 = fadd float %1612, -1.000000e+00
%1831 = fadd float %1614, -1.000000e+00
%1832 = fmul float %1824, %1829
%1833 = fmul float %1826, %1830
%1834 = fadd float %1833, %1832
%1835 = fmul float %1828, %1831
%1836 = fadd float %1834, %1835
%1837 = call float @llvm.AMDGPU.lrp(float %1610, float %1766, float %1677)
%1838 = call float @llvm.AMDGPU.lrp(float %1610, float %1789, float %1699)
%1839 = call float @llvm.AMDGPU.lrp(float %1610, float %1812, float %1721)
%1840 = call float @llvm.AMDGPU.lrp(float %1610, float %1836, float %1744)
%1841 = call float @llvm.AMDGPU.lrp(float %1612, float %1839, float %1837)
%1842 = call float @llvm.AMDGPU.lrp(float %1612, float %1840, float %1838)
%1843 = call float @llvm.AMDGPU.lrp(float %1614, float %1842, float %1841)
%1844 = call float @llvm.fabs.f32(float %1843)
%1845 = fdiv float 1.000000e+00, %59
%1846 = fmul float %1844, %1845
%1847 = fadd float %1846, %1590
%1848 = fsub float %1847, %28
%1849 = fsub float %67, %24
%1850 = fsub float %68, %25
%1851 = fsub float %69, %26
%1852 = fmul float %1849, %1849
%1853 = fmul float %1850, %1850
%1854 = fadd float %1853, %1852
%1855 = fmul float %1851, %1851
%1856 = fadd float %1854, %1855
%1857 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1856)
%1858 = fmul float %1849, %1857
%1859 = fmul float %1850, %1857
%1860 = fmul float %1851, %1857
%1861 = fmul float %1858, 0x3FF19999A0000000
%1862 = fmul float %1859, 0x3FF19999A0000000
%1863 = fmul float %1860, 0x3FF19999A0000000
%1864 = fmul float %46, %46
%1865 = fmul float %47, %47
%1866 = fadd float %1865, %1864
%1867 = fmul float %48, %48
%1868 = fadd float %1866, %1867
%1869 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1868)
%1870 = fmul float %46, %1869
%1871 = fmul float %47, %1869
%1872 = fmul float %48, %1869
%1873 = fmul float %1861, %1870
%1874 = fmul float %1862, %1871
%1875 = fadd float %1874, %1873
%1876 = fmul float %1863, %1872
%1877 = fadd float %1875, %1876
%1878 = fmul float %1877, 5.000000e-01
%1879 = fadd float %1878, 5.000000e-01
%1880 = fmul float %1879, %1879
%1881 = fmul float %1880, %1880
%1882 = fadd float %1848, 0xBFC99999A0000000
%1883 = call float @llvm.fabs.f32(float %1882)
%1884 = fsub float 1.000000e+00, %1883
%1885 = call float @llvm.AMDIL.clamp.(float %1884, float 0.000000e+00, float 1.000000e+00)
%1886 = call float @llvm.AMDIL.clamp.(float %1881, float 0.000000e+00, float 1.000000e+00)
%1887 = call float @llvm.AMDGPU.lrp(float %1886, float %30, float %38)
%1888 = call float @llvm.AMDGPU.lrp(float %1886, float %31, float %39)
%1889 = call float @llvm.AMDGPU.lrp(float %1886, float %32, float %40)
%1890 = call float @llvm.AMDGPU.lrp(float %1886, float %33, float %41)
%1891 = fmul float %1885, %1885
%1892 = call float @llvm.maxnum.f32(float %1891, float 0.000000e+00)
%1893 = call float @llvm.AMDIL.clamp.(float %1881, float 0.000000e+00, float 1.000000e+00)
%1894 = call float @llvm.AMDGPU.lrp(float %1893, float %34, float %42)
%1895 = call float @llvm.AMDGPU.lrp(float %1893, float %35, float %43)
%1896 = call float @llvm.AMDGPU.lrp(float %1893, float %36, float %44)
%1897 = call float @llvm.AMDGPU.lrp(float %1893, float %37, float %45)
%1898 = fmul float %1887, %1892
%1899 = fadd float %1898, %1894
%1900 = fmul float %1888, %1892
%1901 = fadd float %1900, %1895
%1902 = fmul float %1889, %1892
%1903 = fadd float %1902, %1896
%1904 = fmul float %1899, 0x3FD322D0E0000000
%1905 = fmul float %1901, 0x3FE2C8B440000000
%1906 = fadd float %1905, %1904
%1907 = fmul float %1903, 0x3FBD2F1AA0000000
%1908 = fadd float %1907, %1906
%1909 = call float @llvm.pow.f32(float %49, float 1.500000e+00)
%1910 = call float @llvm.AMDGPU.lrp(float %1909, float %1899, float %1908)
%1911 = call float @llvm.AMDGPU.lrp(float %1909, float %1901, float %1908)
%1912 = call float @llvm.AMDGPU.lrp(float %1909, float %1903, float %1908)
%1913 = fmul float %1910, %49
%1914 = fmul float %1911, %49
%1915 = fmul float %1912, %49
%1916 = fmul float %1848, %29
%1917 = fadd float %64, -5.000000e-01
%1918 = fadd float %65, -5.000000e-01
%1919 = fmul float %1917, %1917
%1920 = fmul float %1918, %1918
%1921 = fadd float %1919, %1920
%1922 = call float @llvm.sqrt.f32(float %1921)
%1923 = fdiv float 1.000000e+00, %50
%1924 = fmul float %1922, %1923
%1925 = fsub float 1.000000e+00, %1924
%1926 = call float @llvm.AMDIL.clamp.(float %1925, float 0.000000e+00, float 1.000000e+00)
%1927 = fmul float %1916, %1926
%1928 = call float @llvm.AMDIL.clamp.(float %1927, float 0.000000e+00, float 1.000000e+00)
%1929 = call i32 @llvm.SI.packf16(float %1913, float %1914)
%1930 = bitcast i32 %1929 to float
%1931 = call i32 @llvm.SI.packf16(float %1915, float %1928)
%1932 = bitcast i32 %1931 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1930, float %1932, float %1930, float %1932)
ret void
}
; ---------------------------------------------------------------------------
; External intrinsic declarations, attribute groups and metadata for the
; module whose function body ends just above.  Each `declare` only names an
; intrinsic and fixes its signature; no bodies are defined here.  The trailing
; `#1` / `#2` on a declaration refers to the attribute groups defined near the
; end of this block.
; ---------------------------------------------------------------------------
;
; Takes a <16 x i8> value and an i32, returns one float.  Not called in the
; visible tail of the function body -- presumably used earlier in the function
; (TODO confirm against the start of the body).
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Takes three i32 values and a <2 x i32>, returns one float.  Not called in the
; visible tail of the function body -- presumably the source of the
; interpolated inputs (the disassembly below starts with v_interp_*
; instructions); TODO confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Standard LLVM floor intrinsic on a 32-bit float.
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Returns a <4 x float>.  In the body above it is always called with a pair of
; floats bitcast to <2 x i32>, an <8 x i32> value bitcast to <32 x i8>, a
; <4 x i32> value bitcast to <16 x i8>, and the constant i32 2.
; NOTE(review): the <32 x i8> / <16 x i8> operands look like resource and
; sampler descriptors and the final i32 like a texture-target selector --
; confirm against the driver's intrinsic definition.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Three floats in, one float out.  Declared with group #2 (readnone only, no
; nounwind), unlike most of the intrinsics here.
; NOTE(review): presumably a linear-interpolation helper with the first operand
; as the blend weight -- confirm.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Standard LLVM absolute-value intrinsic on a 32-bit float.
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; One float in, one float out.  The body above multiplies a 3-component vector
; by the result of calling this on the vector's sum of squares.
; NOTE(review): presumably a clamped reciprocal square root -- confirm.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Three floats in, one float out; every call in the body above passes 0.0 and
; 1.0 as the second and third operands.  The trailing '.' is part of the
; intrinsic's name as emitted.  Declared with group #2 (readnone only).
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Standard LLVM maxnum intrinsic on 32-bit floats.
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Standard LLVM pow intrinsic on 32-bit floats.
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Standard LLVM sqrt intrinsic on a 32-bit float.
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Two floats in, one i32 out.  The body above bitcasts each result back to
; float and passes it to llvm.SI.export.
; NOTE(review): presumably packs the two inputs as half floats -- confirm.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Five i32 operands followed by four floats, no return value.  Declared without
; an attribute group, so it is not marked readnone or nounwind like the
; intrinsics above.  It is the last call before `ret void` in the body above.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0 is not referenced by any declaration in this block --
; presumably it is attached to the function definition, whose `define` line is
; outside this view (TODO confirm).
; NOTE(review): "ShaderType"="0" here versus "1" on the vertex-shader module at
; the top of the dump; presumably 0 selects the pixel/fragment stage -- confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; Group #1: referenced by every declaration above marked `#1`.
attributes #1 = { nounwind readnone }
; Group #2: referenced by llvm.AMDGPU.lrp and llvm.AMDIL.clamp. only.
attributes #2 = { readnone }
; Metadata node !0.  Not referenced in the visible lines; in the first module
; of this dump the same node is used as `!tbaa !0` on descriptor loads, so it
; is presumably a TBAA tag here as well (TODO confirm).
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v6, v0, 2, 0, [m0] ; C8180200
v_interp_p2_f32 v6, [v6], v1, 2, 0, [m0] ; C8190201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x32 ; C2040132
s_buffer_load_dword s11, s[0:3], 0x34 ; C2058134
v_interp_p1_f32 v0, v0, 1, 1, [m0] ; C8000500
v_interp_p2_f32 v0, [v0], v1, 1, 1, [m0] ; C8010501
s_buffer_load_dword s10, s[0:3], 0x35 ; C2050135
s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x30 ; C2068130
s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131
s_buffer_load_dword s24, s[0:3], 0x38 ; C20C0138
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v1, s8 ; 7E020208
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
v_mac_f32_e32 v1, s12, v6 ; 3E020C0C
v_mov_b32_e32 v6, s13 ; 7E0C020D
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mov_b32_e32 v7, s14 ; 7E0E020E
v_mac_f32_e32 v7, s12, v3 ; 3E0E060C
v_mov_b32_e32 v10, 0x3dcccccd ; 7E1402FF 3DCCCCCD
v_mul_f32_e32 v8, v10, v6 ; 10100D0A
v_floor_f32_e32 v11, v8 ; 7E164908
v_mul_f32_e32 v8, v10, v7 ; 10100F0A
v_floor_f32_e32 v12, v8 ; 7E184908
v_mul_f32_e32 v8, v10, v1 ; 1010030A
v_floor_f32_e32 v13, v8 ; 7E1A4908
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
v_mov_b32_e32 v9, 0x3b000000 ; 7E1202FF 3B000000
v_mov_b32_e32 v8, 0x3b800000 ; 7E1002FF 3B800000
v_mad_f32 v14, v8, v11, v9 ; D282000E 04261708
v_mad_f32 v15, v8, v12, v9 ; D282000F 04261908
v_add_f32_e32 v17, 0, v14 ; 06221C80
v_add_f32_e32 v18, v8, v15 ; 06241F08
v_add_f32_e32 v20, v8, v14 ; 06281D08
v_add_f32_e32 v21, 0, v15 ; 062A1E80
v_mad_f32 v24, v8, v13, v9 ; D2820018 04261B08
v_mov_b32_e32 v25, v20 ; 7E320314
v_mov_b32_e32 v26, v21 ; 7E340315
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v23, 8, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[16:23], s[12:15] ; F0800800 0064170E
v_mov_b32_e32 v26, v18 ; 7E340312
v_add_f32_e32 v27, v8, v24 ; 06363108
image_sample v28, 8, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[12:15] ; F0800800 00641C11
image_sample v30, 8, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800800 00641E14
s_waitcnt vmcnt(2) ; BF8C0772
v_mov_b32_e32 v31, v23 ; 7E3E0317
v_mov_b32_e32 v32, v24 ; 7E400318
image_sample v25, 8, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800800 00641919
v_mov_b32_e32 v32, v27 ; 7E40031B
v_mov_b32_e32 v29, v24 ; 7E3A0318
image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00642117
image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[31:32], s[16:23], s[12:15] ; F0800700 0064241F
s_waitcnt vmcnt(4) ; BF8C0774
image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 0064271C
v_mov_b32_e32 v29, v27 ; 7E3A031B
v_mov_b32_e32 v31, v24 ; 7E3E0318
image_sample v[42:44], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 00642A1C
s_waitcnt vmcnt(5) ; BF8C0775
image_sample v[45:47], 7, 0, 0, 0, 0, 0, 0, 0, v[30:31], s[16:23], s[12:15] ; F0800700 00642D1E
v_mov_b32_e32 v31, v27 ; 7E3E031B
v_mov_b32_e32 v26, v24 ; 7E340318
image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[30:31], s[16:23], s[12:15] ; F0800700 00641C1E
s_waitcnt vmcnt(6) ; BF8C0776
image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800700 00643019
v_mov_b32_e32 v26, v27 ; 7E34031B
v_mov_b32_e32 v16, v14 ; 7E20030E
image_sample v[51:53], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800700 00643319
image_sample v23, 8, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800800 0064170F
v_mov_b32_e32 v22, v20 ; 7E2C0314
v_mov_b32_e32 v19, v17 ; 7E260311
image_sample v14, 8, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800800 00640E15
image_sample v16, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641012
v_mov_b32_e32 v19, v20 ; 7E260314
s_waitcnt vmcnt(10) ; BF8C077A
v_mad_f32 v20, 4.0, v33, -1.0 ; D2820014 03CE42F6
v_mad_f32 v21, 4.0, v34, -1.0 ; D2820015 03CE44F6
v_mad_f32 v22, 4.0, v35, -1.0 ; D2820016 03CE46F6
s_waitcnt vmcnt(9) ; BF8C0779
v_mad_f32 v25, 4.0, v36, -1.0 ; D2820019 03CE48F6
v_mad_f32 v26, 4.0, v37, -1.0 ; D282001A 03CE4AF6
v_mad_f32 v31, 4.0, v38, -1.0 ; D282001F 03CE4CF6
s_waitcnt vmcnt(8) ; BF8C0778
v_mad_f32 v32, 4.0, v39, -1.0 ; D2820020 03CE4EF6
v_mad_f32 v33, 4.0, v40, -1.0 ; D2820021 03CE50F6
v_mad_f32 v34, 4.0, v41, -1.0 ; D2820022 03CE52F6
s_waitcnt vmcnt(7) ; BF8C0777
v_mad_f32 v35, 4.0, v42, -1.0 ; D2820023 03CE54F6
v_mad_f32 v36, 4.0, v43, -1.0 ; D2820024 03CE56F6
v_mad_f32 v37, 4.0, v44, -1.0 ; D2820025 03CE58F6
s_waitcnt vmcnt(6) ; BF8C0776
v_mad_f32 v38, 4.0, v45, -1.0 ; D2820026 03CE5AF6
v_mad_f32 v39, 4.0, v46, -1.0 ; D2820027 03CE5CF6
v_mad_f32 v40, 4.0, v47, -1.0 ; D2820028 03CE5EF6
s_waitcnt vmcnt(5) ; BF8C0775
v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6
v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6
v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6
s_waitcnt vmcnt(4) ; BF8C0774
v_mad_f32 v41, 4.0, v48, -1.0 ; D2820029 03CE60F6
v_mad_f32 v42, 4.0, v49, -1.0 ; D282002A 03CE62F6
v_mad_f32 v43, 4.0, v50, -1.0 ; D282002B 03CE64F6
image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641212
s_waitcnt vmcnt(4) ; BF8C0774
v_mad_f32 v44, 4.0, v51, -1.0 ; D282002C 03CE66F6
v_mad_f32 v45, 4.0, v52, -1.0 ; D282002D 03CE68F6
v_mad_f32 v46, 4.0, v53, -1.0 ; D282002E 03CE6AF6
v_mov_b32_e32 v15, v24 ; 7E1E0318
v_mov_b32_e32 v17, v24 ; 7E220318
v_mov_b32_e32 v19, v24 ; 7E260318
s_waitcnt vmcnt(3) ; BF8C0773
v_mov_b32_e32 v47, v23 ; 7E5E0317
v_mov_b32_e32 v48, v24 ; 7E600318
image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00643017
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v23, 4.0, v48, -1.0 ; D2820017 03CE60F6
v_mad_f32 v24, 4.0, v49, -1.0 ; D2820018 03CE62F6
v_mad_f32 v49, 4.0, v50, -1.0 ; D2820031 03CE64F6
v_mov_b32_e32 v48, v27 ; 7E60031B
image_sample v[50:52], 7, 0, 0, 0, 0, 0, 0, 0, v[47:48], s[16:23], s[12:15] ; F0800700 0064322F
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v47, 4.0, v50, -1.0 ; D282002F 03CE64F6
v_mad_f32 v48, 4.0, v51, -1.0 ; D2820030 03CE66F6
v_mad_f32 v50, 4.0, v52, -1.0 ; D2820032 03CE68F6
image_sample v[51:53], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[16:23], s[12:15] ; F0800700 0064330E
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v51, 4.0, v51, -1.0 ; D2820033 03CE66F6
v_mad_f32 v52, 4.0, v52, -1.0 ; D2820034 03CE68F6
v_mad_f32 v53, 4.0, v53, -1.0 ; D2820035 03CE6AF6
v_mov_b32_e32 v15, v27 ; 7E1E031B
image_sample v[54:56], 7, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[16:23], s[12:15] ; F0800700 0064360E
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v14, 4.0, v54, -1.0 ; D282000E 03CE6CF6
v_mad_f32 v15, 4.0, v55, -1.0 ; D282000F 03CE6EF6
v_mad_f32 v54, 4.0, v56, -1.0 ; D2820036 03CE70F6
image_sample v[55:57], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00643710
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v55, 4.0, v55, -1.0 ; D2820037 03CE6EF6
v_mad_f32 v56, 4.0, v56, -1.0 ; D2820038 03CE70F6
v_mad_f32 v57, 4.0, v57, -1.0 ; D2820039 03CE72F6
v_mov_b32_e32 v17, v27 ; 7E22031B
image_sample v[58:60], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00643A10
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v16, 4.0, v58, -1.0 ; D2820010 03CE74F6
v_mad_f32 v17, 4.0, v59, -1.0 ; D2820011 03CE76F6
v_mad_f32 v58, 4.0, v60, -1.0 ; D282003A 03CE78F6
image_sample v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00643B12
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v59, 4.0, v59, -1.0 ; D282003B 03CE76F6
v_mad_f32 v60, 4.0, v60, -1.0 ; D282003C 03CE78F6
v_mad_f32 v61, 4.0, v61, -1.0 ; D282003D 03CE7AF6
v_mov_b32_e32 v19, v27 ; 7E26031B
image_sample v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00643E12
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v18, 4.0, v62, -1.0 ; D2820012 03CE7CF6
v_mad_f32 v19, 4.0, v63, -1.0 ; D2820013 03CE7EF6
v_mad_f32 v27, 4.0, v64, -1.0 ; D282001B 03CE80F6
v_mad_f32 v11, v6, v10, -v11 ; D282000B 842E1506
v_mad_f32 v12, v7, v10, -v12 ; D282000C 84321507
v_mad_f32 v10, v1, v10, -v13 ; D282000A 84361501
v_mul_f32_e32 v13, v11, v25 ; 101A330B
v_mac_f32_e32 v13, v12, v26 ; 3E1A350C
v_add_f32_e32 v25, -1.0, v10 ; 063214F3
v_mac_f32_e32 v13, v25, v31 ; 3E1A3F19
v_mul_f32_e32 v26, v11, v35 ; 1034470B
v_add_f32_e32 v31, -1.0, v12 ; 063E18F3
v_mac_f32_e32 v26, v31, v36 ; 3E34491F
v_mac_f32_e32 v26, v25, v37 ; 3E344B19
v_add_f32_e32 v35, -1.0, v11 ; 064616F3
v_mul_f32_e32 v28, v35, v28 ; 10383923
v_mac_f32_e32 v28, v12, v29 ; 3E383B0C
v_mac_f32_e32 v28, v25, v30 ; 3E383D19
v_mul_f32_e32 v29, v35, v44 ; 103A5923
v_mac_f32_e32 v29, v31, v45 ; 3E3A5B1F
v_mac_f32_e32 v29, v25, v46 ; 3E3A5D19
v_mul_f32_e32 v30, v12, v47 ; 103C5F0C
v_mac_f32_e32 v30, v11, v48 ; 3E3C610B
v_mac_f32_e32 v30, v25, v50 ; 3E3C6519
v_mul_f32_e32 v14, v12, v14 ; 101C1D0C
v_mac_f32_e32 v14, v35, v15 ; 3E1C1F23
v_mac_f32_e32 v14, v25, v54 ; 3E1C6D19
v_mul_f32_e32 v15, v31, v16 ; 101E211F
v_mac_f32_e32 v15, v11, v17 ; 3E1E230B
v_mac_f32_e32 v15, v25, v58 ; 3E1E7519
v_mul_f32_e32 v16, v31, v18 ; 1020251F
v_mac_f32_e32 v16, v35, v19 ; 3E202723
v_mac_f32_e32 v16, v25, v27 ; 3E203719
v_mul_f32_e32 v17, v11, v20 ; 1022290B
v_mac_f32_e32 v17, v12, v21 ; 3E222B0C
v_mac_f32_e32 v17, v10, v22 ; 3E222D0A
v_mul_f32_e32 v18, v11, v32 ; 1024410B
v_mac_f32_e32 v18, v31, v33 ; 3E24431F
v_mac_f32_e32 v18, v10, v34 ; 3E24450A
v_mul_f32_e32 v19, v35, v38 ; 10264D23
v_mac_f32_e32 v19, v12, v39 ; 3E264F0C
v_mac_f32_e32 v19, v10, v40 ; 3E26510A
v_mul_f32_e32 v20, v12, v51 ; 1028670C
v_mac_f32_e32 v20, v35, v52 ; 3E286923
v_mul_f32_e32 v21, v31, v59 ; 102A771F
v_mac_f32_e32 v21, v35, v60 ; 3E2A7923
v_mul_f32_e32 v22, v35, v41 ; 102C5323
v_mac_f32_e32 v22, v31, v42 ; 3E2C551F
v_mac_f32_e32 v22, v10, v43 ; 3E2C570A
v_sub_f32_e32 v25, 1.0, v11 ; 083216F2
v_mul_f32_e32 v17, v17, v25 ; 10223311
v_mac_f32_e32 v17, v19, v11 ; 3E221713
v_mul_f32_e32 v13, v13, v25 ; 101A330D
v_mac_f32_e32 v13, v28, v11 ; 3E1A171C
v_mul_f32_e32 v18, v18, v25 ; 10243312
v_mac_f32_e32 v18, v22, v11 ; 3E241716
v_mul_f32_e32 v19, v26, v25 ; 1026331A
v_mac_f32_e32 v19, v29, v11 ; 3E26171D
v_sub_f32_e32 v22, 1.0, v12 ; 082C18F2
v_mul_f32_e32 v17, v17, v22 ; 10222D11
v_mac_f32_e32 v17, v18, v12 ; 3E221912
v_mul_f32_e32 v13, v13, v22 ; 101A2D0D
v_mac_f32_e32 v13, v19, v12 ; 3E1A1913
v_mul_f32_e32 v18, v12, v23 ; 10242F0C
v_mac_f32_e32 v18, v11, v24 ; 3E24310B
v_mac_f32_e32 v18, v10, v49 ; 3E24630A
v_mac_f32_e32 v20, v10, v53 ; 3E286B0A
v_mul_f32_e32 v19, v31, v55 ; 10266F1F
v_mac_f32_e32 v19, v11, v56 ; 3E26710B
v_mac_f32_e32 v19, v10, v57 ; 3E26730A
v_mac_f32_e32 v21, v10, v61 ; 3E2A7B0A
v_mul_f32_e32 v18, v18, v22 ; 10242D12
v_mac_f32_e32 v18, v19, v12 ; 3E241913
v_mul_f32_e32 v19, v30, v22 ; 10262D1E
v_mac_f32_e32 v19, v15, v12 ; 3E26190F
v_mul_f32_e32 v15, v20, v22 ; 101E2D14
v_mac_f32_e32 v15, v21, v12 ; 3E1E1915
v_mul_f32_e32 v14, v14, v22 ; 101C2D0E
v_mac_f32_e32 v14, v16, v12 ; 3E1C1910
v_mul_f32_e32 v12, v18, v25 ; 10183312
v_mac_f32_e32 v12, v15, v11 ; 3E18170F
v_mul_f32_e32 v15, v19, v25 ; 101E3313
v_mac_f32_e32 v15, v14, v11 ; 3E1E170E
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mac_f32_e32 v6, v17, v11 ; 3E0C1711
v_mac_f32_e32 v7, v12, v11 ; 3E0E170C
v_mac_f32_e32 v6, v13, v10 ; 3E0C150D
v_mac_f32_e32 v7, v15, v10 ; 3E0E150F
v_mov_b32_e32 v10, 0x3e4ccccd ; 7E1402FF 3E4CCCCD
v_add_f32_e32 v11, v10, v6 ; 06160D0A
v_add_f32_e32 v12, v10, v7 ; 06180F0A
v_add_f32_e32 v10, v10, v1 ; 0614030A
v_floor_f32_e32 v13, v11 ; 7E1A490B
v_floor_f32_e32 v14, v12 ; 7E1C490C
v_mad_f32 v15, v8, v13, v9 ; D282000F 04261B08
v_mad_f32 v16, v8, v14, v9 ; D2820010 04261D08
v_floor_f32_e32 v17, v10 ; 7E22490A
v_mad_f32 v19, v8, v17, v9 ; D2820013 04262308
image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800800 0064120F
v_add_f32_e32 v20, 0, v15 ; 06281E80
v_add_f32_e32 v21, v8, v16 ; 062A2108
v_add_f32_e32 v22, v8, v15 ; 062C1F08
v_add_f32_e32 v23, 0, v16 ; 062E2080
image_sample v15, 8, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800800 00640F14
image_sample v24, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641816
v_mov_b32_e32 v23, v21 ; 7E2E0315
image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641416
v_mov_b32_e32 v16, v19 ; 7E200313
v_mov_b32_e32 v25, v19 ; 7E320313
v_mov_b32_e32 v21, v19 ; 7E2A0313
v_add_f32_e32 v22, v8, v19 ; 062C2708
s_waitcnt vmcnt(3) ; BF8C0773
v_mov_b32_e32 v26, v18 ; 7E340312
v_mov_b32_e32 v27, v19 ; 7E360313
image_sample v[27:29], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00641B12
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v18, 4.0, v27, -1.0 ; D2820012 03CE36F6
v_mad_f32 v19, 4.0, v28, -1.0 ; D2820013 03CE38F6
v_mad_f32 v23, 4.0, v29, -1.0 ; D2820017 03CE3AF6
v_mov_b32_e32 v27, v22 ; 7E360316
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 00641A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v26, 4.0, v26, -1.0 ; D282001A 03CE34F6
v_mad_f32 v27, 4.0, v27, -1.0 ; D282001B 03CE36F6
v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6
image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 00641D0F
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6
v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6
v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6
v_mov_b32_e32 v16, v22 ; 7E200316
image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 0064200F
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v15, 4.0, v32, -1.0 ; D282000F 03CE40F6
v_mad_f32 v16, 4.0, v33, -1.0 ; D2820010 03CE42F6
v_mad_f32 v32, 4.0, v34, -1.0 ; D2820020 03CE44F6
image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800700 00642118
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6
v_mad_f32 v34, 4.0, v34, -1.0 ; D2820022 03CE44F6
v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6
v_mov_b32_e32 v25, v22 ; 7E320316
image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800700 00642418
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v24, 4.0, v36, -1.0 ; D2820018 03CE48F6
v_mad_f32 v25, 4.0, v37, -1.0 ; D2820019 03CE4AF6
v_mad_f32 v36, 4.0, v38, -1.0 ; D2820024 03CE4CF6
image_sample v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00642514
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6
v_mad_f32 v38, 4.0, v38, -1.0 ; D2820026 03CE4CF6
v_mad_f32 v39, 4.0, v39, -1.0 ; D2820027 03CE4EF6
v_mov_b32_e32 v21, v22 ; 7E2A0316
image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00641414
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v20, 4.0, v20, -1.0 ; D2820014 03CE28F6
v_mad_f32 v21, 4.0, v21, -1.0 ; D2820015 03CE2AF6
v_mad_f32 v22, 4.0, v22, -1.0 ; D2820016 03CE2CF6
v_subrev_f32_e32 v11, v13, v11 ; 0A16170D
v_subrev_f32_e32 v12, v14, v12 ; 0A18190E
v_mul_f32_e32 v13, v11, v29 ; 101A3B0B
v_add_f32_e32 v14, -1.0, v12 ; 061C18F3
v_mac_f32_e32 v13, v14, v30 ; 3E1A3D0E
v_mul_f32_e32 v15, v11, v15 ; 101E1F0B
v_mac_f32_e32 v15, v14, v16 ; 3E1E210E
v_add_f32_e32 v16, -1.0, v11 ; 062016F3
v_mul_f32_e32 v29, v16, v37 ; 103A4B10
v_mac_f32_e32 v29, v14, v38 ; 3E3A4D0E
v_mul_f32_e32 v20, v16, v20 ; 10282910
v_mac_f32_e32 v20, v14, v21 ; 3E282B0E
v_subrev_f32_e32 v10, v17, v10 ; 0A141511
v_mul_f32_e32 v14, v11, v26 ; 101C350B
v_mac_f32_e32 v14, v12, v27 ; 3E1C370C
v_add_f32_e32 v17, -1.0, v10 ; 062214F3
v_mac_f32_e32 v14, v17, v28 ; 3E1C3911
v_mac_f32_e32 v15, v17, v32 ; 3E1E4111
v_mul_f32_e32 v21, v16, v24 ; 102A3110
v_mac_f32_e32 v21, v12, v25 ; 3E2A330C
v_mac_f32_e32 v21, v17, v36 ; 3E2A4911
v_mac_f32_e32 v20, v17, v22 ; 3E282D11
v_mul_f32_e32 v17, v11, v18 ; 1022250B
v_mac_f32_e32 v17, v12, v19 ; 3E22270C
v_mac_f32_e32 v17, v10, v23 ; 3E222F0A
v_mac_f32_e32 v13, v10, v31 ; 3E1A3F0A
v_mul_f32_e32 v16, v16, v33 ; 10204310
v_mac_f32_e32 v16, v12, v34 ; 3E20450C
v_mac_f32_e32 v16, v10, v35 ; 3E20470A
v_mac_f32_e32 v29, v10, v39 ; 3E3A4F0A
v_sub_f32_e32 v18, 1.0, v11 ; 082416F2
v_mul_f32_e32 v17, v17, v18 ; 10222511
v_mac_f32_e32 v17, v16, v11 ; 3E221710
v_mul_f32_e32 v14, v14, v18 ; 101C250E
v_mac_f32_e32 v14, v21, v11 ; 3E1C1715
v_mul_f32_e32 v13, v13, v18 ; 101A250D
v_mac_f32_e32 v13, v29, v11 ; 3E1A171D
v_mul_f32_e32 v15, v15, v18 ; 101E250F
v_mac_f32_e32 v15, v20, v11 ; 3E1E1714
v_sub_f32_e32 v11, 1.0, v12 ; 081618F2
v_mul_f32_e32 v16, v17, v11 ; 10201711
v_mac_f32_e32 v16, v13, v12 ; 3E20190D
v_mul_f32_e32 v11, v14, v11 ; 1016170E
v_mac_f32_e32 v11, v15, v12 ; 3E16190F
v_sub_f32_e32 v12, 1.0, v10 ; 081814F2
v_mul_f32_e32 v12, v16, v12 ; 10181910
v_mac_f32_e32 v12, v11, v10 ; 3E18150B
v_mov_b32_e32 v10, 0x3f8ccccd ; 7E1402FF 3F8CCCCD
v_mad_f32 v11, v12, v10, 1.0 ; D282000B 03CA150C
v_mov_b32_e32 v12, 0x3e428f5c ; 7E1802FF 3E428F5C
v_mad_f32 v13, v12, v11, v6 ; D282000D 041A170C
v_mad_f32 v14, v12, v11, v7 ; D282000E 041E170C
v_mad_f32 v12, v12, v11, v1 ; D282000C 0406170C
v_mov_b32_e32 v15, 0x40533333 ; 7E1E02FF 40533333
v_mul_f32_e32 v16, v15, v13 ; 10201B0F
v_floor_f32_e32 v16, v16 ; 7E204910
v_mul_f32_e32 v17, v15, v14 ; 10221D0F
v_floor_f32_e32 v17, v17 ; 7E224911
v_mul_f32_e32 v18, v15, v12 ; 1024190F
v_floor_f32_e32 v18, v18 ; 7E244912
v_mad_f32 v19, v8, v16, v9 ; D2820013 04262108
v_mad_f32 v20, v8, v17, v9 ; D2820014 04262308
v_mad_f32 v22, v8, v18, v9 ; D2820016 04262508
image_sample v21, 8, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800800 00641513
v_add_f32_e32 v23, 0, v19 ; 062E2680
v_add_f32_e32 v24, v8, v20 ; 06302908
v_add_f32_e32 v25, v8, v19 ; 06322708
v_add_f32_e32 v26, 0, v20 ; 06342880
image_sample v19, 8, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800800 00641317
image_sample v27, 8, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800800 00641B19
v_mov_b32_e32 v26, v24 ; 7E340318
v_mad_f32 v13, v13, v15, -v16 ; D282000D 84421F0D
v_mad_f32 v14, v14, v15, -v17 ; D282000E 84461F0E
image_sample v16, 8, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[16:23], s[12:15] ; F0800800 00641019
v_mad_f32 v12, v12, v15, -v18 ; D282000C 844A1F0C
v_mov_b32_e32 v20, v22 ; 7E280316
v_mov_b32_e32 v28, v22 ; 7E380316
v_mov_b32_e32 v17, v22 ; 7E220316
v_add_f32_e32 v15, v8, v22 ; 061E2D08
s_waitcnt vmcnt(3) ; BF8C0773
v_mov_b32_e32 v23, v21 ; 7E2E0315
v_mov_b32_e32 v24, v22 ; 7E300316
image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800700 00641815
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v18, 4.0, v24, -1.0 ; D2820012 03CE30F6
v_mad_f32 v21, 4.0, v25, -1.0 ; D2820015 03CE32F6
v_mad_f32 v22, 4.0, v26, -1.0 ; D2820016 03CE34F6
v_mov_b32_e32 v24, v15 ; 7E30030F
image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00641717
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v23, 4.0, v23, -1.0 ; D2820017 03CE2EF6
v_mad_f32 v24, 4.0, v24, -1.0 ; D2820018 03CE30F6
v_mad_f32 v25, 4.0, v25, -1.0 ; D2820019 03CE32F6
image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00641D13
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v26, 4.0, v29, -1.0 ; D282001A 03CE3AF6
v_mad_f32 v29, 4.0, v30, -1.0 ; D282001D 03CE3CF6
v_mad_f32 v30, 4.0, v31, -1.0 ; D282001E 03CE3EF6
v_mov_b32_e32 v20, v15 ; 7E28030F
image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00641F13
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v19, 4.0, v31, -1.0 ; D2820013 03CE3EF6
v_mad_f32 v20, 4.0, v32, -1.0 ; D2820014 03CE40F6
v_mad_f32 v31, 4.0, v33, -1.0 ; D282001F 03CE42F6
image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[16:23], s[12:15] ; F0800700 0064201B
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6
v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6
v_mad_f32 v34, 4.0, v34, -1.0 ; D2820022 03CE44F6
v_mov_b32_e32 v28, v15 ; 7E38030F
image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[16:23], s[12:15] ; F0800700 0064231B
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v27, 4.0, v35, -1.0 ; D282001B 03CE46F6
v_mad_f32 v28, 4.0, v36, -1.0 ; D282001C 03CE48F6
v_mad_f32 v35, 4.0, v37, -1.0 ; D2820023 03CE4AF6
image_sample v[36:38], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00642410
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6
v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6
v_mad_f32 v38, 4.0, v38, -1.0 ; D2820026 03CE4CF6
v_mov_b32_e32 v17, v15 ; 7E22030F
image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[16:23], s[12:15] ; F0800700 00640F10
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v15, 4.0, v15, -1.0 ; D282000F 03CE1EF6
v_mad_f32 v16, 4.0, v16, -1.0 ; D2820010 03CE20F6
v_mad_f32 v17, 4.0, v17, -1.0 ; D2820011 03CE22F6
v_mul_f32_e32 v26, v13, v26 ; 1034350D
v_add_f32_e32 v39, -1.0, v14 ; 064E1CF3
v_mac_f32_e32 v26, v39, v29 ; 3E343B27
v_mul_f32_e32 v19, v13, v19 ; 1026270D
v_mac_f32_e32 v19, v39, v20 ; 3E262927
v_add_f32_e32 v20, -1.0, v13 ; 06281AF3
v_mul_f32_e32 v29, v20, v36 ; 103A4914
v_mac_f32_e32 v29, v39, v37 ; 3E3A4B27
v_mul_f32_e32 v15, v20, v15 ; 101E1F14
v_mac_f32_e32 v15, v39, v16 ; 3E1E2127
v_mul_f32_e32 v16, v13, v23 ; 10202F0D
v_mac_f32_e32 v16, v14, v24 ; 3E20310E
v_add_f32_e32 v23, -1.0, v12 ; 062E18F3
v_mac_f32_e32 v16, v23, v25 ; 3E203317
v_mac_f32_e32 v19, v23, v31 ; 3E263F17
v_mul_f32_e32 v24, v20, v27 ; 10303714
v_mac_f32_e32 v24, v14, v28 ; 3E30390E
v_mac_f32_e32 v24, v23, v35 ; 3E304717
v_mac_f32_e32 v15, v23, v17 ; 3E1E2317
v_mul_f32_e32 v17, v13, v18 ; 1022250D
v_mac_f32_e32 v17, v14, v21 ; 3E222B0E
v_mac_f32_e32 v17, v12, v22 ; 3E222D0C
v_mac_f32_e32 v26, v12, v30 ; 3E343D0C
v_mul_f32_e32 v18, v20, v32 ; 10244114
v_mac_f32_e32 v18, v14, v33 ; 3E24430E
v_mac_f32_e32 v18, v12, v34 ; 3E24450C
v_mac_f32_e32 v29, v12, v38 ; 3E3A4D0C
v_sub_f32_e32 v20, 1.0, v13 ; 08281AF2
v_mul_f32_e32 v17, v17, v20 ; 10222911
v_mac_f32_e32 v17, v18, v13 ; 3E221B12
v_mul_f32_e32 v16, v16, v20 ; 10202910
v_mac_f32_e32 v16, v24, v13 ; 3E201B18
v_mul_f32_e32 v18, v26, v20 ; 1024291A
v_mac_f32_e32 v18, v29, v13 ; 3E241B1D
v_mul_f32_e32 v19, v19, v20 ; 10262913
v_mac_f32_e32 v19, v15, v13 ; 3E261B0F
v_sub_f32_e32 v13, 1.0, v14 ; 081A1CF2
v_mul_f32_e32 v15, v17, v13 ; 101E1B11
v_mac_f32_e32 v15, v18, v14 ; 3E1E1D12
v_mul_f32_e32 v13, v16, v13 ; 101A1B10
v_mac_f32_e32 v13, v19, v14 ; 3E1A1D13
v_sub_f32_e32 v14, 1.0, v12 ; 081C18F2
v_mul_f32_e32 v14, v15, v14 ; 101C1D0F
v_mac_f32_e32 v14, v13, v12 ; 3E1C190D
v_mov_b32_e32 v12, 0x3e9b26ca ; 7E1802FF 3E9B26CA
v_mad_f32 v11, |v14|, v12, v11 ; D282010B 042E190E
v_mov_b32_e32 v12, 0x3e570a3d ; 7E1802FF 3E570A3D
v_mad_f32 v13, v12, v11, v6 ; D282000D 041A170C
v_mad_f32 v14, v12, v11, v7 ; D282000E 041E170C
v_mad_f32 v12, v12, v11, v1 ; D282000C 0406170C
v_mul_f32_e32 v15, s11, v13 ; 101E1A0B
v_floor_f32_e32 v15, v15 ; 7E1E490F
v_mul_f32_e32 v16, s11, v14 ; 10201C0B
v_floor_f32_e32 v16, v16 ; 7E204910
v_mul_f32_e32 v17, s11, v12 ; 1022180B
v_floor_f32_e32 v17, v17 ; 7E224911
v_mad_f32 v18, v8, v15, v9 ; D2820012 04261F08
v_mad_f32 v19, v8, v16, v9 ; D2820013 04262108
v_mad_f32 v21, v8, v17, v9 ; D2820015 04262308
image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641412
v_add_f32_e32 v22, 0, v18 ; 062C2480
v_add_f32_e32 v23, v8, v19 ; 062E2708
v_add_f32_e32 v24, v8, v18 ; 06302508
v_add_f32_e32 v25, 0, v19 ; 06322680
image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641216
image_sample v26, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641A18
v_mov_b32_e32 v25, v23 ; 7E320317
image_sample v22, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641618
v_mov_b32_e32 v19, v21 ; 7E260315
v_mov_b32_e32 v27, v21 ; 7E360315
v_mov_b32_e32 v23, v21 ; 7E2E0315
v_add_f32_e32 v24, v8, v21 ; 06302B08
s_waitcnt vmcnt(3) ; BF8C0773
v_mov_b32_e32 v28, v20 ; 7E380314
v_mov_b32_e32 v29, v21 ; 7E3A0315
image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00641D14
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v20, 4.0, v29, -1.0 ; D2820014 03CE3AF6
v_mad_f32 v21, 4.0, v30, -1.0 ; D2820015 03CE3CF6
v_mad_f32 v25, 4.0, v31, -1.0 ; D2820019 03CE3EF6
v_mov_b32_e32 v29, v24 ; 7E3A0318
image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 00641C1C
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6
v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6
v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6
image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00641F12
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6
v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6
v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6
v_mov_b32_e32 v19, v24 ; 7E260318
image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00642212
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v18, 4.0, v34, -1.0 ; D2820012 03CE44F6
v_mad_f32 v19, 4.0, v35, -1.0 ; D2820013 03CE46F6
v_mad_f32 v34, 4.0, v36, -1.0 ; D2820022 03CE48F6
image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064231A
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6
v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6
v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6
v_mov_b32_e32 v27, v24 ; 7E360318
image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064261A
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v26, 4.0, v38, -1.0 ; D282001A 03CE4CF6
v_mad_f32 v27, 4.0, v39, -1.0 ; D282001B 03CE4EF6
v_mad_f32 v38, 4.0, v40, -1.0 ; D2820026 03CE50F6
image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00642716
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v39, 4.0, v39, -1.0 ; D2820027 03CE4EF6
v_mad_f32 v40, 4.0, v40, -1.0 ; D2820028 03CE50F6
v_mad_f32 v41, 4.0, v41, -1.0 ; D2820029 03CE52F6
v_mov_b32_e32 v23, v24 ; 7E2E0318
image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00641616
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v22, 4.0, v22, -1.0 ; D2820016 03CE2CF6
v_mad_f32 v23, 4.0, v23, -1.0 ; D2820017 03CE2EF6
v_mad_f32 v24, 4.0, v24, -1.0 ; D2820018 03CE30F6
v_mad_f32 v13, v13, s11, -v15 ; D282000D 843C170D
v_mad_f32 v14, v14, s11, -v16 ; D282000E 8440170E
v_mul_f32_e32 v15, v13, v31 ; 101E3F0D
v_add_f32_e32 v16, -1.0, v14 ; 06201CF3
v_mac_f32_e32 v15, v16, v32 ; 3E1E4110
v_mul_f32_e32 v18, v13, v18 ; 1024250D
v_mac_f32_e32 v18, v16, v19 ; 3E242710
v_add_f32_e32 v19, -1.0, v13 ; 06261AF3
v_mul_f32_e32 v31, v19, v39 ; 103E4F13
v_mac_f32_e32 v31, v16, v40 ; 3E3E5110
v_mul_f32_e32 v22, v19, v22 ; 102C2D13
v_mac_f32_e32 v22, v16, v23 ; 3E2C2F10
v_mad_f32 v12, v12, s11, -v17 ; D282000C 8444170C
v_mul_f32_e32 v16, v13, v28 ; 1020390D
v_mac_f32_e32 v16, v14, v29 ; 3E203B0E
v_add_f32_e32 v17, -1.0, v12 ; 062218F3
v_mac_f32_e32 v16, v17, v30 ; 3E203D11
v_mac_f32_e32 v18, v17, v34 ; 3E244511
v_mul_f32_e32 v23, v19, v26 ; 102E3513
v_mac_f32_e32 v23, v14, v27 ; 3E2E370E
v_mac_f32_e32 v23, v17, v38 ; 3E2E4D11
v_mac_f32_e32 v22, v17, v24 ; 3E2C3111
v_mul_f32_e32 v17, v13, v20 ; 1022290D
v_mac_f32_e32 v17, v14, v21 ; 3E222B0E
v_mac_f32_e32 v17, v12, v25 ; 3E22330C
v_mac_f32_e32 v15, v12, v33 ; 3E1E430C
v_mul_f32_e32 v19, v19, v35 ; 10264713
v_mac_f32_e32 v19, v14, v36 ; 3E26490E
v_mac_f32_e32 v19, v12, v37 ; 3E264B0C
v_mac_f32_e32 v31, v12, v41 ; 3E3E530C
v_sub_f32_e32 v20, 1.0, v13 ; 08281AF2
v_mul_f32_e32 v17, v17, v20 ; 10222911
v_mac_f32_e32 v17, v19, v13 ; 3E221B13
v_mul_f32_e32 v16, v16, v20 ; 10202910
v_mac_f32_e32 v16, v23, v13 ; 3E201B17
v_mul_f32_e32 v15, v15, v20 ; 101E290F
v_mac_f32_e32 v15, v31, v13 ; 3E1E1B1F
v_mul_f32_e32 v18, v18, v20 ; 10242912
v_mac_f32_e32 v18, v22, v13 ; 3E241B16
v_sub_f32_e32 v13, 1.0, v14 ; 081A1CF2
v_mul_f32_e32 v17, v17, v13 ; 10221B11
v_mac_f32_e32 v17, v15, v14 ; 3E221D0F
v_mul_f32_e32 v13, v16, v13 ; 101A1B10
v_mac_f32_e32 v13, v18, v14 ; 3E1A1D12
v_rcp_f32_e32 v14, s24 ; 7E1C5418
v_sub_f32_e32 v15, 1.0, v12 ; 081E18F2
v_mul_f32_e32 v15, v17, v15 ; 101E1F11
v_mac_f32_e32 v15, v13, v12 ; 3E1E190D
v_mad_f32 v11, |v15|, v14, v11 ; D282010B 042E1D0F
v_mov_b32_e32 v12, 0x3e6b851f ; 7E1802FF 3E6B851F
v_mad_f32 v13, v12, v11, v6 ; D282000D 041A170C
v_mad_f32 v14, v12, v11, v7 ; D282000E 041E170C
v_mad_f32 v12, v12, v11, v1 ; D282000C 0406170C
v_mul_f32_e32 v15, s10, v13 ; 101E1A0A
v_floor_f32_e32 v15, v15 ; 7E1E490F
v_mul_f32_e32 v16, s10, v14 ; 10201C0A
v_floor_f32_e32 v16, v16 ; 7E204910
v_mul_f32_e32 v17, s10, v12 ; 1022180A
v_floor_f32_e32 v17, v17 ; 7E224911
v_mad_f32 v18, v8, v15, v9 ; D2820012 04261F08
v_mad_f32 v19, v8, v16, v9 ; D2820013 04262108
v_mad_f32 v21, v8, v17, v9 ; D2820015 04262308
image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800800 00641412
v_add_f32_e32 v22, 0, v18 ; 062C2480
v_add_f32_e32 v23, v8, v19 ; 062E2708
v_add_f32_e32 v24, v8, v18 ; 06302508
v_add_f32_e32 v25, 0, v19 ; 06322680
image_sample v18, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800800 00641216
image_sample v26, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641A18
v_mov_b32_e32 v25, v23 ; 7E320317
image_sample v22, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00641618
v_mov_b32_e32 v19, v21 ; 7E260315
v_mov_b32_e32 v27, v21 ; 7E360315
v_mov_b32_e32 v23, v21 ; 7E2E0315
v_add_f32_e32 v24, v8, v21 ; 06302B08
s_waitcnt vmcnt(3) ; BF8C0773
v_mov_b32_e32 v28, v20 ; 7E380314
v_mov_b32_e32 v29, v21 ; 7E3A0315
image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[12:15] ; F0800700 00641D14
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v20, 4.0, v29, -1.0 ; D2820014 03CE3AF6
v_mad_f32 v21, 4.0, v30, -1.0 ; D2820015 03CE3CF6
v_mad_f32 v25, 4.0, v31, -1.0 ; D2820019 03CE3EF6
v_mov_b32_e32 v29, v24 ; 7E3A0318
image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800700 00641C1C
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6
v_mad_f32 v29, 4.0, v29, -1.0 ; D282001D 03CE3AF6
v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6
image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00641F12
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6
v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6
v_mad_f32 v33, 4.0, v33, -1.0 ; D2820021 03CE42F6
v_mov_b32_e32 v19, v24 ; 7E260318
image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[16:23], s[12:15] ; F0800700 00642212
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v18, 4.0, v34, -1.0 ; D2820012 03CE44F6
v_mad_f32 v19, 4.0, v35, -1.0 ; D2820013 03CE46F6
v_mad_f32 v34, 4.0, v36, -1.0 ; D2820022 03CE48F6
image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064231A
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6
v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6
v_mad_f32 v37, 4.0, v37, -1.0 ; D2820025 03CE4AF6
v_mov_b32_e32 v27, v24 ; 7E360318
image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[16:23], s[12:15] ; F0800700 0064261A
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v26, 4.0, v38, -1.0 ; D282001A 03CE4CF6
v_mad_f32 v27, 4.0, v39, -1.0 ; D282001B 03CE4EF6
v_mad_f32 v38, 4.0, v40, -1.0 ; D2820026 03CE50F6
image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00642716
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v39, 4.0, v39, -1.0 ; D2820027 03CE4EF6
v_mad_f32 v40, 4.0, v40, -1.0 ; D2820028 03CE50F6
v_mad_f32 v41, 4.0, v41, -1.0 ; D2820029 03CE52F6
v_mov_b32_e32 v23, v24 ; 7E2E0318
image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[16:23], s[12:15] ; F0800700 00641616
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v22, 4.0, v22, -1.0 ; D2820016 03CE2CF6
v_mad_f32 v23, 4.0, v23, -1.0 ; D2820017 03CE2EF6
v_mad_f32 v24, 4.0, v24, -1.0 ; D2820018 03CE30F6
v_mad_f32 v13, v13, s10, -v15 ; D282000D 843C150D
v_mad_f32 v14, v14, s10, -v16 ; D282000E 8440150E
v_mul_f32_e32 v15, v13, v31 ; 101E3F0D
v_add_f32_e32 v16, -1.0, v14 ; 06201CF3
v_mac_f32_e32 v15, v16, v32 ; 3E1E4110
v_mul_f32_e32 v18, v13, v18 ; 1024250D
v_mac_f32_e32 v18, v16, v19 ; 3E242710
v_add_f32_e32 v19, -1.0, v13 ; 06261AF3
v_mul_f32_e32 v31, v19, v39 ; 103E4F13
v_mac_f32_e32 v31, v16, v40 ; 3E3E5110
v_mul_f32_e32 v22, v19, v22 ; 102C2D13
v_mac_f32_e32 v22, v16, v23 ; 3E2C2F10
v_mad_f32 v12, v12, s10, -v17 ; D282000C 8444150C
v_mul_f32_e32 v16, v13, v28 ; 1020390D
v_mac_f32_e32 v16, v14, v29 ; 3E203B0E
v_add_f32_e32 v17, -1.0, v12 ; 062218F3
v_mac_f32_e32 v16, v17, v30 ; 3E203D11
v_mac_f32_e32 v18, v17, v34 ; 3E244511
v_mul_f32_e32 v23, v19, v26 ; 102E3513
v_mac_f32_e32 v23, v14, v27 ; 3E2E370E
v_mac_f32_e32 v23, v17, v38 ; 3E2E4D11
v_mac_f32_e32 v22, v17, v24 ; 3E2C3111
v_mul_f32_e32 v17, v13, v20 ; 1022290D
v_mac_f32_e32 v17, v14, v21 ; 3E222B0E
v_mac_f32_e32 v17, v12, v25 ; 3E22330C
v_mac_f32_e32 v15, v12, v33 ; 3E1E430C
v_mul_f32_e32 v19, v19, v35 ; 10264713
v_mac_f32_e32 v19, v14, v36 ; 3E26490E
v_mac_f32_e32 v19, v12, v37 ; 3E264B0C
v_mac_f32_e32 v31, v12, v41 ; 3E3E530C
v_sub_f32_e32 v20, 1.0, v13 ; 08281AF2
v_mul_f32_e32 v17, v17, v20 ; 10222911
v_mac_f32_e32 v17, v19, v13 ; 3E221B13
v_mul_f32_e32 v16, v16, v20 ; 10202910
v_mac_f32_e32 v16, v23, v13 ; 3E201B17
v_mul_f32_e32 v15, v15, v20 ; 101E290F
s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139
v_mac_f32_e32 v15, v31, v13 ; 3E1E1B1F
v_mul_f32_e32 v18, v18, v20 ; 10242912
v_mac_f32_e32 v18, v22, v13 ; 3E241B16
v_sub_f32_e32 v13, 1.0, v14 ; 081A1CF2
v_mul_f32_e32 v17, v17, v13 ; 10221B11
v_mac_f32_e32 v17, v15, v14 ; 3E221D0F
v_mul_f32_e32 v13, v16, v13 ; 101A1B10
v_mac_f32_e32 v13, v18, v14 ; 3E1A1D12
s_buffer_load_dword s5, s[0:3], 0x3a ; C202813A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_rcp_f32_e32 v14, s4 ; 7E1C5404
v_sub_f32_e32 v15, 1.0, v12 ; 081E18F2
v_mul_f32_e32 v15, v17, v15 ; 101E1F11
v_mac_f32_e32 v15, v13, v12 ; 3E1E190D
v_mad_f32 v11, |v15|, v14, v11 ; D282010B 042E1D0F
v_mov_b32_e32 v12, 0x3e800000 ; 7E1802FF 3E800000
v_mac_f32_e32 v6, v12, v11 ; 3E0C170C
v_mac_f32_e32 v7, v12, v11 ; 3E0E170C
v_mac_f32_e32 v1, v12, v11 ; 3E02170C
v_mul_f32_e32 v12, s9, v6 ; 10180C09
v_floor_f32_e32 v12, v12 ; 7E18490C
v_mul_f32_e32 v13, s9, v7 ; 101A0E09
v_floor_f32_e32 v13, v13 ; 7E1A490D
v_mul_f32_e32 v14, s9, v1 ; 101C0209
v_floor_f32_e32 v14, v14 ; 7E1C490E
v_mad_f32 v15, v8, v12, v9 ; D282000F 04261908
v_mad_f32 v16, v8, v13, v9 ; D2820010 04261B08
v_mad_f32 v18, v8, v14, v9 ; D2820012 04261D08
image_sample v17, 8, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800800 0064110F
v_add_f32_e32 v19, 0, v15 ; 06261E80
v_add_f32_e32 v20, v8, v16 ; 06282108
v_add_f32_e32 v21, v8, v15 ; 062A1F08
v_add_f32_e32 v22, 0, v16 ; 062C2080
image_sample v15, 8, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800800 00640F13
image_sample v23, 8, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800800 00641715
v_mov_b32_e32 v22, v20 ; 7E2C0314
image_sample v19, 8, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800800 00641315
v_add_f32_e32 v8, v8, v18 ; 06102508
v_mov_b32_e32 v16, v18 ; 7E200312
v_mov_b32_e32 v24, v18 ; 7E300312
v_mov_b32_e32 v20, v18 ; 7E280312
s_waitcnt vmcnt(3) ; BF8C0773
v_mov_b32_e32 v21, v17 ; 7E2A0311
v_mov_b32_e32 v22, v18 ; 7E2C0312
image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[12:15] ; F0800700 00641911
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v9, 4.0, v25, -1.0 ; D2820009 03CE32F6
v_mad_f32 v17, 4.0, v26, -1.0 ; D2820011 03CE34F6
v_mad_f32 v18, 4.0, v27, -1.0 ; D2820012 03CE36F6
v_mov_b32_e32 v22, v8 ; 7E2C0308
image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[16:23], s[12:15] ; F0800700 00641915
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v21, 4.0, v25, -1.0 ; D2820015 03CE32F6
v_mad_f32 v22, 4.0, v26, -1.0 ; D2820016 03CE34F6
v_mad_f32 v25, 4.0, v27, -1.0 ; D2820019 03CE36F6
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 00641A0F
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v26, 4.0, v26, -1.0 ; D282001A 03CE34F6
v_mad_f32 v27, 4.0, v27, -1.0 ; D282001B 03CE36F6
v_mad_f32 v28, 4.0, v28, -1.0 ; D282001C 03CE38F6
v_mov_b32_e32 v16, v8 ; 7E200308
image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[16:23], s[12:15] ; F0800700 00641D0F
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v15, 4.0, v29, -1.0 ; D282000F 03CE3AF6
v_mad_f32 v16, 4.0, v30, -1.0 ; D2820010 03CE3CF6
v_mad_f32 v29, 4.0, v31, -1.0 ; D282001D 03CE3EF6
image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00641E17
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v30, 4.0, v30, -1.0 ; D282001E 03CE3CF6
v_mad_f32 v31, 4.0, v31, -1.0 ; D282001F 03CE3EF6
v_mad_f32 v32, 4.0, v32, -1.0 ; D2820020 03CE40F6
v_mov_b32_e32 v24, v8 ; 7E300308
image_sample v[33:35], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[16:23], s[12:15] ; F0800700 00642117
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v23, 4.0, v33, -1.0 ; D2820017 03CE42F6
v_mad_f32 v24, 4.0, v34, -1.0 ; D2820018 03CE44F6
v_mad_f32 v33, 4.0, v35, -1.0 ; D2820021 03CE46F6
image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00642213
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v34, 4.0, v34, -1.0 ; D2820022 03CE44F6
v_mad_f32 v35, 4.0, v35, -1.0 ; D2820023 03CE46F6
v_mad_f32 v36, 4.0, v36, -1.0 ; D2820024 03CE48F6
v_mov_b32_e32 v20, v8 ; 7E280308
image_sample v[37:39], 7, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[16:23], s[12:15] ; F0800700 00642513
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v8, 4.0, v37, -1.0 ; D2820008 03CE4AF6
v_mad_f32 v19, 4.0, v38, -1.0 ; D2820013 03CE4CF6
v_mad_f32 v20, 4.0, v39, -1.0 ; D2820014 03CE4EF6
v_mad_f32 v6, v6, s9, -v12 ; D2820006 84301306
v_mad_f32 v7, v7, s9, -v13 ; D2820007 84341307
v_mul_f32_e32 v12, v6, v26 ; 10183506
v_add_f32_e32 v13, -1.0, v7 ; 061A0EF3
v_mac_f32_e32 v12, v13, v27 ; 3E18370D
v_mul_f32_e32 v15, v6, v15 ; 101E1F06
v_mac_f32_e32 v15, v13, v16 ; 3E1E210D
v_add_f32_e32 v16, -1.0, v6 ; 06200CF3
v_mul_f32_e32 v26, v16, v34 ; 10344510
v_mac_f32_e32 v26, v13, v35 ; 3E34470D
v_mul_f32_e32 v8, v16, v8 ; 10101110
v_mac_f32_e32 v8, v13, v19 ; 3E10270D
v_mad_f32 v1, v1, s9, -v14 ; D2820001 84381301
v_mul_f32_e32 v13, v6, v21 ; 101A2B06
v_mac_f32_e32 v13, v7, v22 ; 3E1A2D07
v_add_f32_e32 v14, -1.0, v1 ; 061C02F3
v_mac_f32_e32 v13, v14, v25 ; 3E1A330E
v_mac_f32_e32 v15, v14, v29 ; 3E1E3B0E
v_mul_f32_e32 v19, v16, v23 ; 10262F10
v_mac_f32_e32 v19, v7, v24 ; 3E263107
v_mac_f32_e32 v19, v14, v33 ; 3E26430E
v_mac_f32_e32 v8, v14, v20 ; 3E10290E
v_mul_f32_e32 v9, v6, v9 ; 10121306
v_mac_f32_e32 v9, v7, v17 ; 3E122307
v_mac_f32_e32 v9, v1, v18 ; 3E122501
v_mac_f32_e32 v12, v1, v28 ; 3E183901
v_mul_f32_e32 v14, v16, v30 ; 101C3D10
v_mac_f32_e32 v14, v7, v31 ; 3E1C3F07
v_mac_f32_e32 v14, v1, v32 ; 3E1C4101
v_mac_f32_e32 v26, v1, v36 ; 3E344901
v_sub_f32_e32 v16, 1.0, v6 ; 08200CF2
v_mul_f32_e32 v9, v9, v16 ; 10122109
v_mac_f32_e32 v9, v14, v6 ; 3E120D0E
v_mul_f32_e32 v13, v13, v16 ; 101A210D
v_mac_f32_e32 v13, v19, v6 ; 3E1A0D13
v_mul_f32_e32 v12, v12, v16 ; 1018210C
v_mac_f32_e32 v12, v26, v6 ; 3E180D1A
v_mul_f32_e32 v14, v15, v16 ; 101C210F
v_mac_f32_e32 v14, v8, v6 ; 3E1C0D08
v_sub_f32_e32 v6, 1.0, v7 ; 080C0EF2
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
v_mul_f32_e32 v8, v9, v6 ; 10100D09
v_mac_f32_e32 v8, v12, v7 ; 3E100F0C
v_mul_f32_e32 v6, v13, v6 ; 100C0D0D
s_buffer_load_dword s6, s[0:3], 0x1 ; C2030101
s_buffer_load_dword s7, s[0:3], 0x2 ; C2038102
v_mac_f32_e32 v6, v14, v7 ; 3E0C0F0E
v_sub_f32_e32 v7, 1.0, v1 ; 080E02F2
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mac_f32_e32 v7, v6, v1 ; 3E0E0306
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v1, s4, v5 ; 0A020A04
s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124
s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125
v_rcp_f32_e32 v5, s5 ; 7E0A5405
v_subrev_f32_e32 v4, s6, v4 ; 0A080806
v_subrev_f32_e32 v0, s7, v0 ; 0A000007
v_mul_f32_e32 v6, v1, v1 ; 100C0301
v_mac_f32_e32 v6, v4, v4 ; 3E0C0904
v_mac_f32_e32 v6, v0, v0 ; 3E0C0100
s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126
v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906
v_mad_f32 v5, |v7|, v5, v11 ; D2820105 042E0B07
s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128
s_buffer_load_dword s7, s[0:3], 0x2c ; C203812C
v_mul_f32_e32 v1, v6, v1 ; 10020306
v_mul_f32_e32 v4, v6, v4 ; 10080906
v_mul_f32_e32 v0, v6, v0 ; 10000106
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e64 v6, s4, s4 ; D2100006 00000804
v_mac_f32_e64 v6, s9, s9 ; D23E0006 00001209
v_mac_f32_e64 v6, s5, s5 ; D23E0006 00000A05
v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906
v_mul_f32_e32 v1, v10, v1 ; 1002030A
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_mul_f32_e32 v0, v10, v0 ; 1000010A
v_mul_f32_e32 v7, s4, v6 ; 100E0C04
v_mul_f32_e32 v8, s9, v6 ; 10100C09
v_mul_f32_e32 v6, s5, v6 ; 100C0C05
v_mul_f32_e32 v1, v7, v1 ; 10020307
s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C
s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D
s_buffer_load_dword s9, s[0:3], 0x1e ; C204811E
v_mac_f32_e32 v1, v8, v4 ; 3E020908
v_mac_f32_e32 v1, v6, v0 ; 3E020106
v_mad_f32 v0, 0.5, v1, 0.5 ; D2820000 03C202F0
s_buffer_load_dword s10, s[0:3], 0x14 ; C2050114
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s4, v1 ; 10080204
s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115
s_buffer_load_dword s11, s[0:3], 0x16 ; C2058116
s_buffer_load_dword s12, s[0:3], 0x18 ; C2060118
s_buffer_load_dword s13, s[0:3], 0x20 ; C2068120
v_mac_f32_e32 v4, s10, v0 ; 3E08000A
s_buffer_load_dword s10, s[0:3], 0x21 ; C2050121
s_buffer_load_dword s14, s[0:3], 0x19 ; C2070119
s_buffer_load_dword s15, s[0:3], 0x22 ; C2078122
s_buffer_load_dword s16, s[0:3], 0x1a ; C208011A
v_mul_f32_e32 v6, s5, v1 ; 100C0205
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s4, v0 ; 3E0C0004
v_mul_f32_e32 v7, s9, v1 ; 100E0209
v_mac_f32_e32 v7, s11, v0 ; 3E0E000B
v_mul_f32_e32 v8, s13, v1 ; 1010020D
v_mac_f32_e32 v8, s12, v0 ; 3E10000C
v_mul_f32_e32 v9, s10, v1 ; 1012020A
v_mac_f32_e32 v9, s14, v0 ; 3E12000E
v_mul_f32_e32 v1, s15, v1 ; 1002020F
v_mac_f32_e32 v1, s16, v0 ; 3E020010
v_subrev_f32_e32 v0, s8, v5 ; 0A000A08
v_mov_b32_e32 v5, 0xbe4ccccd ; 7E0A02FF BE4CCCCD
v_add_f32_e32 v5, v0, v5 ; 060A0B00
v_sub_f32_e64 v5, 1.0, |v5| ; D2080205 00020AF2
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mul_f32_e32 v5, v5, v5 ; 100A0B05
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mac_f32_e32 v8, v5, v4 ; 3E100905
v_mac_f32_e32 v9, v5, v6 ; 3E120D05
v_mac_f32_e32 v1, v5, v7 ; 3E020F05
v_log_f32_e32 v4, s6 ; 7E084E06
v_mul_f32_e32 v5, 0x3e991687, v8 ; 100A10FF 3E991687
v_madmk_f32_e32 v5, v9, v5, 0x3f1645a2 ; 400A0B09 3F1645A2
v_madmk_f32_e32 v5, v1, v5, 0x3de978d5 ; 400A0B01 3DE978D5
v_mul_legacy_f32_e32 v4, 0x3fc00000, v4 ; 0E0808FF 3FC00000
v_exp_f32_e32 v4, v4 ; 7E084B04
v_sub_f32_e32 v6, 1.0, v4 ; 080C08F2
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
s_buffer_load_dword s0, s[0:3], 0x10 ; C2000110
v_mad_f32 v6, v8, v4, v5 ; D2820006 04160908
v_mad_f32 v7, v9, v4, v5 ; D2820007 04160909
v_mac_f32_e32 v5, v1, v4 ; 3E0A0901
v_add_f32_e32 v1, -0.5, v2 ; 060204F1
v_add_f32_e32 v2, -0.5, v3 ; 060406F1
v_rcp_f32_e32 v3, s7 ; 7E065407
v_mul_f32_e32 v2, v2, v2 ; 10040502
v_mac_f32_e32 v2, v1, v1 ; 3E040301
v_sqrt_f32_e32 v1, v2 ; 7E026702
v_mad_f32 v1, -v1, v3, 1.0 ; D2820001 23CA0701
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s0, v0 ; 10000000
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mul_f32_e32 v1, s6, v6 ; 10020C06
v_mul_f32_e32 v2, s6, v7 ; 10040E06
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_mul_f32_e32 v2, s6, v5 ; 10040A06
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 68
Code Size: 5196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..12]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0].xyz, CONST[1].xyzz, IN[1].xxxx
1: MAD TEMP[0].xyz, CONST[2].xyzz, IN[1].yyyy, TEMP[0].xyzz
2: MAD TEMP[0].xyz, CONST[3].xyzz, IN[1].zzzz, TEMP[0].xyzz
3: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
6: MUL TEMP[1], CONST[9], IN[0].xxxx
7: MAD TEMP[1], CONST[10], IN[0].yyyy, TEMP[1]
8: MAD TEMP[1], CONST[11], IN[0].zzzz, TEMP[1]
9: MAD TEMP[1].z, CONST[12], IN[0].wwww, TEMP[1]
10: MUL TEMP[1].x, TEMP[1].zzzz, CONST[0].wwww
11: MOV TEMP[0].w, -TEMP[1].xxxx
12: MUL TEMP[1], CONST[5], IN[0].xxxx
13: MAD TEMP[1], CONST[6], IN[0].yyyy, TEMP[1]
14: MAD TEMP[1], CONST[7], IN[0].zzzz, TEMP[1]
15: MAD TEMP[1], CONST[8], IN[0].wwww, TEMP[1]
16: MOV OUT[1], TEMP[0]
17: MOV OUT[0], TEMP[1]
18: END
; ModuleID = 'tgsi'
; Entry point generated from the 19-instruction TGSI listing printed directly above this module
; (attribute group #0 tags it with "ShaderType"="1").
;
; Visible data flow:
;   * 30 scalar constants are read through the descriptor in slot 0 of argument %1.
;   * Two vertex inputs are fetched through slots 0 and 1 of argument %4, both indexed by %5 + %7.
;   * The second input's x/y/z are combined with nine constants into a 3-vector, which is then
;     scaled by rsq.clamped of its own squared length.
;   * The first input's x/y/z/w feed a four-term dot product (negated after one more multiply)
;     and a 16-constant, four-row accumulate.
;   * Two llvm.SI.export calls emit the results.
; Unnamed values are numbered implicitly, so instructions must not be reordered or removed.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor in slot 0 of argument %1; every llvm.SI.load.const below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; NOTE(review): the i32 operand looks like a byte offset (16 per vec4, 4 per component), which
; would make 12 the .w of CONST[0] in the TGSI listing - confirm against the intrinsic definition.
; %13 is the final multiplier of the four-term dot product (see %89).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
; Offsets 16-24, 32-40, 48-56: three components from each of three consecutive 16-byte rows,
; consumed by the 3-vector computation (%58-%72).
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
; Offsets 80-140: sixteen contiguous floats, consumed by the four-row accumulate (%91-%118).
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
; Offsets 152/168/184/200: one float every 16 bytes, consumed by the four-term dot product
; (%82-%88). Only one component of TGSI instructions 6-9 is live, hence the single column.
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
; First vertex input: descriptor slot 0 of argument %4, index %5 + %7; all four components used.
%43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = add i32 %5, %7
%46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
%49 = extractelement <4 x float> %46, i32 2
%50 = extractelement <4 x float> %46, i32 3
; Second vertex input: descriptor slot 1, same index; only x/y/z are extracted.
%51 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%52 = load <16 x i8>, <16 x i8> addrspace(2)* %51, align 16, !tbaa !0
%53 = add i32 %5, %7
%54 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %52, i32 0, i32 %53)
%55 = extractelement <4 x float> %54, i32 0
%56 = extractelement <4 x float> %54, i32 1
%57 = extractelement <4 x float> %54, i32 2
; 3-vector (%68, %70, %72): row0 * in1.x + row1 * in1.y + row2 * in1.z (TGSI instructions 0-2).
%58 = fmul float %14, %55
%59 = fmul float %15, %55
%60 = fmul float %16, %55
%61 = fmul float %17, %56
%62 = fadd float %61, %58
%63 = fmul float %18, %56
%64 = fadd float %63, %59
%65 = fmul float %19, %56
%66 = fadd float %65, %60
%67 = fmul float %20, %57
%68 = fadd float %67, %62
%69 = fmul float %21, %57
%70 = fadd float %69, %64
%71 = fmul float %22, %57
%72 = fadd float %71, %66
; Squared length of that vector (%77), then rsq.clamped of it (%78) - TGSI instructions 3-4.
%73 = fmul float %68, %68
%74 = fmul float %70, %70
%75 = fadd float %74, %73
%76 = fmul float %72, %72
%77 = fadd float %75, %76
%78 = call float @llvm.AMDGPU.rsq.clamped.f32(float %77)
; Scale each component by %78 (TGSI instruction 5).
%79 = fmul float %68, %78
%80 = fmul float %70, %78
%81 = fmul float %72, %78
; Four-term dot product of the first input with %39..%42 (live part of TGSI instructions 6-9).
%82 = fmul float %39, %47
%83 = fmul float %40, %48
%84 = fadd float %83, %82
%85 = fmul float %41, %49
%86 = fadd float %85, %84
%87 = fmul float %42, %50
%88 = fadd float %87, %86
; Multiply by %13 and negate, written as -0.0 - x (TGSI instructions 10-11).
%89 = fmul float %88, %13
%90 = fsub float -0.000000e+00, %89
; Four-row accumulate of the first input against %23..%38 (TGSI instructions 12-15);
; results are %112, %114, %116, %118.
%91 = fmul float %23, %47
%92 = fmul float %24, %47
%93 = fmul float %25, %47
%94 = fmul float %26, %47
%95 = fmul float %27, %48
%96 = fadd float %95, %91
%97 = fmul float %28, %48
%98 = fadd float %97, %92
%99 = fmul float %29, %48
%100 = fadd float %99, %93
%101 = fmul float %30, %48
%102 = fadd float %101, %94
%103 = fmul float %31, %49
%104 = fadd float %103, %96
%105 = fmul float %32, %49
%106 = fadd float %105, %98
%107 = fmul float %33, %49
%108 = fadd float %107, %100
%109 = fmul float %34, %49
%110 = fadd float %109, %102
%111 = fmul float %35, %50
%112 = fadd float %111, %104
%113 = fmul float %36, %50
%114 = fadd float %113, %106
%115 = fmul float %37, %50
%116 = fadd float %115, %108
%117 = fmul float %38, %50
%118 = fadd float %117, %110
; Export with fourth operand 32: scaled 3-vector plus the negated product (TGSI "MOV OUT[1], TEMP[0]").
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %80, float %81, float %90)
; Export with fourth operand 12: the four accumulated sums (TGSI "MOV OUT[0], TEMP[1]").
; NOTE(review): the meaning of the other integer operands is not visible in this dump.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %114, float %116, float %118)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_buffer_load_dword s7, s[0:3], 0x9 ; C2038109
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0xa ; C204810A
s_buffer_load_dword s10, s[0:3], 0xc ; C205010C
s_buffer_load_dword s11, s[0:3], 0xd ; C205810D
s_buffer_load_dword s12, s[0:3], 0xe ; C206010E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v5 ; 10000A04
v_mac_f32_e32 v0, s5, v6 ; 3E000C05
v_mul_f32_e32 v8, s6, v5 ; 10100A06
v_mac_f32_e32 v8, s7, v6 ; 3E100C07
v_mul_f32_e32 v5, s8, v5 ; 100A0A08
v_mac_f32_e32 v5, s9, v6 ; 3E0A0C09
v_mac_f32_e32 v0, s10, v7 ; 3E000E0A
s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103
v_mac_f32_e32 v8, s11, v7 ; 3E100E0B
v_mac_f32_e32 v5, s12, v7 ; 3E0A0E0C
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115
s_buffer_load_dword s7, s[0:3], 0x16 ; C2038116
s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117
s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118
s_buffer_load_dword s10, s[0:3], 0x23 ; C2050123
s_buffer_load_dword s11, s[0:3], 0x26 ; C2058126
s_buffer_load_dword s12, s[0:3], 0x2a ; C206012A
s_buffer_load_dword s13, s[0:3], 0x19 ; C2068119
s_buffer_load_dword s14, s[0:3], 0x1a ; C207011A
s_buffer_load_dword s15, s[0:3], 0x1b ; C207811B
s_buffer_load_dword s16, s[0:3], 0x1c ; C208011C
s_buffer_load_dword s17, s[0:3], 0x1d ; C208811D
s_buffer_load_dword s18, s[0:3], 0x1e ; C209011E
s_buffer_load_dword s19, s[0:3], 0x1f ; C209811F
s_buffer_load_dword s20, s[0:3], 0x20 ; C20A0120
s_buffer_load_dword s21, s[0:3], 0x21 ; C20A8121
s_buffer_load_dword s22, s[0:3], 0x22 ; C20B0122
s_buffer_load_dword s23, s[0:3], 0x2e ; C20B812E
s_buffer_load_dword s0, s[0:3], 0x32 ; C2000132
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s11, v1 ; 100C020B
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s5, v1 ; 100E0205
v_mac_f32_e32 v7, s9, v2 ; 3E0E0409
v_mul_f32_e32 v9, s6, v1 ; 10120206
v_mac_f32_e32 v9, s13, v2 ; 3E12040D
v_mul_f32_e32 v10, s7, v1 ; 10140207
v_mac_f32_e32 v10, s14, v2 ; 3E14040E
v_mul_f32_e32 v1, s8, v1 ; 10020208
v_mac_f32_e32 v1, s15, v2 ; 3E02040F
v_mac_f32_e32 v6, s23, v3 ; 3E0C0617
v_mac_f32_e32 v7, s16, v3 ; 3E0E0610
v_mac_f32_e32 v9, s17, v3 ; 3E120611
v_mac_f32_e32 v10, s18, v3 ; 3E140612
v_mac_f32_e32 v1, s19, v3 ; 3E020613
v_mac_f32_e32 v6, s0, v4 ; 3E0C0800
v_mac_f32_e32 v7, s20, v4 ; 3E0E0814
v_mac_f32_e32 v9, s21, v4 ; 3E120815
v_mul_f32_e32 v2, v0, v0 ; 10040100
v_mac_f32_e32 v2, v8, v8 ; 3E041108
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mac_f32_e32 v10, s22, v4 ; 3E140816
v_mac_f32_e32 v1, s10, v4 ; 3E02080A
v_mul_f32_e32 v3, s4, v6 ; 10060C04
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mul_f32_e32 v4, v2, v8 ; 10081102
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000
exp 15, 32, 0, 0, 0, v0, v4, v2, v3 ; F800020F 03020400
exp 15, 12, 0, 1, 0, v7, v9, v10, v1 ; F80008CF 010A0907
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 12
Code Size: 344 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.2813, 1.0000, 0.5000, 255.0000}
IMM[1] FLT32 { 0.0039, 0.0000, 0.0000, 0.0000}
0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].yyyy
1: RCP TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
3: MAD TEMP[0].xy, IMM[0].xxxx, TEMP[0].xyyy, IMM[0].zzzz
4: MUL TEMP[1].xy, IMM[0].ywww, IN[0].wwww
5: FRC TEMP[1].xy, TEMP[1].xyyy
6: MOV TEMP[2].y, TEMP[1].yyyy
7: MUL TEMP[3].x, TEMP[1].yyyy, IMM[1].xxxx
8: ADD TEMP[2].x, TEMP[1].xxxx, -TEMP[3].xxxx
9: MOV TEMP[0].zw, TEMP[2].yyxy
10: MOV OUT[0], TEMP[0]
11: END
; ModuleID = 'tgsi'
; Entry point generated from the 12-instruction TGSI FRAG listing printed directly above this
; module (attribute group #0 tags it with "ShaderType"="0").
;
; Visible data flow:
;   * Four llvm.SI.fs.interp calls (first operand 0..3) produce the four channels of IN[0].
;   * x and y are multiplied by 1 / (z + 1), then by a constant, then offset by 0.5.
;   * w yields two fractional parts: fract(w) and fract(w * 255); the second, times a small
;     constant, is subtracted from the first.
;   * The four results are packed pairwise with llvm.SI.packf16 and exported once.
; Unnamed values are numbered implicitly, so instructions must not be reordered or removed.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Channels 0..3 of the single input; all four calls share arguments %5 and %7.
%22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
; %27 = 1 / (z + 1)  (TGSI instructions 0-1).
%26 = fadd float %24, 1.000000e+00
%27 = fdiv float 1.000000e+00, %26
; x and y scaled by %27 (TGSI instruction 2).
%28 = fmul float %22, %27
%29 = fmul float %23, %27
; Multiply by 0x3FD20033A0000000 and add 0.5 (TGSI instruction 3; the listing prints the
; multiplier as 0.2813).
%30 = fmul float %28, 0x3FD20033A0000000
%31 = fadd float %30, 5.000000e-01
%32 = fmul float %29, 0x3FD20033A0000000
%33 = fadd float %32, 5.000000e-01
; %36 = w - floor(w) and %38 = w*255 - floor(w*255)  (TGSI instructions 4-5, FRC expanded).
%34 = fmul float %25, 2.550000e+02
%35 = call float @llvm.floor.f32(float %25)
%36 = fsub float %25, %35
%37 = call float @llvm.floor.f32(float %34)
%38 = fsub float %34, %37
; %40 = %36 - %38 * 0x3F70101020000000 (TGSI instructions 7-8; the listing prints the
; multiplier as 0.0039).
%39 = fmul float %38, 0x3F70101020000000
%40 = fsub float %36, %39
; Pack (%31, %33) and (%40, %38) into one i32 each and reinterpret as float for the export.
%41 = call i32 @llvm.SI.packf16(float %31, float %33)
%42 = bitcast i32 %41 to float
%43 = call i32 @llvm.SI.packf16(float %40, float %38)
%44 = bitcast i32 %43 to float
; Single export; the two packed words are passed twice (%42, %44, %42, %44).
; NOTE(review): the fifth operand being 1 presumably marks the payload as packed 16-bit data,
; in line with "export_16bpc = 0x3" in the shader key - confirm against the intrinsic definition.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_add_f32_e32 v4, 1.0, v4 ; 060808F2
v_rcp_f32_e32 v4, v4 ; 7E085504
v_interp_p1_f32 v0, v0, 3, 0, [m0] ; C8000300
v_interp_p2_f32 v0, [v0], v1, 3, 0, [m0] ; C8010301
v_mul_f32_e32 v1, v4, v2 ; 10020504
v_mul_f32_e32 v2, v4, v3 ; 10040704
v_mov_b32_e32 v3, 0x3e90019d ; 7E0602FF 3E90019D
v_mad_f32 v1, v1, v3, 0.5 ; D2820001 03C20701
v_mad_f32 v2, v2, v3, 0.5 ; D2820002 03C20702
v_mov_b32_e32 v3, 0x437f0000 ; 7E0602FF 437F0000
v_mul_f32_e32 v4, v3, v0 ; 10080103
v_floor_f32_e32 v5, v0 ; 7E0A4900
v_subrev_f32_e32 v5, v5, v0 ; 0A0A0105
v_floor_f32_e32 v4, v4 ; 7E084904
v_mad_f32 v0, v0, v3, -v4 ; D2820000 84120700
v_madmk_f32_e32 v3, v0, v5, 0xbb808081 ; 40060B00 BB808081
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e32 v0, v3, v0 ; 5E000103
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 16
VGPRS: 8
Code Size: 136 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..5]
DCL TEMP[0..2], LOCAL
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[5], IN[0].wwww, TEMP[0]
4: MAD TEMP[1].xy, IN[1].xyyy, CONST[1].xyyy, CONST[1].zwww
5: MUL TEMP[2].xy, IN[1].xyyy, CONST[0].xyyy
6: MOV TEMP[1].zw, TEMP[2].yyxy
7: MOV OUT[1], TEMP[1]
8: MOV OUT[0], TEMP[0]
9: END
; ModuleID = 'tgsi'
; Entry point generated from the 10-instruction TGSI VERT listing printed directly above this
; module (attribute group #0 tags it with "ShaderType"="1").
;
; Visible data flow:
;   * 22 scalar constants are read through the descriptor in slot 0 of argument %1.
;   * Two vertex inputs are fetched through slots 0 and 1 of argument %4, both indexed by %5 + %7.
;   * The first input's x/y/z/w feed a 16-constant, four-row accumulate (TGSI instructions 0-3).
;   * The second input's x/y are (a) multiplied by two constants and offset by two more, and
;     (b) multiplied by two further constants (TGSI instructions 4-6).
;   * Two llvm.SI.export calls emit the results.
; Unnamed values are numbered implicitly, so instructions must not be reordered or removed.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor in slot 0 of argument %1; every llvm.SI.load.const below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; NOTE(review): the i32 operand looks like a byte offset (16 per vec4, 4 per component) -
; confirm against the intrinsic definition.
; Offsets 0 and 4: the two multipliers used by %81/%82.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
; Offsets 16-28: two multipliers (%15, %16) and two addends (%17, %18) used by %77-%80.
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
; Offsets 32-92: sixteen contiguous floats used by the four-row accumulate (%49-%76).
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; First vertex input: descriptor slot 0 of argument %4, index %5 + %7; all four components used.
%35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = add i32 %5, %7
%38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
; Second vertex input: descriptor slot 1, same index; only x and y are extracted.
%43 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
%45 = add i32 %5, %7
%46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
; Four-row accumulate: row0 * in0.x + row1 * in0.y + row2 * in0.z + row3 * in0.w;
; results are %70, %72, %74, %76.
%49 = fmul float %19, %39
%50 = fmul float %20, %39
%51 = fmul float %21, %39
%52 = fmul float %22, %39
%53 = fmul float %23, %40
%54 = fadd float %53, %49
%55 = fmul float %24, %40
%56 = fadd float %55, %50
%57 = fmul float %25, %40
%58 = fadd float %57, %51
%59 = fmul float %26, %40
%60 = fadd float %59, %52
%61 = fmul float %27, %41
%62 = fadd float %61, %54
%63 = fmul float %28, %41
%64 = fadd float %63, %56
%65 = fmul float %29, %41
%66 = fadd float %65, %58
%67 = fmul float %30, %41
%68 = fadd float %67, %60
%69 = fmul float %31, %42
%70 = fadd float %69, %62
%71 = fmul float %32, %42
%72 = fadd float %71, %64
%73 = fmul float %33, %42
%74 = fadd float %73, %66
%75 = fmul float %34, %42
%76 = fadd float %75, %68
; in1.xy * (%15, %16) + (%17, %18)  (TGSI instruction 4).
%77 = fmul float %47, %15
%78 = fadd float %77, %17
%79 = fmul float %48, %16
%80 = fadd float %79, %18
; in1.xy * (%13, %14)  (TGSI instructions 5-6).
%81 = fmul float %47, %13
%82 = fmul float %48, %14
; Export with fourth operand 32: the four values derived from the second input (TGSI OUT[1], GENERIC[0]).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %78, float %80, float %81, float %82)
; Export with fourth operand 12: the four accumulated sums (TGSI OUT[0], POSITION).
; NOTE(review): the meaning of the other integer operands is not visible in this dump.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %70, float %72, float %74, float %76)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s13, s[0:3], 0xb ; C206810B
s_buffer_load_dword s14, s[0:3], 0xc ; C207010C
s_buffer_load_dword s15, s[0:3], 0xd ; C207810D
s_buffer_load_dword s16, s[0:3], 0xe ; C208010E
s_buffer_load_dword s17, s[0:3], 0xf ; C208810F
s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110
s_buffer_load_dword s19, s[0:3], 0x11 ; C2098111
s_buffer_load_dword s20, s[0:3], 0x12 ; C20A0112
s_buffer_load_dword s21, s[0:3], 0x13 ; C20A8113
s_buffer_load_dword s22, s[0:3], 0x14 ; C20B0114
s_buffer_load_dword s23, s[0:3], 0x15 ; C20B8115
s_buffer_load_dword s24, s[0:3], 0x16 ; C20C0116
s_buffer_load_dword s0, s[0:3], 0x17 ; C2000117
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s10, v1 ; 1000020A
v_mac_f32_e32 v0, s14, v2 ; 3E00040E
v_mul_f32_e32 v7, s11, v1 ; 100E020B
v_mac_f32_e32 v7, s15, v2 ; 3E0E040F
v_mul_f32_e32 v8, s12, v1 ; 1010020C
v_mac_f32_e32 v8, s16, v2 ; 3E100410
v_mul_f32_e32 v1, s13, v1 ; 1002020D
v_mac_f32_e32 v1, s17, v2 ; 3E020411
v_mac_f32_e32 v0, s18, v3 ; 3E000612
v_mac_f32_e32 v7, s19, v3 ; 3E0E0613
v_mac_f32_e32 v8, s20, v3 ; 3E100614
v_mac_f32_e32 v1, s21, v3 ; 3E020615
v_mac_f32_e32 v0, s22, v4 ; 3E000816
v_mac_f32_e32 v7, s23, v4 ; 3E0E0817
v_mac_f32_e32 v8, s24, v4 ; 3E100818
v_mac_f32_e32 v1, s0, v4 ; 3E020800
v_mov_b32_e32 v2, s8 ; 7E040208
v_mac_f32_e32 v2, s6, v5 ; 3E040A06
v_mov_b32_e32 v3, s9 ; 7E060209
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mul_f32_e32 v4, s4, v5 ; 10080A04
v_mul_f32_e32 v5, s5, v6 ; 100A0C05
exp 15, 32, 0, 0, 0, v2, v3, v4, v5 ; F800020F 05040302
exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 12
Code Size: 240 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 3.5554, 0.0000}
IMM[1] FLT32 { -1.7777, 1.0000, 0.0039, 0.3000}
IMM[2] FLT32 { 0.4010, 0.8899, -0.0175, 0.0714}
IMM[3] FLT32 { 0.1618, 0.1339, -0.3530, -0.6984}
IMM[4] FLT32 { -0.2305, -0.1900, 0.5025, -0.6984}
IMM[5] FLT32 { -0.6257, 0.1242, 0.1164, -0.6984}
IMM[6] FLT32 { 0.3821, -0.3241, 0.4113, -0.6984}
IMM[7] FLT32 { -0.0883, 0.1650, 0.1396, -0.6984}
IMM[8] FLT32 { 0.1892, -0.1284, -0.0987, -0.6984}
IMM[9] FLT32 { 0.1986, 0.1767, 0.4380, -0.6984}
IMM[10] FLT32 { -0.3295, 0.0268, -0.4022, -0.6984}
IMM[11] FLT32 { -0.0196, -0.3108, -0.4107, -0.6984}
IMM[12] FLT32 { -0.3215, 0.6832, -0.3433, -0.6984}
IMM[13] FLT32 { 0.7026, 0.1648, 0.0225, -0.6984}
IMM[14] FLT32 { 0.0370, -0.9391, 0.1359, -0.6984}
IMM[15] FLT32 { -0.6984, -0.6003, -0.0402, 0.0000}
0: MOV TEMP[0].xy, IN[0].zwww
1: TEX TEMP[0].xyz, TEMP[0], SAMP[1], 2D
2: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx, IMM[0].yyyy
3: MOV TEMP[1].xy, IN[0].xyyy
4: TEX TEMP[1], TEMP[1], SAMP[0], 2D
5: MAD TEMP[2].xyz, TEMP[1].xyzz, IMM[0].zzww, IMM[1].xxyy
6: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
7: RCP TEMP[3].x, TEMP[3].xxxx
8: MUL TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx
9: MUL TEMP[2].xy, TEMP[3].xxxx, TEMP[2].xyyy
10: ADD TEMP[3].x, TEMP[3].xxxx, IMM[0].yyyy
11: MOV TEMP[2].z, TEMP[3].xxxx
12: DP2 TEMP[1].x, TEMP[1].zwww, IMM[1].yzzz
13: MUL TEMP[1].x, TEMP[1].xxxx, CONST[0].zzzz
14: RCP TEMP[3].x, TEMP[1].xxxx
15: MUL TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx
16: MOV TEMP[4].x, IMM[0].wwww
17: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[2].xyzz
18: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
19: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
20: ADD TEMP[5].xyz, IMM[2].xyzz, -TEMP[5].xyzz
21: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
22: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
23: UIF TEMP[6].xxxx :0
24: MOV TEMP[6].x, IMM[1].yyyy
25: ELSE :0
26: MOV TEMP[6].x, IMM[0].yyyy
27: ENDIF
28: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
29: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
30: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
31: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
32: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
33: MOV TEMP[5].xy, TEMP[5].xyyy
34: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
35: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
36: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
37: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
38: MOV_SAT TEMP[5].x, TEMP[5].xxxx
39: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
40: UIF TEMP[6].xxxx :0
41: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
42: POW TEMP[4].x, TEMP[5].xxxx, CONST[3].zzzz
43: ENDIF
44: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[3].xyzz
45: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
46: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
47: ADD TEMP[5].xyz, IMM[3].xyzz, -TEMP[5].xyzz
48: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
49: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
50: UIF TEMP[6].xxxx :0
51: MOV TEMP[6].x, IMM[1].yyyy
52: ELSE :0
53: MOV TEMP[6].x, IMM[0].yyyy
54: ENDIF
55: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
56: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
57: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
58: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
59: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
60: MOV TEMP[5].xy, TEMP[5].xyyy
61: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
62: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
63: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
64: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
65: MOV_SAT TEMP[5].x, TEMP[5].xxxx
66: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
67: UIF TEMP[6].xxxx :0
68: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
69: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
70: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
71: ENDIF
72: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[4].xyzz
73: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
74: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
75: ADD TEMP[5].xyz, IMM[4].xyzz, -TEMP[5].xyzz
76: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
77: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
78: UIF TEMP[6].xxxx :0
79: MOV TEMP[6].x, IMM[1].yyyy
80: ELSE :0
81: MOV TEMP[6].x, IMM[0].yyyy
82: ENDIF
83: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
84: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
85: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
86: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
87: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
88: MOV TEMP[5].xy, TEMP[5].xyyy
89: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
90: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
91: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
92: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
93: MOV_SAT TEMP[5].x, TEMP[5].xxxx
94: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
95: UIF TEMP[6].xxxx :0
96: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
97: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
98: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
99: ENDIF
100: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[5].xyzz
101: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
102: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
103: ADD TEMP[5].xyz, IMM[5].xyzz, -TEMP[5].xyzz
104: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
105: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
106: UIF TEMP[6].xxxx :0
107: MOV TEMP[6].x, IMM[1].yyyy
108: ELSE :0
109: MOV TEMP[6].x, IMM[0].yyyy
110: ENDIF
111: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
112: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
113: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
114: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
115: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
116: MOV TEMP[5].xy, TEMP[5].xyyy
117: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
118: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
119: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
120: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
121: MOV_SAT TEMP[5].x, TEMP[5].xxxx
122: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
123: UIF TEMP[6].xxxx :0
124: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
125: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
126: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
127: ENDIF
128: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[6].xyzz
129: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
130: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
131: ADD TEMP[5].xyz, IMM[6].xyzz, -TEMP[5].xyzz
132: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
133: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
134: UIF TEMP[6].xxxx :0
135: MOV TEMP[6].x, IMM[1].yyyy
136: ELSE :0
137: MOV TEMP[6].x, IMM[0].yyyy
138: ENDIF
139: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
140: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
141: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
142: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
143: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
144: MOV TEMP[5].xy, TEMP[5].xyyy
145: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
146: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
147: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
148: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
149: MOV_SAT TEMP[5].x, TEMP[5].xxxx
150: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
151: UIF TEMP[6].xxxx :0
152: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
153: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
154: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
155: ENDIF
156: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[7].xyzz
157: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
158: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
159: ADD TEMP[5].xyz, IMM[7].xyzz, -TEMP[5].xyzz
160: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
161: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
162: UIF TEMP[6].xxxx :0
163: MOV TEMP[6].x, IMM[1].yyyy
164: ELSE :0
165: MOV TEMP[6].x, IMM[0].yyyy
166: ENDIF
167: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
168: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
169: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
170: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
171: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
172: MOV TEMP[5].xy, TEMP[5].xyyy
173: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
174: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
175: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
176: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
177: MOV_SAT TEMP[5].x, TEMP[5].xxxx
178: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
179: UIF TEMP[6].xxxx :0
180: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
181: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
182: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
183: ENDIF
184: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[8].xyzz
185: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
186: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
187: ADD TEMP[5].xyz, IMM[8].xyzz, -TEMP[5].xyzz
188: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
189: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
190: UIF TEMP[6].xxxx :0
191: MOV TEMP[6].x, IMM[1].yyyy
192: ELSE :0
193: MOV TEMP[6].x, IMM[0].yyyy
194: ENDIF
195: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
196: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
197: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
198: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
199: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
200: MOV TEMP[5].xy, TEMP[5].xyyy
201: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
202: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
203: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
204: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
205: MOV_SAT TEMP[5].x, TEMP[5].xxxx
206: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
207: UIF TEMP[6].xxxx :0
208: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
209: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
210: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
211: ENDIF
212: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[9].xyzz
213: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
214: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
215: ADD TEMP[5].xyz, IMM[9].xyzz, -TEMP[5].xyzz
216: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
217: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
218: UIF TEMP[6].xxxx :0
219: MOV TEMP[6].x, IMM[1].yyyy
220: ELSE :0
221: MOV TEMP[6].x, IMM[0].yyyy
222: ENDIF
223: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
224: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
225: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
226: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
227: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
228: MOV TEMP[5].xy, TEMP[5].xyyy
229: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
230: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
231: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
232: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
233: MOV_SAT TEMP[5].x, TEMP[5].xxxx
234: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
235: UIF TEMP[6].xxxx :0
236: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
237: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
238: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
239: ENDIF
240: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[10].xyzz
241: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
242: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
243: ADD TEMP[5].xyz, IMM[10].xyzz, -TEMP[5].xyzz
244: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
245: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
246: UIF TEMP[6].xxxx :0
247: MOV TEMP[6].x, IMM[1].yyyy
248: ELSE :0
249: MOV TEMP[6].x, IMM[0].yyyy
250: ENDIF
251: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
252: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
253: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
254: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
255: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
256: MOV TEMP[5].xy, TEMP[5].xyyy
257: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
258: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
259: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
260: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
261: MOV_SAT TEMP[5].x, TEMP[5].xxxx
262: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
263: UIF TEMP[6].xxxx :0
264: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
265: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
266: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
267: ENDIF
268: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[11].xyzz
269: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
270: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
271: ADD TEMP[5].xyz, IMM[11].xyzz, -TEMP[5].xyzz
272: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
273: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
274: UIF TEMP[6].xxxx :0
275: MOV TEMP[6].x, IMM[1].yyyy
276: ELSE :0
277: MOV TEMP[6].x, IMM[0].yyyy
278: ENDIF
279: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
280: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
281: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
282: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
283: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
284: MOV TEMP[5].xy, TEMP[5].xyyy
285: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
286: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
287: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
288: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
289: MOV_SAT TEMP[5].x, TEMP[5].xxxx
290: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
291: UIF TEMP[6].xxxx :0
292: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
293: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
294: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
295: ENDIF
296: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[12].xyzz
297: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
298: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
299: ADD TEMP[5].xyz, IMM[12].xyzz, -TEMP[5].xyzz
300: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
301: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
302: UIF TEMP[6].xxxx :0
303: MOV TEMP[6].x, IMM[1].yyyy
304: ELSE :0
305: MOV TEMP[6].x, IMM[0].yyyy
306: ENDIF
307: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
308: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
309: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
310: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
311: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
312: MOV TEMP[5].xy, TEMP[5].xyyy
313: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
314: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
315: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
316: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
317: MOV_SAT TEMP[5].x, TEMP[5].xxxx
318: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
319: UIF TEMP[6].xxxx :0
320: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
321: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
322: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
323: ENDIF
324: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[13].xyzz
325: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
326: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
327: ADD TEMP[5].xyz, IMM[13].xyzz, -TEMP[5].xyzz
328: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
329: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
330: UIF TEMP[6].xxxx :0
331: MOV TEMP[6].x, IMM[1].yyyy
332: ELSE :0
333: MOV TEMP[6].x, IMM[0].yyyy
334: ENDIF
335: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
336: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
337: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
338: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
339: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
340: MOV TEMP[5].xy, TEMP[5].xyyy
341: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
342: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
343: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
344: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
345: MOV_SAT TEMP[5].x, TEMP[5].xxxx
346: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
347: UIF TEMP[6].xxxx :0
348: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
349: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
350: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
351: ENDIF
352: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[14].xyzz
353: MUL TEMP[5].xyz, TEMP[5].xxxx, TEMP[0].xyzz
354: MUL TEMP[5].xyz, IMM[0].xxxx, TEMP[5].xyzz
355: ADD TEMP[5].xyz, IMM[14].xyzz, -TEMP[5].xyzz
356: DP3 TEMP[6].x, TEMP[2].xyzz, TEMP[5].xyzz
357: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
358: UIF TEMP[6].xxxx :0
359: MOV TEMP[6].x, IMM[1].yyyy
360: ELSE :0
361: MOV TEMP[6].x, IMM[0].yyyy
362: ENDIF
363: MUL TEMP[5].xyz, TEMP[5].xyzz, -TEMP[6].xxxx
364: MAD TEMP[5].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[5].xyzz
365: MUL TEMP[6].x, TEMP[5].zzzz, CONST[3].xxxx
366: ADD TEMP[6].x, TEMP[1].xxxx, -TEMP[6].xxxx
367: MAD TEMP[5].xy, TEMP[5].xyyy, TEMP[3].xxxx, IN[0].xyyy
368: MOV TEMP[5].xy, TEMP[5].xyyy
369: TEX TEMP[5].zw, TEMP[5], SAMP[0], 2D
370: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].yzzz
371: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
372: ADD TEMP[5].x, TEMP[6].xxxx, -TEMP[5].xxxx
373: MOV_SAT TEMP[5].x, TEMP[5].xxxx
374: FSLT TEMP[6].x, CONST[3].yyyy, TEMP[5].xxxx
375: UIF TEMP[6].xxxx :0
376: ADD TEMP[5].x, IMM[1].yyyy, -TEMP[5].xxxx
377: POW TEMP[5].x, TEMP[5].xxxx, CONST[3].zzzz
378: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
379: ENDIF
380: DP3 TEMP[5].x, TEMP[0].xyzz, IMM[15].xyzz
381: MUL TEMP[0].xyz, TEMP[5].xxxx, TEMP[0].xyzz
382: MUL TEMP[0].xyz, IMM[0].xxxx, TEMP[0].xyzz
383: ADD TEMP[0].xyz, IMM[15].xyzz, -TEMP[0].xyzz
384: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[0].xyzz
385: FSLT TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
386: UIF TEMP[5].xxxx :0
387: MOV TEMP[5].x, IMM[1].yyyy
388: ELSE :0
389: MOV TEMP[5].x, IMM[0].yyyy
390: ENDIF
391: MUL TEMP[0].xyz, TEMP[0].xyzz, -TEMP[5].xxxx
392: MAD TEMP[0].xyz, TEMP[2].xyzz, IMM[1].wwww, TEMP[0].xyzz
393: MUL TEMP[2].x, TEMP[0].zzzz, CONST[3].xxxx
394: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[2].xxxx
395: MAD TEMP[0].xy, TEMP[0].xyyy, TEMP[3].xxxx, IN[0].xyyy
396: MOV TEMP[0].xy, TEMP[0].xyyy
397: TEX TEMP[0].zw, TEMP[0], SAMP[0], 2D
398: DP2 TEMP[0].x, TEMP[0].zwww, IMM[1].yzzz
399: MUL TEMP[0].x, TEMP[0].xxxx, CONST[0].zzzz
400: ADD TEMP[0].x, TEMP[1].xxxx, -TEMP[0].xxxx
401: MOV_SAT TEMP[0].x, TEMP[0].xxxx
402: FSLT TEMP[1].x, CONST[3].yyyy, TEMP[0].xxxx
403: UIF TEMP[1].xxxx :0
404: ADD TEMP[0].x, IMM[1].yyyy, -TEMP[0].xxxx
405: POW TEMP[0].x, TEMP[0].xxxx, CONST[3].zzzz
406: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx
407: ENDIF
408: MUL TEMP[0].x, TEMP[4].xxxx, IMM[2].wwww
409: ADD TEMP[0].x, IMM[1].yyyy, -TEMP[0].xxxx
410: MOV OUT[0], TEMP[0].xxxx
411: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%28 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%29 = load <8 x i32>, <8 x i32> addrspace(2)* %28, align 32, !tbaa !0
%30 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%31 = load <4 x i32>, <4 x i32> addrspace(2)* %30, align 16, !tbaa !0
%32 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%33 = bitcast <8 x i32> addrspace(2)* %32 to <32 x i8> addrspace(2)*
%34 = load <32 x i8>, <32 x i8> addrspace(2)* %33, align 32, !tbaa !0
%35 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%36 = bitcast <4 x i32> addrspace(2)* %35 to <16 x i8> addrspace(2)*
%37 = load <16 x i8>, <16 x i8> addrspace(2)* %36, align 16, !tbaa !0
%38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%42 = bitcast float %40 to i32
%43 = bitcast float %41 to i32
%44 = insertelement <2 x i32> undef, i32 %42, i32 0
%45 = insertelement <2 x i32> %44, i32 %43, i32 1
%46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %34, <16 x i8> %37, i32 2)
%47 = extractelement <4 x float> %46, i32 0
%48 = extractelement <4 x float> %46, i32 1
%49 = extractelement <4 x float> %46, i32 2
%50 = fmul float %47, 2.000000e+00
%51 = fadd float %50, -1.000000e+00
%52 = fmul float %48, 2.000000e+00
%53 = fadd float %52, -1.000000e+00
%54 = fmul float %49, 2.000000e+00
%55 = fadd float %54, -1.000000e+00
%56 = bitcast float %38 to i32
%57 = bitcast float %39 to i32
%58 = insertelement <2 x i32> undef, i32 %56, i32 0
%59 = insertelement <2 x i32> %58, i32 %57, i32 1
%60 = bitcast <8 x i32> %29 to <32 x i8>
%61 = bitcast <4 x i32> %31 to <16 x i8>
%62 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %60, <16 x i8> %61, i32 2)
%63 = extractelement <4 x float> %62, i32 0
%64 = extractelement <4 x float> %62, i32 1
%65 = extractelement <4 x float> %62, i32 2
%66 = extractelement <4 x float> %62, i32 3
%67 = fmul float %63, 0x400C717580000000
%68 = fadd float %67, 0xBFFC717580000000
%69 = fmul float %64, 0x400C717580000000
%70 = fadd float %69, 0xBFFC717580000000
%71 = fmul float %65, 0.000000e+00
%72 = fadd float %71, 1.000000e+00
%73 = fmul float %68, %68
%74 = fmul float %70, %70
%75 = fadd float %74, %73
%76 = fmul float %72, %72
%77 = fadd float %75, %76
%78 = fdiv float 1.000000e+00, %77
%79 = fmul float %78, 2.000000e+00
%80 = fmul float %79, %68
%81 = fmul float %79, %70
%82 = fadd float %79, -1.000000e+00
%83 = fmul float %66, 0x3F70101020000000
%84 = fadd float %65, %83
%85 = fmul float %84, %24
%86 = fdiv float 1.000000e+00, %85
%87 = fmul float %25, %86
%88 = fmul float %51, 0x3FD9AA0C40000000
%89 = fmul float %53, 0x3FEC7A5F80000000
%90 = fadd float %89, %88
%91 = fmul float %55, 0xBF91F02A40000000
%92 = fadd float %90, %91
%93 = fmul float %92, %51
%94 = fmul float %92, %53
%95 = fmul float %92, %55
%96 = fmul float %93, 2.000000e+00
%97 = fmul float %94, 2.000000e+00
%98 = fmul float %95, 2.000000e+00
%99 = fsub float 0x3FD9AA0C40000000, %96
%100 = fsub float 0x3FEC7A5F80000000, %97
%101 = fsub float 0xBF91F02A40000000, %98
%102 = fmul float %80, %99
%103 = fmul float %81, %100
%104 = fadd float %103, %102
%105 = fmul float %82, %101
%106 = fadd float %104, %105
%107 = fcmp olt float %106, 0.000000e+00
%. = select i1 %107, float 1.000000e+00, float -1.000000e+00
%108 = fmul float %., %99
%109 = fmul float %., %100
%110 = fmul float %., %101
%111 = fmul float %80, 0x3FD3333340000000
%112 = fsub float %111, %108
%113 = fmul float %81, 0x3FD3333340000000
%114 = fsub float %113, %109
%115 = fmul float %82, 0x3FD3333340000000
%116 = fsub float %115, %110
%117 = fmul float %116, %25
%118 = fsub float %85, %117
%119 = fmul float %112, %87
%120 = fadd float %119, %38
%121 = fmul float %114, %87
%122 = fadd float %121, %39
%123 = bitcast float %120 to i32
%124 = bitcast float %122 to i32
%125 = insertelement <2 x i32> undef, i32 %123, i32 0
%126 = insertelement <2 x i32> %125, i32 %124, i32 1
%127 = bitcast <8 x i32> %29 to <32 x i8>
%128 = bitcast <4 x i32> %31 to <16 x i8>
%129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2)
%130 = extractelement <4 x float> %129, i32 2
%131 = extractelement <4 x float> %129, i32 3
%132 = fmul float %131, 0x3F70101020000000
%133 = fadd float %130, %132
%134 = fmul float %133, %24
%135 = fsub float %118, %134
%136 = call float @llvm.AMDIL.clamp.(float %135, float 0.000000e+00, float 1.000000e+00)
%137 = fcmp olt float %26, %136
br i1 %137, label %IF29, label %ENDIF28
IF29: ; preds = %main_body
%138 = fsub float 1.000000e+00, %136
%139 = call float @llvm.pow.f32(float %138, float %27)
br label %ENDIF28
ENDIF28: ; preds = %main_body, %IF29
%temp16.0 = phi float [ %139, %IF29 ], [ 0.000000e+00, %main_body ]
%140 = fmul float %51, 0x3FC4B55400000000
%141 = fmul float %53, 0x3FC1222AC0000000
%142 = fadd float %141, %140
%143 = fmul float %55, 0xBFD6985920000000
%144 = fadd float %142, %143
%145 = fmul float %144, %51
%146 = fmul float %144, %53
%147 = fmul float %144, %55
%148 = fmul float %145, 2.000000e+00
%149 = fmul float %146, 2.000000e+00
%150 = fmul float %147, 2.000000e+00
%151 = fsub float 0x3FC4B55400000000, %148
%152 = fsub float 0x3FC1222AC0000000, %149
%153 = fsub float 0xBFD6985920000000, %150
%154 = fmul float %80, %151
%155 = fmul float %81, %152
%156 = fadd float %155, %154
%157 = fmul float %82, %153
%158 = fadd float %156, %157
%159 = fcmp olt float %158, 0.000000e+00
%.109 = select i1 %159, float 1.000000e+00, float -1.000000e+00
%160 = fmul float %.109, %151
%161 = fmul float %.109, %152
%162 = fmul float %.109, %153
%163 = fmul float %80, 0x3FD3333340000000
%164 = fsub float %163, %160
%165 = fmul float %81, 0x3FD3333340000000
%166 = fsub float %165, %161
%167 = fmul float %82, 0x3FD3333340000000
%168 = fsub float %167, %162
%169 = fmul float %168, %25
%170 = fsub float %85, %169
%171 = fmul float %164, %87
%172 = fadd float %171, %38
%173 = fmul float %166, %87
%174 = fadd float %173, %39
%175 = bitcast float %172 to i32
%176 = bitcast float %174 to i32
%177 = insertelement <2 x i32> undef, i32 %175, i32 0
%178 = insertelement <2 x i32> %177, i32 %176, i32 1
%179 = bitcast <8 x i32> %29 to <32 x i8>
%180 = bitcast <4 x i32> %31 to <16 x i8>
%181 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %178, <32 x i8> %179, <16 x i8> %180, i32 2)
%182 = extractelement <4 x float> %181, i32 2
%183 = extractelement <4 x float> %181, i32 3
%184 = fmul float %183, 0x3F70101020000000
%185 = fadd float %182, %184
%186 = fmul float %185, %24
%187 = fsub float %170, %186
%188 = call float @llvm.AMDIL.clamp.(float %187, float 0.000000e+00, float 1.000000e+00)
%189 = fcmp olt float %26, %188
br i1 %189, label %IF35, label %ENDIF34
IF35: ; preds = %ENDIF28
%190 = fsub float 1.000000e+00, %188
%191 = call float @llvm.pow.f32(float %190, float %27)
%192 = fadd float %temp16.0, %191
br label %ENDIF34
ENDIF34: ; preds = %ENDIF28, %IF35
%temp16.1 = phi float [ %192, %IF35 ], [ %temp16.0, %ENDIF28 ]
%193 = fmul float %51, 0xBFCD81FE80000000
%194 = fmul float %53, 0xBFC85232E0000000
%195 = fadd float %194, %193
%196 = fmul float %55, 0x3FE014CDE0000000
%197 = fadd float %195, %196
%198 = fmul float %197, %51
%199 = fmul float %197, %53
%200 = fmul float %197, %55
%201 = fmul float %198, 2.000000e+00
%202 = fmul float %199, 2.000000e+00
%203 = fmul float %200, 2.000000e+00
%204 = fsub float 0xBFCD81FE80000000, %201
%205 = fsub float 0xBFC85232E0000000, %202
%206 = fsub float 0x3FE014CDE0000000, %203
%207 = fmul float %80, %204
%208 = fmul float %81, %205
%209 = fadd float %208, %207
%210 = fmul float %82, %206
%211 = fadd float %209, %210
%212 = fcmp olt float %211, 0.000000e+00
%.110 = select i1 %212, float 1.000000e+00, float -1.000000e+00
%213 = fmul float %.110, %204
%214 = fmul float %.110, %205
%215 = fmul float %.110, %206
%216 = fmul float %80, 0x3FD3333340000000
%217 = fsub float %216, %213
%218 = fmul float %81, 0x3FD3333340000000
%219 = fsub float %218, %214
%220 = fmul float %82, 0x3FD3333340000000
%221 = fsub float %220, %215
%222 = fmul float %221, %25
%223 = fsub float %85, %222
%224 = fmul float %217, %87
%225 = fadd float %224, %38
%226 = fmul float %219, %87
%227 = fadd float %226, %39
%228 = bitcast float %225 to i32
%229 = bitcast float %227 to i32
%230 = insertelement <2 x i32> undef, i32 %228, i32 0
%231 = insertelement <2 x i32> %230, i32 %229, i32 1
%232 = bitcast <8 x i32> %29 to <32 x i8>
%233 = bitcast <4 x i32> %31 to <16 x i8>
%234 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %231, <32 x i8> %232, <16 x i8> %233, i32 2)
%235 = extractelement <4 x float> %234, i32 2
%236 = extractelement <4 x float> %234, i32 3
%237 = fmul float %236, 0x3F70101020000000
%238 = fadd float %235, %237
%239 = fmul float %238, %24
%240 = fsub float %223, %239
%241 = call float @llvm.AMDIL.clamp.(float %240, float 0.000000e+00, float 1.000000e+00)
%242 = fcmp olt float %26, %241
br i1 %242, label %IF41, label %ENDIF40
IF41: ; preds = %ENDIF34
%243 = fsub float 1.000000e+00, %241
%244 = call float @llvm.pow.f32(float %243, float %27)
%245 = fadd float %temp16.1, %244
br label %ENDIF40
ENDIF40: ; preds = %ENDIF34, %IF41
%temp16.2 = phi float [ %245, %IF41 ], [ %temp16.1, %ENDIF34 ]
%246 = fmul float %51, 0xBFE40579C0000000
%247 = fmul float %53, 0x3FBFC95980000000
%248 = fadd float %247, %246
%249 = fmul float %55, 0x3FBDCBF1E0000000
%250 = fadd float %248, %249
%251 = fmul float %250, %51
%252 = fmul float %250, %53
%253 = fmul float %250, %55
%254 = fmul float %251, 2.000000e+00
%255 = fmul float %252, 2.000000e+00
%256 = fmul float %253, 2.000000e+00
%257 = fsub float 0xBFE40579C0000000, %254
%258 = fsub float 0x3FBFC95980000000, %255
%259 = fsub float 0x3FBDCBF1E0000000, %256
%260 = fmul float %80, %257
%261 = fmul float %81, %258
%262 = fadd float %261, %260
%263 = fmul float %82, %259
%264 = fadd float %262, %263
%265 = fcmp olt float %264, 0.000000e+00
%.111 = select i1 %265, float 1.000000e+00, float -1.000000e+00
%266 = fmul float %.111, %257
%267 = fmul float %.111, %258
%268 = fmul float %.111, %259
%269 = fmul float %80, 0x3FD3333340000000
%270 = fsub float %269, %266
%271 = fmul float %81, 0x3FD3333340000000
%272 = fsub float %271, %267
%273 = fmul float %82, 0x3FD3333340000000
%274 = fsub float %273, %268
%275 = fmul float %274, %25
%276 = fsub float %85, %275
%277 = fmul float %270, %87
%278 = fadd float %277, %38
%279 = fmul float %272, %87
%280 = fadd float %279, %39
%281 = bitcast float %278 to i32
%282 = bitcast float %280 to i32
%283 = insertelement <2 x i32> undef, i32 %281, i32 0
%284 = insertelement <2 x i32> %283, i32 %282, i32 1
%285 = bitcast <8 x i32> %29 to <32 x i8>
%286 = bitcast <4 x i32> %31 to <16 x i8>
%287 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %284, <32 x i8> %285, <16 x i8> %286, i32 2)
%288 = extractelement <4 x float> %287, i32 2
%289 = extractelement <4 x float> %287, i32 3
%290 = fmul float %289, 0x3F70101020000000
%291 = fadd float %288, %290
%292 = fmul float %291, %24
%293 = fsub float %276, %292
%294 = call float @llvm.AMDIL.clamp.(float %293, float 0.000000e+00, float 1.000000e+00)
%295 = fcmp olt float %26, %294
br i1 %295, label %IF47, label %ENDIF46
IF47: ; preds = %ENDIF40
%296 = fsub float 1.000000e+00, %294
%297 = call float @llvm.pow.f32(float %296, float %27)
%298 = fadd float %temp16.2, %297
br label %ENDIF46
ENDIF46: ; preds = %ENDIF40, %IF47
%temp16.3 = phi float [ %298, %IF47 ], [ %temp16.2, %ENDIF40 ]
%299 = fmul float %51, 0x3FD873F9C0000000
%300 = fmul float %53, 0xBFD4BEB4E0000000
%301 = fadd float %300, %299
%302 = fmul float %55, 0x3FDA5273E0000000
%303 = fadd float %301, %302
%304 = fmul float %303, %51
%305 = fmul float %303, %53
%306 = fmul float %303, %55
%307 = fmul float %304, 2.000000e+00
%308 = fmul float %305, 2.000000e+00
%309 = fmul float %306, 2.000000e+00
%310 = fsub float 0x3FD873F9C0000000, %307
%311 = fsub float 0xBFD4BEB4E0000000, %308
%312 = fsub float 0x3FDA5273E0000000, %309
%313 = fmul float %80, %310
%314 = fmul float %81, %311
%315 = fadd float %314, %313
%316 = fmul float %82, %312
%317 = fadd float %315, %316
%318 = fcmp olt float %317, 0.000000e+00
%.112 = select i1 %318, float 1.000000e+00, float -1.000000e+00
%319 = fmul float %.112, %310
%320 = fmul float %.112, %311
%321 = fmul float %.112, %312
%322 = fmul float %80, 0x3FD3333340000000
%323 = fsub float %322, %319
%324 = fmul float %81, 0x3FD3333340000000
%325 = fsub float %324, %320
%326 = fmul float %82, 0x3FD3333340000000
%327 = fsub float %326, %321
%328 = fmul float %327, %25
%329 = fsub float %85, %328
%330 = fmul float %323, %87
%331 = fadd float %330, %38
%332 = fmul float %325, %87
%333 = fadd float %332, %39
%334 = bitcast float %331 to i32
%335 = bitcast float %333 to i32
%336 = insertelement <2 x i32> undef, i32 %334, i32 0
%337 = insertelement <2 x i32> %336, i32 %335, i32 1
%338 = bitcast <8 x i32> %29 to <32 x i8>
%339 = bitcast <4 x i32> %31 to <16 x i8>
%340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %337, <32 x i8> %338, <16 x i8> %339, i32 2)
%341 = extractelement <4 x float> %340, i32 2
%342 = extractelement <4 x float> %340, i32 3
%343 = fmul float %342, 0x3F70101020000000
%344 = fadd float %341, %343
%345 = fmul float %344, %24
%346 = fsub float %329, %345
%347 = call float @llvm.AMDIL.clamp.(float %346, float 0.000000e+00, float 1.000000e+00)
%348 = fcmp olt float %26, %347
br i1 %348, label %IF53, label %ENDIF52
IF53: ; preds = %ENDIF46
%349 = fsub float 1.000000e+00, %347
%350 = call float @llvm.pow.f32(float %349, float %27)
%351 = fadd float %temp16.3, %350
br label %ENDIF52
ENDIF52: ; preds = %ENDIF46, %IF53
%temp16.4 = phi float [ %351, %IF53 ], [ %temp16.3, %ENDIF46 ]
%352 = fmul float %51, 0xBFB69A9A00000000
%353 = fmul float %53, 0x3FC51DEE20000000
%354 = fadd float %353, %352
%355 = fmul float %55, 0x3FC1DE0420000000
%356 = fadd float %354, %355
%357 = fmul float %356, %51
%358 = fmul float %356, %53
%359 = fmul float %356, %55
%360 = fmul float %357, 2.000000e+00
%361 = fmul float %358, 2.000000e+00
%362 = fmul float %359, 2.000000e+00
%363 = fsub float 0xBFB69A9A00000000, %360
%364 = fsub float 0x3FC51DEE20000000, %361
%365 = fsub float 0x3FC1DE0420000000, %362
%366 = fmul float %80, %363
%367 = fmul float %81, %364
%368 = fadd float %367, %366
%369 = fmul float %82, %365
%370 = fadd float %368, %369
%371 = fcmp olt float %370, 0.000000e+00
%.113 = select i1 %371, float 1.000000e+00, float -1.000000e+00
%372 = fmul float %.113, %363
%373 = fmul float %.113, %364
%374 = fmul float %.113, %365
%375 = fmul float %80, 0x3FD3333340000000
%376 = fsub float %375, %372
%377 = fmul float %81, 0x3FD3333340000000
%378 = fsub float %377, %373
%379 = fmul float %82, 0x3FD3333340000000
%380 = fsub float %379, %374
%381 = fmul float %380, %25
%382 = fsub float %85, %381
%383 = fmul float %376, %87
%384 = fadd float %383, %38
%385 = fmul float %378, %87
%386 = fadd float %385, %39
%387 = bitcast float %384 to i32
%388 = bitcast float %386 to i32
%389 = insertelement <2 x i32> undef, i32 %387, i32 0
%390 = insertelement <2 x i32> %389, i32 %388, i32 1
%391 = bitcast <8 x i32> %29 to <32 x i8>
%392 = bitcast <4 x i32> %31 to <16 x i8>
%393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %390, <32 x i8> %391, <16 x i8> %392, i32 2)
%394 = extractelement <4 x float> %393, i32 2
%395 = extractelement <4 x float> %393, i32 3
%396 = fmul float %395, 0x3F70101020000000
%397 = fadd float %394, %396
%398 = fmul float %397, %24
%399 = fsub float %382, %398
%400 = call float @llvm.AMDIL.clamp.(float %399, float 0.000000e+00, float 1.000000e+00)
%401 = fcmp olt float %26, %400
br i1 %401, label %IF59, label %ENDIF58
IF59: ; preds = %ENDIF52
%402 = fsub float 1.000000e+00, %400
%403 = call float @llvm.pow.f32(float %402, float %27)
%404 = fadd float %temp16.4, %403
br label %ENDIF58
ENDIF58: ; preds = %ENDIF52, %IF59
%temp16.5 = phi float [ %404, %IF59 ], [ %temp16.4, %ENDIF52 ]
%405 = fmul float %51, 0x3FC836A5A0000000
%406 = fmul float %53, 0xBFC06E9BC0000000
%407 = fadd float %406, %405
%408 = fmul float %55, 0xBFB946BC00000000
%409 = fadd float %407, %408
%410 = fmul float %409, %51
%411 = fmul float %409, %53
%412 = fmul float %409, %55
%413 = fmul float %410, 2.000000e+00
%414 = fmul float %411, 2.000000e+00
%415 = fmul float %412, 2.000000e+00
%416 = fsub float 0x3FC836A5A0000000, %413
%417 = fsub float 0xBFC06E9BC0000000, %414
%418 = fsub float 0xBFB946BC00000000, %415
%419 = fmul float %80, %416
%420 = fmul float %81, %417
%421 = fadd float %420, %419
%422 = fmul float %82, %418
%423 = fadd float %421, %422
%424 = fcmp olt float %423, 0.000000e+00
%.114 = select i1 %424, float 1.000000e+00, float -1.000000e+00
%425 = fmul float %.114, %416
%426 = fmul float %.114, %417
%427 = fmul float %.114, %418
%428 = fmul float %80, 0x3FD3333340000000
%429 = fsub float %428, %425
%430 = fmul float %81, 0x3FD3333340000000
%431 = fsub float %430, %426
%432 = fmul float %82, 0x3FD3333340000000
%433 = fsub float %432, %427
%434 = fmul float %433, %25
%435 = fsub float %85, %434
%436 = fmul float %429, %87
%437 = fadd float %436, %38
%438 = fmul float %431, %87
%439 = fadd float %438, %39
%440 = bitcast float %437 to i32
%441 = bitcast float %439 to i32
%442 = insertelement <2 x i32> undef, i32 %440, i32 0
%443 = insertelement <2 x i32> %442, i32 %441, i32 1
%444 = bitcast <8 x i32> %29 to <32 x i8>
%445 = bitcast <4 x i32> %31 to <16 x i8>
%446 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %443, <32 x i8> %444, <16 x i8> %445, i32 2)
%447 = extractelement <4 x float> %446, i32 2
%448 = extractelement <4 x float> %446, i32 3
%449 = fmul float %448, 0x3F70101020000000
%450 = fadd float %447, %449
%451 = fmul float %450, %24
%452 = fsub float %435, %451
%453 = call float @llvm.AMDIL.clamp.(float %452, float 0.000000e+00, float 1.000000e+00)
%454 = fcmp olt float %26, %453
br i1 %454, label %IF65, label %ENDIF64
IF65: ; preds = %ENDIF58
%455 = fsub float 1.000000e+00, %453
%456 = call float @llvm.pow.f32(float %455, float %27)
%457 = fadd float %temp16.5, %456
br label %ENDIF64
ENDIF64: ; preds = %ENDIF58, %IF65
%temp16.6 = phi float [ %457, %IF65 ], [ %temp16.5, %ENDIF58 ]
%458 = fmul float %51, 0x3FC96C30A0000000
%459 = fmul float %53, 0x3FC69EE380000000
%460 = fadd float %459, %458
%461 = fmul float %55, 0x3FDC08FF20000000
%462 = fadd float %460, %461
%463 = fmul float %462, %51
%464 = fmul float %462, %53
%465 = fmul float %462, %55
%466 = fmul float %463, 2.000000e+00
%467 = fmul float %464, 2.000000e+00
%468 = fmul float %465, 2.000000e+00
%469 = fsub float 0x3FC96C30A0000000, %466
%470 = fsub float 0x3FC69EE380000000, %467
%471 = fsub float 0x3FDC08FF20000000, %468
%472 = fmul float %80, %469
%473 = fmul float %81, %470
%474 = fadd float %473, %472
%475 = fmul float %82, %471
%476 = fadd float %474, %475
%477 = fcmp olt float %476, 0.000000e+00
%.115 = select i1 %477, float 1.000000e+00, float -1.000000e+00
%478 = fmul float %.115, %469
%479 = fmul float %.115, %470
%480 = fmul float %.115, %471
%481 = fmul float %80, 0x3FD3333340000000
%482 = fsub float %481, %478
%483 = fmul float %81, 0x3FD3333340000000
%484 = fsub float %483, %479
%485 = fmul float %82, 0x3FD3333340000000
%486 = fsub float %485, %480
%487 = fmul float %486, %25
%488 = fsub float %85, %487
%489 = fmul float %482, %87
%490 = fadd float %489, %38
%491 = fmul float %484, %87
%492 = fadd float %491, %39
%493 = bitcast float %490 to i32
%494 = bitcast float %492 to i32
%495 = insertelement <2 x i32> undef, i32 %493, i32 0
%496 = insertelement <2 x i32> %495, i32 %494, i32 1
%497 = bitcast <8 x i32> %29 to <32 x i8>
%498 = bitcast <4 x i32> %31 to <16 x i8>
%499 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %496, <32 x i8> %497, <16 x i8> %498, i32 2)
%500 = extractelement <4 x float> %499, i32 2
%501 = extractelement <4 x float> %499, i32 3
%502 = fmul float %501, 0x3F70101020000000
%503 = fadd float %500, %502
%504 = fmul float %503, %24
%505 = fsub float %488, %504
%506 = call float @llvm.AMDIL.clamp.(float %505, float 0.000000e+00, float 1.000000e+00)
%507 = fcmp olt float %26, %506
br i1 %507, label %IF71, label %ENDIF70
IF71: ; preds = %ENDIF64
%508 = fsub float 1.000000e+00, %506
%509 = call float @llvm.pow.f32(float %508, float %27)
%510 = fadd float %temp16.6, %509
br label %ENDIF70
ENDIF70: ; preds = %ENDIF64, %IF71
%temp16.7 = phi float [ %510, %IF71 ], [ %temp16.6, %ENDIF64 ]
%511 = fmul float %51, 0xBFD51678E0000000
%512 = fmul float %53, 0x3F9B7CD6C0000000
%513 = fadd float %512, %511
%514 = fmul float %55, 0xBFD9BD6040000000
%515 = fadd float %513, %514
%516 = fmul float %515, %51
%517 = fmul float %515, %53
%518 = fmul float %515, %55
%519 = fmul float %516, 2.000000e+00
%520 = fmul float %517, 2.000000e+00
%521 = fmul float %518, 2.000000e+00
%522 = fsub float 0xBFD51678E0000000, %519
%523 = fsub float 0x3F9B7CD6C0000000, %520
%524 = fsub float 0xBFD9BD6040000000, %521
%525 = fmul float %80, %522
%526 = fmul float %81, %523
%527 = fadd float %526, %525
%528 = fmul float %82, %524
%529 = fadd float %527, %528
%530 = fcmp olt float %529, 0.000000e+00
%.116 = select i1 %530, float 1.000000e+00, float -1.000000e+00
%531 = fmul float %.116, %522
%532 = fmul float %.116, %523
%533 = fmul float %.116, %524
%534 = fmul float %80, 0x3FD3333340000000
%535 = fsub float %534, %531
%536 = fmul float %81, 0x3FD3333340000000
%537 = fsub float %536, %532
%538 = fmul float %82, 0x3FD3333340000000
%539 = fsub float %538, %533
%540 = fmul float %539, %25
%541 = fsub float %85, %540
%542 = fmul float %535, %87
%543 = fadd float %542, %38
%544 = fmul float %537, %87
%545 = fadd float %544, %39
%546 = bitcast float %543 to i32
%547 = bitcast float %545 to i32
%548 = insertelement <2 x i32> undef, i32 %546, i32 0
%549 = insertelement <2 x i32> %548, i32 %547, i32 1
%550 = bitcast <8 x i32> %29 to <32 x i8>
%551 = bitcast <4 x i32> %31 to <16 x i8>
%552 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %549, <32 x i8> %550, <16 x i8> %551, i32 2)
%553 = extractelement <4 x float> %552, i32 2
%554 = extractelement <4 x float> %552, i32 3
%555 = fmul float %554, 0x3F70101020000000
%556 = fadd float %553, %555
%557 = fmul float %556, %24
%558 = fsub float %541, %557
%559 = call float @llvm.AMDIL.clamp.(float %558, float 0.000000e+00, float 1.000000e+00)
%560 = fcmp olt float %26, %559
br i1 %560, label %IF77, label %ENDIF76
IF77: ; preds = %ENDIF70
%561 = fsub float 1.000000e+00, %559
%562 = call float @llvm.pow.f32(float %561, float %27)
%563 = fadd float %temp16.7, %562
br label %ENDIF76
ENDIF76: ; preds = %ENDIF70, %IF77
%temp16.8 = phi float [ %563, %IF77 ], [ %temp16.7, %ENDIF70 ]
%564 = fmul float %51, 0xBF9408DAE0000000
%565 = fmul float %53, 0xBFD3E43FC0000000
%566 = fadd float %565, %564
%567 = fmul float %55, 0xBFDA484D80000000
%568 = fadd float %566, %567
%569 = fmul float %568, %51
%570 = fmul float %568, %53
%571 = fmul float %568, %55
%572 = fmul float %569, 2.000000e+00
%573 = fmul float %570, 2.000000e+00
%574 = fmul float %571, 2.000000e+00
%575 = fsub float 0xBF9408DAE0000000, %572
%576 = fsub float 0xBFD3E43FC0000000, %573
%577 = fsub float 0xBFDA484D80000000, %574
%578 = fmul float %80, %575
%579 = fmul float %81, %576
%580 = fadd float %579, %578
%581 = fmul float %82, %577
%582 = fadd float %580, %581
%583 = fcmp olt float %582, 0.000000e+00
%.117 = select i1 %583, float 1.000000e+00, float -1.000000e+00
%584 = fmul float %.117, %575
%585 = fmul float %.117, %576
%586 = fmul float %.117, %577
%587 = fmul float %80, 0x3FD3333340000000
%588 = fsub float %587, %584
%589 = fmul float %81, 0x3FD3333340000000
%590 = fsub float %589, %585
%591 = fmul float %82, 0x3FD3333340000000
%592 = fsub float %591, %586
%593 = fmul float %592, %25
%594 = fsub float %85, %593
%595 = fmul float %588, %87
%596 = fadd float %595, %38
%597 = fmul float %590, %87
%598 = fadd float %597, %39
%599 = bitcast float %596 to i32
%600 = bitcast float %598 to i32
%601 = insertelement <2 x i32> undef, i32 %599, i32 0
%602 = insertelement <2 x i32> %601, i32 %600, i32 1
%603 = bitcast <8 x i32> %29 to <32 x i8>
%604 = bitcast <4 x i32> %31 to <16 x i8>
%605 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %602, <32 x i8> %603, <16 x i8> %604, i32 2)
%606 = extractelement <4 x float> %605, i32 2
%607 = extractelement <4 x float> %605, i32 3
%608 = fmul float %607, 0x3F70101020000000
%609 = fadd float %606, %608
%610 = fmul float %609, %24
%611 = fsub float %594, %610
%612 = call float @llvm.AMDIL.clamp.(float %611, float 0.000000e+00, float 1.000000e+00)
%613 = fcmp olt float %26, %612
br i1 %613, label %IF83, label %ENDIF82
IF83: ; preds = %ENDIF76
%614 = fsub float 1.000000e+00, %612
%615 = call float @llvm.pow.f32(float %614, float %27)
%616 = fadd float %temp16.8, %615
br label %ENDIF82
ENDIF82: ; preds = %ENDIF76, %IF83
%temp16.9 = phi float [ %616, %IF83 ], [ %temp16.8, %ENDIF76 ]
%617 = fmul float %51, 0xBFD4944600000000
%618 = fmul float %53, 0x3FE5DCD060000000
%619 = fadd float %618, %617
%620 = fmul float %55, 0xBFD5F95BA0000000
%621 = fadd float %619, %620
%622 = fmul float %621, %51
%623 = fmul float %621, %53
%624 = fmul float %621, %55
%625 = fmul float %622, 2.000000e+00
%626 = fmul float %623, 2.000000e+00
%627 = fmul float %624, 2.000000e+00
%628 = fsub float 0xBFD4944600000000, %625
%629 = fsub float 0x3FE5DCD060000000, %626
%630 = fsub float 0xBFD5F95BA0000000, %627
%631 = fmul float %80, %628
%632 = fmul float %81, %629
%633 = fadd float %632, %631
%634 = fmul float %82, %630
%635 = fadd float %633, %634
%636 = fcmp olt float %635, 0.000000e+00
%.118 = select i1 %636, float 1.000000e+00, float -1.000000e+00
%637 = fmul float %.118, %628
%638 = fmul float %.118, %629
%639 = fmul float %.118, %630
%640 = fmul float %80, 0x3FD3333340000000
%641 = fsub float %640, %637
%642 = fmul float %81, 0x3FD3333340000000
%643 = fsub float %642, %638
%644 = fmul float %82, 0x3FD3333340000000
%645 = fsub float %644, %639
%646 = fmul float %645, %25
%647 = fsub float %85, %646
%648 = fmul float %641, %87
%649 = fadd float %648, %38
%650 = fmul float %643, %87
%651 = fadd float %650, %39
%652 = bitcast float %649 to i32
%653 = bitcast float %651 to i32
%654 = insertelement <2 x i32> undef, i32 %652, i32 0
%655 = insertelement <2 x i32> %654, i32 %653, i32 1
%656 = bitcast <8 x i32> %29 to <32 x i8>
%657 = bitcast <4 x i32> %31 to <16 x i8>
%658 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %655, <32 x i8> %656, <16 x i8> %657, i32 2)
%659 = extractelement <4 x float> %658, i32 2
%660 = extractelement <4 x float> %658, i32 3
%661 = fmul float %660, 0x3F70101020000000
%662 = fadd float %659, %661
%663 = fmul float %662, %24
%664 = fsub float %647, %663
%665 = call float @llvm.AMDIL.clamp.(float %664, float 0.000000e+00, float 1.000000e+00)
%666 = fcmp olt float %26, %665
br i1 %666, label %IF89, label %ENDIF88
IF89: ; preds = %ENDIF82
%667 = fsub float 1.000000e+00, %665
%668 = call float @llvm.pow.f32(float %667, float %27)
%669 = fadd float %temp16.9, %668
br label %ENDIF88
ENDIF88: ; preds = %ENDIF82, %IF89
%temp16.10 = phi float [ %669, %IF89 ], [ %temp16.9, %ENDIF82 ]
%670 = fmul float %51, 0x3FE67BCD40000000
%671 = fmul float %53, 0x3FC518FB80000000
%672 = fadd float %671, %670
%673 = fmul float %55, 0x3F970BE0E0000000
%674 = fadd float %672, %673
%675 = fmul float %674, %51
%676 = fmul float %674, %53
%677 = fmul float %674, %55
%678 = fmul float %675, 2.000000e+00
%679 = fmul float %676, 2.000000e+00
%680 = fmul float %677, 2.000000e+00
%681 = fsub float 0x3FE67BCD40000000, %678
%682 = fsub float 0x3FC518FB80000000, %679
%683 = fsub float 0x3F970BE0E0000000, %680
%684 = fmul float %80, %681
%685 = fmul float %81, %682
%686 = fadd float %685, %684
%687 = fmul float %82, %683
%688 = fadd float %686, %687
%689 = fcmp olt float %688, 0.000000e+00
%.119 = select i1 %689, float 1.000000e+00, float -1.000000e+00
%690 = fmul float %.119, %681
%691 = fmul float %.119, %682
%692 = fmul float %.119, %683
%693 = fmul float %80, 0x3FD3333340000000
%694 = fsub float %693, %690
%695 = fmul float %81, 0x3FD3333340000000
%696 = fsub float %695, %691
%697 = fmul float %82, 0x3FD3333340000000
%698 = fsub float %697, %692
%699 = fmul float %698, %25
%700 = fsub float %85, %699
%701 = fmul float %694, %87
%702 = fadd float %701, %38
%703 = fmul float %696, %87
%704 = fadd float %703, %39
%705 = bitcast float %702 to i32
%706 = bitcast float %704 to i32
%707 = insertelement <2 x i32> undef, i32 %705, i32 0
%708 = insertelement <2 x i32> %707, i32 %706, i32 1
%709 = bitcast <8 x i32> %29 to <32 x i8>
%710 = bitcast <4 x i32> %31 to <16 x i8>
%711 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %708, <32 x i8> %709, <16 x i8> %710, i32 2)
%712 = extractelement <4 x float> %711, i32 2
%713 = extractelement <4 x float> %711, i32 3
%714 = fmul float %713, 0x3F70101020000000
%715 = fadd float %712, %714
%716 = fmul float %715, %24
%717 = fsub float %700, %716
%718 = call float @llvm.AMDIL.clamp.(float %717, float 0.000000e+00, float 1.000000e+00)
%719 = fcmp olt float %26, %718
br i1 %719, label %IF95, label %ENDIF94
IF95: ; preds = %ENDIF88
%720 = fsub float 1.000000e+00, %718
%721 = call float @llvm.pow.f32(float %720, float %27)
%722 = fadd float %temp16.10, %721
br label %ENDIF94
ENDIF94: ; preds = %ENDIF88, %IF95
%temp16.11 = phi float [ %722, %IF95 ], [ %temp16.10, %ENDIF88 ]
%723 = fmul float %51, 0x3FA2F783E0000000
%724 = fmul float %53, 0xBFEE0D5C80000000
%725 = fadd float %724, %723
%726 = fmul float %55, 0x3FC16466C0000000
%727 = fadd float %725, %726
%728 = fmul float %727, %51
%729 = fmul float %727, %53
%730 = fmul float %727, %55
%731 = fmul float %728, 2.000000e+00
%732 = fmul float %729, 2.000000e+00
%733 = fmul float %730, 2.000000e+00
%734 = fsub float 0x3FA2F783E0000000, %731
%735 = fsub float 0xBFEE0D5C80000000, %732
%736 = fsub float 0x3FC16466C0000000, %733
%737 = fmul float %80, %734
%738 = fmul float %81, %735
%739 = fadd float %738, %737
%740 = fmul float %82, %736
%741 = fadd float %739, %740
%742 = fcmp olt float %741, 0.000000e+00
%.120 = select i1 %742, float 1.000000e+00, float -1.000000e+00
%743 = fmul float %.120, %734
%744 = fmul float %.120, %735
%745 = fmul float %.120, %736
%746 = fmul float %80, 0x3FD3333340000000
%747 = fsub float %746, %743
%748 = fmul float %81, 0x3FD3333340000000
%749 = fsub float %748, %744
%750 = fmul float %82, 0x3FD3333340000000
%751 = fsub float %750, %745
%752 = fmul float %751, %25
%753 = fsub float %85, %752
%754 = fmul float %747, %87
%755 = fadd float %754, %38
%756 = fmul float %749, %87
%757 = fadd float %756, %39
%758 = bitcast float %755 to i32
%759 = bitcast float %757 to i32
%760 = insertelement <2 x i32> undef, i32 %758, i32 0
%761 = insertelement <2 x i32> %760, i32 %759, i32 1
%762 = bitcast <8 x i32> %29 to <32 x i8>
%763 = bitcast <4 x i32> %31 to <16 x i8>
%764 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %761, <32 x i8> %762, <16 x i8> %763, i32 2)
%765 = extractelement <4 x float> %764, i32 2
%766 = extractelement <4 x float> %764, i32 3
%767 = fmul float %766, 0x3F70101020000000
%768 = fadd float %765, %767
%769 = fmul float %768, %24
%770 = fsub float %753, %769
%771 = call float @llvm.AMDIL.clamp.(float %770, float 0.000000e+00, float 1.000000e+00)
%772 = fcmp olt float %26, %771
br i1 %772, label %IF101, label %ENDIF100
IF101: ; preds = %ENDIF94
%773 = fsub float 1.000000e+00, %771
%774 = call float @llvm.pow.f32(float %773, float %27)
%775 = fadd float %temp16.11, %774
br label %ENDIF100
ENDIF100: ; preds = %ENDIF94, %IF101
%temp16.12 = phi float [ %775, %IF101 ], [ %temp16.11, %ENDIF94 ]
%776 = fmul float %51, 0xBFE659A880000000
%777 = fmul float %53, 0xBFE33600E0000000
%778 = fadd float %777, %776
%779 = fmul float %55, 0xBFA4911660000000
%780 = fadd float %778, %779
%781 = fmul float %780, %51
%782 = fmul float %780, %53
%783 = fmul float %780, %55
%784 = fmul float %781, 2.000000e+00
%785 = fmul float %782, 2.000000e+00
%786 = fmul float %783, 2.000000e+00
%787 = fsub float 0xBFE659A880000000, %784
%788 = fsub float 0xBFE33600E0000000, %785
%789 = fsub float 0xBFA4911660000000, %786
%790 = fmul float %80, %787
%791 = fmul float %81, %788
%792 = fadd float %791, %790
%793 = fmul float %82, %789
%794 = fadd float %792, %793
%795 = fcmp olt float %794, 0.000000e+00
%.121 = select i1 %795, float 1.000000e+00, float -1.000000e+00
%796 = fmul float %.121, %787
%797 = fmul float %.121, %788
%798 = fmul float %.121, %789
%799 = fmul float %80, 0x3FD3333340000000
%800 = fsub float %799, %796
%801 = fmul float %81, 0x3FD3333340000000
%802 = fsub float %801, %797
%803 = fmul float %82, 0x3FD3333340000000
%804 = fsub float %803, %798
%805 = fmul float %804, %25
%806 = fsub float %85, %805
%807 = fmul float %800, %87
%808 = fadd float %807, %38
%809 = fmul float %802, %87
%810 = fadd float %809, %39
%811 = bitcast float %808 to i32
%812 = bitcast float %810 to i32
%813 = insertelement <2 x i32> undef, i32 %811, i32 0
%814 = insertelement <2 x i32> %813, i32 %812, i32 1
%815 = bitcast <8 x i32> %29 to <32 x i8>
%816 = bitcast <4 x i32> %31 to <16 x i8>
%817 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %814, <32 x i8> %815, <16 x i8> %816, i32 2)
%818 = extractelement <4 x float> %817, i32 2
%819 = extractelement <4 x float> %817, i32 3
%820 = fmul float %819, 0x3F70101020000000
%821 = fadd float %818, %820
%822 = fmul float %821, %24
%823 = fsub float %806, %822
%824 = call float @llvm.AMDIL.clamp.(float %823, float 0.000000e+00, float 1.000000e+00)
%825 = fcmp olt float %26, %824
br i1 %825, label %IF107, label %ENDIF106
IF107: ; preds = %ENDIF100
%826 = fsub float 1.000000e+00, %824
%827 = call float @llvm.pow.f32(float %826, float %27)
%828 = fadd float %temp16.12, %827
br label %ENDIF106
ENDIF106: ; preds = %ENDIF100, %IF107
%temp16.13 = phi float [ %828, %IF107 ], [ %temp16.12, %ENDIF100 ]
%829 = fmul float %temp16.13, 0x3FB24924A0000000
%830 = fsub float 1.000000e+00, %829
%831 = call i32 @llvm.SI.packf16(float %830, float %830)
%832 = bitcast i32 %831 to float
%833 = call i32 @llvm.SI.packf16(float %830, float %830)
%834 = bitcast i32 %833 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %832, float %834, float %832, float %834)
ret void
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata for the shader
; function that ends just above.  Nothing here has a body; these are the
; intrinsics the IR calls plus the attribute sets (#0..#2) they refer to.
; ---------------------------------------------------------------------------
; Takes a <16 x i8> value and an i32, returns one float.
; NOTE(review): presumably a constant-buffer read (descriptor + offset); no
; call site is visible in the nearby IR -- confirm against the function head.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Takes three i32 operands and a <2 x i32>, returns one float.
; NOTE(review): presumably fragment-input interpolation; operand meaning is
; not established by the nearby IR -- confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Texture sample returning <4 x float>.  The call sites above pass a
; <2 x i32> built from two bitcast float coordinates, a <32 x i8> bitcast
; from an <8 x i32> value, a <16 x i8> bitcast from a <4 x i32> value, and
; the constant i32 2.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Three-float clamp; every call site above passes (x, 0.0, 1.0).
; Uses attribute group #2 (readnone only), unlike the other pure intrinsics,
; which use #1.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Standard LLVM pow intrinsic for f32.
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Takes two floats and returns an i32; the callers above bitcast that i32
; back to float before handing it to llvm.SI.export.
; NOTE(review): presumably packs both inputs as half floats -- confirm.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Export: five i32 operands followed by four float payload values.
; Declared without an attribute group, so it is not marked readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0: string attributes "ShaderType"="0" and "enable-no-nans-fp-math"="true".
; NOTE(review): presumably attached to this shader's @main, whose define line
; is not visible here -- confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1: attribute set shared by the side-effect-free intrinsics declared above.
attributes #1 = { nounwind readnone }
; #2: readnone without nounwind; only llvm.AMDIL.clamp. uses it.
attributes #2 = { readnone }
; Metadata node !0; loads elsewhere in this dump reference it as !tbaa !0.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504
s_mov_b32 m0, s9 ; BEFC0309
v_mov_b32_e32 v4, 0xbfe38bac ; 7E0802FF BFE38BAC
v_mov_b32_e32 v5, 0x40638bac ; 7E0A02FF 40638BAC
v_mov_b32_e32 v11, 0x3b808081 ; 7E1602FF 3B808081
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[20:23], 0x2 ; C2021502
s_buffer_load_dword s16, s[20:23], 0xc ; C208150C
s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v6, v0, 2, 0, [m0] ; C8180200
v_interp_p2_f32 v6, [v6], v1, 2, 0, [m0] ; C8190201
v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300
v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[28:35], s[24:27] ; F0800700 00C70606
s_buffer_load_dword s6, s[20:23], 0xd ; C203150D
s_buffer_load_dword s5, s[20:23], 0xe ; C202950E
image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800F00 00020C02
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mad_f32 v0, v5, v12, v4 ; D2820000 04121905
v_mac_f32_e32 v4, v5, v13 ; 3E081B05
v_mad_f32 v1, 0, v14, 1.0 ; D2820001 03CA1C80
v_mad_f32 v5, v11, v15, v14 ; D2820005 043A1F0B
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_mac_f32_e32 v9, v1, v1 ; 3E120301
v_rcp_f32_e32 v12, v9 ; 7E185509
v_mad_f32 v10, 2.0, v6, -1.0 ; D282000A 03CE0CF4
v_mad_f32 v9, 2.0, v7, -1.0 ; D2820009 03CE0EF4
v_mad_f32 v8, 2.0, v8, -1.0 ; D2820008 03CE10F4
v_add_f32_e32 v6, v12, v12 ; 060C190C
v_mul_f32_e32 v1, v0, v6 ; 10020D00
v_mul_f32_e32 v0, v4, v6 ; 10000D04
v_mov_b32_e32 v13, 0x3ecd5062 ; 7E1A02FF 3ECD5062
v_mul_f32_e32 v4, v13, v10 ; 1008150D
v_mov_b32_e32 v14, 0x3f63d2fc ; 7E1C02FF 3F63D2FC
v_mac_f32_e32 v4, v14, v9 ; 3E08130E
v_mov_b32_e32 v15, 0xbc8f8152 ; 7E1E02FF BC8F8152
v_mac_f32_e32 v4, v15, v8 ; 3E08110F
v_mul_f32_e32 v6, v10, v4 ; 100C090A
v_mac_f32_e32 v13, -2.0, v6 ; 3E1A0CF5
v_mul_f32_e32 v6, v9, v4 ; 100C0909
v_mac_f32_e32 v14, -2.0, v6 ; 3E1C0CF5
v_mul_f32_e32 v4, v8, v4 ; 10080908
v_mac_f32_e32 v15, -2.0, v4 ; 3E1E08F5
v_mad_f32 v4, 2.0, v12, -1.0 ; D2820004 03CE18F4
v_mul_f32_e32 v7, v13, v1 ; 100E030D
v_mac_f32_e32 v7, v14, v0 ; 3E0E010E
v_mul_f32_e32 v6, s4, v5 ; 100C0A04
v_rcp_f32_e32 v12, v6 ; 7E185506
v_mac_f32_e32 v7, v15, v4 ; 3E0E090F
v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80
v_cndmask_b32_e64 v16, -1.0, 1.0, vcc ; D2000010 01A9E4F3
v_mul_f32_e32 v7, s16, v12 ; 100E1810
v_mul_f32_e32 v12, v13, v16 ; 1018210D
v_mul_f32_e32 v13, v14, v16 ; 101A210E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v12, v1, v14, -v12 ; D282000C 84321D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v17, v7, v12, v2 ; D2820011 040A1907
v_mad_f32 v18, v7, v13, v3 ; D2820012 040E1B07
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[8:15], s[0:3] ; F0800C00 00020C11
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v11, v11, v13, v12 ; D282000B 04321B0B
v_mul_f32_e32 v12, v15, v16 ; 1018210F
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mul_f32_e32 v12, s16, v12 ; 10181810
v_mad_f32 v5, v5, s4, -v12 ; D2820005 84300905
v_mad_f32 v5, -v11, s4, v5 ; D2820005 2414090B
v_add_f32_e64 v11, 0, v5 clamp ; D206080B 00020A80
v_mov_b32_e32 v5, 0 ; 7E0A0280
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v5, 1.0, v11 ; 080A16F2
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_mul_legacy_f32_e32 v5, s5, v5 ; 0E0A0A05
v_exp_f32_e32 v5, v5 ; 7E0A4B05
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0x3e25aaa0 ; 7E1602FF 3E25AAA0
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3e091156 ; 7E1A02FF 3E091156
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0xbeb4c2c9 ; 7E1C02FF BEB4C2C9
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbe6c0ff4 ; 7E1602FF BE6C0FF4
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0xbe429197 ; 7E1A02FF BE429197
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3f00a66f ; 7E1C02FF 3F00A66F
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbf202bce ; 7E1602FF BF202BCE
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3dfe4acc ; 7E1A02FF 3DFE4ACC
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3dee5f8f ; 7E1C02FF 3DEE5F8F
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0x3ec39fce ; 7E1602FF 3EC39FCE
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0xbea5f5a7 ; 7E1A02FF BEA5F5A7
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3ed2939f ; 7E1C02FF 3ED2939F
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbdb4d4d0 ; 7E1602FF BDB4D4D0
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3e28ef71 ; 7E1A02FF 3E28EF71
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3e0ef021 ; 7E1C02FF 3E0EF021
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0x3e41b52d ; 7E1602FF 3E41B52D
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0xbe0374de ; 7E1A02FF BE0374DE
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0xbdca35e0 ; 7E1C02FF BDCA35E0
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0x3e4b6185 ; 7E1602FF 3E4B6185
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3e34f71c ; 7E1A02FF 3E34F71C
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3ee047f9 ; 7E1C02FF 3EE047F9
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbea8b3c7 ; 7E1602FF BEA8B3C7
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3cdbe6b6 ; 7E1A02FF 3CDBE6B6
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0xbecdeb02 ; 7E1C02FF BECDEB02
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbca046d7 ; 7E1602FF BCA046D7
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0xbe9f21fe ; 7E1A02FF BE9F21FE
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0xbed2426c ; 7E1C02FF BED2426C
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbea4a230 ; 7E1602FF BEA4A230
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3f2ee683 ; 7E1A02FF 3F2EE683
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0xbeafcadd ; 7E1C02FF BEAFCADD
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0x3f33de6a ; 7E1602FF 3F33DE6A
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0x3e28c7dc ; 7E1A02FF 3E28C7DC
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3cb85f07 ; 7E1C02FF 3CB85F07
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0x3d17bc1f ; 7E1602FF 3D17BC1F
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0xbf706ae4 ; 7E1A02FF BF706AE4
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0x3e0b2336 ; 7E1C02FF 3E0B2336
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v15, v10, v12 ; 101E190A
v_mac_f32_e32 v11, -2.0, v15 ; 3E161EF5
v_mul_f32_e32 v15, v9, v12 ; 101E1909
v_mul_f32_e32 v12, v8, v12 ; 10181908
v_mac_f32_e32 v13, -2.0, v15 ; 3E1A1EF5
v_mac_f32_e32 v14, -2.0, v12 ; 3E1C18F5
v_mul_f32_e32 v12, v11, v1 ; 1018030B
v_mac_f32_e32 v12, v13, v0 ; 3E18010D
v_mac_f32_e32 v12, v14, v4 ; 3E18090E
v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880
v_cndmask_b32_e64 v12, -1.0, 1.0, vcc ; D200000C 01A9E4F3
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v14, v12 ; 1018190E
v_mov_b32_e32 v14, 0x3e99999a ; 7E1C02FF 3E99999A
v_mad_f32 v11, v1, v14, -v11 ; D282000B 842E1D01
v_mad_f32 v13, v0, v14, -v13 ; D282000D 84361D00
v_mad_f32 v12, v4, v14, -v12 ; D282000C 84321D04
v_mad_f32 v14, v7, v11, v2 ; D282000E 040A1707
v_mad_f32 v15, v7, v13, v3 ; D282000F 040E1B07
v_mad_f32 v11, -v12, s16, v6 ; D282000B 2418210C
image_sample v[12:13], 12, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[8:15], s[0:3] ; F0800C00 00020C0E
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v12, v13, v12, 0x3b808081 ; 4018190D 3B808081
v_mad_f32 v11, -v12, s4, v11 ; D282000B 242C090C
v_add_f32_e64 v11, 0, v11 clamp ; D206080B 00021680
v_cmp_lt_f32_e32 vcc, s6, v11 ; 7C021606
s_and_saveexec_b64 s[18:19], vcc ; BE92246A
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
v_sub_f32_e32 v11, 1.0, v11 ; 081616F2
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_legacy_f32_e32 v11, s5, v11 ; 0E161605
v_exp_f32_e32 v11, v11 ; 7E164B0B
v_add_f32_e32 v5, v11, v5 ; 060A0B0B
s_or_b64 exec, exec, s[18:19] ; 88FE127E
v_mov_b32_e32 v11, 0xbf32cd44 ; 7E1602FF BF32CD44
v_mul_f32_e32 v12, v11, v10 ; 1018150B
v_mov_b32_e32 v13, 0xbf19b007 ; 7E1A02FF BF19B007
v_mac_f32_e32 v12, v13, v9 ; 3E18130D
v_mov_b32_e32 v14, 0xbd2488b3 ; 7E1C02FF BD2488B3
v_mac_f32_e32 v12, v14, v8 ; 3E18110E
v_mul_f32_e32 v10, v10, v12 ; 1014190A
v_mul_f32_e32 v9, v9, v12 ; 10121909
v_mul_f32_e32 v8, v8, v12 ; 10101908
v_mac_f32_e32 v11, -2.0, v10 ; 3E1614F5
v_mac_f32_e32 v13, -2.0, v9 ; 3E1A12F5
v_mac_f32_e32 v14, -2.0, v8 ; 3E1C10F5
v_mul_f32_e32 v8, v11, v1 ; 1010030B
v_mac_f32_e32 v8, v13, v0 ; 3E10010D
v_mac_f32_e32 v8, v14, v4 ; 3E10090E
v_cmp_gt_f32_e32 vcc, 0, v8 ; 7C081080
v_cndmask_b32_e64 v8, -1.0, 1.0, vcc ; D2000008 01A9E4F3
v_mul_f32_e32 v9, v11, v8 ; 1012110B
v_mul_f32_e32 v10, v13, v8 ; 1014110D
v_mul_f32_e32 v8, v14, v8 ; 1010110E
v_mov_b32_e32 v11, 0x3e99999a ; 7E1602FF 3E99999A
v_mad_f32 v1, v1, v11, -v9 ; D2820001 84261701
v_mad_f32 v0, v0, v11, -v10 ; D2820000 842A1700
v_mad_f32 v4, v4, v11, -v8 ; D2820004 84221704
v_mad_f32 v4, -v4, s16, v6 ; D2820004 24182104
v_mac_f32_e32 v2, v7, v1 ; 3E040307
v_mac_f32_e32 v3, v7, v0 ; 3E060107
image_sample v[0:1], 12, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800C00 00020002
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v0, v1, v0, 0x3b808081 ; 40000101 3B808081
v_mad_f32 v0, -v0, s4, v4 ; D2820000 24100900
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_cmp_lt_f32_e32 vcc, s6, v0 ; 7C020006
s_and_saveexec_b64 s[0:1], vcc ; BE80246A
s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_log_f32_e32 v0, v0 ; 7E004F00
v_mul_legacy_f32_e32 v0, s5, v0 ; 0E000005
v_exp_f32_e32 v0, v0 ; 7E004B00
v_add_f32_e32 v5, v0, v5 ; 060A0B00
s_or_b64 exec, exec, s[0:1] ; 88FE007E
v_mov_b32_e32 v0, 0xbd924925 ; 7E0002FF BD924925
v_mad_f32 v0, v0, v5, 1.0 ; D2820000 03CA0B00
v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 20
Code Size: 3392 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..4]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[1], IN[0].xxxx
1: MAD TEMP[0], CONST[2], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[3], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[4], IN[0].wwww, TEMP[0]
4: MAD TEMP[1].xy, IN[1].xyyy, CONST[0].xyyy, CONST[0].zwww
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
; Vertex shader entry point lowered from the TGSI VERT program listed directly
; above this module. It computes
;   OUT[0] (POSITION)   = CONST[1]*IN[0].x + CONST[2]*IN[0].y + CONST[3]*IN[0].z + CONST[4]*IN[0].w
;   OUT[1] (GENERIC[0]) = (IN[1].x*CONST[0].x + CONST[0].z, IN[1].y*CONST[0].y + CONST[0].w, 0, 0)
; Arguments are unnamed (%0..%10). Only %1 (array the constant-buffer descriptor
; is read from), %4 (array the two vertex-input descriptors are read from) and
; %5/%7 (summed to form the index passed to llvm.SI.vs.load.input) are used.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the descriptor in slot 0 of %1 and read 20 consecutive floats from it
; (byte offsets 0..76). With 16 bytes per vec4 this is CONST[0]..CONST[4]:
;   %13..%16 = CONST[0].xyzw, %17..%20 = CONST[1].xyzw, %21..%24 = CONST[2].xyzw,
;   %25..%28 = CONST[3].xyzw, %29..%32 = CONST[4].xyzw
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; Fetch IN[0] through descriptor slot 0 of %4; all four components are used
; (%37..%40 = IN[0].xyzw).
%33 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
%35 = add i32 %5, %7
%36 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %34, i32 0, i32 %35)
%37 = extractelement <4 x float> %36, i32 0
%38 = extractelement <4 x float> %36, i32 1
%39 = extractelement <4 x float> %36, i32 2
%40 = extractelement <4 x float> %36, i32 3
; Fetch IN[1] through descriptor slot 1 of %4; only x and y are extracted
; (%45, %46) because TGSI instruction 4 writes just TEMP[1].xy.
; %43 recomputes the same index as %35.
%41 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%42 = load <16 x i8>, <16 x i8> addrspace(2)* %41, align 16, !tbaa !0
%43 = add i32 %5, %7
%44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43)
%45 = extractelement <4 x float> %44, i32 0
%46 = extractelement <4 x float> %44, i32 1
; TGSI 0: TEMP[0] = CONST[1] * IN[0].xxxx
%47 = fmul float %17, %37
%48 = fmul float %18, %37
%49 = fmul float %19, %37
%50 = fmul float %20, %37
; TGSI 1: TEMP[0] = CONST[2] * IN[0].yyyy + TEMP[0]   (each MAD is a separate fmul + fadd)
%51 = fmul float %21, %38
%52 = fadd float %51, %47
%53 = fmul float %22, %38
%54 = fadd float %53, %48
%55 = fmul float %23, %38
%56 = fadd float %55, %49
%57 = fmul float %24, %38
%58 = fadd float %57, %50
; TGSI 2: TEMP[0] = CONST[3] * IN[0].zzzz + TEMP[0]
%59 = fmul float %25, %39
%60 = fadd float %59, %52
%61 = fmul float %26, %39
%62 = fadd float %61, %54
%63 = fmul float %27, %39
%64 = fadd float %63, %56
%65 = fmul float %28, %39
%66 = fadd float %65, %58
; TGSI 3: TEMP[0] = CONST[4] * IN[0].wwww + TEMP[0]   -> %68, %70, %72, %74 = final TEMP[0].xyzw
%67 = fmul float %29, %40
%68 = fadd float %67, %60
%69 = fmul float %30, %40
%70 = fadd float %69, %62
%71 = fmul float %31, %40
%72 = fadd float %71, %64
%73 = fmul float %32, %40
%74 = fadd float %73, %66
; TGSI 4: TEMP[1].xy = IN[1].xy * CONST[0].xy + CONST[0].zw   -> %76 = x, %78 = y
%75 = fmul float %45, %13
%76 = fadd float %75, %15
%77 = fmul float %46, %14
%78 = fadd float %77, %16
; First export carries TEMP[1] (OUT[1], GENERIC[0]); z and w were never written
; and are exported as 0.0. Second export carries TEMP[0] (OUT[0], POSITION).
; NOTE(review): the 4th i32 argument (32 vs. 12) looks like the export target and
; the 3rd (0 vs. 1) like a "last export" flag -- confirm against the intrinsic's definition.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %78, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %68, float %70, float %72, float %74)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are listed above.
; The trailing "; XXXXXXXX" on each instruction is its encoding as emitted by the
; disassembler. Register roles below are inferred by matching against the IR.
;
; v0 = s10 + v0: index used by both vertex fetches (IR: add i32 %5, %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Three 4-dword descriptor loads: one via the pointer in s[2:3], two via s[8:9]
; at offsets 0x0 and 0x4. Presumably these are the constant-buffer descriptor
; (IR %12) and the two vertex-input descriptors (IR %34, %42).
; Note that the last load overwrites s[8:9] with part of its own result.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
; Descriptors must have arrived before they are used below.
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant dword 0x0 (CONST[0].x) -> s12.
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
; Fetch IN[0] into v[1:4] and IN[1] into v[5:8], both indexed by v0.
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
; Constant dwords 0x1..0xa. s[4:11] are reused here even though they held the
; vertex-input descriptors; the fetches that needed them were already issued.
;   s4 = CONST[0].y, s5 = CONST[0].z, s6 = CONST[0].w,
;   s7..s10 = CONST[1].xyzw, s11 = CONST[2].x, s13 = CONST[2].y, s14 = CONST[2].z
s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101
s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102
s_buffer_load_dword s6, s[0:3], 0x3 ; C2030103
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107
s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
; Wait for both vertex fetches and all constant loads issued so far.
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; GENERIC[0] output (TGSI instruction 4):
;   v0 = CONST[0].z + CONST[0].x * IN[1].x
;   v7 = CONST[0].w + CONST[0].y * IN[1].y
v_mov_b32_e32 v0, s5 ; 7E000205
v_mov_b32_e32 v7, s6 ; 7E0E0206
v_mac_f32_e32 v0, s12, v5 ; 3E000A0C
v_mac_f32_e32 v7, s4, v6 ; 3E0E0C04
; s4/s5/s6/s12 have been consumed, so they are reloaded with later constants.
; Constant dwords 0xb..0x13:
;   s4 = CONST[2].w, s5/s6/s12/s15 = CONST[3].xyzw, s16/s17/s18/s0 = CONST[4].xyzw
; The final load overwrites s0, part of the descriptor s[0:3] it reads through.
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s6, s[0:3], 0xd ; C203010D
s_buffer_load_dword s12, s[0:3], 0xe ; C206010E
s_buffer_load_dword s15, s[0:3], 0xf ; C207810F
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s0, s[0:3], 0x13 ; C2000113
; Position accumulation in v5 (x), v6 (y), v8 (z), v1 (w).
; This part uses only constants that were already waited on (s7..s11, s13, s14),
; so it can run while the second batch of constant loads is in flight:
;   acc  = CONST[1] * IN[0].x
;   acc += CONST[2].xyz * IN[0].y   (the .w term needs s4 and comes after the wait)
v_mul_f32_e32 v5, s7, v1 ; 100A0207
v_mac_f32_e32 v5, s11, v2 ; 3E0A040B
v_mul_f32_e32 v6, s8, v1 ; 100C0208
v_mac_f32_e32 v6, s13, v2 ; 3E0C040D
v_mul_f32_e32 v8, s9, v1 ; 10100209
v_mac_f32_e32 v8, s14, v2 ; 3E10040E
; v1 held IN[0].x; from here on it is the w accumulator.
v_mul_f32_e32 v1, s10, v1 ; 1002020A
; Second batch of constants is needed from here on.
s_waitcnt lgkmcnt(0) ; BF8C007F
; w += CONST[2].w * IN[0].y
v_mac_f32_e32 v1, s4, v2 ; 3E020404
; acc += CONST[3] * IN[0].z
v_mac_f32_e32 v5, s5, v3 ; 3E0A0605
v_mac_f32_e32 v6, s6, v3 ; 3E0C0606
v_mac_f32_e32 v8, s12, v3 ; 3E10060C
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
; acc += CONST[4] * IN[0].w
v_mac_f32_e32 v5, s16, v4 ; 3E0A0810
v_mac_f32_e32 v6, s17, v4 ; 3E0C0811
v_mac_f32_e32 v8, s18, v4 ; 3E100812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; v2 = 0.0 fills the unwritten z/w of the GENERIC[0] export.
v_mov_b32_e32 v2, 0 ; 7E040280
; Exports mirror the two llvm.SI.export calls in the IR: target 32 carries
; (v0, v7, 0, 0) for GENERIC[0]; target 12 carries (v5, v6, v8, v1) for POSITION.
exp 15, 32, 0, 0, 0, v0, v7, v2, v2 ; F800020F 02020700
exp 15, 12, 0, 1, 0, v5, v6, v8, v1 ; F80008CF 01080605
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 228 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 5.0000, 4.0000, 0.1000, 0.2000}
IMM[1] FLT32 { 1.0000, 0.0039, 2.0000, 3.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].x, TEMP[0], SAMP[1], 2D
2: MUL TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
3: MOV TEMP[1].xy, IN[0].xyyy
4: TEX TEMP[1], TEMP[1], SAMP[0], 2D
5: ADD TEMP[2].xy, IN[0].xyyy, CONST[3].xyyy
6: MOV TEMP[3].xy, TEMP[2].xyyy
7: TEX TEMP[3], TEMP[3], SAMP[0], 2D
8: ADD TEMP[4].xy, TEMP[1].xyyy, -TEMP[3].xyyy
9: ABS TEMP[4].xy, TEMP[4].xyyy
10: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[4].yyyy
11: FSLT TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz
12: DP2 TEMP[5].x, TEMP[1].zwww, IMM[1].xyyy
13: DP2 TEMP[3].x, TEMP[3].zwww, IMM[1].xyyy
14: ADD TEMP[3].x, TEMP[5].xxxx, -TEMP[3].xxxx
15: ABS TEMP[3].x, TEMP[3].xxxx
16: MUL TEMP[3].x, TEMP[3].xxxx, CONST[0].zzzz
17: FSLT TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
18: AND TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx
19: AND TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx
20: MUL TEMP[3].x, IMM[0].yyyy, TEMP[3].xxxx
21: MOV TEMP[2].xy, TEMP[2].xyyy
22: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
23: MAD TEMP[0].x, TEMP[2].xxxx, TEMP[3].xxxx, TEMP[0].xxxx
24: ADD TEMP[2].x, IMM[0].xxxx, TEMP[3].xxxx
25: MUL TEMP[3].xy, CONST[3].xyyy, IMM[1].zzzz
26: ADD TEMP[4].xy, IN[0].xyyy, TEMP[3].xyyy
27: MOV TEMP[5].xy, TEMP[4].xyyy
28: TEX TEMP[5], TEMP[5], SAMP[0], 2D
29: ADD TEMP[6].xy, TEMP[1].xyyy, -TEMP[5].xyyy
30: ABS TEMP[6].xy, TEMP[6].xyyy
31: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[6].yyyy
32: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz
33: DP2 TEMP[7].x, TEMP[1].zwww, IMM[1].xyyy
34: DP2 TEMP[5].x, TEMP[5].zwww, IMM[1].xyyy
35: ADD TEMP[5].x, TEMP[7].xxxx, -TEMP[5].xxxx
36: ABS TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[5].x, TEMP[5].xxxx, CONST[0].zzzz
38: FSLT TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww
39: AND TEMP[5].x, TEMP[6].xxxx, TEMP[5].xxxx
40: AND TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx
41: MUL TEMP[5].x, IMM[1].wwww, TEMP[5].xxxx
42: MOV TEMP[4].xy, TEMP[4].xyyy
43: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D
44: MAD TEMP[0].x, TEMP[4].xxxx, TEMP[5].xxxx, TEMP[0].xxxx
45: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[5].xxxx
46: MUL TEMP[4].xy, CONST[3].xyyy, IMM[1].wwww
47: ADD TEMP[5].xy, IN[0].xyyy, TEMP[4].xyyy
48: MOV TEMP[6].xy, TEMP[5].xyyy
49: TEX TEMP[6], TEMP[6], SAMP[0], 2D
50: ADD TEMP[7].xy, TEMP[1].xyyy, -TEMP[6].xyyy
51: ABS TEMP[7].xy, TEMP[7].xyyy
52: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].yyyy
53: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz
54: DP2 TEMP[8].x, TEMP[1].zwww, IMM[1].xyyy
55: DP2 TEMP[6].x, TEMP[6].zwww, IMM[1].xyyy
56: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx
57: ABS TEMP[6].x, TEMP[6].xxxx
58: MUL TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz
59: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
60: AND TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx
61: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx
62: MUL TEMP[6].x, IMM[1].zzzz, TEMP[6].xxxx
63: MOV TEMP[5].xy, TEMP[5].xyyy
64: TEX TEMP[5].x, TEMP[5], SAMP[1], 2D
65: MAD TEMP[0].x, TEMP[5].xxxx, TEMP[6].xxxx, TEMP[0].xxxx
66: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx
67: MUL TEMP[5].xy, CONST[3].xyyy, IMM[0].yyyy
68: ADD TEMP[6].xy, IN[0].xyyy, TEMP[5].xyyy
69: MOV TEMP[7].xy, TEMP[6].xyyy
70: TEX TEMP[7], TEMP[7], SAMP[0], 2D
71: ADD TEMP[8].xy, TEMP[1].xyyy, -TEMP[7].xyyy
72: ABS TEMP[8].xy, TEMP[8].xyyy
73: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[8].yyyy
74: FSLT TEMP[8].x, TEMP[8].xxxx, IMM[0].zzzz
75: DP2 TEMP[9].x, TEMP[1].zwww, IMM[1].xyyy
76: DP2 TEMP[7].x, TEMP[7].zwww, IMM[1].xyyy
77: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx
78: ABS TEMP[7].x, TEMP[7].xxxx
79: MUL TEMP[7].x, TEMP[7].xxxx, CONST[0].zzzz
80: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww
81: AND TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx
82: AND TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx
83: MOV TEMP[6].xy, TEMP[6].xyyy
84: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D
85: MAD TEMP[0].x, TEMP[6].xxxx, TEMP[7].xxxx, TEMP[0].xxxx
86: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[7].xxxx
87: ADD TEMP[6].xy, IN[0].xyyy, -CONST[3].xyyy
88: MOV TEMP[7].xy, TEMP[6].xyyy
89: TEX TEMP[7], TEMP[7], SAMP[0], 2D
90: ADD TEMP[8].xy, TEMP[1].xyyy, -TEMP[7].xyyy
91: ABS TEMP[8].xy, TEMP[8].xyyy
92: ADD TEMP[8].x, TEMP[8].xxxx, TEMP[8].yyyy
93: FSLT TEMP[8].x, TEMP[8].xxxx, IMM[0].zzzz
94: DP2 TEMP[9].x, TEMP[1].zwww, IMM[1].xyyy
95: DP2 TEMP[7].x, TEMP[7].zwww, IMM[1].xyyy
96: ADD TEMP[7].x, TEMP[9].xxxx, -TEMP[7].xxxx
97: ABS TEMP[7].x, TEMP[7].xxxx
98: MUL TEMP[7].x, TEMP[7].xxxx, CONST[0].zzzz
99: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww
100: AND TEMP[7].x, TEMP[8].xxxx, TEMP[7].xxxx
101: AND TEMP[7].x, TEMP[7].xxxx, IMM[1].xxxx
102: MUL TEMP[7].x, IMM[0].yyyy, TEMP[7].xxxx
103: MOV TEMP[6].xy, TEMP[6].xyyy
104: TEX TEMP[6].x, TEMP[6], SAMP[1], 2D
105: MAD TEMP[0].x, TEMP[6].xxxx, TEMP[7].xxxx, TEMP[0].xxxx
106: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[7].xxxx
107: ADD TEMP[3].xy, IN[0].xyyy, -TEMP[3].xyyy
108: MOV TEMP[6].xy, TEMP[3].xyyy
109: TEX TEMP[6], TEMP[6], SAMP[0], 2D
110: ADD TEMP[7].xy, TEMP[1].xyyy, -TEMP[6].xyyy
111: ABS TEMP[7].xy, TEMP[7].xyyy
112: ADD TEMP[7].x, TEMP[7].xxxx, TEMP[7].yyyy
113: FSLT TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz
114: DP2 TEMP[8].x, TEMP[1].zwww, IMM[1].xyyy
115: DP2 TEMP[6].x, TEMP[6].zwww, IMM[1].xyyy
116: ADD TEMP[6].x, TEMP[8].xxxx, -TEMP[6].xxxx
117: ABS TEMP[6].x, TEMP[6].xxxx
118: MUL TEMP[6].x, TEMP[6].xxxx, CONST[0].zzzz
119: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].wwww
120: AND TEMP[6].x, TEMP[7].xxxx, TEMP[6].xxxx
121: AND TEMP[6].x, TEMP[6].xxxx, IMM[1].xxxx
122: MUL TEMP[6].x, IMM[1].wwww, TEMP[6].xxxx
123: MOV TEMP[3].xy, TEMP[3].xyyy
124: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D
125: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[6].xxxx, TEMP[0].xxxx
126: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx
127: ADD TEMP[3].xy, IN[0].xyyy, -TEMP[4].xyyy
128: MOV TEMP[4].xy, TEMP[3].xyyy
129: TEX TEMP[4], TEMP[4], SAMP[0], 2D
130: ADD TEMP[6].xy, TEMP[1].xyyy, -TEMP[4].xyyy
131: ABS TEMP[6].xy, TEMP[6].xyyy
132: ADD TEMP[6].x, TEMP[6].xxxx, TEMP[6].yyyy
133: FSLT TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz
134: DP2 TEMP[7].x, TEMP[1].zwww, IMM[1].xyyy
135: DP2 TEMP[4].x, TEMP[4].zwww, IMM[1].xyyy
136: ADD TEMP[4].x, TEMP[7].xxxx, -TEMP[4].xxxx
137: ABS TEMP[4].x, TEMP[4].xxxx
138: MUL TEMP[4].x, TEMP[4].xxxx, CONST[0].zzzz
139: FSLT TEMP[4].x, TEMP[4].xxxx, IMM[0].wwww
140: AND TEMP[4].x, TEMP[6].xxxx, TEMP[4].xxxx
141: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].xxxx
142: MUL TEMP[4].x, IMM[1].zzzz, TEMP[4].xxxx
143: MOV TEMP[3].xy, TEMP[3].xyyy
144: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D
145: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[0].xxxx
146: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[4].xxxx
147: ADD TEMP[3].xy, IN[0].xyyy, -TEMP[5].xyyy
148: MOV TEMP[4].xy, TEMP[3].xyyy
149: TEX TEMP[4], TEMP[4], SAMP[0], 2D
150: ADD TEMP[5].xy, TEMP[1].xyyy, -TEMP[4].xyyy
151: ABS TEMP[5].xy, TEMP[5].xyyy
152: ADD TEMP[5].x, TEMP[5].xxxx, TEMP[5].yyyy
153: FSLT TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz
154: DP2 TEMP[1].x, TEMP[1].zwww, IMM[1].xyyy
155: DP2 TEMP[4].x, TEMP[4].zwww, IMM[1].xyyy
156: ADD TEMP[1].x, TEMP[1].xxxx, -TEMP[4].xxxx
157: ABS TEMP[1].x, TEMP[1].xxxx
158: MUL TEMP[1].x, TEMP[1].xxxx, CONST[0].zzzz
159: FSLT TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww
160: AND TEMP[1].x, TEMP[5].xxxx, TEMP[1].xxxx
161: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx
162: MOV TEMP[3].xy, TEMP[3].xyyy
163: TEX TEMP[3].x, TEMP[3], SAMP[1], 2D
164: MAD TEMP[0].x, TEMP[3].xxxx, TEMP[1].xxxx, TEMP[0].xxxx
165: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx
166: RCP TEMP[1].x, TEMP[2].xxxx
167: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[1].xxxx
168: MOV OUT[0], TEMP[0].xxxx
169: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%27 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%28 = load <8 x i32>, <8 x i32> addrspace(2)* %27, align 32, !tbaa !0
%29 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%30 = load <4 x i32>, <4 x i32> addrspace(2)* %29, align 16, !tbaa !0
%31 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%32 = load <8 x i32>, <8 x i32> addrspace(2)* %31, align 32, !tbaa !0
%33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%34 = load <4 x i32>, <4 x i32> addrspace(2)* %33, align 16, !tbaa !0
%35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%37 = bitcast float %35 to i32
%38 = bitcast float %36 to i32
%39 = insertelement <2 x i32> undef, i32 %37, i32 0
%40 = insertelement <2 x i32> %39, i32 %38, i32 1
%41 = bitcast <8 x i32> %32 to <32 x i8>
%42 = bitcast <4 x i32> %34 to <16 x i8>
%43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %40, <32 x i8> %41, <16 x i8> %42, i32 2)
%44 = extractelement <4 x float> %43, i32 0
%45 = fmul float %44, 5.000000e+00
%46 = bitcast float %35 to i32
%47 = bitcast float %36 to i32
%48 = insertelement <2 x i32> undef, i32 %46, i32 0
%49 = insertelement <2 x i32> %48, i32 %47, i32 1
%50 = bitcast <8 x i32> %28 to <32 x i8>
%51 = bitcast <4 x i32> %30 to <16 x i8>
%52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %49, <32 x i8> %50, <16 x i8> %51, i32 2)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
%55 = extractelement <4 x float> %52, i32 2
%56 = extractelement <4 x float> %52, i32 3
%57 = fadd float %35, %25
%58 = fadd float %36, %26
%59 = bitcast float %57 to i32
%60 = bitcast float %58 to i32
%61 = insertelement <2 x i32> undef, i32 %59, i32 0
%62 = insertelement <2 x i32> %61, i32 %60, i32 1
%63 = bitcast <8 x i32> %28 to <32 x i8>
%64 = bitcast <4 x i32> %30 to <16 x i8>
%65 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %62, <32 x i8> %63, <16 x i8> %64, i32 2)
%66 = extractelement <4 x float> %65, i32 0
%67 = extractelement <4 x float> %65, i32 1
%68 = extractelement <4 x float> %65, i32 2
%69 = extractelement <4 x float> %65, i32 3
%70 = fsub float %53, %66
%71 = fsub float %54, %67
%72 = call float @llvm.fabs.f32(float %70)
%73 = call float @llvm.fabs.f32(float %71)
%74 = fadd float %72, %73
%75 = fcmp olt float %74, 0x3FB99999A0000000
%76 = fmul float %56, 0x3F70101020000000
%77 = fadd float %55, %76
%78 = fmul float %69, 0x3F70101020000000
%79 = fadd float %68, %78
%80 = fsub float %77, %79
%81 = call float @llvm.fabs.f32(float %80)
%82 = fmul float %81, %24
%83 = fcmp olt float %82, 0x3FC99999A0000000
%84 = and i1 %75, %83
%85 = select i1 %84, float 4.000000e+00, float 0.000000e+00
%86 = bitcast float %57 to i32
%87 = bitcast float %58 to i32
%88 = insertelement <2 x i32> undef, i32 %86, i32 0
%89 = insertelement <2 x i32> %88, i32 %87, i32 1
%90 = bitcast <8 x i32> %32 to <32 x i8>
%91 = bitcast <4 x i32> %34 to <16 x i8>
%92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2)
%93 = extractelement <4 x float> %92, i32 0
%94 = fmul float %93, %85
%95 = fadd float %94, %45
%96 = fadd float %85, 5.000000e+00
%97 = fmul float %25, 2.000000e+00
%98 = fmul float %26, 2.000000e+00
%99 = fadd float %35, %97
%100 = fadd float %36, %98
%101 = bitcast float %99 to i32
%102 = bitcast float %100 to i32
%103 = insertelement <2 x i32> undef, i32 %101, i32 0
%104 = insertelement <2 x i32> %103, i32 %102, i32 1
%105 = bitcast <8 x i32> %28 to <32 x i8>
%106 = bitcast <4 x i32> %30 to <16 x i8>
%107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %105, <16 x i8> %106, i32 2)
%108 = extractelement <4 x float> %107, i32 0
%109 = extractelement <4 x float> %107, i32 1
%110 = extractelement <4 x float> %107, i32 2
%111 = extractelement <4 x float> %107, i32 3
%112 = fsub float %53, %108
%113 = fsub float %54, %109
%114 = call float @llvm.fabs.f32(float %112)
%115 = call float @llvm.fabs.f32(float %113)
%116 = fadd float %114, %115
%117 = fcmp olt float %116, 0x3FB99999A0000000
%118 = fmul float %56, 0x3F70101020000000
%119 = fadd float %55, %118
%120 = fmul float %111, 0x3F70101020000000
%121 = fadd float %110, %120
%122 = fsub float %119, %121
%123 = call float @llvm.fabs.f32(float %122)
%124 = fmul float %123, %24
%125 = fcmp olt float %124, 0x3FC99999A0000000
%126 = and i1 %117, %125
%127 = select i1 %126, float 3.000000e+00, float 0.000000e+00
%128 = bitcast float %99 to i32
%129 = bitcast float %100 to i32
%130 = insertelement <2 x i32> undef, i32 %128, i32 0
%131 = insertelement <2 x i32> %130, i32 %129, i32 1
%132 = bitcast <8 x i32> %32 to <32 x i8>
%133 = bitcast <4 x i32> %34 to <16 x i8>
%134 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %131, <32 x i8> %132, <16 x i8> %133, i32 2)
%135 = extractelement <4 x float> %134, i32 0
%136 = fmul float %135, %127
%137 = fadd float %136, %95
%138 = fadd float %96, %127
%139 = fmul float %25, 3.000000e+00
%140 = fmul float %26, 3.000000e+00
%141 = fadd float %35, %139
%142 = fadd float %36, %140
%143 = bitcast float %141 to i32
%144 = bitcast float %142 to i32
%145 = insertelement <2 x i32> undef, i32 %143, i32 0
%146 = insertelement <2 x i32> %145, i32 %144, i32 1
%147 = bitcast <8 x i32> %28 to <32 x i8>
%148 = bitcast <4 x i32> %30 to <16 x i8>
%149 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %147, <16 x i8> %148, i32 2)
%150 = extractelement <4 x float> %149, i32 0
%151 = extractelement <4 x float> %149, i32 1
%152 = extractelement <4 x float> %149, i32 2
%153 = extractelement <4 x float> %149, i32 3
%154 = fsub float %53, %150
%155 = fsub float %54, %151
%156 = call float @llvm.fabs.f32(float %154)
%157 = call float @llvm.fabs.f32(float %155)
%158 = fadd float %156, %157
%159 = fcmp olt float %158, 0x3FB99999A0000000
%160 = fmul float %56, 0x3F70101020000000
%161 = fadd float %55, %160
%162 = fmul float %153, 0x3F70101020000000
%163 = fadd float %152, %162
%164 = fsub float %161, %163
%165 = call float @llvm.fabs.f32(float %164)
%166 = fmul float %165, %24
%167 = fcmp olt float %166, 0x3FC99999A0000000
%168 = and i1 %159, %167
%169 = select i1 %168, float 2.000000e+00, float 0.000000e+00
%170 = bitcast float %141 to i32
%171 = bitcast float %142 to i32
%172 = insertelement <2 x i32> undef, i32 %170, i32 0
%173 = insertelement <2 x i32> %172, i32 %171, i32 1
%174 = bitcast <8 x i32> %32 to <32 x i8>
%175 = bitcast <4 x i32> %34 to <16 x i8>
%176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2)
%177 = extractelement <4 x float> %176, i32 0
%178 = fmul float %177, %169
%179 = fadd float %178, %137
%180 = fadd float %138, %169
%181 = fmul float %25, 4.000000e+00
%182 = fmul float %26, 4.000000e+00
%183 = fadd float %35, %181
%184 = fadd float %36, %182
%185 = bitcast float %183 to i32
%186 = bitcast float %184 to i32
%187 = insertelement <2 x i32> undef, i32 %185, i32 0
%188 = insertelement <2 x i32> %187, i32 %186, i32 1
%189 = bitcast <8 x i32> %28 to <32 x i8>
%190 = bitcast <4 x i32> %30 to <16 x i8>
%191 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %188, <32 x i8> %189, <16 x i8> %190, i32 2)
%192 = extractelement <4 x float> %191, i32 0
%193 = extractelement <4 x float> %191, i32 1
%194 = extractelement <4 x float> %191, i32 2
%195 = extractelement <4 x float> %191, i32 3
%196 = fsub float %53, %192
%197 = fsub float %54, %193
%198 = call float @llvm.fabs.f32(float %196)
%199 = call float @llvm.fabs.f32(float %197)
%200 = fadd float %198, %199
%201 = fcmp olt float %200, 0x3FB99999A0000000
%202 = fmul float %56, 0x3F70101020000000
%203 = fadd float %55, %202
%204 = fmul float %195, 0x3F70101020000000
%205 = fadd float %194, %204
%206 = fsub float %203, %205
%207 = call float @llvm.fabs.f32(float %206)
%208 = fmul float %207, %24
%209 = fcmp olt float %208, 0x3FC99999A0000000
%210 = and i1 %201, %209
%211 = select i1 %210, float 1.000000e+00, float 0.000000e+00
%212 = bitcast float %183 to i32
%213 = bitcast float %184 to i32
%214 = insertelement <2 x i32> undef, i32 %212, i32 0
%215 = insertelement <2 x i32> %214, i32 %213, i32 1
%216 = bitcast <8 x i32> %32 to <32 x i8>
%217 = bitcast <4 x i32> %34 to <16 x i8>
%218 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %216, <16 x i8> %217, i32 2)
%219 = extractelement <4 x float> %218, i32 0
%220 = fmul float %219, %211
%221 = fadd float %220, %179
%222 = fadd float %180, %211
%223 = fsub float %35, %25
%224 = fsub float %36, %26
%225 = bitcast float %223 to i32
%226 = bitcast float %224 to i32
%227 = insertelement <2 x i32> undef, i32 %225, i32 0
%228 = insertelement <2 x i32> %227, i32 %226, i32 1
%229 = bitcast <8 x i32> %28 to <32 x i8>
%230 = bitcast <4 x i32> %30 to <16 x i8>
%231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %229, <16 x i8> %230, i32 2)
%232 = extractelement <4 x float> %231, i32 0
%233 = extractelement <4 x float> %231, i32 1
%234 = extractelement <4 x float> %231, i32 2
%235 = extractelement <4 x float> %231, i32 3
%236 = fsub float %53, %232
%237 = fsub float %54, %233
%238 = call float @llvm.fabs.f32(float %236)
%239 = call float @llvm.fabs.f32(float %237)
%240 = fadd float %238, %239
%241 = fcmp olt float %240, 0x3FB99999A0000000
%242 = fmul float %56, 0x3F70101020000000
%243 = fadd float %55, %242
%244 = fmul float %235, 0x3F70101020000000
%245 = fadd float %234, %244
%246 = fsub float %243, %245
%247 = call float @llvm.fabs.f32(float %246)
%248 = fmul float %247, %24
%249 = fcmp olt float %248, 0x3FC99999A0000000
%250 = and i1 %241, %249
%251 = select i1 %250, float 4.000000e+00, float 0.000000e+00
%252 = bitcast float %223 to i32
%253 = bitcast float %224 to i32
%254 = insertelement <2 x i32> undef, i32 %252, i32 0
%255 = insertelement <2 x i32> %254, i32 %253, i32 1
%256 = bitcast <8 x i32> %32 to <32 x i8>
%257 = bitcast <4 x i32> %34 to <16 x i8>
%258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %255, <32 x i8> %256, <16 x i8> %257, i32 2)
%259 = extractelement <4 x float> %258, i32 0
%260 = fmul float %259, %251
%261 = fadd float %260, %221
%262 = fadd float %222, %251
%263 = fsub float %35, %97
%264 = fsub float %36, %98
%265 = bitcast float %263 to i32
%266 = bitcast float %264 to i32
%267 = insertelement <2 x i32> undef, i32 %265, i32 0
%268 = insertelement <2 x i32> %267, i32 %266, i32 1
%269 = bitcast <8 x i32> %28 to <32 x i8>
%270 = bitcast <4 x i32> %30 to <16 x i8>
%271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %269, <16 x i8> %270, i32 2)
%272 = extractelement <4 x float> %271, i32 0
%273 = extractelement <4 x float> %271, i32 1
%274 = extractelement <4 x float> %271, i32 2
%275 = extractelement <4 x float> %271, i32 3
%276 = fsub float %53, %272
%277 = fsub float %54, %273
%278 = call float @llvm.fabs.f32(float %276)
%279 = call float @llvm.fabs.f32(float %277)
%280 = fadd float %278, %279
%281 = fcmp olt float %280, 0x3FB99999A0000000
%282 = fmul float %56, 0x3F70101020000000
%283 = fadd float %55, %282
%284 = fmul float %275, 0x3F70101020000000
%285 = fadd float %274, %284
%286 = fsub float %283, %285
%287 = call float @llvm.fabs.f32(float %286)
%288 = fmul float %287, %24
%289 = fcmp olt float %288, 0x3FC99999A0000000
%290 = and i1 %281, %289
%291 = select i1 %290, float 3.000000e+00, float 0.000000e+00
%292 = bitcast float %263 to i32
%293 = bitcast float %264 to i32
%294 = insertelement <2 x i32> undef, i32 %292, i32 0
%295 = insertelement <2 x i32> %294, i32 %293, i32 1
%296 = bitcast <8 x i32> %32 to <32 x i8>
%297 = bitcast <4 x i32> %34 to <16 x i8>
%298 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %295, <32 x i8> %296, <16 x i8> %297, i32 2)
%299 = extractelement <4 x float> %298, i32 0
%300 = fmul float %299, %291
%301 = fadd float %300, %261
%302 = fadd float %262, %291
%303 = fsub float %35, %139
%304 = fsub float %36, %140
%305 = bitcast float %303 to i32
%306 = bitcast float %304 to i32
%307 = insertelement <2 x i32> undef, i32 %305, i32 0
%308 = insertelement <2 x i32> %307, i32 %306, i32 1
%309 = bitcast <8 x i32> %28 to <32 x i8>
%310 = bitcast <4 x i32> %30 to <16 x i8>
%311 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %308, <32 x i8> %309, <16 x i8> %310, i32 2)
%312 = extractelement <4 x float> %311, i32 0
%313 = extractelement <4 x float> %311, i32 1
%314 = extractelement <4 x float> %311, i32 2
%315 = extractelement <4 x float> %311, i32 3
%316 = fsub float %53, %312
%317 = fsub float %54, %313
%318 = call float @llvm.fabs.f32(float %316)
%319 = call float @llvm.fabs.f32(float %317)
%320 = fadd float %318, %319
%321 = fcmp olt float %320, 0x3FB99999A0000000
%322 = fmul float %56, 0x3F70101020000000
%323 = fadd float %55, %322
%324 = fmul float %315, 0x3F70101020000000
%325 = fadd float %314, %324
%326 = fsub float %323, %325
%327 = call float @llvm.fabs.f32(float %326)
%328 = fmul float %327, %24
%329 = fcmp olt float %328, 0x3FC99999A0000000
%330 = and i1 %321, %329
%331 = select i1 %330, float 2.000000e+00, float 0.000000e+00
%332 = bitcast float %303 to i32
%333 = bitcast float %304 to i32
%334 = insertelement <2 x i32> undef, i32 %332, i32 0
%335 = insertelement <2 x i32> %334, i32 %333, i32 1
%336 = bitcast <8 x i32> %32 to <32 x i8>
%337 = bitcast <4 x i32> %34 to <16 x i8>
%338 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %335, <32 x i8> %336, <16 x i8> %337, i32 2)
%339 = extractelement <4 x float> %338, i32 0
%340 = fmul float %339, %331
%341 = fadd float %340, %301
%342 = fadd float %302, %331
%343 = fsub float %35, %181
%344 = fsub float %36, %182
%345 = bitcast float %343 to i32
%346 = bitcast float %344 to i32
%347 = insertelement <2 x i32> undef, i32 %345, i32 0
%348 = insertelement <2 x i32> %347, i32 %346, i32 1
%349 = bitcast <8 x i32> %28 to <32 x i8>
%350 = bitcast <4 x i32> %30 to <16 x i8>
%351 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %348, <32 x i8> %349, <16 x i8> %350, i32 2)
%352 = extractelement <4 x float> %351, i32 0
%353 = extractelement <4 x float> %351, i32 1
%354 = extractelement <4 x float> %351, i32 2
%355 = extractelement <4 x float> %351, i32 3
%356 = fsub float %53, %352
%357 = fsub float %54, %353
%358 = call float @llvm.fabs.f32(float %356)
%359 = call float @llvm.fabs.f32(float %357)
%360 = fadd float %358, %359
%361 = fcmp olt float %360, 0x3FB99999A0000000
%362 = fmul float %56, 0x3F70101020000000
%363 = fadd float %55, %362
%364 = fmul float %355, 0x3F70101020000000
%365 = fadd float %354, %364
%366 = fsub float %363, %365
%367 = call float @llvm.fabs.f32(float %366)
%368 = fmul float %367, %24
%369 = fcmp olt float %368, 0x3FC99999A0000000
%370 = and i1 %361, %369
%371 = select i1 %370, float 1.000000e+00, float 0.000000e+00
%372 = bitcast float %343 to i32
%373 = bitcast float %344 to i32
%374 = insertelement <2 x i32> undef, i32 %372, i32 0
%375 = insertelement <2 x i32> %374, i32 %373, i32 1
%376 = bitcast <8 x i32> %32 to <32 x i8>
%377 = bitcast <4 x i32> %34 to <16 x i8>
%378 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %375, <32 x i8> %376, <16 x i8> %377, i32 2)
%379 = extractelement <4 x float> %378, i32 0
%380 = fmul float %379, %371
%381 = fadd float %380, %341
%382 = fadd float %342, %371
%383 = fdiv float 1.000000e+00, %382
%384 = fmul float %381, %383
%385 = call i32 @llvm.SI.packf16(float %384, float %384)
%386 = bitcast i32 %385 to float
%387 = call i32 @llvm.SI.packf16(float %384, float %384)
%388 = bitcast i32 %387 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %386, float %388, float %386, float %388)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose LLVM IR precedes this listing.
; Overall shape, as visible below: one centre tap plus eight taps offset along
; +/-1..4 times the step (s17, s18); each tap gets a weight of 4, 3, 2 or 1 that
; is zeroed unless two threshold tests pass; the result is the weighted sum
; divided by the sum of weights, packed to f16 and exported.
s_wqm_b64 exec, exec ; BEFE0A7E
; Fetch three 4-dword descriptors (s[20:23], s[12:15], s[0:3]).
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504
s_mov_b32 m0, s9 ; BEFC0309
; Literal constants as IEEE-754 single bit patterns:
; v2 ~= 0.1, v3 ~= 1/255, v4 ~= 0.2, v5 = 3.0.
v_mov_b32_e32 v2, 0x3dcccccd ; 7E0402FF 3DCCCCCD
v_mov_b32_e32 v3, 0x3b808081 ; 7E0602FF 3B808081
v_mov_b32_e32 v4, 0x3e4ccccd ; 7E0802FF 3E4CCCCD
v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000
; Interpolate the first two components of attribute 0 into v6/v7; these are the
; centre coordinates used for every tap below.
v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000
v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001
v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100
v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101
s_waitcnt lgkmcnt(0) ; BF8C007F
; s16 = buffer dword 2 (scale applied to the second test),
; s17/s18 = buffer dwords 0xc/0xd (per-tap coordinate step).
s_buffer_load_dword s16, s[20:23], 0x2 ; C2081502
s_buffer_load_dword s17, s[20:23], 0xc ; C208950C
s_buffer_load_dword s18, s[20:23], 0xd ; C209150D
; Two 8-dword image descriptors: s[20:27] (sampled with all four channels below)
; and s[4:11] (sampled with a single channel below).
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708
s_waitcnt lgkmcnt(0) ; BF8C007F
; Tap +1: coordinates = centre + step.
v_add_f32_e32 v0, s17, v6 ; 06000C11
v_add_f32_e32 v1, s18, v7 ; 06020E12
; Centre tap: v8 = single-channel image, v[9:12] = four-channel image.
image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[0:3] ; F0800100 00010806
image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[20:27], s[12:15] ; F0800F00 00650906
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[12:15] ; F0800F00 00650D00
s_waitcnt vmcnt(0) ; BF8C0770
; Per-tap pattern (repeated for every tap): x/y difference against the centre
; sample (v9, v10), and z + w * v3 combined into one scalar.
v_subrev_f32_e32 v13, v13, v9 ; 0A1A130D
v_subrev_f32_e32 v14, v14, v10 ; 0A1C150E
v_mad_f32 v15, v3, v16, v15 ; D282000F 043E2103
image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0800100 00010000
; Tap +2: centre + 2 * step.
v_mad_f32 v16, 2.0, s17, v6 ; D2820010 041822F4
v_mad_f32 v17, 2.0, s18, v7 ; D2820011 041C24F4
image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[20:27], s[12:15] ; F0800F00 00651210
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v1, v18, v9 ; 0A021312
v_subrev_f32_e32 v18, v19, v10 ; 0A241513
v_mad_f32 v19, v3, v21, v20 ; D2820013 04522B03
image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[4:11], s[0:3] ; F0800100 00011010
; Tap +3: centre + 3 * step (v5 holds 3.0).
v_mad_f32 v20, s17, v5, v6 ; D2820014 041A0A11
v_mad_f32 v21, s18, v5, v7 ; D2820015 041E0A12
image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[20:27], s[12:15] ; F0800F00 00651614
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v17, v22, v9 ; 0A221316
v_subrev_f32_e32 v22, v23, v10 ; 0A2C1517
v_mad_f32 v23, v3, v25, v24 ; D2820017 04623303
image_sample v20, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[4:11], s[0:3] ; F0800100 00011414
; Tap +4: centre + 4 * step.
v_mad_f32 v24, 4.0, s17, v6 ; D2820018 041822F6
v_mad_f32 v25, 4.0, s18, v7 ; D2820019 041C24F6
image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[20:27], s[12:15] ; F0800F00 00651A18
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v21, v26, v9 ; 0A2A131A
v_subrev_f32_e32 v26, v27, v10 ; 0A34151B
v_mad_f32 v27, v3, v29, v28 ; D282001B 04723B03
image_sample v24, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[4:11], s[0:3] ; F0800100 00011818
; Tap -1: centre - step.
v_subrev_f32_e32 v28, s17, v6 ; 0A380C11
v_subrev_f32_e32 v29, s18, v7 ; 0A3A0E12
image_sample v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[20:27], s[12:15] ; F0800F00 00651E1C
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v25, v30, v9 ; 0A32131E
v_subrev_f32_e32 v30, v31, v10 ; 0A3C151F
v_mad_f32 v31, v3, v33, v32 ; D282001F 04824303
image_sample v28, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[4:11], s[0:3] ; F0800100 00011C1C
; Tap -2: centre - 2 * step.
v_mad_f32 v32, -2.0, s17, v6 ; D2820020 041822F5
v_mad_f32 v33, -2.0, s18, v7 ; D2820021 041C24F5
image_sample v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[20:27], s[12:15] ; F0800F00 00652220
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v29, v34, v9 ; 0A3A1322
v_subrev_f32_e32 v34, v35, v10 ; 0A441523
v_mad_f32 v35, v3, v37, v36 ; D2820023 04924B03
image_sample v32, 1, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[4:11], s[0:3] ; F0800100 00012020
; Tap -3: centre - 3 * step.
v_mad_f32 v36, -s17, v5, v6 ; D2820024 241A0A11
v_mad_f32 v37, -s18, v5, v7 ; D2820025 241E0A12
image_sample v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[20:27], s[12:15] ; F0800F00 00652624
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v33, v38, v9 ; 0A421326
v_subrev_f32_e32 v38, v39, v10 ; 0A4C1527
v_mad_f32 v39, v3, v41, v40 ; D2820027 04A25303
; Tap -4 coordinates are computed before v6/v7 are reused: v6 becomes the
; single-channel sample of tap -3 and v7 the centre's z + w * v3 value.
v_mad_f32 v40, 4.0, -s17, v6 ; D2820028 441822F6
v_mad_f32 v41, 4.0, -s18, v7 ; D2820029 441C24F6
image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[4:11], s[0:3] ; F0800100 00010624
v_mad_f32 v7, v3, v12, v11 ; D2820007 042E1903
image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[20:27], s[12:15] ; F0800F00 00652A28
s_waitcnt vmcnt(0) ; BF8C0770
v_subrev_f32_e32 v9, v42, v9 ; 0A12132A
v_subrev_f32_e32 v10, v43, v10 ; 0A14152B
v_mad_f32 v3, v3, v45, v44 ; D2820003 04B25B03
; Last single-channel sample; it is only waited for just before its use at the
; end of the accumulation.
image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[4:11], s[0:3] ; F0800100 00010B28
; Weight selection. For each tap two tests are evaluated:
;   (|dx| + |dy|) < v2                      and
;   |centre_scalar - tap_scalar| * s16 < v4
; The weight is the tap's constant when both hold and 0 otherwise; it is built
; with two chained v_cndmask instructions, one per test.
v_add_f32_e64 v12, |v13|, |v14| ; D206030C 00021D0D
v_cmp_lt_f32_e32 vcc, v12, v2 ; 7C02050C
v_subrev_f32_e32 v12, v15, v7 ; 0A180F0F
v_mul_f32_e64 v12, |v12|, s16 ; D210010C 0000210C
v_cmp_lt_f32_e64 s[0:1], v12, v4 ; D0020000 0002090C
v_add_f32_e64 v1, |v1|, |v18| ; D2060301 00022501
v_cmp_lt_f32_e64 s[2:3], v1, v2 ; D0020002 00020501
v_subrev_f32_e32 v1, v19, v7 ; 0A020F13
v_mul_f32_e64 v1, |v1|, s16 ; D2100101 00002101
v_cmp_lt_f32_e64 s[4:5], v1, v4 ; D0020004 00020901
v_add_f32_e64 v1, |v17|, |v22| ; D2060301 00022D11
v_cmp_lt_f32_e64 s[6:7], v1, v2 ; D0020006 00020501
v_add_f32_e64 v1, |v21|, |v26| ; D2060301 00023515
v_subrev_f32_e32 v12, v23, v7 ; 0A180F17
v_mul_f32_e64 v12, |v12|, s16 ; D210010C 0000210C
v_cmp_lt_f32_e64 s[8:9], v12, v4 ; D0020008 0002090C
v_add_f32_e64 v12, |v25|, |v30| ; D206030C 00023D19
v_cmp_lt_f32_e64 s[10:11], v1, v2 ; D002000A 00020501
v_add_f32_e64 v1, |v29|, |v34| ; D2060301 0002451D
v_subrev_f32_e32 v13, v27, v7 ; 0A1A0F1B
v_mul_f32_e64 v13, |v13|, s16 ; D210010D 0000210D
v_cmp_lt_f32_e64 s[12:13], v13, v4 ; D002000C 0002090D
v_add_f32_e64 v13, |v33|, |v38| ; D206030D 00024D21
v_cmp_lt_f32_e64 s[14:15], v12, v2 ; D002000E 0002050C
; v12 = weight of tap +1 (4.0 or 0).
v_cndmask_b32_e64 v12, 0, 4.0, s[0:1] ; D200000C 0001EC80
v_cndmask_b32_e32 v12, 0, v12 ; 00181880
v_subrev_f32_e32 v14, v31, v7 ; 0A1C0F1F
v_mul_f32_e64 v14, |v14|, s16 ; D210010E 0000210E
v_cmp_lt_f32_e32 vcc, v14, v4 ; 7C02090E
; v14 = weight of tap +2 (3.0 or 0).
v_cndmask_b32_e64 v14, 0, v5, s[4:5] ; D200000E 00120A80
v_cndmask_b32_e64 v14, 0, v14, s[2:3] ; D200000E 000A1C80
v_cmp_lt_f32_e64 s[0:1], v1, v2 ; D0020000 00020501
; v1 = weight of tap +3 (2.0 or 0).
v_cndmask_b32_e64 v1, 0, 2.0, s[8:9] ; D2000001 0021E880
v_cndmask_b32_e64 v1, 0, v1, s[6:7] ; D2000001 001A0280
v_subrev_f32_e32 v15, v35, v7 ; 0A1E0F23
v_mul_f32_e64 v15, |v15|, s16 ; D210010F 0000210F
v_cmp_lt_f32_e64 s[2:3], v15, v4 ; D0020002 0002090F
; v15 = weight of tap +4 (1.0 or 0).
v_cndmask_b32_e64 v15, 0, 1.0, s[12:13] ; D200000F 0031E480
v_cndmask_b32_e64 v15, 0, v15, s[10:11] ; D200000F 002A1E80
v_cmp_lt_f32_e64 s[4:5], v13, v2 ; D0020004 0002050D
; v13 = weight of tap -1 (4.0 or 0).
v_cndmask_b32_e64 v13, 0, 4.0, vcc ; D200000D 01A9EC80
v_cndmask_b32_e64 v13, 0, v13, s[14:15] ; D200000D 003A1A80
; v5 = weight of tap -2 (3.0 or 0); this overwrites the 3.0 constant.
v_cndmask_b32_e64 v5, 0, v5, s[2:3] ; D2000005 000A0A80
v_cndmask_b32_e64 v5, 0, v5, s[0:1] ; D2000005 00020A80
v_subrev_f32_e32 v17, v39, v7 ; 0A220F27
v_mul_f32_e64 v17, |v17|, s16 ; D2100111 00002111
v_cmp_lt_f32_e32 vcc, v17, v4 ; 7C020911
; v17 = weight of tap -3 (2.0 or 0).
v_cndmask_b32_e64 v17, 0, 2.0, vcc ; D2000011 01A9E880
v_cndmask_b32_e64 v17, 0, v17, s[4:5] ; D2000011 00122280
v_add_f32_e64 v9, |v9|, |v10| ; D2060309 00021509
v_subrev_f32_e32 v3, v3, v7 ; 0A060F03
v_mul_f32_e64 v3, |v3|, s16 ; D2100103 00002103
v_cmp_lt_f32_e32 vcc, v3, v4 ; 7C020903
; v2 = weight of tap -4 (1.0 or 0); this overwrites the 0.1 threshold.
v_cndmask_b32_e64 v3, 0, 1.0, vcc ; D2000003 01A9E480
v_cmp_lt_f32_e32 vcc, v9, v2 ; 7C020509
v_cndmask_b32_e32 v2, 0, v3 ; 00040680
; Accumulation: v4 = 5.0 * centre + sum(weight * tap), v0 = 5.0 + sum(weight).
; 0x40a00000 is the single-precision bit pattern of 5.0 (centre weight).
v_mov_b32_e32 v3, 0x40a00000 ; 7E0602FF 40A00000
v_mul_f32_e32 v4, v3, v8 ; 10081103
v_mac_f32_e32 v4, v12, v0 ; 3E08010C
v_add_f32_e32 v0, v3, v12 ; 06001903
v_mac_f32_e32 v4, v14, v16 ; 3E08210E
v_add_f32_e32 v0, v14, v0 ; 0600010E
v_mac_f32_e32 v4, v1, v20 ; 3E082901
v_add_f32_e32 v0, v1, v0 ; 06000101
v_mac_f32_e32 v4, v15, v24 ; 3E08310F
v_add_f32_e32 v0, v15, v0 ; 0600010F
v_mac_f32_e32 v4, v13, v28 ; 3E08390D
v_add_f32_e32 v0, v13, v0 ; 0600010D
v_add_f32_e32 v0, v5, v0 ; 06000105
v_add_f32_e32 v0, v17, v0 ; 06000111
v_add_f32_e32 v0, v2, v0 ; 06000102
; Normalise: multiply the weighted sum by the reciprocal of the weight total.
v_rcp_f32_e32 v0, v0 ; 7E005500
v_mac_f32_e32 v4, v5, v32 ; 3E084105
v_mac_f32_e32 v4, v17, v6 ; 3E080D11
; Wait for the outstanding tap -4 sample (v11) before folding it in.
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v4, v2, v11 ; 3E081702
v_mul_f32_e32 v0, v0, v4 ; 10000900
; Pack the single result into both f16 halves and export it in all four slots.
v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 48
Code Size: 1012 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], IMM[0].xyyy, IN[1].xxxx
5: MUL TEMP[2], IMM[0].yxyy, IN[1].yyyy
6: ADD TEMP[3].xy, TEMP[1], TEMP[2]
7: MOV TEMP[3].xy, TEMP[3].xyxx
8: ADD TEMP[1].xy, TEMP[1], TEMP[2]
9: MOV TEMP[3].zw, TEMP[1].yyxy
10: MOV OUT[1], TEMP[3]
11: MOV OUT[0], TEMP[0]
12: END
; ModuleID = 'tgsi'
; Vertex shader entry point: the LLVM IR form of the TGSI listing printed just
; before this module (instructions 0-12).
;
; Arguments referenced in the body (numbered from %0 in declaration order):
;   %1      - array of <16 x i8> values; element 0 is the resource passed to
;             llvm.SI.load.const (the source of CONST[0..3]).
;   %4      - array of <16 x i8> values; elements 0 and 1 are the resources
;             passed to llvm.SI.vs.load.input for IN[0] and IN[1].
;   %5, %7  - i32 values added together to form the index for both input fetches.
; Output: two llvm.SI.export calls; nothing is returned.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the constant resource and read 16 consecutive floats (byte offsets 0..60).
; %13-%16, %17-%20, %21-%24 and %25-%28 are the four vec4 rows CONST[0]..CONST[3].
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Fetch IN[0] (all four components: %33..%36).
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; Fetch IN[1]; only its x and y components (%41, %42) are used.
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
; TGSI 0: TEMP[0] = CONST[0] * IN[0].x
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
; TGSI 1: TEMP[0] += CONST[1] * IN[0].y
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
; TGSI 2: TEMP[0] += CONST[2] * IN[0].z
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
; TGSI 3: TEMP[0] += CONST[3] * IN[0].w  -> final value in %64, %66, %68, %70
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
; TGSI 4-9: IN[1].x and IN[1].y are each combined with a zero-multiplied copy
; of the other component; the pair is computed twice (%73/%74 and %75/%76)
; because the TGSI writes it to both .xy and .zw of TEMP[3].
%71 = fmul float %41, 0.000000e+00
%72 = fmul float %42, 0.000000e+00
%73 = fadd float %41, %72
%74 = fadd float %71, %42
%75 = fadd float %41, %72
%76 = fadd float %71, %42
; The export whose fourth argument is 32 carries the OUT[1] (GENERIC[0]) value;
; the export whose fourth argument is 12 carries the OUT[0] (POSITION) value.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %73, float %74, float %75, float %76)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR precedes this listing.
; Fetch the two 4-dword descriptors used by the vertex fetches below.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
; v0 = s10 + v0: the index used by both buffer loads (the IR's add i32 %5, %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
; v[1:4] = first input attribute, v[5:8] = second input attribute
; (only v5 and v6 of the second are read afterwards; v7 and v8 are reused).
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Read 16 consecutive dwords (0x0..0xf) through the s[0:3] descriptor.
; The last load writes s0, which is part of that descriptor, so it is issued
; after every other load that needs the descriptor.
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four 4-term sums of products: each of v0, v7, v8, v1 accumulates
; const * v1 + const * v2 + const * v3 + const * v4 for one output component.
v_mul_f32_e32 v0, s4, v1 ; 10000204
v_mac_f32_e32 v0, s8, v2 ; 3E000408
v_mul_f32_e32 v7, s5, v1 ; 100E0205
v_mac_f32_e32 v7, s9, v2 ; 3E0E0409
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v7, s13, v3 ; 3E0E060D
v_mac_f32_e32 v8, s14, v3 ; 3E10060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v7, s17, v4 ; 3E0E0811
v_mac_f32_e32 v8, s18, v4 ; 3E100812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
; v2 = 0 * v6 + v5 and v6 = v6 + 0 * v5: the zero-multiplied terms from the IR
; are kept as multiply-adds rather than folded away.
v_mad_f32 v2, 0, v6, v5 ; D2820002 04160C80
v_mac_f32_e32 v6, 0, v5 ; 3E0C0A80
; First export (target 32) sends the v2/v6 pair twice; second export
; (target 12) sends the four accumulated sums.
exp 15, 32, 0, 0, 0, v2, v6, v2, v6 ; F800020F 06020602
exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 204 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL TEMP[0..2], LOCAL
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].w, TEMP[0].wwww
3: MOV TEMP[2].xy, IN[0].zwww
4: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
5: POW TEMP[2].x, TEMP[2].xxxx, CONST[0].wwww
6: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xxxx
7: MOV OUT[0], TEMP[1]
8: END
; ModuleID = 'tgsi'
; Fragment shader entry point: the LLVM IR form of the TGSI listing printed just
; before this module (instructions 0-8). It samples two 2D textures, raises the
; x channel of the second to the power CONST[0].w, scales the first texture's
; xyz by that value and exports the result packed as f16 pairs.
;
; Arguments referenced in the body (numbered from %0 in declaration order):
;   %1     - array of <16 x i8>; element 0 is the resource for llvm.SI.load.const.
;   %2     - array of <4 x i32>; elements 0 and 1 are passed as the third
;            operand of the two sample calls.
;   %3     - array of <8 x i32>; elements 0 and 1 are passed as the second
;            operand of the two sample calls.
;   %5, %7 - forwarded unchanged to every llvm.SI.fs.interp call.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; %24 = constant at byte offset 12, i.e. CONST[0].w (the POW exponent).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
; Operands for the first sample: element 0 of %3 and element 0 of %2, each
; reinterpreted as a byte vector.
%25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0
%27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0
; Operands for the second sample: element 1 of %3 and element 1 of %2.
%29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%30 = bitcast <8 x i32> addrspace(2)* %29 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%33 = bitcast <4 x i32> addrspace(2)* %32 to <16 x i8> addrspace(2)*
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
; Interpolate the four components of IN[0]: the first argument selects the
; component (0..3), the second is 0 for all four calls.
%35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
; TGSI 0-1: sample with coordinates IN[0].xy; the float coordinates are bitcast
; to i32 only to fit the intrinsic's <2 x i32> operand. All four channels are kept.
%39 = bitcast float %35 to i32
%40 = bitcast float %36 to i32
%41 = insertelement <2 x i32> undef, i32 %39, i32 0
%42 = insertelement <2 x i32> %41, i32 %40, i32 1
%43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %26, <16 x i8> %28, i32 2)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = extractelement <4 x float> %43, i32 3
; TGSI 3-4: sample with coordinates IN[0].zw; only channel 0 is used.
%48 = bitcast float %37 to i32
%49 = bitcast float %38 to i32
%50 = insertelement <2 x i32> undef, i32 %48, i32 0
%51 = insertelement <2 x i32> %50, i32 %49, i32 1
%52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %31, <16 x i8> %34, i32 2)
%53 = extractelement <4 x float> %52, i32 0
; TGSI 5-6: factor = pow(second.x, CONST[0].w); xyz of the first sample are
; scaled by it, while w (%47) passes through unscaled.
%54 = call float @llvm.pow.f32(float %53, float %24)
%55 = fmul float %44, %54
%56 = fmul float %45, %54
%57 = fmul float %46, %54
; Pack (x, y) and (z, w) into two 32-bit words; the export lists the two words
; twice (%59, %61, %59, %61).
%58 = call i32 @llvm.SI.packf16(float %55, float %56)
%59 = bitcast i32 %58 to float
%60 = call i32 @llvm.SI.packf16(float %57, float %47)
%61 = bitcast i32 %60 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %59, float %61, float %59, float %61)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose LLVM IR precedes this listing.
s_wqm_b64 exec, exec ; BEFE0A7E
; Fetch three 4-dword descriptors: s[0:3] (used for the scalar buffer load),
; s[12:15] and s[16:19] (last operand of the two image_sample instructions).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
; s0 = buffer dword 3 (the exponent; byte offset 12 in the IR).
s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103
; Two 8-dword image descriptors, one per image_sample below.
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
s_load_dwordx8 s[4:11], s[6:7], 0x8 ; C0C20708
; Interpolate components 0..3 of attribute 0 into v2..v5.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
s_waitcnt lgkmcnt(0) ; BF8C007F
; First sample: four channels into v[0:3] at coordinates v[2:3].
; Second sample: one channel into v4 at coordinates v[4:5].
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[12:15] ; F0800F00 00650002
image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[16:19] ; F0800100 00810404
s_waitcnt vmcnt(0) ; BF8C0770
; The IR's pow call is expanded to log, multiply by the exponent, exp.
v_log_f32_e32 v4, v4 ; 7E084F04
v_mul_legacy_f32_e32 v4, s0, v4 ; 0E080800
v_exp_f32_e32 v4, v4 ; 7E084B04
; Scale the first three channels of the first sample; v3 is left unscaled.
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
; Pack (v2, v3) and (v0, v1) to f16 pairs and export them as v0, v2, v0, v2.
v_cvt_pkrtz_f16_f32_e32 v2, v2, v3 ; 5E040702
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 8
Code Size: 136 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..16]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[13], IN[0].xxxx
1: MAD TEMP[0], CONST[14], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[15], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[16], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].x, CONST[7].xxxx
5: MOV TEMP[1].y, CONST[8].xxxx
6: MOV TEMP[1].z, CONST[9].xxxx
7: MOV TEMP[2].x, CONST[7].yyyy
8: MOV TEMP[2].y, CONST[8].yyyy
9: MOV TEMP[2].z, CONST[9].yyyy
10: MOV TEMP[3].x, CONST[7].zzzz
11: MOV TEMP[3].y, CONST[8].zzzz
12: MOV TEMP[3].z, CONST[9].zzzz
13: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
14: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
15: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
16: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
17: RSQ TEMP[2].x, TEMP[2].xxxx
18: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
19: MOV TEMP[2].w, IMM[0].xxxx
20: MOV TEMP[2].xyz, TEMP[1].xyzx
21: DP4 TEMP[3].x, CONST[0], TEMP[2]
22: DP4 TEMP[4].x, CONST[1], TEMP[2]
23: MOV TEMP[3].y, TEMP[4].xxxx
24: DP4 TEMP[2].x, CONST[2], TEMP[2]
25: MOV TEMP[3].z, TEMP[2].xxxx
26: MUL TEMP[2], TEMP[1].xyzz, TEMP[1].yzzx
27: DP4 TEMP[4].x, CONST[3], TEMP[2]
28: DP4 TEMP[5].x, CONST[4], TEMP[2]
29: MOV TEMP[4].y, TEMP[5].xxxx
30: DP4 TEMP[2].x, CONST[5], TEMP[2]
31: MOV TEMP[4].z, TEMP[2].xxxx
32: MUL TEMP[2].x, TEMP[1].yyyy, TEMP[1].yyyy
33: MAD TEMP[2].x, TEMP[1].xxxx, TEMP[1].xxxx, -TEMP[2].xxxx
34: MAD TEMP[2].xyz, CONST[6].xyzz, TEMP[2].xxxx, TEMP[4].xyzz
35: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz
36: MAD TEMP[3].xy, IN[2].xyyy, CONST[12].xyyy, CONST[12].zwww
37: MOV TEMP[3].w, TEMP[1].xxxx
38: MOV TEMP[1].xy, TEMP[1].yzyy
39: MOV TEMP[1].zw, TEMP[2].yyxy
40: MOV TEMP[2].x, TEMP[2].zzzz
41: MAD TEMP[4].x, TEMP[0].zzzz, CONST[11].zzzz, CONST[11].wwww
42: MOV TEMP[3].z, TEMP[4].xxxx
43: MOV OUT[3], TEMP[2]
44: MOV OUT[2], TEMP[1]
45: MOV OUT[0], TEMP[0]
46: MOV OUT[1], TEMP[3]
47: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0
%73 = add i32 %5, %7
%74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73)
%75 = extractelement <4 x float> %74, i32 0
%76 = extractelement <4 x float> %74, i32 1
%77 = extractelement <4 x float> %74, i32 2
%78 = extractelement <4 x float> %74, i32 3
%79 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%80 = load <16 x i8>, <16 x i8> addrspace(2)* %79, align 16, !tbaa !0
%81 = add i32 %5, %7
%82 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %80, i32 0, i32 %81)
%83 = extractelement <4 x float> %82, i32 0
%84 = extractelement <4 x float> %82, i32 1
%85 = extractelement <4 x float> %82, i32 2
%86 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0
%88 = add i32 %5, %7
%89 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %87, i32 0, i32 %88)
%90 = extractelement <4 x float> %89, i32 0
%91 = extractelement <4 x float> %89, i32 1
%92 = fmul float %55, %75
%93 = fmul float %56, %75
%94 = fmul float %57, %75
%95 = fmul float %58, %75
%96 = fmul float %59, %76
%97 = fadd float %96, %92
%98 = fmul float %60, %76
%99 = fadd float %98, %93
%100 = fmul float %61, %76
%101 = fadd float %100, %94
%102 = fmul float %62, %76
%103 = fadd float %102, %95
%104 = fmul float %63, %77
%105 = fadd float %104, %97
%106 = fmul float %64, %77
%107 = fadd float %106, %99
%108 = fmul float %65, %77
%109 = fadd float %108, %101
%110 = fmul float %66, %77
%111 = fadd float %110, %103
%112 = fmul float %67, %78
%113 = fadd float %112, %105
%114 = fmul float %68, %78
%115 = fadd float %114, %107
%116 = fmul float %69, %78
%117 = fadd float %116, %109
%118 = fmul float %70, %78
%119 = fadd float %118, %111
%120 = fmul float %40, %83
%121 = fmul float %43, %83
%122 = fmul float %46, %83
%123 = fmul float %41, %84
%124 = fadd float %123, %120
%125 = fmul float %44, %84
%126 = fadd float %125, %121
%127 = fmul float %47, %84
%128 = fadd float %127, %122
%129 = fmul float %42, %85
%130 = fadd float %129, %124
%131 = fmul float %45, %85
%132 = fadd float %131, %126
%133 = fmul float %48, %85
%134 = fadd float %133, %128
%135 = fmul float %130, %130
%136 = fmul float %132, %132
%137 = fadd float %136, %135
%138 = fmul float %134, %134
%139 = fadd float %137, %138
%140 = call float @llvm.AMDGPU.rsq.clamped.f32(float %139)
%141 = fmul float %130, %140
%142 = fmul float %132, %140
%143 = fmul float %134, %140
%144 = fmul float %13, %141
%145 = fmul float %14, %142
%146 = fadd float %144, %145
%147 = fmul float %15, %143
%148 = fadd float %146, %147
%149 = fadd float %148, %16
%150 = fmul float %17, %141
%151 = fmul float %18, %142
%152 = fadd float %150, %151
%153 = fmul float %19, %143
%154 = fadd float %152, %153
%155 = fadd float %154, %20
%156 = fmul float %21, %141
%157 = fmul float %22, %142
%158 = fadd float %156, %157
%159 = fmul float %23, %143
%160 = fadd float %158, %159
%161 = fadd float %160, %24
%162 = fmul float %141, %142
%163 = fmul float %142, %143
%164 = fmul float %143, %143
%165 = fmul float %143, %141
%166 = fmul float %25, %162
%167 = fmul float %26, %163
%168 = fadd float %166, %167
%169 = fmul float %27, %164
%170 = fadd float %168, %169
%171 = fmul float %28, %165
%172 = fadd float %170, %171
%173 = fmul float %29, %162
%174 = fmul float %30, %163
%175 = fadd float %173, %174
%176 = fmul float %31, %164
%177 = fadd float %175, %176
%178 = fmul float %32, %165
%179 = fadd float %177, %178
%180 = fmul float %33, %162
%181 = fmul float %34, %163
%182 = fadd float %180, %181
%183 = fmul float %35, %164
%184 = fadd float %182, %183
%185 = fmul float %36, %165
%186 = fadd float %184, %185
%187 = fmul float %142, %142
%188 = fmul float %141, %141
%189 = fsub float %188, %187
%190 = fmul float %37, %189
%191 = fadd float %190, %172
%192 = fmul float %38, %189
%193 = fadd float %192, %179
%194 = fmul float %39, %189
%195 = fadd float %194, %186
%196 = fadd float %191, %149
%197 = fadd float %193, %155
%198 = fadd float %195, %161
%199 = fmul float %90, %51
%200 = fadd float %199, %53
%201 = fmul float %91, %52
%202 = fadd float %201, %54
%203 = fmul float %117, %49
%204 = fadd float %203, %50
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %200, float %202, float %204, float %141)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %142, float %143, float %196, float %197)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %198, float %197, float %198, float %165)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %113, float %115, float %117, float %119)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132
s_buffer_load_dword s5, s[0:3], 0x30 ; C2028130
s_buffer_load_dword s6, s[0:3], 0x33 ; C2030133
s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C
s_buffer_load_dword s8, s[0:3], 0x31 ; C2040131
s_buffer_load_dword s9, s[0:3], 0x34 ; C2048134
s_buffer_load_dword s10, s[0:3], 0x35 ; C2050135
s_buffer_load_dword s11, s[0:3], 0x36 ; C2058136
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D
s_buffer_load_dword s12, s[0:3], 0x20 ; C2060120
s_buffer_load_dword s13, s[0:3], 0x21 ; C2068121
v_mac_f32_e32 v0, s5, v8 ; 3E001005
v_mov_b32_e32 v8, s6 ; 7E100206
s_buffer_load_dword s5, s[0:3], 0x24 ; C2028124
s_buffer_load_dword s6, s[0:3], 0x25 ; C2030125
v_mac_f32_e32 v8, s8, v9 ; 3E101208
s_buffer_load_dword s8, s[0:3], 0x1e ; C204011E
v_mul_f32_e32 v9, s7, v5 ; 10120A07
s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v9, s4, v6 ; 3E120C04
v_mul_f32_e32 v10, s12, v5 ; 10140A0C
v_mac_f32_e32 v10, s13, v6 ; 3E140C0D
s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126
v_mul_f32_e32 v5, s5, v5 ; 100A0A05
v_mac_f32_e32 v5, s6, v6 ; 3E0A0C06
s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138
v_mac_f32_e32 v9, s8, v7 ; 3E120E08
s_buffer_load_dword s6, s[0:3], 0x39 ; C2030139
v_mac_f32_e32 v10, s7, v7 ; 3E140E07
s_buffer_load_dword s7, s[0:3], 0x3a ; C203813A
s_buffer_load_dword s8, s[0:3], 0x2e ; C204012E
s_buffer_load_dword s12, s[0:3], 0x2f ; C206012F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s4, v7 ; 3E0A0E04
v_mul_f32_e32 v6, s9, v1 ; 100C0209
s_buffer_load_dword s4, s[0:3], 0x3b ; C202013B
v_mac_f32_e32 v6, s5, v2 ; 3E0C0405
v_mul_f32_e32 v7, s10, v1 ; 100E020A
v_mac_f32_e32 v7, s6, v2 ; 3E0E0406
v_mul_f32_e32 v11, s11, v1 ; 1016020B
s_buffer_load_dword s5, s[0:3], 0x37 ; C2028137
v_mac_f32_e32 v11, s7, v2 ; 3E160407
s_buffer_load_dword s6, s[0:3], 0x3c ; C203013C
s_buffer_load_dword s7, s[0:3], 0x3d ; C203813D
s_buffer_load_dword s9, s[0:3], 0x3e ; C204813E
s_buffer_load_dword s10, s[0:3], 0x3f ; C205013F
s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140
s_buffer_load_dword s13, s[0:3], 0x41 ; C2068141
s_buffer_load_dword s14, s[0:3], 0x42 ; C2070142
s_buffer_load_dword s15, s[0:3], 0x43 ; C2078143
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s5, v1 ; 10020205
v_mac_f32_e32 v1, s4, v2 ; 3E020404
v_mac_f32_e32 v6, s6, v3 ; 3E0C0606
v_mac_f32_e32 v7, s7, v3 ; 3E0E0607
v_mac_f32_e32 v11, s9, v3 ; 3E160609
v_mac_f32_e32 v1, s10, v3 ; 3E02060A
v_mac_f32_e32 v6, s11, v4 ; 3E0C080B
v_mac_f32_e32 v7, s13, v4 ; 3E0E080D
v_mac_f32_e32 v11, s14, v4 ; 3E16080E
v_mac_f32_e32 v1, s15, v4 ; 3E02080F
s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119
s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A
v_mul_f32_e32 v2, v9, v9 ; 10041309
v_mac_f32_e32 v2, v10, v10 ; 3E04150A
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
s_buffer_load_dword s9, s[0:3], 0x2 ; C2048102
s_buffer_load_dword s10, s[0:3], 0x3 ; C2050103
s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104
s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105
s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106
s_buffer_load_dword s15, s[0:3], 0x7 ; C2078107
s_buffer_load_dword s16, s[0:3], 0x8 ; C2080108
s_buffer_load_dword s17, s[0:3], 0x9 ; C2088109
s_buffer_load_dword s18, s[0:3], 0xa ; C209010A
s_buffer_load_dword s19, s[0:3], 0xb ; C209810B
s_buffer_load_dword s20, s[0:3], 0xc ; C20A010C
s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D
s_buffer_load_dword s22, s[0:3], 0xe ; C20B010E
s_buffer_load_dword s23, s[0:3], 0x14 ; C20B8114
s_buffer_load_dword s24, s[0:3], 0x15 ; C20C0115
s_buffer_load_dword s25, s[0:3], 0xf ; C20C810F
s_buffer_load_dword s26, s[0:3], 0x10 ; C20D0110
s_buffer_load_dword s27, s[0:3], 0x11 ; C20D8111
s_buffer_load_dword s28, s[0:3], 0x12 ; C20E0112
s_buffer_load_dword s29, s[0:3], 0x13 ; C20E8113
s_buffer_load_dword s30, s[0:3], 0x16 ; C20F0116
s_buffer_load_dword s31, s[0:3], 0x17 ; C20F8117
s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118
v_mul_f32_e32 v3, v2, v10 ; 10061502
v_mul_f32_e32 v4, v2, v5 ; 10080B02
v_mul_f32_e32 v5, v4, v3 ; 100A0704
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v10, s21, v5 ; 10140A15
v_mul_f32_e32 v12, s27, v5 ; 10180A1B
v_mul_f32_e32 v5, s24, v5 ; 100A0A18
v_mul_f32_e32 v2, v2, v9 ; 10041302
v_mul_f32_e32 v9, v3, v2 ; 10120503
v_mac_f32_e32 v10, s20, v9 ; 3E141214
v_mac_f32_e32 v12, s26, v9 ; 3E18121A
v_mac_f32_e32 v5, s23, v9 ; 3E0A1217
v_mul_f32_e32 v9, v4, v4 ; 10120904
v_mac_f32_e32 v10, s22, v9 ; 3E141216
v_mac_f32_e32 v12, s28, v9 ; 3E18121C
v_mac_f32_e32 v5, s30, v9 ; 3E0A121E
v_mul_f32_e32 v9, v2, v4 ; 10120902
v_mac_f32_e32 v10, s25, v9 ; 3E141219
v_mac_f32_e32 v12, s29, v9 ; 3E18121D
v_mac_f32_e32 v5, s31, v9 ; 3E0A121F
v_mul_f32_e32 v13, v3, v3 ; 101A0703
v_mad_f32 v13, v2, v2, -v13 ; D282000D 84360502
v_mac_f32_e32 v10, s0, v13 ; 3E141A00
v_mac_f32_e32 v12, s4, v13 ; 3E181A04
v_mac_f32_e32 v5, s5, v13 ; 3E0A1A05
v_mov_b32_e32 v13, s12 ; 7E1A020C
v_mac_f32_e32 v13, s8, v11 ; 3E1A1608
v_mul_f32_e32 v14, s7, v3 ; 101C0607
v_mac_f32_e32 v14, s6, v2 ; 3E1C0406
v_mul_f32_e32 v15, s13, v3 ; 101E060D
v_mac_f32_e32 v15, s11, v2 ; 3E1E040B
v_mul_f32_e32 v16, s17, v3 ; 10200611
v_mac_f32_e32 v16, s16, v2 ; 3E200410
v_mac_f32_e32 v14, s9, v4 ; 3E1C0809
v_mac_f32_e32 v15, s14, v4 ; 3E1E080E
v_mac_f32_e32 v16, s18, v4 ; 3E200812
v_add_f32_e32 v14, s10, v14 ; 061C1C0A
v_add_f32_e32 v15, s15, v15 ; 061E1E0F
v_add_f32_e32 v16, s19, v16 ; 06202013
exp 15, 32, 0, 0, 0, v0, v8, v13, v2 ; F800020F 020D0800
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, v14, v10 ; 0600150E
v_add_f32_e32 v2, v15, v12 ; 0604190F
exp 15, 33, 0, 0, 0, v3, v4, v0, v2 ; F800021F 02000403
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, v16, v5 ; 06000B10
exp 15, 34, 0, 0, 0, v0, v2, v0, v9 ; F800022F 09000200
exp 15, 12, 0, 1, 0, v6, v7, v11, v1 ; F80008CF 010B0706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 20
Code Size: 652 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..3]
DCL CONST[5]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 2.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[0].wwww
1: MOV TEMP[0].yz, IN[1].yxyy
2: MOV TEMP[1].xy, IN[1].zwzz
3: MOV TEMP[1].z, IN[2].xxxx
4: MOV TEMP[2].xy, IN[0].xyyy
5: TEX TEMP[2], TEMP[2], SAMP[0], 2D
6: ADD TEMP[3].x, IMM[0].xxxx, -CONST[3].wwww
7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
8: FSLT TEMP[4].x, TEMP[2].wwww, CONST[5].xxxx
9: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy
10: KILL_IF -TEMP[4].xxxx
11: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[2].xyzz
12: DP3 TEMP[0].x, TEMP[0].xyzz, CONST[0].xyzz
13: MAX TEMP[0].x, IMM[0].zzzz, TEMP[0].xxxx
14: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xxxx
15: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz, TEMP[0].xyzz
16: MOV TEMP[1].w, TEMP[2].wwww
17: MOV_SAT TEMP[2].x, IN[0].zzzz
18: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[1].xyzz
19: MOV OUT[0], TEMP[1]
20: END
; ModuleID = 'tgsi'
; Fragment shader entry point: LLVM IR generated for the FRAG TGSI program
; dumped directly above (TGSI instructions 0..20). The comments below map
; each group of IR values back to the TGSI instruction it implements.
;
; Arguments referenced by the body:
;   %1      array of <16 x i8> entries; element 0 is the source of every
;           llvm.SI.load.const call (the CONST[] values)
;   %2, %3  arrays whose first entries are loaded and handed to the texture
;           sample (presumably sampler state and image resource for
;           SAMP[0]/SVIEW[0] - TODO confirm against the driver ABI)
;   %5, %7  passed unchanged to every llvm.SI.fs.interp call
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant loads. The second operand is a byte offset, so with 16 bytes per
; vec4 the uses below line up with the TGSI operands as:
;   %24..%26 = CONST[0].xyz   (offsets 0, 4, 8)
;   %27..%29 = CONST[1].xyz   (offsets 16, 20, 24)
;   %30..%32 = CONST[2].xyz   (offsets 32, 36, 40)
;   %33      = CONST[3].w     (offset 60)
;   %34      = CONST[5].x     (offset 80)
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
; The two values below are the second and third operands of the sample call.
%35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0
%37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
; Interpolated inputs; the first two operands are (channel, attribute):
;   %39..%42 = IN[0].xyzw, %43..%46 = IN[1].xyzw, %47 = IN[2].x
%39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
; TGSI 4-5: TEX with IN[0].xy as coordinates; %53..%56 are the texel x,y,z,w.
%48 = bitcast float %39 to i32
%49 = bitcast float %40 to i32
%50 = insertelement <2 x i32> undef, i32 %48, i32 0
%51 = insertelement <2 x i32> %50, i32 %49, i32 1
%52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %36, <16 x i8> %38, i32 2)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
%55 = extractelement <4 x float> %52, i32 2
%56 = extractelement <4 x float> %52, i32 3
; TGSI 6-7: TEMP[3].xyz = texel.xyz * (2.0 - CONST[3].w)
%57 = fsub float 2.000000e+00, %33
%58 = fmul float %53, %57
%59 = fmul float %54, %57
%60 = fmul float %55, %57
; TGSI 8-10: the kill operand is -1.0 when texel.w < CONST[5].x, else 0.0.
%61 = fcmp olt float %56, %34
%62 = select i1 %61, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %62)
; TGSI 11: TEMP[4].xyz = TEMP[3].xyz * CONST[2].xyz
%63 = fmul float %58, %30
%64 = fmul float %59, %31
%65 = fmul float %60, %32
; TGSI 12-13: max(dot((IN[0].w, IN[1].x, IN[1].y), CONST[0].xyz), 0.0)
%66 = fmul float %42, %24
%67 = fmul float %43, %25
%68 = fadd float %67, %66
%69 = fmul float %44, %26
%70 = fadd float %68, %69
%71 = call float @llvm.maxnum.f32(float %70, float 0.000000e+00)
; TGSI 14: scale TEMP[4].xyz by the clamped dot product.
%72 = fmul float %63, %71
%73 = fmul float %64, %71
%74 = fmul float %65, %71
; TGSI 15: MAD, adding TEMP[3].xyz * (IN[1].z, IN[1].w, IN[2].x).
%75 = fmul float %58, %45
%76 = fadd float %75, %72
%77 = fmul float %59, %46
%78 = fadd float %77, %73
%79 = fmul float %60, %47
%80 = fadd float %79, %74
; TGSI 17-18: saturate IN[0].z, then LRP between the MAD result and
; CONST[1].xyz with the saturated value as the weight.
%81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00)
%82 = call float @llvm.AMDGPU.lrp(float %81, float %76, float %27)
%83 = call float @llvm.AMDGPU.lrp(float %81, float %78, float %28)
%84 = call float @llvm.AMDGPU.lrp(float %81, float %80, float %29)
; TGSI 16, 19: the output colour is (lrp.x, lrp.y, lrp.z, texel.w). It is
; packed pairwise with llvm.SI.packf16 and the two packed words are exported
; as (%86, %88, %86, %88). The shader key above has export_16bpc = 0x3.
%85 = call i32 @llvm.SI.packf16(float %82, float %83)
%86 = bitcast i32 %85 to float
%87 = call i32 @llvm.SI.packf16(float %84, float %56)
%88 = bitcast i32 %87 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %86, float %88, float %86, float %88)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the fragment shader @main whose LLVM IR is
; dumped directly above. Register notes refer to the IR value numbers.
; NOTE(review): s_wqm_b64 has no counterpart in the IR; presumably it enables
; whole-quad execution for the texture sample - confirm against the ISA manual.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 is the [m0] operand of every v_interp instruction below.
s_mov_b32 m0, s9 ; BEFC0309
; s[8:11] is the last operand of image_sample (the <16 x i8> value %38).
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
; Interpolation, one p1/p2 pair per llvm.SI.fs.interp call, in IR order:
;   v2=%39 v3=%40 v4=%41 v5=%42 v6=%43 v7=%44 v8=%45 v9=%46 v0=%47
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
; s[12:19] is the 8-dword operand of image_sample (the <32 x i8> value %36);
; s[0:3] is the base of every s_buffer_load_dword below (value %23).
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800
v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801
s_waitcnt lgkmcnt(0) ; BF8C007F
; Texture fetch with (v2, v3) as coordinates; v10..v13 = texel x,y,z,w (%53..%56).
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430A02
; Constant fetches. The offsets here are the IR byte offsets divided by 4:
;   s4=%33 s5=%34 s6=%29 s7=%30 s8=%31 s9=%32 s10=%24 s11=%25 s12=%26 s13=%27 s0=%28
s_buffer_load_dword s4, s[0:3], 0xf ; C202010F
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106
s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108
s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109
s_buffer_load_dword s9, s[0:3], 0xa ; C204810A
s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104
s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v1 = -1.0 where %34 > texel.w, else 0 (IR %61/%62, the kill operand).
v_cmp_gt_f32_e32 vcc, s5, v13 ; 7C081A05
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
; v2 = 2.0 - %33 (%57); then v3 = %58, v10 = %59, v2 = %60.
v_sub_f32_e64 v2, 2.0, s4 ; D2080002 000008F4
v_mul_f32_e32 v3, v2, v10 ; 10061502
v_mul_f32_e32 v10, v2, v11 ; 10141702
v_mul_f32_e32 v2, v2, v12 ; 10041902
; Tests 0 <= v1; this sits where the IR calls llvm.AMDGPU.kill
; (presumably the cmpx form also updates exec - TODO confirm).
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
; v1 = three-term dot product (%70), clamped to >= 0 below (%71).
v_mul_f32_e32 v1, s10, v5 ; 10020A0A
v_mac_f32_e32 v1, s11, v6 ; 3E020C0B
v_mac_f32_e32 v1, s12, v7 ; 3E020E0C
; Third colour channel: v5 = %65, then %74, then %80 after the mac.
v_mul_f32_e32 v5, s9, v2 ; 100A0409
v_max_f32_e32 v1, 0, v1 ; 20020280
v_mul_f32_e32 v5, v1, v5 ; 100A0B01
v_mac_f32_e32 v5, v0, v2 ; 3E0A0500
; v0 = clamped %41 (%81); v2 = 1.0 - v0, the complementary lrp weight.
v_add_f32_e64 v0, 0, v4 clamp ; D2060800 00020880
v_sub_f32_e32 v2, 1.0, v0 ; 080400F2
; lrp for the third channel expanded as %29*(1-w) + %80*w (%84), then
; packed together with texel.w (%87).
v_mul_f32_e32 v4, s6, v2 ; 10080406
v_mac_f32_e32 v4, v5, v0 ; 3E080105
v_cvt_pkrtz_f16_f32_e32 v4, v4, v13 ; 5E081B04
; First channel: v5 = %63, %72, then %76.
v_mul_f32_e32 v5, s7, v3 ; 100A0607
v_mul_f32_e32 v5, v1, v5 ; 100A0B01
v_mac_f32_e32 v5, v8, v3 ; 3E0A0708
; Second channel: v3 = %64, v1 = %73, then %78.
v_mul_f32_e32 v3, s8, v10 ; 10061408
v_mul_f32_e32 v1, v1, v3 ; 10020701
v_mac_f32_e32 v1, v9, v10 ; 3E021509
; lrp for the first (v3 = %82) and second (v2 = %83) channels, packed as %85.
v_mul_f32_e32 v3, s13, v2 ; 1006040D
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v2, s0, v2 ; 10040400
v_mac_f32_e32 v2, v1, v0 ; 3E040101
v_cvt_pkrtz_f16_f32_e32 v0, v3, v2 ; 5E000503
; Export of the two packed words, matching the IR operands (%86, %88, %86, %88).
exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 296 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..28]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[25], IN[0].xxxx
1: MAD TEMP[0], CONST[26], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[27], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[28], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[15], IN[0].xxxx
5: MAD TEMP[1], CONST[16], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[17], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1].xyz, CONST[18], IN[0].wwww, TEMP[1]
8: MOV TEMP[2].x, CONST[19].xxxx
9: MOV TEMP[2].y, CONST[20].xxxx
10: MOV TEMP[2].z, CONST[21].xxxx
11: MOV TEMP[3].x, CONST[19].yyyy
12: MOV TEMP[3].y, CONST[20].yyyy
13: MOV TEMP[3].z, CONST[21].yyyy
14: MOV TEMP[4].x, CONST[19].zzzz
15: MOV TEMP[4].y, CONST[20].zzzz
16: MOV TEMP[4].z, CONST[21].zzzz
17: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx
18: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz
19: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz
20: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
21: RSQ TEMP[3].x, TEMP[3].xxxx
22: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
23: MOV TEMP[3].w, IMM[0].xxxx
24: MOV TEMP[3].xyz, TEMP[2].xyzx
25: DP4 TEMP[4].x, CONST[8], TEMP[3]
26: DP4 TEMP[5].x, CONST[9], TEMP[3]
27: MOV TEMP[4].y, TEMP[5].xxxx
28: DP4 TEMP[3].x, CONST[10], TEMP[3]
29: MOV TEMP[4].z, TEMP[3].xxxx
30: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx
31: DP4 TEMP[5].x, CONST[11], TEMP[3]
32: DP4 TEMP[6].x, CONST[12], TEMP[3]
33: MOV TEMP[5].y, TEMP[6].xxxx
34: DP4 TEMP[3].x, CONST[13], TEMP[3]
35: MOV TEMP[5].z, TEMP[3].xxxx
36: ADD TEMP[3], CONST[0], -TEMP[1].xxxx
37: ADD TEMP[6], CONST[1], -TEMP[1].yyyy
38: ADD TEMP[1], CONST[2], -TEMP[1].zzzz
39: MUL TEMP[7], TEMP[3], TEMP[3]
40: MAD TEMP[7], TEMP[6], TEMP[6], TEMP[7]
41: MAD TEMP[7], TEMP[1], TEMP[1], TEMP[7]
42: MUL TEMP[3], TEMP[3], TEMP[2].xxxx
43: MAD TEMP[3], TEMP[6], TEMP[2].yyyy, TEMP[3]
44: MAD TEMP[1], TEMP[1], TEMP[2].zzzz, TEMP[3]
45: RSQ TEMP[3].x, TEMP[7].xxxx
46: RSQ TEMP[3].y, TEMP[7].yyyy
47: RSQ TEMP[3].z, TEMP[7].zzzz
48: RSQ TEMP[3].w, TEMP[7].wwww
49: MUL TEMP[1], TEMP[1], TEMP[3]
50: MAX TEMP[1], IMM[0].yyyy, TEMP[1]
51: MAD TEMP[3], TEMP[7], CONST[3], IMM[0].xxxx
52: RCP TEMP[6].x, TEMP[3].xxxx
53: RCP TEMP[6].y, TEMP[3].yyyy
54: RCP TEMP[6].z, TEMP[3].zzzz
55: RCP TEMP[6].w, TEMP[3].wwww
56: MUL TEMP[1], TEMP[1], TEMP[6]
57: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy
58: MAD TEMP[3].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx
59: MAD TEMP[3].xyz, CONST[14].xyzz, TEMP[3].xxxx, TEMP[5].xyzz
60: ADD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz
61: MUL TEMP[4].xyz, CONST[4].xyzz, TEMP[1].xxxx
62: MAD TEMP[4].xyz, CONST[5].xyzz, TEMP[1].yyyy, TEMP[4].xyzz
63: MAD TEMP[4].xyz, CONST[6].xyzz, TEMP[1].zzzz, TEMP[4].xyzz
64: MAD TEMP[1].xyz, CONST[7].xyzz, TEMP[1].wwww, TEMP[4].xyzz
65: ADD TEMP[1].xyz, TEMP[3].xyzz, TEMP[1].xyzz
66: MAD TEMP[3].xy, IN[2].xyyy, CONST[24].xyyy, CONST[24].zwww
67: MOV TEMP[3].w, TEMP[2].xxxx
68: MOV TEMP[2].xy, TEMP[2].yzyy
69: MOV TEMP[2].zw, TEMP[1].yyxy
70: MOV TEMP[1].x, TEMP[1].zzzz
71: MAD TEMP[4].x, TEMP[0].zzzz, CONST[23].zzzz, CONST[23].wwww
72: MOV TEMP[3].z, TEMP[4].xxxx
73: MOV OUT[3], TEMP[1]
74: MOV OUT[2], TEMP[2]
75: MOV OUT[0], TEMP[0]
76: MOV OUT[1], TEMP[3]
77: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416)
%100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420)
%101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424)
%102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428)
%103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432)
%104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436)
%105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440)
%106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444)
%107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448)
%108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452)
%109 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456)
%110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 460)
%111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0
%113 = add i32 %5, %7
%114 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %113)
%115 = extractelement <4 x float> %114, i32 0
%116 = extractelement <4 x float> %114, i32 1
%117 = extractelement <4 x float> %114, i32 2
%118 = extractelement <4 x float> %114, i32 3
%119 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0
%121 = add i32 %5, %7
%122 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %120, i32 0, i32 %121)
%123 = extractelement <4 x float> %122, i32 0
%124 = extractelement <4 x float> %122, i32 1
%125 = extractelement <4 x float> %122, i32 2
%126 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%127 = load <16 x i8>, <16 x i8> addrspace(2)* %126, align 16, !tbaa !0
%128 = add i32 %5, %7
%129 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %127, i32 0, i32 %128)
%130 = extractelement <4 x float> %129, i32 0
%131 = extractelement <4 x float> %129, i32 1
%132 = fmul float %95, %115
%133 = fmul float %96, %115
%134 = fmul float %97, %115
%135 = fmul float %98, %115
%136 = fmul float %99, %116
%137 = fadd float %136, %132
%138 = fmul float %100, %116
%139 = fadd float %138, %133
%140 = fmul float %101, %116
%141 = fadd float %140, %134
%142 = fmul float %102, %116
%143 = fadd float %142, %135
%144 = fmul float %103, %117
%145 = fadd float %144, %137
%146 = fmul float %104, %117
%147 = fadd float %146, %139
%148 = fmul float %105, %117
%149 = fadd float %148, %141
%150 = fmul float %106, %117
%151 = fadd float %150, %143
%152 = fmul float %107, %118
%153 = fadd float %152, %145
%154 = fmul float %108, %118
%155 = fadd float %154, %147
%156 = fmul float %109, %118
%157 = fadd float %156, %149
%158 = fmul float %110, %118
%159 = fadd float %158, %151
%160 = fmul float %68, %115
%161 = fmul float %69, %115
%162 = fmul float %70, %115
%163 = fmul float %71, %116
%164 = fadd float %163, %160
%165 = fmul float %72, %116
%166 = fadd float %165, %161
%167 = fmul float %73, %116
%168 = fadd float %167, %162
%169 = fmul float %74, %117
%170 = fadd float %169, %164
%171 = fmul float %75, %117
%172 = fadd float %171, %166
%173 = fmul float %76, %117
%174 = fadd float %173, %168
%175 = fmul float %77, %118
%176 = fadd float %175, %170
%177 = fmul float %78, %118
%178 = fadd float %177, %172
%179 = fmul float %79, %118
%180 = fadd float %179, %174
%181 = fmul float %80, %123
%182 = fmul float %83, %123
%183 = fmul float %86, %123
%184 = fmul float %81, %124
%185 = fadd float %184, %181
%186 = fmul float %84, %124
%187 = fadd float %186, %182
%188 = fmul float %87, %124
%189 = fadd float %188, %183
%190 = fmul float %82, %125
%191 = fadd float %190, %185
%192 = fmul float %85, %125
%193 = fadd float %192, %187
%194 = fmul float %88, %125
%195 = fadd float %194, %189
%196 = fmul float %191, %191
%197 = fmul float %193, %193
%198 = fadd float %197, %196
%199 = fmul float %195, %195
%200 = fadd float %198, %199
%201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200)
%202 = fmul float %191, %201
%203 = fmul float %193, %201
%204 = fmul float %195, %201
%205 = fmul float %41, %202
%206 = fmul float %42, %203
%207 = fadd float %205, %206
%208 = fmul float %43, %204
%209 = fadd float %207, %208
%210 = fadd float %209, %44
%211 = fmul float %45, %202
%212 = fmul float %46, %203
%213 = fadd float %211, %212
%214 = fmul float %47, %204
%215 = fadd float %213, %214
%216 = fadd float %215, %48
%217 = fmul float %49, %202
%218 = fmul float %50, %203
%219 = fadd float %217, %218
%220 = fmul float %51, %204
%221 = fadd float %219, %220
%222 = fadd float %221, %52
%223 = fmul float %202, %203
%224 = fmul float %203, %204
%225 = fmul float %204, %204
%226 = fmul float %204, %202
%227 = fmul float %53, %223
%228 = fmul float %54, %224
%229 = fadd float %227, %228
%230 = fmul float %55, %225
%231 = fadd float %229, %230
%232 = fmul float %56, %226
%233 = fadd float %231, %232
%234 = fmul float %57, %223
%235 = fmul float %58, %224
%236 = fadd float %234, %235
%237 = fmul float %59, %225
%238 = fadd float %236, %237
%239 = fmul float %60, %226
%240 = fadd float %238, %239
%241 = fmul float %61, %223
%242 = fmul float %62, %224
%243 = fadd float %241, %242
%244 = fmul float %63, %225
%245 = fadd float %243, %244
%246 = fmul float %64, %226
%247 = fadd float %245, %246
%248 = fsub float %13, %176
%249 = fsub float %14, %176
%250 = fsub float %15, %176
%251 = fsub float %16, %176
%252 = fsub float %17, %178
%253 = fsub float %18, %178
%254 = fsub float %19, %178
%255 = fsub float %20, %178
%256 = fsub float %21, %180
%257 = fsub float %22, %180
%258 = fsub float %23, %180
%259 = fsub float %24, %180
%260 = fmul float %248, %248
%261 = fmul float %249, %249
%262 = fmul float %250, %250
%263 = fmul float %251, %251
%264 = fmul float %252, %252
%265 = fadd float %264, %260
%266 = fmul float %253, %253
%267 = fadd float %266, %261
%268 = fmul float %254, %254
%269 = fadd float %268, %262
%270 = fmul float %255, %255
%271 = fadd float %270, %263
%272 = fmul float %256, %256
%273 = fadd float %272, %265
%274 = fmul float %257, %257
%275 = fadd float %274, %267
%276 = fmul float %258, %258
%277 = fadd float %276, %269
%278 = fmul float %259, %259
%279 = fadd float %278, %271
%280 = fmul float %248, %202
%281 = fmul float %249, %202
%282 = fmul float %250, %202
%283 = fmul float %251, %202
%284 = fmul float %252, %203
%285 = fadd float %284, %280
%286 = fmul float %253, %203
%287 = fadd float %286, %281
%288 = fmul float %254, %203
%289 = fadd float %288, %282
%290 = fmul float %255, %203
%291 = fadd float %290, %283
%292 = fmul float %256, %204
%293 = fadd float %292, %285
%294 = fmul float %257, %204
%295 = fadd float %294, %287
%296 = fmul float %258, %204
%297 = fadd float %296, %289
%298 = fmul float %259, %204
%299 = fadd float %298, %291
%300 = call float @llvm.AMDGPU.rsq.clamped.f32(float %273)
%301 = call float @llvm.AMDGPU.rsq.clamped.f32(float %275)
%302 = call float @llvm.AMDGPU.rsq.clamped.f32(float %277)
%303 = call float @llvm.AMDGPU.rsq.clamped.f32(float %279)
%304 = fmul float %293, %300
%305 = fmul float %295, %301
%306 = fmul float %297, %302
%307 = fmul float %299, %303
%308 = call float @llvm.maxnum.f32(float %304, float 0.000000e+00)
%309 = call float @llvm.maxnum.f32(float %305, float 0.000000e+00)
%310 = call float @llvm.maxnum.f32(float %306, float 0.000000e+00)
%311 = call float @llvm.maxnum.f32(float %307, float 0.000000e+00)
%312 = fmul float %273, %25
%313 = fadd float %312, 1.000000e+00
%314 = fmul float %275, %26
%315 = fadd float %314, 1.000000e+00
%316 = fmul float %277, %27
%317 = fadd float %316, 1.000000e+00
%318 = fmul float %279, %28
%319 = fadd float %318, 1.000000e+00
%320 = fdiv float 1.000000e+00, %313
%321 = fdiv float 1.000000e+00, %315
%322 = fdiv float 1.000000e+00, %317
%323 = fdiv float 1.000000e+00, %319
%324 = fmul float %308, %320
%325 = fmul float %309, %321
%326 = fmul float %310, %322
%327 = fmul float %311, %323
%328 = fmul float %203, %203
%329 = fmul float %202, %202
%330 = fsub float %329, %328
%331 = fmul float %65, %330
%332 = fadd float %331, %233
%333 = fmul float %66, %330
%334 = fadd float %333, %240
%335 = fmul float %67, %330
%336 = fadd float %335, %247
%337 = fadd float %332, %210
%338 = fadd float %334, %216
%339 = fadd float %336, %222
%340 = fmul float %29, %324
%341 = fmul float %30, %324
%342 = fmul float %31, %324
%343 = fmul float %32, %325
%344 = fadd float %343, %340
%345 = fmul float %33, %325
%346 = fadd float %345, %341
%347 = fmul float %34, %325
%348 = fadd float %347, %342
%349 = fmul float %35, %326
%350 = fadd float %349, %344
%351 = fmul float %36, %326
%352 = fadd float %351, %346
%353 = fmul float %37, %326
%354 = fadd float %353, %348
%355 = fmul float %38, %327
%356 = fadd float %355, %350
%357 = fmul float %39, %327
%358 = fadd float %357, %352
%359 = fmul float %40, %327
%360 = fadd float %359, %354
%361 = fadd float %337, %356
%362 = fadd float %338, %358
%363 = fadd float %339, %360
%364 = fmul float %130, %91
%365 = fadd float %364, %93
%366 = fmul float %131, %92
%367 = fadd float %366, %94
%368 = fmul float %157, %89
%369 = fadd float %368, %90
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %365, float %367, float %369, float %202)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %203, float %204, float %361, float %362)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %363, float %362, float %363, float %327)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %153, float %155, float %157, float %159)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[8:11], 0 idxen ; E00C2000 80020800
s_load_dwordx4 s[76:79], s[2:3], 0x0 ; C0A60300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s33, s[76:79], 0x0 ; C210CD00
s_buffer_load_dword s26, s[76:79], 0x1 ; C20D4D01
s_buffer_load_dword s8, s[76:79], 0x2 ; C2044D02
s_buffer_load_dword s4, s[76:79], 0x3 ; C2024D03
s_buffer_load_dword s37, s[76:79], 0x4 ; C212CD04
s_buffer_load_dword s27, s[76:79], 0x5 ; C20DCD05
s_buffer_load_dword s11, s[76:79], 0x6 ; C205CD06
s_buffer_load_dword s5, s[76:79], 0x7 ; C202CD07
s_buffer_load_dword s38, s[76:79], 0x8 ; C2134D08
s_buffer_load_dword s31, s[76:79], 0x9 ; C20FCD09
s_buffer_load_dword s18, s[76:79], 0xa ; C2094D0A
s_buffer_load_dword s6, s[76:79], 0xb ; C2034D0B
s_buffer_load_dword s39, s[76:79], 0xc ; C213CD0C
s_buffer_load_dword s32, s[76:79], 0xd ; C2104D0D
s_buffer_load_dword s19, s[76:79], 0xe ; C209CD0E
s_buffer_load_dword s0, s[76:79], 0xf ; C2004D0F
s_buffer_load_dword s34, s[76:79], 0x10 ; C2114D10
s_buffer_load_dword s35, s[76:79], 0x11 ; C211CD11
s_buffer_load_dword s36, s[76:79], 0x12 ; C2124D12
s_buffer_load_dword s28, s[76:79], 0x14 ; C20E4D14
s_buffer_load_dword s29, s[76:79], 0x15 ; C20ECD15
s_buffer_load_dword s30, s[76:79], 0x16 ; C20F4D16
s_buffer_load_dword s14, s[76:79], 0x18 ; C2074D18
s_buffer_load_dword s15, s[76:79], 0x19 ; C207CD19
s_buffer_load_dword s16, s[76:79], 0x1a ; C2084D1A
s_buffer_load_dword s1, s[76:79], 0x1c ; C200CD1C
s_buffer_load_dword s2, s[76:79], 0x1d ; C2014D1D
s_buffer_load_dword s3, s[76:79], 0x1e ; C201CD1E
s_buffer_load_dword s12, s[76:79], 0x20 ; C2064D20
s_buffer_load_dword s20, s[76:79], 0x21 ; C20A4D21
s_buffer_load_dword s9, s[76:79], 0x22 ; C204CD22
s_buffer_load_dword s7, s[76:79], 0x23 ; C203CD23
s_buffer_load_dword s21, s[76:79], 0x24 ; C20ACD24
s_buffer_load_dword s23, s[76:79], 0x25 ; C20BCD25
s_buffer_load_dword s13, s[76:79], 0x26 ; C206CD26
s_buffer_load_dword s10, s[76:79], 0x27 ; C2054D27
s_buffer_load_dword s24, s[76:79], 0x28 ; C20C4D28
s_buffer_load_dword s25, s[76:79], 0x29 ; C20CCD29
s_buffer_load_dword s22, s[76:79], 0x2a ; C20B4D2A
s_buffer_load_dword s17, s[76:79], 0x2b ; C208CD2B
s_buffer_load_dword s46, s[76:79], 0x2c ; C2174D2C
s_buffer_load_dword s51, s[76:79], 0x2d ; C219CD2D
s_buffer_load_dword s41, s[76:79], 0x2e ; C214CD2E
s_buffer_load_dword s40, s[76:79], 0x2f ; C2144D2F
s_buffer_load_dword s49, s[76:79], 0x30 ; C218CD30
s_buffer_load_dword s53, s[76:79], 0x31 ; C21ACD31
s_buffer_load_dword s48, s[76:79], 0x32 ; C2184D32
s_buffer_load_dword s42, s[76:79], 0x33 ; C2154D33
s_buffer_load_dword s52, s[76:79], 0x34 ; C21A4D34
s_buffer_load_dword s54, s[76:79], 0x35 ; C21B4D35
s_buffer_load_dword s50, s[76:79], 0x36 ; C2194D36
s_buffer_load_dword s47, s[76:79], 0x37 ; C217CD37
s_buffer_load_dword s43, s[76:79], 0x38 ; C215CD38
s_buffer_load_dword s44, s[76:79], 0x39 ; C2164D39
s_buffer_load_dword s45, s[76:79], 0x3a ; C216CD3A
s_buffer_load_dword s69, s[76:79], 0x3c ; C222CD3C
s_buffer_load_dword s70, s[76:79], 0x3d ; C2234D3D
s_buffer_load_dword s71, s[76:79], 0x3e ; C223CD3E
s_buffer_load_dword s63, s[76:79], 0x40 ; C21FCD40
s_buffer_load_dword s64, s[76:79], 0x41 ; C2204D41
s_buffer_load_dword s65, s[76:79], 0x42 ; C220CD42
s_buffer_load_dword s59, s[76:79], 0x44 ; C21DCD44
s_buffer_load_dword s60, s[76:79], 0x45 ; C21E4D45
s_buffer_load_dword s61, s[76:79], 0x46 ; C21ECD46
s_buffer_load_dword s57, s[76:79], 0x48 ; C21CCD48
s_buffer_load_dword s55, s[76:79], 0x49 ; C21BCD49
s_buffer_load_dword s56, s[76:79], 0x4a ; C21C4D4A
s_buffer_load_dword s74, s[76:79], 0x4c ; C2254D4C
s_buffer_load_dword s68, s[76:79], 0x4d ; C2224D4D
s_buffer_load_dword s62, s[76:79], 0x4e ; C21F4D4E
s_buffer_load_dword s75, s[76:79], 0x50 ; C225CD50
s_buffer_load_dword s72, s[76:79], 0x51 ; C2244D51
s_buffer_load_dword s66, s[76:79], 0x52 ; C2214D52
s_buffer_load_dword s80, s[76:79], 0x54 ; C2284D54
s_buffer_load_dword s73, s[76:79], 0x55 ; C224CD55
s_buffer_load_dword s67, s[76:79], 0x56 ; C221CD56
s_buffer_load_dword s58, s[76:79], 0x5e ; C21D4D5E
s_buffer_load_dword s81, s[76:79], 0x5f ; C228CD5F
s_buffer_load_dword s82, s[76:79], 0x60 ; C2294D60
s_buffer_load_dword s83, s[76:79], 0x61 ; C229CD61
s_buffer_load_dword s84, s[76:79], 0x62 ; C22A4D62
s_buffer_load_dword s85, s[76:79], 0x63 ; C22ACD63
s_buffer_load_dword s86, s[76:79], 0x64 ; C22B4D64
s_buffer_load_dword s87, s[76:79], 0x65 ; C22BCD65
s_buffer_load_dword s88, s[76:79], 0x66 ; C22C4D66
s_buffer_load_dword s89, s[76:79], 0x67 ; C22CCD67
s_buffer_load_dword s90, s[76:79], 0x68 ; C22D4D68
s_buffer_load_dword s91, s[76:79], 0x69 ; C22DCD69
s_buffer_load_dword s92, s[76:79], 0x6a ; C22E4D6A
s_buffer_load_dword s93, s[76:79], 0x6b ; C22ECD6B
s_buffer_load_dword s94, s[76:79], 0x6c ; C22F4D6C
s_buffer_load_dword s95, s[76:79], 0x6d ; C22FCD6D
s_buffer_load_dword s96, s[76:79], 0x6e ; C2304D6E
s_buffer_load_dword s97, s[76:79], 0x6f ; C230CD6F
s_buffer_load_dword s98, s[76:79], 0x70 ; C2314D70
s_buffer_load_dword s99, s[76:79], 0x71 ; C231CD71
s_buffer_load_dword s100, s[76:79], 0x72 ; C2324D72
s_buffer_load_dword s76, s[76:79], 0x73 ; C2264D73
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s84 ; 7E000254
v_mov_b32_e32 v10, s85 ; 7E140255
v_mov_b32_e32 v11, s81 ; 7E160251
v_mul_f32_e32 v12, s86, v1 ; 10180256
v_mul_f32_e32 v13, s87, v1 ; 101A0257
v_mul_f32_e32 v14, s88, v1 ; 101C0258
v_mul_f32_e32 v15, s89, v1 ; 101E0259
v_mul_f32_e32 v16, s69, v1 ; 10200245
v_mul_f32_e32 v17, s70, v1 ; 10220246
v_mul_f32_e32 v1, s71, v1 ; 10020247
v_mul_f32_e32 v18, s74, v5 ; 10240A4A
v_mul_f32_e32 v19, s75, v5 ; 10260A4B
v_mul_f32_e32 v5, s80, v5 ; 100A0A50
v_mac_f32_e32 v0, s82, v8 ; 3E001052
v_mac_f32_e32 v10, s83, v9 ; 3E141253
v_mac_f32_e32 v12, s90, v2 ; 3E18045A
v_mac_f32_e32 v13, s91, v2 ; 3E1A045B
v_mac_f32_e32 v14, s92, v2 ; 3E1C045C
v_mac_f32_e32 v15, s93, v2 ; 3E1E045D
v_mac_f32_e32 v16, s63, v2 ; 3E20043F
v_mac_f32_e32 v17, s64, v2 ; 3E220440
v_mac_f32_e32 v1, s65, v2 ; 3E020441
v_mac_f32_e32 v18, s68, v6 ; 3E240C44
v_mac_f32_e32 v19, s72, v6 ; 3E260C48
v_mac_f32_e32 v5, s73, v6 ; 3E0A0C49
v_mac_f32_e32 v12, s94, v3 ; 3E18065E
v_mac_f32_e32 v13, s95, v3 ; 3E1A065F
v_mac_f32_e32 v14, s96, v3 ; 3E1C0660
v_mac_f32_e32 v15, s97, v3 ; 3E1E0661
v_mac_f32_e32 v16, s59, v3 ; 3E20063B
v_mac_f32_e32 v17, s60, v3 ; 3E22063C
v_mac_f32_e32 v1, s61, v3 ; 3E02063D
v_mac_f32_e32 v18, s62, v7 ; 3E240E3E
v_mac_f32_e32 v19, s66, v7 ; 3E260E42
v_mac_f32_e32 v5, s67, v7 ; 3E0A0E43
v_mac_f32_e32 v12, s98, v4 ; 3E180862
v_mac_f32_e32 v13, s99, v4 ; 3E1A0863
v_mac_f32_e32 v14, s100, v4 ; 3E1C0864
v_mac_f32_e32 v15, s76, v4 ; 3E1E084C
v_mac_f32_e32 v16, s57, v4 ; 3E200839
v_mul_f32_e32 v2, v18, v18 ; 10042512
v_mac_f32_e32 v2, v19, v19 ; 3E042713
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mac_f32_e32 v17, s55, v4 ; 3E220837
v_mac_f32_e32 v1, s56, v4 ; 3E020838
v_mac_f32_e32 v11, s58, v14 ; 3E161C3A
v_mul_f32_e32 v3, v2, v19 ; 10062702
v_mul_f32_e32 v4, v2, v5 ; 10080B02
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v6, s51, v5 ; 100C0A33
v_mul_f32_e32 v7, s53, v5 ; 100E0A35
v_mul_f32_e32 v5, s54, v5 ; 100A0A36
v_mul_f32_e32 v2, v2, v18 ; 10042502
v_mul_f32_e32 v8, v3, v2 ; 10100503
v_mac_f32_e32 v6, s46, v8 ; 3E0C102E
v_mac_f32_e32 v7, s49, v8 ; 3E0E1031
v_mac_f32_e32 v5, s52, v8 ; 3E0A1034
v_mul_f32_e32 v8, v4, v4 ; 10100904
v_mac_f32_e32 v6, s41, v8 ; 3E0C1029
v_mac_f32_e32 v7, s48, v8 ; 3E0E1030
v_mac_f32_e32 v5, s50, v8 ; 3E0A1032
v_mul_f32_e32 v8, v2, v4 ; 10100902
v_mac_f32_e32 v6, s40, v8 ; 3E0C1028
v_mac_f32_e32 v7, s42, v8 ; 3E0E102A
exp 15, 32, 0, 0, 0, v0, v10, v11, v2 ; F800020F 020B0A00
v_mac_f32_e32 v5, s47, v8 ; 3E0A102F
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v3, v3 ; 10000703
v_mad_f32 v0, v2, v2, -v0 ; D2820000 84020502
v_mac_f32_e32 v6, s43, v0 ; 3E0C002B
v_mac_f32_e32 v7, s44, v0 ; 3E0E002C
v_mac_f32_e32 v5, s45, v0 ; 3E0A002D
v_sub_f32_e32 v0, s33, v16 ; 08002021
v_sub_f32_e32 v8, s37, v17 ; 08102225
v_sub_f32_e32 v9, s38, v1 ; 08120226
v_mul_f32_e32 v10, v0, v0 ; 10140100
v_mac_f32_e32 v10, v8, v8 ; 3E141108
v_mac_f32_e32 v10, v9, v9 ; 3E141309
v_mad_f32 v11, v10, s39, 1.0 ; D282000B 03C84F0A
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
v_mul_f32_e32 v0, v2, v0 ; 10000102
v_mac_f32_e32 v0, v3, v8 ; 3E001103
v_rcp_f32_e32 v8, v11 ; 7E10550B
v_mac_f32_e32 v0, v4, v9 ; 3E001304
v_mul_f32_e32 v0, v10, v0 ; 1000010A
v_max_f32_e32 v0, 0, v0 ; 20000080
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v8, s34, v0 ; 10100022
v_mul_f32_e32 v9, s35, v0 ; 10120023
v_mul_f32_e32 v0, s36, v0 ; 10000024
v_sub_f32_e32 v10, s26, v16 ; 0814201A
v_sub_f32_e32 v11, s27, v17 ; 0816221B
v_sub_f32_e32 v18, s31, v1 ; 0824021F
v_mul_f32_e32 v19, v10, v10 ; 1026150A
v_mac_f32_e32 v19, v11, v11 ; 3E26170B
v_mac_f32_e32 v19, v18, v18 ; 3E262512
v_mad_f32 v20, v19, s32, 1.0 ; D2820014 03C84113
v_rsq_clamp_f32_e32 v19, v19 ; 7E265913
v_mul_f32_e32 v10, v2, v10 ; 10141502
v_mac_f32_e32 v10, v3, v11 ; 3E141703
v_rcp_f32_e32 v11, v20 ; 7E165514
v_mac_f32_e32 v10, v4, v18 ; 3E142504
v_mul_f32_e32 v10, v19, v10 ; 10141513
v_max_f32_e32 v10, 0, v10 ; 20141480
v_mul_f32_e32 v10, v11, v10 ; 1014150B
v_mac_f32_e32 v8, s28, v10 ; 3E10141C
v_mac_f32_e32 v9, s29, v10 ; 3E12141D
v_mac_f32_e32 v0, s30, v10 ; 3E00141E
v_sub_f32_e32 v10, s8, v16 ; 08142008
v_sub_f32_e32 v11, s11, v17 ; 0816220B
v_sub_f32_e32 v18, s18, v1 ; 08240212
v_mul_f32_e32 v19, v10, v10 ; 1026150A
v_mac_f32_e32 v19, v11, v11 ; 3E26170B
v_mac_f32_e32 v19, v18, v18 ; 3E262512
v_mad_f32 v20, v19, s19, 1.0 ; D2820014 03C82713
v_rsq_clamp_f32_e32 v19, v19 ; 7E265913
v_mul_f32_e32 v10, v2, v10 ; 10141502
v_mac_f32_e32 v10, v3, v11 ; 3E141703
v_rcp_f32_e32 v11, v20 ; 7E165514
v_mac_f32_e32 v10, v4, v18 ; 3E142504
v_mul_f32_e32 v10, v19, v10 ; 10141513
v_max_f32_e32 v10, 0, v10 ; 20141480
v_mul_f32_e32 v10, v11, v10 ; 1014150B
v_mac_f32_e32 v8, s14, v10 ; 3E10140E
v_mac_f32_e32 v9, s15, v10 ; 3E12140F
v_mac_f32_e32 v0, s16, v10 ; 3E001410
v_sub_f32_e32 v10, s4, v16 ; 08142004
v_sub_f32_e32 v11, s5, v17 ; 08162205
v_sub_f32_e32 v1, s6, v1 ; 08020206
v_mul_f32_e32 v16, s20, v3 ; 10200614
v_mac_f32_e32 v16, s12, v2 ; 3E20040C
v_mul_f32_e32 v17, s23, v3 ; 10220617
v_mac_f32_e32 v17, s21, v2 ; 3E220415
v_mul_f32_e32 v18, s25, v3 ; 10240619
v_mac_f32_e32 v18, s24, v2 ; 3E240418
v_mac_f32_e32 v16, s9, v4 ; 3E200809
v_mac_f32_e32 v17, s13, v4 ; 3E22080D
v_mac_f32_e32 v18, s22, v4 ; 3E240816
v_add_f32_e32 v16, s7, v16 ; 06202007
v_add_f32_e32 v17, s10, v17 ; 0622220A
v_add_f32_e32 v18, s17, v18 ; 06242411
v_mul_f32_e32 v2, v2, v10 ; 10041502
v_mul_f32_e32 v10, v10, v10 ; 1014150A
v_mac_f32_e32 v10, v11, v11 ; 3E14170B
v_mac_f32_e32 v10, v1, v1 ; 3E140301
v_mad_f32 v19, v10, s0, 1.0 ; D2820013 03C8010A
v_rsq_clamp_f32_e32 v10, v10 ; 7E14590A
v_mac_f32_e32 v2, v3, v11 ; 3E041703
v_rcp_f32_e32 v11, v19 ; 7E165513
v_mac_f32_e32 v2, v4, v1 ; 3E040304
v_mul_f32_e32 v1, v10, v2 ; 1002050A
v_max_f32_e32 v1, 0, v1 ; 20020280
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mac_f32_e32 v8, s1, v1 ; 3E100201
v_mac_f32_e32 v9, s2, v1 ; 3E120202
v_mac_f32_e32 v0, s3, v1 ; 3E000203
v_add_f32_e32 v2, v16, v6 ; 06040D10
v_add_f32_e32 v6, v17, v7 ; 060C0F11
v_add_f32_e32 v2, v8, v2 ; 06040508
v_add_f32_e32 v6, v9, v6 ; 060C0D09
exp 15, 33, 0, 0, 0, v3, v4, v2, v6 ; F800021F 06020403
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v2, v18, v5 ; 06040B12
v_add_f32_e32 v0, v0, v2 ; 06000500
exp 15, 34, 0, 0, 0, v0, v6, v0, v1 ; F800022F 01000600
exp 15, 12, 0, 1, 0, v12, v13, v14, v15 ; F80008CF 0F0E0D0C
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 24
Code Size: 1160 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..3]
DCL CONST[5]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 2.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[0].wwww
1: MOV TEMP[0].yz, IN[1].yxyy
2: MOV TEMP[1].xy, IN[1].zwzz
3: MOV TEMP[1].z, IN[2].xxxx
4: MOV TEMP[2].xy, IN[0].xyyy
5: TEX TEMP[2], TEMP[2], SAMP[0], 2D
6: ADD TEMP[3].x, IMM[0].xxxx, -CONST[3].wwww
7: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
8: FSLT TEMP[4].x, TEMP[2].wwww, CONST[5].xxxx
9: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy
10: KILL_IF -TEMP[4].xxxx
11: MUL TEMP[4].xyz, TEMP[3].xyzz, CONST[2].xyzz
12: DP3 TEMP[0].x, TEMP[0].xyzz, CONST[0].xyzz
13: MAX TEMP[0].x, IMM[0].zzzz, TEMP[0].xxxx
14: MUL TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xxxx
15: MAD TEMP[0].xyz, TEMP[3].xyzz, TEMP[1].xyzz, TEMP[0].xyzz
16: MOV TEMP[1].w, TEMP[2].wwww
17: MOV_SAT TEMP[2].x, IN[0].zzzz
18: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[1].xyzz
19: MOV OUT[0], TEMP[1]
20: END
; ModuleID = 'tgsi'
; Pixel-shader entry point generated from the TGSI FRAG program listed above
; (21 TGSI instructions, numbered 0-20).  In order, it:
;   1. loads the shader constants it needs,
;   2. interpolates the three GENERIC inputs,
;   3. samples SAMP[0] at IN[0].xy,
;   4. discards the pixel when texel.w < CONST[5].x,
;   5. computes texel.xyz * (2 - CONST[3].w) * (CONST[2].xyz * max(dot, 0) + IN[1].zw / IN[2].x),
;   6. blends that result with CONST[1].xyz using saturate(IN[0].z) (TGSI LRP),
;   7. packs the four result components into two 32-bit values and exports them.
; The numbered arguments are supplied by the driver; only %1, %2, %3, %5 and %7
; are referenced in the body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; %23 = element 0 of the array behind argument %1; every llvm.SI.load.const
; below reads from it at a byte offset.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Offsets 0/4/8   -> operands of the DP3            (TGSI CONST[0].xyz)
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
; Offsets 16/20/24 -> third operand of the LRP       (TGSI CONST[1].xyz)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
; Offsets 32/36/40 -> per-channel colour scale       (TGSI CONST[2].xyz)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
; Offset 60 -> CONST[3].w, offset 80 -> CONST[5].x (alpha-test threshold).
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
; First entries of the arrays behind %3 and %2; both are handed to the sample
; intrinsic below.  NOTE(review): presumably the image resource (%36) and the
; sampler state (%38) for SAMP[0] -- confirm against the driver.
%35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0
%37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
; Interpolated inputs.  Judging by how the results are consumed, the first
; operand selects the component (0..3 = x..w) and the second the input slot:
; %39-%42 = IN[0].xyzw, %43-%46 = IN[1].xyzw, %47 = IN[2].x.
%39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%45 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%47 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
; Build the 2-component coordinate from IN[0].xy (bit patterns only, no value
; conversion) and sample the texture; %53..%56 = texel x, y, z, w.
%48 = bitcast float %39 to i32
%49 = bitcast float %40 to i32
%50 = insertelement <2 x i32> undef, i32 %48, i32 0
%51 = insertelement <2 x i32> %50, i32 %49, i32 1
%52 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %51, <32 x i8> %36, <16 x i8> %38, i32 2)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
%55 = extractelement <4 x float> %52, i32 2
%56 = extractelement <4 x float> %52, i32 3
; TGSI 6-7: texel.xyz * (2.0 - CONST[3].w).
%57 = fsub float 2.000000e+00, %33
%58 = fmul float %53, %57
%59 = fmul float %54, %57
%60 = fmul float %55, %57
; TGSI 8-10 (FSLT / AND / KILL_IF) folded into one compare + select:
; %62 is -1.0 when texel.w < CONST[5].x, otherwise 0.0.  The disassembly keeps
; only lanes where 0 <= %62, i.e. the pixel is dropped on the -1.0 path.
%61 = fcmp olt float %56, %34
%62 = select i1 %61, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %62)
; TGSI 11: scaled texel * CONST[2].xyz.
%63 = fmul float %58, %30
%64 = fmul float %59, %31
%65 = fmul float %60, %32
; TGSI 12-13: max(dot((IN[0].w, IN[1].x, IN[1].y), CONST[0].xyz), 0).
%66 = fmul float %42, %24
%67 = fmul float %43, %25
%68 = fadd float %67, %66
%69 = fmul float %44, %26
%70 = fadd float %68, %69
%71 = call float @llvm.maxnum.f32(float %70, float 0.000000e+00)
; TGSI 14: weight the CONST[2]-scaled colour by the clamped dot product.
%72 = fmul float %63, %71
%73 = fmul float %64, %71
%74 = fmul float %65, %71
; TGSI 15 (MAD): scaled texel * (IN[1].z, IN[1].w, IN[2].x) + the term above.
%75 = fmul float %58, %45
%76 = fadd float %75, %72
%77 = fmul float %59, %46
%78 = fadd float %77, %73
%79 = fmul float %60, %47
%80 = fadd float %79, %74
; TGSI 17-18: factor = clamp(IN[0].z, 0, 1); LRP(factor, colour, CONST[1].xyz).
%81 = call float @llvm.AMDIL.clamp.(float %41, float 0.000000e+00, float 1.000000e+00)
%82 = call float @llvm.AMDGPU.lrp(float %81, float %76, float %27)
%83 = call float @llvm.AMDGPU.lrp(float %81, float %78, float %28)
%84 = call float @llvm.AMDGPU.lrp(float %81, float %80, float %29)
; Pack (x, y) and (z, texel.w) pairwise into two i32 values; the unmodified
; texel.w is the output's w component (TGSI 16: MOV TEMP[1].w, TEMP[2].w).
%85 = call i32 @llvm.SI.packf16(float %82, float %83)
%86 = bitcast i32 %85 to float
%87 = call i32 @llvm.SI.packf16(float %84, float %56)
%88 = bitcast i32 %87 to float
; The two packed values are passed twice (%86, %88, %86, %88).
; NOTE(review): the leading operands (15, 1, 1, 0, 1) look like enable mask,
; valid-mask, done, target and compressed flags -- the disassembly prints the
; same values as `exp 15, 0, 1, 1, 1` -- confirm against the intrinsic.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %86, float %88, float %86, float %88)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment @main above.  Register roles below are read
; off this listing and the IR; the trailing hex on each line is the encoding.
; Put exec into whole-quad mode and set m0, which every v_interp_* references.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; s[8:11]: 4-dword descriptor used as the last operand of image_sample.
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
; Two-step interpolation (p1 then p2) per component:
; v2..v5 = attribute 0 .xyzw, v6 = attribute 1 .x
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
; s[12:19]: 8-dword descriptor for image_sample; s[0:3]: descriptor that the
; s_buffer_load_dword instructions below read the constants through.
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; v7..v9 = attribute 1 .yzw, v0 = attribute 2 .x (v0 is overwritten last
; because it is a source of every p1 step).
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v0, v0, 0, 2, [m0] ; C8000800
v_interp_p2_f32 v0, [v0], v1, 0, 2, [m0] ; C8010801
s_waitcnt lgkmcnt(0) ; BF8C007F
; Texture fetch with coordinates v[2:3]; result in v10..v13.
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430A02
; Constant loads; the immediate is a dword index, i.e. the IR byte offset / 4
; (0xf = 60/4, 0x14 = 80/4, ...).
s_buffer_load_dword s4, s[0:3], 0xf ; C202010F
s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114
s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106
s_buffer_load_dword s7, s[0:3], 0x8 ; C2038108
s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109
s_buffer_load_dword s9, s[0:3], 0xa ; C204810A
s_buffer_load_dword s10, s[0:3], 0x0 ; C2050100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104
s_buffer_load_dword s0, s[0:3], 0x5 ; C2000105
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Alpha test: v1 = (s5 > texel.w) ? -1.0 : 0.0
v_cmp_gt_f32_e32 vcc, s5, v13 ; 7C081A05
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
; v3, v10, v2 = texel.x, .y, .z scaled by (2.0 - s4)
v_sub_f32_e64 v2, 2.0, s4 ; D2080002 000008F4
v_mul_f32_e32 v3, v2, v10 ; 10061502
v_mul_f32_e32 v10, v2, v11 ; 10141702
v_mul_f32_e32 v2, v2, v12 ; 10041902
; The compare-and-write-exec form: only lanes with 0 <= v1 stay active, which
; implements the kill.
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
; v1 = max(s10*v5 + s11*v6 + s12*v7, 0): the clamped dot product.
v_mul_f32_e32 v1, s10, v5 ; 10020A0A
v_mac_f32_e32 v1, s11, v6 ; 3E020C0B
v_mac_f32_e32 v1, s12, v7 ; 3E020E0C
v_mul_f32_e32 v5, s9, v2 ; 100A0409
v_max_f32_e32 v1, 0, v1 ; 20020280
; z channel: v5 = (s9 * z) * dot + attribute2.x * z
v_mul_f32_e32 v5, v1, v5 ; 100A0B01
v_mac_f32_e32 v5, v0, v2 ; 3E0A0500
; v0 = attribute0.z clamped via the `clamp` modifier, v2 = 1 - v0; each LRP is
; expanded as const * (1 - v0) + colour * v0.
v_add_f32_e64 v0, 0, v4 clamp ; D2060800 00020880
v_sub_f32_e32 v2, 1.0, v0 ; 080400F2
v_mul_f32_e32 v4, s6, v2 ; 10080406
v_mac_f32_e32 v4, v5, v0 ; 3E080105
; Second packed dword: (blended z, texel.w).
v_cvt_pkrtz_f16_f32_e32 v4, v4, v13 ; 5E081B04
; x channel -> v5, y channel -> v1 (same shape as the z channel above).
v_mul_f32_e32 v5, s7, v3 ; 100A0607
v_mul_f32_e32 v5, v1, v5 ; 100A0B01
v_mac_f32_e32 v5, v8, v3 ; 3E0A0708
v_mul_f32_e32 v3, s8, v10 ; 10061408
v_mul_f32_e32 v1, v1, v3 ; 10020701
v_mac_f32_e32 v1, v9, v10 ; 3E021509
v_mul_f32_e32 v3, s13, v2 ; 1006040D
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v2, s0, v2 ; 10040400
v_mac_f32_e32 v2, v1, v0 ; 3E040101
; First packed dword: (blended x, blended y); then export both dwords twice.
v_cvt_pkrtz_f16_f32_e32 v0, v3, v2 ; 5E000503
exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Code Size: 296 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[0], TEMP[0]
5: END
; ModuleID = 'tgsi'
; Vertex-shader entry point generated from the TGSI VERT program listed above:
;   OUT[0] = CONST[0]*IN[0].x + CONST[1]*IN[0].y + CONST[2]*IN[0].z + CONST[3]*IN[0].w
; i.e. the input vector is combined with four constant vectors and the result
; is exported as the POSITION output.  Only arguments %1, %4, %5 and %7 are
; referenced in the body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; %12 = element 0 of the array behind argument %1; source of all constants.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Sixteen consecutive floats at byte offsets 0..60:
; %13-%16 = CONST[0].xyzw, %17-%20 = CONST[1].xyzw,
; %21-%24 = CONST[2].xyzw, %25-%28 = CONST[3].xyzw.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Fetch IN[0] through element 0 of the array behind %4, indexed by %5 + %7.
; NOTE(review): %5 / %7 are presumably a base/start offset and the per-vertex
; index -- confirm against the driver's argument layout.
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
; %33..%36 = IN[0].x, .y, .z, .w
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
; TGSI 0 (MUL): CONST[0] * IN[0].x
%37 = fmul float %13, %33
%38 = fmul float %14, %33
%39 = fmul float %15, %33
%40 = fmul float %16, %33
; TGSI 1 (MAD): += CONST[1] * IN[0].y
%41 = fmul float %17, %34
%42 = fadd float %41, %37
%43 = fmul float %18, %34
%44 = fadd float %43, %38
%45 = fmul float %19, %34
%46 = fadd float %45, %39
%47 = fmul float %20, %34
%48 = fadd float %47, %40
; TGSI 2 (MAD): += CONST[2] * IN[0].z
%49 = fmul float %21, %35
%50 = fadd float %49, %42
%51 = fmul float %22, %35
%52 = fadd float %51, %44
%53 = fmul float %23, %35
%54 = fadd float %53, %46
%55 = fmul float %24, %35
%56 = fadd float %55, %48
; TGSI 3 (MAD): += CONST[3] * IN[0].w
%57 = fmul float %25, %36
%58 = fadd float %57, %50
%59 = fmul float %26, %36
%60 = fadd float %59, %52
%61 = fmul float %27, %36
%62 = fadd float %61, %54
%63 = fmul float %28, %36
%64 = fadd float %63, %56
; Export the four sums; the fourth operand (12) is the value the other vertex
; shaders in this file also use for their POSITION output.
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %58, float %60, float %62, float %64)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex @main above.  The trailing hex on each line is
; the instruction encoding.
; s[4:7]: descriptor used by the vertex fetch; v0 = s10 + v0 is the fetch
; index (the IR's `add i32 %5, %7`).
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; v0..v3 = IN[0].xyzw
buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000
; s[0:3]: descriptor the constant loads below go through.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Sixteen constants at dword indices 0x0..0xf:
; s4-s7 = CONST[0], s8-s11 = CONST[1], s12-s15 = CONST[2], s16-s18 + s0 = CONST[3].
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
; The last load overwrites s0, part of the descriptor it reads through; no
; further loads use s[0:3] afterwards.
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Accumulators v4, v5, v6, v0 hold output x, y, z, w.  Each starts as
; CONST[0].c * IN.x and has CONST[1].c * IN.y added by a multiply-accumulate.
; The w accumulator reuses v0, so IN.x is consumed before it is overwritten.
v_mul_f32_e32 v4, s4, v0 ; 10080004
v_mac_f32_e32 v4, s8, v1 ; 3E080208
v_mul_f32_e32 v5, s5, v0 ; 100A0005
v_mac_f32_e32 v5, s9, v1 ; 3E0A0209
v_mul_f32_e32 v6, s6, v0 ; 100C0006
v_mac_f32_e32 v6, s10, v1 ; 3E0C020A
v_mul_f32_e32 v0, s7, v0 ; 10000007
v_mac_f32_e32 v0, s11, v1 ; 3E00020B
; += CONST[2] * IN.z
v_mac_f32_e32 v4, s12, v2 ; 3E08040C
v_mac_f32_e32 v5, s13, v2 ; 3E0A040D
v_mac_f32_e32 v6, s14, v2 ; 3E0C040E
v_mac_f32_e32 v0, s15, v2 ; 3E00040F
; += CONST[3] * IN.w
v_mac_f32_e32 v4, s16, v3 ; 3E080610
v_mac_f32_e32 v5, s17, v3 ; 3E0A0611
v_mac_f32_e32 v6, s18, v3 ; 3E0C0612
v_mac_f32_e32 v0, s0, v3 ; 3E000600
; Export the result to target 12 (the POSITION export in the IR) and end.
exp 15, 12, 0, 1, 0, v4, v5, v6, v0 ; F80008CF 00060504
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL OUT[0], COLOR
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MOV OUT[0], IMM[0].xxxx
1: END
; ModuleID = 'tgsi'
; Fragment shader entry point for the TGSI program above (MOV OUT[0], IMM[0].xxxx
; with IMM[0] all zeros). None of the function arguments are read: the body only
; packs constant zeros and exports them.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Two identical packs of (0.0, 0.0) into one i32 each; the result is bitcast to
; float only because llvm.SI.export takes float operands.
%22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%23 = bitcast i32 %22 to float
%24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%25 = bitcast i32 %24 to float
; The packed values are passed as %23, %25, %23, %25.
; NOTE(review): presumably the packed 16-bit form follows from export_16bpc = 0x3
; in the shader key above - confirm against the driver.
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
Shader Disassembly:
; Machine code for the fragment shader above. The two identical packf16 calls
; in the IR appear here as a single conversion into v0.
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
; All four export operands are the same register, v0.
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 8
VGPRS: 4
Code Size: 20 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL CONST[0..19]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx
45: DP4 TEMP[8].x, CONST[1], TEMP[7]
46: DP4 TEMP[9].x, CONST[2], TEMP[7]
47: MOV TEMP[8].y, TEMP[9].xxxx
48: DP4 TEMP[7].x, CONST[3], TEMP[7]
49: MOV TEMP[8].z, TEMP[7].xxxx
50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy
51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx
52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz
53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[7].yzw, TEMP[7].yxyz
55: MOV TEMP[7].x, TEMP[1].zzzz
56: MOV TEMP[0].xyz, TEMP[0].xyzx
57: MOV OUT[7], TEMP[0]
58: MOV OUT[1], TEMP[2]
59: MOV OUT[3], TEMP[5]
60: MOV OUT[2], TEMP[4]
61: MOV OUT[4], TEMP[6]
62: MOV OUT[5], TEMP[3]
63: MOV OUT[0], TEMP[1]
64: MOV OUT[6], TEMP[7]
65: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the 66-instruction TGSI listing above.
; It reads scalar constants through the descriptor at index 0 of argument %1,
; fetches five vertex inputs through descriptors 0..4 of argument %4 (all at
; index %5 + %7), and ends with eight llvm.SI.export calls (targets 32..38
; and 12). The "TGSI n" notes below refer to instruction numbers in that listing.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Constant-buffer descriptor: element 0 of %1.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Scalar constant loads. The byte offsets are consistent with CONST[N] living at
; 16*N + 4*component; only components the TGSI program uses are loaded (for
; example offsets 12, 76 and 140 are skipped).
; Offsets 0..8: CONST[0].xyz (TGSI 53).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; Offsets 16..60: CONST[1], CONST[2], CONST[3] (TGSI 45, 46, 48).
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Offsets 64..72: CONST[4].xyz (TGSI 52).
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
; Offsets 80..136: CONST[5], CONST[6], CONST[7] and CONST[8].xyz (TGSI 0-3, 32-34).
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; Offsets 144..184: CONST[9].xyz, CONST[10].xyz, CONST[11].xyz (TGSI 17-25).
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; Offsets 208..236: CONST[13] and CONST[14] (TGSI 8, 15).
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
; Offset 240: CONST[15].x, the value compared against zero in TGSI 9.
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
; Offsets 256..316: CONST[16..19] (TGSI 4-7).
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
; Vertex input 0 (descriptor 0 of %4): all four components, used as IN[0].xyzw.
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
; Vertex input 1 (descriptor 1): only components 0..2 are extracted (IN[1].xyz).
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
; Vertex input 2 (descriptor 2): only components 0..1 are extracted (IN[2].xy).
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
; Vertex input 3 (descriptor 3): only components 0..1 are extracted (IN[3].xy).
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
; Vertex input 4 (descriptor 4): all four components (IN[4].xyzw).
%107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0
%109 = add i32 %5, %7
%110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109)
%111 = extractelement <4 x float> %110, i32 0
%112 = extractelement <4 x float> %110, i32 1
%113 = extractelement <4 x float> %110, i32 2
%114 = extractelement <4 x float> %110, i32 3
; TGSI 0-3: TEMP[0] = CONST[5]*IN[0].x + CONST[6]*IN[0].y + CONST[7]*IN[0].z,
; then .xyz += CONST[8]*IN[0].w. The w component (%134) gets no CONST[8] term
; because TGSI 3 writes only .xyz.
%115 = fmul float %31, %84
%116 = fmul float %32, %84
%117 = fmul float %33, %84
%118 = fmul float %34, %84
%119 = fmul float %35, %85
%120 = fadd float %119, %115
%121 = fmul float %36, %85
%122 = fadd float %121, %116
%123 = fmul float %37, %85
%124 = fadd float %123, %117
%125 = fmul float %38, %85
%126 = fadd float %125, %118
%127 = fmul float %39, %86
%128 = fadd float %127, %120
%129 = fmul float %40, %86
%130 = fadd float %129, %122
%131 = fmul float %41, %86
%132 = fadd float %131, %124
%133 = fmul float %42, %86
%134 = fadd float %133, %126
%135 = fmul float %43, %87
%136 = fadd float %135, %128
%137 = fmul float %44, %87
%138 = fadd float %137, %130
%139 = fmul float %45, %87
%140 = fadd float %139, %132
; TGSI 4-7: TEMP[1] = CONST[16..19] applied to IN[0]; the results %162, %164,
; %166, %168 feed the target-12 export (OUT[0], POSITION in the TGSI listing).
%141 = fmul float %64, %84
%142 = fmul float %65, %84
%143 = fmul float %66, %84
%144 = fmul float %67, %84
%145 = fmul float %68, %85
%146 = fadd float %145, %141
%147 = fmul float %69, %85
%148 = fadd float %147, %142
%149 = fmul float %70, %85
%150 = fadd float %149, %143
%151 = fmul float %71, %85
%152 = fadd float %151, %144
%153 = fmul float %72, %86
%154 = fadd float %153, %146
%155 = fmul float %73, %86
%156 = fadd float %155, %148
%157 = fmul float %74, %86
%158 = fadd float %157, %150
%159 = fmul float %75, %86
%160 = fadd float %159, %152
%161 = fmul float %76, %87
%162 = fadd float %161, %154
%163 = fmul float %77, %87
%164 = fadd float %163, %156
%165 = fmul float %78, %87
%166 = fadd float %165, %158
%167 = fmul float %79, %87
%168 = fadd float %167, %160
; TGSI 8: TEMP[2].xy = IN[2].xy * CONST[13].xy + CONST[13].zw.
%169 = fmul float %99, %55
%170 = fadd float %169, %57
%171 = fmul float %100, %56
%172 = fadd float %171, %58
; TGSI 9-14: the UIF/ELSE/ENDIF is lowered to two selects. When CONST[15].x
; compares equal to 0.0 the IN[2] components are used, otherwise IN[3].
%173 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %173, float %99, float %105
%.40 = select i1 %173, float %100, float %106
; TGSI 15: selected.xy * CONST[14].xy + CONST[14].zw.
%174 = fmul float %., %59
%175 = fadd float %174, %61
%176 = fmul float %.40, %60
%177 = fadd float %176, %62
; TGSI 17-28: TEMP[3].xyz = sum over IN[1].xyz of the x, y and z components of
; CONST[9], CONST[10] and CONST[11] (one constant per result component).
%178 = fmul float %46, %92
%179 = fmul float %49, %92
%180 = fmul float %52, %92
%181 = fmul float %47, %93
%182 = fadd float %181, %178
%183 = fmul float %50, %93
%184 = fadd float %183, %179
%185 = fmul float %53, %93
%186 = fadd float %185, %180
%187 = fmul float %48, %94
%188 = fadd float %187, %182
%189 = fmul float %51, %94
%190 = fadd float %189, %184
%191 = fmul float %54, %94
%192 = fadd float %191, %186
; TGSI 29-31: scale that vector by rsq of its squared length (DP3, RSQ, MUL).
%193 = fmul float %188, %188
%194 = fmul float %190, %190
%195 = fadd float %194, %193
%196 = fmul float %192, %192
%197 = fadd float %195, %196
%198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197)
%199 = fmul float %188, %198
%200 = fmul float %190, %198
%201 = fmul float %192, %198
; TGSI 32-34: TEMP[4].xyz = CONST[5..7].xyz applied to IN[4].xyz.
%202 = fmul float %31, %111
%203 = fmul float %32, %111
%204 = fmul float %33, %111
%205 = fmul float %35, %112
%206 = fadd float %205, %202
%207 = fmul float %36, %112
%208 = fadd float %207, %203
%209 = fmul float %37, %112
%210 = fadd float %209, %204
%211 = fmul float %39, %113
%212 = fadd float %211, %206
%213 = fmul float %40, %113
%214 = fadd float %213, %208
%215 = fmul float %41, %113
%216 = fadd float %215, %210
; TGSI 35-37: same rsq scaling for TEMP[4].
%217 = fmul float %212, %212
%218 = fmul float %214, %214
%219 = fadd float %218, %217
%220 = fmul float %216, %216
%221 = fadd float %219, %220
%222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221)
%223 = fmul float %212, %222
%224 = fmul float %214, %222
%225 = fmul float %216, %222
; TGSI 38-39: TEMP[5] = TEMP[3].yzx*TEMP[4].zxy - TEMP[3].zxy*TEMP[4].yzx
; (the component pattern of a cross product).
%226 = fmul float %201, %224
%227 = fmul float %199, %225
%228 = fmul float %200, %223
%229 = fmul float %200, %225
%230 = fsub float %229, %226
%231 = fmul float %201, %223
%232 = fsub float %231, %227
%233 = fmul float %199, %224
%234 = fsub float %233, %228
; TGSI 40: scale by IN[4].w.
%235 = fmul float %230, %114
%236 = fmul float %232, %114
%237 = fmul float %234, %114
; TGSI 44: TEMP[7] = TEMP[3].xyzz * TEMP[3].yzzx, i.e. (x*y, y*z, z*z, z*x).
%238 = fmul float %199, %200
%239 = fmul float %200, %201
%240 = fmul float %201, %201
%241 = fmul float %201, %199
; TGSI 45-49: three DP4s of TEMP[7] with CONST[1], CONST[2] and CONST[3].
%242 = fmul float %16, %238
%243 = fmul float %17, %239
%244 = fadd float %242, %243
%245 = fmul float %18, %240
%246 = fadd float %244, %245
%247 = fmul float %19, %241
%248 = fadd float %246, %247
%249 = fmul float %20, %238
%250 = fmul float %21, %239
%251 = fadd float %249, %250
%252 = fmul float %22, %240
%253 = fadd float %251, %252
%254 = fmul float %23, %241
%255 = fadd float %253, %254
%256 = fmul float %24, %238
%257 = fmul float %25, %239
%258 = fadd float %256, %257
%259 = fmul float %26, %240
%260 = fadd float %258, %259
%261 = fmul float %27, %241
%262 = fadd float %260, %261
; TGSI 50-52: (x*x - y*y) * CONST[4].xyz added to the three DP4 results.
%263 = fmul float %200, %200
%264 = fmul float %199, %199
%265 = fsub float %264, %263
%266 = fmul float %28, %265
%267 = fadd float %266, %248
%268 = fmul float %29, %265
%269 = fadd float %268, %255
%270 = fmul float %30, %265
%271 = fadd float %270, %262
; TGSI 53: TEMP[0].xyz - CONST[0].xyz.
%272 = fsub float %136, %13
%273 = fsub float %138, %14
%274 = fsub float %140, %15
; Exports. Targets 32..38 line up with OUT[1]..OUT[7] (GENERIC[0]..GENERIC[6]) of
; the TGSI listing and target 12 with OUT[0] (POSITION). Components the TGSI
; program never writes are passed as 0.0.
; Target 32 <- TEMP[2]: .xy from TGSI 8, .zw from TGSI 15-16.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177)
; Target 33 <- TEMP[4] after rsq scaling.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00)
; Target 34 <- TEMP[5] (cross-product pattern scaled by IN[4].w).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00)
; Target 35 <- TEMP[6], the rsq-scaled TEMP[3] copied in TGSI 43.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00)
; Target 36 <- TEMP[3] as rewritten by TGSI 52.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00)
; Target 37 <- TEMP[7]: x is TEMP[1].z (%166), yzw are the TGSI 53 differences.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274)
; Target 38 <- TEMP[0]; w is %134, the value before any CONST[8] contribution.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134)
; Target 12 <- TEMP[1].
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose IR precedes this listing.
; v1 = 0; it is the fourth operand of the exports to targets 33..36 below.
v_mov_b32_e32 v1, 0 ; 7E020280
; v0 = s10 + v0: the index operand of every buffer_load_format_xyzw below.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[28:31]: descriptor used by all s_buffer_load_dword instructions below.
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
; Five 4-dword descriptors read through s[8:9] at dword offsets 0x0, 0x4, 0x8,
; 0xc and 0x10, one per vertex fetch. The last load overwrites s[8:9] itself.
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
; Scalar constant loads use dword offsets: 0x20 here is byte offset 128 in the
; IR's llvm.SI.load.const list.
s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20
; Vertex fetches. The destination ranges overlap (v[6:9], v[9:12], v[11:14],
; v[13:16]): each later fetch overwrites the tail of the previous one. The IR
; extracts only the leading components of those inputs, which are the ones that
; survive. NOTE(review): the s_waitcnt between fetches presumably orders the
; overlapping writes - confirm against the ISA manual.
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
; First batch of scalar constants. The descriptor registers s[0:19] are reused
; as destinations now that the fetches have been issued.
s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21
s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22
s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24
s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25
s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10
s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11
s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12
s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14
s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15
s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26
s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28
s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29
s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A
s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C
s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16
s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17
s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18
s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19
s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D
s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E
s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F
s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D
s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E
s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34
s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35
s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36
s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C
s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40
s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41
s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42
s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43
s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37
s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38
s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39
s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; s3, s6, s8 and s10 (offsets 0x36, 0x37, 0x3a, 0x3b) are copied into v0, v17,
; v18 and v19 before those SGPRs are reloaded with other constants. The copies
; later act as the addends of v_mac accumulations. Likewise vcc = (s4 == 0) is
; computed from the offset-0x3c value before s4 is reloaded.
v_mov_b32_e32 v0, s3 ; 7E000203
s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00
s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01
s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02
s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05
s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06
s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v19, s10 ; 7E26020A
s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09
s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A
s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B
s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C
s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D
s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E
s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F
s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44
s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45
s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46
s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47
s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48
s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49
s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A
s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B
s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C
s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D
s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E
; The last load overwrites s28, part of the descriptor; no scalar load follows.
s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F
; v20 = s41*v2 + s42*v3 + s43*v4 (offsets 0x17, 0x1b, 0x1f); exported as the
; fourth operand of target 38.
v_mul_f32_e32 v20, s41, v2 ; 10280429
v_mac_f32_e32 v20, s42, v3 ; 3E28062A
v_mac_f32_e32 v20, s43, v4 ; 3E28082B
; v0 += s46*v9 and v17 += s47*v10: first two operands of the target-32 export.
v_mac_f32_e32 v0, s46, v9 ; 3E00122E
v_mac_f32_e32 v17, s47, v10 ; 3E22142F
; v21..v24: four 4-term sums over v2..v5 with the constants at offsets
; 0x40..0x4f; exported to target 12 at the end.
v_mul_f32_e32 v21, s48, v2 ; 102A0430
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s54, v3 ; 3E2A0636
v_mac_f32_e32 v21, s58, v4 ; 3E2A083A
v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E
v_mul_f32_e32 v22, s49, v2 ; 102C0431
v_mac_f32_e32 v22, s55, v3 ; 3E2C0637
v_mac_f32_e32 v22, s59, v4 ; 3E2C083B
v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F
v_mul_f32_e32 v23, s50, v2 ; 102E0432
v_mac_f32_e32 v23, s56, v3 ; 3E2E0638
v_mac_f32_e32 v23, s60, v4 ; 3E2E083C
v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40
v_mul_f32_e32 v24, s51, v2 ; 10300433
v_mac_f32_e32 v24, s57, v3 ; 3E300639
v_mac_f32_e32 v24, s61, v4 ; 3E30083D
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
; Per-lane select between v9/v11 and v10/v12 controlled by vcc; this is the
; lowered form of the two selects in the IR.
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
; v11, v12, v6: three 3-term sums over v6..v8 (constants at 0x24..0x2e).
v_mul_f32_e32 v11, s34, v6 ; 10160C22
v_mac_f32_e32 v11, s35, v7 ; 3E160E23
v_mul_f32_e32 v12, s37, v6 ; 10180C25
v_mac_f32_e32 v12, s38, v7 ; 3E180E26
v_mul_f32_e32 v6, s40, v6 ; 100C0C28
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s36, v8 ; 3E161024
v_mac_f32_e32 v12, s39, v8 ; 3E181027
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
; v7, v8, v2: three 4-term sums over v2..v5 (constants at 0x14..0x22).
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s26, v3 ; 3E0E061A
v_mac_f32_e32 v7, s23, v4 ; 3E0E0817
v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B
v_mul_f32_e32 v8, s17, v2 ; 10100411
v_mac_f32_e32 v8, s25, v3 ; 3E100619
v_mac_f32_e32 v8, s24, v4 ; 3E100818
v_mac_f32_e32 v8, s32, v5 ; 3E100A20
v_mul_f32_e32 v2, s20, v2 ; 10040414
v_mac_f32_e32 v2, s21, v3 ; 3E040615
v_mac_f32_e32 v2, s22, v4 ; 3E040816
v_mac_f32_e32 v2, s33, v5 ; 3E040A21
; v18 += s52*v9 and v19 += s53*v10, using the selected values.
v_mac_f32_e32 v18, s52, v9 ; 3E241234
v_mac_f32_e32 v19, s53, v10 ; 3E261435
exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100
; v0 was an operand of the export above and is overwritten next.
; NOTE(review): presumably that is why the expcnt wait is here - confirm.
s_waitcnt expcnt(0) ; BF8C070F
; v0, v3, v4: three 3-term sums over v13..v15, reusing the 0x14..0x1e constants.
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s26, v14 ; 3E001C1A
v_mul_f32_e32 v3, s17, v13 ; 10061A11
v_mac_f32_e32 v3, s25, v14 ; 3E061C19
v_mul_f32_e32 v4, s20, v13 ; 10081A14
v_mac_f32_e32 v4, s21, v14 ; 3E081C15
v_mac_f32_e32 v0, s23, v15 ; 3E001E17
v_mac_f32_e32 v3, s24, v15 ; 3E061E18
v_mac_f32_e32 v4, s22, v15 ; 3E081E16
; Squared length of (v11, v12, v6), then its clamped reciprocal square root.
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
; Same for (v0, v3, v4).
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Scale both vectors: (v10, v11, v5) and (v0, v3, v4) hold the results.
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
; Cross-product pattern: each component is a product minus a product, built
; as a v_mul followed by a v_mad with a negated addend.
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
; Scale the three components by v16, the last component of the final fetch.
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
s_waitcnt expcnt(0) ; BF8C070F
; v3, v4, v0: sums of pairwise products of (v10, v11, v5), weighted by the
; constants at offsets 0x4..0xf. v6 is the scratch register for each product.
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s14, v0 ; 1006000E
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s19, v0 ; 10000013
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s13, v6 ; 3E000C0D
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s6, v6 ; 3E060C06
v_mac_f32_e32 v4, s8, v6 ; 3E080C08
v_mac_f32_e32 v0, s11, v6 ; 3E000C0B
; Final term: (v10*v10 - v11*v11) weighted by the constants at 0x10..0x12.
v_mul_f32_e32 v6, v11, v11 ; 100C170B
v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A
v_mac_f32_e32 v3, s1, v6 ; 3E060C01
v_mac_f32_e32 v4, s2, v6 ; 3E080C02
v_mac_f32_e32 v0, s0, v6 ; 3E000C00
; v6 = v7 - s3, v9 = v8 - s4, v12 = v2 - s5 (v_subrev subtracts the first
; source from the second).
v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03
v_subrev_f32_e32 v9, s4, v8 ; 0A121004
v_subrev_f32_e32 v12, s5, v2 ; 0A180405
; Remaining exports, in the same target order as the IR: 35, 36, 37, 38, 12.
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617
exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 892 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[22..24]
DCL CONST[26]
DCL TEMP[0..17], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D
11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx
13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
14: MOV_SAT TEMP[4].x, TEMP[4].xxxx
15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
16: SQRT TEMP[4].x, TEMP[4].xxxx
17: MOV TEMP[3].z, TEMP[4].xxxx
18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
20: MOV TEMP[0].y, TEMP[1].xxxx
21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz
22: MOV TEMP[0].z, TEMP[1].xxxx
23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
24: RSQ TEMP[1].x, TEMP[1].xxxx
25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
26: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww
27: RSQ TEMP[1].x, TEMP[1].xxxx
28: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx
29: MOV TEMP[2].xy, IN[0].xyyy
30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz
33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww
34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
36: MOV TEMP[5].xy, IN[0].xyyy
37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx
39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx
40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
42: MOV TEMP[7].xyz, IMM[0].wwww
43: MOV TEMP[8].w, IMM[0].zzzz
44: MOV TEMP[8].xyz, TEMP[0].xyzx
45: DP4 TEMP[9].x, CONST[1], TEMP[8]
46: DP4 TEMP[10].x, CONST[2], TEMP[8]
47: MOV TEMP[9].y, TEMP[10].xxxx
48: DP4 TEMP[8].x, CONST[3], TEMP[8]
49: MOV TEMP[9].z, TEMP[8].xxxx
50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz
51: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
52: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
53: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
54: MUL TEMP[9].xyz, IMM[0].xxxx, TEMP[9].xyzz
55: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
56: MOV TEMP[10].xyz, TEMP[9].xyzx
57: FSLT TEMP[11].x, IMM[0].wwww, CONST[10].wwww
58: UIF TEMP[11].xxxx :0
59: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
60: RSQ TEMP[11].x, TEMP[11].xxxx
61: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
62: MOV TEMP[12].xyz, -IN[6].xyzx
63: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
64: RCP TEMP[14].x, TEMP[11].xxxx
65: RCP TEMP[14].y, TEMP[11].yyyy
66: RCP TEMP[14].z, TEMP[11].zzzz
67: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
68: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
69: RCP TEMP[14].x, TEMP[11].xxxx
70: RCP TEMP[14].y, TEMP[11].yyyy
71: RCP TEMP[14].z, TEMP[11].zzzz
72: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
73: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[11].xyzz
74: UIF TEMP[14].xxxx :0
75: MOV TEMP[15].x, TEMP[13].xxxx
76: ELSE :0
77: MOV TEMP[15].x, TEMP[12].xxxx
78: ENDIF
79: UIF TEMP[14].yyyy :0
80: MOV TEMP[16].x, TEMP[13].yyyy
81: ELSE :0
82: MOV TEMP[16].x, TEMP[12].yyyy
83: ENDIF
84: UIF TEMP[14].zzzz :0
85: MOV TEMP[13].x, TEMP[13].zzzz
86: ELSE :0
87: MOV TEMP[13].x, TEMP[12].zzzz
88: ENDIF
89: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
90: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx
91: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
92: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
93: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
94: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz
95: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
96: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
97: ENDIF
98: ADD TEMP[11].x, IMM[0].zzzz, -CONST[24].xxxx
99: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
100: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
101: MOV TEMP[10].xyz, TEMP[10].xyzz
102: MOV TEMP[10].w, TEMP[11].xxxx
103: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
104: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
105: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
106: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
107: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].wwww
108: UIF TEMP[11].xxxx :0
109: MOV TEMP[11].xyz, TEMP[9].xyzx
110: FSLT TEMP[12].x, IMM[0].wwww, CONST[14].wwww
111: UIF TEMP[12].xxxx :0
112: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
113: RSQ TEMP[12].x, TEMP[12].xxxx
114: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
115: MOV TEMP[12].xyz, -IN[6].xyzx
116: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
117: RCP TEMP[14].x, TEMP[9].xxxx
118: RCP TEMP[14].y, TEMP[9].yyyy
119: RCP TEMP[14].z, TEMP[9].zzzz
120: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
121: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
122: RCP TEMP[14].x, TEMP[9].xxxx
123: RCP TEMP[14].y, TEMP[9].yyyy
124: RCP TEMP[14].z, TEMP[9].zzzz
125: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
126: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[9].xyzz
127: UIF TEMP[14].xxxx :0
128: MOV TEMP[15].x, TEMP[13].xxxx
129: ELSE :0
130: MOV TEMP[15].x, TEMP[12].xxxx
131: ENDIF
132: UIF TEMP[14].yyyy :0
133: MOV TEMP[16].x, TEMP[13].yyyy
134: ELSE :0
135: MOV TEMP[16].x, TEMP[12].yyyy
136: ENDIF
137: UIF TEMP[14].zzzz :0
138: MOV TEMP[13].x, TEMP[13].zzzz
139: ELSE :0
140: MOV TEMP[13].x, TEMP[12].zzzz
141: ENDIF
142: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
143: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx
144: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
145: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
146: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
147: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz
148: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
149: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
150: ENDIF
151: ADD TEMP[9].x, IMM[0].zzzz, -CONST[24].xxxx
152: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
153: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
154: MOV TEMP[11].xyz, TEMP[11].xyzz
155: MOV TEMP[11].w, TEMP[9].xxxx
156: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
157: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
158: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
159: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
160: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
161: ELSE :0
162: MOV TEMP[7].xyz, TEMP[10].xyzx
163: ENDIF
164: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
165: MOV TEMP[1].xyz, -TEMP[1].xyzx
166: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx
167: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
168: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
169: RSQ TEMP[10].x, TEMP[10].xxxx
170: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
171: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
172: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
173: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
174: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
175: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
176: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
177: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[5].xxxx
178: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy, IMM[2].zzzz
179: LG2 TEMP[12].x, TEMP[12].xxxx
180: RCP TEMP[12].x, TEMP[12].xxxx
181: MUL TEMP[12].x, IMM[2].xxxx, TEMP[12].xxxx
182: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
183: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[6].xxxx
184: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[1].xxxx
185: MUL TEMP[15].x, IMM[0].xxxx, TEMP[10].xxxx
186: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
187: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[1].xxxx
188: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[10].xxxx
189: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx
190: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
191: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx
192: MOV_SAT TEMP[4].x, TEMP[4].xxxx
193: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx
194: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
195: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
196: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
197: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
198: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[11].xxxx
199: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[11].xxxx
200: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].wwww
201: RCP TEMP[1].x, TEMP[1].xxxx
202: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
203: MAX TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx
204: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
205: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].zzzz
206: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
207: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
208: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
209: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
210: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
211: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
212: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
213: ADD TEMP[9].xyz, IMM[0].zzzz, -TEMP[3].xyzz
214: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
215: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
216: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
217: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
218: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz
219: ADD TEMP[9].x, TEMP[5].xxxx, IMM[0].yyyy
220: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
221: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
222: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
223: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
224: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].zzzz
225: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
226: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
227: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
228: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
229: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
230: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].zzzz
231: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
232: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
233: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
234: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
235: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
236: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
237: MOV TEMP[0].xyz, TEMP[0].xyzx
238: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww
239: MOV_SAT TEMP[1].x, TEMP[1].xxxx
240: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
241: MOV TEMP[0].xyz, TEMP[0].xyzx
242: MOV TEMP[0].w, IMM[0].zzzz
243: MOV OUT[0], TEMP[0]
244: END
; ModuleID = 'tgsi'
; Entry point of the LLVM module generated from the TGSI listing that precedes
; it in this dump (the TGSI tail "MAD IN[5].x, CONST[5].z, CONST[5].w / MOV_SAT /
; LRP ... CONST[4]" lines up with the clamp/lrp/export tail of ENDIF81 below).
;
; Arguments actually referenced by the body:
;   %1      - element 0 is loaded as <16 x i8> and used as the buffer operand of
;             every llvm.SI.load.const call.
;   %2      - elements 0..4 are loaded as <16 x i8> and passed as the third
;             operand of the sample intrinsics (presumably sampler state - confirm).
;   %3      - elements 0..4 are loaded as <32 x i8> and passed as the second
;             operand of the sample intrinsics (presumably texture resources - confirm).
;   %5, %7  - forwarded unchanged to every llvm.SI.fs.interp call.
; No other argument is referenced in this function.
;
; The function returns nothing; its only visible output is the single
; llvm.SI.export call at the end of ENDIF81.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Fetch the descriptor in slot 0 of %1; all scalar constants below are read through it.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Scalar constant loads, all hoisted to the top of the function. The second
; operand is a byte offset; offset 16*i + 4*c appears to correspond to component
; c of TGSI CONST[i] (e.g. offsets 88/92 feed the same multiply-add as
; CONST[5].z/.w in TGSI instruction 238) - confirm against the intrinsic docs.
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
; Load five <32 x i8> / <16 x i8> descriptor pairs, slots 0..4 of %3 and %2.
; Pair 0 = (%84, %86), 1 = (%89, %92), 2 = (%95, %98), 3 = (%101, %104), 4 = (%107, %110).
%83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0
%85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0
%87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)*
%89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0
%90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)*
%92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0
%93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)*
%95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0
%96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)*
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)*
%107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0
%108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)*
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
; Interpolated inputs. The second operand takes values 0..6 and the first 0..3;
; judging by the use of %125 (0, 5) where the TGSI reads IN[5].x, the second is
; the input index and the first the component - confirm.
%111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
; Sample descriptor pair 3 at (%111, %112); take result components 3 and 1,
; remap each with x*2-1 and scale by %79.
%132 = bitcast float %111 to i32
%133 = bitcast float %112 to i32
%134 = insertelement <2 x i32> undef, i32 %132, i32 0
%135 = insertelement <2 x i32> %134, i32 %133, i32 1
%136 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %135, <32 x i8> %101, <16 x i8> %104, i32 2)
%137 = extractelement <4 x float> %136, i32 1
%138 = extractelement <4 x float> %136, i32 3
%139 = fmul float %138, 2.000000e+00
%140 = fadd float %139, -1.000000e+00
%141 = fmul float %137, 2.000000e+00
%142 = fadd float %141, -1.000000e+00
%143 = fmul float %140, %79
%144 = fmul float %142, %79
; Third component: sqrt(1 - clamp(x*x + y*y, 0, 1)).
; NOTE(review): looks like a two-channel normal-map decode - confirm.
%145 = fmul float %143, %143
%146 = fmul float %144, %144
%147 = fadd float %145, %146
%148 = call float @llvm.AMDIL.clamp.(float %147, float 0.000000e+00, float 1.000000e+00)
%149 = fsub float 1.000000e+00, %148
%150 = call float @llvm.sqrt.f32(float %149)
; Combine the three values with inputs 1, 2 and 3 as basis vectors:
; result = %143 * input1 + %144 * input2 + %150 * input3, per component.
%151 = fmul float %143, %113
%152 = fmul float %144, %116
%153 = fadd float %152, %151
%154 = fmul float %150, %119
%155 = fadd float %153, %154
%156 = fmul float %143, %114
%157 = fmul float %144, %117
%158 = fadd float %157, %156
%159 = fmul float %150, %120
%160 = fadd float %158, %159
%161 = fmul float %143, %115
%162 = fmul float %144, %118
%163 = fadd float %162, %161
%164 = fmul float %150, %121
%165 = fadd float %163, %164
; Scale by the reciprocal square root of the squared length -> (%172, %173, %174).
%166 = fmul float %155, %155
%167 = fmul float %160, %160
%168 = fadd float %167, %166
%169 = fmul float %165, %165
%170 = fadd float %168, %169
%171 = call float @llvm.AMDGPU.rsq.clamped.f32(float %170)
%172 = fmul float %155, %171
%173 = fmul float %160, %171
%174 = fmul float %165, %171
; Same normalisation applied to components 1..3 of input 5 -> (%181, %182, %183).
%175 = fmul float %126, %126
%176 = fmul float %127, %127
%177 = fadd float %176, %175
%178 = fmul float %128, %128
%179 = fadd float %177, %178
%180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179)
%181 = fmul float %126, %180
%182 = fmul float %127, %180
%183 = fmul float %128, %180
; Sample descriptor pair 2 at the same coordinates; scale the first three
; result components by the constants %76..%78.
%184 = bitcast float %111 to i32
%185 = bitcast float %112 to i32
%186 = insertelement <2 x i32> undef, i32 %184, i32 0
%187 = insertelement <2 x i32> %186, i32 %185, i32 1
%188 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %187, <32 x i8> %95, <16 x i8> %98, i32 2)
%189 = extractelement <4 x float> %188, i32 0
%190 = extractelement <4 x float> %188, i32 1
%191 = extractelement <4 x float> %188, i32 2
%192 = fmul float %76, %189
%193 = fmul float %77, %190
%194 = fmul float %78, %191
; Two values derived from the scaled sample, both driven by %80:
;   %195..%197 = lrp(%80, scaled sample, %66..%68)
;   %200..%202 = scaled sample * (%69 - %80 * %69)
%195 = call float @llvm.AMDGPU.lrp(float %80, float %192, float %66)
%196 = call float @llvm.AMDGPU.lrp(float %80, float %193, float %67)
%197 = call float @llvm.AMDGPU.lrp(float %80, float %194, float %68)
%198 = fmul float %80, %69
%199 = fsub float %69, %198
%200 = fmul float %192, %199
%201 = fmul float %193, %199
%202 = fmul float %194, %199
; Sample descriptor pair 4 (TGSI instruction 37, SAMP[4]); component 1 is
; rescaled as %208 * %82 + (1 - %82) -> %211 (TGSI instructions 38-39).
%203 = bitcast float %111 to i32
%204 = bitcast float %112 to i32
%205 = insertelement <2 x i32> undef, i32 %203, i32 0
%206 = insertelement <2 x i32> %205, i32 %204, i32 1
%207 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %206, <32 x i8> %107, <16 x i8> %110, i32 2)
%208 = extractelement <4 x float> %207, i32 1
%209 = fsub float 1.000000e+00, %82
%210 = fmul float %208, %82
%211 = fadd float %210, %209
; %217 = max(dot((%172,%173,%174), (%24,%25,%26)), 0)  (TGSI instructions 40-41).
%212 = fmul float %172, %24
%213 = fmul float %173, %25
%214 = fadd float %213, %212
%215 = fmul float %174, %26
%216 = fadd float %214, %215
%217 = call float @llvm.maxnum.f32(float %216, float 0.000000e+00)
; Three 4-component dot products of (%172, %173, %174, 1) with the constant
; rows at byte offsets 16, 32 and 48 (TGSI instructions 43-49).
%218 = fmul float %27, %172
%219 = fmul float %28, %173
%220 = fadd float %218, %219
%221 = fmul float %29, %174
%222 = fadd float %220, %221
%223 = fadd float %222, %30
%224 = fmul float %31, %172
%225 = fmul float %32, %173
%226 = fadd float %224, %225
%227 = fmul float %33, %174
%228 = fadd float %226, %227
%229 = fadd float %228, %34
%230 = fmul float %35, %172
%231 = fmul float %36, %173
%232 = fadd float %230, %231
%233 = fmul float %37, %174
%234 = fadd float %232, %233
%235 = fadd float %234, %38
; Add input 4 and scale by %211 -> (%239, %240, %241)  (TGSI instructions 50-51).
%236 = fadd float %122, %223
%237 = fadd float %123, %229
%238 = fadd float %124, %235
%239 = fmul float %236, %211
%240 = fmul float %237, %211
%241 = fmul float %238, %211
; With N = (%172..%174) and V = (%181..%183):
; (%253, %254, %255) = V - 2 * dot(N, V) * N  (TGSI instructions 52-55).
%242 = fmul float %172, %181
%243 = fmul float %173, %182
%244 = fadd float %243, %242
%245 = fmul float %174, %183
%246 = fadd float %244, %245
%247 = fmul float %246, %172
%248 = fmul float %246, %173
%249 = fmul float %246, %174
%250 = fmul float %247, 2.000000e+00
%251 = fmul float %248, 2.000000e+00
%252 = fmul float %249, 2.000000e+00
%253 = fsub float %181, %250
%254 = fsub float %182, %251
%255 = fsub float %183, %252
; Take the IF path only when the constant at byte offset 172 is > 0
; (TGSI instructions 57-58).
%256 = fcmp ogt float %51, 0.000000e+00
br i1 %256, label %IF, label %ENDIF
IF: ; preds = %main_body
; Adjust the lookup direction using two constant triples (%44..%46 and
; %47..%49), input 6 (%129..%131) and the constants at offsets 160..168.
; NOTE(review): resembles a box-projected cube-map correction - confirm.
%257 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%258 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%259 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
; Normalise (%253, %254, %255) -> direction (%266, %267, %268).
%260 = fmul float %253, %253
%261 = fmul float %254, %254
%262 = fadd float %261, %260
%263 = fmul float %255, %255
%264 = fadd float %262, %263
%265 = call float @llvm.AMDGPU.rsq.clamped.f32(float %264)
%266 = fmul float %253, %265
%267 = fmul float %254, %265
%268 = fmul float %255, %265
; Per axis: (%44..%46 - input 6) / direction.
%269 = fsub float %44, %129
%270 = fsub float %45, %130
%271 = fsub float %46, %131
%272 = fdiv float 1.000000e+00, %266
%273 = fdiv float 1.000000e+00, %267
%274 = fdiv float 1.000000e+00, %268
%275 = fmul float %269, %272
%276 = fmul float %270, %273
%277 = fmul float %271, %274
; Per axis: (%47..%49 - input 6) / direction.
%278 = fsub float %47, %129
%279 = fsub float %48, %130
%280 = fsub float %49, %131
%281 = fdiv float 1.000000e+00, %266
%282 = fdiv float 1.000000e+00, %267
%283 = fdiv float 1.000000e+00, %268
%284 = fmul float %278, %281
%285 = fmul float %279, %282
%286 = fmul float %280, %283
; Per axis pick the first quotient when the direction component is positive,
; otherwise the second (the three TGSI UIF/ELSE pairs became selects).
%287 = fcmp ogt float %266, 0.000000e+00
%288 = fcmp ogt float %267, 0.000000e+00
%289 = fcmp ogt float %268, 0.000000e+00
%. = select i1 %287, float %275, float %284
%temp64.0 = select i1 %288, float %276, float %285
%.96 = select i1 %289, float %277, float %286
; Midpoint of the two constant triples -> (%293, %294, %295).
%290 = fadd float %44, %47
%291 = fadd float %45, %48
%292 = fadd float %46, %49
%293 = fmul float %290, 5.000000e-01
%294 = fmul float %291, 5.000000e-01
%295 = fmul float %292, 5.000000e-01
; %297 = smallest of the three selected quotients.
%296 = call float @llvm.minnum.f32(float %., float %temp64.0)
%297 = call float @llvm.minnum.f32(float %296, float %.96)
; result = direction * %297 + (midpoint - const@160..168 + input 6) - midpoint.
%298 = fsub float %293, %259
%299 = fsub float %294, %258
%300 = fsub float %295, %257
%301 = fadd float %298, %129
%302 = fadd float %299, %130
%303 = fadd float %300, %131
%304 = fmul float %266, %297
%305 = fadd float %304, %301
%306 = fmul float %267, %297
%307 = fadd float %306, %302
%308 = fmul float %268, %297
%309 = fadd float %308, %303
%310 = fsub float %305, %293
%311 = fsub float %307, %294
%312 = fsub float %309, %295
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Lookup direction: the adjusted vector from IF, or (%253, %254, %255) unchanged.
%temp40.0 = phi float [ %310, %IF ], [ %253, %main_body ]
%temp41.0 = phi float [ %311, %IF ], [ %254, %main_body ]
%temp42.0 = phi float [ %312, %IF ], [ %255, %main_body ]
; %315 = pow(1 - %81, 0.75) * 7; it becomes the fourth coordinate of the
; samplel call below (TGSI instructions 98-103, TXL ... CUBE).
%313 = fsub float 1.000000e+00, %81
%314 = call float @llvm.pow.f32(float %313, float 7.500000e-01)
%315 = fmul float %314, 7.000000e+00
%316 = insertelement <4 x float> undef, float %temp40.0, i32 0
%317 = insertelement <4 x float> %316, float %temp41.0, i32 1
%318 = insertelement <4 x float> %317, float %temp42.0, i32 2
%319 = insertelement <4 x float> %318, float %315, i32 3
%320 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %319)
%321 = extractelement <4 x float> %320, i32 0
%322 = extractelement <4 x float> %320, i32 1
%323 = extractelement <4 x float> %320, i32 2
%324 = extractelement <4 x float> %320, i32 3
; Divide cube components 0 and 1 by |component 2| and add 1.5. The sample
; coordinate vector is then (component 1 result, component 0 result,
; component 3, %315) - note the 1/0 swap.
%325 = call float @llvm.fabs.f32(float %323)
%326 = fdiv float 1.000000e+00, %325
%327 = fmul float %321, %326
%328 = fadd float %327, 1.500000e+00
%329 = fmul float %322, %326
%330 = fadd float %329, 1.500000e+00
%331 = bitcast float %330 to i32
%332 = bitcast float %328 to i32
%333 = bitcast float %324 to i32
%334 = bitcast float %315 to i32
%335 = insertelement <4 x i32> undef, i32 %331, i32 0
%336 = insertelement <4 x i32> %335, i32 %332, i32 1
%337 = insertelement <4 x i32> %336, i32 %333, i32 2
%338 = insertelement <4 x i32> %337, i32 %334, i32 3
; Sample descriptor pair 0.
%339 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %338, <32 x i8> %84, <16 x i8> %86, i32 4)
%340 = extractelement <4 x float> %339, i32 0
%341 = extractelement <4 x float> %339, i32 1
%342 = extractelement <4 x float> %339, i32 2
%343 = extractelement <4 x float> %339, i32 3
; Scale the first three components by %52 * pow(fourth component, %53)
; (TGSI instructions 104-106).
%344 = call float @llvm.pow.f32(float %343, float %53)
%345 = fmul float %52, %344
%346 = fmul float %345, %340
%347 = fmul float %345, %341
%348 = fmul float %345, %342
; A second lookup is blended in only when the constant at offset 156 is below
; 0x3FEFFFEB00000000 (~0.99999); otherwise the first result is used as is.
%349 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %349, label %IF82, label %ENDIF81
IF82: ; preds = %ENDIF
; The direction for the second lookup is adjusted only when the constant at
; offset 236 is > 0 (TGSI instructions 110-111).
%350 = fcmp ogt float %63, 0.000000e+00
br i1 %350, label %IF85, label %ENDIF84
ENDIF81: ; preds = %ENDIF, %ENDIF84
; Final shading block. Inputs are the blended lookup from ENDIF84 or the
; single lookup from ENDIF.
%temp28.0 = phi float [ %578, %ENDIF84 ], [ %346, %ENDIF ]
%temp29.0 = phi float [ %579, %ENDIF84 ], [ %347, %ENDIF ]
%temp30.0 = phi float [ %580, %ENDIF84 ], [ %348, %ENDIF ]
; Scale the lookup by %211 (TGSI instruction 164).
%351 = fmul float %temp28.0, %211
%352 = fmul float %temp29.0, %211
%353 = fmul float %temp30.0, %211
%354 = fsub float 1.000000e+00, %81
; (%364, %365, %366) = normalize((%24,%25,%26) - (%181,%182,%183))
; (TGSI instructions 165-170).
; NOTE(review): presumably the half vector between light and view directions - confirm.
%355 = fsub float %24, %181
%356 = fsub float %25, %182
%357 = fsub float %26, %183
%358 = fmul float %355, %355
%359 = fmul float %356, %356
%360 = fadd float %359, %358
%361 = fmul float %357, %357
%362 = fadd float %360, %361
%363 = call float @llvm.AMDGPU.rsq.clamped.f32(float %362)
%364 = fmul float %355, %363
%365 = fmul float %356, %363
%366 = fmul float %357, %363
; %373 = max(-dot((%181..%183), (%172..%174)), 0).
%367 = fmul float %181, %172
%368 = fsub float -0.000000e+00, %367
%369 = fmul float %182, %173
%370 = fsub float %368, %369
%371 = fmul float %183, %174
%372 = fsub float %370, %371
%373 = call float @llvm.maxnum.f32(float %372, float 0.000000e+00)
; %379 = max(dot((%24..%26), (%364..%366)), 0).
%374 = fmul float %24, %364
%375 = fmul float %25, %365
%376 = fadd float %375, %374
%377 = fmul float %26, %366
%378 = fadd float %376, %377
%379 = call float @llvm.maxnum.f32(float %378, float 0.000000e+00)
; %381 = %354^2 * %75.
%380 = fmul float %354, %354
%381 = fmul float %380, %75
; %388 = (10 / log2((1 - %354) * ~0.968 + ~0.03))^2; used below as an exponent.
%382 = fsub float 1.000000e+00, %354
%383 = fmul float %382, 0x3FEEF9DB20000000
%384 = fadd float %383, 0x3F9EB851E0000000
%385 = call float @llvm.log2.f32(float %384)
%386 = fdiv float 1.000000e+00, %385
%387 = fmul float %386, 1.000000e+01
%388 = fmul float %387, %387
%389 = fsub float 1.000000e+00, %217
%390 = fsub float 1.000000e+00, %373
; %394 = 2 * %379 * (%379 * %354) + 0.5.
%391 = fmul float %379, 2.000000e+00
%392 = fmul float %379, %354
%393 = fmul float %391, %392
%394 = fadd float %393, 5.000000e-01
%395 = fsub float 1.000000e+00, %379
%396 = fsub float 1.000000e+00, %373
; %399 = clamp(%81 + (1 - %199), 0, 1).
%397 = fsub float 1.000000e+00, %199
%398 = fadd float %81, %397
%399 = call float @llvm.AMDIL.clamp.(float %398, float 0.000000e+00, float 1.000000e+00)
; %403 = (1 - %373)^5, built as x^2 * x^3.
%400 = fmul float %396, %396
%401 = fmul float %396, %396
%402 = fmul float %401, %396
%403 = fmul float %400, %402
; Weight for the lookup term: lrp(%403, %399, %195..%197).
%404 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %195)
%405 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %196)
%406 = call float @llvm.AMDGPU.lrp(float %403, float %399, float %197)
; %411 = 1 / (lrp(%217, 1, %381) * lrp(%373, 1, %381) + ~1e-4).
%407 = call float @llvm.AMDGPU.lrp(float %217, float 1.000000e+00, float %381)
%408 = call float @llvm.AMDGPU.lrp(float %373, float 1.000000e+00, float %381)
%409 = fmul float %407, %408
%410 = fadd float %409, 0x3F1A36E2E0000000
%411 = fdiv float 1.000000e+00, %410
; %418 = pow(max(dot((%172..%174), (%364..%366)), 0), %388).
%412 = fmul float %172, %364
%413 = fmul float %173, %365
%414 = fadd float %413, %412
%415 = fmul float %174, %366
%416 = fadd float %414, %415
%417 = call float @llvm.maxnum.f32(float %416, float 0.000000e+00)
%418 = call float @llvm.pow.f32(float %417, float %388)
; %425 = max(%411 * %418 * (%388 + 1) * %74 * %217 * %73, 0), then tinted by
; the constants %70..%72 (TGSI instructions 205-212).
%419 = fadd float %388, 1.000000e+00
%420 = fmul float %419, %74
%421 = fmul float %418, %420
%422 = fmul float %411, %421
%423 = fmul float %422, %217
%424 = fmul float %423, %73
%425 = call float @llvm.maxnum.f32(float %424, float 0.000000e+00)
%426 = fmul float %425, %70
%427 = fmul float %425, %71
%428 = fmul float %425, %72
; (%437, %439, %441) = c + (1 - c) * (1 - %379)^5 for c in %195..%197.
%429 = fsub float 1.000000e+00, %195
%430 = fsub float 1.000000e+00, %196
%431 = fsub float 1.000000e+00, %197
%432 = fmul float %395, %395
%433 = fmul float %395, %395
%434 = fmul float %433, %395
%435 = fmul float %432, %434
%436 = fmul float %429, %435
%437 = fadd float %436, %195
%438 = fmul float %430, %435
%439 = fadd float %438, %196
%440 = fmul float %431, %435
%441 = fadd float %440, %197
; %448 = 1 + (%394 - 1) * (1 - %217)^5.
%442 = fadd float %394, -1.000000e+00
%443 = fmul float %389, %389
%444 = fmul float %389, %389
%445 = fmul float %444, %389
%446 = fmul float %443, %445
%447 = fmul float %442, %446
%448 = fadd float %447, 1.000000e+00
; %455 = 1 + (%394 - 1) * (1 - %373)^5.
%449 = fadd float %394, -1.000000e+00
%450 = fmul float %390, %390
%451 = fmul float %390, %390
%452 = fmul float %451, %390
%453 = fmul float %450, %452
%454 = fmul float %449, %453
%455 = fadd float %454, 1.000000e+00
; %457 = %448 * %455 * %217; then (%70..%72) * %457 + (%239..%241).
%456 = fmul float %448, %455
%457 = fmul float %456, %217
%458 = fmul float %70, %457
%459 = fadd float %458, %239
%460 = fmul float %71, %457
%461 = fadd float %460, %240
%462 = fmul float %72, %457
%463 = fadd float %462, %241
; Sum of three terms per channel (TGSI instructions 234-236):
;   (%200..%202) * (%459, %461, %463)
; + (%426..%428) * (%437, %439, %441)
; + (%351..%353) * (%404..%406)
%464 = fmul float %200, %459
%465 = fmul float %201, %461
%466 = fmul float %202, %463
%467 = fmul float %426, %437
%468 = fadd float %467, %464
%469 = fmul float %427, %439
%470 = fadd float %469, %465
%471 = fmul float %428, %441
%472 = fadd float %471, %466
%473 = fmul float %351, %404
%474 = fadd float %473, %468
%475 = fmul float %352, %405
%476 = fadd float %475, %470
%477 = fmul float %353, %406
%478 = fadd float %477, %472
; Blend with the constants %39..%41 using the weight
; clamp(%125 * %42 + %43, 0, 1)  (TGSI instructions 238-240).
; NOTE(review): presumably a fog factor - confirm.
%479 = fmul float %125, %42
%480 = fadd float %479, %43
%481 = call float @llvm.AMDIL.clamp.(float %480, float 0.000000e+00, float 1.000000e+00)
%482 = call float @llvm.AMDGPU.lrp(float %481, float %474, float %39)
%483 = call float @llvm.AMDGPU.lrp(float %481, float %476, float %40)
%484 = call float @llvm.AMDGPU.lrp(float %481, float %478, float %41)
; Pack (%482, %483) and (%484, 1.0) with llvm.SI.packf16 and export the two
; packed words; each is passed twice (%486, %488, %486, %488).
%485 = call i32 @llvm.SI.packf16(float %482, float %483)
%486 = bitcast i32 %485 to float
%487 = call i32 @llvm.SI.packf16(float %484, float 1.000000e+00)
%488 = bitcast i32 %487 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %486, float %488, float %486, float %488)
ret void
IF85: ; preds = %IF82
; Same direction adjustment as block IF, but with the constant triples
; %54..%56 and %57..%59 and the offset triple %60..%62
; (TGSI instructions 112-149).
%489 = fmul float %253, %253
%490 = fmul float %254, %254
%491 = fadd float %490, %489
%492 = fmul float %255, %255
%493 = fadd float %491, %492
%494 = call float @llvm.AMDGPU.rsq.clamped.f32(float %493)
%495 = fmul float %253, %494
%496 = fmul float %254, %494
%497 = fmul float %255, %494
; Per axis: (%54..%56 - input 6) / direction.
%498 = fsub float %54, %129
%499 = fsub float %55, %130
%500 = fsub float %56, %131
%501 = fdiv float 1.000000e+00, %495
%502 = fdiv float 1.000000e+00, %496
%503 = fdiv float 1.000000e+00, %497
%504 = fmul float %498, %501
%505 = fmul float %499, %502
%506 = fmul float %500, %503
; Per axis: (%57..%59 - input 6) / direction.
%507 = fsub float %57, %129
%508 = fsub float %58, %130
%509 = fsub float %59, %131
%510 = fdiv float 1.000000e+00, %495
%511 = fdiv float 1.000000e+00, %496
%512 = fdiv float 1.000000e+00, %497
%513 = fmul float %507, %510
%514 = fmul float %508, %511
%515 = fmul float %509, %512
; Per axis choose by the sign of the direction component, then take the minimum.
%516 = fcmp ogt float %495, 0.000000e+00
%517 = fcmp ogt float %496, 0.000000e+00
%518 = fcmp ogt float %497, 0.000000e+00
%.97 = select i1 %516, float %504, float %513
%temp64.1 = select i1 %517, float %505, float %514
%.98 = select i1 %518, float %506, float %515
%519 = fadd float %54, %57
%520 = fadd float %55, %58
%521 = fadd float %56, %59
%522 = fmul float %519, 5.000000e-01
%523 = fmul float %520, 5.000000e-01
%524 = fmul float %521, 5.000000e-01
%525 = call float @llvm.minnum.f32(float %.97, float %temp64.1)
%526 = call float @llvm.minnum.f32(float %525, float %.98)
; result = direction * %526 + (midpoint - (%60..%62) + input 6) - midpoint.
%527 = fsub float %522, %60
%528 = fsub float %523, %61
%529 = fsub float %524, %62
%530 = fadd float %527, %129
%531 = fadd float %528, %130
%532 = fadd float %529, %131
%533 = fmul float %495, %526
%534 = fadd float %533, %530
%535 = fmul float %496, %526
%536 = fadd float %535, %531
%537 = fmul float %497, %526
%538 = fadd float %537, %532
%539 = fsub float %534, %522
%540 = fsub float %536, %523
%541 = fsub float %538, %524
br label %ENDIF84
ENDIF84: ; preds = %IF82, %IF85
; Second lookup: same sequence as block ENDIF, but using descriptor pair 1 and
; the constants %64 / %65 for the scale and exponent.
%temp44.0 = phi float [ %539, %IF85 ], [ %253, %IF82 ]
%temp45.0 = phi float [ %540, %IF85 ], [ %254, %IF82 ]
%temp46.0 = phi float [ %541, %IF85 ], [ %255, %IF82 ]
%542 = fsub float 1.000000e+00, %81
%543 = call float @llvm.pow.f32(float %542, float 7.500000e-01)
%544 = fmul float %543, 7.000000e+00
%545 = insertelement <4 x float> undef, float %temp44.0, i32 0
%546 = insertelement <4 x float> %545, float %temp45.0, i32 1
%547 = insertelement <4 x float> %546, float %temp46.0, i32 2
%548 = insertelement <4 x float> %547, float %544, i32 3
%549 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %548)
%550 = extractelement <4 x float> %549, i32 0
%551 = extractelement <4 x float> %549, i32 1
%552 = extractelement <4 x float> %549, i32 2
%553 = extractelement <4 x float> %549, i32 3
%554 = call float @llvm.fabs.f32(float %552)
%555 = fdiv float 1.000000e+00, %554
%556 = fmul float %550, %555
%557 = fadd float %556, 1.500000e+00
%558 = fmul float %551, %555
%559 = fadd float %558, 1.500000e+00
%560 = bitcast float %559 to i32
%561 = bitcast float %557 to i32
%562 = bitcast float %553 to i32
%563 = bitcast float %544 to i32
%564 = insertelement <4 x i32> undef, i32 %560, i32 0
%565 = insertelement <4 x i32> %564, i32 %561, i32 1
%566 = insertelement <4 x i32> %565, i32 %562, i32 2
%567 = insertelement <4 x i32> %566, i32 %563, i32 3
%568 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %567, <32 x i8> %89, <16 x i8> %92, i32 4)
%569 = extractelement <4 x float> %568, i32 0
%570 = extractelement <4 x float> %568, i32 1
%571 = extractelement <4 x float> %568, i32 2
%572 = extractelement <4 x float> %568, i32 3
%573 = call float @llvm.pow.f32(float %572, float %65)
%574 = fmul float %64, %573
%575 = fmul float %574, %569
%576 = fmul float %574, %570
%577 = fmul float %574, %571
; Blend the first lookup (%346..%348) with this one using weight %50, in the
; operand order of TGSI instruction 160 (LRP CONST[9].w, first, second).
%578 = call float @llvm.AMDGPU.lrp(float %50, float %346, float %575)
%579 = call float @llvm.AMDGPU.lrp(float %50, float %347, float %576)
%580 = call float @llvm.AMDGPU.lrp(float %50, float %348, float %577)
br label %ENDIF81
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000
v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001
v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100
v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500
v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501
v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600
v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601
v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800
v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801
v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900
v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v8, v0, 0, 4, [m0] ; C8201000
v_interp_p2_f32 v8, [v8], v1, 0, 4, [m0] ; C8211001
v_interp_p1_f32 v9, v0, 1, 4, [m0] ; C8241100
v_interp_p2_f32 v9, [v9], v1, 1, 4, [m0] ; C8251101
v_interp_p1_f32 v10, v0, 2, 4, [m0] ; C8281200
v_interp_p2_f32 v10, [v10], v1, 2, 4, [m0] ; C8291201
v_interp_p1_f32 v3, v0, 0, 5, [m0] ; C80C1400
v_interp_p2_f32 v3, [v3], v1, 0, 5, [m0] ; C80D1401
v_interp_p1_f32 v18, v0, 1, 5, [m0] ; C8481500
v_interp_p2_f32 v18, [v18], v1, 1, 5, [m0] ; C8491501
v_interp_p1_f32 v19, v0, 2, 5, [m0] ; C84C1600
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p2_f32 v19, [v19], v1, 2, 5, [m0] ; C84D1601
v_interp_p1_f32 v20, v0, 3, 5, [m0] ; C8501700
v_interp_p2_f32 v20, [v20], v1, 3, 5, [m0] ; C8511701
s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C
s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718
v_interp_p1_f32 v21, v0, 0, 6, [m0] ; C8541800
v_interp_p2_f32 v21, [v21], v1, 0, 6, [m0] ; C8551801
v_interp_p1_f32 v17, v0, 1, 6, [m0] ; C8441900
v_interp_p2_f32 v17, [v17], v1, 1, 6, [m0] ; C8451901
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[8:11], 0x58 ; C2060958
v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00
v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx8 s[32:39], s[6:7], 0x20 ; C0D00720
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[0:3] ; F0800A00 0005000D
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
s_buffer_load_dword s21, s[8:11], 0x5c ; C20A895C
s_buffer_load_dword s0, s[8:11], 0x60 ; C2000960
v_mul_f32_e32 v1, s12, v1 ; 1002020C
v_mul_f32_e32 v0, s12, v0 ; 1000000C
v_mul_f32_e32 v2, v2, v1 ; 10040302
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v4, v4, v1 ; 10080304
v_mac_f32_e32 v4, v7, v0 ; 3E080107
v_mul_f32_e32 v7, v5, v1 ; 100E0305
v_mac_f32_e32 v7, v11, v0 ; 3E0E010B
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v2, v12, v0 ; 3E04010C
v_mac_f32_e32 v4, v15, v0 ; 3E08010F
v_mac_f32_e32 v7, v16, v0 ; 3E0E0110
v_mul_f32_e32 v0, v2, v2 ; 10000502
v_mac_f32_e32 v0, v4, v4 ; 3E000904
v_mac_f32_e32 v0, v7, v7 ; 3E000F07
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v18, v18 ; 10022512
v_mac_f32_e32 v1, v19, v19 ; 3E022713
v_mac_f32_e32 v1, v20, v20 ; 3E022914
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v6, v0, v2 ; 100C0500
v_mul_f32_e32 v5, v0, v4 ; 100A0900
v_mul_f32_e32 v4, v0, v7 ; 10080F00
v_mul_f32_e32 v12, v1, v18 ; 10182501
v_mul_f32_e32 v11, v1, v19 ; 10162701
v_mul_f32_e32 v0, v12, v6 ; 10000D0C
v_mac_f32_e32 v0, v11, v5 ; 3E000B0B
v_mul_f32_e32 v7, v1, v20 ; 100E2901
v_mac_f32_e32 v0, v7, v4 ; 3E000907
v_mul_f32_e32 v2, v6, v0 ; 10040106
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v15, v5, v0 ; 101E0105
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
v_mac_f32_e32 v15, v5, v0 ; 3E1E0105
v_mad_f32 v23, v18, v1, -v2 ; D2820017 840A0312
v_mad_f32 v24, v19, v1, -v15 ; D2820018 843E0313
s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C
s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D
s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E
v_mul_f32_e32 v2, v4, v0 ; 10040104
v_mac_f32_e32 v2, v4, v0 ; 3E040104
v_mad_f32 v25, v20, v1, -v2 ; D2820019 840A0314
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[12:15] ; F0800700 0066000D
s_buffer_load_dword s13, s[8:11], 0x40 ; C2068940
s_buffer_load_dword s14, s[8:11], 0x41 ; C2070941
s_buffer_load_dword s15, s[8:11], 0x42 ; C2078942
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v18, s1, v0 ; 10240001
v_mul_f32_e32 v19, s2, v1 ; 10260202
v_mul_f32_e32 v20, s3, v2 ; 10280403
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s1, s[8:11], 0x2b ; C200892B
s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C
s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D
v_sub_f32_e64 v0, 1.0, s21 ; D2080000 00002AF2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s13, v0 ; 1004000D
v_mul_f32_e32 v1, s14, v0 ; 1002000E
v_mul_f32_e32 v0, s15, v0 ; 1000000F
v_mac_f32_e32 v2, s21, v18 ; 3E042415
v_mov_b32_e32 v26, v23 ; 7E340317
v_mac_f32_e32 v1, s21, v19 ; 3E022615
v_mov_b32_e32 v27, v24 ; 7E360318
v_mac_f32_e32 v0, s21, v20 ; 3E002815
v_mov_b32_e32 v28, v25 ; 7E380319
v_cmp_lt_f32_e64 s[2:3], 0, s1 ; D0020002 00000280
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[16:19] ; F0800F00 00880D0D
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[22:23], s[2:3] ; BE962402
s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925
v_mul_f32_e32 v13, v23, v23 ; 101A2F17
v_mac_f32_e32 v13, v24, v24 ; 3E1A3118
v_mac_f32_e32 v13, v25, v25 ; 3E1A3319
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926
s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928
s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929
s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A
v_mul_f32_e32 v15, v13, v23 ; 101E2F0D
v_mul_f32_e32 v16, v13, v24 ; 1020310D
v_mul_f32_e32 v13, v13, v25 ; 101A330D
v_rcp_f32_e32 v26, v15 ; 7E34550F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v27, s1, v21 ; 08362A01
v_sub_f32_e32 v28, s2, v17 ; 08382202
v_rcp_f32_e32 v29, v16 ; 7E3A5510
v_mul_f32_e32 v27, v26, v27 ; 1036371A
v_sub_f32_e32 v30, s13, v21 ; 083C2A0D
v_mul_f32_e32 v26, v26, v30 ; 10343D1A
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v26, v26, v27 ; 0034371A
v_rcp_f32_e32 v27, v13 ; 7E36550D
v_mul_f32_e32 v28, v29, v28 ; 1038391D
v_sub_f32_e32 v30, s14, v17 ; 083C220E
v_mul_f32_e32 v29, v29, v30 ; 103A3D1D
v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080
v_cndmask_b32_e32 v28, v29, v28 ; 0038391D
v_sub_f32_e32 v29, s3, v22 ; 083A2C03
v_mul_f32_e32 v29, v27, v29 ; 103A3B1B
v_sub_f32_e32 v30, s15, v22 ; 083C2C0F
v_mul_f32_e32 v27, v27, v30 ; 10363D1B
v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80
v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B
v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A
v_mov_b32_e32 v27, s13 ; 7E36020D
v_add_f32_e32 v27, s1, v27 ; 06363601
v_mov_b32_e32 v28, s14 ; 7E38020E
v_add_f32_e32 v28, s2, v28 ; 06383802
v_mov_b32_e32 v29, s15 ; 7E3A020F
v_add_f32_e32 v29, s3, v29 ; 063A3A03
v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0
v_add_f32_e32 v30, v21, v30 ; 063C3D15
v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A
v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0
v_add_f32_e32 v15, v17, v15 ; 061E1F11
v_mac_f32_e32 v15, v26, v16 ; 3E1E211A
v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0
v_add_f32_e32 v16, v22, v16 ; 06202116
v_mac_f32_e32 v16, v26, v13 ; 3E201B1A
v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0
v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0
v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0
s_or_b64 exec, exec, s[22:23] ; 88FE167E
s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917
s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943
s_buffer_load_dword s13, s[8:11], 0x68 ; C2068968
s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900
s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901
s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902
s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904
s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905
s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906
s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907
s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908
s_buffer_load_dword s22, s[8:11], 0x9 ; C20B0909
s_buffer_load_dword s23, s[8:11], 0xa ; C20B890A
s_buffer_load_dword s24, s[8:11], 0xb ; C20C090B
s_buffer_load_dword s25, s[8:11], 0xc ; C20C890C
s_buffer_load_dword s26, s[8:11], 0xd ; C20D090D
s_buffer_load_dword s27, s[8:11], 0xe ; C20D890E
s_buffer_load_dword s28, s[8:11], 0xf ; C20E090F
v_sub_f32_e64 v13, 1.0, s0 ; D208000D 000000F2
v_log_f32_e32 v13, v13 ; 7E1A4F0D
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000
v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A
v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A
v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A
v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A
v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000
v_rcp_f32_e64 v13, |v32| ; D354010D 00000120
v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D
v_mac_f32_e32 v26, v13, v31 ; 3E343F0D
v_mov_b32_e32 v28, v33 ; 7E380321
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v13, v29 ; 7E1A4F1D
v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v13, s29, v13 ; 101A1A1D
v_mul_f32_e32 v16, v26, v13 ; 10201B1A
v_mul_f32_e32 v15, v27, v13 ; 101E1B1B
v_mul_f32_e32 v13, v28, v13 ; 101A1B1C
v_mov_b32_e32 v27, s21 ; 7E360215
v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C
s_and_saveexec_b64 s[30:31], vcc ; BE9E246A
s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B
s_buffer_load_dword s21, s[8:11], 0x3c ; C20A893C
s_buffer_load_dword s29, s[8:11], 0x3d ; C20E893D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936
s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938
s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939
s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A
s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930
s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931
s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932
s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934
s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935
v_mul_f32_e32 v26, v23, v23 ; 10342F17
v_mac_f32_e32 v26, v24, v24 ; 3E343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v28, s34, v22 ; 08382C22
v_mov_b32_e32 v29, s34 ; 7E3A0222
v_sub_f32_e32 v30, s38, v21 ; 083C2A26
v_sub_f32_e32 v31, s39, v17 ; 083E2227
v_add_f32_e32 v29, s40, v29 ; 063A3A28
v_sub_f32_e32 v32, s40, v22 ; 08402C28
v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0
v_add_f32_e32 v22, v22, v33 ; 062C4316
v_mul_f32_e32 v23, v26, v23 ; 102E2F1A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_rcp_f32_e32 v26, v23 ; 7E345517
v_rcp_f32_e32 v33, v24 ; 7E425518
v_rcp_f32_e32 v34, v25 ; 7E445519
v_sub_f32_e32 v35, s41, v21 ; 08462A29
v_mov_b32_e32 v36, s41 ; 7E480229
v_add_f32_e32 v36, s38, v36 ; 06484826
v_mul_f32_e32 v30, v26, v30 ; 103C3D1A
v_mul_f32_e32 v26, v26, v35 ; 1034471A
v_mul_f32_e32 v31, v33, v31 ; 103E3F21
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0
v_add_f32_e32 v21, v21, v34 ; 062A4515
v_sub_f32_e32 v34, s42, v17 ; 0844222A
v_mov_b32_e32 v35, s42 ; 7E46022A
v_mul_f32_e32 v33, v33, v34 ; 10424521
v_add_f32_e32 v34, s39, v35 ; 06444627
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A
v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080
v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v28, v28, v32 ; 0038411C
v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A
v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0
v_add_f32_e32 v17, v17, v28 ; 06223911
v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A
v_mac_f32_e32 v17, v26, v24 ; 3E22311A
v_mac_f32_e32 v22, v26, v25 ; 3E2C331A
v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0
v_mad_f32 v24, 0.5, -v34, v17 ; D2820018 444644F0
v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v26, 0x40e00000, v17 ; 103422FF 40E00000
v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117
v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117
v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117
v_rcp_f32_e64 v17, |v30| ; D3540111 0000011E
v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000
v_mad_f32 v24, v17, v28, v23 ; D2820018 045E3911
v_mac_f32_e32 v23, v17, v29 ; 3E2E3B11
v_mov_b32_e32 v25, v31 ; 7E32031F
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v24 ; 7E224F18
v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2
v_mul_legacy_f32_e32 v17, s29, v17 ; 0E22221D
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s21, v17 ; 10222215
v_mul_f32_e32 v21, v21, v17 ; 102A2315
v_mul_f32_e32 v22, v22, v17 ; 102C2316
v_mul_f32_e32 v17, v23, v17 ; 10222317
v_mul_f32_e32 v21, v21, v24 ; 102A3115
v_mul_f32_e32 v22, v22, v24 ; 102C3116
v_mul_f32_e32 v17, v17, v24 ; 10223111
v_mac_f32_e32 v21, s12, v16 ; 3E2A200C
v_mac_f32_e32 v22, s12, v15 ; 3E2C1E0C
v_mac_f32_e32 v17, s12, v13 ; 3E221A0C
v_mov_b32_e32 v13, v17 ; 7E1A0311
v_mov_b32_e32 v15, v22 ; 7E1E0316
v_mov_b32_e32 v16, v21 ; 7E200315
s_or_b64 exec, exec, s[30:31] ; 88FE1E7E
v_mad_f32 v22, -v27, s15, s15 ; D2820016 203C1F1B
v_mov_b32_e32 v17, s14 ; 7E22020E
v_mul_f32_e32 v21, v22, v18 ; 102A2516
v_mul_f32_e32 v19, v22, v19 ; 10262716
v_mul_f32_e32 v18, v22, v20 ; 10242916
v_mul_f32_e32 v20, s17, v5 ; 10280A11
v_mac_f32_e32 v20, s16, v6 ; 3E280C10
v_mac_f32_e32 v20, s18, v4 ; 3E280812
v_add_f32_e32 v20, s20, v20 ; 06282814
v_add_f32_e32 v23, v20, v8 ; 062E1114
v_mul_f32_e32 v8, s22, v5 ; 10100A16
v_mac_f32_e32 v8, s19, v6 ; 3E100C13
v_mac_f32_e32 v8, s23, v4 ; 3E100817
v_add_f32_e32 v8, s24, v8 ; 06101018
v_add_f32_e32 v9, v8, v9 ; 06121308
v_mul_f32_e32 v8, s26, v5 ; 10100A1A
v_mac_f32_e32 v8, s25, v6 ; 3E100C19
v_mac_f32_e32 v8, s27, v4 ; 3E10081B
v_add_f32_e32 v8, s28, v8 ; 0610101C
v_add_f32_e32 v10, v8, v10 ; 06141508
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916
s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944
s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945
s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946
s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948
s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_sub_f32_e64 v20, 1.0, s13 ; D2080014 00001AF2
v_mac_f32_e32 v20, s13, v14 ; 3E281C0D
v_mul_f32_e32 v8, s1, v6 ; 10100C01
v_mac_f32_e32 v8, s2, v5 ; 3E100A02
v_mac_f32_e32 v8, s3, v4 ; 3E100803
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v14, v20, v23 ; 101C2F14
v_mul_f32_e32 v9, v20, v9 ; 10121314
v_mul_f32_e32 v10, v20, v10 ; 10141514
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v17, s17, v3 ; 3E220611
v_mul_f32_e32 v3, v20, v16 ; 10062114
v_mul_f32_e32 v15, v20, v15 ; 101E1F14
v_mul_f32_e32 v13, v20, v13 ; 101A1B14
v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2
v_add_f32_e32 v16, s0, v16 ; 06202000
v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2
v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080
v_sub_f32_e32 v22, s1, v12 ; 082C1801
v_sub_f32_e32 v23, s2, v11 ; 082E1602
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s3, v7 ; 08320E03
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
v_mul_f32_e32 v12, v12, v6 ; 10180D0C
v_mad_f32 v11, -v11, v5, -v12 ; D282000B A4320B0B
v_mul_f32_e32 v6, v22, v6 ; 100C0D16
v_mac_f32_e32 v6, v23, v5 ; 3E0C0B17
v_mul_f32_e32 v5, s1, v22 ; 100A2C01
v_mac_f32_e32 v5, s2, v23 ; 3E0A2E02
v_mad_f32 v7, -v7, v4, v11 ; D2820007 242E0907
v_mac_f32_e32 v5, s3, v24 ; 3E0A3003
v_mac_f32_e32 v6, v24, v4 ; 3E0C0918
v_max_f32_e32 v4, 0, v5 ; 20080A80
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v11, v5, v5 ; 10160B05
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_sub_f32_e32 v11, 1.0, v7 ; 08160EF2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v22, v11, v12 ; 102C190B
v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C
v_mul_f32_e32 v24, v2, v23 ; 10302F02
v_sub_f32_e32 v25, 1.0, v2 ; 083204F2
v_mac_f32_e32 v2, v5, v25 ; 3E043305
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v5, v26 ; 3E023505
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v5, v26 ; 3E003505
v_sub_f32_e32 v5, 1.0, v20 ; 080A28F2
v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F
v_madmk_f32_e32 v5, v5, v26, 0x3f77ced9 ; 400A3505 3F77CED9
v_add_f32_e32 v26, v4, v4 ; 06340904
v_mul_f32_e32 v4, v20, v4 ; 10080914
v_mad_f32 v4, v26, v4, 0.5 ; D2820004 03C2091A
v_mul_f32_e32 v12, v22, v12 ; 10181916
v_mac_f32_e32 v24, v16, v12 ; 3E301910
v_mac_f32_e32 v25, v16, v12 ; 3E321910
v_mac_f32_e32 v23, v16, v12 ; 3E2E1910
v_mul_f32_e32 v16, v20, v20 ; 10202914
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_mul_f32_e32 v16, s8, v16 ; 10202008
v_mul_f32_e32 v11, v16, v11 ; 10161710
v_mac_f32_e32 v11, 1.0, v7 ; 3E160EF2
v_rcp_f32_e32 v5, v5 ; 7E0A5505
v_sub_f32_e32 v7, 1.0, v8 ; 080E10F2
v_mul_f32_e32 v16, v16, v7 ; 10200F10
v_mac_f32_e32 v16, 1.0, v8 ; 3E2010F2
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_madak_f32_e32 v11, v16, v11, 0x38d1b717 ; 42161710 38D1B717
v_mul_f32_e32 v5, 0x41200000, v5 ; 100A0AFF 41200000
v_mul_f32_e32 v16, v5, v5 ; 10200B05
v_mul_legacy_f32_e32 v6, v16, v6 ; 0E0C0D10
v_rcp_f32_e32 v11, v11 ; 7E16550B
v_mad_f32 v5, v5, v5, 1.0 ; D2820005 03CA0B05
v_mul_f32_e32 v5, s16, v5 ; 100A0A10
v_exp_f32_e32 v6, v6 ; 7E0C4B06
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mul_f32_e32 v5, v8, v5 ; 100A0B08
v_mul_f32_e32 v5, s15, v5 ; 100A0A0F
v_mul_f32_e32 v6, v7, v7 ; 100C0F07
v_mul_f32_e32 v7, v7, v6 ; 100E0D07
v_mul_f32_e32 v6, v7, v6 ; 100C0D07
v_add_f32_e32 v4, -1.0, v4 ; 060808F3
v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04
v_mad_f32 v4, v4, v12, 1.0 ; D2820004 03CA1904
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v4, v8, v4 ; 10080908
v_mac_f32_e32 v14, s14, v4 ; 3E1C080E
v_mul_f32_e32 v6, v14, v21 ; 100C2B0E
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mul_f32_e32 v7, s14, v5 ; 100E0A0E
v_mac_f32_e32 v6, v2, v7 ; 3E0C0F02
v_mac_f32_e32 v9, s7, v4 ; 3E120807
v_mac_f32_e32 v10, s12, v4 ; 3E14080C
v_mul_f32_e32 v2, s7, v5 ; 10040A07
v_mul_f32_e32 v4, s12, v5 ; 10080A0C
v_mul_f32_e32 v5, v9, v19 ; 100A2709
v_mul_f32_e32 v7, v10, v18 ; 100E250A
v_mac_f32_e32 v5, v1, v2 ; 3E0A0501
v_mac_f32_e32 v7, v0, v4 ; 3E0E0900
v_mac_f32_e32 v6, v24, v3 ; 3E0C0718
v_mac_f32_e32 v5, v25, v15 ; 3E0A1F19
v_mac_f32_e32 v7, v23, v13 ; 3E0E1B17
v_add_f32_e64 v0, 0, v17 clamp ; D2060800 00022280
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v7, v0 ; 3E020107
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 40
Code Size: 2272 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..19]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx
34: DP4 TEMP[6].x, CONST[1], TEMP[5]
35: DP4 TEMP[7].x, CONST[2], TEMP[5]
36: MOV TEMP[6].y, TEMP[7].xxxx
37: DP4 TEMP[5].x, CONST[3], TEMP[5]
38: MOV TEMP[6].z, TEMP[5].xxxx
39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy
40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx
41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz
43: MOV TEMP[5].yzw, TEMP[5].yxyz
44: MOV TEMP[5].x, TEMP[1].zzzz
45: MOV TEMP[0].xyz, TEMP[0].xyzx
46: MOV OUT[5], TEMP[0]
47: MOV OUT[1], TEMP[2]
48: MOV OUT[2], TEMP[4]
49: MOV OUT[3], TEMP[3]
50: MOV OUT[0], TEMP[1]
51: MOV OUT[4], TEMP[5]
52: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
%107 = fmul float %31, %84
%108 = fmul float %32, %84
%109 = fmul float %33, %84
%110 = fmul float %34, %84
%111 = fmul float %35, %85
%112 = fadd float %111, %107
%113 = fmul float %36, %85
%114 = fadd float %113, %108
%115 = fmul float %37, %85
%116 = fadd float %115, %109
%117 = fmul float %38, %85
%118 = fadd float %117, %110
%119 = fmul float %39, %86
%120 = fadd float %119, %112
%121 = fmul float %40, %86
%122 = fadd float %121, %114
%123 = fmul float %41, %86
%124 = fadd float %123, %116
%125 = fmul float %42, %86
%126 = fadd float %125, %118
%127 = fmul float %43, %87
%128 = fadd float %127, %120
%129 = fmul float %44, %87
%130 = fadd float %129, %122
%131 = fmul float %45, %87
%132 = fadd float %131, %124
%133 = fmul float %64, %84
%134 = fmul float %65, %84
%135 = fmul float %66, %84
%136 = fmul float %67, %84
%137 = fmul float %68, %85
%138 = fadd float %137, %133
%139 = fmul float %69, %85
%140 = fadd float %139, %134
%141 = fmul float %70, %85
%142 = fadd float %141, %135
%143 = fmul float %71, %85
%144 = fadd float %143, %136
%145 = fmul float %72, %86
%146 = fadd float %145, %138
%147 = fmul float %73, %86
%148 = fadd float %147, %140
%149 = fmul float %74, %86
%150 = fadd float %149, %142
%151 = fmul float %75, %86
%152 = fadd float %151, %144
%153 = fmul float %76, %87
%154 = fadd float %153, %146
%155 = fmul float %77, %87
%156 = fadd float %155, %148
%157 = fmul float %78, %87
%158 = fadd float %157, %150
%159 = fmul float %79, %87
%160 = fadd float %159, %152
%161 = fmul float %99, %55
%162 = fadd float %161, %57
%163 = fmul float %100, %56
%164 = fadd float %163, %58
%165 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %165, float %99, float %105
%.32 = select i1 %165, float %100, float %106
%166 = fmul float %., %59
%167 = fadd float %166, %61
%168 = fmul float %.32, %60
%169 = fadd float %168, %62
%170 = fmul float %46, %92
%171 = fmul float %49, %92
%172 = fmul float %52, %92
%173 = fmul float %47, %93
%174 = fadd float %173, %170
%175 = fmul float %50, %93
%176 = fadd float %175, %171
%177 = fmul float %53, %93
%178 = fadd float %177, %172
%179 = fmul float %48, %94
%180 = fadd float %179, %174
%181 = fmul float %51, %94
%182 = fadd float %181, %176
%183 = fmul float %54, %94
%184 = fadd float %183, %178
%185 = fmul float %180, %180
%186 = fmul float %182, %182
%187 = fadd float %186, %185
%188 = fmul float %184, %184
%189 = fadd float %187, %188
%190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189)
%191 = fmul float %180, %190
%192 = fmul float %182, %190
%193 = fmul float %184, %190
%194 = fmul float %191, %192
%195 = fmul float %192, %193
%196 = fmul float %193, %193
%197 = fmul float %193, %191
%198 = fmul float %16, %194
%199 = fmul float %17, %195
%200 = fadd float %198, %199
%201 = fmul float %18, %196
%202 = fadd float %200, %201
%203 = fmul float %19, %197
%204 = fadd float %202, %203
%205 = fmul float %20, %194
%206 = fmul float %21, %195
%207 = fadd float %205, %206
%208 = fmul float %22, %196
%209 = fadd float %207, %208
%210 = fmul float %23, %197
%211 = fadd float %209, %210
%212 = fmul float %24, %194
%213 = fmul float %25, %195
%214 = fadd float %212, %213
%215 = fmul float %26, %196
%216 = fadd float %214, %215
%217 = fmul float %27, %197
%218 = fadd float %216, %217
%219 = fmul float %192, %192
%220 = fmul float %191, %191
%221 = fsub float %220, %219
%222 = fmul float %28, %221
%223 = fadd float %222, %204
%224 = fmul float %29, %221
%225 = fadd float %224, %211
%226 = fmul float %30, %221
%227 = fadd float %226, %218
%228 = fsub float %128, %13
%229 = fsub float %130, %14
%230 = fsub float %132, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521
s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522
s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524
s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525
s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526
s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528
s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529
s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A
s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C
s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D
s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E
s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534
s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535
s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505
s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506
s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507
s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508
s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s5 ; 7E000205
s_buffer_load_dword s7, s[20:23], 0xa ; C203950A
s_buffer_load_dword s5, s[20:23], 0xb ; C202950B
s_buffer_load_dword s15, s[20:23], 0xc ; C207950C
s_buffer_load_dword s17, s[20:23], 0xd ; C208950D
s_buffer_load_dword s13, s[20:23], 0xe ; C206950E
s_buffer_load_dword s8, s[20:23], 0xf ; C204150F
s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C
s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540
s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541
s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542
s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543
s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510
s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511
s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512
s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514
s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880
s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516
s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517
s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518
s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519
s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A
s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537
s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538
s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539
s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A
s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B
s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B
s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C
s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D
s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E
s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s47 ; 7E1A022F
s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544
s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545
s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546
v_mov_b32_e32 v14, s50 ; 7E1C0232
s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547
v_mov_b32_e32 v15, s51 ; 7E1E0233
s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548
s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549
s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A
s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B
s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C
s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D
s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E
s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F
v_mul_f32_e32 v16, s41, v2 ; 10200429
v_mac_f32_e32 v0, s34, v9 ; 3E001222
v_mul_f32_e32 v17, s42, v2 ; 1022042A
v_mul_f32_e32 v18, s36, v2 ; 10240424
v_mul_f32_e32 v19, s43, v2 ; 1026042B
v_mac_f32_e32 v13, s35, v10 ; 3E1A1423
v_mul_f32_e32 v20, s25, v6 ; 10280C19
v_mul_f32_e32 v21, s28, v6 ; 102A0C1C
v_mul_f32_e32 v6, s31, v6 ; 100C0C1F
v_mac_f32_e32 v16, s44, v3 ; 3E20062C
v_mac_f32_e32 v17, s45, v3 ; 3E22062D
v_mac_f32_e32 v18, s46, v3 ; 3E24062E
v_mac_f32_e32 v20, s26, v7 ; 3E280E1A
v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20
v_mac_f32_e32 v20, s27, v8 ; 3E28101B
v_mac_f32_e32 v21, s30, v8 ; 3E2A101E
v_mac_f32_e32 v6, s33, v8 ; 3E0C1021
v_mac_f32_e32 v19, s52, v3 ; 3E260634
v_mul_f32_e32 v7, s37, v2 ; 100E0425
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v7, s47, v3 ; 3E0E062F
v_mul_f32_e32 v8, s38, v2 ; 10100426
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s58, v3 ; 3E16063A
v_mul_f32_e32 v2, s40, v2 ; 10040428
v_mac_f32_e32 v2, s50, v3 ; 3E040632
v_mac_f32_e32 v16, s53, v4 ; 3E200835
v_mac_f32_e32 v17, s54, v4 ; 3E220836
v_mac_f32_e32 v18, s55, v4 ; 3E240837
v_mac_f32_e32 v19, s56, v4 ; 3E260838
v_mac_f32_e32 v7, s51, v4 ; 3E0E0833
v_mac_f32_e32 v8, s59, v4 ; 3E10083B
v_mac_f32_e32 v11, s60, v4 ; 3E16083C
v_mac_f32_e32 v2, s61, v4 ; 3E04083D
v_mac_f32_e32 v16, s18, v5 ; 3E200A12
v_mac_f32_e32 v17, s19, v5 ; 3E220A13
v_mac_f32_e32 v18, s24, v5 ; 3E240A18
v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E
v_mac_f32_e32 v8, s63, v5 ; 3E100A3F
v_mac_f32_e32 v11, s64, v5 ; 3E160A40
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mul_f32_e32 v3, v20, v20 ; 10062914
v_mac_f32_e32 v3, v21, v21 ; 3E062B15
v_mac_f32_e32 v3, v6, v6 ; 3E060D06
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mac_f32_e32 v14, s48, v9 ; 3E1C1230
v_mac_f32_e32 v15, s49, v10 ; 3E1E1431
exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v3, v20 ; 10002903
v_mul_f32_e32 v4, v3, v21 ; 10082B03
v_mul_f32_e32 v3, v3, v6 ; 10060D03
v_mul_f32_e32 v5, v3, v4 ; 100A0903
v_mul_f32_e32 v6, s14, v5 ; 100C0A0E
v_mul_f32_e32 v9, s16, v5 ; 10120A10
v_mul_f32_e32 v5, s17, v5 ; 100A0A11
v_mul_f32_e32 v10, v4, v0 ; 10140104
v_mac_f32_e32 v6, s6, v10 ; 3E0C1406
v_mac_f32_e32 v9, s12, v10 ; 3E12140C
v_mac_f32_e32 v5, s15, v10 ; 3E0A140F
v_mul_f32_e32 v10, v3, v3 ; 10140703
v_mac_f32_e32 v6, s4, v10 ; 3E0C1404
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s13, v10 ; 3E0A140D
v_mul_f32_e32 v10, v0, v3 ; 10140700
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s5, v10 ; 3E121405
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100
v_mac_f32_e32 v6, s9, v10 ; 3E0C1409
v_mac_f32_e32 v9, s10, v10 ; 3E12140A
v_mac_f32_e32 v5, s11, v10 ; 3E0A140B
v_subrev_f32_e32 v10, s0, v16 ; 0A142000
v_subrev_f32_e32 v12, s1, v17 ; 0A182201
v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402
exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B
exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110
exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 24
Code Size: 748 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..20]
DCL CONST[23..24]
DCL CONST[26]
DCL TEMP[0..17], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].x, TEMP[2], SAMP[3], 2D
8: MOV TEMP[3].xyz, IMM[0].xxxx
9: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
10: UIF TEMP[2].xxxx :0
11: MUL TEMP[2].xyz, CONST[20].xyzz, CONST[19].xyzz
12: MOV TEMP[4].xy, IN[0].xyyy
13: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D
14: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz
15: ELSE :0
16: MOV TEMP[2].xy, IN[0].xyyy
17: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
18: MUL TEMP[3].xyz, CONST[19].xyzz, TEMP[2].xyzz
19: ENDIF
20: LRP TEMP[2].xyz, CONST[23].xxxx, TEMP[3].xyzz, CONST[16].xyzz
21: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww
22: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
23: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
24: MOV TEMP[5].xy, IN[0].xyyy
25: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
26: ADD TEMP[6].x, IMM[0].xxxx, -CONST[26].xxxx
27: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx
28: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
29: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
30: MOV TEMP[7].xyz, IMM[0].yyyy
31: MOV TEMP[8].w, IMM[0].xxxx
32: MOV TEMP[8].xyz, TEMP[0].xyzx
33: DP4 TEMP[9].x, CONST[1], TEMP[8]
34: DP4 TEMP[10].x, CONST[2], TEMP[8]
35: MOV TEMP[9].y, TEMP[10].xxxx
36: DP4 TEMP[8].x, CONST[3], TEMP[8]
37: MOV TEMP[9].z, TEMP[8].xxxx
38: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
39: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
40: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
41: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
42: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz
43: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
44: MOV TEMP[10].xyz, TEMP[9].xyzx
45: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww
46: UIF TEMP[11].xxxx :0
47: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
48: RSQ TEMP[11].x, TEMP[11].xxxx
49: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
50: MOV TEMP[12].xyz, -IN[4].xyzx
51: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
52: RCP TEMP[14].x, TEMP[11].xxxx
53: RCP TEMP[14].y, TEMP[11].yyyy
54: RCP TEMP[14].z, TEMP[11].zzzz
55: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
56: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
57: RCP TEMP[14].x, TEMP[11].xxxx
58: RCP TEMP[14].y, TEMP[11].yyyy
59: RCP TEMP[14].z, TEMP[11].zzzz
60: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
61: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz
62: UIF TEMP[14].xxxx :0
63: MOV TEMP[15].x, TEMP[13].xxxx
64: ELSE :0
65: MOV TEMP[15].x, TEMP[12].xxxx
66: ENDIF
67: UIF TEMP[14].yyyy :0
68: MOV TEMP[16].x, TEMP[13].yyyy
69: ELSE :0
70: MOV TEMP[16].x, TEMP[12].yyyy
71: ENDIF
72: UIF TEMP[14].zzzz :0
73: MOV TEMP[13].x, TEMP[13].zzzz
74: ELSE :0
75: MOV TEMP[13].x, TEMP[12].zzzz
76: ENDIF
77: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
78: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
79: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
80: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
81: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
82: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
83: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
84: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
85: ENDIF
86: ADD TEMP[11].x, IMM[0].xxxx, -CONST[24].xxxx
87: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx
88: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
89: MOV TEMP[10].xyz, TEMP[10].xyzz
90: MOV TEMP[10].w, TEMP[11].xxxx
91: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
92: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
93: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
94: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
95: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz
96: UIF TEMP[11].xxxx :0
97: MOV TEMP[11].xyz, TEMP[9].xyzx
98: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww
99: UIF TEMP[12].xxxx :0
100: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
101: RSQ TEMP[12].x, TEMP[12].xxxx
102: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
103: MOV TEMP[12].xyz, -IN[4].xyzx
104: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
105: RCP TEMP[14].x, TEMP[9].xxxx
106: RCP TEMP[14].y, TEMP[9].yyyy
107: RCP TEMP[14].z, TEMP[9].zzzz
108: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
109: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
110: RCP TEMP[14].x, TEMP[9].xxxx
111: RCP TEMP[14].y, TEMP[9].yyyy
112: RCP TEMP[14].z, TEMP[9].zzzz
113: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
114: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz
115: UIF TEMP[14].xxxx :0
116: MOV TEMP[15].x, TEMP[13].xxxx
117: ELSE :0
118: MOV TEMP[15].x, TEMP[12].xxxx
119: ENDIF
120: UIF TEMP[14].yyyy :0
121: MOV TEMP[16].x, TEMP[13].yyyy
122: ELSE :0
123: MOV TEMP[16].x, TEMP[12].yyyy
124: ENDIF
125: UIF TEMP[14].zzzz :0
126: MOV TEMP[13].x, TEMP[13].zzzz
127: ELSE :0
128: MOV TEMP[13].x, TEMP[12].zzzz
129: ENDIF
130: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
131: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
132: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
133: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
134: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
135: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
136: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
137: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
138: ENDIF
139: ADD TEMP[9].x, IMM[0].xxxx, -CONST[24].xxxx
140: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx
141: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
142: MOV TEMP[11].xyz, TEMP[11].xyzz
143: MOV TEMP[11].w, TEMP[9].xxxx
144: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
145: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
146: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
147: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
148: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
149: ELSE :0
150: MOV TEMP[7].xyz, TEMP[10].xyzx
151: ENDIF
152: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
153: MOV TEMP[1].xyz, -TEMP[1].xyzx
154: ADD TEMP[5].x, IMM[0].xxxx, -CONST[24].xxxx
155: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
156: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
157: RSQ TEMP[10].x, TEMP[10].xxxx
158: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
159: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
160: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
161: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
162: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
163: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
164: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
165: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx
166: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy
167: LG2 TEMP[12].x, TEMP[12].xxxx
168: RCP TEMP[12].x, TEMP[12].xxxx
169: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx
170: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
171: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx
172: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx
173: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx
174: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
175: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww
176: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx
177: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
178: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
179: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx
180: MOV_SAT TEMP[4].x, TEMP[4].xxxx
181: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx
182: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
183: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
184: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
185: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[2].xyzz
186: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx
187: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx
188: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz
189: RCP TEMP[1].x, TEMP[1].xxxx
190: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
191: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx
192: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
193: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx
194: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
195: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
196: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
197: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
198: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
199: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
200: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
201: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[2].xyzz
202: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
203: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
204: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
205: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
206: MAD TEMP[2].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[2].xyzz
207: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww
208: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
209: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
210: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
211: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
212: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx
213: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
214: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
215: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
216: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
217: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
218: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx
219: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
220: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
221: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
222: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz
223: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz, TEMP[3].xyzz
224: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
225: MOV TEMP[0].xyz, TEMP[0].xyzx
226: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww
227: MOV_SAT TEMP[1].x, TEMP[1].xxxx
228: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
229: MOV TEMP[0].xyz, TEMP[0].xyzx
230: MOV TEMP[0].w, IMM[0].xxxx
231: MOV OUT[0], TEMP[0]
232: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
%85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0
%87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0
%97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)*
%107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0
%108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)*
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%126 = fmul float %113, %113
%127 = fmul float %114, %114
%128 = fadd float %127, %126
%129 = fmul float %115, %115
%130 = fadd float %128, %129
%131 = call float @llvm.AMDGPU.rsq.clamped.f32(float %130)
%132 = fmul float %113, %131
%133 = fmul float %114, %131
%134 = fmul float %115, %131
%135 = fmul float %120, %120
%136 = fmul float %121, %121
%137 = fadd float %136, %135
%138 = fmul float %122, %122
%139 = fadd float %137, %138
%140 = call float @llvm.AMDGPU.rsq.clamped.f32(float %139)
%141 = fmul float %120, %140
%142 = fmul float %121, %140
%143 = fmul float %122, %140
%144 = bitcast float %111 to i32
%145 = bitcast float %112 to i32
%146 = insertelement <2 x i32> undef, i32 %144, i32 0
%147 = insertelement <2 x i32> %146, i32 %145, i32 1
%148 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %147, <32 x i8> %101, <16 x i8> %104, i32 2)
%149 = extractelement <4 x float> %148, i32 0
%150 = fcmp ogt float %149, 0.000000e+00
br i1 %150, label %IF, label %ELSE
IF: ; preds = %main_body
%151 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328)
%152 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324)
%153 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%154 = fmul float %153, %79
%155 = fmul float %152, %80
%156 = fmul float %151, %81
%157 = bitcast float %111 to i32
%158 = bitcast float %112 to i32
%159 = insertelement <2 x i32> undef, i32 %157, i32 0
%160 = insertelement <2 x i32> %159, i32 %158, i32 1
%161 = bitcast <8 x i32> %96 to <32 x i8>
%162 = bitcast <4 x i32> %98 to <16 x i8>
%163 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %160, <32 x i8> %161, <16 x i8> %162, i32 2)
%164 = extractelement <4 x float> %163, i32 0
%165 = extractelement <4 x float> %163, i32 1
%166 = extractelement <4 x float> %163, i32 2
%167 = fmul float %154, %164
%168 = fmul float %155, %165
%169 = fmul float %156, %166
br label %ENDIF
ELSE: ; preds = %main_body
%170 = bitcast float %111 to i32
%171 = bitcast float %112 to i32
%172 = insertelement <2 x i32> undef, i32 %170, i32 0
%173 = insertelement <2 x i32> %172, i32 %171, i32 1
%174 = bitcast <8 x i32> %96 to <32 x i8>
%175 = bitcast <4 x i32> %98 to <16 x i8>
%176 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 2)
%177 = extractelement <4 x float> %176, i32 0
%178 = extractelement <4 x float> %176, i32 1
%179 = extractelement <4 x float> %176, i32 2
%180 = fmul float %79, %177
%181 = fmul float %80, %178
%182 = fmul float %81, %179
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp14.0 = phi float [ %169, %IF ], [ %182, %ELSE ]
%temp13.0 = phi float [ %168, %IF ], [ %181, %ELSE ]
%temp12.0 = phi float [ %167, %IF ], [ %180, %ELSE ]
%183 = call float @llvm.AMDGPU.lrp(float %82, float %temp12.0, float %69)
%184 = call float @llvm.AMDGPU.lrp(float %82, float %temp13.0, float %70)
%185 = call float @llvm.AMDGPU.lrp(float %82, float %temp14.0, float %71)
%186 = fmul float %82, %72
%187 = fsub float %72, %186
%188 = fmul float %temp12.0, %187
%189 = fmul float %temp13.0, %187
%190 = fmul float %temp14.0, %187
%191 = bitcast float %111 to i32
%192 = bitcast float %112 to i32
%193 = insertelement <2 x i32> undef, i32 %191, i32 0
%194 = insertelement <2 x i32> %193, i32 %192, i32 1
%195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %194, <32 x i8> %107, <16 x i8> %110, i32 2)
%196 = extractelement <4 x float> %195, i32 1
%197 = fsub float 1.000000e+00, %84
%198 = fmul float %196, %84
%199 = fadd float %198, %197
%200 = fmul float %132, %24
%201 = fmul float %133, %25
%202 = fadd float %201, %200
%203 = fmul float %134, %26
%204 = fadd float %202, %203
%205 = call float @llvm.maxnum.f32(float %204, float 0.000000e+00)
%206 = fmul float %27, %132
%207 = fmul float %28, %133
%208 = fadd float %206, %207
%209 = fmul float %29, %134
%210 = fadd float %208, %209
%211 = fadd float %210, %30
%212 = fmul float %31, %132
%213 = fmul float %32, %133
%214 = fadd float %212, %213
%215 = fmul float %33, %134
%216 = fadd float %214, %215
%217 = fadd float %216, %34
%218 = fmul float %35, %132
%219 = fmul float %36, %133
%220 = fadd float %218, %219
%221 = fmul float %37, %134
%222 = fadd float %220, %221
%223 = fadd float %222, %38
%224 = fadd float %116, %211
%225 = fadd float %117, %217
%226 = fadd float %118, %223
%227 = fmul float %224, %199
%228 = fmul float %225, %199
%229 = fmul float %226, %199
%230 = fmul float %132, %141
%231 = fmul float %133, %142
%232 = fadd float %231, %230
%233 = fmul float %134, %143
%234 = fadd float %232, %233
%235 = fmul float %234, %132
%236 = fmul float %234, %133
%237 = fmul float %234, %134
%238 = fmul float %235, 2.000000e+00
%239 = fmul float %236, 2.000000e+00
%240 = fmul float %237, 2.000000e+00
%241 = fsub float %141, %238
%242 = fsub float %142, %239
%243 = fsub float %143, %240
%244 = fcmp ogt float %54, 0.000000e+00
br i1 %244, label %IF73, label %ENDIF72
IF73: ; preds = %ENDIF
%245 = fmul float %241, %241
%246 = fmul float %242, %242
%247 = fadd float %246, %245
%248 = fmul float %243, %243
%249 = fadd float %247, %248
%250 = call float @llvm.AMDGPU.rsq.clamped.f32(float %249)
%251 = fmul float %241, %250
%252 = fmul float %242, %250
%253 = fmul float %243, %250
%254 = fsub float %44, %123
%255 = fsub float %45, %124
%256 = fsub float %46, %125
%257 = fdiv float 1.000000e+00, %251
%258 = fdiv float 1.000000e+00, %252
%259 = fdiv float 1.000000e+00, %253
%260 = fmul float %254, %257
%261 = fmul float %255, %258
%262 = fmul float %256, %259
%263 = fsub float %47, %123
%264 = fsub float %48, %124
%265 = fsub float %49, %125
%266 = fdiv float 1.000000e+00, %251
%267 = fdiv float 1.000000e+00, %252
%268 = fdiv float 1.000000e+00, %253
%269 = fmul float %263, %266
%270 = fmul float %264, %267
%271 = fmul float %265, %268
%272 = fcmp ogt float %251, 0.000000e+00
%273 = fcmp ogt float %252, 0.000000e+00
%274 = fcmp ogt float %253, 0.000000e+00
%. = select i1 %272, float %260, float %269
%temp64.0 = select i1 %273, float %261, float %270
%.99 = select i1 %274, float %262, float %271
%275 = fadd float %44, %47
%276 = fadd float %45, %48
%277 = fadd float %46, %49
%278 = fmul float %275, 5.000000e-01
%279 = fmul float %276, 5.000000e-01
%280 = fmul float %277, 5.000000e-01
%281 = call float @llvm.minnum.f32(float %., float %temp64.0)
%282 = call float @llvm.minnum.f32(float %281, float %.99)
%283 = fsub float %278, %51
%284 = fsub float %279, %52
%285 = fsub float %280, %53
%286 = fadd float %283, %123
%287 = fadd float %284, %124
%288 = fadd float %285, %125
%289 = fmul float %251, %282
%290 = fadd float %289, %286
%291 = fmul float %252, %282
%292 = fadd float %291, %287
%293 = fmul float %253, %282
%294 = fadd float %293, %288
%295 = fsub float %290, %278
%296 = fsub float %292, %279
%297 = fsub float %294, %280
br label %ENDIF72
ENDIF72: ; preds = %ENDIF, %IF73
%temp40.0 = phi float [ %295, %IF73 ], [ %241, %ENDIF ]
%temp41.0 = phi float [ %296, %IF73 ], [ %242, %ENDIF ]
%temp42.0 = phi float [ %297, %IF73 ], [ %243, %ENDIF ]
%298 = fsub float 1.000000e+00, %83
%299 = call float @llvm.pow.f32(float %298, float 7.500000e-01)
%300 = fmul float %299, 7.000000e+00
%301 = insertelement <4 x float> undef, float %temp40.0, i32 0
%302 = insertelement <4 x float> %301, float %temp41.0, i32 1
%303 = insertelement <4 x float> %302, float %temp42.0, i32 2
%304 = insertelement <4 x float> %303, float %300, i32 3
%305 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %304)
%306 = extractelement <4 x float> %305, i32 0
%307 = extractelement <4 x float> %305, i32 1
%308 = extractelement <4 x float> %305, i32 2
%309 = extractelement <4 x float> %305, i32 3
%310 = call float @llvm.fabs.f32(float %308)
%311 = fdiv float 1.000000e+00, %310
%312 = fmul float %306, %311
%313 = fadd float %312, 1.500000e+00
%314 = fmul float %307, %311
%315 = fadd float %314, 1.500000e+00
%316 = bitcast float %315 to i32
%317 = bitcast float %313 to i32
%318 = bitcast float %309 to i32
%319 = bitcast float %300 to i32
%320 = insertelement <4 x i32> undef, i32 %316, i32 0
%321 = insertelement <4 x i32> %320, i32 %317, i32 1
%322 = insertelement <4 x i32> %321, i32 %318, i32 2
%323 = insertelement <4 x i32> %322, i32 %319, i32 3
%324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %323, <32 x i8> %86, <16 x i8> %88, i32 4)
%325 = extractelement <4 x float> %324, i32 0
%326 = extractelement <4 x float> %324, i32 1
%327 = extractelement <4 x float> %324, i32 2
%328 = extractelement <4 x float> %324, i32 3
%329 = call float @llvm.pow.f32(float %328, float %56)
%330 = fmul float %55, %329
%331 = fmul float %330, %325
%332 = fmul float %330, %326
%333 = fmul float %330, %327
%334 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %334, label %IF85, label %ENDIF84
IF85: ; preds = %ENDIF72
%335 = fcmp ogt float %66, 0.000000e+00
br i1 %335, label %IF88, label %ENDIF87
ENDIF84: ; preds = %ENDIF72, %ENDIF87
%temp28.0 = phi float [ %563, %ENDIF87 ], [ %331, %ENDIF72 ]
%temp29.0 = phi float [ %564, %ENDIF87 ], [ %332, %ENDIF72 ]
%temp30.0 = phi float [ %565, %ENDIF87 ], [ %333, %ENDIF72 ]
%336 = fmul float %temp28.0, %199
%337 = fmul float %temp29.0, %199
%338 = fmul float %temp30.0, %199
%339 = fsub float 1.000000e+00, %83
%340 = fsub float %24, %141
%341 = fsub float %25, %142
%342 = fsub float %26, %143
%343 = fmul float %340, %340
%344 = fmul float %341, %341
%345 = fadd float %344, %343
%346 = fmul float %342, %342
%347 = fadd float %345, %346
%348 = call float @llvm.AMDGPU.rsq.clamped.f32(float %347)
%349 = fmul float %340, %348
%350 = fmul float %341, %348
%351 = fmul float %342, %348
%352 = fmul float %141, %132
%353 = fsub float -0.000000e+00, %352
%354 = fmul float %142, %133
%355 = fsub float %353, %354
%356 = fmul float %143, %134
%357 = fsub float %355, %356
%358 = call float @llvm.maxnum.f32(float %357, float 0.000000e+00)
%359 = fmul float %24, %349
%360 = fmul float %25, %350
%361 = fadd float %360, %359
%362 = fmul float %26, %351
%363 = fadd float %361, %362
%364 = call float @llvm.maxnum.f32(float %363, float 0.000000e+00)
%365 = fmul float %339, %339
%366 = fmul float %365, %78
%367 = fsub float 1.000000e+00, %339
%368 = fmul float %367, 0x3FEEF9DB20000000
%369 = fadd float %368, 0x3F9EB851E0000000
%370 = call float @llvm.log2.f32(float %369)
%371 = fdiv float 1.000000e+00, %370
%372 = fmul float %371, 1.000000e+01
%373 = fmul float %372, %372
%374 = fsub float 1.000000e+00, %205
%375 = fsub float 1.000000e+00, %358
%376 = fmul float %364, 2.000000e+00
%377 = fmul float %364, %339
%378 = fmul float %376, %377
%379 = fadd float %378, 5.000000e-01
%380 = fsub float 1.000000e+00, %364
%381 = fsub float 1.000000e+00, %358
%382 = fsub float 1.000000e+00, %187
%383 = fadd float %83, %382
%384 = call float @llvm.AMDIL.clamp.(float %383, float 0.000000e+00, float 1.000000e+00)
%385 = fmul float %381, %381
%386 = fmul float %381, %381
%387 = fmul float %386, %381
%388 = fmul float %385, %387
%389 = call float @llvm.AMDGPU.lrp(float %388, float %384, float %183)
%390 = call float @llvm.AMDGPU.lrp(float %388, float %384, float %184)
%391 = call float @llvm.AMDGPU.lrp(float %388, float %384, float %185)
%392 = call float @llvm.AMDGPU.lrp(float %205, float 1.000000e+00, float %366)
%393 = call float @llvm.AMDGPU.lrp(float %358, float 1.000000e+00, float %366)
%394 = fmul float %392, %393
%395 = fadd float %394, 0x3F1A36E2E0000000
%396 = fdiv float 1.000000e+00, %395
%397 = fmul float %132, %349
%398 = fmul float %133, %350
%399 = fadd float %398, %397
%400 = fmul float %134, %351
%401 = fadd float %399, %400
%402 = call float @llvm.maxnum.f32(float %401, float 0.000000e+00)
%403 = call float @llvm.pow.f32(float %402, float %373)
%404 = fadd float %373, 1.000000e+00
%405 = fmul float %404, %77
%406 = fmul float %403, %405
%407 = fmul float %396, %406
%408 = fmul float %407, %205
%409 = fmul float %408, %76
%410 = call float @llvm.maxnum.f32(float %409, float 0.000000e+00)
%411 = fmul float %410, %73
%412 = fmul float %410, %74
%413 = fmul float %410, %75
%414 = fsub float 1.000000e+00, %183
%415 = fsub float 1.000000e+00, %184
%416 = fsub float 1.000000e+00, %185
%417 = fmul float %380, %380
%418 = fmul float %380, %380
%419 = fmul float %418, %380
%420 = fmul float %417, %419
%421 = fmul float %414, %420
%422 = fadd float %421, %183
%423 = fmul float %415, %420
%424 = fadd float %423, %184
%425 = fmul float %416, %420
%426 = fadd float %425, %185
%427 = fadd float %379, -1.000000e+00
%428 = fmul float %374, %374
%429 = fmul float %374, %374
%430 = fmul float %429, %374
%431 = fmul float %428, %430
%432 = fmul float %427, %431
%433 = fadd float %432, 1.000000e+00
%434 = fadd float %379, -1.000000e+00
%435 = fmul float %375, %375
%436 = fmul float %375, %375
%437 = fmul float %436, %375
%438 = fmul float %435, %437
%439 = fmul float %434, %438
%440 = fadd float %439, 1.000000e+00
%441 = fmul float %433, %440
%442 = fmul float %441, %205
%443 = fmul float %73, %442
%444 = fadd float %443, %227
%445 = fmul float %74, %442
%446 = fadd float %445, %228
%447 = fmul float %75, %442
%448 = fadd float %447, %229
%449 = fmul float %188, %444
%450 = fmul float %189, %446
%451 = fmul float %190, %448
%452 = fmul float %411, %422
%453 = fadd float %452, %449
%454 = fmul float %412, %424
%455 = fadd float %454, %450
%456 = fmul float %413, %426
%457 = fadd float %456, %451
%458 = fmul float %336, %389
%459 = fadd float %458, %453
%460 = fmul float %337, %390
%461 = fadd float %460, %455
%462 = fmul float %338, %391
%463 = fadd float %462, %457
%464 = fmul float %119, %42
%465 = fadd float %464, %43
%466 = call float @llvm.AMDIL.clamp.(float %465, float 0.000000e+00, float 1.000000e+00)
%467 = call float @llvm.AMDGPU.lrp(float %466, float %459, float %39)
%468 = call float @llvm.AMDGPU.lrp(float %466, float %461, float %40)
%469 = call float @llvm.AMDGPU.lrp(float %466, float %463, float %41)
%470 = call i32 @llvm.SI.packf16(float %467, float %468)
%471 = bitcast i32 %470 to float
%472 = call i32 @llvm.SI.packf16(float %469, float 1.000000e+00)
%473 = bitcast i32 %472 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %471, float %473, float %471, float %473)
ret void
IF88: ; preds = %IF85
%474 = fmul float %241, %241
%475 = fmul float %242, %242
%476 = fadd float %475, %474
%477 = fmul float %243, %243
%478 = fadd float %476, %477
%479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %478)
%480 = fmul float %241, %479
%481 = fmul float %242, %479
%482 = fmul float %243, %479
%483 = fsub float %57, %123
%484 = fsub float %58, %124
%485 = fsub float %59, %125
%486 = fdiv float 1.000000e+00, %480
%487 = fdiv float 1.000000e+00, %481
%488 = fdiv float 1.000000e+00, %482
%489 = fmul float %483, %486
%490 = fmul float %484, %487
%491 = fmul float %485, %488
%492 = fsub float %60, %123
%493 = fsub float %61, %124
%494 = fsub float %62, %125
%495 = fdiv float 1.000000e+00, %480
%496 = fdiv float 1.000000e+00, %481
%497 = fdiv float 1.000000e+00, %482
%498 = fmul float %492, %495
%499 = fmul float %493, %496
%500 = fmul float %494, %497
%501 = fcmp ogt float %480, 0.000000e+00
%502 = fcmp ogt float %481, 0.000000e+00
%503 = fcmp ogt float %482, 0.000000e+00
%.100 = select i1 %501, float %489, float %498
%temp64.1 = select i1 %502, float %490, float %499
%.101 = select i1 %503, float %491, float %500
%504 = fadd float %57, %60
%505 = fadd float %58, %61
%506 = fadd float %59, %62
%507 = fmul float %504, 5.000000e-01
%508 = fmul float %505, 5.000000e-01
%509 = fmul float %506, 5.000000e-01
%510 = call float @llvm.minnum.f32(float %.100, float %temp64.1)
%511 = call float @llvm.minnum.f32(float %510, float %.101)
%512 = fsub float %507, %63
%513 = fsub float %508, %64
%514 = fsub float %509, %65
%515 = fadd float %512, %123
%516 = fadd float %513, %124
%517 = fadd float %514, %125
%518 = fmul float %480, %511
%519 = fadd float %518, %515
%520 = fmul float %481, %511
%521 = fadd float %520, %516
%522 = fmul float %482, %511
%523 = fadd float %522, %517
%524 = fsub float %519, %507
%525 = fsub float %521, %508
%526 = fsub float %523, %509
br label %ENDIF87
ENDIF87: ; preds = %IF85, %IF88
%temp44.0 = phi float [ %524, %IF88 ], [ %241, %IF85 ]
%temp45.0 = phi float [ %525, %IF88 ], [ %242, %IF85 ]
%temp46.0 = phi float [ %526, %IF88 ], [ %243, %IF85 ]
%527 = fsub float 1.000000e+00, %83
%528 = call float @llvm.pow.f32(float %527, float 7.500000e-01)
%529 = fmul float %528, 7.000000e+00
%530 = insertelement <4 x float> undef, float %temp44.0, i32 0
%531 = insertelement <4 x float> %530, float %temp45.0, i32 1
%532 = insertelement <4 x float> %531, float %temp46.0, i32 2
%533 = insertelement <4 x float> %532, float %529, i32 3
%534 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %533)
%535 = extractelement <4 x float> %534, i32 0
%536 = extractelement <4 x float> %534, i32 1
%537 = extractelement <4 x float> %534, i32 2
%538 = extractelement <4 x float> %534, i32 3
%539 = call float @llvm.fabs.f32(float %537)
%540 = fdiv float 1.000000e+00, %539
%541 = fmul float %535, %540
%542 = fadd float %541, 1.500000e+00
%543 = fmul float %536, %540
%544 = fadd float %543, 1.500000e+00
%545 = bitcast float %544 to i32
%546 = bitcast float %542 to i32
%547 = bitcast float %538 to i32
%548 = bitcast float %529 to i32
%549 = insertelement <4 x i32> undef, i32 %545, i32 0
%550 = insertelement <4 x i32> %549, i32 %546, i32 1
%551 = insertelement <4 x i32> %550, i32 %547, i32 2
%552 = insertelement <4 x i32> %551, i32 %548, i32 3
%553 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %552, <32 x i8> %91, <16 x i8> %94, i32 4)
%554 = extractelement <4 x float> %553, i32 0
%555 = extractelement <4 x float> %553, i32 1
%556 = extractelement <4 x float> %553, i32 2
%557 = extractelement <4 x float> %553, i32 3
%558 = call float @llvm.pow.f32(float %557, float %68)
%559 = fmul float %67, %558
%560 = fmul float %559, %554
%561 = fmul float %559, %555
%562 = fmul float %559, %556
%563 = call float @llvm.AMDGPU.lrp(float %50, float %331, float %560)
%564 = call float @llvm.AMDGPU.lrp(float %50, float %332, float %561)
%565 = call float @llvm.AMDGPU.lrp(float %50, float %333, float %562)
br label %ENDIF84
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata for the module above.
; Every `declare` here is body-less: it only states the signature of a
; function that the preceding function body invokes through `call`.
; The trailing `#N` on a declaration selects one of the `attributes #N`
; groups listed at the end of this section.
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
; Returns one float from a <16 x i8> operand plus an i32 operand; the visible
; call sites pass literal i32 values (320, 324, 328) as the second operand.
; NOTE(review): presumably a constant-buffer descriptor and a byte offset - confirm.
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
; Returns one float; the visible call sites pass two literal i32 values
; followed by an i32 and a <2 x i32> taken from the function's arguments.
; NOTE(review): the two literals look like a channel and an attribute index - confirm.
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
; float -> float. In the body it is applied to a sum of three squares and the
; result is multiplied back onto the same three components.
; NOTE(review): presumably a clamped reciprocal square root - confirm.
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
; Returns <4 x float>. Call sites build the <2 x i32> operand from two floats
; bitcast to i32 and pass the literal i32 2 as the last operand.
; NOTE(review): the <32 x i8> / <16 x i8> operands are presumably the image and
; sampler descriptors - confirm.
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
; Three floats in, one float out. Note this uses group #2 (no `nounwind`).
; NOTE(review): presumably lrp(a, b, c) = a*b + (1-a)*c - confirm.
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
; Standard LLVM floating-point maximum intrinsic; the body calls it with
; 0.0 as the second operand.
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
; Standard LLVM floating-point minimum intrinsic; the body chains two calls to
; reduce three values to one.
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
; Standard LLVM power intrinsic (first operand raised to the second).
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
; <4 x float> -> <4 x float>. The body packs three direction components and a
; fourth float into the operand and extracts all four result lanes.
; Uses group #2 (no `nounwind`).
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
; Standard LLVM absolute-value intrinsic.
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
; Returns <4 x float>. Call sites pass a <4 x i32> built from four bitcast
; floats and the literal i32 4 as the last operand.
; NOTE(review): presumably a sample with explicit LOD in the fourth lane - confirm.
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
; Standard LLVM base-2 logarithm intrinsic.
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
; Three floats in, one float out; every visible call passes 0.0 and 1.0 as the
; second and third operands. The name ends in a trailing '.' exactly as
; emitted by the generator. Uses group #2 (no `nounwind`).
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
; Two floats in, one i32 out; the body bitcasts each result back to float
; before handing it to @llvm.SI.export.
declare i32 @llvm.SI.packf16(float, float) #1
; No attribute group is attached to this declaration, so unlike the
; declarations above it is not marked `readnone`. Returns void and takes
; five i32 operands followed by four floats.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0 is not referenced by any declaration in this section.
; NOTE(review): presumably attached to the `define` line of @main, which is
; outside this section - confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; Group #1: shared by every declaration above that ends in `#1`.
attributes #1 = { nounwind readnone }
; Group #2: shared by @llvm.AMDGPU.lrp, @llvm.AMDGPU.cube and @llvm.AMDIL.clamp.
attributes #2 = { readnone }
; Metadata node !0. No instruction in this section refers to it.
; NOTE(review): presumably the `!tbaa` tag used by loads earlier in the
; function - confirm.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000
v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001
v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100
v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101
v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400
v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401
v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500
v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501
v_interp_p1_f32 v13, v0, 2, 1, [m0] ; C8340600
v_interp_p2_f32 v13, [v13], v1, 2, 1, [m0] ; C8350601
v_interp_p1_f32 v7, v0, 0, 2, [m0] ; C81C0800
v_interp_p2_f32 v7, [v7], v1, 0, 2, [m0] ; C81D0801
v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900
v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901
v_interp_p1_f32 v9, v0, 2, 2, [m0] ; C8240A00
v_interp_p2_f32 v9, [v9], v1, 2, 2, [m0] ; C8250A01
v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00
v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01
v_interp_p1_f32 v23, v0, 1, 3, [m0] ; C85C0D00
v_interp_p2_f32 v23, [v23], v1, 1, 3, [m0] ; C85D0D01
v_interp_p1_f32 v24, v0, 2, 3, [m0] ; C8600E00
v_interp_p2_f32 v24, [v24], v1, 2, 3, [m0] ; C8610E01
v_interp_p1_f32 v25, v0, 3, 3, [m0] ; C8640F00
v_interp_p2_f32 v25, [v25], v1, 3, 3, [m0] ; C8650F01
v_interp_p1_f32 v21, v0, 0, 4, [m0] ; C8541000
v_interp_p2_f32 v21, [v21], v1, 0, 4, [m0] ; C8551001
v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100
v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101
v_interp_p1_f32 v22, v0, 2, 4, [m0] ; C8581200
v_mul_f32_e32 v0, v3, v3 ; 10000703
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x4c ; C204014C
s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D
s_buffer_load_dword s14, s[0:3], 0x4e ; C207014E
s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C
s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718
s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508
s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710
v_mac_f32_e32 v0, v13, v13 ; 3E001B0D
v_rsq_clamp_f32_e32 v14, v0 ; 7E1C5900
v_mul_f32_e32 v0, v23, v23 ; 10002F17
v_mac_f32_e32 v0, v24, v24 ; 3E003118
v_mac_f32_e32 v0, v25, v25 ; 3E003319
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_interp_p2_f32 v22, [v22], v1, 2, 4, [m0] ; C8591201
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[32:39], s[28:31] ; F0800100 00E80111
s_waitcnt vmcnt(0) ; BF8C0770
v_cmp_nlt_f32_e32 vcc, 0, v1 ; 7C1C0280
s_and_saveexec_b64 s[28:29], vcc ; BE9C246A
s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[24:27] ; F0800700 00C41A11
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v4, s8, v26 ; 10083408
v_mul_f32_e32 v5, s12, v27 ; 100A360C
v_mul_f32_e32 v6, s14, v28 ; 100C380E
s_or_saveexec_b64 s[28:29], s[28:29] ; BE9C251C
s_buffer_load_dword s9, s[0:3], 0x2b ; C204812B
s_buffer_load_dword s10, s[0:3], 0x40 ; C2050140
s_buffer_load_dword s11, s[0:3], 0x41 ; C2058141
s_buffer_load_dword s13, s[0:3], 0x42 ; C2068142
s_buffer_load_dword s15, s[0:3], 0x5c ; C207815C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[28:29] ; 89FE1C7E
s_cbranch_execz BB0_4 ; BF880000
v_mov_b32_e32 v1, s8 ; 7E020208
v_mov_b32_e32 v4, s12 ; 7E08020C
v_mov_b32_e32 v5, s14 ; 7E0A020E
s_buffer_load_dword s30, s[0:3], 0x50 ; C20F0150
s_buffer_load_dword s31, s[0:3], 0x51 ; C20F8151
s_buffer_load_dword s32, s[0:3], 0x52 ; C2100152
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[24:27] ; F0800700 00C41A11
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s30, v1 ; 1002021E
v_mul_f32_e32 v6, s31, v4 ; 100C081F
v_mul_f32_e32 v11, s32, v5 ; 10160A20
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v4, v26, v1 ; 1008031A
v_mul_f32_e32 v5, v27, v6 ; 100A0D1B
v_mul_f32_e32 v6, v28, v11 ; 100C171C
s_or_b64 exec, exec, s[28:29] ; 88FE1C7E
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
v_mul_f32_e32 v12, v14, v3 ; 1018070E
v_mul_f32_e32 v11, v14, v10 ; 1016150E
v_mul_f32_e32 v10, v14, v13 ; 10141B0E
v_mul_f32_e32 v14, v0, v23 ; 101C2F00
s_load_dwordx8 s[20:27], s[6:7], 0x20 ; C0CA0720
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[20:27], s[16:19] ; F0800F00 00851111
v_mul_f32_e32 v15, v0, v24 ; 101E3100
v_mul_f32_e32 v1, v14, v12 ; 1002190E
v_mac_f32_e32 v1, v15, v11 ; 3E02170F
v_mul_f32_e32 v13, v0, v25 ; 101A3300
v_mac_f32_e32 v1, v13, v10 ; 3E02150D
v_mul_f32_e32 v3, v12, v1 ; 1006030C
v_mac_f32_e32 v3, v12, v1 ; 3E06030C
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v17, v11, v1 ; 1022030B
v_mac_f32_e32 v17, v11, v1 ; 3E22030B
v_mad_f32 v23, v23, v0, -v3 ; D2820017 840E0117
v_mad_f32 v24, v24, v0, -v17 ; D2820018 84460118
v_mul_f32_e32 v3, v10, v1 ; 1006030A
v_mac_f32_e32 v3, v10, v1 ; 3E06030A
s_buffer_load_dword s12, s[0:3], 0x27 ; C2060127
s_buffer_load_dword s29, s[0:3], 0x2c ; C20E812C
s_buffer_load_dword s30, s[0:3], 0x2d ; C20F012D
s_buffer_load_dword s8, s[0:3], 0x60 ; C2040160
v_mad_f32 v25, v25, v0, -v3 ; D2820019 840E0119
v_sub_f32_e64 v0, 1.0, s15 ; D2080000 00001EF2
v_mul_f32_e32 v3, s10, v0 ; 1006000A
v_mul_f32_e32 v1, s11, v0 ; 1002000B
v_mul_f32_e32 v0, s13, v0 ; 1000000D
v_mac_f32_e32 v3, s15, v4 ; 3E06080F
v_mov_b32_e32 v26, v23 ; 7E340317
v_mac_f32_e32 v1, s15, v5 ; 3E020A0F
v_mov_b32_e32 v27, v24 ; 7E360318
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mov_b32_e32 v28, v25 ; 7E380319
v_cmp_lt_f32_e64 s[10:11], 0, s9 ; D002000A 00001280
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[20:21], s[10:11] ; BE94240A
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s9, s[0:3], 0x26 ; C2048126
s_buffer_load_dword s10, s[0:3], 0x28 ; C2050128
s_buffer_load_dword s11, s[0:3], 0x29 ; C2058129
s_buffer_load_dword s13, s[0:3], 0x2a ; C206812A
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s16, s[0:3], 0x21 ; C2080121
s_buffer_load_dword s17, s[0:3], 0x22 ; C2088122
s_buffer_load_dword s18, s[0:3], 0x24 ; C2090124
s_buffer_load_dword s19, s[0:3], 0x25 ; C2098125
v_mul_f32_e32 v17, v23, v23 ; 10222F17
v_mac_f32_e32 v17, v24, v24 ; 3E223118
v_mac_f32_e32 v17, v25, v25 ; 3E223319
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v19, s9, v22 ; 08262C09
v_mov_b32_e32 v20, s9 ; 7E280209
v_sub_f32_e32 v26, s14, v21 ; 08342A0E
v_sub_f32_e32 v27, s16, v16 ; 08362010
v_add_f32_e32 v20, s17, v20 ; 06282811
v_sub_f32_e32 v28, s17, v22 ; 08382C11
v_mad_f32 v29, 0.5, v20, -s13 ; D282001D 803628F0
v_add_f32_e32 v29, v22, v29 ; 063A3B16
v_mul_f32_e32 v30, v17, v23 ; 103C2F11
v_mul_f32_e32 v31, v17, v24 ; 103E3111
v_mul_f32_e32 v17, v17, v25 ; 10223311
v_rcp_f32_e32 v32, v30 ; 7E40551E
v_rcp_f32_e32 v33, v31 ; 7E42551F
v_rcp_f32_e32 v34, v17 ; 7E445511
v_sub_f32_e32 v35, s18, v21 ; 08462A12
v_mov_b32_e32 v36, s18 ; 7E480212
v_add_f32_e32 v36, s14, v36 ; 0648480E
v_mul_f32_e32 v26, v32, v26 ; 10343520
v_mul_f32_e32 v27, v33, v27 ; 10363721
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mul_f32_e32 v32, v32, v35 ; 10404720
v_sub_f32_e32 v35, s19, v16 ; 08462013
v_mov_b32_e32 v37, s19 ; 7E4A0213
v_mul_f32_e32 v33, v33, v35 ; 10424721
v_mul_f32_e32 v19, v34, v19 ; 10262722
v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80
v_cndmask_b32_e32 v26, v32, v26 ; 00343520
v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80
v_cndmask_b32_e32 v27, v33, v27 ; 00363721
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v19, v19, v28 ; 00263913
v_add_f32_e32 v28, s16, v37 ; 06384A10
v_min3_f32 v19, v26, v27, v19 ; D2A20013 044E371A
v_mad_f32 v26, 0.5, v36, -s10 ; D282001A 802A48F0
v_mad_f32 v27, 0.5, v28, -s11 ; D282001B 802E38F0
v_add_f32_e32 v26, v21, v26 ; 06343515
v_add_f32_e32 v27, v16, v27 ; 06363710
v_mac_f32_e32 v26, v19, v30 ; 3E343D13
v_mac_f32_e32 v27, v19, v31 ; 3E363F13
v_mac_f32_e32 v29, v19, v17 ; 3E3A2313
v_mad_f32 v26, 0.5, -v36, v26 ; D282001A 446A48F0
v_mad_f32 v27, 0.5, -v28, v27 ; D282001B 446E38F0
v_mad_f32 v28, 0.5, -v20, v29 ; D282001C 447628F0
s_or_b64 exec, exec, s[20:21] ; 88FE147E
s_buffer_load_dword s14, s[0:3], 0x17 ; C2070117
s_buffer_load_dword s16, s[0:3], 0x43 ; C2080143
s_buffer_load_dword s13, s[0:3], 0x68 ; C2068168
s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100
s_buffer_load_dword s10, s[0:3], 0x1 ; C2050101
s_buffer_load_dword s11, s[0:3], 0x2 ; C2058102
s_buffer_load_dword s17, s[0:3], 0x4 ; C2088104
s_buffer_load_dword s18, s[0:3], 0x5 ; C2090105
s_buffer_load_dword s19, s[0:3], 0x6 ; C2098106
s_buffer_load_dword s21, s[0:3], 0x7 ; C20A8107
s_buffer_load_dword s20, s[0:3], 0x8 ; C20A0108
s_buffer_load_dword s22, s[0:3], 0x9 ; C20B0109
s_buffer_load_dword s23, s[0:3], 0xa ; C20B810A
s_buffer_load_dword s24, s[0:3], 0xb ; C20C010B
s_buffer_load_dword s25, s[0:3], 0xc ; C20C810C
s_buffer_load_dword s26, s[0:3], 0xd ; C20D010D
s_buffer_load_dword s27, s[0:3], 0xe ; C20D810E
s_buffer_load_dword s28, s[0:3], 0xf ; C20E010F
v_sub_f32_e64 v17, 1.0, s8 ; D2080011 000010F2
v_log_f32_e32 v17, v17 ; 7E224F11
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v29, 0x40e00000, v17 ; 103A22FF 40E00000
v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A
v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A
v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A
v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A
v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000
v_rcp_f32_e64 v17, |v32| ; D3540111 00000120
v_mad_f32 v27, v17, v30, v26 ; D282001B 046A3D11
v_mac_f32_e32 v26, v17, v31 ; 3E343F11
v_mov_b32_e32 v28, v33 ; 7E380321
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v29 ; 7E224F1D
v_mul_legacy_f32_e32 v17, s30, v17 ; 0E22221E
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s29, v17 ; 1022221D
v_mul_f32_e32 v20, v26, v17 ; 1028231A
v_mul_f32_e32 v19, v27, v17 ; 1026231B
v_mul_f32_e32 v17, v28, v17 ; 1022231C
v_mov_b32_e32 v27, s15 ; 7E36020F
v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C
s_and_saveexec_b64 s[30:31], vcc ; BE9E246A
s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E
s_cbranch_execz BB0_10 ; BF880000
s_buffer_load_dword s32, s[0:3], 0x3b ; C210013B
s_buffer_load_dword s15, s[0:3], 0x3c ; C207813C
s_buffer_load_dword s29, s[0:3], 0x3d ; C20E813D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_11 ; BF880000
s_buffer_load_dword s34, s[0:3], 0x36 ; C2110136
s_buffer_load_dword s35, s[0:3], 0x38 ; C2118138
s_buffer_load_dword s36, s[0:3], 0x39 ; C2120139
s_buffer_load_dword s37, s[0:3], 0x3a ; C212813A
s_buffer_load_dword s38, s[0:3], 0x30 ; C2130130
s_buffer_load_dword s39, s[0:3], 0x31 ; C2138131
s_buffer_load_dword s40, s[0:3], 0x32 ; C2140132
s_buffer_load_dword s41, s[0:3], 0x34 ; C2148134
s_buffer_load_dword s42, s[0:3], 0x35 ; C2150135
v_mul_f32_e32 v26, v23, v23 ; 10342F17
v_mac_f32_e32 v26, v24, v24 ; 3E343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v28, s34, v22 ; 08382C22
v_mov_b32_e32 v29, s34 ; 7E3A0222
v_sub_f32_e32 v30, s38, v21 ; 083C2A26
v_sub_f32_e32 v31, s39, v16 ; 083E2027
v_add_f32_e32 v29, s40, v29 ; 063A3A28
v_sub_f32_e32 v32, s40, v22 ; 08402C28
v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0
v_add_f32_e32 v22, v22, v33 ; 062C4316
v_mul_f32_e32 v23, v26, v23 ; 102E2F1A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_rcp_f32_e32 v26, v23 ; 7E345517
v_rcp_f32_e32 v33, v24 ; 7E425518
v_rcp_f32_e32 v34, v25 ; 7E445519
v_sub_f32_e32 v35, s41, v21 ; 08462A29
v_mov_b32_e32 v36, s41 ; 7E480229
v_add_f32_e32 v36, s38, v36 ; 06484826
v_mul_f32_e32 v30, v26, v30 ; 103C3D1A
v_mul_f32_e32 v26, v26, v35 ; 1034471A
v_mul_f32_e32 v31, v33, v31 ; 103E3F21
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0
v_add_f32_e32 v21, v21, v34 ; 062A4515
v_sub_f32_e32 v34, s42, v16 ; 0844202A
v_mov_b32_e32 v35, s42 ; 7E46022A
v_mul_f32_e32 v33, v33, v34 ; 10424521
v_add_f32_e32 v34, s39, v35 ; 06444627
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A
v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080
v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v28, v28, v32 ; 0038411C
v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A
v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0
v_add_f32_e32 v16, v16, v28 ; 06203910
v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A
v_mac_f32_e32 v16, v26, v24 ; 3E20311A
v_mac_f32_e32 v22, v26, v25 ; 3E2C331A
v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0
v_mad_f32 v24, 0.5, -v34, v16 ; D2820018 444244F0
v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_sub_f32_e64 v16, 1.0, s8 ; D2080010 000010F2
v_log_f32_e32 v16, v16 ; 7E204F10
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v26, 0x40e00000, v16 ; 103420FF 40E00000
v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117
v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117
v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117
v_rcp_f32_e64 v16, |v30| ; D3540110 0000011E
v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000
v_mad_f32 v24, v16, v28, v23 ; D2820018 045E3910
v_mac_f32_e32 v23, v16, v29 ; 3E2E3B10
v_mov_b32_e32 v25, v31 ; 7E32031F
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v16, v24 ; 7E204F18
v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2
v_mul_legacy_f32_e32 v16, s29, v16 ; 0E20201D
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v16, s15, v16 ; 1020200F
v_mul_f32_e32 v21, v21, v16 ; 102A2115
v_mul_f32_e32 v22, v22, v16 ; 102C2116
v_mul_f32_e32 v16, v23, v16 ; 10202117
v_mul_f32_e32 v21, v21, v24 ; 102A3115
v_mul_f32_e32 v22, v22, v24 ; 102C3116
v_mul_f32_e32 v16, v16, v24 ; 10203110
v_mac_f32_e32 v21, s12, v20 ; 3E2A280C
v_mac_f32_e32 v22, s12, v19 ; 3E2C260C
v_mac_f32_e32 v16, s12, v17 ; 3E20220C
v_mov_b32_e32 v17, v16 ; 7E220310
v_mov_b32_e32 v19, v22 ; 7E260316
v_mov_b32_e32 v20, v21 ; 7E280315
s_or_b64 exec, exec, s[30:31] ; 88FE1E7E
v_mad_f32 v22, -v27, s16, s16 ; D2820016 2040211B
v_mov_b32_e32 v16, s14 ; 7E20020E
v_mul_f32_e32 v21, v22, v4 ; 102A0916
v_mul_f32_e32 v5, v22, v5 ; 100A0B16
v_mul_f32_e32 v4, v22, v6 ; 10080D16
v_mul_f32_e32 v6, s18, v11 ; 100C1612
v_mac_f32_e32 v6, s17, v12 ; 3E0C1811
v_mac_f32_e32 v6, s19, v10 ; 3E0C1413
v_add_f32_e32 v6, s21, v6 ; 060C0C15
v_add_f32_e32 v7, v6, v7 ; 060E0F06
v_mul_f32_e32 v6, s22, v11 ; 100C1616
v_mac_f32_e32 v6, s20, v12 ; 3E0C1814
v_mac_f32_e32 v6, s23, v10 ; 3E0C1417
v_add_f32_e32 v6, s24, v6 ; 060C0C18
v_add_f32_e32 v8, v6, v8 ; 06101106
v_mul_f32_e32 v6, s26, v11 ; 100C161A
v_mac_f32_e32 v6, s25, v12 ; 3E0C1819
v_mac_f32_e32 v6, s27, v10 ; 3E0C141B
v_add_f32_e32 v6, s28, v6 ; 060C0C1C
v_add_f32_e32 v24, v6, v9 ; 06301306
s_buffer_load_dword s6, s[0:3], 0x10 ; C2030110
s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111
s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112
s_buffer_load_dword s17, s[0:3], 0x16 ; C2088116
s_buffer_load_dword s14, s[0:3], 0x44 ; C2070144
s_buffer_load_dword s7, s[0:3], 0x45 ; C2038145
s_buffer_load_dword s12, s[0:3], 0x46 ; C2060146
s_buffer_load_dword s15, s[0:3], 0x48 ; C2078148
s_buffer_load_dword s16, s[0:3], 0x49 ; C2080149
s_buffer_load_dword s0, s[0:3], 0x4b ; C200014B
v_sub_f32_e64 v23, 1.0, s13 ; D2080017 00001AF2
v_mac_f32_e32 v23, s13, v18 ; 3E2E240D
v_mul_f32_e32 v6, s9, v12 ; 100C1809
v_mac_f32_e32 v6, s10, v11 ; 3E0C160A
v_mac_f32_e32 v6, s11, v10 ; 3E0C140B
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_mul_f32_e32 v9, v23, v7 ; 10120F17
v_mul_f32_e32 v7, v23, v8 ; 100E1117
v_mul_f32_e32 v8, v23, v24 ; 10103117
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v16, s17, v2 ; 3E200411
v_mul_f32_e32 v2, v23, v20 ; 10042917
v_mul_f32_e32 v18, v23, v19 ; 10242717
v_mul_f32_e32 v17, v23, v17 ; 10222317
v_sub_f32_e32 v19, 1.0, v22 ; 08262CF2
v_add_f32_e32 v19, s8, v19 ; 06262608
v_sub_f32_e64 v20, 1.0, s8 ; D2080014 000010F2
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v22, s9, v14 ; 082C1C09
v_sub_f32_e32 v23, s10, v15 ; 082E1E0A
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s11, v13 ; 08321A0B
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
v_mul_f32_e32 v14, v14, v12 ; 101C190E
v_mad_f32 v14, -v15, v11, -v14 ; D282000E A43A170F
v_mul_f32_e32 v12, v22, v12 ; 10181916
v_mac_f32_e32 v12, v23, v11 ; 3E181717
v_mul_f32_e32 v11, s9, v22 ; 10162C09
v_mac_f32_e32 v11, s10, v23 ; 3E162E0A
v_mad_f32 v13, -v13, v10, v14 ; D282000D 243A150D
v_mac_f32_e32 v11, s11, v24 ; 3E16300B
v_mac_f32_e32 v12, v24, v10 ; 3E181518
v_max_f32_e32 v10, 0, v11 ; 20141680
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v14, v11, v11 ; 101C170B
v_mul_f32_e32 v11, v11, v14 ; 10161D0B
v_mul_f32_e32 v11, v11, v14 ; 10161D0B
v_max_f32_e32 v13, 0, v13 ; 201A1A80
v_sub_f32_e32 v14, 1.0, v13 ; 081C1AF2
v_mul_f32_e32 v15, v14, v14 ; 101E1D0E
v_mul_f32_e32 v22, v14, v15 ; 102C1F0E
v_mad_f32 v23, -v15, v22, 1.0 ; D2820017 23CA2D0F
v_mul_f32_e32 v24, v3, v23 ; 10302F03
v_sub_f32_e32 v25, 1.0, v3 ; 083206F2
v_mac_f32_e32 v3, v11, v25 ; 3E06330B
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v11, v26 ; 3E02350B
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v11, v26 ; 3E00350B
v_sub_f32_e32 v11, 1.0, v20 ; 081628F2
v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F
v_madmk_f32_e32 v11, v11, v26, 0x3f77ced9 ; 4016350B 3F77CED9
v_add_f32_e32 v26, v10, v10 ; 0634150A
v_mul_f32_e32 v10, v20, v10 ; 10141514
v_mad_f32 v10, v26, v10, 0.5 ; D282000A 03C2151A
v_mul_f32_e32 v15, v22, v15 ; 101E1F16
v_mac_f32_e32 v24, v19, v15 ; 3E301F13
v_mac_f32_e32 v25, v19, v15 ; 3E321F13
v_mac_f32_e32 v23, v19, v15 ; 3E2E1F13
v_mul_f32_e32 v19, v20, v20 ; 10262914
v_log_f32_e32 v11, v11 ; 7E164F0B
v_mul_f32_e32 v19, s0, v19 ; 10262600
v_mul_f32_e32 v14, v19, v14 ; 101C1D13
v_mac_f32_e32 v14, 1.0, v13 ; 3E1C1AF2
v_rcp_f32_e32 v11, v11 ; 7E16550B
v_sub_f32_e32 v13, 1.0, v6 ; 081A0CF2
v_mul_f32_e32 v19, v19, v13 ; 10261B13
v_mac_f32_e32 v19, 1.0, v6 ; 3E260CF2
v_max_f32_e32 v12, 0, v12 ; 20181880
v_log_f32_e32 v12, v12 ; 7E184F0C
v_madak_f32_e32 v14, v19, v14, 0x38d1b717 ; 421C1D13 38D1B717
v_mul_f32_e32 v11, 0x41200000, v11 ; 101616FF 41200000
v_mul_f32_e32 v19, v11, v11 ; 1026170B
v_mul_legacy_f32_e32 v12, v19, v12 ; 0E181913
v_rcp_f32_e32 v14, v14 ; 7E1C550E
v_mad_f32 v11, v11, v11, 1.0 ; D282000B 03CA170B
v_mul_f32_e32 v11, s16, v11 ; 10161610
v_exp_f32_e32 v12, v12 ; 7E184B0C
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mul_f32_e32 v11, v11, v14 ; 10161D0B
v_mul_f32_e32 v11, v6, v11 ; 10161706
v_mul_f32_e32 v11, s15, v11 ; 1016160F
v_mul_f32_e32 v12, v13, v13 ; 10181B0D
v_mul_f32_e32 v13, v13, v12 ; 101A190D
v_mul_f32_e32 v12, v13, v12 ; 1018190D
v_add_f32_e32 v10, -1.0, v10 ; 061414F3
v_mad_f32 v12, v10, v12, 1.0 ; D282000C 03CA190A
v_mad_f32 v10, v10, v15, 1.0 ; D282000A 03CA1F0A
v_mul_f32_e32 v10, v10, v12 ; 1014190A
v_mul_f32_e32 v6, v6, v10 ; 100C1506
v_mac_f32_e32 v9, s14, v6 ; 3E120C0E
v_mul_f32_e32 v9, v9, v21 ; 10122B09
v_max_f32_e32 v10, 0, v11 ; 20141680
v_mul_f32_e32 v11, s14, v10 ; 1016140E
v_mac_f32_e32 v9, v3, v11 ; 3E121703
v_mac_f32_e32 v7, s7, v6 ; 3E0E0C07
v_mac_f32_e32 v8, s12, v6 ; 3E100C0C
v_mul_f32_e32 v3, s7, v10 ; 10061407
v_mul_f32_e32 v6, s12, v10 ; 100C140C
v_mul_f32_e32 v5, v7, v5 ; 100A0B07
v_mul_f32_e32 v4, v8, v4 ; 10080908
v_mac_f32_e32 v5, v1, v3 ; 3E0A0701
v_mac_f32_e32 v4, v0, v6 ; 3E080D00
v_mac_f32_e32 v9, v24, v2 ; 3E120518
v_mac_f32_e32 v5, v25, v18 ; 3E0A2519
v_mac_f32_e32 v4, v23, v17 ; 3E082317
v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v9, v0 ; 3E040109
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v4, v0 ; 3E020104
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 40
Code Size: 2236 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww
9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[6].xxxx
18: MOV TEMP[3].y, CONST[7].xxxx
19: MOV TEMP[3].z, CONST[8].xxxx
20: MOV TEMP[4].x, CONST[6].yyyy
21: MOV TEMP[4].y, CONST[7].yyyy
22: MOV TEMP[4].z, CONST[8].yyyy
23: MOV TEMP[5].x, CONST[6].zzzz
24: MOV TEMP[5].y, CONST[7].zzzz
25: MOV TEMP[5].z, CONST[8].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[3].xyz, TEMP[3].xyzx
44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww
45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz
46: MOV TEMP[4].w, TEMP[6].xxxx
47: MOV TEMP[5].w, TEMP[6].yyyy
48: MOV TEMP[3].w, TEMP[6].zzzz
49: MUL TEMP[6], CONST[2], IN[0].xxxx
50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6]
51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6]
52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6]
53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[0].yzw, TEMP[0].yxyz
55: MUL TEMP[7], CONST[13], TEMP[6].xxxx
56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7]
57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7]
58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7]
59: MOV TEMP[6].xyz, TEMP[6].xyzx
60: MOV TEMP[0].x, TEMP[1].zzzz
61: MOV OUT[1], TEMP[2]
62: MOV OUT[3], TEMP[5]
63: MOV OUT[2], TEMP[4]
64: MOV OUT[4], TEMP[3]
65: MOV OUT[0], TEMP[1]
66: MOV OUT[6], TEMP[6]
67: MOV OUT[5], TEMP[0]
68: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
%96 = extractelement <4 x float> %93, i32 2
%97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = add i32 %5, %7
%100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99)
%101 = extractelement <4 x float> %100, i32 0
%102 = extractelement <4 x float> %100, i32 1
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 0
%108 = extractelement <4 x float> %106, i32 1
%109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = add i32 %5, %7
%112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111)
%113 = extractelement <4 x float> %112, i32 0
%114 = extractelement <4 x float> %112, i32 1
%115 = extractelement <4 x float> %112, i32 2
%116 = extractelement <4 x float> %112, i32 3
%117 = fmul float %20, %86
%118 = fmul float %21, %86
%119 = fmul float %22, %86
%120 = fmul float %24, %87
%121 = fadd float %120, %117
%122 = fmul float %25, %87
%123 = fadd float %122, %118
%124 = fmul float %26, %87
%125 = fadd float %124, %119
%126 = fmul float %28, %88
%127 = fadd float %126, %121
%128 = fmul float %29, %88
%129 = fadd float %128, %123
%130 = fmul float %30, %88
%131 = fadd float %130, %125
%132 = fmul float %32, %89
%133 = fadd float %132, %127
%134 = fmul float %33, %89
%135 = fadd float %134, %129
%136 = fmul float %34, %89
%137 = fadd float %136, %131
%138 = fmul float %66, %86
%139 = fmul float %67, %86
%140 = fmul float %68, %86
%141 = fmul float %69, %86
%142 = fmul float %70, %87
%143 = fadd float %142, %138
%144 = fmul float %71, %87
%145 = fadd float %144, %139
%146 = fmul float %72, %87
%147 = fadd float %146, %140
%148 = fmul float %73, %87
%149 = fadd float %148, %141
%150 = fmul float %74, %88
%151 = fadd float %150, %143
%152 = fmul float %75, %88
%153 = fadd float %152, %145
%154 = fmul float %76, %88
%155 = fadd float %154, %147
%156 = fmul float %77, %88
%157 = fadd float %156, %149
%158 = fmul float %78, %89
%159 = fadd float %158, %151
%160 = fmul float %79, %89
%161 = fadd float %160, %153
%162 = fmul float %80, %89
%163 = fadd float %162, %155
%164 = fmul float %81, %89
%165 = fadd float %164, %157
%166 = fmul float %101, %45
%167 = fadd float %166, %47
%168 = fmul float %102, %46
%169 = fadd float %168, %48
%170 = fcmp oeq float %53, 0.000000e+00
%. = select i1 %170, float %101, float %107
%.32 = select i1 %170, float %102, float %108
%171 = fmul float %., %49
%172 = fadd float %171, %51
%173 = fmul float %.32, %50
%174 = fadd float %173, %52
%175 = fmul float %36, %94
%176 = fmul float %39, %94
%177 = fmul float %42, %94
%178 = fmul float %37, %95
%179 = fadd float %178, %175
%180 = fmul float %40, %95
%181 = fadd float %180, %176
%182 = fmul float %43, %95
%183 = fadd float %182, %177
%184 = fmul float %38, %96
%185 = fadd float %184, %179
%186 = fmul float %41, %96
%187 = fadd float %186, %181
%188 = fmul float %44, %96
%189 = fadd float %188, %183
%190 = fmul float %185, %185
%191 = fmul float %187, %187
%192 = fadd float %191, %190
%193 = fmul float %189, %189
%194 = fadd float %192, %193
%195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194)
%196 = fmul float %185, %195
%197 = fmul float %187, %195
%198 = fmul float %189, %195
%199 = fmul float %20, %113
%200 = fmul float %21, %113
%201 = fmul float %22, %113
%202 = fmul float %24, %114
%203 = fadd float %202, %199
%204 = fmul float %25, %114
%205 = fadd float %204, %200
%206 = fmul float %26, %114
%207 = fadd float %206, %201
%208 = fmul float %28, %115
%209 = fadd float %208, %203
%210 = fmul float %29, %115
%211 = fadd float %210, %205
%212 = fmul float %30, %115
%213 = fadd float %212, %207
%214 = fmul float %209, %209
%215 = fmul float %211, %211
%216 = fadd float %215, %214
%217 = fmul float %213, %213
%218 = fadd float %216, %217
%219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218)
%220 = fmul float %209, %219
%221 = fmul float %211, %219
%222 = fmul float %213, %219
%223 = fmul float %198, %221
%224 = fmul float %196, %222
%225 = fmul float %197, %220
%226 = fmul float %197, %222
%227 = fsub float %226, %223
%228 = fmul float %198, %220
%229 = fsub float %228, %224
%230 = fmul float %196, %221
%231 = fsub float %230, %225
%232 = fmul float %227, %116
%233 = fmul float %229, %116
%234 = fmul float %231, %116
%235 = fmul float %133, %19
%236 = fmul float %135, %19
%237 = fmul float %137, %19
%238 = fsub float %16, %235
%239 = fsub float %17, %236
%240 = fsub float %18, %237
%241 = fmul float %20, %86
%242 = fmul float %21, %86
%243 = fmul float %22, %86
%244 = fmul float %23, %86
%245 = fmul float %24, %87
%246 = fadd float %245, %241
%247 = fmul float %25, %87
%248 = fadd float %247, %242
%249 = fmul float %26, %87
%250 = fadd float %249, %243
%251 = fmul float %27, %87
%252 = fadd float %251, %244
%253 = fmul float %28, %88
%254 = fadd float %253, %246
%255 = fmul float %29, %88
%256 = fadd float %255, %248
%257 = fmul float %30, %88
%258 = fadd float %257, %250
%259 = fmul float %31, %88
%260 = fadd float %259, %252
%261 = fmul float %32, %89
%262 = fadd float %261, %254
%263 = fmul float %33, %89
%264 = fadd float %263, %256
%265 = fmul float %34, %89
%266 = fadd float %265, %258
%267 = fmul float %35, %89
%268 = fadd float %267, %260
%269 = fsub float %133, %13
%270 = fsub float %135, %14
%271 = fsub float %137, %15
%272 = fmul float %54, %262
%273 = fmul float %55, %262
%274 = fmul float %56, %262
%275 = fmul float %57, %264
%276 = fadd float %275, %272
%277 = fmul float %58, %264
%278 = fadd float %277, %273
%279 = fmul float %59, %264
%280 = fadd float %279, %274
%281 = fmul float %60, %266
%282 = fadd float %281, %276
%283 = fmul float %61, %266
%284 = fadd float %283, %278
%285 = fmul float %62, %266
%286 = fadd float %285, %280
%287 = fmul float %63, %268
%288 = fadd float %287, %282
%289 = fmul float %64, %268
%290 = fadd float %289, %284
%291 = fmul float %65, %268
%292 = fadd float %291, %286
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130
s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A
s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128
s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134
s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135
s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136
s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
v_cndmask_b32_e32 v0, v10, v8 ; 0000110A
v_cndmask_b32_e32 v10, v11, v9 ; 0014130B
v_mov_b32_e32 v11, s5 ; 7E160205
s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129
s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B
v_mac_f32_e32 v11, s6, v8 ; 3E161006
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119
s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C
s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C
s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D
s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E
s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v8, s5 ; 7E100205
s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121
v_mac_f32_e32 v8, s4, v9 ; 3E101204
v_mul_f32_e32 v9, s6, v5 ; 10120A06
v_mac_f32_e32 v9, s11, v6 ; 3E120C0B
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E
v_mul_f32_e32 v16, s12, v5 ; 10200A0C
v_mac_f32_e32 v16, s13, v6 ; 3E200C0D
v_mul_f32_e32 v5, s14, v5 ; 100A0A0E
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05
s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144
s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148
s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145
s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149
v_mac_f32_e32 v9, s4, v7 ; 3E120E04
s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146
s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A
s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147
v_mac_f32_e32 v16, s6, v7 ; 3E200E06
v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s13, v1 ; 100E020D
v_mac_f32_e32 v7, s14, v2 ; 3E0E040E
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
v_mul_f32_e32 v17, s4, v1 ; 10220204
v_mac_f32_e32 v17, s19, v2 ; 3E220413
v_mul_f32_e32 v18, s20, v1 ; 10240214
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s6, s[0:3], 0xf ; C203010F
s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C
s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D
s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E
s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v18, s5, v2 ; 3E240405
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s19, s[0:3], 0xd ; C209810D
s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E
v_mul_f32_e32 v19, s4, v1 ; 10260204
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
v_mac_f32_e32 v19, s6, v2 ; 3E260406
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
v_mac_f32_e32 v7, s12, v3 ; 3E0E060C
v_mac_f32_e32 v17, s13, v3 ; 3E22060D
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114
v_mac_f32_e32 v18, s14, v3 ; 3E24060E
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v19, s4, v3 ; 3E260604
s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
v_mul_f32_e32 v20, s6, v1 ; 10280206
v_mac_f32_e32 v20, s5, v2 ; 3E280405
s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112
v_mac_f32_e32 v20, s11, v3 ; 3E28060B
v_mac_f32_e32 v20, s12, v4 ; 3E28080C
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
v_mul_f32_e32 v21, s13, v1 ; 102A020D
v_mac_f32_e32 v21, s19, v2 ; 3E2A0413
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s4, v3 ; 3E2A0604
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151
s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152
s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153
s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117
v_mac_f32_e32 v21, s12, v4 ; 3E2A080C
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mac_f32_e32 v1, s20, v2 ; 3E020414
v_mac_f32_e32 v1, s21, v3 ; 3E020615
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s22, v4 ; 3E020816
v_mac_f32_e32 v6, s23, v4 ; 3E0C0817
v_mac_f32_e32 v7, s24, v4 ; 3E0E0818
v_mac_f32_e32 v17, s25, v4 ; 3E220819
v_mac_f32_e32 v18, s26, v4 ; 3E24081A
v_mac_f32_e32 v19, s27, v4 ; 3E26081B
v_mov_b32_e32 v2, s17 ; 7E040211
v_mac_f32_e32 v2, s15, v0 ; 3E04000F
v_mov_b32_e32 v0, s18 ; 7E000212
v_mac_f32_e32 v0, s16, v10 ; 3E001410
exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s6, v12 ; 10001806
v_mac_f32_e32 v0, s5, v13 ; 3E001A05
v_mul_f32_e32 v2, s13, v12 ; 1004180D
v_mac_f32_e32 v2, s19, v13 ; 3E041A13
v_mul_f32_e32 v3, s14, v12 ; 1006180E
v_mac_f32_e32 v3, s20, v13 ; 3E061A14
v_mac_f32_e32 v0, s11, v14 ; 3E001C0B
v_mac_f32_e32 v2, s4, v14 ; 3E041C04
v_mac_f32_e32 v3, s21, v14 ; 3E061C15
v_mul_f32_e32 v4, v9, v9 ; 10081309
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v4, v5, v5 ; 3E080B05
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mul_f32_e32 v10, v4, v16 ; 10142104
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mul_f32_e32 v3, v8, v3 ; 10060708
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v5, s4 ; 7E0A0204
v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14
exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v5, v2, v4 ; 100A0902
v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A
v_mul_f32_e32 v3, v3, v9 ; 10061303
v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104
v_mul_f32_e32 v0, v0, v10 ; 10001500
v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509
v_mul_f32_e32 v2, v15, v5 ; 10040B0F
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_f32_e32 v0, v15, v0 ; 1000010F
v_mov_b32_e32 v5, s6 ; 7E0A0206
v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15
exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140
s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139
s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A
s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C
s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D
s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mov_b32_e32 v0, s4 ; 7E000204
v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01
v_subrev_f32_e32 v2, s6, v20 ; 0A042806
v_mul_f32_e32 v3, s7, v20 ; 10062807
v_mul_f32_e32 v5, s8, v20 ; 100A2808
v_mul_f32_e32 v8, s9, v20 ; 10102809
v_mac_f32_e32 v3, s10, v21 ; 3E062A0A
v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10
v_mac_f32_e32 v8, s17, v21 ; 3E102A11
v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v5, s19, v1 ; 3E0A0213
v_mac_f32_e32 v8, s0, v1 ; 3E100200
v_subrev_f32_e32 v1, s12, v1 ; 0A02020C
v_mac_f32_e32 v3, s13, v19 ; 3E06260D
v_mac_f32_e32 v5, s14, v19 ; 3E0A260E
v_mac_f32_e32 v8, s15, v19 ; 3E10260F
exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09
exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211
exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503
exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 24
Code Size: 920 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[7..9]
DCL TEMP[0..13], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 10.0000, 0.9680, 0.0300, 0.5000}
IMM[2] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D
11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[7].xxxx
13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
14: MOV_SAT TEMP[4].x, TEMP[4].xxxx
15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
16: SQRT TEMP[4].x, TEMP[4].xxxx
17: MOV TEMP[3].z, TEMP[4].xxxx
18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
20: MOV TEMP[0].y, TEMP[1].xxxx
21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz
22: MOV TEMP[0].z, TEMP[1].xxxx
23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
24: RSQ TEMP[1].x, TEMP[1].xxxx
25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
26: MOV TEMP[1].xy, IN[0].xyyy
27: TEX TEMP[1].xyz, TEMP[1], SAMP[0], 2D
28: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[1].xyzz
29: LRP TEMP[2].xyz, CONST[8].xxxx, TEMP[1].xyzz, CONST[1].xyzz
30: MOV TEMP[3].x, IN[1].wwww
31: MOV TEMP[3].y, IN[2].wwww
32: MOV TEMP[3].z, IN[3].wwww
33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
34: RSQ TEMP[4].x, TEMP[4].xxxx
35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
36: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz
37: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
38: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz
39: MOV TEMP[5].xy, TEMP[5].xxxx
40: TEX TEMP[5].w, TEMP[5], SAMP[2], 2D
41: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww
42: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww
43: RSQ TEMP[6].x, TEMP[6].xxxx
44: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx
45: MOV TEMP[6].xyz, -TEMP[6].xyzx
46: ADD TEMP[7].x, IMM[0].zzzz, -CONST[9].xxxx
47: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz
48: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz
49: RSQ TEMP[9].x, TEMP[9].xxxx
50: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx
51: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz
52: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
53: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz
54: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx
55: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx
56: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww
57: ADD TEMP[10].x, IMM[0].zzzz, -TEMP[7].xxxx
58: MAD TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy, IMM[1].zzzz
59: LG2 TEMP[10].x, TEMP[10].xxxx
60: RCP TEMP[10].x, TEMP[10].xxxx
61: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx
62: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx
63: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[4].xxxx
64: ADD TEMP[12].x, IMM[0].zzzz, -TEMP[6].xxxx
65: MUL TEMP[13].x, IMM[0].xxxx, TEMP[3].xxxx
66: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx
67: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].wwww
68: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx
69: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].zzzz, TEMP[9].xxxx
70: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[9].xxxx
71: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[2].xxxx
72: RCP TEMP[6].x, TEMP[6].xxxx
73: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz
74: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx
75: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx
76: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].zzzz
77: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy
78: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
79: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx
80: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx
81: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx
82: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
83: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz
84: ADD TEMP[8].xyz, IMM[0].zzzz, -TEMP[2].xyzz
85: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx
86: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx
87: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx
88: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx
89: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz
90: MUL TEMP[3].x, CONST[8].xxxx, CONST[1].wwww
91: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx
92: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
93: ADD TEMP[3].x, TEMP[7].xxxx, IMM[0].yyyy
94: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx
95: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx
96: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
97: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
98: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].zzzz
99: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy
100: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx
101: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx
102: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
103: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
104: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].zzzz
105: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx
106: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
107: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx
108: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
109: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
110: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww
111: MOV_SAT TEMP[1].x, TEMP[1].xxxx
112: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
113: MOV TEMP[0].xyz, TEMP[0].xyzx
114: MOV TEMP[0].w, IMM[0].zzzz
115: MOV OUT[0], TEMP[0]
116: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the TGSI "FRAG" program printed
; directly above (TGSI instructions 0..116). Comments below refer to those
; TGSI instruction numbers and registers.
; Arguments actually used by the body: %1 (array the constant-buffer
; descriptor is loaded from), %2 and %3 (arrays the per-sampler <16 x i8> and
; <32 x i8> descriptors are loaded from), %5 and %7 (passed unchanged to every
; llvm.SI.fs.interp call).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Load element 0 of %1; it is the descriptor handed to every load.const below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Scalar constant fetches. The i32 operand is a byte offset; with 16 bytes per
; vec4 these line up with the CONST[] components the TGSI reads:
;   8,12 -> CONST[0].zw      16..28 -> CONST[1].xyzw   32..40 -> CONST[2].xyz
;   48,52,60 -> CONST[3].x/.y/.w   64..72 -> CONST[4].xyz
;   112 -> CONST[7].x   128 -> CONST[8].x   144 -> CONST[9].x
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
; Descriptor pairs for the three texture fetches: element i of %3 (<32 x i8>)
; and element i of %2 (<16 x i8>) for i = 0, 1, 2. They are consumed by the
; llvm.SI.sample calls that implement TEX on SAMP[0], SAMP[1] and SAMP[2].
%42 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0
%44 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%47 = bitcast <8 x i32> addrspace(2)* %46 to <32 x i8> addrspace(2)*
%48 = load <32 x i8>, <32 x i8> addrspace(2)* %47, align 32, !tbaa !0
%49 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%50 = bitcast <4 x i32> addrspace(2)* %49 to <16 x i8> addrspace(2)*
%51 = load <16 x i8>, <16 x i8> addrspace(2)* %50, align 16, !tbaa !0
%52 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%53 = bitcast <8 x i32> addrspace(2)* %52 to <32 x i8> addrspace(2)*
%54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0
%55 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%56 = bitcast <4 x i32> addrspace(2)* %55 to <16 x i8> addrspace(2)*
%57 = load <16 x i8>, <16 x i8> addrspace(2)* %56, align 16, !tbaa !0
; Interpolated inputs. First operand = channel, second = attribute index, so:
;   %58,%59 = IN[0].xy      %60..%63 = IN[1].xyzw   %64..%67 = IN[2].xyzw
;   %68..%71 = IN[3].xyzw   %72..%75 = IN[4].xyzw   %76..%78 = IN[5].xyz
%58 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%74 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%75 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
; TGSI 9-10: TEX on SAMP[1] at IN[0].xy; only result elements 1 (.y) and 3 (.w)
; are extracted.
%79 = bitcast float %58 to i32
%80 = bitcast float %59 to i32
%81 = insertelement <2 x i32> undef, i32 %79, i32 0
%82 = insertelement <2 x i32> %81, i32 %80, i32 1
%83 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %82, <32 x i8> %48, <16 x i8> %51, i32 2)
%84 = extractelement <4 x float> %83, i32 1
%85 = extractelement <4 x float> %83, i32 3
; TGSI 11-12: (w, y) * 2 - 1, then scale both by CONST[7].x (%39).
%86 = fmul float %85, 2.000000e+00
%87 = fadd float %86, -1.000000e+00
%88 = fmul float %84, 2.000000e+00
%89 = fadd float %88, -1.000000e+00
%90 = fmul float %87, %39
%91 = fmul float %89, %39
; TGSI 13-17: z = sqrt(1 - saturate(x*x + y*y)).
%92 = fmul float %90, %90
%93 = fmul float %91, %91
%94 = fadd float %92, %93
%95 = call float @llvm.AMDIL.clamp.(float %94, float 0.000000e+00, float 1.000000e+00)
%96 = fsub float 1.000000e+00, %95
%97 = call float @llvm.sqrt.f32(float %96)
; TGSI 18-22: three DP3s of (x, y, z) against the .x, .y and .z components of
; IN[1], IN[2], IN[3] (TEMP[0..2] built by TGSI 0-8).
%98 = fmul float %90, %60
%99 = fmul float %91, %64
%100 = fadd float %99, %98
%101 = fmul float %97, %68
%102 = fadd float %100, %101
%103 = fmul float %90, %61
%104 = fmul float %91, %65
%105 = fadd float %104, %103
%106 = fmul float %97, %69
%107 = fadd float %105, %106
%108 = fmul float %90, %62
%109 = fmul float %91, %66
%110 = fadd float %109, %108
%111 = fmul float %97, %70
%112 = fadd float %110, %111
; TGSI 23-25: scale that vector by rsq of its squared length -> TEMP[0].xyz
; (%119, %120, %121).
%113 = fmul float %102, %102
%114 = fmul float %107, %107
%115 = fadd float %114, %113
%116 = fmul float %112, %112
%117 = fadd float %115, %116
%118 = call float @llvm.AMDGPU.rsq.clamped.f32(float %117)
%119 = fmul float %102, %118
%120 = fmul float %107, %118
%121 = fmul float %112, %118
; TGSI 26-28: TEX on SAMP[0] at the same IN[0].xy coordinates, xyz multiplied
; by CONST[4].xyz -> TEMP[1].xyz (%130..%132).
%122 = bitcast float %58 to i32
%123 = bitcast float %59 to i32
%124 = insertelement <2 x i32> undef, i32 %122, i32 0
%125 = insertelement <2 x i32> %124, i32 %123, i32 1
%126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %43, <16 x i8> %45, i32 2)
%127 = extractelement <4 x float> %126, i32 0
%128 = extractelement <4 x float> %126, i32 1
%129 = extractelement <4 x float> %126, i32 2
%130 = fmul float %36, %127
%131 = fmul float %37, %128
%132 = fmul float %38, %129
; TGSI 29: LRP with weight CONST[8].x between TEMP[1].xyz and CONST[1].xyz
; -> TEMP[2].xyz (%133..%135).
%133 = call float @llvm.AMDGPU.lrp(float %40, float %130, float %26)
%134 = call float @llvm.AMDGPU.lrp(float %40, float %131, float %27)
%135 = call float @llvm.AMDGPU.lrp(float %40, float %132, float %28)
; TGSI 30-35: rsq-normalise (IN[1].w, IN[2].w, IN[3].w) -> TEMP[3].xyz
; (%142..%144).
%136 = fmul float %63, %63
%137 = fmul float %67, %67
%138 = fadd float %137, %136
%139 = fmul float %71, %71
%140 = fadd float %138, %139
%141 = call float @llvm.AMDGPU.rsq.clamped.f32(float %140)
%142 = fmul float %63, %141
%143 = fmul float %67, %141
%144 = fmul float %71, %141
; TGSI 36-37: TEMP[4].x = max(0, dot(TEMP[0], TEMP[3])) (%150).
%145 = fmul float %119, %142
%146 = fmul float %120, %143
%147 = fadd float %146, %145
%148 = fmul float %121, %144
%149 = fadd float %147, %148
%150 = call float @llvm.maxnum.f32(float %149, float 0.000000e+00)
; TGSI 38-41: dot(IN[5].xyz, IN[5].xyz) is used as both texture coordinates
; for TEX on SAMP[2]; the .w result scales CONST[2].xyz -> TEMP[5].xyz
; (%162..%164).
%151 = fmul float %76, %76
%152 = fmul float %77, %77
%153 = fadd float %152, %151
%154 = fmul float %78, %78
%155 = fadd float %153, %154
%156 = bitcast float %155 to i32
%157 = bitcast float %155 to i32
%158 = insertelement <2 x i32> undef, i32 %156, i32 0
%159 = insertelement <2 x i32> %158, i32 %157, i32 1
%160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %54, <16 x i8> %57, i32 2)
%161 = extractelement <4 x float> %160, i32 3
%162 = fmul float %30, %161
%163 = fmul float %31, %161
%164 = fmul float %32, %161
; TGSI 42-44: rsq-normalise IN[4].yzw (%171..%173). The negation of TGSI 45 is
; not materialised; it is folded into the subtractions further down.
%165 = fmul float %73, %73
%166 = fmul float %74, %74
%167 = fadd float %166, %165
%168 = fmul float %75, %75
%169 = fadd float %167, %168
%170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169)
%171 = fmul float %73, %170
%172 = fmul float %74, %170
%173 = fmul float %75, %170
; TGSI 46: TEMP[7].x = 1 - CONST[9].x.
%174 = fsub float 1.000000e+00, %41
; TGSI 47-50: TEMP[8] = normalise(TEMP[3] + TEMP[6]) with TEMP[6] being the
; negated vector, hence the fsub (%184..%186).
%175 = fsub float %142, %171
%176 = fsub float %143, %172
%177 = fsub float %144, %173
%178 = fmul float %175, %175
%179 = fmul float %176, %176
%180 = fadd float %179, %178
%181 = fmul float %177, %177
%182 = fadd float %180, %181
%183 = call float @llvm.AMDGPU.rsq.clamped.f32(float %182)
%184 = fmul float %175, %183
%185 = fmul float %176, %183
%186 = fmul float %177, %183
; TGSI 51-52: TEMP[6].x = max(0, dot(TEMP[0], TEMP[6])), computed as the
; negated dot product with the un-negated vector (%193).
%187 = fmul float %171, %119
%188 = fsub float -0.000000e+00, %187
%189 = fmul float %172, %120
%190 = fsub float %188, %189
%191 = fmul float %173, %121
%192 = fsub float %190, %191
%193 = call float @llvm.maxnum.f32(float %192, float 0.000000e+00)
; TGSI 53-54: TEMP[3].x = max(0, dot(TEMP[3], TEMP[8])) (%199).
%194 = fmul float %142, %184
%195 = fmul float %143, %185
%196 = fadd float %195, %194
%197 = fmul float %144, %186
%198 = fadd float %196, %197
%199 = call float @llvm.maxnum.f32(float %198, float 0.000000e+00)
; TGSI 55-56: TEMP[9].x = TEMP[7].x^2 * CONST[3].w (%201).
%200 = fmul float %174, %174
%201 = fmul float %200, %35
; TGSI 57-62: TEMP[10].x = (10 / log2((1 - TEMP[7].x) * 0.968 + 0.03))^2
; (%208). The two hex literals are the IMM[1].y and IMM[1].z operands.
%202 = fsub float 1.000000e+00, %174
%203 = fmul float %202, 0x3FEEF9DB20000000
%204 = fadd float %203, 0x3F9EB851E0000000
%205 = call float @llvm.log2.f32(float %204)
%206 = fdiv float 1.000000e+00, %205
%207 = fmul float %206, 1.000000e+01
%208 = fmul float %207, %207
; TGSI 63-64: TEMP[11].x = 1 - TEMP[4].x, TEMP[12].x = 1 - TEMP[6].x.
%209 = fsub float 1.000000e+00, %150
%210 = fsub float 1.000000e+00, %193
; TGSI 65-67: TEMP[7].x = (2 * TEMP[3].x) * (TEMP[3].x * TEMP[7].x) + 0.5 (%214).
%211 = fmul float %199, 2.000000e+00
%212 = fmul float %199, %174
%213 = fmul float %211, %212
%214 = fadd float %213, 5.000000e-01
; TGSI 68: TEMP[3].x = 1 - TEMP[3].x.
%215 = fsub float 1.000000e+00, %199
; TGSI 69-72: two LRPs between 1.0 and TEMP[9].x, multiplied together, plus
; IMM[2].x (0.0001), then reciprocal (%220).
%216 = call float @llvm.AMDGPU.lrp(float %150, float 1.000000e+00, float %201)
%217 = call float @llvm.AMDGPU.lrp(float %193, float 1.000000e+00, float %201)
%218 = fmul float %216, %217
%219 = fadd float %218, 0x3F1A36E2E0000000
%220 = fdiv float 1.000000e+00, %219
; TGSI 73-75: pow(max(0, dot(TEMP[0], TEMP[8])), TEMP[10].x) (%227).
%221 = fmul float %119, %184
%222 = fmul float %120, %185
%223 = fadd float %222, %221
%224 = fmul float %121, %186
%225 = fadd float %223, %224
%226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00)
%227 = call float @llvm.pow.f32(float %226, float %208)
; TGSI 76-83: scale by (TEMP[10].x + 1) * CONST[3].y, by the reciprocal %220,
; by TEMP[4].x and by CONST[3].x; clamp at 0; multiply into TEMP[5].xyz
; -> TEMP[6].xyz (%235..%237).
%228 = fadd float %208, 1.000000e+00
%229 = fmul float %228, %34
%230 = fmul float %227, %229
%231 = fmul float %220, %230
%232 = fmul float %231, %150
%233 = fmul float %232, %33
%234 = call float @llvm.maxnum.f32(float %233, float 0.000000e+00)
%235 = fmul float %234, %162
%236 = fmul float %234, %163
%237 = fmul float %234, %164
; TGSI 84-89: TEMP[2].xyz += (1 - TEMP[2].xyz) * TEMP[3].x^5, where the fifth
; power is built as x^2 * x^3 (%246, %248, %250).
%238 = fsub float 1.000000e+00, %133
%239 = fsub float 1.000000e+00, %134
%240 = fsub float 1.000000e+00, %135
%241 = fmul float %215, %215
%242 = fmul float %215, %215
%243 = fmul float %242, %215
%244 = fmul float %241, %243
%245 = fmul float %238, %244
%246 = fadd float %245, %133
%247 = fmul float %239, %244
%248 = fadd float %247, %134
%249 = fmul float %240, %244
%250 = fadd float %249, %135
; TGSI 90-92: TEMP[1].xyz *= CONST[1].w - CONST[8].x * CONST[1].w.
%251 = fmul float %40, %29
%252 = fsub float %29, %251
%253 = fmul float %130, %252
%254 = fmul float %131, %252
%255 = fmul float %132, %252
; TGSI 93-98: (TEMP[7].x - 1) * TEMP[11].x^5 + 1 (%262).
%256 = fadd float %214, -1.000000e+00
%257 = fmul float %209, %209
%258 = fmul float %209, %209
%259 = fmul float %258, %209
%260 = fmul float %257, %259
%261 = fmul float %256, %260
%262 = fadd float %261, 1.000000e+00
; TGSI 99-104: the same expression with TEMP[12].x (%269).
%263 = fadd float %214, -1.000000e+00
%264 = fmul float %210, %210
%265 = fmul float %210, %210
%266 = fmul float %265, %210
%267 = fmul float %264, %266
%268 = fmul float %263, %267
%269 = fadd float %268, 1.000000e+00
; TGSI 105-108: product of the two terms, times TEMP[4].x, times TEMP[5].xyz,
; multiplied into TEMP[1].xyz (%275..%277).
%270 = fmul float %262, %269
%271 = fmul float %270, %150
%272 = fmul float %162, %271
%273 = fmul float %163, %271
%274 = fmul float %164, %271
%275 = fmul float %253, %272
%276 = fmul float %254, %273
%277 = fmul float %255, %274
; TGSI 109: TEMP[0].xyz = TEMP[6].xyz * TEMP[2].xyz + TEMP[1].xyz.
%278 = fmul float %235, %246
%279 = fadd float %278, %275
%280 = fmul float %236, %248
%281 = fadd float %280, %276
%282 = fmul float %237, %250
%283 = fadd float %282, %277
; TGSI 110-112: multiply the colour by saturate(IN[4].x * CONST[0].z + CONST[0].w).
%284 = fmul float %72, %24
%285 = fadd float %284, %25
%286 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00)
%287 = fmul float %279, %286
%288 = fmul float %281, %286
%289 = fmul float %283, %286
; TGSI 114-115: the fourth component is the constant 1.0. The colour is packed
; into two i32 values via packf16 and passed to the export intrinsic as
; (%291, %293, %291, %293).
%290 = call i32 @llvm.SI.packf16(float %287, float %288)
%291 = bitcast i32 %290 to float
%292 = call i32 @llvm.SI.packf16(float %289, float 1.000000e+00)
%293 = bitcast i32 %292 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %291, float %293, float %291, float %293)
ret void
}
; External declarations for every intrinsic called from @main above, followed
; by the attribute groups and metadata the module refers to.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; No attribute group: the export intrinsic is the only declaration here that
; is not marked readnone.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main; #1 and #2 are attached to the intrinsics above.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; TBAA node referenced as !tbaa !0 by the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose LLVM IR (@main) is printed above.
; The trailing "; XXXXXXXX" comment on each line is the instruction encoding
; emitted by the disassembler and is left untouched.
; Switch the exec mask to whole-quad mode, and copy s9 into m0, which every
; v_interp instruction below takes as its [m0] operand.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Attribute interpolation: each p1/p2 pair yields one channel. The two numeric
; operands are (channel, attribute), mirroring the llvm.SI.fs.interp calls:
;   v2,v3 = attr 0 .xy      v4..v7 = attr 1 .xyzw    v8..v11 = attr 2 .xyzw
;   v12..v15 = attr 3 .xyzw v16..v19 = attr 4 .xyzw  v20,v21,v0 = attr 5 .xyz
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700
v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701
v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800
v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801
v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900
v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901
v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00
v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01
v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00
v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00
v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01
v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00
v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01
v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00
v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01
v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000
v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001
v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100
v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101
v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200
; Four-dword descriptor loaded into s[0:3]; it is the base operand of every
; s_buffer_load_dword that follows (the constant fetches).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201
v_interp_p1_f32 v19, v0, 3, 4, [m0] ; C84C1300
v_interp_p2_f32 v19, [v19], v1, 3, 4, [m0] ; C84D1301
v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400
v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401
v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500
v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501
v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600
v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601
s_waitcnt lgkmcnt(0) ; BF8C007F
; Scalar loads feeding the three image_sample instructions (x4/x8 descriptor
; pairs from s[4:5] and s[6:7]) plus constants at dword offsets 0x10..0x12,
; which later scale the xyz result of the second sample.
s_buffer_load_dword s32, s[0:3], 0x10 ; C2100110
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
s_buffer_load_dword s33, s[0:3], 0x11 ; C2108111
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_buffer_load_dword s34, s[0:3], 0x12 ; C2110112
; v22 = v20*v20 + v21*v21 + v0*v0: squared length of the attr 5 vector, used
; below as the coordinate of the third sample.
v_mul_f32_e32 v22, v20, v20 ; 102C2914
s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508
s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710
v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15
v_mac_f32_e32 v22, v0, v0 ; 3E2C0100
s_waitcnt lgkmcnt(0) ; BF8C007F
; First sample: coordinates v[2:3], two result channels written to v[0:1]
; (the IR extracts elements 1 and 3 of this fetch).
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[12:15] ; F0800A00 00660002
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
; Second sample: same coordinates, three result channels written to v[23:25].
image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800700 00441702
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v2, s32, v23 ; 10042E20
v_mul_f32_e32 v3, s33, v24 ; 10063021
v_mul_f32_e32 v20, s34, v25 ; 10283222
; Third sample: v22 duplicated into v23 so both coordinates are equal; one
; result channel written to v21.
v_mov_b32_e32 v23, v22 ; 7E2E0316
image_sample v21, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[40:47], s[36:39] ; F0800800 012A1516
; Remap the two channels of the first sample with 2*v - 1.
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
; v22 = s5 + s4 * v16 (constants at dwords 0x3 and 0x2, attr 4 .x); it is
; clamped near the end and used as the final colour factor.
v_mov_b32_e32 v22, s5 ; 7E2C0205
v_mac_f32_e32 v22, s4, v16 ; 3E2C2004
; Remaining scalar constants; s4, s5 and s0 are reused as destinations once
; their previous contents have been consumed.
s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C
s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107
s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108
s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109
s_buffer_load_dword s13, s[0:3], 0xa ; C206810A
s_buffer_load_dword s14, s[0:3], 0xc ; C207010C
s_buffer_load_dword s15, s[0:3], 0x20 ; C2078120
s_buffer_load_dword s0, s[0:3], 0x24 ; C2000124
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Scale the remapped pair by the constant at dword 0x1c, start the three dot
; products against attrs 1/2/3, and derive the third component as
; sqrt(1 - clamp(x*x + y*y)).
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mul_f32_e32 v0, s4, v0 ; 10000004
v_mul_f32_e32 v4, v4, v1 ; 10080304
v_mac_f32_e32 v4, v8, v0 ; 3E080108
v_mul_f32_e32 v5, v5, v1 ; 100A0305
v_mac_f32_e32 v5, v9, v0 ; 3E0A0109
v_mul_f32_e32 v6, v6, v1 ; 100C0306
v_mac_f32_e32 v6, v10, v0 ; 3E0C010A
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v4, v12, v0 ; 3E08010C
v_mac_f32_e32 v5, v13, v0 ; 3E0A010D
v_mac_f32_e32 v6, v14, v0 ; 3E0C010E
; Reciprocal square roots of three squared lengths: (v4,v5,v6), attr 4 .yzw
; (v17,v18,v19), and the .w channels of attrs 1/2/3 (v7,v11,v15).
v_mul_f32_e32 v0, v4, v4 ; 10000904
v_mac_f32_e32 v0, v5, v5 ; 3E000B05
v_mac_f32_e32 v0, v6, v6 ; 3E000D06
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v17, v17 ; 10022311
v_mac_f32_e32 v1, v18, v18 ; 3E022512
v_mac_f32_e32 v1, v19, v19 ; 3E022713
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v8, v7, v7 ; 10100F07
v_mac_f32_e32 v8, v11, v11 ; 3E10170B
v_mac_f32_e32 v8, v15, v15 ; 3E101F0F
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
; From here to the clamp of v22 the scheduler interleaves the rest of the ALU
; work from the IR (normalisations, dot products, lrp expansions, powers).
v_mul_f32_e32 v9, v1, v17 ; 10122301
v_mul_f32_e32 v10, v1, v18 ; 10142501
v_mul_f32_e32 v1, v1, v19 ; 10022701
v_mul_f32_e32 v4, v0, v4 ; 10080900
v_mul_f32_e32 v5, v0, v5 ; 100A0B00
v_mul_f32_e32 v12, v8, v7 ; 10180F08
v_mad_f32 v7, v7, v8, -v9 ; D2820007 84261107
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mad_f32 v9, -v10, v5, -v9 ; D2820009 A4260B0A
v_mad_f32 v10, v11, v8, -v10 ; D282000A 842A110B
v_mul_f32_e32 v13, v7, v7 ; 101A0F07
v_mac_f32_e32 v13, v10, v10 ; 3E1A150A
v_mad_f32 v14, v15, v8, -v1 ; D282000E 8406110F
v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v16, v12, v4 ; 1020090C
v_mac_f32_e32 v16, v11, v5 ; 3E200B0B
v_mul_f32_e32 v7, v13, v7 ; 100E0F0D
v_mul_f32_e32 v10, v13, v10 ; 1014150D
v_mul_f32_e32 v12, v7, v12 ; 10181907
v_mac_f32_e32 v12, v10, v11 ; 3E18170A
v_mul_f32_e32 v4, v7, v4 ; 10080907
v_mac_f32_e32 v4, v10, v5 ; 3E080B0A
v_mul_f32_e32 v0, v0, v6 ; 10000D00
v_mul_f32_e32 v5, v8, v15 ; 100A1F08
v_mul_f32_e32 v6, v13, v14 ; 100C1D0D
v_mad_f32 v1, -v1, v0, v9 ; D2820001 24260101
v_mac_f32_e32 v16, v5, v0 ; 3E200105
v_mac_f32_e32 v12, v6, v5 ; 3E180B06
v_mac_f32_e32 v4, v6, v0 ; 3E080106
; s15 is the constant at dword 0x20, the weight of the three lrp calls in the
; IR; v0 = 1 - s15 is the complementary weight.
v_sub_f32_e64 v0, 1.0, s15 ; D2080000 00001EF2
v_mul_f32_e32 v5, s6, v0 ; 100A0006
v_mad_f32 v6, -v0, s6, 1.0 ; D2820006 23C80D00
v_mul_f32_e32 v7, s7, v0 ; 100E0007
v_mad_f32 v8, -v0, s7, 1.0 ; D2820008 23C80F00
v_mul_f32_e32 v9, s8, v0 ; 10120008
v_mad_f32 v0, -v0, s8, 1.0 ; D2820000 23C81100
; v10 = 1 - s0 (constant at dword 0x24). The two 32-bit literals below are the
; float encodings of the 0.03 and 0.968 immediates used in the IR.
v_sub_f32_e64 v10, 1.0, s0 ; D208000A 000000F2
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F
v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9
v_max_f32_e32 v12, 0, v12 ; 20181880
v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2
; Fifth power built from a square: x^2, then x^3, then x^3 * x^2.
v_mul_f32_e32 v14, v13, v13 ; 101C1B0D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
v_mac_f32_e32 v5, s15, v2 ; 3E0A040F
v_mad_f32 v6, -s15, v2, v6 ; D2820006 241A040F
v_mac_f32_e32 v5, v13, v6 ; 3E0A0D0D
v_mac_f32_e32 v7, s15, v3 ; 3E0E060F
v_mad_f32 v6, -s15, v3, v8 ; D2820006 2422060F
v_mac_f32_e32 v7, v13, v6 ; 3E0E0D0D
v_mac_f32_e32 v9, s15, v20 ; 3E12280F
v_log_f32_e32 v6, v11 ; 7E0C4F0B
v_mad_f32 v0, -s15, v20, v0 ; D2820000 2402280F
v_mac_f32_e32 v9, v13, v0 ; 3E12010D
; Scale the single channel of the third sample (v21) by the constants at
; dwords 0x8, 0x9 and 0xa.
v_mul_f32_e32 v0, s11, v21 ; 10002A0B
v_mul_f32_e32 v8, s12, v21 ; 10102A0C
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_mul_f32_e32 v11, s13, v21 ; 10162A0D
v_mul_f32_e32 v13, v10, v10 ; 101A150A
v_mul_f32_e32 v13, s9, v13 ; 101A1A09
; 0x41200000 is the float encoding of 10.0.
v_mul_f32_e32 v6, 0x41200000, v6 ; 100C0CFF 41200000
v_mad_f32 v14, v6, v6, 1.0 ; D282000E 03CA0D06
v_mul_f32_e32 v14, s5, v14 ; 101C1C05
v_max_f32_e32 v1, 0, v1 ; 20020280
v_sub_f32_e32 v15, 1.0, v1 ; 081E02F2
v_mul_f32_e32 v17, v13, v15 ; 10221F0D
v_mac_f32_e32 v17, 1.0, v1 ; 3E2202F2
v_max_f32_e32 v1, 0, v16 ; 20022080
v_sub_f32_e32 v16, 1.0, v1 ; 082002F2
v_mul_f32_e32 v13, v13, v16 ; 101A210D
v_mac_f32_e32 v13, 1.0, v1 ; 3E1A02F2
; The llvm.pow.f32 call from the IR appears here as log -> multiply by the
; exponent (v6 squared) -> exp.
v_max_f32_e32 v4, 0, v4 ; 20080880
v_log_f32_e32 v4, v4 ; 7E084F04
v_madak_f32_e32 v13, v13, v17, 0x38d1b717 ; 421A230D 38D1B717
v_mul_f32_e32 v6, v6, v6 ; 100C0D06
v_rcp_f32_e32 v13, v13 ; 7E1A550D
v_mul_legacy_f32_e32 v4, v6, v4 ; 0E080906
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_f32_e32 v4, v14, v4 ; 1008090E
v_mul_f32_e32 v4, v4, v13 ; 10081B04
v_mul_f32_e32 v4, v1, v4 ; 10080901
v_mul_f32_e32 v4, s14, v4 ; 1008080E
; v6 = s10 - s15 * s10.
v_mov_b32_e32 v6, s15 ; 7E0C020F
v_mad_f32 v6, -v6, s10, s10 ; D2820006 20281506
v_mul_f32_e32 v10, v10, v12 ; 1014190A
v_add_f32_e32 v12, v12, v12 ; 0618190C
v_mad_f32 v10, v12, v10, 0.5 ; D282000A 03C2150C
; Two more fifth powers (of v16 and v15), each used as t * x^5 + 1 with
; t = v10 - 1.
v_mul_f32_e32 v12, v16, v16 ; 10182110
v_mul_f32_e32 v13, v16, v12 ; 101A1910
v_mul_f32_e32 v12, v13, v12 ; 1018190D
v_mul_f32_e32 v13, v15, v15 ; 101A1F0F
v_mul_f32_e32 v14, v15, v13 ; 101C1B0F
v_mul_f32_e32 v13, v14, v13 ; 101A1B0E
v_add_f32_e32 v10, -1.0, v10 ; 061414F3
v_mad_f32 v12, v10, v12, 1.0 ; D282000C 03CA190A
v_mad_f32 v10, v10, v13, 1.0 ; D282000A 03CA1B0A
v_mul_f32_e32 v10, v10, v12 ; 1014190A
; Combine the two colour terms per channel into v2, v0 and v1.
v_mul_f32_e32 v2, v6, v2 ; 10040506
v_mul_f32_e32 v1, v1, v10 ; 10021501
v_mul_f32_e32 v10, v1, v0 ; 10140101
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_mac_f32_e32 v2, v5, v0 ; 3E040105
v_mul_f32_e32 v0, v6, v3 ; 10000706
v_mul_f32_e32 v3, v1, v8 ; 10061101
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v3, v8, v4 ; 10060908
v_mac_f32_e32 v0, v7, v3 ; 3E000707
v_mul_f32_e32 v3, v6, v20 ; 10062906
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mul_f32_e32 v1, v1, v11 ; 10021701
v_mul_f32_e32 v1, v1, v3 ; 10020701
v_mac_f32_e32 v1, v9, v4 ; 3E020909
; Clamp the factor computed earlier in v22 and apply it to all three channels.
v_add_f32_e64 v3, 0, v22 clamp ; D2060803 00022C80
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v1, v3, v1 ; 10020303
; Pack (v2, v0) and (v1, 1.0) to half-float pairs, export them, end the program.
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 28
Code Size: 1084 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww
9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[6].xxxx
18: MOV TEMP[3].y, CONST[7].xxxx
19: MOV TEMP[3].z, CONST[8].xxxx
20: MOV TEMP[4].x, CONST[6].yyyy
21: MOV TEMP[4].y, CONST[7].yyyy
22: MOV TEMP[4].z, CONST[8].yyyy
23: MOV TEMP[5].x, CONST[6].zzzz
24: MOV TEMP[5].y, CONST[7].zzzz
25: MOV TEMP[5].z, CONST[8].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[3].xyz, TEMP[3].xyzx
44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww
45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz
46: MOV TEMP[4].w, TEMP[6].xxxx
47: MOV TEMP[5].w, TEMP[6].yyyy
48: MOV TEMP[3].w, TEMP[6].zzzz
49: MUL TEMP[6], CONST[2], IN[0].xxxx
50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6]
51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6]
52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6]
53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[0].yzw, TEMP[0].yxyz
55: MUL TEMP[7], CONST[13], TEMP[6].xxxx
56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7]
57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7]
58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7]
59: MOV TEMP[6].xyz, TEMP[6].xyzx
60: MOV TEMP[0].x, TEMP[1].zzzz
61: MOV OUT[1], TEMP[2]
62: MOV OUT[3], TEMP[5]
63: MOV OUT[2], TEMP[4]
64: MOV OUT[4], TEMP[3]
65: MOV OUT[0], TEMP[1]
66: MOV OUT[6], TEMP[6]
67: MOV OUT[5], TEMP[0]
68: END
; ModuleID = 'tgsi'
; Vertex-stage entry point (it fetches attributes with llvm.SI.vs.load.input).
; The arithmetic below lines up with the TGSI listing dumped directly above
; this module.  Of the eleven arguments only three are referenced in the body:
;   %1      - pointer to the descriptor array whose slot 0 feeds every
;             llvm.SI.load.const call
;   %4      - pointer to the descriptor array used for the vertex inputs
;   %5, %7  - added together to form the index passed to vs.load.input
; NOTE(review): the role of the remaining arguments is not visible here.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Load the 16-byte descriptor in slot 0 of %1; all scalar constants are read
; through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Scalar constant loads.  NOTE(review): the second operand looks like a byte
; offset laid out as 16*N + 4*component for CONST[N] of the TGSI listing
; (e.g. 304 <-> CONST[19].x, which TGSI multiplies by IN[0].z just like %74
; is multiplied by %88 below) - confirm against the driver.
; Offsets 0..8: subtracted from %133/%135/%137 at %269..%271.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; Offsets 16..28: %16..%18 are minuends and %19 the scale factor at %235..%240.
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
; Offsets 32..92: sixteen consecutive floats used as a 4x4 matrix applied to
; input 0 (and, xyz of the first three rows only, to input 4).
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; Offsets 96..136: three groups of three floats, dotted with input 1 at
; %175..%189.
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; Offsets 160..188: two (scale.xy, bias.zw) sets applied to the 2-component
; inputs at %166..%174.
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
; Offset 192: compared against 0.0 at %170 to choose between input 2 and 3.
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
; Offsets 208..264: xyz of four rows, combined with %262/%264/%266/%268 at
; %272..%292.
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
; Offsets 272..332: sixteen consecutive floats used as the 4x4 matrix that
; produces the values exported with target 12.
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; Vertex input 0: descriptor slot 0 of %4, fetched at index %5 + %7; all four
; components are used (%86..%89).
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
; Vertex input 1: only x, y, z are extracted (%94..%96).
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
%96 = extractelement <4 x float> %93, i32 2
; Vertex input 2: only x, y are extracted (%101, %102).
%97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = add i32 %5, %7
%100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99)
%101 = extractelement <4 x float> %100, i32 0
%102 = extractelement <4 x float> %100, i32 1
; Vertex input 3: only x, y are extracted (%107, %108).
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 0
%108 = extractelement <4 x float> %106, i32 1
; Vertex input 4: all four components are used (%113..%116).
%109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = add i32 %5, %7
%112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111)
%113 = extractelement <4 x float> %112, i32 0
%114 = extractelement <4 x float> %112, i32 1
%115 = extractelement <4 x float> %112, i32 2
%116 = extractelement <4 x float> %112, i32 3
; xyz of (constants at 32..88) applied to input 0:
;   %133 = c32*in0.x + c48*in0.y + c64*in0.z + c80*in0.w   (x)
;   %135, %137 likewise for the y and z columns.
%117 = fmul float %20, %86
%118 = fmul float %21, %86
%119 = fmul float %22, %86
%120 = fmul float %24, %87
%121 = fadd float %120, %117
%122 = fmul float %25, %87
%123 = fadd float %122, %118
%124 = fmul float %26, %87
%125 = fadd float %124, %119
%126 = fmul float %28, %88
%127 = fadd float %126, %121
%128 = fmul float %29, %88
%129 = fadd float %128, %123
%130 = fmul float %30, %88
%131 = fadd float %130, %125
%132 = fmul float %32, %89
%133 = fadd float %132, %127
%134 = fmul float %33, %89
%135 = fadd float %134, %129
%136 = fmul float %34, %89
%137 = fadd float %136, %131
; Full 4-component product of input 0 with the constants at 272..332; the
; result (%159, %161, %163, %165) is what the final export (target 12) emits.
%138 = fmul float %66, %86
%139 = fmul float %67, %86
%140 = fmul float %68, %86
%141 = fmul float %69, %86
%142 = fmul float %70, %87
%143 = fadd float %142, %138
%144 = fmul float %71, %87
%145 = fadd float %144, %139
%146 = fmul float %72, %87
%147 = fadd float %146, %140
%148 = fmul float %73, %87
%149 = fadd float %148, %141
%150 = fmul float %74, %88
%151 = fadd float %150, %143
%152 = fmul float %75, %88
%153 = fadd float %152, %145
%154 = fmul float %76, %88
%155 = fadd float %154, %147
%156 = fmul float %77, %88
%157 = fadd float %156, %149
%158 = fmul float %78, %89
%159 = fadd float %158, %151
%160 = fmul float %79, %89
%161 = fadd float %160, %153
%162 = fmul float %80, %89
%163 = fadd float %162, %155
%164 = fmul float %81, %89
%165 = fadd float %164, %157
; input2.xy * c[160,164] + c[168,172]  ->  x/y of the target-32 export.
%166 = fmul float %101, %45
%167 = fadd float %166, %47
%168 = fmul float %102, %46
%169 = fadd float %168, %48
; The TGSI UIF/ELSE appears here as two selects: when the constant at offset
; 192 equals 0.0 the xy of input 2 is taken, otherwise the xy of input 3.
%170 = fcmp oeq float %53, 0.000000e+00
%. = select i1 %170, float %101, float %107
%.32 = select i1 %170, float %102, float %108
; selected.xy * c[176,180] + c[184,188]  ->  z/w of the target-32 export.
%171 = fmul float %., %49
%172 = fadd float %171, %51
%173 = fmul float %.32, %50
%174 = fadd float %173, %52
; input1.xyz dotted with each constant triple:
;   %185 = dot(c[96,100,104], in1), %187 = dot(c[112,116,120], in1),
;   %189 = dot(c[128,132,136], in1).
%175 = fmul float %36, %94
%176 = fmul float %39, %94
%177 = fmul float %42, %94
%178 = fmul float %37, %95
%179 = fadd float %178, %175
%180 = fmul float %40, %95
%181 = fadd float %180, %176
%182 = fmul float %43, %95
%183 = fadd float %182, %177
%184 = fmul float %38, %96
%185 = fadd float %184, %179
%186 = fmul float %41, %96
%187 = fadd float %186, %181
%188 = fmul float %44, %96
%189 = fadd float %188, %183
; Normalize (%185, %187, %189): multiply by rsq of the squared length.
; Result a = (%196, %197, %198).
%190 = fmul float %185, %185
%191 = fmul float %187, %187
%192 = fadd float %191, %190
%193 = fmul float %189, %189
%194 = fadd float %192, %193
%195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194)
%196 = fmul float %185, %195
%197 = fmul float %187, %195
%198 = fmul float %189, %195
; input4.xyz transformed by the xyz part of the rows at offsets 32, 48, 64
; (no fourth row is added here).
%199 = fmul float %20, %113
%200 = fmul float %21, %113
%201 = fmul float %22, %113
%202 = fmul float %24, %114
%203 = fadd float %202, %199
%204 = fmul float %25, %114
%205 = fadd float %204, %200
%206 = fmul float %26, %114
%207 = fadd float %206, %201
%208 = fmul float %28, %115
%209 = fadd float %208, %203
%210 = fmul float %29, %115
%211 = fadd float %210, %205
%212 = fmul float %30, %115
%213 = fadd float %212, %207
; Normalize it the same way.  Result b = (%220, %221, %222).
%214 = fmul float %209, %209
%215 = fmul float %211, %211
%216 = fadd float %215, %214
%217 = fmul float %213, %213
%218 = fadd float %216, %217
%219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218)
%220 = fmul float %209, %219
%221 = fmul float %211, %219
%222 = fmul float %213, %219
; cross(a, b):
;   %227 = a.y*b.z - a.z*b.y, %229 = a.z*b.x - a.x*b.z, %231 = a.x*b.y - a.y*b.x
%223 = fmul float %198, %221
%224 = fmul float %196, %222
%225 = fmul float %197, %220
%226 = fmul float %197, %222
%227 = fsub float %226, %223
%228 = fmul float %198, %220
%229 = fsub float %228, %224
%230 = fmul float %196, %221
%231 = fsub float %230, %225
; Scale the cross product by input4.w.
%232 = fmul float %227, %116
%233 = fmul float %229, %116
%234 = fmul float %231, %116
; c[16,20,24] - (%133, %135, %137) * c28; these become the .w components of
; the target-33/34/35 exports.
%235 = fmul float %133, %19
%236 = fmul float %135, %19
%237 = fmul float %137, %19
%238 = fsub float %16, %235
%239 = fsub float %17, %236
%240 = fsub float %18, %237
; Input 0 times the constants at 32..92 again, this time with the fourth
; column (%268).  %262/%264/%266 repeat the same operations on the same
; operands as %133/%135/%137 above.
%241 = fmul float %20, %86
%242 = fmul float %21, %86
%243 = fmul float %22, %86
%244 = fmul float %23, %86
%245 = fmul float %24, %87
%246 = fadd float %245, %241
%247 = fmul float %25, %87
%248 = fadd float %247, %242
%249 = fmul float %26, %87
%250 = fadd float %249, %243
%251 = fmul float %27, %87
%252 = fadd float %251, %244
%253 = fmul float %28, %88
%254 = fadd float %253, %246
%255 = fmul float %29, %88
%256 = fadd float %255, %248
%257 = fmul float %30, %88
%258 = fadd float %257, %250
%259 = fmul float %31, %88
%260 = fadd float %259, %252
%261 = fmul float %32, %89
%262 = fadd float %261, %254
%263 = fmul float %33, %89
%264 = fadd float %263, %256
%265 = fmul float %34, %89
%266 = fadd float %265, %258
%267 = fmul float %35, %89
%268 = fadd float %267, %260
; (%133, %135, %137) - c[0,4,8]  ->  y/z/w of the target-36 export.
%269 = fsub float %133, %13
%270 = fsub float %135, %14
%271 = fsub float %137, %15
; xyz of (constants at 208..264) applied to (%262, %264, %266, %268):
;   %288 = c208*%262 + c224*%264 + c240*%266 + c256*%268, and likewise
;   %290, %292 for the next two columns.
%272 = fmul float %54, %262
%273 = fmul float %55, %262
%274 = fmul float %56, %262
%275 = fmul float %57, %264
%276 = fadd float %275, %272
%277 = fmul float %58, %264
%278 = fadd float %277, %273
%279 = fmul float %59, %264
%280 = fadd float %279, %274
%281 = fmul float %60, %266
%282 = fadd float %281, %276
%283 = fmul float %61, %266
%284 = fadd float %283, %278
%285 = fmul float %62, %266
%286 = fadd float %285, %280
%287 = fmul float %63, %268
%288 = fadd float %287, %282
%289 = fmul float %64, %268
%290 = fadd float %289, %284
%291 = fmul float %65, %268
%292 = fadd float %291, %286
; Exports.  The fourth operand (32..37, then 12) differs per call and matches
; the target field printed by the "exp" lines of the disassembly below.
; NOTE(review): target 12 carries the POSITION output in the simpler shader at
; the top of this dump and 32+ its GENERIC output; the leading 15 is
; presumably a four-component enable mask and the third operand (1 only on
; the last call) presumably marks the final export - confirm.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165)
ret void
}
; Intrinsic declarations used by @main.  The three value-producing intrinsics
; carry attribute group #1 (nounwind readnone); the export intrinsic has no
; attribute group.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main, #1 to the side-effect-free intrinsics above.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
; TBAA node referenced by every descriptor load in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex-shader @main whose LLVM IR is printed directly
; above.  The text after each ';' on an instruction line is its hex encoding.
; Comments below refer to the IR value numbers (%N) of that listing.
;
; Fetch the five vertex-input descriptors (IR: slots 0..4 of argument %4)
; through the pointer in s[8:9].
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
; v0 = s10 + v0: the fetch index (IR: add i32 %5, %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
; Attribute fetches.  The destination ranges overlap (v[5:8], v[8:11],
; v[10:13], v[12:15]) because the IR keeps only xyz of input 1 and xy of
; inputs 2 and 3; each later load overwrites the unused tail of the previous
; one, which is why a vmcnt(0) wait sits between them.
;   input 0 -> v1..v4, input 1 -> v5..v7, input 2 -> v8..v9,
;   input 3 -> v10..v11, input 4 -> v12..v15
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
; Descriptor for the scalar constants (IR: slot 0 of argument %1) -> s[0:3].
; NOTE(review): the s_buffer_load_dword offsets below appear to be in dwords,
; i.e. IR offset / 4 (0x30 here <-> IR offset 192, the value compared with 0).
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130
s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A
s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128
s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134
s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135
s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136
s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138
s_waitcnt lgkmcnt(0) ; BF8C007F
; IR %170 and the two selects: compare the constant with 0, then pick the xy
; of input 2 (v8, v9) or input 3 (v10, v11) into v0 and v10.
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
v_cndmask_b32_e32 v0, v10, v8 ; 0000110A
v_cndmask_b32_e32 v10, v11, v9 ; 0014130B
; v11 = bias + scale * input2.x  (IR %167); v8 gets the y counterpart (%169)
; a few instructions further down.
v_mov_b32_e32 v11, s5 ; 7E160205
s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129
s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B
v_mac_f32_e32 v11, s6, v8 ; 3E161006
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119
s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C
s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C
s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D
s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E
s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v8, s5 ; 7E100205
s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121
v_mac_f32_e32 v8, s4, v9 ; 3E101204
; Three dot products of input 1 (v5..v7) with constant triples, accumulated
; into v9, v16 and v5 (IR %185, %187, %189).
v_mul_f32_e32 v9, s6, v5 ; 10120A06
v_mac_f32_e32 v9, s11, v6 ; 3E120C0B
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E
v_mul_f32_e32 v16, s12, v5 ; 10200A0C
v_mac_f32_e32 v16, s13, v6 ; 3E200C0D
v_mul_f32_e32 v5, s14, v5 ; 100A0A0E
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05
s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144
s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148
s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145
s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149
v_mac_f32_e32 v9, s4, v7 ; 3E120E04
s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146
s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A
s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147
v_mac_f32_e32 v16, s6, v7 ; 3E200E06
v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B
s_waitcnt lgkmcnt(0) ; BF8C007F
; Input 0 (v1..v4) times the 4x4 constants starting at dword 0x44; the
; running sums v6, v7, v17, v18 end up as IR %159, %161, %163, %165 (the
; target-12 export).
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s13, v1 ; 100E020D
v_mac_f32_e32 v7, s14, v2 ; 3E0E040E
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
v_mul_f32_e32 v17, s4, v1 ; 10220204
v_mac_f32_e32 v17, s19, v2 ; 3E220413
v_mul_f32_e32 v18, s20, v1 ; 10240214
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s6, s[0:3], 0xf ; C203010F
s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C
s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D
s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E
s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v18, s5, v2 ; 3E240405
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s19, s[0:3], 0xd ; C209810D
s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E
; Input 0 times the 4x4 constants starting at dword 0x8: v20, v21, v1 and v19
; accumulate the x, y, z and w results (IR %262, %264, %266, %268).  The IR
; computes x/y/z twice; here each is computed once.
v_mul_f32_e32 v19, s4, v1 ; 10260204
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
v_mac_f32_e32 v19, s6, v2 ; 3E260406
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
v_mac_f32_e32 v7, s12, v3 ; 3E0E060C
v_mac_f32_e32 v17, s13, v3 ; 3E22060D
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114
v_mac_f32_e32 v18, s14, v3 ; 3E24060E
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v19, s4, v3 ; 3E260604
s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
v_mul_f32_e32 v20, s6, v1 ; 10280206
v_mac_f32_e32 v20, s5, v2 ; 3E280405
s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112
v_mac_f32_e32 v20, s11, v3 ; 3E28060B
v_mac_f32_e32 v20, s12, v4 ; 3E28080C
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
v_mul_f32_e32 v21, s13, v1 ; 102A020D
v_mac_f32_e32 v21, s19, v2 ; 3E2A0413
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s4, v3 ; 3E2A0604
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151
s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152
s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153
s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117
v_mac_f32_e32 v21, s12, v4 ; 3E2A080C
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mac_f32_e32 v1, s20, v2 ; 3E020414
v_mac_f32_e32 v1, s21, v3 ; 3E020615
s_waitcnt lgkmcnt(0) ; BF8C007F
; Final (input0.w) terms of both 4x4 products.
v_mac_f32_e32 v1, s22, v4 ; 3E020816
v_mac_f32_e32 v6, s23, v4 ; 3E0C0817
v_mac_f32_e32 v7, s24, v4 ; 3E0E0818
v_mac_f32_e32 v17, s25, v4 ; 3E220819
v_mac_f32_e32 v18, s26, v4 ; 3E24081A
v_mac_f32_e32 v19, s27, v4 ; 3E26081B
; z/w of the first export: bias + scale * selected xy (IR %172, %174).
v_mov_b32_e32 v2, s17 ; 7E040211
v_mac_f32_e32 v2, s15, v0 ; 3E04000F
v_mov_b32_e32 v0, s18 ; 7E000212
v_mac_f32_e32 v0, s16, v10 ; 3E001410
; Export target 32 (IR: %167, %169, %172, %174).
exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B
; Wait for the export to read its sources before v0/v2 are reused.
s_waitcnt expcnt(0) ; BF8C070F
; Input 4 xyz (v12..v14) times the xyz of the first three rows at dword 0x8:
; v0, v2, v3 = IR %209, %211, %213.
v_mul_f32_e32 v0, s6, v12 ; 10001806
v_mac_f32_e32 v0, s5, v13 ; 3E001A05
v_mul_f32_e32 v2, s13, v12 ; 1004180D
v_mac_f32_e32 v2, s19, v13 ; 3E041A13
v_mul_f32_e32 v3, s14, v12 ; 1006180E
v_mac_f32_e32 v3, s20, v13 ; 3E061A14
v_mac_f32_e32 v0, s11, v14 ; 3E001C0B
v_mac_f32_e32 v2, s4, v14 ; 3E041C04
v_mac_f32_e32 v3, s21, v14 ; 3E061C15
; Squared lengths and reciprocal square roots for both vectors
; (v4 <- v9/v16/v5, v8 <- v0/v2/v3), followed by the normalizing multiplies.
v_mul_f32_e32 v4, v9, v9 ; 10081309
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v4, v5, v5 ; 3E080B05
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mul_f32_e32 v10, v4, v16 ; 10142104
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mul_f32_e32 v3, v8, v3 ; 10060708
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_waitcnt lgkmcnt(0) ; BF8C007F
; v5 = s4 - v20 * s5  (IR %238), the .w of the target-33 export.
v_mov_b32_e32 v5, s4 ; 7E0A0204
v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14
; Export target 33 (IR: %220, %221, %222, %238).
exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200
s_waitcnt expcnt(0) ; BF8C070F
; Cross product of (v9, v10, v4) with (v0, v2, v3) (IR %227, %229, %231),
; then scaled by input4.w in v15 (IR %232..%234).
v_mul_f32_e32 v5, v2, v4 ; 100A0902
v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A
v_mul_f32_e32 v3, v3, v9 ; 10061303
v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104
v_mul_f32_e32 v0, v0, v10 ; 10001500
v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509
v_mul_f32_e32 v2, v15, v5 ; 10040B0F
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_f32_e32 v0, v15, v0 ; 1000010F
; v5 = s6 - v21 * s5  (IR %239).
v_mov_b32_e32 v5, s6 ; 7E0A0206
v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15
; Export target 34 (IR: %232, %233, %234, %239).
exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302
; Remaining constants for the last three generic exports.  The final load
; overwrites s0, part of the descriptor, so no further constant loads follow.
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140
s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139
s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A
s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C
s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D
s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
; v0 = s4 - v1 * s5  (IR %240).
v_mov_b32_e32 v0, s4 ; 7E000204
v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01
; v2, v11, v1 = (v20, v21, v1) minus the constants at dwords 0..2
; (IR %269..%271); v3, v5, v8 accumulate IR %288, %290, %292.
v_subrev_f32_e32 v2, s6, v20 ; 0A042806
v_mul_f32_e32 v3, s7, v20 ; 10062807
v_mul_f32_e32 v5, s8, v20 ; 100A2808
v_mul_f32_e32 v8, s9, v20 ; 10102809
v_mac_f32_e32 v3, s10, v21 ; 3E062A0A
v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10
v_mac_f32_e32 v8, s17, v21 ; 3E102A11
v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v5, s19, v1 ; 3E0A0213
v_mac_f32_e32 v8, s0, v1 ; 3E100200
v_subrev_f32_e32 v1, s12, v1 ; 0A02020C
v_mac_f32_e32 v3, s13, v19 ; 3E06260D
v_mac_f32_e32 v5, s14, v19 ; 3E0A260E
v_mac_f32_e32 v8, s15, v19 ; 3E10260F
; Export targets 35, 36, 37, then target 12 last.  The target-12 line is the
; only one with 1 in its fourth field, mirroring the third operand of the
; final llvm.SI.export call in the IR.
exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09
exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211
exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503
exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 24
Code Size: 920 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..9]
DCL TEMP[0..13], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 10.0000, 0.9680}
IMM[1] FLT32 { 0.0300, 2.0000, 0.5000, 0.0001}
IMM[2] FLT32 { -1.0000, 0.0000, 0.0000, 0.0000}
0: DP3 TEMP[0].x, IN[3].xyzz, IN[3].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[3].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].x, TEMP[2], SAMP[1], 2D
8: MOV TEMP[3].xyz, IMM[0].xxxx
9: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
10: UIF TEMP[2].xxxx :0
11: MUL TEMP[2].xyz, CONST[5].xyzz, CONST[4].xyzz
12: MOV TEMP[4].xy, IN[0].xyyy
13: TEX TEMP[4].xyz, TEMP[4], SAMP[0], 2D
14: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz
15: ELSE :0
16: MOV TEMP[2].xy, IN[0].xyyy
17: TEX TEMP[2].xyz, TEMP[2], SAMP[0], 2D
18: MUL TEMP[3].xyz, CONST[4].xyzz, TEMP[2].xyzz
19: ENDIF
20: LRP TEMP[2].xyz, CONST[8].xxxx, TEMP[3].xyzz, CONST[1].xyzz
21: MOV TEMP[4].x, IN[1].wwww
22: MOV TEMP[4].y, IN[2].wwww
23: MOV TEMP[4].z, IN[3].wwww
24: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
25: RSQ TEMP[5].x, TEMP[5].xxxx
26: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
27: DP3 TEMP[5].x, TEMP[0].xyzz, TEMP[4].xyzz
28: MAX TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx
29: DP3 TEMP[6].x, IN[5].xyzz, IN[5].xyzz
30: MOV TEMP[6].xy, TEMP[6].xxxx
31: TEX TEMP[6].w, TEMP[6], SAMP[2], 2D
32: MUL TEMP[6].xyz, CONST[2].xyzz, TEMP[6].wwww
33: MOV TEMP[1].xyz, -TEMP[1].xyzx
34: ADD TEMP[7].x, IMM[0].xxxx, -CONST[9].xxxx
35: ADD TEMP[8].xyz, TEMP[4].xyzz, TEMP[1].xyzz
36: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz
37: RSQ TEMP[9].x, TEMP[9].xxxx
38: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx
39: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
40: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
41: DP3 TEMP[4].x, TEMP[4].xyzz, TEMP[8].xyzz
42: MAX TEMP[4].x, IMM[0].yyyy, TEMP[4].xxxx
43: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx
44: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww
45: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[7].xxxx
46: MAD TEMP[10].x, TEMP[10].xxxx, IMM[0].wwww, IMM[1].xxxx
47: LG2 TEMP[10].x, TEMP[10].xxxx
48: RCP TEMP[10].x, TEMP[10].xxxx
49: MUL TEMP[10].x, IMM[0].zzzz, TEMP[10].xxxx
50: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx
51: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[5].xxxx
52: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[1].xxxx
53: MUL TEMP[13].x, IMM[1].yyyy, TEMP[4].xxxx
54: MUL TEMP[7].x, TEMP[4].xxxx, TEMP[7].xxxx
55: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].zzzz
56: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
57: LRP TEMP[13].x, TEMP[5].xxxx, IMM[0].xxxx, TEMP[9].xxxx
58: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[9].xxxx
59: MAD TEMP[1].x, TEMP[13].xxxx, TEMP[1].xxxx, IMM[1].wwww
60: RCP TEMP[1].x, TEMP[1].xxxx
61: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz
62: MAX TEMP[8].x, IMM[0].yyyy, TEMP[8].xxxx
63: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx
64: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].xxxx
65: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy
66: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
67: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[8].xxxx
68: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[5].xxxx
69: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].xxxx
70: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
71: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[6].xyzz
72: ADD TEMP[8].xyz, IMM[0].xxxx, -TEMP[2].xyzz
73: MUL TEMP[9].x, TEMP[4].xxxx, TEMP[4].xxxx
74: MUL TEMP[10].x, TEMP[4].xxxx, TEMP[4].xxxx
75: MUL TEMP[4].x, TEMP[10].xxxx, TEMP[4].xxxx
76: MUL TEMP[4].x, TEMP[9].xxxx, TEMP[4].xxxx
77: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[4].xxxx, TEMP[2].xyzz
78: MUL TEMP[4].x, CONST[8].xxxx, CONST[1].wwww
79: ADD TEMP[4].x, CONST[1].wwww, -TEMP[4].xxxx
80: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
81: ADD TEMP[4].x, TEMP[7].xxxx, IMM[2].xxxx
82: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx
83: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx
84: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
85: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
86: MAD TEMP[4].x, TEMP[4].xxxx, TEMP[8].xxxx, IMM[0].xxxx
87: ADD TEMP[7].x, TEMP[7].xxxx, IMM[2].xxxx
88: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx
89: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx
90: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
91: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
92: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].xxxx
93: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[7].xxxx
94: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
95: MUL TEMP[4].xyz, TEMP[6].xyzz, TEMP[4].xxxx
96: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz
97: MAD TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xyzz, TEMP[3].xyzz
98: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww
99: MOV_SAT TEMP[1].x, TEMP[1].xxxx
100: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
101: MOV TEMP[0].xyz, TEMP[0].xyzx
102: MOV TEMP[0].w, IMM[0].xxxx
103: MOV OUT[0], TEMP[0]
104: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%42 = load <8 x i32>, <8 x i32> addrspace(2)* %41, align 32, !tbaa !0
%43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%44 = load <4 x i32>, <4 x i32> addrspace(2)* %43, align 16, !tbaa !0
%45 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%46 = bitcast <8 x i32> addrspace(2)* %45 to <32 x i8> addrspace(2)*
%47 = load <32 x i8>, <32 x i8> addrspace(2)* %46, align 32, !tbaa !0
%48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%49 = bitcast <4 x i32> addrspace(2)* %48 to <16 x i8> addrspace(2)*
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
%51 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%52 = bitcast <8 x i32> addrspace(2)* %51 to <32 x i8> addrspace(2)*
%53 = load <32 x i8>, <32 x i8> addrspace(2)* %52, align 32, !tbaa !0
%54 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%55 = bitcast <4 x i32> addrspace(2)* %54 to <16 x i8> addrspace(2)*
%56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0
%57 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%58 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%72 = fmul float %61, %61
%73 = fmul float %62, %62
%74 = fadd float %73, %72
%75 = fmul float %63, %63
%76 = fadd float %74, %75
%77 = call float @llvm.AMDGPU.rsq.clamped.f32(float %76)
%78 = fmul float %61, %77
%79 = fmul float %62, %77
%80 = fmul float %63, %77
%81 = fmul float %66, %66
%82 = fmul float %67, %67
%83 = fadd float %82, %81
%84 = fmul float %68, %68
%85 = fadd float %83, %84
%86 = call float @llvm.AMDGPU.rsq.clamped.f32(float %85)
%87 = fmul float %66, %86
%88 = fmul float %67, %86
%89 = fmul float %68, %86
%90 = bitcast float %57 to i32
%91 = bitcast float %58 to i32
%92 = insertelement <2 x i32> undef, i32 %90, i32 0
%93 = insertelement <2 x i32> %92, i32 %91, i32 1
%94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %47, <16 x i8> %50, i32 2)
%95 = extractelement <4 x float> %94, i32 0
%96 = fcmp ogt float %95, 0.000000e+00
br i1 %96, label %IF, label %ELSE
IF: ; preds = %main_body
%97 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%98 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%99 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%100 = fmul float %99, %36
%101 = fmul float %98, %37
%102 = fmul float %97, %38
%103 = bitcast float %57 to i32
%104 = bitcast float %58 to i32
%105 = insertelement <2 x i32> undef, i32 %103, i32 0
%106 = insertelement <2 x i32> %105, i32 %104, i32 1
%107 = bitcast <8 x i32> %42 to <32 x i8>
%108 = bitcast <4 x i32> %44 to <16 x i8>
%109 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2)
%110 = extractelement <4 x float> %109, i32 0
%111 = extractelement <4 x float> %109, i32 1
%112 = extractelement <4 x float> %109, i32 2
%113 = fmul float %100, %110
%114 = fmul float %101, %111
%115 = fmul float %102, %112
br label %ENDIF
ELSE: ; preds = %main_body
%116 = bitcast float %57 to i32
%117 = bitcast float %58 to i32
%118 = insertelement <2 x i32> undef, i32 %116, i32 0
%119 = insertelement <2 x i32> %118, i32 %117, i32 1
%120 = bitcast <8 x i32> %42 to <32 x i8>
%121 = bitcast <4 x i32> %44 to <16 x i8>
%122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2)
%123 = extractelement <4 x float> %122, i32 0
%124 = extractelement <4 x float> %122, i32 1
%125 = extractelement <4 x float> %122, i32 2
%126 = fmul float %36, %123
%127 = fmul float %37, %124
%128 = fmul float %38, %125
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp14.0 = phi float [ %115, %IF ], [ %128, %ELSE ]
%temp13.0 = phi float [ %114, %IF ], [ %127, %ELSE ]
%temp12.0 = phi float [ %113, %IF ], [ %126, %ELSE ]
%129 = call float @llvm.AMDGPU.lrp(float %39, float %temp12.0, float %26)
%130 = call float @llvm.AMDGPU.lrp(float %39, float %temp13.0, float %27)
%131 = call float @llvm.AMDGPU.lrp(float %39, float %temp14.0, float %28)
%132 = fmul float %59, %59
%133 = fmul float %60, %60
%134 = fadd float %133, %132
%135 = fmul float %64, %64
%136 = fadd float %134, %135
%137 = call float @llvm.AMDGPU.rsq.clamped.f32(float %136)
%138 = fmul float %59, %137
%139 = fmul float %60, %137
%140 = fmul float %64, %137
%141 = fmul float %78, %138
%142 = fmul float %79, %139
%143 = fadd float %142, %141
%144 = fmul float %80, %140
%145 = fadd float %143, %144
%146 = call float @llvm.maxnum.f32(float %145, float 0.000000e+00)
%147 = fmul float %69, %69
%148 = fmul float %70, %70
%149 = fadd float %148, %147
%150 = fmul float %71, %71
%151 = fadd float %149, %150
%152 = bitcast float %151 to i32
%153 = bitcast float %151 to i32
%154 = insertelement <2 x i32> undef, i32 %152, i32 0
%155 = insertelement <2 x i32> %154, i32 %153, i32 1
%156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %53, <16 x i8> %56, i32 2)
%157 = extractelement <4 x float> %156, i32 3
%158 = fmul float %30, %157
%159 = fmul float %31, %157
%160 = fmul float %32, %157
%161 = fsub float 1.000000e+00, %40
%162 = fsub float %138, %87
%163 = fsub float %139, %88
%164 = fsub float %140, %89
%165 = fmul float %162, %162
%166 = fmul float %163, %163
%167 = fadd float %166, %165
%168 = fmul float %164, %164
%169 = fadd float %167, %168
%170 = call float @llvm.AMDGPU.rsq.clamped.f32(float %169)
%171 = fmul float %162, %170
%172 = fmul float %163, %170
%173 = fmul float %164, %170
%174 = fmul float %87, %78
%175 = fsub float -0.000000e+00, %174
%176 = fmul float %88, %79
%177 = fsub float %175, %176
%178 = fmul float %89, %80
%179 = fsub float %177, %178
%180 = call float @llvm.maxnum.f32(float %179, float 0.000000e+00)
%181 = fmul float %138, %171
%182 = fmul float %139, %172
%183 = fadd float %182, %181
%184 = fmul float %140, %173
%185 = fadd float %183, %184
%186 = call float @llvm.maxnum.f32(float %185, float 0.000000e+00)
%187 = fmul float %161, %161
%188 = fmul float %187, %35
%189 = fsub float 1.000000e+00, %161
%190 = fmul float %189, 0x3FEEF9DB20000000
%191 = fadd float %190, 0x3F9EB851E0000000
%192 = call float @llvm.log2.f32(float %191)
%193 = fdiv float 1.000000e+00, %192
%194 = fmul float %193, 1.000000e+01
%195 = fmul float %194, %194
%196 = fsub float 1.000000e+00, %146
%197 = fsub float 1.000000e+00, %180
%198 = fmul float %186, 2.000000e+00
%199 = fmul float %186, %161
%200 = fmul float %198, %199
%201 = fadd float %200, 5.000000e-01
%202 = fsub float 1.000000e+00, %186
%203 = call float @llvm.AMDGPU.lrp(float %146, float 1.000000e+00, float %188)
%204 = call float @llvm.AMDGPU.lrp(float %180, float 1.000000e+00, float %188)
%205 = fmul float %203, %204
%206 = fadd float %205, 0x3F1A36E2E0000000
%207 = fdiv float 1.000000e+00, %206
%208 = fmul float %78, %171
%209 = fmul float %79, %172
%210 = fadd float %209, %208
%211 = fmul float %80, %173
%212 = fadd float %210, %211
%213 = call float @llvm.maxnum.f32(float %212, float 0.000000e+00)
%214 = call float @llvm.pow.f32(float %213, float %195)
%215 = fadd float %195, 1.000000e+00
%216 = fmul float %215, %34
%217 = fmul float %214, %216
%218 = fmul float %207, %217
%219 = fmul float %218, %146
%220 = fmul float %219, %33
%221 = call float @llvm.maxnum.f32(float %220, float 0.000000e+00)
%222 = fmul float %221, %158
%223 = fmul float %221, %159
%224 = fmul float %221, %160
%225 = fsub float 1.000000e+00, %129
%226 = fsub float 1.000000e+00, %130
%227 = fsub float 1.000000e+00, %131
%228 = fmul float %202, %202
%229 = fmul float %202, %202
%230 = fmul float %229, %202
%231 = fmul float %228, %230
%232 = fmul float %225, %231
%233 = fadd float %232, %129
%234 = fmul float %226, %231
%235 = fadd float %234, %130
%236 = fmul float %227, %231
%237 = fadd float %236, %131
%238 = fmul float %39, %29
%239 = fsub float %29, %238
%240 = fmul float %temp12.0, %239
%241 = fmul float %temp13.0, %239
%242 = fmul float %temp14.0, %239
%243 = fadd float %201, -1.000000e+00
%244 = fmul float %196, %196
%245 = fmul float %196, %196
%246 = fmul float %245, %196
%247 = fmul float %244, %246
%248 = fmul float %243, %247
%249 = fadd float %248, 1.000000e+00
%250 = fadd float %201, -1.000000e+00
%251 = fmul float %197, %197
%252 = fmul float %197, %197
%253 = fmul float %252, %197
%254 = fmul float %251, %253
%255 = fmul float %250, %254
%256 = fadd float %255, 1.000000e+00
%257 = fmul float %249, %256
%258 = fmul float %257, %146
%259 = fmul float %158, %258
%260 = fmul float %159, %258
%261 = fmul float %160, %258
%262 = fmul float %240, %259
%263 = fmul float %241, %260
%264 = fmul float %242, %261
%265 = fmul float %222, %233
%266 = fadd float %265, %262
%267 = fmul float %223, %235
%268 = fadd float %267, %263
%269 = fmul float %224, %237
%270 = fadd float %269, %264
%271 = fmul float %65, %24
%272 = fadd float %271, %25
%273 = call float @llvm.AMDIL.clamp.(float %272, float 0.000000e+00, float 1.000000e+00)
%274 = fmul float %266, %273
%275 = fmul float %268, %273
%276 = fmul float %270, %273
%277 = call i32 @llvm.SI.packf16(float %274, float %275)
%278 = bitcast i32 %277 to float
%279 = call i32 @llvm.SI.packf16(float %276, float 1.000000e+00)
%280 = bitcast i32 %279 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %278, float %280, float %278, float %280)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the shader whose IR (the one using llvm.SI.fs.interp) ends just above.
; Prologue: exec is rewritten by s_wqm_b64 and m0 is loaded from s9; every
; v_interp instruction below names [m0].
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Four dwords fetched via s[2:3] land in s[12:15]; all s_buffer_load_dword
; instructions in this shader read through s[12:15].
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
; Each v_interp_p1/v_interp_p2 pair produces one interpolated channel. The two
; numeric operands appear in the same order as the (i32, i32) arguments of the
; llvm.SI.fs.interp calls in the IR: (0,0) (1,0) (3,1) (3,2) (0,3) ...
v_interp_p1_f32 v11, v0, 0, 0, [m0] ; C82C0000
v_interp_p2_f32 v11, [v11], v1, 0, 0, [m0] ; C82D0001
v_interp_p1_f32 v12, v0, 1, 0, [m0] ; C8300100
v_interp_p2_f32 v12, [v12], v1, 1, 0, [m0] ; C8310101
v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700
v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701
v_interp_p1_f32 v5, v0, 3, 2, [m0] ; C8140B00
v_interp_p2_f32 v5, [v5], v1, 3, 2, [m0] ; C8150B01
v_interp_p1_f32 v8, v0, 0, 3, [m0] ; C8200C00
v_interp_p2_f32 v8, [v8], v1, 0, 3, [m0] ; C8210C01
; Wait for s[12:15], then start fetching the first scalar constants
; (dword indices 0x3, 0x10..0x12 and 0x20).
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s1, s[12:15], 0x3 ; C2008D03
s_buffer_load_dword s10, s[12:15], 0x10 ; C2050D10
s_buffer_load_dword s11, s[12:15], 0x11 ; C2058D11
s_buffer_load_dword s16, s[12:15], 0x12 ; C2080D12
s_buffer_load_dword s0, s[12:15], 0x20 ; C2000D20
v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00
v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01
v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00
v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01
v_interp_p1_f32 v6, v0, 3, 3, [m0] ; C8180F00
v_interp_p2_f32 v6, [v6], v1, 3, 3, [m0] ; C8190F01
v_interp_p1_f32 v15, v0, 0, 4, [m0] ; C83C1000
v_interp_p2_f32 v15, [v15], v1, 0, 4, [m0] ; C83D1001
v_interp_p1_f32 v16, v0, 1, 4, [m0] ; C8401100
v_interp_p2_f32 v16, [v16], v1, 1, 4, [m0] ; C8411101
v_interp_p1_f32 v17, v0, 2, 4, [m0] ; C8441200
v_interp_p2_f32 v17, [v17], v1, 2, 4, [m0] ; C8451201
v_interp_p1_f32 v18, v0, 3, 4, [m0] ; C8481300
v_interp_p2_f32 v18, [v18], v1, 3, 4, [m0] ; C8491301
v_interp_p1_f32 v9, v0, 0, 5, [m0] ; C8241400
v_interp_p2_f32 v9, [v9], v1, 0, 5, [m0] ; C8251401
v_interp_p1_f32 v10, v0, 1, 5, [m0] ; C8281500
v_interp_p2_f32 v10, [v10], v1, 1, 5, [m0] ; C8291501
; The p1 half of the last channel is issued here; its p2 half follows further
; down, after the two rsq sequences, because v1 is still needed as an input.
v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600
; v19 = rsq_clamp(v8*v8 + v13*v13 + v14*v14) and
; v20 = rsq_clamp(v16*v16 + v17*v17 + v18*v18): the two
; llvm.AMDGPU.rsq.clamped.f32 calls that precede the branch in the IR.
; The s_load_* instructions interleaved here fetch the descriptors that the
; first two image_sample instructions consume.
v_mul_f32_e32 v19, v8, v8 ; 10261108
v_mac_f32_e32 v19, v13, v13 ; 3E261B0D
s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700
v_mac_f32_e32 v19, v14, v14 ; 3E261D0E
v_rsq_clamp_f32_e32 v19, v19 ; 7E265913
v_mul_f32_e32 v20, v16, v16 ; 10282110
v_mac_f32_e32 v20, v17, v17 ; 3E282311
v_mac_f32_e32 v20, v18, v18 ; 3E282512
v_rsq_clamp_f32_e32 v20, v20 ; 7E285914
v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601
s_waitcnt lgkmcnt(0) ; BF8C007F
; First texture fetch: coordinates v[11:12], only v1 is written. Its result
; drives the branch, like the extractelement/fcmp ogt pair in the IR.
image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[36:43], s[20:23] ; F0800100 00A9010B
s_waitcnt vmcnt(0) ; BF8C0770
; vcc = NOT (0 < v1). exec is narrowed to those lanes and the complementary
; lane set is kept in s[22:23] for the second half of the branch.
v_cmp_nlt_f32_e32 vcc, 0, v1 ; 7C1C0280
s_and_saveexec_b64 s[22:23], vcc ; BE96246A
s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E
; Lanes failing the "> 0" test: sample with s[24:31]/s[32:35] and scale the
; three results by s10/s11/s16 (same shape as the IR ELSE block).
image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[32:35] ; F0800700 0106150B
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, s10, v21 ; 10042A0A
v_mul_f32_e32 v3, s11, v22 ; 10062C0B
v_mul_f32_e32 v4, s16, v23 ; 10082E10
; exec is widened again. s1 and s0 are copied into v1/v21 (s1 is overwritten
; by a new load right below), and the remaining scalar constants are fetched
; before exec is flipped to the other lane set.
s_or_saveexec_b64 s[22:23], s[22:23] ; BE962516
v_mov_b32_e32 v1, s1 ; 7E020201
s_buffer_load_dword s21, s[12:15], 0x2 ; C20A8D02
s_buffer_load_dword s1, s[12:15], 0x4 ; C2008D04
s_buffer_load_dword s2, s[12:15], 0x5 ; C2010D05
s_buffer_load_dword s3, s[12:15], 0x6 ; C2018D06
s_buffer_load_dword s36, s[12:15], 0x7 ; C2120D07
s_buffer_load_dword s17, s[12:15], 0x8 ; C2088D08
s_buffer_load_dword s18, s[12:15], 0x9 ; C2090D09
s_buffer_load_dword s19, s[12:15], 0xa ; C2098D0A
s_buffer_load_dword s8, s[12:15], 0xc ; C2040D0C
s_buffer_load_dword s9, s[12:15], 0xd ; C2048D0D
s_buffer_load_dword s20, s[12:15], 0xf ; C20A0D0F
s_buffer_load_dword s37, s[12:15], 0x24 ; C2128D24
v_mov_b32_e32 v21, s0 ; 7E2A0200
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[22:23] ; 89FE167E
; NOTE(review): the BB0_4 target label is not printed anywhere in this listing.
s_cbranch_execz BB0_4 ; BF880000
; Lanes passing the "> 0" test: dwords 0x14..0x16 (byte offsets 80/84/88, the
; offsets the IR IF block passes to llvm.SI.load.const) scale s10/s11/s16
; before the product with the second sample of the same texture.
v_mov_b32_e32 v2, s10 ; 7E04020A
v_mov_b32_e32 v3, s11 ; 7E06020B
v_mov_b32_e32 v4, s16 ; 7E080210
s_buffer_load_dword s38, s[12:15], 0x14 ; C2130D14
s_buffer_load_dword s39, s[12:15], 0x15 ; C2138D15
s_buffer_load_dword s40, s[12:15], 0x16 ; C2140D16
image_sample v[22:24], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[24:31], s[32:35] ; F0800700 0106160B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s38, v2 ; 10040426
v_mul_f32_e32 v3, s39, v3 ; 10060627
v_mul_f32_e32 v4, s40, v4 ; 10080828
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v2, v22, v2 ; 10040516
v_mul_f32_e32 v3, v23, v3 ; 10060717
v_mul_f32_e32 v4, v24, v4 ; 10080918
; Join point: both lane sets are active again; v2/v3/v4 hold the per-branch
; results (the three phi values of the IR ENDIF block).
s_or_b64 exec, exec, s[22:23] ; 88FE167E
; v1 = s21 * v15 + v1; it is clamped further down by the v_add ... clamp.
v_mac_f32_e32 v1, s21, v15 ; 3E021E15
v_mad_f32 v11, -v21, s36, s36 ; D282000B 20904915
; Scale the attribute-3 and attribute-4 channels by the reciprocal lengths
; computed before the branch (v19 and v20).
v_mul_f32_e32 v8, v19, v8 ; 10101113
v_mul_f32_e32 v12, v19, v13 ; 10181B13
v_mul_f32_e32 v13, v19, v14 ; 101A1D13
v_mul_f32_e32 v14, v20, v16 ; 101C2114
v_mul_f32_e32 v15, v20, v17 ; 101E2314
v_mul_f32_e32 v16, v20, v18 ; 10202514
v_sub_f32_e64 v17, 1.0, s37 ; D2080011 00004AF2
v_add_f32_e64 v1, 0, v1 clamp ; D2060801 00020280
; Descriptors for the third texture fetch.
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
; v18 = v9*v9 + v10*v10 + v0*v0 (sum of squares of the attribute-5 channels);
; v9 is then reused for the rsq of v7/v5/v6.
v_mul_f32_e32 v18, v9, v9 ; 10241309
v_mac_f32_e32 v18, v10, v10 ; 3E24150A
v_mul_f32_e32 v9, v7, v7 ; 10120F07
v_mac_f32_e32 v9, v5, v5 ; 3E120B05
v_mac_f32_e32 v9, v6, v6 ; 3E120D06
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mac_f32_e32 v18, v0, v0 ; 3E240100
; The same scalar is duplicated into v19 so v[18:19] can serve as both
; texture coordinates, as the IR does with its <2 x i32> built from one value.
v_mov_b32_e32 v19, v18 ; 7E260312
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[24:31], s[12:15] ; F0800800 00660012
; Vector arithmetic overlapped with the outstanding sample: differences of the
; two scaled vectors, a third rsq_clamp, and several three-term dot products
; accumulated with v_mac.
v_mul_f32_e32 v10, v9, v7 ; 10140F09
v_mad_f32 v7, v7, v9, -v14 ; D2820007 843A1307
v_mul_f32_e32 v14, v8, v14 ; 101C1D08
v_mad_f32 v14, -v15, v12, -v14 ; D282000E A43A190F
v_mad_f32 v15, v5, v9, -v15 ; D282000F 843E1305
v_mul_f32_e32 v18, v7, v7 ; 10240F07
v_mac_f32_e32 v18, v15, v15 ; 3E241F0F
v_mad_f32 v19, v6, v9, -v16 ; D2820013 84421306
v_mac_f32_e32 v18, v19, v19 ; 3E242713
v_rsq_clamp_f32_e32 v18, v18 ; 7E245912
v_mul_f32_e32 v5, v9, v5 ; 100A0B09
v_mul_f32_e32 v20, v10, v8 ; 1028110A
v_mac_f32_e32 v20, v5, v12 ; 3E281905
v_mul_f32_e32 v7, v18, v7 ; 100E0F12
v_mul_f32_e32 v15, v18, v15 ; 101E1F12
v_mul_f32_e32 v10, v7, v10 ; 10141507
v_mac_f32_e32 v10, v15, v5 ; 3E140B0F
v_mul_f32_e32 v5, v7, v8 ; 100A1107
v_mac_f32_e32 v5, v15, v12 ; 3E0A190F
v_mul_f32_e32 v6, v9, v6 ; 100C0D09
v_mul_f32_e32 v7, v18, v19 ; 100E2712
v_mad_f32 v8, -v16, v13, v14 ; D2820008 243A1B10
v_mac_f32_e32 v20, v6, v13 ; 3E281B06
v_mac_f32_e32 v10, v7, v6 ; 3E140D07
v_mac_f32_e32 v5, v7, v13 ; 3E0A1B07
; Third sample result (v0) scaled by s17/s18/s19.
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v6, s17, v0 ; 100C0011
v_mul_f32_e32 v7, s18, v0 ; 100E0012
v_mul_f32_e32 v0, s19, v0 ; 10000013
v_mul_f32_e32 v9, v17, v17 ; 10122311
v_mul_f32_e32 v9, s20, v9 ; 10121214
v_sub_f32_e32 v12, 1.0, v17 ; 081822F2
v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F
v_madmk_f32_e32 v12, v12, v13, 0x3f77ced9 ; 40181B0C 3F77CED9
; v_max with 0 clamps the dot products from below (llvm.maxnum.f32 in the IR).
v_max_f32_e32 v10, 0, v10 ; 20141480
v_mul_f32_e32 v13, v17, v10 ; 101A1511
v_add_f32_e32 v14, v10, v10 ; 061C150A
v_mad_f32 v13, v14, v13, 0.5 ; D282000D 03C21B0E
v_max_f32_e32 v8, 0, v8 ; 20101080
v_log_f32_e32 v12, v12 ; 7E184F0C
v_sub_f32_e32 v14, 1.0, v8 ; 081C10F2
v_mul_f32_e32 v15, v9, v14 ; 101E1D09
v_mac_f32_e32 v15, 1.0, v8 ; 3E1E10F2
v_max_f32_e32 v8, 0, v20 ; 20102880
v_rcp_f32_e32 v12, v12 ; 7E18550C
v_sub_f32_e32 v16, 1.0, v8 ; 082010F2
v_mul_f32_e32 v9, v9, v16 ; 10122109
v_mac_f32_e32 v9, 1.0, v8 ; 3E1210F2
v_max_f32_e32 v5, 0, v5 ; 200A0A80
; The v_log / v_mul_legacy / v_exp sequence on v5 appears to be the expansion
; of the single llvm.pow.f32 call in the IR (exponent v15 = v12 * v12).
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_madak_f32_e32 v9, v9, v15, 0x38d1b717 ; 42121F09 38D1B717
v_mul_f32_e32 v12, 0x41200000, v12 ; 101818FF 41200000
v_mul_f32_e32 v15, v12, v12 ; 101E190C
v_mul_legacy_f32_e32 v5, v15, v5 ; 0E0A0B0F
v_rcp_f32_e32 v9, v9 ; 7E125509
v_mad_f32 v12, v12, v12, 1.0 ; D282000C 03CA190C
v_mul_f32_e32 v12, s9, v12 ; 10181809
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_mul_f32_e32 v5, v12, v5 ; 100A0B0C
v_mul_f32_e32 v5, v5, v9 ; 100A1305
v_mul_f32_e32 v5, v8, v5 ; 100A0B08
v_mul_f32_e32 v5, s8, v5 ; 100A0A08
; v9 = (1 - v10)^5, built as x * x^2 * x^2.
v_sub_f32_e32 v9, 1.0, v10 ; 081214F2
v_mul_f32_e32 v10, v9, v9 ; 10141309
v_mul_f32_e32 v9, v9, v10 ; 10121509
v_mul_f32_e32 v9, v9, v10 ; 10121509
; Blend of the branch results v2/v3/v4 against s1/s2/s3 with weight s0
; (the three llvm.AMDGPU.lrp calls of the IR), together with "1 - blend".
v_sub_f32_e64 v10, 1.0, s0 ; D208000A 000000F2
v_mul_f32_e32 v12, s1, v10 ; 10181401
v_mad_f32 v15, -v10, s1, 1.0 ; D282000F 23C8030A
v_mul_f32_e32 v17, s2, v10 ; 10221402
v_mad_f32 v18, -v10, s2, 1.0 ; D2820012 23C8050A
v_mul_f32_e32 v19, s3, v10 ; 10261403
v_mad_f32 v10, -v10, s3, 1.0 ; D282000A 23C8070A
v_mac_f32_e32 v12, s0, v2 ; 3E180400
v_mac_f32_e32 v17, s0, v3 ; 3E220600
v_mac_f32_e32 v19, s0, v4 ; 3E260800
v_mad_f32 v15, -s0, v2, v15 ; D282000F 243E0400
v_mad_f32 v18, -s0, v3, v18 ; D2820012 244A0600
v_mad_f32 v10, -s0, v4, v10 ; D282000A 242A0800
v_mac_f32_e32 v12, v9, v15 ; 3E181F09
v_mac_f32_e32 v17, v9, v18 ; 3E222509
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mac_f32_e32 v19, v9, v10 ; 3E261509
v_mul_f32_e32 v9, v6, v5 ; 10120B06
v_mul_f32_e32 v10, v7, v5 ; 10140B07
v_mul_f32_e32 v5, v0, v5 ; 100A0B00
v_mul_f32_e32 v2, v11, v2 ; 1004050B
v_mul_f32_e32 v3, v11, v3 ; 1006070B
v_mul_f32_e32 v4, v11, v4 ; 1008090B
; Two more fifth powers (of v16 and v14), each folded into
; "(v13 - 1) * x^5 + 1", then multiplied together.
v_add_f32_e32 v11, -1.0, v13 ; 06161AF3
v_mul_f32_e32 v13, v16, v16 ; 101A2110
v_mul_f32_e32 v15, v16, v13 ; 101E1B10
v_mul_f32_e32 v13, v15, v13 ; 101A1B0F
v_mad_f32 v13, v11, v13, 1.0 ; D282000D 03CA1B0B
v_mul_f32_e32 v15, v14, v14 ; 101E1D0E
v_mul_f32_e32 v14, v14, v15 ; 101C1F0E
v_mul_f32_e32 v14, v14, v15 ; 101C1F0E
v_mad_f32 v11, v11, v14, 1.0 ; D282000B 03CA1D0B
v_mul_f32_e32 v11, v11, v13 ; 10161B0B
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_mul_f32_e32 v6, v8, v6 ; 100C0D08
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v0, v8, v0 ; 10000108
; Combine the two partial results per channel and apply the clamped factor v1.
v_mul_f32_e32 v2, v6, v2 ; 10040506
v_mul_f32_e32 v3, v7, v3 ; 10060707
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_mac_f32_e32 v2, v12, v9 ; 3E04130C
v_mac_f32_e32 v3, v17, v10 ; 3E061511
v_mac_f32_e32 v0, v19, v5 ; 3E000B13
v_mul_f32_e32 v2, v1, v2 ; 10040501
v_mul_f32_e32 v3, v1, v3 ; 10060701
v_mul_f32_e32 v0, v1, v0 ; 10000101
; Pack (v2, v3) and (v0, 1.0) into two f16 pairs and export them in the order
; v1, v0, v1, v0, mirroring the llvm.SI.packf16 / llvm.SI.export calls in the IR.
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 28
Code Size: 1052 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..19]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5], TEMP[3].xyzz, TEMP[3].yzzx
34: DP4 TEMP[6].x, CONST[1], TEMP[5]
35: DP4 TEMP[7].x, CONST[2], TEMP[5]
36: MOV TEMP[6].y, TEMP[7].xxxx
37: DP4 TEMP[5].x, CONST[3], TEMP[5]
38: MOV TEMP[6].z, TEMP[5].xxxx
39: MUL TEMP[5].x, TEMP[3].yyyy, TEMP[3].yyyy
40: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[5].xxxx
41: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
42: ADD TEMP[5].xyz, TEMP[0].xyzz, -CONST[0].xyzz
43: MOV TEMP[5].yzw, TEMP[5].yxyz
44: MOV TEMP[5].x, TEMP[1].zzzz
45: MOV TEMP[0].xyz, TEMP[0].xyzx
46: MOV OUT[5], TEMP[0]
47: MOV OUT[1], TEMP[2]
48: MOV OUT[2], TEMP[4]
49: MOV OUT[3], TEMP[3]
50: MOV OUT[0], TEMP[1]
51: MOV OUT[4], TEMP[5]
52: END
; ModuleID = 'tgsi'
; Vertex shader entry point; the body follows the TGSI VERT listing printed
; just before this module, instruction by instruction.
; Arguments read by this body:
;   %1        - array whose element 0 is the descriptor used by every
;               llvm.SI.load.const call
;   %4        - array whose elements 0..3 are the descriptors for IN[0]..IN[3]
;   %5 + %7   - summed to form the index given to llvm.SI.vs.load.input
; The remaining arguments are not referenced in this function.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor for the constant fetches below (element 0 of %1).
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant fetches. Judging by how the values are used below versus the TGSI
; listing, the i32 operand is a byte offset: offset / 16 is the CONST[] index
; and (offset % 16) / 4 the component. Only components TGSI reads are loaded.
; CONST[0].xyz
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; CONST[1..3].xyzw
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; CONST[4].xyz
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
; CONST[5..7].xyzw and CONST[8].xyz
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; CONST[9..11].xyz
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; CONST[13].xyzw and CONST[14].xyzw
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
; CONST[15].x (the selector tested by the fcmp below)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
; CONST[16..19].xyzw
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
; Vertex input fetches. Each input uses its own descriptor from %4 and the
; same index %5 + %7 (recomputed for every fetch).
; IN[0]: all four components are used.
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
; IN[1]: xyz only.
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
; IN[2]: xy only.
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
; IN[3]: xy only.
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
; TGSI 0-3: TEMP[0] = CONST[5]*IN[0].x + CONST[6]*IN[0].y + CONST[7]*IN[0].z
; on all four lanes, then CONST[8]*IN[0].w is added to xyz only, so the w
; lane stays %126. Result: (%128, %130, %132, %126).
%107 = fmul float %31, %84
%108 = fmul float %32, %84
%109 = fmul float %33, %84
%110 = fmul float %34, %84
%111 = fmul float %35, %85
%112 = fadd float %111, %107
%113 = fmul float %36, %85
%114 = fadd float %113, %108
%115 = fmul float %37, %85
%116 = fadd float %115, %109
%117 = fmul float %38, %85
%118 = fadd float %117, %110
%119 = fmul float %39, %86
%120 = fadd float %119, %112
%121 = fmul float %40, %86
%122 = fadd float %121, %114
%123 = fmul float %41, %86
%124 = fadd float %123, %116
%125 = fmul float %42, %86
%126 = fadd float %125, %118
%127 = fmul float %43, %87
%128 = fadd float %127, %120
%129 = fmul float %44, %87
%130 = fadd float %129, %122
%131 = fmul float %45, %87
%132 = fadd float %131, %124
; TGSI 4-7: TEMP[1] = CONST[16]*IN[0].x + CONST[17]*IN[0].y
;                   + CONST[18]*IN[0].z + CONST[19]*IN[0].w.
; Result: (%154, %156, %158, %160); TGSI later writes it to OUT[0] (POSITION).
%133 = fmul float %64, %84
%134 = fmul float %65, %84
%135 = fmul float %66, %84
%136 = fmul float %67, %84
%137 = fmul float %68, %85
%138 = fadd float %137, %133
%139 = fmul float %69, %85
%140 = fadd float %139, %134
%141 = fmul float %70, %85
%142 = fadd float %141, %135
%143 = fmul float %71, %85
%144 = fadd float %143, %136
%145 = fmul float %72, %86
%146 = fadd float %145, %138
%147 = fmul float %73, %86
%148 = fadd float %147, %140
%149 = fmul float %74, %86
%150 = fadd float %149, %142
%151 = fmul float %75, %86
%152 = fadd float %151, %144
%153 = fmul float %76, %87
%154 = fadd float %153, %146
%155 = fmul float %77, %87
%156 = fadd float %155, %148
%157 = fmul float %78, %87
%158 = fadd float %157, %150
%159 = fmul float %79, %87
%160 = fadd float %159, %152
; TGSI 8: TEMP[2].xy = IN[2].xy * CONST[13].xy + CONST[13].zw.
%161 = fmul float %99, %55
%162 = fadd float %161, %57
%163 = fmul float %100, %56
%164 = fadd float %163, %58
; TGSI 9-14: the UIF/ELSE on (CONST[15].x == 0) is expressed as two selects:
; IN[2].xy when the comparison holds, IN[3].xy otherwise.
%165 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %165, float %99, float %105
%.32 = select i1 %165, float %100, float %106
; TGSI 15-16: the selected pair * CONST[14].xy + CONST[14].zw, which becomes
; the zw half of TEMP[2].
%166 = fmul float %., %59
%167 = fadd float %166, %61
%168 = fmul float %.32, %60
%169 = fadd float %168, %62
; TGSI 17-28: 3x3 transform of IN[1].xyz. Component i of the result is
; dot(CONST[9+i].xyz, IN[1].xyz); results are %180, %182, %184.
%170 = fmul float %46, %92
%171 = fmul float %49, %92
%172 = fmul float %52, %92
%173 = fmul float %47, %93
%174 = fadd float %173, %170
%175 = fmul float %50, %93
%176 = fadd float %175, %171
%177 = fmul float %53, %93
%178 = fadd float %177, %172
%179 = fmul float %48, %94
%180 = fadd float %179, %174
%181 = fmul float %51, %94
%182 = fadd float %181, %176
%183 = fmul float %54, %94
%184 = fadd float %183, %178
; TGSI 29-31: scale that vector by rsq of its squared length (DP3, RSQ, MUL).
; Result n = (%191, %192, %193).
%185 = fmul float %180, %180
%186 = fmul float %182, %182
%187 = fadd float %186, %185
%188 = fmul float %184, %184
%189 = fadd float %187, %188
%190 = call float @llvm.AMDGPU.rsq.clamped.f32(float %189)
%191 = fmul float %180, %190
%192 = fmul float %182, %190
%193 = fmul float %184, %190
; TGSI 33: TEMP[5] = n.xyzz * n.yzzx = (x*y, y*z, z*z, z*x).
%194 = fmul float %191, %192
%195 = fmul float %192, %193
%196 = fmul float %193, %193
%197 = fmul float %193, %191
; TGSI 34-38: three DP4s of TEMP[5] against CONST[1], CONST[2] and CONST[3];
; results are %204, %211, %218.
%198 = fmul float %16, %194
%199 = fmul float %17, %195
%200 = fadd float %198, %199
%201 = fmul float %18, %196
%202 = fadd float %200, %201
%203 = fmul float %19, %197
%204 = fadd float %202, %203
%205 = fmul float %20, %194
%206 = fmul float %21, %195
%207 = fadd float %205, %206
%208 = fmul float %22, %196
%209 = fadd float %207, %208
%210 = fmul float %23, %197
%211 = fadd float %209, %210
%212 = fmul float %24, %194
%213 = fmul float %25, %195
%214 = fadd float %212, %213
%215 = fmul float %26, %196
%216 = fadd float %214, %215
%217 = fmul float %27, %197
%218 = fadd float %216, %217
; TGSI 39-41: CONST[4].xyz * (n.x*n.x - n.y*n.y) + the three DP4 results.
%219 = fmul float %192, %192
%220 = fmul float %191, %191
%221 = fsub float %220, %219
%222 = fmul float %28, %221
%223 = fadd float %222, %204
%224 = fmul float %29, %221
%225 = fadd float %224, %211
%226 = fmul float %30, %221
%227 = fadd float %226, %218
; TGSI 42: TEMP[0].xyz - CONST[0].xyz.
%228 = fsub float %128, %13
%229 = fsub float %130, %14
%230 = fsub float %132, %15
; Exports, matched to the TGSI output moves by the values they carry
; (the fourth i32 operand is 32..36 for the generic outputs and 12 for the
; last one):
;   32 -> OUT[1] / GENERIC[0]: TEMP[2]
;   33 -> OUT[2] / GENERIC[1]: n.xyz, w written as 0.0
;   34 -> OUT[3] / GENERIC[2]: TGSI 41 result, w written as 0.0
;   35 -> OUT[4] / GENERIC[3]: (TEMP[1].z, TEMP[0].xyz - CONST[0].xyz)
;   36 -> OUT[5] / GENERIC[4]: TEMP[0]
;   12 -> OUT[0] / POSITION:   TEMP[1]; this is the only export whose third
;                              i32 operand is 1 instead of 0
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %162, float %164, float %167, float %169)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %191, float %192, float %193, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %223, float %225, float %227, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %158, float %228, float %229, float %230)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %128, float %130, float %132, float %126)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %154, float %156, float %158, float %160)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s18, s[20:23], 0x20 ; C2091520
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s19, s[20:23], 0x21 ; C2099521
s_buffer_load_dword s24, s[20:23], 0x22 ; C20C1522
s_buffer_load_dword s25, s[20:23], 0x24 ; C20C9524
s_buffer_load_dword s26, s[20:23], 0x25 ; C20D1525
s_buffer_load_dword s27, s[20:23], 0x26 ; C20D9526
s_buffer_load_dword s28, s[20:23], 0x28 ; C20E1528
s_buffer_load_dword s29, s[20:23], 0x29 ; C20E9529
s_buffer_load_dword s30, s[20:23], 0x2a ; C20F152A
s_buffer_load_dword s31, s[20:23], 0x2c ; C20F952C
s_buffer_load_dword s32, s[20:23], 0x2d ; C210152D
s_buffer_load_dword s33, s[20:23], 0x2e ; C210952E
s_buffer_load_dword s34, s[20:23], 0x34 ; C2111534
s_buffer_load_dword s35, s[20:23], 0x35 ; C2119535
s_buffer_load_dword s5, s[20:23], 0x36 ; C2029536
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x5 ; C2071505
s_buffer_load_dword s4, s[20:23], 0x6 ; C2021506
s_buffer_load_dword s3, s[20:23], 0x7 ; C2019507
s_buffer_load_dword s12, s[20:23], 0x8 ; C2061508
s_buffer_load_dword s16, s[20:23], 0x9 ; C2081509
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s5 ; 7E000205
s_buffer_load_dword s7, s[20:23], 0xa ; C203950A
s_buffer_load_dword s5, s[20:23], 0xb ; C202950B
s_buffer_load_dword s15, s[20:23], 0xc ; C207950C
s_buffer_load_dword s17, s[20:23], 0xd ; C208950D
s_buffer_load_dword s13, s[20:23], 0xe ; C206950E
s_buffer_load_dword s8, s[20:23], 0xf ; C204150F
s_buffer_load_dword s36, s[20:23], 0x3c ; C212153C
s_buffer_load_dword s37, s[20:23], 0x40 ; C2129540
s_buffer_load_dword s38, s[20:23], 0x41 ; C2131541
s_buffer_load_dword s39, s[20:23], 0x42 ; C2139542
s_buffer_load_dword s40, s[20:23], 0x43 ; C2141543
s_buffer_load_dword s9, s[20:23], 0x10 ; C2049510
s_buffer_load_dword s10, s[20:23], 0x11 ; C2051511
s_buffer_load_dword s11, s[20:23], 0x12 ; C2059512
s_buffer_load_dword s41, s[20:23], 0x14 ; C2149514
s_buffer_load_dword s42, s[20:23], 0x15 ; C2151515
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_eq_f32_e64 vcc, 0, s36 ; D004006A 00004880
s_buffer_load_dword s36, s[20:23], 0x16 ; C2121516
s_buffer_load_dword s43, s[20:23], 0x17 ; C2159517
s_buffer_load_dword s44, s[20:23], 0x18 ; C2161518
s_buffer_load_dword s45, s[20:23], 0x19 ; C2169519
s_buffer_load_dword s46, s[20:23], 0x1a ; C217151A
s_buffer_load_dword s47, s[20:23], 0x37 ; C2179537
s_buffer_load_dword s48, s[20:23], 0x38 ; C2181538
s_buffer_load_dword s49, s[20:23], 0x39 ; C2189539
s_buffer_load_dword s50, s[20:23], 0x3a ; C219153A
s_buffer_load_dword s51, s[20:23], 0x3b ; C219953B
s_buffer_load_dword s52, s[20:23], 0x1b ; C21A151B
s_buffer_load_dword s53, s[20:23], 0x1c ; C21A951C
s_buffer_load_dword s54, s[20:23], 0x1d ; C21B151D
s_buffer_load_dword s55, s[20:23], 0x1e ; C21B951E
s_buffer_load_dword s56, s[20:23], 0x1f ; C21C151F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s47 ; 7E1A022F
s_buffer_load_dword s47, s[20:23], 0x44 ; C2179544
s_buffer_load_dword s57, s[20:23], 0x45 ; C21C9545
s_buffer_load_dword s58, s[20:23], 0x46 ; C21D1546
v_mov_b32_e32 v14, s50 ; 7E1C0232
s_buffer_load_dword s50, s[20:23], 0x47 ; C2191547
v_mov_b32_e32 v15, s51 ; 7E1E0233
s_buffer_load_dword s51, s[20:23], 0x48 ; C2199548
s_buffer_load_dword s59, s[20:23], 0x49 ; C21D9549
s_buffer_load_dword s60, s[20:23], 0x4a ; C21E154A
s_buffer_load_dword s61, s[20:23], 0x4b ; C21E954B
s_buffer_load_dword s62, s[20:23], 0x4c ; C21F154C
s_buffer_load_dword s63, s[20:23], 0x4d ; C21F954D
s_buffer_load_dword s64, s[20:23], 0x4e ; C220154E
s_buffer_load_dword s20, s[20:23], 0x4f ; C20A154F
v_mul_f32_e32 v16, s41, v2 ; 10200429
v_mac_f32_e32 v0, s34, v9 ; 3E001222
v_mul_f32_e32 v17, s42, v2 ; 1022042A
v_mul_f32_e32 v18, s36, v2 ; 10240424
v_mul_f32_e32 v19, s43, v2 ; 1026042B
v_mac_f32_e32 v13, s35, v10 ; 3E1A1423
v_mul_f32_e32 v20, s25, v6 ; 10280C19
v_mul_f32_e32 v21, s28, v6 ; 102A0C1C
v_mul_f32_e32 v6, s31, v6 ; 100C0C1F
v_mac_f32_e32 v16, s44, v3 ; 3E20062C
v_mac_f32_e32 v17, s45, v3 ; 3E22062D
v_mac_f32_e32 v18, s46, v3 ; 3E24062E
v_mac_f32_e32 v20, s26, v7 ; 3E280E1A
v_mac_f32_e32 v21, s29, v7 ; 3E2A0E1D
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v6, s32, v7 ; 3E0C0E20
v_mac_f32_e32 v20, s27, v8 ; 3E28101B
v_mac_f32_e32 v21, s30, v8 ; 3E2A101E
v_mac_f32_e32 v6, s33, v8 ; 3E0C1021
v_mac_f32_e32 v19, s52, v3 ; 3E260634
v_mul_f32_e32 v7, s37, v2 ; 100E0425
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v7, s47, v3 ; 3E0E062F
v_mul_f32_e32 v8, s38, v2 ; 10100426
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s58, v3 ; 3E16063A
v_mul_f32_e32 v2, s40, v2 ; 10040428
v_mac_f32_e32 v2, s50, v3 ; 3E040632
v_mac_f32_e32 v16, s53, v4 ; 3E200835
v_mac_f32_e32 v17, s54, v4 ; 3E220836
v_mac_f32_e32 v18, s55, v4 ; 3E240837
v_mac_f32_e32 v19, s56, v4 ; 3E260838
v_mac_f32_e32 v7, s51, v4 ; 3E0E0833
v_mac_f32_e32 v8, s59, v4 ; 3E10083B
v_mac_f32_e32 v11, s60, v4 ; 3E16083C
v_mac_f32_e32 v2, s61, v4 ; 3E04083D
v_mac_f32_e32 v16, s18, v5 ; 3E200A12
v_mac_f32_e32 v17, s19, v5 ; 3E220A13
v_mac_f32_e32 v18, s24, v5 ; 3E240A18
v_mac_f32_e32 v7, s62, v5 ; 3E0E0A3E
v_mac_f32_e32 v8, s63, v5 ; 3E100A3F
v_mac_f32_e32 v11, s64, v5 ; 3E160A40
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mul_f32_e32 v3, v20, v20 ; 10062914
v_mac_f32_e32 v3, v21, v21 ; 3E062B15
v_mac_f32_e32 v3, v6, v6 ; 3E060D06
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mac_f32_e32 v14, s48, v9 ; 3E1C1230
v_mac_f32_e32 v15, s49, v10 ; 3E1E1431
exp 15, 32, 0, 0, 0, v0, v13, v14, v15 ; F800020F 0F0E0D00
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v3, v20 ; 10002903
v_mul_f32_e32 v4, v3, v21 ; 10082B03
v_mul_f32_e32 v3, v3, v6 ; 10060D03
v_mul_f32_e32 v5, v3, v4 ; 100A0903
v_mul_f32_e32 v6, s14, v5 ; 100C0A0E
v_mul_f32_e32 v9, s16, v5 ; 10120A10
v_mul_f32_e32 v5, s17, v5 ; 100A0A11
v_mul_f32_e32 v10, v4, v0 ; 10140104
v_mac_f32_e32 v6, s6, v10 ; 3E0C1406
v_mac_f32_e32 v9, s12, v10 ; 3E12140C
v_mac_f32_e32 v5, s15, v10 ; 3E0A140F
v_mul_f32_e32 v10, v3, v3 ; 10140703
v_mac_f32_e32 v6, s4, v10 ; 3E0C1404
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s13, v10 ; 3E0A140D
v_mul_f32_e32 v10, v0, v3 ; 10140700
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s5, v10 ; 3E121405
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mad_f32 v10, v0, v0, -v10 ; D282000A 842A0100
v_mac_f32_e32 v6, s9, v10 ; 3E0C1409
v_mac_f32_e32 v9, s10, v10 ; 3E12140A
v_mac_f32_e32 v5, s11, v10 ; 3E0A140B
v_subrev_f32_e32 v10, s0, v16 ; 0A142000
v_subrev_f32_e32 v12, s1, v17 ; 0A182201
v_subrev_f32_e32 v13, s2, v18 ; 0A1A2402
exp 15, 33, 0, 0, 0, v0, v4, v3, v1 ; F800021F 01030400
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
exp 15, 35, 0, 0, 0, v11, v10, v12, v13 ; F800023F 0D0C0A0B
exp 15, 36, 0, 0, 0, v16, v17, v18, v19 ; F800024F 13121110
exp 15, 12, 0, 1, 0, v7, v8, v11, v2 ; F80008CF 020B0807
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 24
Code Size: 748 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[21..22]
DCL CONST[24]
DCL TEMP[0..17], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[3].yzww, IN[3].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[3].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
8: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
9: LRP TEMP[3].xyz, CONST[21].xxxx, TEMP[2].xyzz, CONST[16].xyzz
10: MUL TEMP[4].x, CONST[21].xxxx, CONST[16].wwww
11: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
12: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
13: MOV TEMP[5].xy, IN[0].xyyy
14: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D
15: ADD TEMP[6].x, IMM[0].xxxx, -CONST[24].xxxx
16: MAD TEMP[5].x, TEMP[5].yyyy, CONST[24].xxxx, TEMP[6].xxxx
17: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
18: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
19: MOV TEMP[7].xyz, IMM[0].yyyy
20: MOV TEMP[8].w, IMM[0].xxxx
21: MOV TEMP[8].xyz, TEMP[0].xyzx
22: DP4 TEMP[9].x, CONST[1], TEMP[8]
23: DP4 TEMP[10].x, CONST[2], TEMP[8]
24: MOV TEMP[9].y, TEMP[10].xxxx
25: DP4 TEMP[8].x, CONST[3], TEMP[8]
26: MOV TEMP[9].z, TEMP[8].xxxx
27: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
28: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
29: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
30: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
31: MUL TEMP[9].xyz, IMM[0].zzzz, TEMP[9].xyzz
32: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
33: MOV TEMP[10].xyz, TEMP[9].xyzx
34: FSLT TEMP[11].x, IMM[0].yyyy, CONST[10].wwww
35: UIF TEMP[11].xxxx :0
36: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
37: RSQ TEMP[11].x, TEMP[11].xxxx
38: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
39: MOV TEMP[12].xyz, -IN[4].xyzx
40: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
41: RCP TEMP[14].x, TEMP[11].xxxx
42: RCP TEMP[14].y, TEMP[11].yyyy
43: RCP TEMP[14].z, TEMP[11].zzzz
44: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
45: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
46: RCP TEMP[14].x, TEMP[11].xxxx
47: RCP TEMP[14].y, TEMP[11].yyyy
48: RCP TEMP[14].z, TEMP[11].zzzz
49: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
50: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[11].xyzz
51: UIF TEMP[14].xxxx :0
52: MOV TEMP[15].x, TEMP[13].xxxx
53: ELSE :0
54: MOV TEMP[15].x, TEMP[12].xxxx
55: ENDIF
56: UIF TEMP[14].yyyy :0
57: MOV TEMP[16].x, TEMP[13].yyyy
58: ELSE :0
59: MOV TEMP[16].x, TEMP[12].yyyy
60: ENDIF
61: UIF TEMP[14].zzzz :0
62: MOV TEMP[13].x, TEMP[13].zzzz
63: ELSE :0
64: MOV TEMP[13].x, TEMP[12].zzzz
65: ENDIF
66: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
67: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
68: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
69: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
70: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
71: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
72: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
73: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
74: ENDIF
75: ADD TEMP[11].x, IMM[0].xxxx, -CONST[22].xxxx
76: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].xxxx
77: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
78: MOV TEMP[10].xyz, TEMP[10].xyzz
79: MOV TEMP[10].w, TEMP[11].xxxx
80: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
81: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
82: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
83: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
84: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].zzzz
85: UIF TEMP[11].xxxx :0
86: MOV TEMP[11].xyz, TEMP[9].xyzx
87: FSLT TEMP[12].x, IMM[0].yyyy, CONST[14].wwww
88: UIF TEMP[12].xxxx :0
89: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
90: RSQ TEMP[12].x, TEMP[12].xxxx
91: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
92: MOV TEMP[12].xyz, -IN[4].xyzx
93: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
94: RCP TEMP[14].x, TEMP[9].xxxx
95: RCP TEMP[14].y, TEMP[9].yyyy
96: RCP TEMP[14].z, TEMP[9].zzzz
97: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
98: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
99: RCP TEMP[14].x, TEMP[9].xxxx
100: RCP TEMP[14].y, TEMP[9].yyyy
101: RCP TEMP[14].z, TEMP[9].zzzz
102: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
103: FSLT TEMP[14].xyz, IMM[0].yyyy, TEMP[9].xyzz
104: UIF TEMP[14].xxxx :0
105: MOV TEMP[15].x, TEMP[13].xxxx
106: ELSE :0
107: MOV TEMP[15].x, TEMP[12].xxxx
108: ENDIF
109: UIF TEMP[14].yyyy :0
110: MOV TEMP[16].x, TEMP[13].yyyy
111: ELSE :0
112: MOV TEMP[16].x, TEMP[12].yyyy
113: ENDIF
114: UIF TEMP[14].zzzz :0
115: MOV TEMP[13].x, TEMP[13].zzzz
116: ELSE :0
117: MOV TEMP[13].x, TEMP[12].zzzz
118: ENDIF
119: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
120: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[0].wwww
121: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
122: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
123: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
124: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[4].xyzz
125: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
126: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
127: ENDIF
128: ADD TEMP[9].x, IMM[0].xxxx, -CONST[22].xxxx
129: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].xxxx
130: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
131: MOV TEMP[11].xyz, TEMP[11].xyzz
132: MOV TEMP[11].w, TEMP[9].xxxx
133: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
134: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
135: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
136: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
137: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
138: ELSE :0
139: MOV TEMP[7].xyz, TEMP[10].xyzx
140: ENDIF
141: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
142: MOV TEMP[1].xyz, -TEMP[1].xyzx
143: ADD TEMP[5].x, IMM[0].xxxx, -CONST[22].xxxx
144: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
145: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
146: RSQ TEMP[10].x, TEMP[10].xxxx
147: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
148: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
149: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
150: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
151: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
152: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
153: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
154: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx
155: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].xxxx, IMM[2].yyyy
156: LG2 TEMP[12].x, TEMP[12].xxxx
157: RCP TEMP[12].x, TEMP[12].xxxx
158: MUL TEMP[12].x, IMM[1].wwww, TEMP[12].xxxx
159: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
160: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx
161: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx
162: MUL TEMP[15].x, IMM[0].zzzz, TEMP[10].xxxx
163: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
164: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[0].wwww
165: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx
166: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
167: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
168: ADD TEMP[4].x, CONST[22].xxxx, TEMP[4].xxxx
169: MOV_SAT TEMP[4].x, TEMP[4].xxxx
170: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx
171: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
172: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
173: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
174: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
175: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx
176: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx
177: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].zzzz
178: RCP TEMP[1].x, TEMP[1].xxxx
179: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
180: MAX TEMP[9].x, IMM[0].yyyy, TEMP[9].xxxx
181: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
182: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx
183: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
184: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
185: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
186: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
187: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
188: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
189: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
190: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz
191: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
192: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
193: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
194: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
195: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz
196: ADD TEMP[9].x, TEMP[5].xxxx, IMM[2].wwww
197: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
198: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
199: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
200: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
201: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx
202: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
203: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
204: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
205: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
206: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
207: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx
208: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
209: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
210: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
211: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
212: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
213: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
214: MOV TEMP[0].xyz, TEMP[0].xyzx
215: MAD TEMP[1].x, IN[3].xxxx, CONST[5].zzzz, CONST[5].wwww
216: MOV_SAT TEMP[1].x, TEMP[1].xxxx
217: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
218: MOV TEMP[0].xyz, TEMP[0].xyzx
219: MOV TEMP[0].w, IMM[0].xxxx
220: MOV OUT[0], TEMP[0]
221: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%83 = load <32 x i8>, <32 x i8> addrspace(2)* %82, align 32, !tbaa !0
%84 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%85 = load <16 x i8>, <16 x i8> addrspace(2)* %84, align 16, !tbaa !0
%86 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%87 = bitcast <8 x i32> addrspace(2)* %86 to <32 x i8> addrspace(2)*
%88 = load <32 x i8>, <32 x i8> addrspace(2)* %87, align 32, !tbaa !0
%89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%90 = bitcast <4 x i32> addrspace(2)* %89 to <16 x i8> addrspace(2)*
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%93 = bitcast <8 x i32> addrspace(2)* %92 to <32 x i8> addrspace(2)*
%94 = load <32 x i8>, <32 x i8> addrspace(2)* %93, align 32, !tbaa !0
%95 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%96 = bitcast <4 x i32> addrspace(2)* %95 to <16 x i8> addrspace(2)*
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%99 = bitcast <8 x i32> addrspace(2)* %98 to <32 x i8> addrspace(2)*
%100 = load <32 x i8>, <32 x i8> addrspace(2)* %99, align 32, !tbaa !0
%101 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%102 = bitcast <4 x i32> addrspace(2)* %101 to <16 x i8> addrspace(2)*
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%105 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%106 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%107 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%108 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%109 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%110 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%111 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%119 = fmul float %106, %106
%120 = fmul float %107, %107
%121 = fadd float %120, %119
%122 = fmul float %108, %108
%123 = fadd float %121, %122
%124 = call float @llvm.AMDGPU.rsq.clamped.f32(float %123)
%125 = fmul float %106, %124
%126 = fmul float %107, %124
%127 = fmul float %108, %124
%128 = fmul float %113, %113
%129 = fmul float %114, %114
%130 = fadd float %129, %128
%131 = fmul float %115, %115
%132 = fadd float %130, %131
%133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132)
%134 = fmul float %113, %133
%135 = fmul float %114, %133
%136 = fmul float %115, %133
%137 = bitcast float %104 to i32
%138 = bitcast float %105 to i32
%139 = insertelement <2 x i32> undef, i32 %137, i32 0
%140 = insertelement <2 x i32> %139, i32 %138, i32 1
%141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %140, <32 x i8> %94, <16 x i8> %97, i32 2)
%142 = extractelement <4 x float> %141, i32 0
%143 = extractelement <4 x float> %141, i32 1
%144 = extractelement <4 x float> %141, i32 2
%145 = fmul float %76, %142
%146 = fmul float %77, %143
%147 = fmul float %78, %144
%148 = call float @llvm.AMDGPU.lrp(float %79, float %145, float %66)
%149 = call float @llvm.AMDGPU.lrp(float %79, float %146, float %67)
%150 = call float @llvm.AMDGPU.lrp(float %79, float %147, float %68)
%151 = fmul float %79, %69
%152 = fsub float %69, %151
%153 = fmul float %145, %152
%154 = fmul float %146, %152
%155 = fmul float %147, %152
%156 = bitcast float %104 to i32
%157 = bitcast float %105 to i32
%158 = insertelement <2 x i32> undef, i32 %156, i32 0
%159 = insertelement <2 x i32> %158, i32 %157, i32 1
%160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %159, <32 x i8> %100, <16 x i8> %103, i32 2)
%161 = extractelement <4 x float> %160, i32 1
%162 = fsub float 1.000000e+00, %81
%163 = fmul float %161, %81
%164 = fadd float %163, %162
%165 = fmul float %125, %24
%166 = fmul float %126, %25
%167 = fadd float %166, %165
%168 = fmul float %127, %26
%169 = fadd float %167, %168
%170 = call float @llvm.maxnum.f32(float %169, float 0.000000e+00)
%171 = fmul float %27, %125
%172 = fmul float %28, %126
%173 = fadd float %171, %172
%174 = fmul float %29, %127
%175 = fadd float %173, %174
%176 = fadd float %175, %30
%177 = fmul float %31, %125
%178 = fmul float %32, %126
%179 = fadd float %177, %178
%180 = fmul float %33, %127
%181 = fadd float %179, %180
%182 = fadd float %181, %34
%183 = fmul float %35, %125
%184 = fmul float %36, %126
%185 = fadd float %183, %184
%186 = fmul float %37, %127
%187 = fadd float %185, %186
%188 = fadd float %187, %38
%189 = fadd float %109, %176
%190 = fadd float %110, %182
%191 = fadd float %111, %188
%192 = fmul float %189, %164
%193 = fmul float %190, %164
%194 = fmul float %191, %164
%195 = fmul float %125, %134
%196 = fmul float %126, %135
%197 = fadd float %196, %195
%198 = fmul float %127, %136
%199 = fadd float %197, %198
%200 = fmul float %199, %125
%201 = fmul float %199, %126
%202 = fmul float %199, %127
%203 = fmul float %200, 2.000000e+00
%204 = fmul float %201, 2.000000e+00
%205 = fmul float %202, 2.000000e+00
%206 = fsub float %134, %203
%207 = fsub float %135, %204
%208 = fsub float %136, %205
%209 = fcmp ogt float %51, 0.000000e+00
br i1 %209, label %IF, label %ENDIF
IF: ; preds = %main_body
%210 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%211 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%212 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%213 = fmul float %206, %206
%214 = fmul float %207, %207
%215 = fadd float %214, %213
%216 = fmul float %208, %208
%217 = fadd float %215, %216
%218 = call float @llvm.AMDGPU.rsq.clamped.f32(float %217)
%219 = fmul float %206, %218
%220 = fmul float %207, %218
%221 = fmul float %208, %218
%222 = fsub float %44, %116
%223 = fsub float %45, %117
%224 = fsub float %46, %118
%225 = fdiv float 1.000000e+00, %219
%226 = fdiv float 1.000000e+00, %220
%227 = fdiv float 1.000000e+00, %221
%228 = fmul float %222, %225
%229 = fmul float %223, %226
%230 = fmul float %224, %227
%231 = fsub float %47, %116
%232 = fsub float %48, %117
%233 = fsub float %49, %118
%234 = fdiv float 1.000000e+00, %219
%235 = fdiv float 1.000000e+00, %220
%236 = fdiv float 1.000000e+00, %221
%237 = fmul float %231, %234
%238 = fmul float %232, %235
%239 = fmul float %233, %236
%240 = fcmp ogt float %219, 0.000000e+00
%241 = fcmp ogt float %220, 0.000000e+00
%242 = fcmp ogt float %221, 0.000000e+00
%. = select i1 %240, float %228, float %237
%temp64.0 = select i1 %241, float %229, float %238
%.96 = select i1 %242, float %230, float %239
%243 = fadd float %44, %47
%244 = fadd float %45, %48
%245 = fadd float %46, %49
%246 = fmul float %243, 5.000000e-01
%247 = fmul float %244, 5.000000e-01
%248 = fmul float %245, 5.000000e-01
%249 = call float @llvm.minnum.f32(float %., float %temp64.0)
%250 = call float @llvm.minnum.f32(float %249, float %.96)
%251 = fsub float %246, %212
%252 = fsub float %247, %211
%253 = fsub float %248, %210
%254 = fadd float %251, %116
%255 = fadd float %252, %117
%256 = fadd float %253, %118
%257 = fmul float %219, %250
%258 = fadd float %257, %254
%259 = fmul float %220, %250
%260 = fadd float %259, %255
%261 = fmul float %221, %250
%262 = fadd float %261, %256
%263 = fsub float %258, %246
%264 = fsub float %260, %247
%265 = fsub float %262, %248
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp40.0 = phi float [ %263, %IF ], [ %206, %main_body ]
%temp41.0 = phi float [ %264, %IF ], [ %207, %main_body ]
%temp42.0 = phi float [ %265, %IF ], [ %208, %main_body ]
%266 = fsub float 1.000000e+00, %80
%267 = call float @llvm.pow.f32(float %266, float 7.500000e-01)
%268 = fmul float %267, 7.000000e+00
%269 = insertelement <4 x float> undef, float %temp40.0, i32 0
%270 = insertelement <4 x float> %269, float %temp41.0, i32 1
%271 = insertelement <4 x float> %270, float %temp42.0, i32 2
%272 = insertelement <4 x float> %271, float %268, i32 3
%273 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %272)
%274 = extractelement <4 x float> %273, i32 0
%275 = extractelement <4 x float> %273, i32 1
%276 = extractelement <4 x float> %273, i32 2
%277 = extractelement <4 x float> %273, i32 3
%278 = call float @llvm.fabs.f32(float %276)
%279 = fdiv float 1.000000e+00, %278
%280 = fmul float %274, %279
%281 = fadd float %280, 1.500000e+00
%282 = fmul float %275, %279
%283 = fadd float %282, 1.500000e+00
%284 = bitcast float %283 to i32
%285 = bitcast float %281 to i32
%286 = bitcast float %277 to i32
%287 = bitcast float %268 to i32
%288 = insertelement <4 x i32> undef, i32 %284, i32 0
%289 = insertelement <4 x i32> %288, i32 %285, i32 1
%290 = insertelement <4 x i32> %289, i32 %286, i32 2
%291 = insertelement <4 x i32> %290, i32 %287, i32 3
%292 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %291, <32 x i8> %83, <16 x i8> %85, i32 4)
%293 = extractelement <4 x float> %292, i32 0
%294 = extractelement <4 x float> %292, i32 1
%295 = extractelement <4 x float> %292, i32 2
%296 = extractelement <4 x float> %292, i32 3
%297 = call float @llvm.pow.f32(float %296, float %53)
%298 = fmul float %52, %297
%299 = fmul float %298, %293
%300 = fmul float %298, %294
%301 = fmul float %298, %295
%302 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %302, label %IF82, label %ENDIF81
IF82: ; preds = %ENDIF
%303 = fcmp ogt float %63, 0.000000e+00
br i1 %303, label %IF85, label %ENDIF84
ENDIF81: ; preds = %ENDIF, %ENDIF84
%temp28.0 = phi float [ %531, %ENDIF84 ], [ %299, %ENDIF ]
%temp29.0 = phi float [ %532, %ENDIF84 ], [ %300, %ENDIF ]
%temp30.0 = phi float [ %533, %ENDIF84 ], [ %301, %ENDIF ]
%304 = fmul float %temp28.0, %164
%305 = fmul float %temp29.0, %164
%306 = fmul float %temp30.0, %164
%307 = fsub float 1.000000e+00, %80
%308 = fsub float %24, %134
%309 = fsub float %25, %135
%310 = fsub float %26, %136
%311 = fmul float %308, %308
%312 = fmul float %309, %309
%313 = fadd float %312, %311
%314 = fmul float %310, %310
%315 = fadd float %313, %314
%316 = call float @llvm.AMDGPU.rsq.clamped.f32(float %315)
%317 = fmul float %308, %316
%318 = fmul float %309, %316
%319 = fmul float %310, %316
%320 = fmul float %134, %125
%321 = fsub float -0.000000e+00, %320
%322 = fmul float %135, %126
%323 = fsub float %321, %322
%324 = fmul float %136, %127
%325 = fsub float %323, %324
%326 = call float @llvm.maxnum.f32(float %325, float 0.000000e+00)
%327 = fmul float %24, %317
%328 = fmul float %25, %318
%329 = fadd float %328, %327
%330 = fmul float %26, %319
%331 = fadd float %329, %330
%332 = call float @llvm.maxnum.f32(float %331, float 0.000000e+00)
%333 = fmul float %307, %307
%334 = fmul float %333, %75
%335 = fsub float 1.000000e+00, %307
%336 = fmul float %335, 0x3FEEF9DB20000000
%337 = fadd float %336, 0x3F9EB851E0000000
%338 = call float @llvm.log2.f32(float %337)
%339 = fdiv float 1.000000e+00, %338
%340 = fmul float %339, 1.000000e+01
%341 = fmul float %340, %340
%342 = fsub float 1.000000e+00, %170
%343 = fsub float 1.000000e+00, %326
%344 = fmul float %332, 2.000000e+00
%345 = fmul float %332, %307
%346 = fmul float %344, %345
%347 = fadd float %346, 5.000000e-01
%348 = fsub float 1.000000e+00, %332
%349 = fsub float 1.000000e+00, %326
%350 = fsub float 1.000000e+00, %152
%351 = fadd float %80, %350
%352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00)
%353 = fmul float %349, %349
%354 = fmul float %349, %349
%355 = fmul float %354, %349
%356 = fmul float %353, %355
%357 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %148)
%358 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %149)
%359 = call float @llvm.AMDGPU.lrp(float %356, float %352, float %150)
%360 = call float @llvm.AMDGPU.lrp(float %170, float 1.000000e+00, float %334)
%361 = call float @llvm.AMDGPU.lrp(float %326, float 1.000000e+00, float %334)
%362 = fmul float %360, %361
%363 = fadd float %362, 0x3F1A36E2E0000000
%364 = fdiv float 1.000000e+00, %363
%365 = fmul float %125, %317
%366 = fmul float %126, %318
%367 = fadd float %366, %365
%368 = fmul float %127, %319
%369 = fadd float %367, %368
%370 = call float @llvm.maxnum.f32(float %369, float 0.000000e+00)
%371 = call float @llvm.pow.f32(float %370, float %341)
%372 = fadd float %341, 1.000000e+00
%373 = fmul float %372, %74
%374 = fmul float %371, %373
%375 = fmul float %364, %374
%376 = fmul float %375, %170
%377 = fmul float %376, %73
%378 = call float @llvm.maxnum.f32(float %377, float 0.000000e+00)
%379 = fmul float %378, %70
%380 = fmul float %378, %71
%381 = fmul float %378, %72
%382 = fsub float 1.000000e+00, %148
%383 = fsub float 1.000000e+00, %149
%384 = fsub float 1.000000e+00, %150
%385 = fmul float %348, %348
%386 = fmul float %348, %348
%387 = fmul float %386, %348
%388 = fmul float %385, %387
%389 = fmul float %382, %388
%390 = fadd float %389, %148
%391 = fmul float %383, %388
%392 = fadd float %391, %149
%393 = fmul float %384, %388
%394 = fadd float %393, %150
%395 = fadd float %347, -1.000000e+00
%396 = fmul float %342, %342
%397 = fmul float %342, %342
%398 = fmul float %397, %342
%399 = fmul float %396, %398
%400 = fmul float %395, %399
%401 = fadd float %400, 1.000000e+00
%402 = fadd float %347, -1.000000e+00
%403 = fmul float %343, %343
%404 = fmul float %343, %343
%405 = fmul float %404, %343
%406 = fmul float %403, %405
%407 = fmul float %402, %406
%408 = fadd float %407, 1.000000e+00
%409 = fmul float %401, %408
%410 = fmul float %409, %170
%411 = fmul float %70, %410
%412 = fadd float %411, %192
%413 = fmul float %71, %410
%414 = fadd float %413, %193
%415 = fmul float %72, %410
%416 = fadd float %415, %194
%417 = fmul float %153, %412
%418 = fmul float %154, %414
%419 = fmul float %155, %416
%420 = fmul float %379, %390
%421 = fadd float %420, %417
%422 = fmul float %380, %392
%423 = fadd float %422, %418
%424 = fmul float %381, %394
%425 = fadd float %424, %419
%426 = fmul float %304, %357
%427 = fadd float %426, %421
%428 = fmul float %305, %358
%429 = fadd float %428, %423
%430 = fmul float %306, %359
%431 = fadd float %430, %425
%432 = fmul float %112, %42
%433 = fadd float %432, %43
%434 = call float @llvm.AMDIL.clamp.(float %433, float 0.000000e+00, float 1.000000e+00)
%435 = call float @llvm.AMDGPU.lrp(float %434, float %427, float %39)
%436 = call float @llvm.AMDGPU.lrp(float %434, float %429, float %40)
%437 = call float @llvm.AMDGPU.lrp(float %434, float %431, float %41)
%438 = call i32 @llvm.SI.packf16(float %435, float %436)
%439 = bitcast i32 %438 to float
%440 = call i32 @llvm.SI.packf16(float %437, float 1.000000e+00)
%441 = bitcast i32 %440 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %439, float %441, float %439, float %441)
ret void
IF85: ; preds = %IF82
%442 = fmul float %206, %206
%443 = fmul float %207, %207
%444 = fadd float %443, %442
%445 = fmul float %208, %208
%446 = fadd float %444, %445
%447 = call float @llvm.AMDGPU.rsq.clamped.f32(float %446)
%448 = fmul float %206, %447
%449 = fmul float %207, %447
%450 = fmul float %208, %447
%451 = fsub float %54, %116
%452 = fsub float %55, %117
%453 = fsub float %56, %118
%454 = fdiv float 1.000000e+00, %448
%455 = fdiv float 1.000000e+00, %449
%456 = fdiv float 1.000000e+00, %450
%457 = fmul float %451, %454
%458 = fmul float %452, %455
%459 = fmul float %453, %456
%460 = fsub float %57, %116
%461 = fsub float %58, %117
%462 = fsub float %59, %118
%463 = fdiv float 1.000000e+00, %448
%464 = fdiv float 1.000000e+00, %449
%465 = fdiv float 1.000000e+00, %450
%466 = fmul float %460, %463
%467 = fmul float %461, %464
%468 = fmul float %462, %465
%469 = fcmp ogt float %448, 0.000000e+00
%470 = fcmp ogt float %449, 0.000000e+00
%471 = fcmp ogt float %450, 0.000000e+00
%.97 = select i1 %469, float %457, float %466
%temp64.1 = select i1 %470, float %458, float %467
%.98 = select i1 %471, float %459, float %468
%472 = fadd float %54, %57
%473 = fadd float %55, %58
%474 = fadd float %56, %59
%475 = fmul float %472, 5.000000e-01
%476 = fmul float %473, 5.000000e-01
%477 = fmul float %474, 5.000000e-01
%478 = call float @llvm.minnum.f32(float %.97, float %temp64.1)
%479 = call float @llvm.minnum.f32(float %478, float %.98)
%480 = fsub float %475, %60
%481 = fsub float %476, %61
%482 = fsub float %477, %62
%483 = fadd float %480, %116
%484 = fadd float %481, %117
%485 = fadd float %482, %118
%486 = fmul float %448, %479
%487 = fadd float %486, %483
%488 = fmul float %449, %479
%489 = fadd float %488, %484
%490 = fmul float %450, %479
%491 = fadd float %490, %485
%492 = fsub float %487, %475
%493 = fsub float %489, %476
%494 = fsub float %491, %477
br label %ENDIF84
ENDIF84: ; preds = %IF82, %IF85
%temp44.0 = phi float [ %492, %IF85 ], [ %206, %IF82 ]
%temp45.0 = phi float [ %493, %IF85 ], [ %207, %IF82 ]
%temp46.0 = phi float [ %494, %IF85 ], [ %208, %IF82 ]
%495 = fsub float 1.000000e+00, %80
%496 = call float @llvm.pow.f32(float %495, float 7.500000e-01)
%497 = fmul float %496, 7.000000e+00
%498 = insertelement <4 x float> undef, float %temp44.0, i32 0
%499 = insertelement <4 x float> %498, float %temp45.0, i32 1
%500 = insertelement <4 x float> %499, float %temp46.0, i32 2
%501 = insertelement <4 x float> %500, float %497, i32 3
%502 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %501)
%503 = extractelement <4 x float> %502, i32 0
%504 = extractelement <4 x float> %502, i32 1
%505 = extractelement <4 x float> %502, i32 2
%506 = extractelement <4 x float> %502, i32 3
%507 = call float @llvm.fabs.f32(float %505)
%508 = fdiv float 1.000000e+00, %507
%509 = fmul float %503, %508
%510 = fadd float %509, 1.500000e+00
%511 = fmul float %504, %508
%512 = fadd float %511, 1.500000e+00
%513 = bitcast float %512 to i32
%514 = bitcast float %510 to i32
%515 = bitcast float %506 to i32
%516 = bitcast float %497 to i32
%517 = insertelement <4 x i32> undef, i32 %513, i32 0
%518 = insertelement <4 x i32> %517, i32 %514, i32 1
%519 = insertelement <4 x i32> %518, i32 %515, i32 2
%520 = insertelement <4 x i32> %519, i32 %516, i32 3
%521 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %520, <32 x i8> %88, <16 x i8> %91, i32 4)
%522 = extractelement <4 x float> %521, i32 0
%523 = extractelement <4 x float> %521, i32 1
%524 = extractelement <4 x float> %521, i32 2
%525 = extractelement <4 x float> %521, i32 3
%526 = call float @llvm.pow.f32(float %525, float %65)
%527 = fmul float %64, %526
%528 = fmul float %527, %522
%529 = fmul float %527, %523
%530 = fmul float %527, %524
%531 = call float @llvm.AMDGPU.lrp(float %50, float %299, float %528)
%532 = call float @llvm.AMDGPU.lrp(float %50, float %300, float %529)
%533 = call float @llvm.AMDGPU.lrp(float %50, float %301, float %530)
br label %ENDIF81
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v13, v0, 0, 0, [m0] ; C8340000
v_interp_p2_f32 v13, [v13], v1, 0, 0, [m0] ; C8350001
v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100
v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500
v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00
v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01
v_interp_p1_f32 v7, v0, 0, 3, [m0] ; C81C0C00
v_interp_p2_f32 v7, [v7], v1, 0, 3, [m0] ; C81D0C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
v_mul_f32_e32 v4, v2, v2 ; 10080502
v_mac_f32_e32 v4, v3, v3 ; 3E080703
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v8, v4 ; 7E105904
v_mul_f32_e32 v4, v15, v15 ; 10081F0F
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mac_f32_e32 v4, v17, v17 ; 3E082311
v_rsq_clamp_f32_e32 v18, v4 ; 7E245904
v_mul_f32_e32 v5, v8, v2 ; 100A0508
v_mul_f32_e32 v4, v8, v3 ; 10080708
v_mul_f32_e32 v3, v8, v6 ; 10060D08
v_mul_f32_e32 v9, v18, v15 ; 10121F12
v_mul_f32_e32 v8, v18, v16 ; 10102112
v_mul_f32_e32 v2, v9, v5 ; 10040B09
v_mac_f32_e32 v2, v8, v4 ; 3E040908
v_mul_f32_e32 v6, v18, v17 ; 100C2312
v_mac_f32_e32 v2, v6, v3 ; 3E040706
v_mul_f32_e32 v19, v5, v2 ; 10260505
v_mac_f32_e32 v19, v5, v2 ; 3E260505
v_mul_f32_e32 v21, v4, v2 ; 102A0504
v_mac_f32_e32 v21, v4, v2 ; 3E2A0504
v_mad_f32 v20, v15, v18, -v19 ; D2820014 844E250F
v_mad_f32 v21, v16, v18, -v21 ; D2820015 84562510
v_mul_f32_e32 v15, v3, v2 ; 101E0503
v_mac_f32_e32 v15, v3, v2 ; 3E1E0503
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_mad_f32 v22, v17, v18, -v15 ; D2820016 843E2511
v_interp_p1_f32 v24, v0, 0, 4, [m0] ; C8601000
v_interp_p2_f32 v24, [v24], v1, 0, 4, [m0] ; C8611001
s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508
s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710
v_interp_p1_f32 v23, v0, 1, 4, [m0] ; C85C1100
v_interp_p2_f32 v23, [v23], v1, 1, 4, [m0] ; C85D1101
v_interp_p1_f32 v25, v0, 2, 4, [m0] ; C8641200
v_interp_p2_f32 v25, [v25], v1, 2, 4, [m0] ; C8651201
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s20, s[8:11], 0x4c ; C20A094C
s_buffer_load_dword s21, s[8:11], 0x4d ; C20A894D
s_buffer_load_dword s22, s[8:11], 0x4e ; C20B094E
s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C
s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718
image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[12:19], s[0:3] ; F0800700 0003000D
s_buffer_load_dword s0, s[8:11], 0x41 ; C2000941
s_buffer_load_dword s1, s[8:11], 0x42 ; C2008942
s_buffer_load_dword s28, s[8:11], 0x54 ; C20E0954
s_buffer_load_dword s2, s[8:11], 0x40 ; C2010940
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v17, s20, v0 ; 10220014
v_mul_f32_e32 v18, s21, v1 ; 10240215
v_mul_f32_e32 v19, s22, v2 ; 10260416
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C
s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D
s_buffer_load_dword s3, s[8:11], 0x58 ; C2018958
v_sub_f32_e64 v0, 1.0, s28 ; D2080000 000038F2
v_mul_f32_e32 v2, s2, v0 ; 10040002
v_mul_f32_e32 v1, s0, v0 ; 10020000
v_mul_f32_e32 v0, s1, v0 ; 10000001
v_mac_f32_e32 v2, s28, v17 ; 3E04221C
v_mov_b32_e32 v26, v20 ; 7E340314
v_mac_f32_e32 v1, s28, v18 ; 3E02241C
v_mov_b32_e32 v27, v21 ; 7E360315
v_mac_f32_e32 v0, s28, v19 ; 3E00261C
v_mov_b32_e32 v28, v22 ; 7E380316
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[0:1], 0, s13 ; D0020000 00001A80
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[32:39], s[24:27] ; F0800F00 00C80D0D
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[20:21], s[0:1] ; BE942400
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s0, s[8:11], 0x20 ; C2000920
s_buffer_load_dword s1, s[8:11], 0x21 ; C2008921
s_buffer_load_dword s2, s[8:11], 0x22 ; C2010922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s14, s[8:11], 0x25 ; C2070925
v_mul_f32_e32 v13, v20, v20 ; 101A2914
v_mac_f32_e32 v13, v21, v21 ; 3E1A2B15
v_mac_f32_e32 v13, v22, v22 ; 3E1A2D16
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
s_buffer_load_dword s15, s[8:11], 0x26 ; C2078926
s_buffer_load_dword s16, s[8:11], 0x28 ; C2080928
s_buffer_load_dword s17, s[8:11], 0x29 ; C2088929
s_buffer_load_dword s18, s[8:11], 0x2a ; C209092A
v_mul_f32_e32 v15, v13, v20 ; 101E290D
v_mul_f32_e32 v16, v13, v21 ; 10202B0D
v_mul_f32_e32 v13, v13, v22 ; 101A2D0D
v_rcp_f32_e32 v26, v15 ; 7E34550F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v27, s0, v24 ; 08363000
v_sub_f32_e32 v28, s1, v23 ; 08382E01
v_rcp_f32_e32 v29, v16 ; 7E3A5510
v_mul_f32_e32 v27, v26, v27 ; 1036371A
v_sub_f32_e32 v30, s13, v24 ; 083C300D
v_mul_f32_e32 v26, v26, v30 ; 10343D1A
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v26, v26, v27 ; 0034371A
v_rcp_f32_e32 v27, v13 ; 7E36550D
v_mul_f32_e32 v28, v29, v28 ; 1038391D
v_sub_f32_e32 v30, s14, v23 ; 083C2E0E
v_mul_f32_e32 v29, v29, v30 ; 103A3D1D
v_cmp_lt_f32_e32 vcc, 0, v16 ; 7C022080
v_cndmask_b32_e32 v28, v29, v28 ; 0038391D
v_sub_f32_e32 v29, s2, v25 ; 083A3202
v_mul_f32_e32 v29, v27, v29 ; 103A3B1B
v_sub_f32_e32 v30, s15, v25 ; 083C320F
v_mul_f32_e32 v27, v27, v30 ; 10363D1B
v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80
v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B
v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A
v_mov_b32_e32 v27, s13 ; 7E36020D
v_add_f32_e32 v27, s0, v27 ; 06363600
v_mov_b32_e32 v28, s14 ; 7E38020E
v_add_f32_e32 v28, s1, v28 ; 06383801
v_mov_b32_e32 v29, s15 ; 7E3A020F
v_add_f32_e32 v29, s2, v29 ; 063A3A02
v_mad_f32 v30, 0.5, v27, -s16 ; D282001E 804236F0
v_add_f32_e32 v30, v24, v30 ; 063C3D18
v_mac_f32_e32 v30, v26, v15 ; 3E3C1F1A
v_mad_f32 v15, 0.5, v28, -s17 ; D282000F 804638F0
v_add_f32_e32 v15, v23, v15 ; 061E1F17
v_mac_f32_e32 v15, v26, v16 ; 3E1E211A
v_mad_f32 v16, 0.5, v29, -s18 ; D2820010 804A3AF0
v_add_f32_e32 v16, v25, v16 ; 06202119
v_mac_f32_e32 v16, v26, v13 ; 3E201B1A
v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0
v_mad_f32 v27, 0.5, -v28, v15 ; D282001B 443E38F0
v_mad_f32 v28, 0.5, -v29, v16 ; D282001C 44423AF0
s_or_b64 exec, exec, s[20:21] ; 88FE147E
s_buffer_load_dword s14, s[8:11], 0x17 ; C2070917
s_buffer_load_dword s15, s[8:11], 0x43 ; C2078943
s_buffer_load_dword s13, s[8:11], 0x60 ; C2068960
s_buffer_load_dword s0, s[8:11], 0x0 ; C2000900
s_buffer_load_dword s1, s[8:11], 0x1 ; C2008901
s_buffer_load_dword s2, s[8:11], 0x2 ; C2010902
s_buffer_load_dword s16, s[8:11], 0x4 ; C2080904
s_buffer_load_dword s17, s[8:11], 0x5 ; C2088905
s_buffer_load_dword s18, s[8:11], 0x6 ; C2090906
s_buffer_load_dword s20, s[8:11], 0x7 ; C20A0907
s_buffer_load_dword s19, s[8:11], 0x8 ; C2098908
s_buffer_load_dword s21, s[8:11], 0x9 ; C20A8909
s_buffer_load_dword s22, s[8:11], 0xa ; C20B090A
s_buffer_load_dword s23, s[8:11], 0xb ; C20B890B
s_buffer_load_dword s24, s[8:11], 0xc ; C20C090C
s_buffer_load_dword s25, s[8:11], 0xd ; C20C890D
s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E
s_buffer_load_dword s27, s[8:11], 0xf ; C20D890F
v_sub_f32_e64 v13, 1.0, s3 ; D208000D 000006F2
v_log_f32_e32 v13, v13 ; 7E1A4F0D
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v13, 0x3f400000, v13 ; 0E1A1AFF 3F400000
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v29, 0x40e00000, v13 ; 103A1AFF 40E00000
v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A
v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A
v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A
v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A
v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000
v_rcp_f32_e64 v13, |v32| ; D354010D 00000120
v_mad_f32 v27, v13, v30, v26 ; D282001B 046A3D0D
v_mac_f32_e32 v26, v13, v31 ; 3E343F0D
v_mov_b32_e32 v28, v33 ; 7E380321
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v13, v29 ; 7E1A4F1D
v_mul_legacy_f32_e32 v13, s30, v13 ; 0E1A1A1E
v_exp_f32_e32 v13, v13 ; 7E1A4B0D
v_mul_f32_e32 v13, s29, v13 ; 101A1A1D
v_mul_f32_e32 v16, v26, v13 ; 10201B1A
v_mul_f32_e32 v15, v27, v13 ; 101E1B1B
v_mul_f32_e32 v13, v28, v13 ; 101A1B1C
v_mov_b32_e32 v26, s28 ; 7E34021C
v_mov_b32_e32 v27, 0x3f7fff58 ; 7E3602FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v27 ; 7C02360C
s_and_saveexec_b64 s[28:29], vcc ; BE9C246A
s_xor_b64 s[28:29], exec, s[28:29] ; 899C1C7E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B
s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C
s_buffer_load_dword s31, s[8:11], 0x3d ; C20F893D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936
s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938
s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939
s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A
s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930
s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931
s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932
s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934
s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935
v_mul_f32_e32 v27, v20, v20 ; 10362914
v_mac_f32_e32 v27, v21, v21 ; 3E362B15
v_mac_f32_e32 v27, v22, v22 ; 3E362D16
v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v28, s34, v25 ; 08383222
v_mov_b32_e32 v29, s34 ; 7E3A0222
v_sub_f32_e32 v30, s38, v24 ; 083C3026
v_sub_f32_e32 v31, s39, v23 ; 083E2E27
v_add_f32_e32 v29, s40, v29 ; 063A3A28
v_sub_f32_e32 v32, s40, v25 ; 08403228
v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0
v_add_f32_e32 v25, v25, v33 ; 06324319
v_mul_f32_e32 v20, v27, v20 ; 1028291B
v_mul_f32_e32 v21, v27, v21 ; 102A2B1B
v_mul_f32_e32 v22, v27, v22 ; 102C2D1B
v_rcp_f32_e32 v27, v20 ; 7E365514
v_rcp_f32_e32 v33, v21 ; 7E425515
v_rcp_f32_e32 v34, v22 ; 7E445516
v_sub_f32_e32 v35, s41, v24 ; 08463029
v_mov_b32_e32 v36, s41 ; 7E480229
v_add_f32_e32 v36, s38, v36 ; 06484826
v_mul_f32_e32 v30, v27, v30 ; 103C3D1B
v_mul_f32_e32 v27, v27, v35 ; 1036471B
v_mul_f32_e32 v31, v33, v31 ; 103E3F21
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0
v_add_f32_e32 v24, v24, v34 ; 06304518
v_sub_f32_e32 v34, s42, v23 ; 08442E2A
v_mov_b32_e32 v35, s42 ; 7E46022A
v_mul_f32_e32 v33, v33, v34 ; 10424521
v_add_f32_e32 v34, s39, v35 ; 06444627
v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880
v_cndmask_b32_e32 v27, v27, v30 ; 00363D1B
v_cmp_lt_f32_e32 vcc, 0, v21 ; 7C022A80
v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21
v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80
v_cndmask_b32_e32 v28, v28, v32 ; 0038411C
v_min3_f32 v27, v27, v30, v28 ; D2A2001B 04723D1B
v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0
v_add_f32_e32 v23, v23, v28 ; 062E3917
v_mac_f32_e32 v24, v27, v20 ; 3E30291B
v_mac_f32_e32 v23, v27, v21 ; 3E2E2B1B
v_mac_f32_e32 v25, v27, v22 ; 3E322D1B
v_mad_f32 v20, 0.5, -v36, v24 ; D2820014 446248F0
v_mad_f32 v21, 0.5, -v34, v23 ; D2820015 445E44F0
v_mad_f32 v22, 0.5, -v29, v25 ; D2820016 44663AF0
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_sub_f32_e64 v23, 1.0, s3 ; D2080017 000006F2
v_log_f32_e32 v23, v23 ; 7E2E4F17
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v23, 0x3f400000, v23 ; 0E2E2EFF 3F400000
v_exp_f32_e32 v23, v23 ; 7E2E4B17
v_mul_f32_e32 v23, 0x40e00000, v23 ; 102E2EFF 40E00000
v_cubeid_f32 v30, v20, v21, v22 ; D288001E 045A2B14
v_cubema_f32 v29, v20, v21, v22 ; D28E001D 045A2B14
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v28, v20, v21, v22 ; D28A001C 045A2B14
v_cubetc_f32 v27, v20, v21, v22 ; D28C001B 045A2B14
v_rcp_f32_e64 v22, |v29| ; D3540116 0000011D
v_mov_b32_e32 v20, 0x3fc00000 ; 7E2802FF 3FC00000
v_mad_f32 v21, v22, v27, v20 ; D2820015 04523716
v_mac_f32_e32 v20, v22, v28 ; 3E283916
v_mov_b32_e32 v22, v30 ; 7E2C031E
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[36:43], s[32:35] ; F0900F00 01091414
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v23, v23 ; 7E2E4F17
v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2
v_mul_legacy_f32_e32 v23, s31, v23 ; 0E2E2E1F
v_exp_f32_e32 v23, v23 ; 7E2E4B17
v_mul_f32_e32 v23, s30, v23 ; 102E2E1E
v_mul_f32_e32 v20, v20, v23 ; 10282F14
v_mul_f32_e32 v21, v21, v23 ; 102A2F15
v_mul_f32_e32 v22, v22, v23 ; 102C2F16
v_mul_f32_e32 v20, v20, v24 ; 10283114
v_mul_f32_e32 v21, v21, v24 ; 102A3115
v_mul_f32_e32 v22, v22, v24 ; 102C3116
v_mac_f32_e32 v20, s12, v16 ; 3E28200C
v_mac_f32_e32 v21, s12, v15 ; 3E2A1E0C
v_mac_f32_e32 v22, s12, v13 ; 3E2C1A0C
v_mov_b32_e32 v13, v22 ; 7E1A0316
v_mov_b32_e32 v15, v21 ; 7E1E0315
v_mov_b32_e32 v16, v20 ; 7E200314
s_or_b64 exec, exec, s[28:29] ; 88FE1C7E
v_mad_f32 v22, -v26, s15, s15 ; D2820016 203C1F1A
v_mov_b32_e32 v20, s14 ; 7E28020E
v_mul_f32_e32 v21, v22, v17 ; 102A2316
v_mul_f32_e32 v18, v22, v18 ; 10242516
v_mul_f32_e32 v17, v22, v19 ; 10222716
v_mul_f32_e32 v19, s17, v4 ; 10260811
v_mac_f32_e32 v19, s16, v5 ; 3E260A10
v_mac_f32_e32 v19, s18, v3 ; 3E260612
v_add_f32_e32 v19, s20, v19 ; 06262614
v_add_f32_e32 v23, v19, v10 ; 062E1513
v_mul_f32_e32 v10, s21, v4 ; 10140815
v_mac_f32_e32 v10, s19, v5 ; 3E140A13
v_mac_f32_e32 v10, s22, v3 ; 3E140616
v_add_f32_e32 v10, s23, v10 ; 06141417
v_add_f32_e32 v11, v10, v11 ; 0616170A
v_mul_f32_e32 v10, s25, v4 ; 10140819
v_mac_f32_e32 v10, s24, v5 ; 3E140A18
v_mac_f32_e32 v10, s26, v3 ; 3E14061A
v_add_f32_e32 v10, s27, v10 ; 0614141B
v_add_f32_e32 v12, v10, v12 ; 0618190A
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916
s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944
s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945
s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946
s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948
s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_sub_f32_e64 v19, 1.0, s13 ; D2080013 00001AF2
v_mac_f32_e32 v19, s13, v14 ; 3E261C0D
v_mul_f32_e32 v10, s0, v5 ; 10140A00
v_mac_f32_e32 v10, s1, v4 ; 3E140801
v_mac_f32_e32 v10, s2, v3 ; 3E140602
v_max_f32_e32 v10, 0, v10 ; 20141480
v_mul_f32_e32 v14, v19, v23 ; 101C2F13
v_mul_f32_e32 v11, v19, v11 ; 10161713
v_mul_f32_e32 v12, v19, v12 ; 10181913
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v20, s17, v7 ; 3E280E11
v_mul_f32_e32 v7, v19, v16 ; 100E2113
v_mul_f32_e32 v15, v19, v15 ; 101E1F13
v_mul_f32_e32 v13, v19, v13 ; 101A1B13
v_sub_f32_e32 v16, 1.0, v22 ; 08202CF2
v_add_f32_e32 v16, s3, v16 ; 06202003
v_sub_f32_e64 v19, 1.0, s3 ; D2080013 000006F2
v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080
v_sub_f32_e32 v22, s0, v9 ; 082C1200
v_sub_f32_e32 v23, s1, v8 ; 082E1001
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s2, v6 ; 08320C02
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
v_mul_f32_e32 v9, v9, v5 ; 10120B09
v_mad_f32 v8, -v8, v4, -v9 ; D2820008 A4260908
v_mul_f32_e32 v5, v22, v5 ; 100A0B16
v_mac_f32_e32 v5, v23, v4 ; 3E0A0917
v_mul_f32_e32 v4, s0, v22 ; 10082C00
v_mac_f32_e32 v4, s1, v23 ; 3E082E01
v_mad_f32 v6, -v6, v3, v8 ; D2820006 24220706
v_mac_f32_e32 v4, s2, v24 ; 3E083002
v_mac_f32_e32 v5, v24, v3 ; 3E0A0718
v_max_f32_e32 v3, 0, v4 ; 20060880
v_sub_f32_e32 v4, 1.0, v3 ; 080806F2
v_mul_f32_e32 v8, v4, v4 ; 10100904
v_mul_f32_e32 v4, v4, v8 ; 10081104
v_mul_f32_e32 v4, v4, v8 ; 10081104
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_sub_f32_e32 v8, 1.0, v6 ; 08100CF2
v_mul_f32_e32 v9, v8, v8 ; 10121108
v_mul_f32_e32 v22, v8, v9 ; 102C1308
v_mad_f32 v23, -v9, v22, 1.0 ; D2820017 23CA2D09
v_mul_f32_e32 v24, v2, v23 ; 10302F02
v_sub_f32_e32 v25, 1.0, v2 ; 083204F2
v_mac_f32_e32 v2, v4, v25 ; 3E043304
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v4, v26 ; 3E023504
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v4, v26 ; 3E003504
v_sub_f32_e32 v4, 1.0, v19 ; 080826F2
v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F
v_madmk_f32_e32 v4, v4, v26, 0x3f77ced9 ; 40083504 3F77CED9
v_add_f32_e32 v26, v3, v3 ; 06340703
v_mul_f32_e32 v3, v19, v3 ; 10060713
v_mad_f32 v3, v26, v3, 0.5 ; D2820003 03C2071A
v_mul_f32_e32 v9, v22, v9 ; 10121316
v_mac_f32_e32 v24, v16, v9 ; 3E301310
v_mac_f32_e32 v25, v16, v9 ; 3E321310
v_mac_f32_e32 v23, v16, v9 ; 3E2E1310
v_mul_f32_e32 v16, v19, v19 ; 10202713
v_log_f32_e32 v4, v4 ; 7E084F04
v_mul_f32_e32 v16, s8, v16 ; 10202008
v_mul_f32_e32 v8, v16, v8 ; 10101110
v_mac_f32_e32 v8, 1.0, v6 ; 3E100CF2
v_rcp_f32_e32 v4, v4 ; 7E085504
v_sub_f32_e32 v6, 1.0, v10 ; 080C14F2
v_mul_f32_e32 v16, v16, v6 ; 10200D10
v_mac_f32_e32 v16, 1.0, v10 ; 3E2014F2
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_madak_f32_e32 v8, v16, v8, 0x38d1b717 ; 42101110 38D1B717
v_mul_f32_e32 v4, 0x41200000, v4 ; 100808FF 41200000
v_mul_f32_e32 v16, v4, v4 ; 10200904
v_mul_legacy_f32_e32 v5, v16, v5 ; 0E0A0B10
v_rcp_f32_e32 v8, v8 ; 7E105508
v_mad_f32 v4, v4, v4, 1.0 ; D2820004 03CA0904
v_mul_f32_e32 v4, s16, v4 ; 10080810
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v4, v4, v8 ; 10081104
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_mul_f32_e32 v4, s15, v4 ; 1008080F
v_mul_f32_e32 v5, v6, v6 ; 100A0D06
v_mul_f32_e32 v6, v6, v5 ; 100C0B06
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_add_f32_e32 v3, -1.0, v3 ; 060606F3
v_mad_f32 v5, v3, v5, 1.0 ; D2820005 03CA0B03
v_mad_f32 v3, v3, v9, 1.0 ; D2820003 03CA1303
v_mul_f32_e32 v3, v3, v5 ; 10060B03
v_mul_f32_e32 v3, v10, v3 ; 1006070A
v_mac_f32_e32 v14, s14, v3 ; 3E1C060E
v_mul_f32_e32 v5, v14, v21 ; 100A2B0E
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v6, s14, v4 ; 100C080E
v_mac_f32_e32 v5, v2, v6 ; 3E0A0D02
v_mac_f32_e32 v11, s7, v3 ; 3E160607
v_mac_f32_e32 v12, s12, v3 ; 3E18060C
v_mul_f32_e32 v2, s7, v4 ; 10040807
v_mul_f32_e32 v3, s12, v4 ; 1006080C
v_mul_f32_e32 v4, v11, v18 ; 1008250B
v_mul_f32_e32 v6, v12, v17 ; 100C230C
v_mac_f32_e32 v4, v1, v2 ; 3E080501
v_mac_f32_e32 v6, v0, v3 ; 3E0C0700
v_mac_f32_e32 v5, v24, v7 ; 3E0A0F18
v_mac_f32_e32 v4, v25, v15 ; 3E081F19
v_mac_f32_e32 v6, v23, v13 ; 3E0C1B17
v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v5, v0 ; 3E040105
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v4, v0 ; 3E060104
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v6, v0 ; 3E020106
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 40
Code Size: 2112 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL CONST[0..19]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx
45: DP4 TEMP[8].x, CONST[1], TEMP[7]
46: DP4 TEMP[9].x, CONST[2], TEMP[7]
47: MOV TEMP[8].y, TEMP[9].xxxx
48: DP4 TEMP[7].x, CONST[3], TEMP[7]
49: MOV TEMP[8].z, TEMP[7].xxxx
50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy
51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx
52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz
53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[7].yzw, TEMP[7].yxyz
55: MOV TEMP[7].x, TEMP[1].zzzz
56: MOV TEMP[0].xyz, TEMP[0].xyzx
57: MOV OUT[7], TEMP[0]
58: MOV OUT[1], TEMP[2]
59: MOV OUT[3], TEMP[5]
60: MOV OUT[2], TEMP[4]
61: MOV OUT[4], TEMP[6]
62: MOV OUT[5], TEMP[3]
63: MOV OUT[0], TEMP[1]
64: MOV OUT[6], TEMP[7]
65: END
; ModuleID = 'tgsi'
; Vertex shader entry point produced from the TGSI listing above (instructions 0-65).
; As used in this body: argument %1 is indexed (element 0) to fetch the descriptor
; passed to llvm.SI.load.const, argument %4 is indexed (elements 0-4) to fetch the
; descriptors passed to llvm.SI.vs.load.input, and %5 + %7 forms the index operand
; of every llvm.SI.vs.load.input call.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor used for all constant loads below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant loads. The second operand matches CONST[n] at offset n*16 with 4 per
; component (x=+0, y=+4, z=+8, w=+12), consistent with how the values are used below.
; %13-%15: CONST[0].xyz
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; %16-%27: CONST[1..3].xyzw
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; %28-%30: CONST[4].xyz
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
; %31-%42: CONST[5..7].xyzw
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
; %43-%45: CONST[8].xyz (the .w component is not needed: TGSI 3 writes only .xyz)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; %46-%54: CONST[9].xyz, CONST[10].xyz, CONST[11].xyz
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; %55-%58: CONST[13].xyzw, %59-%62: CONST[14].xyzw, %63: CONST[15].x
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
; %64-%79: CONST[16..19].xyzw
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
; IN[0].xyzw -> %84-%87
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
; IN[1].xyz -> %92-%94
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
; IN[2].xy -> %99, %100
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
; IN[3].xy -> %105, %106
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
; IN[4].xyzw -> %111-%114
%107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0
%109 = add i32 %5, %7
%110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109)
%111 = extractelement <4 x float> %110, i32 0
%112 = extractelement <4 x float> %110, i32 1
%113 = extractelement <4 x float> %110, i32 2
%114 = extractelement <4 x float> %110, i32 3
; TGSI 0-3: TEMP[0] = CONST[5]*IN[0].x + CONST[6]*IN[0].y + CONST[7]*IN[0].z, then
; + CONST[8]*IN[0].w on .xyz only. Results: xyz = %136, %138, %140; w = %134 (no CONST[8] term).
%115 = fmul float %31, %84
%116 = fmul float %32, %84
%117 = fmul float %33, %84
%118 = fmul float %34, %84
%119 = fmul float %35, %85
%120 = fadd float %119, %115
%121 = fmul float %36, %85
%122 = fadd float %121, %116
%123 = fmul float %37, %85
%124 = fadd float %123, %117
%125 = fmul float %38, %85
%126 = fadd float %125, %118
%127 = fmul float %39, %86
%128 = fadd float %127, %120
%129 = fmul float %40, %86
%130 = fadd float %129, %122
%131 = fmul float %41, %86
%132 = fadd float %131, %124
%133 = fmul float %42, %86
%134 = fadd float %133, %126
%135 = fmul float %43, %87
%136 = fadd float %135, %128
%137 = fmul float %44, %87
%138 = fadd float %137, %130
%139 = fmul float %45, %87
%140 = fadd float %139, %132
; TGSI 4-7: TEMP[1] = CONST[16]*IN[0].x + CONST[17]*IN[0].y + CONST[18]*IN[0].z + CONST[19]*IN[0].w.
; Results: xyzw = %162, %164, %166, %168 (exported as POSITION below).
%141 = fmul float %64, %84
%142 = fmul float %65, %84
%143 = fmul float %66, %84
%144 = fmul float %67, %84
%145 = fmul float %68, %85
%146 = fadd float %145, %141
%147 = fmul float %69, %85
%148 = fadd float %147, %142
%149 = fmul float %70, %85
%150 = fadd float %149, %143
%151 = fmul float %71, %85
%152 = fadd float %151, %144
%153 = fmul float %72, %86
%154 = fadd float %153, %146
%155 = fmul float %73, %86
%156 = fadd float %155, %148
%157 = fmul float %74, %86
%158 = fadd float %157, %150
%159 = fmul float %75, %86
%160 = fadd float %159, %152
%161 = fmul float %76, %87
%162 = fadd float %161, %154
%163 = fmul float %77, %87
%164 = fadd float %163, %156
%165 = fmul float %78, %87
%166 = fadd float %165, %158
%167 = fmul float %79, %87
%168 = fadd float %167, %160
; TGSI 8: TEMP[2].xy = IN[2].xy * CONST[13].xy + CONST[13].zw
%169 = fmul float %99, %55
%170 = fadd float %169, %57
%171 = fmul float %100, %56
%172 = fadd float %171, %58
; TGSI 9-14: the UIF/ELSE on (CONST[15].x == 0) is expressed as two selects:
; IN[2].xy when the compare is true, IN[3].xy otherwise.
%173 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %173, float %99, float %105
%.40 = select i1 %173, float %100, float %106
; TGSI 15-16: selected.xy * CONST[14].xy + CONST[14].zw, which becomes TEMP[2].zw
%174 = fmul float %., %59
%175 = fadd float %174, %61
%176 = fmul float %.40, %60
%177 = fadd float %176, %62
; TGSI 17-28: TEMP[3].xyz = (dot(CONST[9].xyz, IN[1].xyz), dot(CONST[10].xyz, IN[1].xyz),
; dot(CONST[11].xyz, IN[1].xyz)) -> %188, %190, %192
%178 = fmul float %46, %92
%179 = fmul float %49, %92
%180 = fmul float %52, %92
%181 = fmul float %47, %93
%182 = fadd float %181, %178
%183 = fmul float %50, %93
%184 = fadd float %183, %179
%185 = fmul float %53, %93
%186 = fadd float %185, %180
%187 = fmul float %48, %94
%188 = fadd float %187, %182
%189 = fmul float %51, %94
%190 = fadd float %189, %184
%191 = fmul float %54, %94
%192 = fadd float %191, %186
; TGSI 29-31: scale TEMP[3].xyz by rsq(dot(TEMP[3], TEMP[3])) -> %199, %200, %201
%193 = fmul float %188, %188
%194 = fmul float %190, %190
%195 = fadd float %194, %193
%196 = fmul float %192, %192
%197 = fadd float %195, %196
%198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197)
%199 = fmul float %188, %198
%200 = fmul float %190, %198
%201 = fmul float %192, %198
; TGSI 32-34: TEMP[4].xyz = CONST[5].xyz*IN[4].x + CONST[6].xyz*IN[4].y + CONST[7].xyz*IN[4].z
%202 = fmul float %31, %111
%203 = fmul float %32, %111
%204 = fmul float %33, %111
%205 = fmul float %35, %112
%206 = fadd float %205, %202
%207 = fmul float %36, %112
%208 = fadd float %207, %203
%209 = fmul float %37, %112
%210 = fadd float %209, %204
%211 = fmul float %39, %113
%212 = fadd float %211, %206
%213 = fmul float %40, %113
%214 = fadd float %213, %208
%215 = fmul float %41, %113
%216 = fadd float %215, %210
; TGSI 35-37: scale TEMP[4].xyz by rsq(dot(TEMP[4], TEMP[4])) -> %223, %224, %225
%217 = fmul float %212, %212
%218 = fmul float %214, %214
%219 = fadd float %218, %217
%220 = fmul float %216, %216
%221 = fadd float %219, %220
%222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221)
%223 = fmul float %212, %222
%224 = fmul float %214, %222
%225 = fmul float %216, %222
; TGSI 38-39: TEMP[5].xyz = TEMP[3].yzx * TEMP[4].zxy - TEMP[3].zxy * TEMP[4].yzx
; (cross product of TEMP[3] and TEMP[4]) -> %230, %232, %234
%226 = fmul float %201, %224
%227 = fmul float %199, %225
%228 = fmul float %200, %223
%229 = fmul float %200, %225
%230 = fsub float %229, %226
%231 = fmul float %201, %223
%232 = fsub float %231, %227
%233 = fmul float %199, %224
%234 = fsub float %233, %228
; TGSI 40: TEMP[5].xyz *= IN[4].w
%235 = fmul float %230, %114
%236 = fmul float %232, %114
%237 = fmul float %234, %114
; TGSI 44: TEMP[7] = TEMP[3].xyzz * TEMP[3].yzzx = (x*y, y*z, z*z, z*x)
%238 = fmul float %199, %200
%239 = fmul float %200, %201
%240 = fmul float %201, %201
%241 = fmul float %201, %199
; TGSI 45-49: DP4 of TEMP[7] with CONST[1], CONST[2], CONST[3] -> %248, %255, %262
%242 = fmul float %16, %238
%243 = fmul float %17, %239
%244 = fadd float %242, %243
%245 = fmul float %18, %240
%246 = fadd float %244, %245
%247 = fmul float %19, %241
%248 = fadd float %246, %247
%249 = fmul float %20, %238
%250 = fmul float %21, %239
%251 = fadd float %249, %250
%252 = fmul float %22, %240
%253 = fadd float %251, %252
%254 = fmul float %23, %241
%255 = fadd float %253, %254
%256 = fmul float %24, %238
%257 = fmul float %25, %239
%258 = fadd float %256, %257
%259 = fmul float %26, %240
%260 = fadd float %258, %259
%261 = fmul float %27, %241
%262 = fadd float %260, %261
; TGSI 50-51: TEMP[3].x*TEMP[3].x - TEMP[3].y*TEMP[3].y -> %265
%263 = fmul float %200, %200
%264 = fmul float %199, %199
%265 = fsub float %264, %263
; TGSI 52: CONST[4].xyz * %265 + (DP4 results) -> %267, %269, %271
%266 = fmul float %28, %265
%267 = fadd float %266, %248
%268 = fmul float %29, %265
%269 = fadd float %268, %255
%270 = fmul float %30, %265
%271 = fadd float %270, %262
; TGSI 53: TEMP[0].xyz - CONST[0].xyz -> %272, %273, %274
%272 = fsub float %136, %13
%273 = fsub float %138, %14
%274 = fsub float %140, %15
; Exports. The fourth operand selects the target; matching the exported values against
; the "MOV OUT[n]" lines of the TGSI listing gives:
;   32 -> OUT[1] (TEMP[2]), 33 -> OUT[2] (TEMP[4]), 34 -> OUT[3] (TEMP[5]),
;   35 -> OUT[4] (TEMP[3] before TGSI 52), 36 -> OUT[5] (TEMP[3] after TGSI 52),
;   37 -> OUT[6] (TEMP[1].z, then TEMP[0].xyz - CONST[0].xyz), 38 -> OUT[7] (TEMP[0]),
;   12 -> OUT[0] (POSITION, TEMP[1]).
; Outputs whose .w was never written by the TGSI are exported with w = 0.0.
; NOTE(review): only the final (position) export has its third operand set to 1;
; presumably this marks the last export - confirm against the intrinsic definition.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are listed above.
; Register roles noted below were derived by matching operands against that IR.
; v1 = 0, used as the .w operand of exports 33-36.
v_mov_b32_e32 v1, 0 ; 7E020280
; v0 = s10 + v0: the index used by every buffer_load_format below (IR: %5 + %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[28:31]: descriptor used by all s_buffer_load_dword instructions (constant buffer).
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
; Five descriptors fetched via s[8:9] (offsets 0x0, 0x4, 0x8, 0xc, 0x10), one per vertex
; input. The last load reuses s[8:11] as its destination, overwriting the s[8:9] pointer.
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
; Constant offsets in s_buffer_load_dword are dword indices here (index*4 equals the
; byte offsets in the IR), so 0x20 is CONST[8].x.
s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20
; Vertex input fetches: v[2:5] = IN[0], v[6:9] = IN[1], v[9:12] = IN[2], v[11:14] = IN[3],
; v[13:16] = IN[4]. Destination ranges overlap on purpose: only IN[1].xyz (v6-v8),
; IN[2].xy (v9-v10) and IN[3].xy (v11-v12) are read later, so the unused trailing
; components are overwritten by the next fetch. Each overlapping fetch is preceded by
; a wait on the previous one.
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
; First batch of constant loads (dword index -> constant):
;   0x10-0x12 CONST[4].xyz       0x14-0x1f CONST[5..7].xyzw   0x21-0x22 CONST[8].yz
;   0x24-0x26 CONST[9].xyz       0x28-0x2a CONST[10].xyz      0x2c-0x2e CONST[11].xyz
;   0x34-0x37 CONST[13].xyzw     0x38-0x3a CONST[14].xyz      0x3c CONST[15].x
;   0x40-0x43 CONST[16].xyzw
s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21
s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22
s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24
s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25
s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10
s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11
s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12
s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14
s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15
s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26
s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28
s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29
s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A
s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C
s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16
s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17
s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18
s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19
s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D
s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E
s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F
s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D
s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E
s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34
s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35
s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36
s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C
s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40
s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41
s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42
s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43
s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37
s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38
s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39
s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Scalar registers are recycled from here on: a value is copied to a VGPR (or consumed
; by the compare) right before the same SGPR is reloaded with another constant.
; v0 = CONST[13].z, v17 = CONST[13].w, v18 = CONST[14].z, v19 = CONST[14].w are the
; addends of the MADs in TGSI 8 and 15; vcc = (CONST[15].x == 0) is the TGSI 9 condition.
; Second batch of constant loads: 0x0-0x2 CONST[0].xyz, 0x4-0xf CONST[1..3].xyzw,
; 0x3b CONST[14].w, 0x44-0x4f CONST[17..19].xyzw.
v_mov_b32_e32 v0, s3 ; 7E000203
s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00
s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01
s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02
s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05
s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06
s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v19, s10 ; 7E26020A
s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09
s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A
s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B
s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C
s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D
s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E
s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F
s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44
s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45
s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46
s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47
s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48
s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49
s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A
s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B
s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C
s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D
s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E
; Last constant load: its destination s28 overwrites the first dword of the descriptor.
s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F
; v20 = TEMP[0].w = CONST[5].w*IN[0].x + CONST[6].w*IN[0].y + CONST[7].w*IN[0].z (IR %134)
v_mul_f32_e32 v20, s41, v2 ; 10280429
v_mac_f32_e32 v20, s42, v3 ; 3E28062A
v_mac_f32_e32 v20, s43, v4 ; 3E28082B
; v0, v17 = TEMP[2].xy = IN[2].xy * CONST[13].xy + CONST[13].zw (IR %170, %172)
v_mac_f32_e32 v0, s46, v9 ; 3E00122E
v_mac_f32_e32 v17, s47, v10 ; 3E22142F
; v21..v24 = TEMP[1].xyzw = CONST[16..19] applied to IN[0] (IR %162, %164, %166, %168)
v_mul_f32_e32 v21, s48, v2 ; 102A0430
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s54, v3 ; 3E2A0636
v_mac_f32_e32 v21, s58, v4 ; 3E2A083A
v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E
v_mul_f32_e32 v22, s49, v2 ; 102C0431
v_mac_f32_e32 v22, s55, v3 ; 3E2C0637
v_mac_f32_e32 v22, s59, v4 ; 3E2C083B
v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F
v_mul_f32_e32 v23, s50, v2 ; 102E0432
v_mac_f32_e32 v23, s56, v3 ; 3E2E0638
v_mac_f32_e32 v23, s60, v4 ; 3E2E083C
v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40
v_mul_f32_e32 v24, s51, v2 ; 10300433
v_mac_f32_e32 v24, s57, v3 ; 3E300639
v_mac_f32_e32 v24, s61, v4 ; 3E30083D
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
; v9, v10 = vcc ? IN[2].xy : IN[3].xy (the two IR selects on CONST[15].x == 0)
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
; v11, v12, v6 = unnormalized TEMP[3].xyz: dot(CONST[9|10|11].xyz, IN[1].xyz) (IR %188, %190, %192)
v_mul_f32_e32 v11, s34, v6 ; 10160C22
v_mac_f32_e32 v11, s35, v7 ; 3E160E23
v_mul_f32_e32 v12, s37, v6 ; 10180C25
v_mac_f32_e32 v12, s38, v7 ; 3E180E26
v_mul_f32_e32 v6, s40, v6 ; 100C0C28
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s36, v8 ; 3E161024
v_mac_f32_e32 v12, s39, v8 ; 3E181027
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
; v7, v8, v2 = TEMP[0].xyz = CONST[5..8].xyz applied to IN[0] (IR %136, %138, %140)
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s26, v3 ; 3E0E061A
v_mac_f32_e32 v7, s23, v4 ; 3E0E0817
v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B
v_mul_f32_e32 v8, s17, v2 ; 10100411
v_mac_f32_e32 v8, s25, v3 ; 3E100619
v_mac_f32_e32 v8, s24, v4 ; 3E100818
v_mac_f32_e32 v8, s32, v5 ; 3E100A20
v_mul_f32_e32 v2, s20, v2 ; 10040414
v_mac_f32_e32 v2, s21, v3 ; 3E040615
v_mac_f32_e32 v2, s22, v4 ; 3E040816
v_mac_f32_e32 v2, s33, v5 ; 3E040A21
; v18, v19 = TEMP[2].zw = selected.xy * CONST[14].xy + CONST[14].zw (IR %175, %177)
v_mac_f32_e32 v18, s52, v9 ; 3E241234
v_mac_f32_e32 v19, s53, v10 ; 3E261435
; Export target 32 = OUT[1] (TEMP[2]).
exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100
; Wait for the export to finish reading v0 before it is reused.
s_waitcnt expcnt(0) ; BF8C070F
; v0, v3, v4 = unnormalized TEMP[4].xyz = CONST[5..7].xyz applied to IN[4].xyz (IR %212, %214, %216)
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s26, v14 ; 3E001C1A
v_mul_f32_e32 v3, s17, v13 ; 10061A11
v_mac_f32_e32 v3, s25, v14 ; 3E061C19
v_mul_f32_e32 v4, s20, v13 ; 10081A14
v_mac_f32_e32 v4, s21, v14 ; 3E081C15
v_mac_f32_e32 v0, s23, v15 ; 3E001E17
v_mac_f32_e32 v3, s24, v15 ; 3E061E18
v_mac_f32_e32 v4, s22, v15 ; 3E081E16
; v5 = rsq(dot(TEMP[3], TEMP[3])), v9 = rsq(dot(TEMP[4], TEMP[4])) (IR %198, %222)
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; v10, v11, v5 = normalized TEMP[3].xyz (IR %199, %200, %201)
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
; v0, v3, v4 = normalized TEMP[4].xyz (IR %223, %224, %225)
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
; v6, v9, v12 = cross(TEMP[3], TEMP[4]) (IR %230, %232, %234), then scaled by IN[4].w (v16)
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
; Export targets 33 = OUT[2] (TEMP[4]) and 34 = OUT[3] (TEMP[5]); .w = v1 = 0.
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
s_waitcnt expcnt(0) ; BF8C070F
; TGSI 44-49: with n = (v10, v11, v5), accumulate the three DP4s of (n.x*n.y, n.y*n.z,
; n.z*n.z, n.z*n.x) with CONST[1], CONST[2], CONST[3] into v3, v4, v0. The products are
; formed one at a time in v0/v6, in the order y*z, x*y, z*z, z*x.
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s14, v0 ; 1006000E
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s19, v0 ; 10000013
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s13, v6 ; 3E000C0D
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s6, v6 ; 3E060C06
v_mac_f32_e32 v4, s8, v6 ; 3E080C08
v_mac_f32_e32 v0, s11, v6 ; 3E000C0B
; TGSI 50-52: v6 = n.x*n.x - n.y*n.y; v3, v4, v0 += CONST[4].xyz * v6 (IR %267, %269, %271)
v_mul_f32_e32 v6, v11, v11 ; 100C170B
v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A
v_mac_f32_e32 v3, s1, v6 ; 3E060C01
v_mac_f32_e32 v4, s2, v6 ; 3E080C02
v_mac_f32_e32 v0, s0, v6 ; 3E000C00
; TGSI 53: v6, v9, v12 = TEMP[0].xyz - CONST[0].xyz (subrev computes src1 - src0; IR %272-%274)
v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03
v_subrev_f32_e32 v9, s4, v8 ; 0A121004
v_subrev_f32_e32 v12, s5, v2 ; 0A180405
; Remaining exports: 35 = OUT[4] (normalized TEMP[3]), 36 = OUT[5] (TGSI 52 result),
; 37 = OUT[6] (TEMP[1].z, TEMP[0].xyz - CONST[0].xyz), 38 = OUT[7] (TEMP[0].xyzw),
; 12 = OUT[0] POSITION (TEMP[1]); the position export is the only one with its 4th
; operand set to 1, matching the IR export whose third operand is 1.
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617
exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 892 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[22..24]
DCL CONST[26]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3], TEMP[3], SAMP[2], 2D
11: MUL TEMP[4].x, TEMP[3].wwww, CONST[19].wwww
12: MOV TEMP[5].xy, IN[0].xyyy
13: TEX TEMP[5].yw, TEMP[5], SAMP[3], 2D
14: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[0].xxxx, IMM[0].yyyy
15: MUL TEMP[5].xy, TEMP[5].xyyy, CONST[22].xxxx
16: DP2 TEMP[6].x, TEMP[5].xyyy, TEMP[5].xyyy
17: MOV_SAT TEMP[6].x, TEMP[6].xxxx
18: ADD TEMP[6].x, IMM[0].zzzz, -TEMP[6].xxxx
19: SQRT TEMP[6].x, TEMP[6].xxxx
20: MOV TEMP[5].z, TEMP[6].xxxx
21: DP3 TEMP[0].x, TEMP[5].xyzz, TEMP[0].xyzz
22: DP3 TEMP[1].x, TEMP[5].xyzz, TEMP[1].xyzz
23: MOV TEMP[0].y, TEMP[1].xxxx
24: DP3 TEMP[1].x, TEMP[5].xyzz, TEMP[2].xyzz
25: MOV TEMP[0].z, TEMP[1].xxxx
26: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
27: RSQ TEMP[1].x, TEMP[1].xxxx
28: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
29: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww
30: RSQ TEMP[1].x, TEMP[1].xxxx
31: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx
32: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[3].xyzz
33: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz
34: MUL TEMP[5].x, CONST[23].xxxx, CONST[16].wwww
35: ADD TEMP[5].x, CONST[16].wwww, -TEMP[5].xxxx
36: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
37: MOV TEMP[6].xy, IN[0].xyyy
38: TEX TEMP[6].y, TEMP[6], SAMP[4], 2D
39: ADD TEMP[7].x, IMM[0].zzzz, -CONST[26].xxxx
40: MAD TEMP[6].x, TEMP[6].yyyy, CONST[26].xxxx, TEMP[7].xxxx
41: DP3 TEMP[7].x, TEMP[0].xyzz, CONST[0].xyzz
42: MAX TEMP[7].x, IMM[0].wwww, TEMP[7].xxxx
43: MOV TEMP[8].xyz, IMM[0].wwww
44: MOV TEMP[9].w, IMM[0].zzzz
45: MOV TEMP[9].xyz, TEMP[0].xyzx
46: DP4 TEMP[10].x, CONST[1], TEMP[9]
47: DP4 TEMP[11].x, CONST[2], TEMP[9]
48: MOV TEMP[10].y, TEMP[11].xxxx
49: DP4 TEMP[9].x, CONST[3], TEMP[9]
50: MOV TEMP[10].z, TEMP[9].xxxx
51: ADD TEMP[9].xyz, IN[4].xyzz, TEMP[10].xyzz
52: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[6].xxxx
53: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
54: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
55: MUL TEMP[10].xyz, IMM[0].xxxx, TEMP[10].xyzz
56: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
57: MOV TEMP[11].xyz, TEMP[10].xyzx
58: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww
59: UIF TEMP[12].xxxx :0
60: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
61: RSQ TEMP[12].x, TEMP[12].xxxx
62: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
63: MOV TEMP[13].xyz, -IN[6].xyzx
64: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
65: RCP TEMP[15].x, TEMP[12].xxxx
66: RCP TEMP[15].y, TEMP[12].yyyy
67: RCP TEMP[15].z, TEMP[12].zzzz
68: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
69: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
70: RCP TEMP[15].x, TEMP[12].xxxx
71: RCP TEMP[15].y, TEMP[12].yyyy
72: RCP TEMP[15].z, TEMP[12].zzzz
73: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
74: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz
75: UIF TEMP[15].xxxx :0
76: MOV TEMP[16].x, TEMP[14].xxxx
77: ELSE :0
78: MOV TEMP[16].x, TEMP[13].xxxx
79: ENDIF
80: UIF TEMP[15].yyyy :0
81: MOV TEMP[17].x, TEMP[14].yyyy
82: ELSE :0
83: MOV TEMP[17].x, TEMP[13].yyyy
84: ENDIF
85: UIF TEMP[15].zzzz :0
86: MOV TEMP[14].x, TEMP[14].zzzz
87: ELSE :0
88: MOV TEMP[14].x, TEMP[13].zzzz
89: ENDIF
90: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
91: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
92: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
93: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
94: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
95: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[6].xyzz
96: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
97: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
98: ENDIF
99: ADD TEMP[12].x, IMM[0].zzzz, -CONST[24].xxxx
100: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
101: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
102: MOV TEMP[11].xyz, TEMP[11].xyzz
103: MOV TEMP[11].w, TEMP[12].xxxx
104: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
105: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
106: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
107: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
108: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww
109: UIF TEMP[12].xxxx :0
110: MOV TEMP[12].xyz, TEMP[10].xyzx
111: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww
112: UIF TEMP[13].xxxx :0
113: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
114: RSQ TEMP[13].x, TEMP[13].xxxx
115: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
116: MOV TEMP[13].xyz, -IN[6].xyzx
117: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
118: RCP TEMP[15].x, TEMP[10].xxxx
119: RCP TEMP[15].y, TEMP[10].yyyy
120: RCP TEMP[15].z, TEMP[10].zzzz
121: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
122: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
123: RCP TEMP[15].x, TEMP[10].xxxx
124: RCP TEMP[15].y, TEMP[10].yyyy
125: RCP TEMP[15].z, TEMP[10].zzzz
126: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
127: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz
128: UIF TEMP[15].xxxx :0
129: MOV TEMP[16].x, TEMP[14].xxxx
130: ELSE :0
131: MOV TEMP[16].x, TEMP[13].xxxx
132: ENDIF
133: UIF TEMP[15].yyyy :0
134: MOV TEMP[17].x, TEMP[14].yyyy
135: ELSE :0
136: MOV TEMP[17].x, TEMP[13].yyyy
137: ENDIF
138: UIF TEMP[15].zzzz :0
139: MOV TEMP[14].x, TEMP[14].zzzz
140: ELSE :0
141: MOV TEMP[14].x, TEMP[13].zzzz
142: ENDIF
143: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
144: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
145: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
146: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
147: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
148: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[6].xyzz
149: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
150: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
151: ENDIF
152: ADD TEMP[10].x, IMM[0].zzzz, -CONST[24].xxxx
153: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
154: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
155: MOV TEMP[12].xyz, TEMP[12].xyzz
156: MOV TEMP[12].w, TEMP[10].xxxx
157: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
158: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
159: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
160: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
161: LRP TEMP[8].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
162: ELSE :0
163: MOV TEMP[8].xyz, TEMP[11].xyzx
164: ENDIF
165: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[6].xxxx
166: MOV TEMP[1].xyz, -TEMP[1].xyzx
167: ADD TEMP[6].x, IMM[0].zzzz, -CONST[24].xxxx
168: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
169: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
170: RSQ TEMP[11].x, TEMP[11].xxxx
171: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
172: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
173: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
174: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
175: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx
176: MUL TEMP[12].x, TEMP[6].xxxx, TEMP[6].xxxx
177: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
178: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[6].xxxx
179: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz
180: LG2 TEMP[13].x, TEMP[13].xxxx
181: RCP TEMP[13].x, TEMP[13].xxxx
182: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx
183: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
184: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[7].xxxx
185: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx
186: MUL TEMP[16].x, IMM[0].xxxx, TEMP[11].xxxx
187: MUL TEMP[6].x, TEMP[11].xxxx, TEMP[6].xxxx
188: MAD TEMP[6].x, TEMP[16].xxxx, TEMP[6].xxxx, IMM[1].xxxx
189: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx
190: ADD TEMP[16].x, IMM[0].zzzz, -TEMP[1].xxxx
191: ADD TEMP[5].x, IMM[0].zzzz, -TEMP[5].xxxx
192: ADD TEMP[5].x, CONST[24].xxxx, TEMP[5].xxxx
193: MOV_SAT TEMP[5].x, TEMP[5].xxxx
194: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
195: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
196: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
197: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
198: LRP TEMP[5].xyz, TEMP[16].xxxx, TEMP[5].xxxx, TEMP[3].xyzz
199: LRP TEMP[16].x, TEMP[7].xxxx, IMM[0].zzzz, TEMP[12].xxxx
200: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[12].xxxx
201: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww
202: RCP TEMP[1].x, TEMP[1].xxxx
203: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
204: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
205: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
206: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].zzzz
207: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
208: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
209: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
210: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[7].xxxx
211: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
212: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
213: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
214: ADD TEMP[10].xyz, IMM[0].zzzz, -TEMP[3].xyzz
215: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
216: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
217: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
218: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
219: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
220: ADD TEMP[10].x, TEMP[6].xxxx, IMM[0].yyyy
221: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
222: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
223: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
224: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
225: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].zzzz
226: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].yyyy
227: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
228: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
229: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
230: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
231: MAD TEMP[6].x, TEMP[6].xxxx, TEMP[11].xxxx, IMM[0].zzzz
232: MUL TEMP[6].x, TEMP[10].xxxx, TEMP[6].xxxx
233: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[7].xxxx
234: MAD TEMP[6].xyz, CONST[17].xyzz, TEMP[6].xxxx, TEMP[9].xyzz
235: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[6].xyzz
236: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
237: MAD TEMP[0].xyz, TEMP[8].xyzz, TEMP[5].xyzz, TEMP[1].xyzz
238: MOV TEMP[0].xyz, TEMP[0].xyzx
239: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww
240: MOV_SAT TEMP[1].x, TEMP[1].xxxx
241: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
242: MOV TEMP[0].xyz, TEMP[0].xyzx
243: MOV TEMP[0].w, TEMP[4].xxxx
244: MOV OUT[0], TEMP[0]
245: END
; ModuleID = 'tgsi'
; @main: LLVM IR emitted for the TGSI shader listed just above this module.
; Attribute group #0 marks it "ShaderType"="0"; the body ends in a single
; llvm.SI.export call followed by ret void.
; Arguments referenced in the body: %1 (array of <16 x i8> descriptors, used for
; constant loads), %2 (array of <4 x i32>, loaded as <16 x i8> sampler words),
; %3 (array of <8 x i32>, loaded as <32 x i8> resource words), %5 (inreg i32) and
; %7 (<2 x i32>), the latter two being passed to every llvm.SI.fs.interp call.
; NOTE(review): the CONST[n]/TEMP[n] names used in the comments below are a
; best-effort mapping to the TGSI listing (byte offset = 16*n + 4*channel); confirm
; against the TGSI text before relying on them.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Load descriptor 0 from %1, then read 60 scalar constants (%24..%83) from it at
; the byte offsets given as the second argument of each llvm.SI.load.const call.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
; Load five (resource, sampler) pairs: element i of %3 as <32 x i8> and element i
; of %2 as <16 x i8>, for i = 0..4. Pairs 0 and 1 feed the two samplel calls,
; pairs 2..4 feed the three llvm.SI.sample.v2i32 calls.
%84 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0
%86 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0
%88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%89 = bitcast <8 x i32> addrspace(2)* %88 to <32 x i8> addrspace(2)*
%90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, align 32, !tbaa !0
%91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%92 = bitcast <4 x i32> addrspace(2)* %91 to <16 x i8> addrspace(2)*
%93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0
%94 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%95 = bitcast <8 x i32> addrspace(2)* %94 to <32 x i8> addrspace(2)*
%96 = load <32 x i8>, <32 x i8> addrspace(2)* %95, align 32, !tbaa !0
%97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%98 = bitcast <4 x i32> addrspace(2)* %97 to <16 x i8> addrspace(2)*
%99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0
%100 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%101 = bitcast <8 x i32> addrspace(2)* %100 to <32 x i8> addrspace(2)*
%102 = load <32 x i8>, <32 x i8> addrspace(2)* %101, align 32, !tbaa !0
%103 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%104 = bitcast <4 x i32> addrspace(2)* %103 to <16 x i8> addrspace(2)*
%105 = load <16 x i8>, <16 x i8> addrspace(2)* %104, align 16, !tbaa !0
%106 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%107 = bitcast <8 x i32> addrspace(2)* %106 to <32 x i8> addrspace(2)*
%108 = load <32 x i8>, <32 x i8> addrspace(2)* %107, align 32, !tbaa !0
%109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%110 = bitcast <4 x i32> addrspace(2)* %109 to <16 x i8> addrspace(2)*
%111 = load <16 x i8>, <16 x i8> addrspace(2)* %110, align 16, !tbaa !0
; 21 interpolated inputs (%112..%132), all using %5 and %7.
; NOTE(review): the first i32 looks like the channel and the second the attribute
; index (0..6), matching the operand order of the v_interp_* lines in the
; disassembly below; confirm against the intrinsic definition.
%112 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
; Sample pair 2 at coordinates (%112, %113); keep all four components.
; %142 = fourth component * %79 and later becomes the last value packed for export.
%133 = bitcast float %112 to i32
%134 = bitcast float %113 to i32
%135 = insertelement <2 x i32> undef, i32 %133, i32 0
%136 = insertelement <2 x i32> %135, i32 %134, i32 1
%137 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %136, <32 x i8> %96, <16 x i8> %99, i32 2)
%138 = extractelement <4 x float> %137, i32 0
%139 = extractelement <4 x float> %137, i32 1
%140 = extractelement <4 x float> %137, i32 2
%141 = extractelement <4 x float> %137, i32 3
%142 = fmul float %141, %79
; Sample pair 3 at the same coordinates; only components 1 and 3 are used.
; Each is remapped as 2*c - 1 and scaled by %80, then a third value is rebuilt as
; sqrt(1 - clamp(x*x + y*y, 0, 1)).
; NOTE(review): this has the shape of a two-channel normal-map decode; confirm.
%143 = bitcast float %112 to i32
%144 = bitcast float %113 to i32
%145 = insertelement <2 x i32> undef, i32 %143, i32 0
%146 = insertelement <2 x i32> %145, i32 %144, i32 1
%147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %102, <16 x i8> %105, i32 2)
%148 = extractelement <4 x float> %147, i32 1
%149 = extractelement <4 x float> %147, i32 3
%150 = fmul float %149, 2.000000e+00
%151 = fadd float %150, -1.000000e+00
%152 = fmul float %148, 2.000000e+00
%153 = fadd float %152, -1.000000e+00
%154 = fmul float %151, %80
%155 = fmul float %153, %80
%156 = fmul float %154, %154
%157 = fmul float %155, %155
%158 = fadd float %156, %157
%159 = call float @llvm.AMDIL.clamp.(float %158, float 0.000000e+00, float 1.000000e+00)
%160 = fsub float 1.000000e+00, %159
%161 = call float @llvm.sqrt.f32(float %160)
; Combine (%154, %155, %161) with the attribute 1/2/3 interpolants as three
; weighted sums (%166, %171, %176), then scale by rsq of the squared length.
; Result: (%183, %184, %185), referred to as N in the comments below.
%162 = fmul float %154, %114
%163 = fmul float %155, %117
%164 = fadd float %163, %162
%165 = fmul float %161, %120
%166 = fadd float %164, %165
%167 = fmul float %154, %115
%168 = fmul float %155, %118
%169 = fadd float %168, %167
%170 = fmul float %161, %121
%171 = fadd float %169, %170
%172 = fmul float %154, %116
%173 = fmul float %155, %119
%174 = fadd float %173, %172
%175 = fmul float %161, %122
%176 = fadd float %174, %175
%177 = fmul float %166, %166
%178 = fmul float %171, %171
%179 = fadd float %178, %177
%180 = fmul float %176, %176
%181 = fadd float %179, %180
%182 = call float @llvm.AMDGPU.rsq.clamped.f32(float %181)
%183 = fmul float %166, %182
%184 = fmul float %171, %182
%185 = fmul float %176, %182
; Normalise the attribute-5 interpolants (%127, %128, %129) the same way.
; Result: (%192, %193, %194), referred to as V below.
%186 = fmul float %127, %127
%187 = fmul float %128, %128
%188 = fadd float %187, %186
%189 = fmul float %129, %129
%190 = fadd float %188, %189
%191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190)
%192 = fmul float %127, %191
%193 = fmul float %128, %191
%194 = fmul float %129, %191
; Tint the first sample's first three components by %76..%78, then derive two
; colour triples from it: lrp(%81, tinted, %66..%68) -> %198..%200, and
; tinted * (%69 - %81*%69) -> %203..%205.
%195 = fmul float %76, %138
%196 = fmul float %77, %139
%197 = fmul float %78, %140
%198 = call float @llvm.AMDGPU.lrp(float %81, float %195, float %66)
%199 = call float @llvm.AMDGPU.lrp(float %81, float %196, float %67)
%200 = call float @llvm.AMDGPU.lrp(float %81, float %197, float %68)
%201 = fmul float %81, %69
%202 = fsub float %69, %201
%203 = fmul float %195, %202
%204 = fmul float %196, %202
%205 = fmul float %197, %202
; Sample pair 4 at the same coordinates, component 1 only.
; %214 = (1 - %83) + sample * %83, a scalar that later multiplies both the
; attribute-4 based term (%242..%244) and the cube-map term (%354..%356).
%206 = bitcast float %112 to i32
%207 = bitcast float %113 to i32
%208 = insertelement <2 x i32> undef, i32 %206, i32 0
%209 = insertelement <2 x i32> %208, i32 %207, i32 1
%210 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %209, <32 x i8> %108, <16 x i8> %111, i32 2)
%211 = extractelement <4 x float> %210, i32 1
%212 = fsub float 1.000000e+00, %83
%213 = fmul float %211, %83
%214 = fadd float %213, %212
; %220 = max(dot(N, (%24, %25, %26)), 0).
%215 = fmul float %183, %24
%216 = fmul float %184, %25
%217 = fadd float %216, %215
%218 = fmul float %185, %26
%219 = fadd float %217, %218
%220 = call float @llvm.maxnum.f32(float %219, float 0.000000e+00)
; Three affine functions of N: dot((c0, c1, c2), N) + c3, using the constant
; groups %27..%30, %31..%34 and %35..%38.
%221 = fmul float %27, %183
%222 = fmul float %28, %184
%223 = fadd float %221, %222
%224 = fmul float %29, %185
%225 = fadd float %223, %224
%226 = fadd float %225, %30
%227 = fmul float %31, %183
%228 = fmul float %32, %184
%229 = fadd float %227, %228
%230 = fmul float %33, %185
%231 = fadd float %229, %230
%232 = fadd float %231, %34
%233 = fmul float %35, %183
%234 = fmul float %36, %184
%235 = fadd float %233, %234
%236 = fmul float %37, %185
%237 = fadd float %235, %236
%238 = fadd float %237, %38
; Add the attribute-4 interpolants and scale by %214 -> %242..%244.
%239 = fadd float %123, %226
%240 = fadd float %124, %232
%241 = fadd float %125, %238
%242 = fmul float %239, %214
%243 = fmul float %240, %214
%244 = fmul float %241, %214
; R = V - 2*dot(N, V)*N -> (%256, %257, %258).
%245 = fmul float %183, %192
%246 = fmul float %184, %193
%247 = fadd float %246, %245
%248 = fmul float %185, %194
%249 = fadd float %247, %248
%250 = fmul float %249, %183
%251 = fmul float %249, %184
%252 = fmul float %249, %185
%253 = fmul float %250, 2.000000e+00
%254 = fmul float %251, 2.000000e+00
%255 = fmul float %252, 2.000000e+00
%256 = fsub float %192, %253
%257 = fsub float %193, %254
%258 = fsub float %194, %255
; Take the IF block only when the constant at offset 172 (%51) is > 0.
%259 = fcmp ogt float %51, 0.000000e+00
br i1 %259, label %IF, label %ENDIF
IF: ; preds = %main_body
; Adjust R using two constant triples (%44..%46 and %47..%49), the attribute-6
; interpolants (%130..%132) and three extra constants loaded here.
; Steps: normalise R; per component compute (c - P)/dir for both triples and
; select the first when dir > 0, else the second; take the minimum t of the
; three; result = dir*t + (mid - (%262, %261, %260) + P) - mid, where mid is the
; average of the two triples.
; NOTE(review): this resembles a box-projected reflection lookup; confirm.
%260 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%261 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%262 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%263 = fmul float %256, %256
%264 = fmul float %257, %257
%265 = fadd float %264, %263
%266 = fmul float %258, %258
%267 = fadd float %265, %266
%268 = call float @llvm.AMDGPU.rsq.clamped.f32(float %267)
%269 = fmul float %256, %268
%270 = fmul float %257, %268
%271 = fmul float %258, %268
%272 = fsub float %44, %130
%273 = fsub float %45, %131
%274 = fsub float %46, %132
%275 = fdiv float 1.000000e+00, %269
%276 = fdiv float 1.000000e+00, %270
%277 = fdiv float 1.000000e+00, %271
%278 = fmul float %272, %275
%279 = fmul float %273, %276
%280 = fmul float %274, %277
%281 = fsub float %47, %130
%282 = fsub float %48, %131
%283 = fsub float %49, %132
%284 = fdiv float 1.000000e+00, %269
%285 = fdiv float 1.000000e+00, %270
%286 = fdiv float 1.000000e+00, %271
%287 = fmul float %281, %284
%288 = fmul float %282, %285
%289 = fmul float %283, %286
%290 = fcmp ogt float %269, 0.000000e+00
%291 = fcmp ogt float %270, 0.000000e+00
%292 = fcmp ogt float %271, 0.000000e+00
%. = select i1 %290, float %278, float %287
%temp68.0 = select i1 %291, float %279, float %288
%.100 = select i1 %292, float %280, float %289
%293 = fadd float %44, %47
%294 = fadd float %45, %48
%295 = fadd float %46, %49
%296 = fmul float %293, 5.000000e-01
%297 = fmul float %294, 5.000000e-01
%298 = fmul float %295, 5.000000e-01
%299 = call float @llvm.minnum.f32(float %., float %temp68.0)
%300 = call float @llvm.minnum.f32(float %299, float %.100)
%301 = fsub float %296, %262
%302 = fsub float %297, %261
%303 = fsub float %298, %260
%304 = fadd float %301, %130
%305 = fadd float %302, %131
%306 = fadd float %303, %132
%307 = fmul float %269, %300
%308 = fadd float %307, %304
%309 = fmul float %270, %300
%310 = fadd float %309, %305
%311 = fmul float %271, %300
%312 = fadd float %311, %306
%313 = fsub float %308, %296
%314 = fsub float %310, %297
%315 = fsub float %312, %298
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Lookup direction: the adjusted vector from IF, or the unmodified R otherwise.
%temp44.0 = phi float [ %313, %IF ], [ %256, %main_body ]
%temp45.0 = phi float [ %314, %IF ], [ %257, %main_body ]
%temp46.0 = phi float [ %315, %IF ], [ %258, %main_body ]
; %318 = pow(1 - %82, 0.75) * 7 is passed as the fourth coordinate of the samplel
; call (TGSI TXL ... CUBE), i.e. the explicit LOD slot.
%316 = fsub float 1.000000e+00, %82
%317 = call float @llvm.pow.f32(float %316, float 7.500000e-01)
%318 = fmul float %317, 7.000000e+00
%319 = insertelement <4 x float> undef, float %temp44.0, i32 0
%320 = insertelement <4 x float> %319, float %temp45.0, i32 1
%321 = insertelement <4 x float> %320, float %temp46.0, i32 2
%322 = insertelement <4 x float> %321, float %318, i32 3
; Cube coordinate preparation: divide components 0 and 1 of the cube result by
; |component 2| and add 1.5; component 3 is passed through unchanged.
%323 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %322)
%324 = extractelement <4 x float> %323, i32 0
%325 = extractelement <4 x float> %323, i32 1
%326 = extractelement <4 x float> %323, i32 2
%327 = extractelement <4 x float> %323, i32 3
%328 = call float @llvm.fabs.f32(float %326)
%329 = fdiv float 1.000000e+00, %328
%330 = fmul float %324, %329
%331 = fadd float %330, 1.500000e+00
%332 = fmul float %325, %329
%333 = fadd float %332, 1.500000e+00
; Note the address order: (%333, %331, %327, %318), i.e. the component-1 result
; goes first and the component-0 result second.
%334 = bitcast float %333 to i32
%335 = bitcast float %331 to i32
%336 = bitcast float %327 to i32
%337 = bitcast float %318 to i32
%338 = insertelement <4 x i32> undef, i32 %334, i32 0
%339 = insertelement <4 x i32> %338, i32 %335, i32 1
%340 = insertelement <4 x i32> %339, i32 %336, i32 2
%341 = insertelement <4 x i32> %340, i32 %337, i32 3
%342 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %341, <32 x i8> %85, <16 x i8> %87, i32 4)
%343 = extractelement <4 x float> %342, i32 0
%344 = extractelement <4 x float> %342, i32 1
%345 = extractelement <4 x float> %342, i32 2
%346 = extractelement <4 x float> %342, i32 3
; Scale the first three components by %52 * pow(fourth component, %53).
; NOTE(review): this looks like an alpha-encoded HDR decode; confirm.
%347 = call float @llvm.pow.f32(float %346, float %53)
%348 = fmul float %52, %347
%349 = fmul float %348, %343
%350 = fmul float %348, %344
%351 = fmul float %348, %345
; If %50 is below ~0.99999 (0x3FEFFFEB00000000), take the IF86 path, which mixes
; in a second cube lookup; otherwise %349..%351 are used directly.
%352 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %352, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
; Second lookup: adjust its direction only when the constant at offset 236 (%63) is > 0.
%353 = fcmp ogt float %63, 0.000000e+00
br i1 %353, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
; Final combination and export. The phis select either the blended result from
; ENDIF88 or the single lookup computed in ENDIF.
%temp32.0 = phi float [ %581, %ENDIF88 ], [ %349, %ENDIF ]
%temp33.0 = phi float [ %582, %ENDIF88 ], [ %350, %ENDIF ]
%temp34.0 = phi float [ %583, %ENDIF88 ], [ %351, %ENDIF ]
%354 = fmul float %temp32.0, %214
%355 = fmul float %temp33.0, %214
%356 = fmul float %temp34.0, %214
; %357 = 1 - %82 (TGSI: TEMP[6].x = 1 - CONST[24].x).
%357 = fsub float 1.000000e+00, %82
; H = normalize((%24, %25, %26) - V) -> (%367, %368, %369).
%358 = fsub float %24, %192
%359 = fsub float %25, %193
%360 = fsub float %26, %194
%361 = fmul float %358, %358
%362 = fmul float %359, %359
%363 = fadd float %362, %361
%364 = fmul float %360, %360
%365 = fadd float %363, %364
%366 = call float @llvm.AMDGPU.rsq.clamped.f32(float %365)
%367 = fmul float %358, %366
%368 = fmul float %359, %366
%369 = fmul float %360, %366
; %376 = max(-dot(V, N), 0).
%370 = fmul float %192, %183
%371 = fsub float -0.000000e+00, %370
%372 = fmul float %193, %184
%373 = fsub float %371, %372
%374 = fmul float %194, %185
%375 = fsub float %373, %374
%376 = call float @llvm.maxnum.f32(float %375, float 0.000000e+00)
; %382 = max(dot((%24, %25, %26), H), 0).
%377 = fmul float %24, %367
%378 = fmul float %25, %368
%379 = fadd float %378, %377
%380 = fmul float %26, %369
%381 = fadd float %379, %380
%382 = call float @llvm.maxnum.f32(float %381, float 0.000000e+00)
; %384 = %357^2 * %75.
%383 = fmul float %357, %357
%384 = fmul float %383, %75
; %391 = (10 / log2((1 - %357) * ~0.968 + ~0.03))^2, used below as the pow()
; exponent. The decimal values are approximations of the hex float literals.
%385 = fsub float 1.000000e+00, %357
%386 = fmul float %385, 0x3FEEF9DB20000000
%387 = fadd float %386, 0x3F9EB851E0000000
%388 = call float @llvm.log2.f32(float %387)
%389 = fdiv float 1.000000e+00, %388
%390 = fmul float %389, 1.000000e+01
%391 = fmul float %390, %390
%392 = fsub float 1.000000e+00, %220
%393 = fsub float 1.000000e+00, %376
; %397 = 2 * %382^2 * %357 + 0.5.
%394 = fmul float %382, 2.000000e+00
%395 = fmul float %382, %357
%396 = fmul float %394, %395
%397 = fadd float %396, 5.000000e-01
%398 = fsub float 1.000000e+00, %382
%399 = fsub float 1.000000e+00, %376
; %402 = clamp(%82 + (1 - %202), 0, 1).
%400 = fsub float 1.000000e+00, %202
%401 = fadd float %82, %400
%402 = call float @llvm.AMDIL.clamp.(float %401, float 0.000000e+00, float 1.000000e+00)
; %406 = (1 - %376)^5, built as x^2 * x^3; it is the lrp weight between %402 and
; the colour triple %198..%200.
%403 = fmul float %399, %399
%404 = fmul float %399, %399
%405 = fmul float %404, %399
%406 = fmul float %403, %405
%407 = call float @llvm.AMDGPU.lrp(float %406, float %402, float %198)
%408 = call float @llvm.AMDGPU.lrp(float %406, float %402, float %199)
%409 = call float @llvm.AMDGPU.lrp(float %406, float %402, float %200)
; %414 = 1 / (lrp(%220, 1, %384) * lrp(%376, 1, %384) + ~1e-4).
%410 = call float @llvm.AMDGPU.lrp(float %220, float 1.000000e+00, float %384)
%411 = call float @llvm.AMDGPU.lrp(float %376, float 1.000000e+00, float %384)
%412 = fmul float %410, %411
%413 = fadd float %412, 0x3F1A36E2E0000000
%414 = fdiv float 1.000000e+00, %413
; %421 = pow(max(dot(N, H), 0), %391); %424 scales it by (%391 + 1) * %74.
%415 = fmul float %183, %367
%416 = fmul float %184, %368
%417 = fadd float %416, %415
%418 = fmul float %185, %369
%419 = fadd float %417, %418
%420 = call float @llvm.maxnum.f32(float %419, float 0.000000e+00)
%421 = call float @llvm.pow.f32(float %420, float %391)
%422 = fadd float %391, 1.000000e+00
%423 = fmul float %422, %74
%424 = fmul float %421, %423
; %428 = max(%414 * %424 * %220 * %73, 0), then multiplied by the constant
; triple %70..%72 -> %429..%431.
%425 = fmul float %414, %424
%426 = fmul float %425, %220
%427 = fmul float %426, %73
%428 = call float @llvm.maxnum.f32(float %427, float 0.000000e+00)
%429 = fmul float %428, %70
%430 = fmul float %428, %71
%431 = fmul float %428, %72
; %440/%442/%444 = c + (1 - c) * (1 - %382)^5 for c in %198..%200.
%432 = fsub float 1.000000e+00, %198
%433 = fsub float 1.000000e+00, %199
%434 = fsub float 1.000000e+00, %200
%435 = fmul float %398, %398
%436 = fmul float %398, %398
%437 = fmul float %436, %398
%438 = fmul float %435, %437
%439 = fmul float %432, %438
%440 = fadd float %439, %198
%441 = fmul float %433, %438
%442 = fadd float %441, %199
%443 = fmul float %434, %438
%444 = fadd float %443, %200
; %451 = 1 + (%397 - 1) * (1 - %220)^5 and %458 = 1 + (%397 - 1) * (1 - %376)^5;
; %460 = %451 * %458 * %220.
%445 = fadd float %397, -1.000000e+00
%446 = fmul float %392, %392
%447 = fmul float %392, %392
%448 = fmul float %447, %392
%449 = fmul float %446, %448
%450 = fmul float %445, %449
%451 = fadd float %450, 1.000000e+00
%452 = fadd float %397, -1.000000e+00
%453 = fmul float %393, %393
%454 = fmul float %393, %393
%455 = fmul float %454, %393
%456 = fmul float %453, %455
%457 = fmul float %452, %456
%458 = fadd float %457, 1.000000e+00
%459 = fmul float %451, %458
%460 = fmul float %459, %220
; (%70..%72) * %460 + (%242..%244), then multiplied by %203..%205 -> %467..%469.
%461 = fmul float %70, %460
%462 = fadd float %461, %242
%463 = fmul float %71, %460
%464 = fadd float %463, %243
%465 = fmul float %72, %460
%466 = fadd float %465, %244
%467 = fmul float %203, %462
%468 = fmul float %204, %464
%469 = fmul float %205, %466
; Add (%429..%431) * (%440, %442, %444) -> %471, %473, %475.
%470 = fmul float %429, %440
%471 = fadd float %470, %467
%472 = fmul float %430, %442
%473 = fadd float %472, %468
%474 = fmul float %431, %444
%475 = fadd float %474, %469
; Add (%354..%356) * (%407..%409) -> %477, %479, %481.
%476 = fmul float %354, %407
%477 = fadd float %476, %471
%478 = fmul float %355, %408
%479 = fadd float %478, %473
%480 = fmul float %356, %409
%481 = fadd float %480, %475
; %484 = clamp(%126 * %42 + %43, 0, 1) is the lrp weight between the computed
; colour and the constant triple %39..%41.
; NOTE(review): likely a fog blend (TGSI: IN[5].x with CONST[5].zw, CONST[4].xyz); confirm.
%482 = fmul float %126, %42
%483 = fadd float %482, %43
%484 = call float @llvm.AMDIL.clamp.(float %483, float 0.000000e+00, float 1.000000e+00)
%485 = call float @llvm.AMDGPU.lrp(float %484, float %477, float %39)
%486 = call float @llvm.AMDGPU.lrp(float %484, float %479, float %40)
%487 = call float @llvm.AMDGPU.lrp(float %484, float %481, float %41)
; Pack the four results pairwise with llvm.SI.packf16 and export; the two packed
; words are passed twice, as (%489, %491, %489, %491).
%488 = call i32 @llvm.SI.packf16(float %485, float %486)
%489 = bitcast i32 %488 to float
%490 = call i32 @llvm.SI.packf16(float %487, float %142)
%491 = bitcast i32 %490 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %489, float %491, float %489, float %491)
ret void
IF89: ; preds = %IF86
; Same direction adjustment as block IF, but with the constant triples %54..%56
; and %57..%59 and the offset triple %60..%62.
%492 = fmul float %256, %256
%493 = fmul float %257, %257
%494 = fadd float %493, %492
%495 = fmul float %258, %258
%496 = fadd float %494, %495
%497 = call float @llvm.AMDGPU.rsq.clamped.f32(float %496)
%498 = fmul float %256, %497
%499 = fmul float %257, %497
%500 = fmul float %258, %497
%501 = fsub float %54, %130
%502 = fsub float %55, %131
%503 = fsub float %56, %132
%504 = fdiv float 1.000000e+00, %498
%505 = fdiv float 1.000000e+00, %499
%506 = fdiv float 1.000000e+00, %500
%507 = fmul float %501, %504
%508 = fmul float %502, %505
%509 = fmul float %503, %506
%510 = fsub float %57, %130
%511 = fsub float %58, %131
%512 = fsub float %59, %132
%513 = fdiv float 1.000000e+00, %498
%514 = fdiv float 1.000000e+00, %499
%515 = fdiv float 1.000000e+00, %500
%516 = fmul float %510, %513
%517 = fmul float %511, %514
%518 = fmul float %512, %515
%519 = fcmp ogt float %498, 0.000000e+00
%520 = fcmp ogt float %499, 0.000000e+00
%521 = fcmp ogt float %500, 0.000000e+00
%.101 = select i1 %519, float %507, float %516
%temp68.1 = select i1 %520, float %508, float %517
%.102 = select i1 %521, float %509, float %518
%522 = fadd float %54, %57
%523 = fadd float %55, %58
%524 = fadd float %56, %59
%525 = fmul float %522, 5.000000e-01
%526 = fmul float %523, 5.000000e-01
%527 = fmul float %524, 5.000000e-01
%528 = call float @llvm.minnum.f32(float %.101, float %temp68.1)
%529 = call float @llvm.minnum.f32(float %528, float %.102)
%530 = fsub float %525, %60
%531 = fsub float %526, %61
%532 = fsub float %527, %62
%533 = fadd float %530, %130
%534 = fadd float %531, %131
%535 = fadd float %532, %132
%536 = fmul float %498, %529
%537 = fadd float %536, %533
%538 = fmul float %499, %529
%539 = fadd float %538, %534
%540 = fmul float %500, %529
%541 = fadd float %540, %535
%542 = fsub float %537, %525
%543 = fsub float %539, %526
%544 = fsub float %541, %527
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
; Second cube lookup through pair 1 (%90/%93), using the adjusted direction from
; IF89 or the unmodified R, and the same pow(1 - %82, 0.75) * 7 fourth coordinate.
%temp48.0 = phi float [ %542, %IF89 ], [ %256, %IF86 ]
%temp49.0 = phi float [ %543, %IF89 ], [ %257, %IF86 ]
%temp50.0 = phi float [ %544, %IF89 ], [ %258, %IF86 ]
%545 = fsub float 1.000000e+00, %82
%546 = call float @llvm.pow.f32(float %545, float 7.500000e-01)
%547 = fmul float %546, 7.000000e+00
%548 = insertelement <4 x float> undef, float %temp48.0, i32 0
%549 = insertelement <4 x float> %548, float %temp49.0, i32 1
%550 = insertelement <4 x float> %549, float %temp50.0, i32 2
%551 = insertelement <4 x float> %550, float %547, i32 3
%552 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %551)
%553 = extractelement <4 x float> %552, i32 0
%554 = extractelement <4 x float> %552, i32 1
%555 = extractelement <4 x float> %552, i32 2
%556 = extractelement <4 x float> %552, i32 3
%557 = call float @llvm.fabs.f32(float %555)
%558 = fdiv float 1.000000e+00, %557
%559 = fmul float %553, %558
%560 = fadd float %559, 1.500000e+00
%561 = fmul float %554, %558
%562 = fadd float %561, 1.500000e+00
%563 = bitcast float %562 to i32
%564 = bitcast float %560 to i32
%565 = bitcast float %556 to i32
%566 = bitcast float %547 to i32
%567 = insertelement <4 x i32> undef, i32 %563, i32 0
%568 = insertelement <4 x i32> %567, i32 %564, i32 1
%569 = insertelement <4 x i32> %568, i32 %565, i32 2
%570 = insertelement <4 x i32> %569, i32 %566, i32 3
%571 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %570, <32 x i8> %90, <16 x i8> %93, i32 4)
%572 = extractelement <4 x float> %571, i32 0
%573 = extractelement <4 x float> %571, i32 1
%574 = extractelement <4 x float> %571, i32 2
%575 = extractelement <4 x float> %571, i32 3
; Scale by %64 * pow(fourth component, %65), then lrp with weight %50 between the
; first lookup (%349..%351) and this one (%578..%580).
%576 = call float @llvm.pow.f32(float %575, float %65)
%577 = fmul float %64, %576
%578 = fmul float %577, %572
%579 = fmul float %577, %573
%580 = fmul float %577, %574
%581 = call float @llvm.AMDGPU.lrp(float %50, float %349, float %578)
%582 = call float @llvm.AMDGPU.lrp(float %50, float %350, float %579)
%583 = call float @llvm.AMDGPU.lrp(float %50, float %351, float %580)
br label %ENDIF85
}
; External declarations for every intrinsic called from @main, followed by the
; attribute groups and metadata node that the module references.
; Constant load: (<16 x i8> descriptor, i32 byte offset) -> float.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Input interpolation: @main passes two literal i32 selectors, then %5 and %7.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Two-coordinate sample: (coords, <32 x i8> resource, <16 x i8> sampler, i32) -> <4 x float>.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Called in @main as clamp.(x, 0.0, 1.0).
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Used in @main to scale vectors by the reciprocal square root of their squared length.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Three-operand interpolation corresponding to the TGSI LRP instruction.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Cube coordinate helper; @main consumes all four result components.
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Four-coordinate sample used for the two cube lookups (TGSI TXL) in @main.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Packs two floats into one i32; @main bitcasts the result back to float for export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; The export intrinsic is the only declaration here without an attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is the attribute group attached to @main.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 and #2 are the groups referenced by the declarations above.
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; !0 is the node referenced by the !tbaa annotations on the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000
v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001
v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100
v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900
v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v19, v0, 1, 3, [m0] ; C84C0D00
v_interp_p2_f32 v19, [v19], v1, 1, 3, [m0] ; C84D0D01
v_interp_p1_f32 v20, v0, 2, 3, [m0] ; C8500E00
v_interp_p2_f32 v20, [v20], v1, 2, 3, [m0] ; C8510E01
v_interp_p1_f32 v3, v0, 0, 4, [m0] ; C80C1000
v_interp_p2_f32 v3, [v3], v1, 0, 4, [m0] ; C80D1001
v_interp_p1_f32 v4, v0, 1, 4, [m0] ; C8101100
v_interp_p2_f32 v4, [v4], v1, 1, 4, [m0] ; C8111101
v_interp_p1_f32 v6, v0, 2, 4, [m0] ; C8181200
v_interp_p2_f32 v6, [v6], v1, 2, 4, [m0] ; C8191201
v_interp_p1_f32 v5, v0, 0, 5, [m0] ; C8141400
v_interp_p2_f32 v5, [v5], v1, 0, 5, [m0] ; C8151401
v_interp_p1_f32 v22, v0, 1, 5, [m0] ; C8581500
v_interp_p2_f32 v22, [v22], v1, 1, 5, [m0] ; C8591501
v_interp_p1_f32 v26, v0, 2, 5, [m0] ; C8681600
v_interp_p2_f32 v26, [v26], v1, 2, 5, [m0] ; C8691601
v_interp_p1_f32 v27, v0, 3, 5, [m0] ; C86C1700
v_interp_p2_f32 v27, [v27], v1, 3, 5, [m0] ; C86D1701
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v23, v0, 0, 6, [m0] ; C85C1800
s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508
s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718
v_interp_p2_f32 v23, [v23], v1, 0, 6, [m0] ; C85D1801
v_interp_p1_f32 v21, v0, 1, 6, [m0] ; C8541900
v_interp_p2_f32 v21, [v21], v1, 1, 6, [m0] ; C8551901
v_interp_p1_f32 v24, v0, 2, 6, [m0] ; C8601A00
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s44, s[8:11], 0x58 ; C2160958
v_interp_p2_f32 v24, [v24], v1, 2, 6, [m0] ; C8611A01
s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720
s_load_dwordx4 s[32:35], s[4:5], 0x10 ; C0900510
image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[24:31], s[0:3] ; F0800F00 00060D11
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[12:15] ; F0800A00 00690011
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
s_buffer_load_dword s24, s[8:11], 0x5c ; C20C095C
s_buffer_load_dword s0, s[8:11], 0x60 ; C2000960
v_mul_f32_e32 v1, s44, v1 ; 1002022C
v_mul_f32_e32 v0, s44, v0 ; 1000002C
v_mul_f32_e32 v2, v2, v1 ; 10040302
v_mac_f32_e32 v2, v9, v0 ; 3E040109
v_mul_f32_e32 v7, v7, v1 ; 100E0307
v_mac_f32_e32 v7, v10, v0 ; 3E0E010A
v_mul_f32_e32 v10, v8, v1 ; 10140308
v_mac_f32_e32 v10, v11, v0 ; 3E14010B
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v2, v12, v0 ; 3E04010C
v_mac_f32_e32 v7, v19, v0 ; 3E0E0113
v_mac_f32_e32 v10, v20, v0 ; 3E140114
v_mul_f32_e32 v0, v2, v2 ; 10000502
v_mac_f32_e32 v0, v7, v7 ; 3E000F07
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v22, v22 ; 10022D16
v_mac_f32_e32 v1, v26, v26 ; 3E02351A
v_mac_f32_e32 v1, v27, v27 ; 3E02371B
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v9, v0, v2 ; 10120500
v_mul_f32_e32 v8, v0, v7 ; 10100F00
v_mul_f32_e32 v7, v0, v10 ; 100E1500
v_mul_f32_e32 v12, v1, v22 ; 10182D01
v_mul_f32_e32 v11, v1, v26 ; 10163501
v_mul_f32_e32 v0, v12, v9 ; 1000130C
v_mac_f32_e32 v0, v11, v8 ; 3E00110B
v_mul_f32_e32 v10, v1, v27 ; 10143701
v_mac_f32_e32 v0, v10, v7 ; 3E000F0A
v_mul_f32_e32 v2, v9, v0 ; 10040109
v_mac_f32_e32 v2, v9, v0 ; 3E040109
v_mul_f32_e32 v19, v8, v0 ; 10260108
v_mac_f32_e32 v19, v8, v0 ; 3E260108
v_mad_f32 v25, v22, v1, -v2 ; D2820019 840A0316
v_mad_f32 v26, v26, v1, -v19 ; D282001A 844E031A
v_mul_f32_e32 v2, v7, v0 ; 10040107
v_mac_f32_e32 v2, v7, v0 ; 3E040107
v_mad_f32 v27, v27, v1, -v2 ; D282001B 840A031B
s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940
s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941
s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942
s_buffer_load_dword s13, s[8:11], 0x4c ; C206894C
s_buffer_load_dword s14, s[8:11], 0x4d ; C207094D
s_buffer_load_dword s15, s[8:11], 0x4e ; C207894E
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s25, s[8:11], 0x2b ; C20C892B
s_buffer_load_dword s30, s[8:11], 0x2c ; C20F092C
s_buffer_load_dword s31, s[8:11], 0x2d ; C20F892D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e64 v0, 1.0, s24 ; D2080000 000030F2
v_mul_f32_e32 v2, s1, v0 ; 10040001
v_mul_f32_e32 v1, s2, v0 ; 10020002
v_mul_f32_e32 v0, s3, v0 ; 10000003
v_mul_f32_e32 v22, s13, v13 ; 102C1A0D
v_mac_f32_e32 v2, s24, v22 ; 3E042C18
v_mul_f32_e32 v13, s14, v14 ; 101A1C0E
v_mov_b32_e32 v28, v25 ; 7E380319
v_mac_f32_e32 v1, s24, v13 ; 3E021A18
v_mov_b32_e32 v29, v26 ; 7E3A031A
v_mul_f32_e32 v14, s15, v15 ; 101C1E0F
v_mac_f32_e32 v0, s24, v14 ; 3E001C18
v_mov_b32_e32 v30, v27 ; 7E3C031B
v_cmp_lt_f32_e64 s[2:3], 0, s25 ; D0020002 00003280
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[32:35] ; F0800F00 01041111
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402
s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925
v_mul_f32_e32 v15, v25, v25 ; 101E3319
v_mac_f32_e32 v15, v26, v26 ; 3E1E351A
v_mac_f32_e32 v15, v27, v27 ; 3E1E371B
v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F
s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926
s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928
s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929
s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A
v_mul_f32_e32 v17, v15, v25 ; 1022330F
v_mul_f32_e32 v19, v15, v26 ; 1026350F
v_mul_f32_e32 v15, v15, v27 ; 101E370F
v_rcp_f32_e32 v20, v17 ; 7E285511
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v28, s1, v23 ; 08382E01
v_sub_f32_e32 v29, s2, v21 ; 083A2A02
v_rcp_f32_e32 v30, v19 ; 7E3C5513
v_mul_f32_e32 v28, v20, v28 ; 10383914
v_sub_f32_e32 v31, s13, v23 ; 083E2E0D
v_mul_f32_e32 v20, v20, v31 ; 10283F14
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v20, v20, v28 ; 00283914
v_rcp_f32_e32 v28, v15 ; 7E38550F
v_mul_f32_e32 v29, v30, v29 ; 103A3B1E
v_sub_f32_e32 v31, s16, v21 ; 083E2A10
v_mul_f32_e32 v30, v30, v31 ; 103C3F1E
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v29, v30, v29 ; 003A3B1E
v_sub_f32_e32 v30, s3, v24 ; 083C3003
v_mul_f32_e32 v30, v28, v30 ; 103C3D1C
v_sub_f32_e32 v31, s17, v24 ; 083E3011
v_mul_f32_e32 v28, v28, v31 ; 10383F1C
v_cmp_lt_f32_e32 vcc, 0, v15 ; 7C021E80
v_cndmask_b32_e32 v28, v28, v30 ; 00383D1C
v_min3_f32 v20, v20, v29, v28 ; D2A20014 04723B14
v_mov_b32_e32 v28, s13 ; 7E38020D
v_add_f32_e32 v28, s1, v28 ; 06383801
v_mov_b32_e32 v29, s16 ; 7E3A0210
v_add_f32_e32 v29, s2, v29 ; 063A3A02
v_mov_b32_e32 v30, s17 ; 7E3C0211
v_add_f32_e32 v30, s3, v30 ; 063C3C03
v_mad_f32 v31, 0.5, v28, -s18 ; D282001F 804A38F0
v_add_f32_e32 v31, v23, v31 ; 063E3F17
v_mac_f32_e32 v31, v20, v17 ; 3E3E2314
v_mad_f32 v17, 0.5, v29, -s19 ; D2820011 804E3AF0
v_add_f32_e32 v17, v21, v17 ; 06222315
v_mac_f32_e32 v17, v20, v19 ; 3E222714
v_mad_f32 v19, 0.5, v30, -s20 ; D2820013 80523CF0
v_add_f32_e32 v19, v24, v19 ; 06262718
v_mac_f32_e32 v19, v20, v15 ; 3E261F14
v_mad_f32 v28, 0.5, -v28, v31 ; D282001C 447E38F0
v_mad_f32 v29, 0.5, -v29, v17 ; D282001D 44463AF0
v_mad_f32 v30, 0.5, -v30, v19 ; D282001E 444E3CF0
s_or_b64 exec, exec, s[14:15] ; 88FE0E7E
s_buffer_load_dword s17, s[8:11], 0x17 ; C2088917
s_buffer_load_dword s18, s[8:11], 0x43 ; C2090943
s_buffer_load_dword s19, s[8:11], 0x4f ; C209894F
s_buffer_load_dword s16, s[8:11], 0x68 ; C2080968
s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900
s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901
s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902
s_buffer_load_dword s20, s[8:11], 0x4 ; C20A0904
s_buffer_load_dword s21, s[8:11], 0x5 ; C20A8905
s_buffer_load_dword s22, s[8:11], 0x6 ; C20B0906
s_buffer_load_dword s23, s[8:11], 0x7 ; C20B8907
s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908
s_buffer_load_dword s15, s[8:11], 0x9 ; C2078909
s_buffer_load_dword s14, s[8:11], 0xa ; C207090A
v_sub_f32_e64 v15, 1.0, s0 ; D208000F 000000F2
v_log_f32_e32 v15, v15 ; 7E1E4F0F
v_mul_legacy_f32_e32 v15, 0x3f400000, v15 ; 0E1E1EFF 3F400000
v_exp_f32_e32 v15, v15 ; 7E1E4B0F
v_mul_f32_e32 v31, 0x40e00000, v15 ; 103E1EFF 40E00000
v_cubeid_f32 v35, v28, v29, v30 ; D2880023 047A3B1C
v_cubema_f32 v34, v28, v29, v30 ; D28E0022 047A3B1C
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_cubesc_f32 v33, v28, v29, v30 ; D28A0021 047A3B1C
v_cubetc_f32 v32, v28, v29, v30 ; D28C0020 047A3B1C
v_rcp_f32_e64 v15, |v34| ; D354010F 00000122
v_mov_b32_e32 v28, 0x3fc00000 ; 7E3802FF 3FC00000
v_mad_f32 v29, v15, v32, v28 ; D282001D 0472410F
v_mac_f32_e32 v28, v15, v33 ; 3E38430F
v_mov_b32_e32 v30, v35 ; 7E3C0323
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[36:43], s[32:35] ; F0900F00 01091C1C
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v15, v31 ; 7E1E4F1F
s_buffer_load_dword s28, s[8:11], 0xb ; C20E090B
s_buffer_load_dword s27, s[8:11], 0xc ; C20D890C
s_buffer_load_dword s29, s[8:11], 0xd ; C20E890D
s_buffer_load_dword s26, s[8:11], 0xe ; C20D090E
s_buffer_load_dword s25, s[8:11], 0xf ; C20C890F
v_mul_legacy_f32_e32 v15, s31, v15 ; 0E1E1E1F
v_exp_f32_e32 v15, v15 ; 7E1E4B0F
v_mul_f32_e32 v15, s30, v15 ; 101E1E1E
v_mul_f32_e32 v19, v28, v15 ; 10261F1C
v_mul_f32_e32 v17, v29, v15 ; 10221F1D
v_mul_f32_e32 v15, v30, v15 ; 101E1F1E
v_mov_b32_e32 v20, s24 ; 7E280218
v_mov_b32_e32 v28, 0x3f7fff58 ; 7E3802FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v28 ; 7C02380C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[30:31], vcc ; BE9E246A
s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s33, s[8:11], 0x3b ; C210893B
s_buffer_load_dword s24, s[8:11], 0x3c ; C20C093C
s_buffer_load_dword s32, s[8:11], 0x3d ; C210093D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[34:35], 0, s33 ; D0020022 00004280
s_and_saveexec_b64 s[34:35], s[34:35] ; BEA22422
s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s33, s[8:11], 0x36 ; C2108936
s_buffer_load_dword s36, s[8:11], 0x38 ; C2120938
s_buffer_load_dword s37, s[8:11], 0x39 ; C2128939
s_buffer_load_dword s38, s[8:11], 0x3a ; C213093A
s_buffer_load_dword s39, s[8:11], 0x30 ; C2138930
s_buffer_load_dword s40, s[8:11], 0x31 ; C2140931
s_buffer_load_dword s41, s[8:11], 0x32 ; C2148932
s_buffer_load_dword s42, s[8:11], 0x34 ; C2150934
s_buffer_load_dword s43, s[8:11], 0x35 ; C2158935
v_mul_f32_e32 v28, v25, v25 ; 10383319
v_mac_f32_e32 v28, v26, v26 ; 3E38351A
v_mac_f32_e32 v28, v27, v27 ; 3E38371B
v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v29, s33, v24 ; 083A3021
v_mov_b32_e32 v30, s33 ; 7E3C0221
v_sub_f32_e32 v31, s39, v23 ; 083E2E27
v_sub_f32_e32 v32, s40, v21 ; 08402A28
v_add_f32_e32 v30, s41, v30 ; 063C3C29
v_sub_f32_e32 v33, s41, v24 ; 08423029
v_mad_f32 v34, 0.5, v30, -s38 ; D2820022 809A3CF0
v_add_f32_e32 v24, v24, v34 ; 06304518
v_mul_f32_e32 v25, v28, v25 ; 1032331C
v_mul_f32_e32 v26, v28, v26 ; 1034351C
v_mul_f32_e32 v27, v28, v27 ; 1036371C
v_rcp_f32_e32 v28, v25 ; 7E385519
v_rcp_f32_e32 v34, v26 ; 7E44551A
v_rcp_f32_e32 v35, v27 ; 7E46551B
v_sub_f32_e32 v36, s42, v23 ; 08482E2A
v_mov_b32_e32 v37, s42 ; 7E4A022A
v_add_f32_e32 v37, s39, v37 ; 064A4A27
v_mul_f32_e32 v31, v28, v31 ; 103E3F1C
v_mul_f32_e32 v28, v28, v36 ; 1038491C
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v29, v35, v29 ; 103A3B23
v_mad_f32 v35, 0.5, v37, -s36 ; D2820023 80924AF0
v_add_f32_e32 v23, v23, v35 ; 062E4717
v_sub_f32_e32 v35, s43, v21 ; 08462A2B
v_mov_b32_e32 v36, s43 ; 7E48022B
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s40, v36 ; 06464828
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v28, v28, v31 ; 00383F1C
v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v33 ; 003A431D
v_min3_f32 v28, v28, v31, v29 ; D2A2001C 04763F1C
v_mad_f32 v29, 0.5, v35, -s37 ; D282001D 809646F0
v_add_f32_e32 v21, v21, v29 ; 062A3B15
v_mac_f32_e32 v23, v28, v25 ; 3E2E331C
v_mac_f32_e32 v21, v28, v26 ; 3E2A351C
v_mac_f32_e32 v24, v28, v27 ; 3E30371C
v_mad_f32 v25, 0.5, -v37, v23 ; D2820019 445E4AF0
v_mad_f32 v26, 0.5, -v35, v21 ; D282001A 445646F0
v_mad_f32 v27, 0.5, -v30, v24 ; D282001B 44623CF0
s_or_b64 exec, exec, s[34:35] ; 88FE227E
v_sub_f32_e64 v21, 1.0, s0 ; D2080015 000000F2
v_log_f32_e32 v21, v21 ; 7E2A4F15
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v21, 0x3f400000, v21 ; 0E2A2AFF 3F400000
v_exp_f32_e32 v21, v21 ; 7E2A4B15
v_mul_f32_e32 v28, 0x40e00000, v21 ; 10382AFF 40E00000
v_cubeid_f32 v32, v25, v26, v27 ; D2880020 046E3519
v_cubema_f32 v31, v25, v26, v27 ; D28E001F 046E3519
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v30, v25, v26, v27 ; D28A001E 046E3519
v_cubetc_f32 v29, v25, v26, v27 ; D28C001D 046E3519
v_rcp_f32_e64 v21, |v31| ; D3540115 0000011F
v_mov_b32_e32 v25, 0x3fc00000 ; 7E3202FF 3FC00000
v_mad_f32 v26, v21, v29, v25 ; D282001A 04663B15
v_mac_f32_e32 v25, v21, v30 ; 3E323D15
v_mov_b32_e32 v27, v32 ; 7E360320
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[40:47], s[36:39] ; F0900F00 012A1719
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v21, v26 ; 7E2A4F1A
v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2
v_mul_legacy_f32_e32 v21, s32, v21 ; 0E2A2A20
v_exp_f32_e32 v21, v21 ; 7E2A4B15
v_mul_f32_e32 v21, s24, v21 ; 102A2A18
v_mul_f32_e32 v23, v23, v21 ; 102E2B17
v_mul_f32_e32 v24, v24, v21 ; 10302B18
v_mul_f32_e32 v21, v25, v21 ; 102A2B19
v_mul_f32_e32 v23, v23, v26 ; 102E3517
v_mul_f32_e32 v24, v24, v26 ; 10303518
v_mul_f32_e32 v21, v21, v26 ; 102A3515
v_mac_f32_e32 v23, s12, v19 ; 3E2E260C
v_mac_f32_e32 v24, s12, v17 ; 3E30220C
v_mac_f32_e32 v21, s12, v15 ; 3E2A1E0C
v_mov_b32_e32 v15, v21 ; 7E1E0315
v_mov_b32_e32 v17, v24 ; 7E220318
v_mov_b32_e32 v19, v23 ; 7E260317
s_or_b64 exec, exec, s[30:31] ; 88FE1E7E
v_mul_f32_e32 v16, s19, v16 ; 10202013
v_mad_f32 v23, -v20, s18, s18 ; D2820017 20482514
v_mov_b32_e32 v20, s17 ; 7E280211
v_mul_f32_e32 v21, v23, v22 ; 102A2D17
v_mul_f32_e32 v22, s21, v8 ; 102C1015
v_mac_f32_e32 v22, s20, v9 ; 3E2C1214
v_mac_f32_e32 v22, s22, v7 ; 3E2C0E16
v_add_f32_e32 v24, s23, v22 ; 06302C17
v_sub_f32_e64 v22, 1.0, s16 ; D2080016 000020F2
v_mac_f32_e32 v22, s16, v18 ; 3E2C2410
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s19, s[8:11], 0x16 ; C2098916
s_buffer_load_dword s16, s[8:11], 0x44 ; C2080944
s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945
s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946
s_buffer_load_dword s17, s[8:11], 0x48 ; C2088948
s_buffer_load_dword s18, s[8:11], 0x49 ; C2090949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_mul_f32_e32 v18, s15, v8 ; 1024100F
v_mac_f32_e32 v18, s13, v9 ; 3E24120D
v_mac_f32_e32 v18, s14, v7 ; 3E240E0E
v_add_f32_e32 v18, s28, v18 ; 0624241C
v_mul_f32_e32 v25, s29, v8 ; 1032101D
v_mac_f32_e32 v25, s27, v9 ; 3E32121B
v_mac_f32_e32 v25, s26, v7 ; 3E320E1A
v_add_f32_e32 v25, s25, v25 ; 06323219
v_add_f32_e32 v24, v24, v3 ; 06300718
v_add_f32_e32 v26, v18, v4 ; 06340912
v_add_f32_e32 v25, v25, v6 ; 06320D19
v_mul_f32_e32 v4, v23, v13 ; 10081B17
v_mul_f32_e32 v3, v23, v14 ; 10061D17
v_mul_f32_e32 v6, s1, v9 ; 100C1201
v_mac_f32_e32 v6, s2, v8 ; 3E0C1002
v_mac_f32_e32 v6, s3, v7 ; 3E0C0E03
v_max_f32_e32 v18, 0, v6 ; 20240C80
v_mul_f32_e32 v14, v22, v24 ; 101C3116
v_mul_f32_e32 v6, v22, v26 ; 100C3516
v_mul_f32_e32 v13, v22, v25 ; 101A3316
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v20, s19, v5 ; 3E280A13
v_mul_f32_e32 v5, v22, v19 ; 100A2716
v_mul_f32_e32 v17, v22, v17 ; 10222316
v_mul_f32_e32 v15, v22, v15 ; 101E1F16
v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v22, s1, v12 ; 082C1801
v_sub_f32_e32 v23, s2, v11 ; 082E1602
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s3, v10 ; 08321403
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
v_mul_f32_e32 v12, v12, v9 ; 1018130C
v_mad_f32 v11, -v11, v8, -v12 ; D282000B A432110B
v_mul_f32_e32 v9, v22, v9 ; 10121316
v_mac_f32_e32 v9, v23, v8 ; 3E121117
v_mul_f32_e32 v8, s1, v22 ; 10102C01
v_mac_f32_e32 v8, s2, v23 ; 3E102E02
v_mad_f32 v10, -v10, v7, v11 ; D282000A 242E0F0A
v_mac_f32_e32 v8, s3, v24 ; 3E103003
v_mac_f32_e32 v9, v24, v7 ; 3E120F18
v_max_f32_e32 v7, 0, v8 ; 200E1080
v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2
v_mul_f32_e32 v11, v8, v8 ; 10161108
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v22, v11, v12 ; 102C190B
v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C
v_mul_f32_e32 v24, v2, v23 ; 10302F02
v_sub_f32_e32 v25, 1.0, v2 ; 083204F2
v_mac_f32_e32 v2, v8, v25 ; 3E043308
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v8, v26 ; 3E023508
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v8, v26 ; 3E003508
v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2
v_sub_f32_e32 v26, 1.0, v8 ; 083410F2
v_mov_b32_e32 v27, 0x3cf5c28f ; 7E3602FF 3CF5C28F
v_madmk_f32_e32 v26, v26, v27, 0x3f77ced9 ; 4034371A 3F77CED9
v_add_f32_e32 v27, v7, v7 ; 06360F07
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mad_f32 v7, v27, v7, 0.5 ; D2820007 03C20F1B
v_mul_f32_e32 v12, v22, v12 ; 10181916
v_mac_f32_e32 v24, v19, v12 ; 3E301913
v_mac_f32_e32 v25, v19, v12 ; 3E321913
v_mac_f32_e32 v23, v19, v12 ; 3E2E1913
v_mul_f32_e32 v8, v8, v8 ; 10101108
v_log_f32_e32 v19, v26 ; 7E264F1A
v_mul_f32_e32 v8, s8, v8 ; 10101008
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v19 ; 7E145513
v_sub_f32_e32 v19, 1.0, v18 ; 082624F2
v_mul_f32_e32 v8, v8, v19 ; 10102708
v_mac_f32_e32 v8, 1.0, v18 ; 3E1024F2
v_max_f32_e32 v9, 0, v9 ; 20121280
v_log_f32_e32 v9, v9 ; 7E124F09
v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B
v_rcp_f32_e32 v8, v8 ; 7E105508
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s18, v10 ; 10141412
v_exp_f32_e32 v9, v9 ; 7E124B09
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v8, v9, v8 ; 10101109
v_mul_f32_e32 v9, v19, v19 ; 10122713
v_mul_f32_e32 v10, v19, v9 ; 10141313
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v8, v18, v8 ; 10101112
v_mul_f32_e32 v8, s17, v8 ; 10101011
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307
v_mad_f32 v7, v7, v12, 1.0 ; D2820007 03CA1907
v_mul_f32_e32 v7, v7, v9 ; 100E1307
v_mul_f32_e32 v7, v18, v7 ; 100E0F12
v_mac_f32_e32 v14, s16, v7 ; 3E1C0E10
v_mul_f32_e32 v9, v14, v21 ; 10122B0E
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v10, s16, v8 ; 10141010
v_mac_f32_e32 v9, v2, v10 ; 3E121502
v_mac_f32_e32 v6, s7, v7 ; 3E0C0E07
v_mac_f32_e32 v13, s12, v7 ; 3E1A0E0C
v_mul_f32_e32 v2, s7, v8 ; 10041007
v_mul_f32_e32 v7, s12, v8 ; 100E100C
v_mul_f32_e32 v4, v6, v4 ; 10080906
v_mul_f32_e32 v3, v13, v3 ; 1006070D
v_mac_f32_e32 v4, v1, v2 ; 3E080501
v_mac_f32_e32 v3, v0, v7 ; 3E060F00
v_mac_f32_e32 v9, v24, v5 ; 3E120B18
v_mac_f32_e32 v4, v25, v17 ; 3E082319
v_mac_f32_e32 v3, v23, v15 ; 3E061F17
v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v9, v0 ; 3E040109
v_mul_f32_e32 v5, s5, v1 ; 100A0205
v_mac_f32_e32 v5, v4, v0 ; 3E0A0104
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v3, v0 ; 3E020103
v_cvt_pkrtz_f16_f32_e32 v0, v2, v5 ; 5E000B02
v_cvt_pkrtz_f16_f32_e32 v1, v1, v16 ; 5E022101
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 56
VGPRS: 40
Code Size: 2272 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[1], IN[1].xyxy
5: MOV OUT[0], TEMP[0]
6: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float %41, float %42)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[8:11], 0 idxen ; E00C2000 80020500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
v_mac_f32_e32 v0, s8, v2 ; 3E000408
v_mul_f32_e32 v7, s5, v1 ; 100E0205
v_mac_f32_e32 v7, s9, v2 ; 3E0E0409
v_mul_f32_e32 v8, s6, v1 ; 10100206
v_mac_f32_e32 v8, s10, v2 ; 3E10040A
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mac_f32_e32 v1, s11, v2 ; 3E02040B
v_mac_f32_e32 v0, s12, v3 ; 3E00060C
v_mac_f32_e32 v7, s13, v3 ; 3E0E060D
v_mac_f32_e32 v8, s14, v3 ; 3E10060E
v_mac_f32_e32 v1, s15, v3 ; 3E02060F
v_mac_f32_e32 v0, s16, v4 ; 3E000810
v_mac_f32_e32 v7, s17, v4 ; 3E0E0811
v_mac_f32_e32 v8, s18, v4 ; 3E100812
v_mac_f32_e32 v1, s0, v4 ; 3E020800
exp 15, 32, 0, 0, 0, v5, v6, v5, v6 ; F800020F 06050605
exp 15, 12, 0, 1, 0, v0, v7, v8, v1 ; F80008CF 01080700
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 192 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[2]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[1], 2D
2: MUL TEMP[0], TEMP[0], CONST[2].xxxx
3: ADD TEMP[0], IMM[0].xxxx, -TEMP[0]
4: MOV TEMP[1].xy, IN[0].zwww
5: TEX TEMP[1], TEMP[1], SAMP[0], 2D
6: ADD TEMP[1], IMM[0].xxxx, -TEMP[1]
7: MUL TEMP[0], TEMP[0], TEMP[1]
8: ADD TEMP[0], IMM[0].xxxx, -TEMP[0]
9: MOV OUT[0], TEMP[0]
10: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0
%27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0
%29 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%30 = bitcast <8 x i32> addrspace(2)* %29 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%33 = bitcast <4 x i32> addrspace(2)* %32 to <16 x i8> addrspace(2)*
%34 = load <16 x i8>, <16 x i8> addrspace(2)* %33, align 16, !tbaa !0
%35 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%39 = bitcast float %35 to i32
%40 = bitcast float %36 to i32
%41 = insertelement <2 x i32> undef, i32 %39, i32 0
%42 = insertelement <2 x i32> %41, i32 %40, i32 1
%43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %31, <16 x i8> %34, i32 2)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = extractelement <4 x float> %43, i32 3
%48 = fmul float %44, %24
%49 = fmul float %45, %24
%50 = fmul float %46, %24
%51 = fmul float %47, %24
%52 = fsub float 1.000000e+00, %48
%53 = fsub float 1.000000e+00, %49
%54 = fsub float 1.000000e+00, %50
%55 = fsub float 1.000000e+00, %51
%56 = bitcast float %37 to i32
%57 = bitcast float %38 to i32
%58 = insertelement <2 x i32> undef, i32 %56, i32 0
%59 = insertelement <2 x i32> %58, i32 %57, i32 1
%60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %59, <32 x i8> %26, <16 x i8> %28, i32 2)
%61 = extractelement <4 x float> %60, i32 0
%62 = extractelement <4 x float> %60, i32 1
%63 = extractelement <4 x float> %60, i32 2
%64 = extractelement <4 x float> %60, i32 3
%65 = fsub float 1.000000e+00, %61
%66 = fsub float 1.000000e+00, %62
%67 = fsub float 1.000000e+00, %63
%68 = fsub float 1.000000e+00, %64
%69 = fmul float %52, %65
%70 = fmul float %53, %66
%71 = fmul float %54, %67
%72 = fmul float %55, %68
%73 = fsub float 1.000000e+00, %69
%74 = fsub float 1.000000e+00, %70
%75 = fsub float 1.000000e+00, %71
%76 = fsub float 1.000000e+00, %72
%77 = call i32 @llvm.SI.packf16(float %73, float %74)
%78 = bitcast i32 %77 to float
%79 = call i32 @llvm.SI.packf16(float %75, float %76)
%80 = bitcast i32 %79 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %78, float %80, float %78, float %80)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108
s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[16:19] ; F0800F00 00850002
image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[12:15] ; F0800F00 00610404
s_waitcnt vmcnt(1) ; BF8C0771
v_mad_f32 v0, -v0, s0, 1.0 ; D2820000 23C80100
v_mad_f32 v1, -v1, s0, 1.0 ; D2820001 23C80101
v_mad_f32 v2, -v2, s0, 1.0 ; D2820002 23C80102
v_mad_f32 v3, -v3, s0, 1.0 ; D2820003 23C80103
s_waitcnt vmcnt(0) ; BF8C0770
v_sub_f32_e32 v4, 1.0, v4 ; 080808F2
v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2
v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2
v_sub_f32_e32 v7, 1.0, v7 ; 080E0EF2
v_mad_f32 v0, -v0, v4, 1.0 ; D2820000 23CA0900
v_mad_f32 v1, -v1, v5, 1.0 ; D2820001 23CA0B01
v_mad_f32 v2, -v2, v6, 1.0 ; D2820002 23CA0D02
v_mad_f32 v3, -v3, v7, 1.0 ; D2820003 23CA0F03
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 8
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..25]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, 0.0000, 0.5000}
0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[1].xyzz
1: MUL TEMP[1], CONST[2], TEMP[0].xxxx
2: MAD TEMP[1], CONST[3], TEMP[0].yyyy, TEMP[1]
3: MAD TEMP[1].xyz, CONST[4], TEMP[0].zzzz, TEMP[1]
4: LRP TEMP[0].xyz, IN[1].wwww, TEMP[1].xyzz, TEMP[0].xyzz
5: DP3 TEMP[1].x, CONST[6].xyzz, TEMP[0].xyzz
6: ADD TEMP[1].x, TEMP[1].xxxx, CONST[6].wwww
7: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[6].xyzz
8: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
9: LRP TEMP[0].xyz, CONST[7].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
10: MOV TEMP[1].w, CONST[9].xxxx
11: MUL TEMP[2], CONST[21], IMM[0].yyyy
12: MUL TEMP[1].xyz, CONST[12].xyzz, CONST[8].xxxx
13: DP4 TEMP[3].x, IN[3], TEMP[1]
14: MAX TEMP[3].x, IMM[0].zzzz, TEMP[3].xxxx
15: ADD TEMP[3].x, TEMP[3].xxxx, CONST[10].xxxx
16: MAD TEMP[2], CONST[16], TEMP[3].xxxx, TEMP[2]
17: MUL TEMP[1].xyz, CONST[13].xyzz, CONST[8].xxxx
18: DP4 TEMP[3].x, IN[3], TEMP[1]
19: MAX TEMP[3].x, IMM[0].zzzz, TEMP[3].xxxx
20: ADD TEMP[3].x, TEMP[3].xxxx, CONST[10].xxxx
21: MAD TEMP[2], CONST[17], TEMP[3].xxxx, TEMP[2]
22: MUL TEMP[1].xyz, CONST[14].xyzz, CONST[8].xxxx
23: DP4 TEMP[3].x, IN[3], TEMP[1]
24: MAX TEMP[3].x, IMM[0].zzzz, TEMP[3].xxxx
25: ADD TEMP[3].x, TEMP[3].xxxx, CONST[10].xxxx
26: MAD TEMP[2], CONST[18], TEMP[3].xxxx, TEMP[2]
27: MUL TEMP[1].xyz, CONST[15].xyzz, CONST[8].xxxx
28: DP4 TEMP[1].x, IN[3], TEMP[1]
29: MAX TEMP[1].x, IMM[0].zzzz, TEMP[1].xxxx
30: ADD TEMP[1].x, TEMP[1].xxxx, CONST[10].xxxx
31: MAD TEMP[2], CONST[19], TEMP[1].xxxx, TEMP[2]
32: MUL TEMP[1], TEMP[2], CONST[11]
33: MUL TEMP[1].xyz, TEMP[1], CONST[0]
34: MOV TEMP[1].xyz, TEMP[1].xyzx
35: MUL TEMP[2].x, IMM[0].wwww, CONST[20].xxxx
36: MOV TEMP[1].w, TEMP[2].xxxx
37: MUL TEMP[2], CONST[22], TEMP[0].xxxx
38: MAD TEMP[2], CONST[23], TEMP[0].yyyy, TEMP[2]
39: MAD TEMP[0], CONST[24], TEMP[0].zzzz, TEMP[2]
40: ADD TEMP[0], TEMP[0], CONST[25]
41: MOV OUT[1], IN[2]
42: MOV OUT[2], TEMP[1]
43: MOV OUT[0], TEMP[0]
44: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%83 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%84 = load <16 x i8>, <16 x i8> addrspace(2)* %83, align 16, !tbaa !0
%85 = add i32 %5, %7
%86 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %84, i32 0, i32 %85)
%87 = extractelement <4 x float> %86, i32 0
%88 = extractelement <4 x float> %86, i32 1
%89 = extractelement <4 x float> %86, i32 2
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 3
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
%101 = extractelement <4 x float> %98, i32 2
%102 = extractelement <4 x float> %98, i32 3
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 0
%108 = extractelement <4 x float> %106, i32 1
%109 = extractelement <4 x float> %106, i32 2
%110 = extractelement <4 x float> %106, i32 3
%111 = fmul float %87, %16
%112 = fmul float %88, %17
%113 = fmul float %89, %18
%114 = fmul float %19, %111
%115 = fmul float %20, %111
%116 = fmul float %21, %111
%117 = fmul float %22, %112
%118 = fadd float %117, %114
%119 = fmul float %23, %112
%120 = fadd float %119, %115
%121 = fmul float %24, %112
%122 = fadd float %121, %116
%123 = fmul float %25, %113
%124 = fadd float %123, %118
%125 = fmul float %26, %113
%126 = fadd float %125, %120
%127 = fmul float %27, %113
%128 = fadd float %127, %122
%129 = call float @llvm.AMDGPU.lrp(float %94, float %124, float %111)
%130 = call float @llvm.AMDGPU.lrp(float %94, float %126, float %112)
%131 = call float @llvm.AMDGPU.lrp(float %94, float %128, float %113)
%132 = fmul float %28, %129
%133 = fmul float %29, %130
%134 = fadd float %133, %132
%135 = fmul float %30, %131
%136 = fadd float %134, %135
%137 = fadd float %136, %31
%138 = fmul float %137, %28
%139 = fmul float %137, %29
%140 = fmul float %137, %30
%141 = fsub float %129, %138
%142 = fsub float %130, %139
%143 = fsub float %131, %140
%144 = call float @llvm.AMDGPU.lrp(float %32, float %129, float %141)
%145 = call float @llvm.AMDGPU.lrp(float %32, float %130, float %142)
%146 = call float @llvm.AMDGPU.lrp(float %32, float %131, float %143)
%147 = fmul float %64, 2.000000e+00
%148 = fmul float %65, 2.000000e+00
%149 = fmul float %66, 2.000000e+00
%150 = fmul float %39, %33
%151 = fmul float %40, %33
%152 = fmul float %41, %33
%153 = fmul float %107, %150
%154 = fmul float %108, %151
%155 = fadd float %153, %154
%156 = fmul float %109, %152
%157 = fadd float %155, %156
%158 = fmul float %110, %34
%159 = fadd float %157, %158
%160 = call float @llvm.maxnum.f32(float %159, float 0.000000e+00)
%161 = fadd float %160, %35
%162 = fmul float %51, %161
%163 = fadd float %162, %147
%164 = fmul float %52, %161
%165 = fadd float %164, %148
%166 = fmul float %53, %161
%167 = fadd float %166, %149
%168 = fmul float %42, %33
%169 = fmul float %43, %33
%170 = fmul float %44, %33
%171 = fmul float %107, %168
%172 = fmul float %108, %169
%173 = fadd float %171, %172
%174 = fmul float %109, %170
%175 = fadd float %173, %174
%176 = fmul float %110, %34
%177 = fadd float %175, %176
%178 = call float @llvm.maxnum.f32(float %177, float 0.000000e+00)
%179 = fadd float %178, %35
%180 = fmul float %54, %179
%181 = fadd float %180, %163
%182 = fmul float %55, %179
%183 = fadd float %182, %165
%184 = fmul float %56, %179
%185 = fadd float %184, %167
%186 = fmul float %45, %33
%187 = fmul float %46, %33
%188 = fmul float %47, %33
%189 = fmul float %107, %186
%190 = fmul float %108, %187
%191 = fadd float %189, %190
%192 = fmul float %109, %188
%193 = fadd float %191, %192
%194 = fmul float %110, %34
%195 = fadd float %193, %194
%196 = call float @llvm.maxnum.f32(float %195, float 0.000000e+00)
%197 = fadd float %196, %35
%198 = fmul float %57, %197
%199 = fadd float %198, %181
%200 = fmul float %58, %197
%201 = fadd float %200, %183
%202 = fmul float %59, %197
%203 = fadd float %202, %185
%204 = fmul float %48, %33
%205 = fmul float %49, %33
%206 = fmul float %50, %33
%207 = fmul float %107, %204
%208 = fmul float %108, %205
%209 = fadd float %207, %208
%210 = fmul float %109, %206
%211 = fadd float %209, %210
%212 = fmul float %110, %34
%213 = fadd float %211, %212
%214 = call float @llvm.maxnum.f32(float %213, float 0.000000e+00)
%215 = fadd float %214, %35
%216 = fmul float %60, %215
%217 = fadd float %216, %199
%218 = fmul float %61, %215
%219 = fadd float %218, %201
%220 = fmul float %62, %215
%221 = fadd float %220, %203
%222 = fmul float %217, %36
%223 = fmul float %219, %37
%224 = fmul float %221, %38
%225 = fmul float %222, %13
%226 = fmul float %223, %14
%227 = fmul float %224, %15
%228 = fmul float %63, 5.000000e-01
%229 = fmul float %67, %144
%230 = fmul float %68, %144
%231 = fmul float %69, %144
%232 = fmul float %70, %144
%233 = fmul float %71, %145
%234 = fadd float %233, %229
%235 = fmul float %72, %145
%236 = fadd float %235, %230
%237 = fmul float %73, %145
%238 = fadd float %237, %231
%239 = fmul float %74, %145
%240 = fadd float %239, %232
%241 = fmul float %75, %146
%242 = fadd float %241, %234
%243 = fmul float %76, %146
%244 = fadd float %243, %236
%245 = fmul float %77, %146
%246 = fadd float %245, %238
%247 = fmul float %78, %146
%248 = fadd float %247, %240
%249 = fadd float %242, %79
%250 = fadd float %244, %80
%251 = fadd float %246, %81
%252 = fadd float %248, %82
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %99, float %100, float %101, float %102)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %225, float %226, float %227, float %228)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s12, s[0:3], 0xa ; C206010A
s_buffer_load_dword s13, s[0:3], 0xc ; C206810C
s_buffer_load_dword s14, s[0:3], 0xd ; C207010D
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110
s_buffer_load_dword s17, s[0:3], 0x11 ; C2088111
s_buffer_load_dword s18, s[0:3], 0x12 ; C2090112
s_buffer_load_dword s19, s[0:3], 0x18 ; C2098118
s_buffer_load_dword s20, s[0:3], 0x19 ; C20A0119
s_buffer_load_dword s21, s[0:3], 0x1a ; C20A811A
s_buffer_load_dword s22, s[0:3], 0x1b ; C20B011B
s_buffer_load_dword s23, s[0:3], 0x1c ; C20B811C
s_buffer_load_dword s24, s[0:3], 0x20 ; C20C0120
s_buffer_load_dword s25, s[0:3], 0x24 ; C20C8124
s_buffer_load_dword s26, s[0:3], 0x28 ; C20D0128
s_buffer_load_dword s27, s[0:3], 0x2c ; C20D812C
s_buffer_load_dword s28, s[0:3], 0x2d ; C20E012D
s_buffer_load_dword s29, s[0:3], 0x2e ; C20E812E
s_buffer_load_dword s30, s[0:3], 0x30 ; C20F0130
s_buffer_load_dword s31, s[0:3], 0x31 ; C20F8131
s_buffer_load_dword s32, s[0:3], 0x32 ; C2100132
s_buffer_load_dword s33, s[0:3], 0x34 ; C2108134
s_buffer_load_dword s34, s[0:3], 0x35 ; C2110135
s_buffer_load_dword s35, s[0:3], 0x36 ; C2118136
s_buffer_load_dword s36, s[0:3], 0x38 ; C2120138
s_buffer_load_dword s37, s[0:3], 0x39 ; C2128139
s_buffer_load_dword s38, s[0:3], 0x3a ; C213013A
s_buffer_load_dword s39, s[0:3], 0x3c ; C213813C
s_buffer_load_dword s40, s[0:3], 0x3d ; C214013D
s_buffer_load_dword s41, s[0:3], 0x3e ; C214813E
s_buffer_load_dword s42, s[0:3], 0x40 ; C2150140
s_buffer_load_dword s43, s[0:3], 0x41 ; C2158141
s_buffer_load_dword s44, s[0:3], 0x42 ; C2160142
s_buffer_load_dword s45, s[0:3], 0x44 ; C2168144
s_buffer_load_dword s46, s[0:3], 0x45 ; C2170145
s_buffer_load_dword s47, s[0:3], 0x46 ; C2178146
s_buffer_load_dword s48, s[0:3], 0x48 ; C2180148
s_buffer_load_dword s49, s[0:3], 0x49 ; C2188149
s_buffer_load_dword s50, s[0:3], 0x4a ; C219014A
s_buffer_load_dword s51, s[0:3], 0x4c ; C219814C
s_buffer_load_dword s52, s[0:3], 0x4d ; C21A014D
s_buffer_load_dword s53, s[0:3], 0x4e ; C21A814E
s_buffer_load_dword s54, s[0:3], 0x50 ; C21B0150
s_buffer_load_dword s55, s[0:3], 0x54 ; C21B8154
s_buffer_load_dword s56, s[0:3], 0x55 ; C21C0155
s_buffer_load_dword s57, s[0:3], 0x56 ; C21C8156
s_buffer_load_dword s58, s[0:3], 0x58 ; C21D0158
s_buffer_load_dword s59, s[0:3], 0x59 ; C21D8159
s_buffer_load_dword s60, s[0:3], 0x5a ; C21E015A
s_buffer_load_dword s61, s[0:3], 0x5b ; C21E815B
s_buffer_load_dword s62, s[0:3], 0x5c ; C21F015C
s_buffer_load_dword s63, s[0:3], 0x5d ; C21F815D
s_buffer_load_dword s64, s[0:3], 0x5e ; C220015E
s_buffer_load_dword s65, s[0:3], 0x5f ; C220815F
s_buffer_load_dword s66, s[0:3], 0x60 ; C2210160
s_buffer_load_dword s67, s[0:3], 0x61 ; C2218161
s_buffer_load_dword s68, s[0:3], 0x62 ; C2220162
s_buffer_load_dword s69, s[0:3], 0x63 ; C2228163
s_buffer_load_dword s70, s[0:3], 0x64 ; C2230164
s_buffer_load_dword s71, s[0:3], 0x65 ; C2238165
s_buffer_load_dword s72, s[0:3], 0x66 ; C2240166
s_buffer_load_dword s0, s[0:3], 0x67 ; C2000167
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s7, v1 ; 10000207
v_mul_f32_e32 v1, s8, v2 ; 10020408
v_mul_f32_e32 v2, s9, v3 ; 10040609
v_mov_b32_e32 v3, s24 ; 7E060218
v_mov_b32_e32 v4, s24 ; 7E080218
v_mov_b32_e32 v5, s24 ; 7E0A0218
v_mov_b32_e32 v6, s24 ; 7E0C0218
v_mov_b32_e32 v16, s24 ; 7E200218
v_mov_b32_e32 v17, s24 ; 7E220218
v_mov_b32_e32 v18, s24 ; 7E240218
v_mov_b32_e32 v19, s24 ; 7E260218
v_mov_b32_e32 v20, s24 ; 7E280218
v_mov_b32_e32 v21, s24 ; 7E2A0218
v_mov_b32_e32 v22, s24 ; 7E2C0218
v_mov_b32_e32 v23, s24 ; 7E2E0218
v_mul_f32_e32 v23, s30, v23 ; 102E2E1E
v_mul_f32_e32 v3, s31, v3 ; 1006061F
v_mul_f32_e32 v4, s32, v4 ; 10080820
v_mul_f32_e32 v5, s33, v5 ; 100A0A21
v_mul_f32_e32 v6, s34, v6 ; 100C0C22
v_mul_f32_e32 v16, s35, v16 ; 10202023
v_mul_f32_e32 v17, s36, v17 ; 10222224
v_mul_f32_e32 v18, s37, v18 ; 10242425
v_mul_f32_e32 v19, s38, v19 ; 10262626
v_mul_f32_e32 v20, s39, v20 ; 10282827
v_mul_f32_e32 v21, s40, v21 ; 102A2A28
v_mul_f32_e32 v22, s41, v22 ; 102C2C29
v_mul_f32_e32 v3, v3, v13 ; 10061B03
v_mac_f32_e32 v3, v23, v12 ; 3E061917
v_mul_f32_e32 v6, v6, v13 ; 100C1B06
v_mac_f32_e32 v6, v5, v12 ; 3E0C1905
v_mul_f32_e32 v5, v18, v13 ; 100A1B12
v_mac_f32_e32 v5, v17, v12 ; 3E0A1911
v_mul_f32_e32 v13, v21, v13 ; 101A1B15
v_mac_f32_e32 v13, v20, v12 ; 3E1A1914
v_mac_f32_e32 v3, v4, v14 ; 3E061D04
v_mac_f32_e32 v6, v16, v14 ; 3E0C1D10
v_mac_f32_e32 v5, v19, v14 ; 3E0A1D13
v_mac_f32_e32 v13, v22, v14 ; 3E1A1D16
v_mac_f32_e32 v3, s25, v15 ; 3E061E19
v_mac_f32_e32 v6, s25, v15 ; 3E0C1E19
v_mac_f32_e32 v5, s25, v15 ; 3E0A1E19
v_mac_f32_e32 v13, s25, v15 ; 3E1A1E19
v_mul_f32_e32 v4, s10, v0 ; 1008000A
v_mul_f32_e32 v12, s11, v0 ; 1018000B
v_mul_f32_e32 v14, s12, v0 ; 101C000C
v_mac_f32_e32 v4, s13, v1 ; 3E08020D
v_mac_f32_e32 v12, s14, v1 ; 3E18020E
v_mac_f32_e32 v14, s15, v1 ; 3E1C020F
v_mac_f32_e32 v4, s16, v2 ; 3E080410
v_mac_f32_e32 v12, s17, v2 ; 3E180411
v_mac_f32_e32 v14, s18, v2 ; 3E1C0412
v_sub_f32_e32 v15, 1.0, v7 ; 081E0EF2
v_mul_f32_e32 v0, v0, v15 ; 10001F00
v_mul_f32_e32 v1, v1, v15 ; 10021F01
v_mul_f32_e32 v2, v2, v15 ; 10041F02
v_mac_f32_e32 v0, v4, v7 ; 3E000F04
v_mac_f32_e32 v1, v12, v7 ; 3E020F0C
v_mac_f32_e32 v2, v14, v7 ; 3E040F0E
v_add_f32_e64 v4, s55, s55 ; D2060004 00006E37
v_max_f32_e32 v3, 0, v3 ; 20060680
v_add_f32_e32 v3, s26, v3 ; 0606061A
v_mac_f32_e32 v4, s42, v3 ; 3E08062A
v_add_f32_e64 v7, s56, s56 ; D2060007 00007038
v_mac_f32_e32 v7, s43, v3 ; 3E0E062B
v_add_f32_e64 v12, s57, s57 ; D206000C 00007239
v_mac_f32_e32 v12, s44, v3 ; 3E18062C
v_max_f32_e32 v3, 0, v6 ; 20060C80
v_add_f32_e32 v3, s26, v3 ; 0606061A
v_mac_f32_e32 v4, s45, v3 ; 3E08062D
v_mac_f32_e32 v7, s46, v3 ; 3E0E062E
v_mac_f32_e32 v12, s47, v3 ; 3E18062F
v_max_f32_e32 v3, 0, v5 ; 20060A80
v_add_f32_e32 v3, s26, v3 ; 0606061A
v_mac_f32_e32 v4, s48, v3 ; 3E080630
v_mac_f32_e32 v7, s49, v3 ; 3E0E0631
v_mac_f32_e32 v12, s50, v3 ; 3E180632
v_max_f32_e32 v3, 0, v13 ; 20061A80
v_add_f32_e32 v3, s26, v3 ; 0606061A
v_mac_f32_e32 v4, s51, v3 ; 3E080633
v_mac_f32_e32 v7, s52, v3 ; 3E0E0634
v_mac_f32_e32 v12, s53, v3 ; 3E180635
v_mul_f32_e32 v3, s19, v0 ; 10060013
v_mac_f32_e32 v3, s20, v1 ; 3E060214
v_mac_f32_e32 v3, s21, v2 ; 3E060415
v_add_f32_e32 v3, s22, v3 ; 06060616
v_mad_f32 v5, -v3, s19, v0 ; D2820005 24002703
v_mad_f32 v6, -v3, s20, v1 ; D2820006 24042903
v_mad_f32 v3, -v3, s21, v2 ; D2820003 24082B03
v_sub_f32_e64 v13, 1.0, s23 ; D208000D 00002EF2
v_mul_f32_e32 v5, v5, v13 ; 100A1B05
v_mul_f32_e32 v6, v6, v13 ; 100C1B06
v_mul_f32_e32 v3, v3, v13 ; 10061B03
v_mac_f32_e32 v5, s23, v0 ; 3E0A0017
v_mac_f32_e32 v6, s23, v1 ; 3E0C0217
v_mac_f32_e32 v3, s23, v2 ; 3E060417
v_mul_f32_e32 v0, s58, v5 ; 10000A3A
v_mul_f32_e32 v1, s59, v5 ; 10020A3B
v_mul_f32_e32 v2, s60, v5 ; 10040A3C
v_mul_f32_e32 v5, s61, v5 ; 100A0A3D
v_mac_f32_e32 v0, s62, v6 ; 3E000C3E
v_mac_f32_e32 v1, s63, v6 ; 3E020C3F
v_mac_f32_e32 v2, s64, v6 ; 3E040C40
v_mac_f32_e32 v5, s65, v6 ; 3E0A0C41
v_mul_f32_e32 v4, s27, v4 ; 1008081B
v_mul_f32_e32 v6, s28, v7 ; 100C0E1C
v_mul_f32_e32 v7, s29, v12 ; 100E181D
v_mul_f32_e32 v4, s4, v4 ; 10080804
v_mul_f32_e32 v6, s5, v6 ; 100C0C05
v_mul_f32_e32 v7, s6, v7 ; 100E0E06
v_mac_f32_e32 v0, s66, v3 ; 3E000642
v_mac_f32_e32 v1, s67, v3 ; 3E020643
v_mac_f32_e32 v2, s68, v3 ; 3E040644
v_mul_f32_e64 v12, 0.5, s54 ; D210000C 00006CF0
v_mac_f32_e32 v5, s69, v3 ; 3E0A0645
v_add_f32_e32 v0, s70, v0 ; 06000046
v_add_f32_e32 v1, s71, v1 ; 06020247
v_add_f32_e32 v2, s72, v2 ; 06040448
v_add_f32_e32 v3, s0, v5 ; 06060A00
exp 15, 32, 0, 0, 0, v8, v9, v10, v11 ; F800020F 0B0A0908
exp 15, 33, 0, 0, 0, v4, v6, v7, v12 ; F800021F 0C070604
exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 80
VGPRS: 24
Code Size: 888 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MUL TEMP[1].xyz, TEMP[0].xyzz, IN[1].xyzz
3: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1].xxxx
4: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
5: KILL_IF -TEMP[0].xxxx
6: MOV TEMP[1].w, IMM[0].xxxx
7: MOV OUT[0], TEMP[1]
8: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0
%27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0
%29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%32 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%33 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%34 = bitcast float %29 to i32
%35 = bitcast float %30 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %26, <16 x i8> %28, i32 2)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
%43 = fmul float %39, %31
%44 = fmul float %40, %32
%45 = fmul float %41, %33
%46 = fcmp olt float %42, %24
%47 = select i1 %46, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %47)
%48 = call i32 @llvm.SI.packf16(float %43, float %44)
%49 = bitcast i32 %48 to float
%50 = call i32 @llvm.SI.packf16(float %45, float 1.000000e+00)
%51 = bitcast i32 %50 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_mov_b32 m0, s9 ; BEFC0309
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800F00 00230602
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, v4, v6 ; 10020D04
v_mul_f32_e32 v2, v5, v7 ; 10040F05
v_mul_f32_e32 v0, v0, v8 ; 10001100
v_cmp_gt_f32_e32 vcc, s0, v9 ; 7C081200
v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 136 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL CONST[0..21]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000}
0: MOV TEMP[0].w, IN[4].wwww
1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[8].xyzz
2: MUL TEMP[2].x, IN[2].wwww, IN[2].wwww
3: MOV TEMP[3].y, IMM[0].xxxx
4: MOV TEMP[3].x, TEMP[2].xxxx
5: MOV TEMP[3].z, TEMP[2].xxxx
6: MUL TEMP[2], CONST[9], TEMP[1].xxxx
7: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2]
8: MAD TEMP[2].xyz, CONST[11], TEMP[1].zzzz, TEMP[2]
9: LRP TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
10: DP3 TEMP[2].x, CONST[13].xyzz, TEMP[1].xyzz
11: ADD TEMP[2].x, TEMP[2].xxxx, CONST[13].wwww
12: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[13].xyzz
13: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[2].xyzz
14: LRP TEMP[1].xyz, CONST[14].xxxx, TEMP[1].xyzz, TEMP[2].xyzz
15: MOV TEMP[2].xz, IMM[0].xxxx
16: MOV TEMP[2].y, CONST[15].xxxx
17: MUL TEMP[3].xyz, TEMP[2].zxyy, IN[1].yzxx
18: MAD TEMP[2].xyz, TEMP[2].yzxx, IN[1].zxyy, -TEMP[3].xyzz
19: MUL TEMP[3].xyz, IN[1].zxyy, TEMP[2].yzxx
20: MAD TEMP[3].xyz, IN[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz
21: MUL TEMP[4].xyz, IN[1].zxyy, TEMP[3].yzxx
22: MAD TEMP[4].xyz, IN[1].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz
23: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz
24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx
25: UIF TEMP[2].xxxx :0
26: MOV TEMP[0].w, IMM[0].zzzz
27: ELSE :0
28: MOV TEMP[0].w, IMM[0].yyyy
29: ENDIF
30: MAD TEMP[2].xy, IN[3].xyyy, CONST[16].xyyy, CONST[16].zwww
31: MAD TEMP[4].xy, IN[3].xyyy, CONST[17].xyyy, CONST[17].zwww
32: MOV TEMP[2].zw, TEMP[4].yyxy
33: MOV TEMP[4].x, CONST[4].xxxx
34: MOV TEMP[4].y, CONST[5].xxxx
35: MOV TEMP[4].z, CONST[6].xxxx
36: MOV TEMP[5].x, CONST[4].yyyy
37: MOV TEMP[5].y, CONST[5].yyyy
38: MOV TEMP[5].z, CONST[6].yyyy
39: MOV TEMP[6].x, CONST[4].zzzz
40: MOV TEMP[6].y, CONST[5].zzzz
41: MOV TEMP[6].z, CONST[6].zzzz
42: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
43: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
44: MAD TEMP[4].xyz, TEMP[6].xyzz, IN[1].zzzz, TEMP[4].xyzz
45: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
46: RSQ TEMP[5].x, TEMP[5].xxxx
47: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
48: MUL TEMP[5].xyz, CONST[0].xyzz, TEMP[3].xxxx
49: MAD TEMP[5].xyz, CONST[1].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
50: MAD TEMP[3].xyz, CONST[2].xyzz, TEMP[3].zzzz, TEMP[5].xyzz
51: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz
52: RSQ TEMP[5].x, TEMP[5].xxxx
53: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx
54: MUL TEMP[5].xyz, TEMP[4].zxyy, TEMP[3].yzxx
55: MAD TEMP[5].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[5].xyzz
56: MUL TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].wwww
57: MOV TEMP[5].x, TEMP[3].xxxx
58: MOV TEMP[5].y, TEMP[0].xxxx
59: MOV TEMP[5].z, TEMP[4].xxxx
60: MOV TEMP[6].y, TEMP[0].yyyy
61: MOV TEMP[6].z, TEMP[4].yyyy
62: MOV TEMP[7].x, TEMP[3].zzzz
63: MOV TEMP[7].y, TEMP[0].zzzz
64: MUL TEMP[0], CONST[18], TEMP[1].xxxx
65: MAD TEMP[0], CONST[19], TEMP[1].yyyy, TEMP[0]
66: MAD TEMP[0], CONST[20], TEMP[1].zzzz, TEMP[0]
67: ADD TEMP[0], TEMP[0], CONST[21]
68: MOV TEMP[1].xyz, TEMP[5].xyzx
69: MOV TEMP[1].w, TEMP[3].yyyy
70: MOV TEMP[3].xy, TEMP[6].yzyy
71: MOV TEMP[3].zw, TEMP[7].yyxy
72: MOV TEMP[4].x, TEMP[4].zzzz
73: MOV OUT[4], TEMP[4]
74: MOV OUT[1], TEMP[2]
75: MOV OUT[3], TEMP[3]
76: MOV OUT[0], TEMP[0]
77: MOV OUT[2], TEMP[1]
78: END
; ModuleID = 'tgsi'
; Shader entry point (attribute group #0 sets "ShaderType"="1").
; Reads scalar constants through the 16-byte descriptor at element 0 of
; argument %1, fetches four vertex inputs through the descriptors in argument
; %4, and finishes with five llvm.SI.export calls (targets 32, 33, 34, 35, 12).
; NOTE(review): the llvm.SI.load.const byte offsets presumably map to TGSI
; CONST[offset / 16] with component (offset % 16) / 4 -- confirm against the
; TGSI dump that precedes this module.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor at index 0 of argument %1; every llvm.SI.load.const below reads through it.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Byte offsets 0-40: nine scalars (%13-%21) applied to %148/%150/%152 further down.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
; Byte offsets 64-104: nine scalars (%22-%30) applied to vertex input 1.
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
; Byte offsets 128-136: per-component scale for vertex input 0.
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; Byte offsets 144-184: nine scalars (%34-%42) applied to the scaled input 0.
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; Byte offsets 208-220: %43-%45 weight the dot product at %120-%124, %46 is added to it.
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
; Byte offset 224: first operand of the llvm.AMDGPU.lrp calls at %132-%134.
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
; Byte offset 240: the single non-literal component of the vector (0, %48, 0) crossed with input 1.
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
; Byte offsets 256-284: scale (%49, %50, %53, %54) and bias (%51, %52, %55, %56) for vertex input 3.
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
; Byte offsets 288-348: sixteen scalars (%57-%72) used by the final 4-component transform.
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
; Vertex input 0: descriptor %4[0], index %5 + %7; components x, y, z are used.
%73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0
%75 = add i32 %5, %7
%76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75)
%77 = extractelement <4 x float> %76, i32 0
%78 = extractelement <4 x float> %76, i32 1
%79 = extractelement <4 x float> %76, i32 2
; Vertex input 1: descriptor %4[1]; components x, y, z are used.
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
; Vertex input 2: descriptor %4[2]; only component w is used.
%87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = add i32 %5, %7
%90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89)
%91 = extractelement <4 x float> %90, i32 3
; Vertex input 3: descriptor %4[3]; components x, y are used.
%92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0
%94 = add i32 %5, %7
%95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94)
%96 = extractelement <4 x float> %95, i32 0
%97 = extractelement <4 x float> %95, i32 1
; Scale input 0 per component (%98-%100) and square input 2's w component (%101).
%98 = fmul float %77, %31
%99 = fmul float %78, %32
%100 = fmul float %79, %33
%101 = fmul float %91, %91
; Three sums of three products of the scaled input 0 with %34-%42 -> %112, %114, %116.
%102 = fmul float %34, %98
%103 = fmul float %35, %98
%104 = fmul float %36, %98
%105 = fmul float %37, %99
%106 = fadd float %105, %102
%107 = fmul float %38, %99
%108 = fadd float %107, %103
%109 = fmul float %39, %99
%110 = fadd float %109, %104
%111 = fmul float %40, %100
%112 = fadd float %111, %106
%113 = fmul float %41, %100
%114 = fadd float %113, %108
%115 = fmul float %42, %100
%116 = fadd float %115, %110
; llvm.AMDGPU.lrp of the transformed and scaled values; the x and z calls use %101
; as first operand, the y call uses the literal 0.0.
%117 = call float @llvm.AMDGPU.lrp(float %101, float %112, float %98)
%118 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %114, float %99)
%119 = call float @llvm.AMDGPU.lrp(float %101, float %116, float %100)
; d = dot((%43, %44, %45), (%117, %118, %119)) + %46
%120 = fmul float %43, %117
%121 = fmul float %44, %118
%122 = fadd float %121, %120
%123 = fmul float %45, %119
%124 = fadd float %122, %123
%125 = fadd float %124, %46
; (%129, %130, %131) = (%117, %118, %119) - d * (%43, %44, %45)
%126 = fmul float %125, %43
%127 = fmul float %125, %44
%128 = fmul float %125, %45
%129 = fsub float %117, %126
%130 = fsub float %118, %127
%131 = fsub float %119, %128
; Second llvm.AMDGPU.lrp group, first operand %47, between the two positions above.
%132 = call float @llvm.AMDGPU.lrp(float %47, float %117, float %129)
%133 = call float @llvm.AMDGPU.lrp(float %47, float %118, float %130)
%134 = call float @llvm.AMDGPU.lrp(float %47, float %119, float %131)
; a = (0, %48, 0) x input1 -> %139, %141, %143 (the zero components appear as literal 0.0 multiplies).
%135 = fmul float %85, 0.000000e+00
%136 = fmul float %86, 0.000000e+00
%137 = fmul float %48, %84
%138 = fmul float %48, %86
%139 = fsub float %138, %135
%140 = fmul float %84, 0.000000e+00
%141 = fsub float %140, %136
%142 = fmul float %85, 0.000000e+00
%143 = fsub float %142, %137
; b = input1 x a -> %148, %150, %152
%144 = fmul float %86, %141
%145 = fmul float %84, %143
%146 = fmul float %85, %139
%147 = fmul float %85, %143
%148 = fsub float %147, %144
%149 = fmul float %86, %139
%150 = fsub float %149, %145
%151 = fmul float %84, %141
%152 = fsub float %151, %146
; c = input1 x b -> %157, %159, %161
%153 = fmul float %86, %150
%154 = fmul float %84, %152
%155 = fmul float %85, %148
%156 = fmul float %85, %152
%157 = fsub float %156, %153
%158 = fmul float %86, %148
%159 = fsub float %158, %154
%160 = fmul float %84, %150
%161 = fsub float %160, %155
; Sign selector %.: -1.0 when dot(c, a) < 0.0 (ordered compare), otherwise 1.0.
%162 = fmul float %157, %139
%163 = fmul float %159, %141
%164 = fadd float %163, %162
%165 = fmul float %161, %143
%166 = fadd float %164, %165
%167 = fcmp olt float %166, 0.000000e+00
%. = select i1 %167, float -1.000000e+00, float 1.000000e+00
; Input 3 x/y through two scale-and-bias pairs -> %169, %171, %173, %175.
%168 = fmul float %96, %49
%169 = fadd float %168, %51
%170 = fmul float %97, %50
%171 = fadd float %170, %52
%172 = fmul float %96, %53
%173 = fadd float %172, %55
%174 = fmul float %97, %54
%175 = fadd float %174, %56
; Input 1 combined with %22-%30 (three sums of three products) -> %186, %188, %190.
%176 = fmul float %22, %84
%177 = fmul float %25, %84
%178 = fmul float %28, %84
%179 = fmul float %23, %85
%180 = fadd float %179, %176
%181 = fmul float %26, %85
%182 = fadd float %181, %177
%183 = fmul float %29, %85
%184 = fadd float %183, %178
%185 = fmul float %24, %86
%186 = fadd float %185, %180
%187 = fmul float %27, %86
%188 = fadd float %187, %182
%189 = fmul float %30, %86
%190 = fadd float %189, %184
; n = that vector times rsq.clamped of its squared length -> %197, %198, %199.
%191 = fmul float %186, %186
%192 = fmul float %188, %188
%193 = fadd float %192, %191
%194 = fmul float %190, %190
%195 = fadd float %193, %194
%196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195)
%197 = fmul float %186, %196
%198 = fmul float %188, %196
%199 = fmul float %190, %196
; b combined with %13-%21 (three sums of three products) -> %210, %212, %214.
%200 = fmul float %13, %148
%201 = fmul float %14, %148
%202 = fmul float %15, %148
%203 = fmul float %16, %150
%204 = fadd float %203, %200
%205 = fmul float %17, %150
%206 = fadd float %205, %201
%207 = fmul float %18, %150
%208 = fadd float %207, %202
%209 = fmul float %19, %152
%210 = fadd float %209, %204
%211 = fmul float %20, %152
%212 = fadd float %211, %206
%213 = fmul float %21, %152
%214 = fadd float %213, %208
; t = that vector times rsq.clamped of its squared length -> %221, %222, %223.
%215 = fmul float %210, %210
%216 = fmul float %212, %212
%217 = fadd float %216, %215
%218 = fmul float %214, %214
%219 = fadd float %217, %218
%220 = call float @llvm.AMDGPU.rsq.clamped.f32(float %219)
%221 = fmul float %210, %220
%222 = fmul float %212, %220
%223 = fmul float %214, %220
; (n x t) multiplied by the sign selector -> %233, %234, %235.
%224 = fmul float %199, %222
%225 = fmul float %197, %223
%226 = fmul float %198, %221
%227 = fmul float %198, %223
%228 = fsub float %227, %224
%229 = fmul float %199, %221
%230 = fsub float %229, %225
%231 = fmul float %197, %222
%232 = fsub float %231, %226
%233 = fmul float %228, %.
%234 = fmul float %230, %.
%235 = fmul float %232, %.
; Final transform of (%132, %133, %134): products with %57-%68, then %69-%72 are added.
%236 = fmul float %57, %132
%237 = fmul float %58, %132
%238 = fmul float %59, %132
%239 = fmul float %60, %132
%240 = fmul float %61, %133
%241 = fadd float %240, %236
%242 = fmul float %62, %133
%243 = fadd float %242, %237
%244 = fmul float %63, %133
%245 = fadd float %244, %238
%246 = fmul float %64, %133
%247 = fadd float %246, %239
%248 = fmul float %65, %134
%249 = fadd float %248, %241
%250 = fmul float %66, %134
%251 = fadd float %250, %243
%252 = fmul float %67, %134
%253 = fadd float %252, %245
%254 = fmul float %68, %134
%255 = fadd float %254, %247
%256 = fadd float %249, %69
%257 = fadd float %251, %70
%258 = fadd float %253, %71
%259 = fadd float %255, %72
; Exports. The 4th argument is the target (32-35, then 12); only the last call
; passes 1 as its 3rd argument. The w slot of the target-35 export is the literal 0.0.
; NOTE(review): target 12 is presumably the position export and 32+ the
; parameter exports -- confirm against the SI backend's export encoding.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %169, float %171, float %173, float %175)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %221, float %233, float %197, float %222)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %234, float %198, float %223, float %235)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %198, float %199, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %256, float %257, float %258, float %259)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the LLVM module dumped directly above.
; v1 <- 0x80000000 (IEEE-754 bit pattern of -0.0); v0 <- s10 + v0, the index
; operand of all four buffer_load_format_xyzw fetches below.
v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Descriptor loads: s[44:47] via s[2:3] (base of every s_buffer_load_dword),
; and four 4-dword descriptors via s[8:9] for the vertex fetches.
s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four vertex fetches. The destination ranges v[2:5], v[5:8], v[8:11] share
; registers v5 and v8 -- presumably why an s_waitcnt separates them.
s_buffer_load_dword s9, s[44:47], 0x3c ; C204AD3C
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800
buffer_load_format_xyzw v[12:15], v0, s[16:19], 0 idxen ; E00C2000 80040C00
; Scalar constant loads; the immediate is a dword offset into s[44:47].
s_buffer_load_dword s48, s[44:47], 0x40 ; C2182D40
s_buffer_load_dword s49, s[44:47], 0x41 ; C218AD41
s_buffer_load_dword s8, s[44:47], 0x42 ; C2042D42
s_buffer_load_dword s10, s[44:47], 0x43 ; C2052D43
s_buffer_load_dword s0, s[44:47], 0x0 ; C2002D00
s_buffer_load_dword s1, s[44:47], 0x1 ; C200AD01
s_buffer_load_dword s2, s[44:47], 0x2 ; C2012D02
s_buffer_load_dword s3, s[44:47], 0x4 ; C201AD04
s_buffer_load_dword s4, s[44:47], 0x5 ; C2022D05
s_buffer_load_dword s5, s[44:47], 0x6 ; C202AD06
s_buffer_load_dword s6, s[44:47], 0x8 ; C2032D08
s_buffer_load_dword s7, s[44:47], 0x9 ; C203AD09
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; s8 and s10 are copied to v0 / v8 before being reloaded with other constants.
v_mov_b32_e32 v0, s8 ; 7E000208
s_buffer_load_dword s8, s[44:47], 0xa ; C2042D0A
v_mov_b32_e32 v8, s10 ; 7E10020A
s_buffer_load_dword s50, s[44:47], 0x10 ; C2192D10
s_buffer_load_dword s51, s[44:47], 0x11 ; C219AD11
s_buffer_load_dword s52, s[44:47], 0x12 ; C21A2D12
s_buffer_load_dword s53, s[44:47], 0x14 ; C21AAD14
s_buffer_load_dword s54, s[44:47], 0x15 ; C21B2D15
s_buffer_load_dword s37, s[44:47], 0x16 ; C212AD16
s_buffer_load_dword s55, s[44:47], 0x18 ; C21BAD18
s_buffer_load_dword s56, s[44:47], 0x19 ; C21C2D19
s_buffer_load_dword s43, s[44:47], 0x1a ; C215AD1A
s_buffer_load_dword s57, s[44:47], 0x20 ; C21CAD20
s_buffer_load_dword s21, s[44:47], 0x21 ; C20AAD21
s_buffer_load_dword s26, s[44:47], 0x22 ; C20D2D22
s_buffer_load_dword s22, s[44:47], 0x24 ; C20B2D24
s_buffer_load_dword s23, s[44:47], 0x25 ; C20BAD25
s_buffer_load_dword s24, s[44:47], 0x26 ; C20C2D26
s_buffer_load_dword s28, s[44:47], 0x28 ; C20E2D28
s_buffer_load_dword s29, s[44:47], 0x29 ; C20EAD29
s_buffer_load_dword s30, s[44:47], 0x2a ; C20F2D2A
s_buffer_load_dword s31, s[44:47], 0x2c ; C20FAD2C
s_buffer_load_dword s32, s[44:47], 0x2d ; C2102D2D
s_buffer_load_dword s34, s[44:47], 0x2e ; C2112D2E
s_buffer_load_dword s10, s[44:47], 0x34 ; C2052D34
s_buffer_load_dword s12, s[44:47], 0x35 ; C2062D35
s_buffer_load_dword s14, s[44:47], 0x36 ; C2072D36
s_buffer_load_dword s19, s[44:47], 0x37 ; C209AD37
s_buffer_load_dword s11, s[44:47], 0x38 ; C205AD38
s_buffer_load_dword s58, s[44:47], 0x44 ; C21D2D44
s_buffer_load_dword s59, s[44:47], 0x45 ; C21DAD45
s_buffer_load_dword s13, s[44:47], 0x46 ; C206AD46
s_buffer_load_dword s15, s[44:47], 0x47 ; C207AD47
s_buffer_load_dword s33, s[44:47], 0x48 ; C210AD48
s_buffer_load_dword s35, s[44:47], 0x49 ; C211AD49
s_buffer_load_dword s36, s[44:47], 0x4a ; C2122D4A
s_buffer_load_dword s38, s[44:47], 0x4b ; C2132D4B
s_buffer_load_dword s39, s[44:47], 0x4c ; C213AD4C
s_waitcnt lgkmcnt(0) ; BF8C007F
; v9 = 1.0 - s11; s13 and s15 are copied to v10 / v14 before being reloaded.
v_sub_f32_e64 v9, 1.0, s11 ; D2080009 000016F2
s_buffer_load_dword s40, s[44:47], 0x4d ; C2142D4D
s_buffer_load_dword s41, s[44:47], 0x4e ; C214AD4E
v_mov_b32_e32 v10, s13 ; 7E14020D
s_buffer_load_dword s42, s[44:47], 0x4f ; C2152D4F
v_mov_b32_e32 v14, s15 ; 7E1C020F
s_buffer_load_dword s20, s[44:47], 0x50 ; C20A2D50
s_buffer_load_dword s25, s[44:47], 0x51 ; C20CAD51
s_buffer_load_dword s27, s[44:47], 0x52 ; C20DAD52
s_buffer_load_dword s13, s[44:47], 0x53 ; C206AD53
s_buffer_load_dword s15, s[44:47], 0x54 ; C207AD54
s_buffer_load_dword s16, s[44:47], 0x55 ; C2082D55
s_buffer_load_dword s17, s[44:47], 0x56 ; C208AD56
s_buffer_load_dword s18, s[44:47], 0x57 ; C2092D57
; Vector arithmetic on the fetched attributes and loaded constants starts here.
v_mul_f32_e32 v2, s57, v2 ; 10040439
v_mul_f32_e32 v15, s50, v5 ; 101E0A32
v_mac_f32_e32 v15, s51, v6 ; 3E1E0C33
v_mac_f32_e32 v0, s48, v12 ; 3E001830
v_mac_f32_e32 v8, s49, v13 ; 3E101A31
v_mac_f32_e32 v10, s58, v12 ; 3E14183A
v_mac_f32_e32 v14, s59, v13 ; 3E1C1A3B
v_mul_f32_e32 v12, s53, v5 ; 10180A35
v_mul_f32_e32 v13, s55, v5 ; 101A0A37
v_mac_f32_e32 v12, s54, v6 ; 3E180C36
v_mac_f32_e32 v13, s56, v6 ; 3E1A0C38
v_mac_f32_e32 v15, s52, v7 ; 3E1E0E34
; Export to target 32 from v0, v8, v10, v14.
exp 15, 32, 0, 0, 0, v0, v8, v10, v14 ; F800020F 0E0A0800
v_mac_f32_e32 v12, s37, v7 ; 3E180E25
v_mac_f32_e32 v13, s43, v7 ; 3E1A0E2B
; Wait for the export before v0, v8, v10, v14 are overwritten below.
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
v_mul_f32_e32 v0, v1, v6 ; 10000D01
v_mac_f32_e32 v0, s9, v7 ; 3E000E09
v_mul_f32_e32 v8, s9, v5 ; 10100A09
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, 0, v5 ; 3E020A80
v_mad_f32 v8, 0, v6, -v8 ; D2820008 84220C80
v_mul_f32_e32 v10, v1, v7 ; 10140F01
v_mad_f32 v10, v6, v8, -v10 ; D282000A 842A1106
v_mul_f32_e32 v14, v8, v5 ; 101C0B08
v_mad_f32 v14, v7, v0, -v14 ; D282000E 843A0107
v_mul_f32_e32 v16, v0, v6 ; 10200D00
v_mad_f32 v16, v5, v1, -v16 ; D2820010 84420305
v_mul_f32_e32 v17, v14, v7 ; 10220F0E
v_mad_f32 v17, v6, v16, -v17 ; D2820011 84462106
v_mul_f32_e32 v18, v16, v5 ; 10240B10
v_mad_f32 v7, v7, v10, -v18 ; D2820007 844A1507
v_mul_f32_e32 v6, v10, v6 ; 100C0D0A
v_mad_f32 v5, v5, v14, -v6 ; D2820005 841A1D05
v_mul_f32_e32 v3, s21, v3 ; 10060615
v_mul_f32_e32 v4, s26, v4 ; 1008081A
; v6 = v11 * v11 and v11 = 1.0 - v11 * v11 (the two weights of the blend that follows).
v_mul_f32_e32 v6, v11, v11 ; 100C170B
v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B
v_mul_f32_e32 v18, s22, v2 ; 10240416
v_mac_f32_e32 v18, s28, v3 ; 3E24061C
v_mul_f32_e32 v19, s23, v2 ; 10260417
v_mac_f32_e32 v19, s29, v3 ; 3E26061D
v_mul_f32_e32 v20, s24, v2 ; 10280418
v_mac_f32_e32 v20, s30, v3 ; 3E28061E
v_mac_f32_e32 v18, s31, v4 ; 3E24081F
v_mac_f32_e32 v19, s32, v4 ; 3E260820
v_mac_f32_e32 v20, s34, v4 ; 3E280822
v_mul_f32_e32 v2, v2, v11 ; 10041702
v_mul_f32_e32 v4, v4, v11 ; 10081704
v_mac_f32_e32 v2, v18, v6 ; 3E040D12
v_mac_f32_e32 v4, v20, v6 ; 3E080D14
v_mul_f32_e32 v6, s0, v10 ; 100C1400
v_mul_f32_e32 v11, s1, v10 ; 10161401
v_mul_f32_e32 v10, s2, v10 ; 10141402
v_mac_f32_e32 v6, s3, v14 ; 3E0C1C03
v_mac_f32_e32 v11, s4, v14 ; 3E161C04
v_mac_f32_e32 v10, s5, v14 ; 3E141C05
v_mac_f32_e32 v6, s6, v16 ; 3E0C2006
v_mac_f32_e32 v11, s7, v16 ; 3E162007
v_mac_f32_e32 v10, s8, v16 ; 3E142008
v_mul_f32_e32 v0, v0, v17 ; 10002300
v_mac_f32_e32 v0, v1, v7 ; 3E000F01
v_mac_f32_e32 v0, v8, v5 ; 3E000B08
v_mac_f32_e32 v3, 0, v19 ; 3E062680
v_mul_f32_e32 v1, s10, v2 ; 1002040A
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
v_mac_f32_e32 v1, s14, v4 ; 3E02080E
v_add_f32_e32 v1, s19, v1 ; 06020213
v_mad_f32 v5, -v1, s10, v2 ; D2820005 24081501
v_mad_f32 v7, -v1, s12, v3 ; D2820007 240C1901
v_mad_f32 v1, -v1, s14, v4 ; D2820001 24101D01
v_mul_f32_e32 v5, v5, v9 ; 100A1305
v_mul_f32_e32 v7, v7, v9 ; 100E1307
v_mul_f32_e32 v1, v1, v9 ; 10021301
v_mac_f32_e32 v5, s11, v2 ; 3E0A040B
v_mac_f32_e32 v7, s11, v3 ; 3E0E060B
v_mac_f32_e32 v1, s11, v4 ; 3E02080B
v_mul_f32_e32 v2, s33, v5 ; 10040A21
v_mul_f32_e32 v3, s35, v5 ; 10060A23
v_mul_f32_e32 v4, s36, v5 ; 10080A24
v_mul_f32_e32 v5, s38, v5 ; 100A0A26
v_mac_f32_e32 v2, s39, v7 ; 3E040E27
v_mac_f32_e32 v3, s40, v7 ; 3E060E28
v_mac_f32_e32 v4, s41, v7 ; 3E080E29
v_mac_f32_e32 v5, s42, v7 ; 3E0A0E2A
; v0 = -1.0 where 0 > v0, else 1.0.
v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080
v_cndmask_b32_e64 v0, 1.0, -1.0, vcc ; D2000000 01A9E6F2
; Two rsq_clamp scalings: (v15, v12, v13) -> (v9, v12, v7) and (v6, v11, v10) -> (v6, v11, v8).
v_mul_f32_e32 v7, v15, v15 ; 100E1F0F
v_mac_f32_e32 v7, v12, v12 ; 3E0E190C
v_mac_f32_e32 v7, v13, v13 ; 3E0E1B0D
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mul_f32_e32 v8, v6, v6 ; 10100D06
v_mac_f32_e32 v8, v11, v11 ; 3E10170B
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v7, v15 ; 10121F07
v_mul_f32_e32 v12, v7, v12 ; 10181907
v_mul_f32_e32 v7, v7, v13 ; 100E1B07
v_mul_f32_e32 v6, v8, v6 ; 100C0D08
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v8, v8, v10 ; 10101508
v_mac_f32_e32 v2, s20, v1 ; 3E040214
v_mac_f32_e32 v3, s25, v1 ; 3E060219
v_mac_f32_e32 v4, s27, v1 ; 3E08021B
v_mul_f32_e32 v10, v11, v7 ; 10140F0B
v_mad_f32 v10, v12, v8, -v10 ; D282000A 842A110C
v_mul_f32_e32 v10, v0, v10 ; 10141500
; Export to target 33 from v6, v10, v9, v11.
exp 15, 33, 0, 0, 0, v6, v10, v9, v11 ; F800021F 0B090A06
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v10, v6, v12 ; 10141906
v_mad_f32 v10, v9, v11, -v10 ; D282000A 842A1709
v_mul_f32_e32 v9, v8, v9 ; 10121308
v_mad_f32 v6, v7, v6, -v9 ; D2820006 84260D07
v_mul_f32_e32 v6, v0, v6 ; 100C0D00
v_mul_f32_e32 v0, v0, v10 ; 10001500
; Export to target 34 from v6, v12, v8, v0.
exp 15, 34, 0, 0, 0, v6, v12, v8, v0 ; F800022F 00080C06
s_waitcnt expcnt(0) ; BF8C070F
; Export to target 35 from v7, v12, v7 and a zeroed v0.
v_mov_b32_e32 v0, 0 ; 7E000280
exp 15, 35, 0, 0, 0, v7, v12, v7, v0 ; F800023F 00070C07
v_mac_f32_e32 v5, s13, v1 ; 3E0A020D
s_waitcnt expcnt(0) ; BF8C070F
; Add the last four constants (s15-s18), then the final export (target 12).
v_add_f32_e32 v0, s15, v2 ; 0600040F
v_add_f32_e32 v1, s16, v3 ; 06020610
v_add_f32_e32 v2, s17, v4 ; 06040811
v_add_f32_e32 v3, s18, v5 ; 06060A12
exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 24
Code Size: 936 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000}
IMM[1] FLT32 { 0.0078, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].yz, IN[2].yxyy
2: MOV TEMP[1].xy, IN[2].zwzz
3: MOV TEMP[1].z, IN[3].xxxx
4: MOV TEMP[2].xy, IN[0].zwww
5: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D
6: MAD TEMP[2].xy, TEMP[2].wyyy, IMM[0].xxxx, IMM[0].yyyy
7: DP2 TEMP[3].x, TEMP[2].xyyy, TEMP[2].xyyy
8: MOV_SAT TEMP[3].x, TEMP[3].xxxx
9: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx
10: SQRT TEMP[3].x, TEMP[3].xxxx
11: MOV TEMP[2].z, TEMP[3].xxxx
12: MOV TEMP[3].xy, IN[0].xyyy
13: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
14: FSLT TEMP[3].x, TEMP[3].wwww, CONST[3].xxxx
15: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz
16: KILL_IF -TEMP[3].xxxx
17: DP3 TEMP[3].x, IN[1].xyzz, TEMP[2].xyzz
18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz
19: MOV TEMP[3].y, TEMP[0].xxxx
20: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[2].xyzz
21: MOV TEMP[3].z, TEMP[0].xxxx
22: MAD TEMP[0].xyz, TEMP[3].xyzz, IMM[0].wwww, IMM[0].wwww
23: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
24: MOV TEMP[0].w, TEMP[1].xxxx
25: MOV OUT[0], TEMP[0]
26: END
; ModuleID = 'tgsi'
; Shader entry point (attribute group #0 sets "ShaderType"="0"); it follows
; the TGSI FRAG dump above. Interpolates 13 input channels, samples two
; textures, conditionally kills the invocation, and exports one value packed
; as two pairs of f16.
; NOTE(review): the llvm.SI.load.const byte offsets 0 and 48 presumably map to
; TGSI CONST[0].x and CONST[3].x -- confirm against the TGSI dump.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Descriptor at index 0 of argument %1 and the two scalars read through it.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
; Slot 0: 32-byte descriptor from argument %3 (%27) and 16-byte descriptor from argument %2 (%29).
%26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0
%28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
; Slot 1: the same pair of descriptors at index 1 (%32, %35).
%30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)*
%32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0
%33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)*
%35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0
; Interpolated inputs: the first argument is the channel (0-3), the second the
; attribute index (0-3); all use %5 and %7. Attribute 3 only needs channel 0.
%36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%45 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%47 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%48 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
; Sample with the slot-1 descriptors at (attribute 0 channel 2, channel 3);
; result elements 1 and 3 are used.
%49 = bitcast float %38 to i32
%50 = bitcast float %39 to i32
%51 = insertelement <2 x i32> undef, i32 %49, i32 0
%52 = insertelement <2 x i32> %51, i32 %50, i32 1
%53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %32, <16 x i8> %35, i32 2)
%54 = extractelement <4 x float> %53, i32 1
%55 = extractelement <4 x float> %53, i32 3
; Remap both samples with v * 2.0 - 1.0: %57 from element 3, %59 from element 1.
%56 = fmul float %55, 2.000000e+00
%57 = fadd float %56, -1.000000e+00
%58 = fmul float %54, 2.000000e+00
%59 = fadd float %58, -1.000000e+00
; Third component: %65 = sqrt(1.0 - clamp(%57*%57 + %59*%59, 0.0, 1.0)).
%60 = fmul float %57, %57
%61 = fmul float %59, %59
%62 = fadd float %60, %61
%63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
%64 = fsub float 1.000000e+00, %63
%65 = call float @llvm.sqrt.f32(float %64)
; Sample with the slot-0 descriptors at (attribute 0 channel 0, channel 1); only element 3 is used.
%66 = bitcast float %36 to i32
%67 = bitcast float %37 to i32
%68 = insertelement <2 x i32> undef, i32 %66, i32 0
%69 = insertelement <2 x i32> %68, i32 %67, i32 1
%70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %27, <16 x i8> %29, i32 2)
%71 = extractelement <4 x float> %70, i32 3
; Pass -1.0 to llvm.AMDGPU.kill when the sampled value is below %25, otherwise 0.0.
%72 = fcmp olt float %71, %25
%73 = select i1 %72, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %73)
; Three dot products of (%57, %59, %65) with consecutive triples of the
; interpolated channels: %40-%42 -> %78, %43-%45 -> %83, %46-%48 -> %88.
%74 = fmul float %40, %57
%75 = fmul float %41, %59
%76 = fadd float %75, %74
%77 = fmul float %42, %65
%78 = fadd float %76, %77
%79 = fmul float %43, %57
%80 = fmul float %44, %59
%81 = fadd float %80, %79
%82 = fmul float %45, %65
%83 = fadd float %81, %82
%84 = fmul float %46, %57
%85 = fmul float %47, %59
%86 = fadd float %85, %84
%87 = fmul float %48, %65
%88 = fadd float %86, %87
; Remap each dot product with v * 0.5 + 0.5.
%89 = fmul float %78, 5.000000e-01
%90 = fadd float %89, 5.000000e-01
%91 = fmul float %83, 5.000000e-01
%92 = fadd float %91, 5.000000e-01
%93 = fmul float %88, 5.000000e-01
%94 = fadd float %93, 5.000000e-01
; Fourth output component: the constant at byte offset 0 times 0.0078125.
%95 = fmul float %24, 7.812500e-03
; Pack (%90, %92) and (%94, %95) as f16 pairs; the export passes 1 as its 5th
; argument and repeats the two packed values in the last two data slots.
%96 = call i32 @llvm.SI.packf16(float %90, float %92)
%97 = bitcast i32 %96 to float
%98 = call i32 @llvm.SI.packf16(float %94, float %95)
%99 = bitcast i32 %98 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the LLVM module dumped directly above.
; Setup: s_wqm_b64 on exec, then m0 <- s9 for the v_interp_* instructions.
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Interpolate 13 input channels into v2-v13 and v0, one p1/p2 pair per channel.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
; Descriptor loads, issued in the middle of the interpolation sequence:
; two 4-dword descriptors via s[4:5], two 8-dword descriptors via s[6:7],
; and s[0:3] via s[2:3] (base of the s_buffer_load_dword instructions below).
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00
v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01
v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00
v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01
v_interp_p1_f32 v0, v0, 0, 3, [m0] ; C8000C00
v_interp_p2_f32 v0, [v0], v1, 0, 3, [m0] ; C8010C01
s_waitcnt lgkmcnt(0) ; BF8C007F
; Two texture samples: coordinates v[4:5] -> v[4:5], and coordinates v[2:3] -> v1.
; NOTE(review): the first immediate (10 / 8) is presumably the channel mask
; selecting which result components are returned -- confirm against the ISA docs.
; The two scalar loads fetch constant dwords 0xc (s4) and 0x0 (s0).
image_sample v[4:5], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800A00 00640404
s_buffer_load_dword s4, s[0:3], 0xc ; C202010C
image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460102
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
s_waitcnt vmcnt(1) ; BF8C0771
; v2 = 2*v5 - 1, v3 = 2*v4 - 1, v4 = sqrt(1 - clamp(v3*v3 + v2*v2)).
v_mad_f32 v2, 2.0, v5, -1.0 ; D2820002 03CE0AF4
v_mad_f32 v3, 2.0, v4, -1.0 ; D2820003 03CE08F4
v_mul_f32_e32 v4, v3, v3 ; 10080703
v_mac_f32_e32 v4, v2, v2 ; 3E080502
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_sub_f32_e32 v4, 1.0, v4 ; 080808F2
v_sqrt_f32_e32 v4, v4 ; 7E086704
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Kill test: v1 = -1.0 where s4 > v1, else 0; v_cmpx_le then tests 0 <= v1.
v_cmp_gt_f32_e32 vcc, s4, v1 ; 7C080204
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
; Three 3-term dot products of (v2, v3, v4) with the interpolated inputs:
; v1 uses v6, v7, v8; v5 uses v9, v10, v11; v2 uses v12, v13, v0.
v_mul_f32_e32 v1, v2, v6 ; 10020D02
v_mac_f32_e32 v1, v3, v7 ; 3E020F03
v_mul_f32_e32 v5, v2, v9 ; 100A1302
v_mac_f32_e32 v5, v3, v10 ; 3E0A1503
v_mul_f32_e32 v2, v2, v12 ; 10041902
v_mac_f32_e32 v2, v3, v13 ; 3E041B03
v_mac_f32_e32 v1, v4, v8 ; 3E021104
v_mac_f32_e32 v5, v4, v11 ; 3E0A1704
v_mac_f32_e32 v2, v4, v0 ; 3E040104
; v0 = s0 * 0x3c000000 (the IEEE-754 single-precision bit pattern of 0.0078125).
v_mov_b32_e32 v0, 0x3c000000 ; 7E0002FF 3C000000
v_mul_f32_e32 v0, s0, v0 ; 10000000
; Remap the dot products with x*0.5 + 0.5, pack pairs to f16 and export.
v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0
v_mad_f32 v3, 0.5, v5, 0.5 ; D2820003 03C20AF0
v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701
v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 316 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL CONST[0..21]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000}
0: MOV TEMP[0].w, IN[4].wwww
1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[8].xyzz
2: MUL TEMP[2].x, IN[2].wwww, IN[2].wwww
3: MOV TEMP[3].y, IMM[0].xxxx
4: MOV TEMP[3].x, TEMP[2].xxxx
5: MOV TEMP[3].z, TEMP[2].xxxx
6: MUL TEMP[2], CONST[9], TEMP[1].xxxx
7: MAD TEMP[2], CONST[10], TEMP[1].yyyy, TEMP[2]
8: MAD TEMP[2].xyz, CONST[11], TEMP[1].zzzz, TEMP[2]
9: LRP TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
10: DP3 TEMP[2].x, CONST[13].xyzz, TEMP[1].xyzz
11: ADD TEMP[2].x, TEMP[2].xxxx, CONST[13].wwww
12: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[13].xyzz
13: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[2].xyzz
14: LRP TEMP[1].xyz, CONST[14].xxxx, TEMP[1].xyzz, TEMP[2].xyzz
15: MOV TEMP[2].xz, IMM[0].xxxx
16: MOV TEMP[2].y, CONST[15].xxxx
17: MUL TEMP[3].xyz, TEMP[2].zxyy, IN[1].yzxx
18: MAD TEMP[2].xyz, TEMP[2].yzxx, IN[1].zxyy, -TEMP[3].xyzz
19: MUL TEMP[3].xyz, IN[1].zxyy, TEMP[2].yzxx
20: MAD TEMP[3].xyz, IN[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz
21: MUL TEMP[4].xyz, IN[1].zxyy, TEMP[3].yzxx
22: MAD TEMP[4].xyz, IN[1].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz
23: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz
24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx
25: UIF TEMP[2].xxxx :0
26: MOV TEMP[0].w, IMM[0].zzzz
27: ELSE :0
28: MOV TEMP[0].w, IMM[0].yyyy
29: ENDIF
30: MAD TEMP[2].xy, IN[3].xyyy, CONST[16].xyyy, CONST[16].zwww
31: MAD TEMP[4].xy, IN[3].xyyy, CONST[17].xyyy, CONST[17].zwww
32: MOV TEMP[2].zw, TEMP[4].yyxy
33: MOV TEMP[4].x, CONST[4].xxxx
34: MOV TEMP[4].y, CONST[5].xxxx
35: MOV TEMP[4].z, CONST[6].xxxx
36: MOV TEMP[5].x, CONST[4].yyyy
37: MOV TEMP[5].y, CONST[5].yyyy
38: MOV TEMP[5].z, CONST[6].yyyy
39: MOV TEMP[6].x, CONST[4].zzzz
40: MOV TEMP[6].y, CONST[5].zzzz
41: MOV TEMP[6].z, CONST[6].zzzz
42: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
43: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
44: MAD TEMP[4].xyz, TEMP[6].xyzz, IN[1].zzzz, TEMP[4].xyzz
45: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
46: RSQ TEMP[5].x, TEMP[5].xxxx
47: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
48: MUL TEMP[5].xyz, CONST[0].xyzz, TEMP[3].xxxx
49: MAD TEMP[5].xyz, CONST[1].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
50: MAD TEMP[3].xyz, CONST[2].xyzz, TEMP[3].zzzz, TEMP[5].xyzz
51: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[3].xyzz
52: RSQ TEMP[5].x, TEMP[5].xxxx
53: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xxxx
54: MUL TEMP[5].xyz, TEMP[4].zxyy, TEMP[3].yzxx
55: MAD TEMP[5].xyz, TEMP[4].yzxx, TEMP[3].zxyy, -TEMP[5].xyzz
56: MUL TEMP[0].xyz, TEMP[5].xyzz, TEMP[0].wwww
57: MOV TEMP[5].x, TEMP[3].xxxx
58: MOV TEMP[5].y, TEMP[0].xxxx
59: MOV TEMP[5].z, TEMP[4].xxxx
60: MOV TEMP[6].y, TEMP[0].yyyy
61: MOV TEMP[6].z, TEMP[4].yyyy
62: MOV TEMP[7].x, TEMP[3].zzzz
63: MOV TEMP[7].y, TEMP[0].zzzz
64: MUL TEMP[0], CONST[18], TEMP[1].xxxx
65: MAD TEMP[0], CONST[19], TEMP[1].yyyy, TEMP[0]
66: MAD TEMP[0], CONST[20], TEMP[1].zzzz, TEMP[0]
67: ADD TEMP[0], TEMP[0], CONST[21]
68: MOV TEMP[1].xyz, TEMP[5].xyzx
69: MOV TEMP[1].w, TEMP[3].yyyy
70: MOV TEMP[3].xy, TEMP[6].yzyy
71: MOV TEMP[3].zw, TEMP[7].yyxy
72: MOV TEMP[4].x, TEMP[4].zzzz
73: MOV OUT[4], TEMP[4]
74: MOV OUT[1], TEMP[2]
75: MOV OUT[3], TEMP[3]
76: MOV OUT[0], TEMP[0]
77: MOV OUT[2], TEMP[1]
78: END
; ModuleID = 'tgsi'
; Vertex shader entry point (attribute #0 carries "ShaderType"="1").
; Straight-line code: it reads scalar constants through the descriptor at
; index 0 of argument %1, fetches four vertex inputs through the descriptors
; in argument %4, and finishes with five llvm.SI.export calls.
; The constant byte offsets line up with the CONST[] registers of the TGSI
; listing printed just before this module as offset = 16 * index + 4 * component
; (e.g. offset 128 is used where the TGSI reads CONST[8].x).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor used as the buffer operand of every llvm.SI.load.const below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Offsets 0..40: xyz of three consecutive 16-byte slots (used by %200-%214).
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
; Offsets 64..104: xyz of three slots (used by %176-%190).
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
; Offsets 128..136: per-component scale applied to input 0 (%98-%100).
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; Offsets 144..184: xyz of three slots (used by %102-%116).
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; Offsets 208..220: one full xyzw slot (used by %120-%128).
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
; Offset 224: blend weight for %132-%134.  Offset 240: scalar used by %137/%138.
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
; Offsets 256..284: two xyzw slots (used by %168-%175).
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
; Offsets 288..348: four xyzw slots (used by %236-%259).
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
; Vertex input 0 (descriptor 0 of %4), fetched at index %5 + %7; xyz are used.
%73 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%74 = load <16 x i8>, <16 x i8> addrspace(2)* %73, align 16, !tbaa !0
%75 = add i32 %5, %7
%76 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %74, i32 0, i32 %75)
%77 = extractelement <4 x float> %76, i32 0
%78 = extractelement <4 x float> %76, i32 1
%79 = extractelement <4 x float> %76, i32 2
; Vertex input 1 (descriptor 1); xyz are used.
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
; Vertex input 2 (descriptor 2); only the w component is used.
%87 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = add i32 %5, %7
%90 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %88, i32 0, i32 %89)
%91 = extractelement <4 x float> %90, i32 3
; Vertex input 3 (descriptor 3); xy are used.
%92 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0
%94 = add i32 %5, %7
%95 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %93, i32 0, i32 %94)
%96 = extractelement <4 x float> %95, i32 0
%97 = extractelement <4 x float> %95, i32 1
; Scale input 0 component-wise, and square input 2's w (%101, a blend weight).
%98 = fmul float %77, %31
%99 = fmul float %78, %32
%100 = fmul float %79, %33
%101 = fmul float %91, %91
; Linear combination of the three constant vectors at offsets 144/160/176,
; weighted by the scaled x/y/z (%98/%99/%100).
%102 = fmul float %34, %98
%103 = fmul float %35, %98
%104 = fmul float %36, %98
%105 = fmul float %37, %99
%106 = fadd float %105, %102
%107 = fmul float %38, %99
%108 = fadd float %107, %103
%109 = fmul float %39, %99
%110 = fadd float %109, %104
%111 = fmul float %40, %100
%112 = fadd float %111, %106
%113 = fmul float %41, %100
%114 = fadd float %113, %108
%115 = fmul float %42, %100
%116 = fadd float %115, %110
; Blend between the combined and the merely scaled vector.  x and z use weight
; %101; y uses the literal weight 0.0 (TGSI sets that weight from IMM[0].x).
%117 = call float @llvm.AMDGPU.lrp(float %101, float %112, float %98)
%118 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %114, float %99)
%119 = call float @llvm.AMDGPU.lrp(float %101, float %116, float %100)
; %125 = dot((%43,%44,%45), blended vector) + %46.
%120 = fmul float %43, %117
%121 = fmul float %44, %118
%122 = fadd float %121, %120
%123 = fmul float %45, %119
%124 = fadd float %122, %123
%125 = fadd float %124, %46
; Subtract %125 * (%43,%44,%45) from the blended vector
; (NOTE(review): looks like a projection onto a plane -- confirm intent).
%126 = fmul float %125, %43
%127 = fmul float %125, %44
%128 = fmul float %125, %45
%129 = fsub float %117, %126
%130 = fsub float %118, %127
%131 = fsub float %119, %128
; Blend the unprojected and projected vectors with constant weight %47.
%132 = call float @llvm.AMDGPU.lrp(float %47, float %117, float %129)
%133 = call float @llvm.AMDGPU.lrp(float %47, float %118, float %130)
%134 = call float @llvm.AMDGPU.lrp(float %47, float %119, float %131)
; (%139,%141,%143) = cross((0, %48, 0), input 1).  The multiplications by the
; literal 0.0 are kept as emitted; they are not folded at this stage.
%135 = fmul float %85, 0.000000e+00
%136 = fmul float %86, 0.000000e+00
%137 = fmul float %48, %84
%138 = fmul float %48, %86
%139 = fsub float %138, %135
%140 = fmul float %84, 0.000000e+00
%141 = fsub float %140, %136
%142 = fmul float %85, 0.000000e+00
%143 = fsub float %142, %137
; (%148,%150,%152) = cross(input 1, (%139,%141,%143)).
%144 = fmul float %86, %141
%145 = fmul float %84, %143
%146 = fmul float %85, %139
%147 = fmul float %85, %143
%148 = fsub float %147, %144
%149 = fmul float %86, %139
%150 = fsub float %149, %145
%151 = fmul float %84, %141
%152 = fsub float %151, %146
; (%157,%159,%161) = cross(input 1, (%148,%150,%152)).
%153 = fmul float %86, %150
%154 = fmul float %84, %152
%155 = fmul float %85, %148
%156 = fmul float %85, %152
%157 = fsub float %156, %153
%158 = fmul float %86, %148
%159 = fsub float %158, %154
%160 = fmul float %84, %150
%161 = fsub float %160, %155
; %166 = dot((%157,%159,%161), (%139,%141,%143)); its sign picks %. below.
%162 = fmul float %157, %139
%163 = fmul float %159, %141
%164 = fadd float %163, %162
%165 = fmul float %161, %143
%166 = fadd float %164, %165
; %. is -1.0 when the dot product is negative, +1.0 otherwise.
%167 = fcmp olt float %166, 0.000000e+00
%. = select i1 %167, float -1.000000e+00, float 1.000000e+00
; Two scale-and-offset transforms of input 3's xy; results go to export 32.
%168 = fmul float %96, %49
%169 = fadd float %168, %51
%170 = fmul float %97, %50
%171 = fadd float %170, %52
%172 = fmul float %96, %53
%173 = fadd float %172, %55
%174 = fmul float %97, %54
%175 = fadd float %174, %56
; Each of %186/%188/%190 is a dot product of one constant row
; (offsets 64.., 80.., 96..) with input 1's xyz.
%176 = fmul float %22, %84
%177 = fmul float %25, %84
%178 = fmul float %28, %84
%179 = fmul float %23, %85
%180 = fadd float %179, %176
%181 = fmul float %26, %85
%182 = fadd float %181, %177
%183 = fmul float %29, %85
%184 = fadd float %183, %178
%185 = fmul float %24, %86
%186 = fadd float %185, %180
%187 = fmul float %27, %86
%188 = fadd float %187, %182
%189 = fmul float %30, %86
%190 = fadd float %189, %184
; Normalise (%186,%188,%190) by the clamped reciprocal square root of its
; squared length -> (%197,%198,%199).
%191 = fmul float %186, %186
%192 = fmul float %188, %188
%193 = fadd float %192, %191
%194 = fmul float %190, %190
%195 = fadd float %193, %194
%196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195)
%197 = fmul float %186, %196
%198 = fmul float %188, %196
%199 = fmul float %190, %196
; Linear combination of the constant vectors at offsets 0/16/32, weighted by
; (%148,%150,%152).
%200 = fmul float %13, %148
%201 = fmul float %14, %148
%202 = fmul float %15, %148
%203 = fmul float %16, %150
%204 = fadd float %203, %200
%205 = fmul float %17, %150
%206 = fadd float %205, %201
%207 = fmul float %18, %150
%208 = fadd float %207, %202
%209 = fmul float %19, %152
%210 = fadd float %209, %204
%211 = fmul float %20, %152
%212 = fadd float %211, %206
%213 = fmul float %21, %152
%214 = fadd float %213, %208
; Normalise (%210,%212,%214) -> (%221,%222,%223).
%215 = fmul float %210, %210
%216 = fmul float %212, %212
%217 = fadd float %216, %215
%218 = fmul float %214, %214
%219 = fadd float %217, %218
%220 = call float @llvm.AMDGPU.rsq.clamped.f32(float %219)
%221 = fmul float %210, %220
%222 = fmul float %212, %220
%223 = fmul float %214, %220
; (%228,%230,%232) = cross((%197,%198,%199), (%221,%222,%223)), then scaled by
; the sign %. -> (%233,%234,%235).
%224 = fmul float %199, %222
%225 = fmul float %197, %223
%226 = fmul float %198, %221
%227 = fmul float %198, %223
%228 = fsub float %227, %224
%229 = fmul float %199, %221
%230 = fsub float %229, %225
%231 = fmul float %197, %222
%232 = fsub float %231, %226
%233 = fmul float %228, %.
%234 = fmul float %230, %.
%235 = fmul float %232, %.
; Four-component result: constant vectors at offsets 288/304/320 weighted by
; (%132,%133,%134), plus the constant vector at offset 336.
%236 = fmul float %57, %132
%237 = fmul float %58, %132
%238 = fmul float %59, %132
%239 = fmul float %60, %132
%240 = fmul float %61, %133
%241 = fadd float %240, %236
%242 = fmul float %62, %133
%243 = fadd float %242, %237
%244 = fmul float %63, %133
%245 = fadd float %244, %238
%246 = fmul float %64, %133
%247 = fadd float %246, %239
%248 = fmul float %65, %134
%249 = fadd float %248, %241
%250 = fmul float %66, %134
%251 = fadd float %250, %243
%252 = fmul float %67, %134
%253 = fadd float %252, %245
%254 = fmul float %68, %134
%255 = fadd float %254, %247
%256 = fadd float %249, %69
%257 = fadd float %251, %70
%258 = fadd float %253, %71
%259 = fadd float %255, %72
; Exports.  The fourth i32 operand takes the values 32..35 and 12 here; the
; first shader in this dump sends its POSITION output through 12 and its
; GENERIC output through 32, so 12 is presumably the position slot and 32+ the
; generic/parameter slots.  Only the last call sets the third i32 operand to 1.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %169, float %171, float %173, float %175)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %221, float %233, float %197, float %222)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %234, float %198, float %223, float %235)
; The x and z operands are both %199 and w is the literal 0.0; this mirrors
; TGSI instruction 72 overwriting TEMP[4].x with TEMP[4].z before OUT[4] is written.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %198, float %199, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %256, float %257, float %258, float %259)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the vertex-shader @main in the LLVM module above.
; The trailing "; XXXXXXXX" on each line is the instruction encoding printed
; by the disassembler.
; v1 = 0x80000000, the bit pattern of -0.0f; it is the multiplier in the two
; v_mul_f32 instructions that stand in for the IR's multiplications by 0.0.
v_mov_b32_e32 v1, 0x80000000 ; 7E0202FF 80000000
; v0 = s10 + v0 (presumably the fetch index, IR "add i32 %5, %7").
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Load 4-dword descriptors: s[44:47] is the operand of all s_buffer_load_dword
; below; s[0:3], s[4:7], s[12:15], s[16:19] feed the four buffer_load_format
; fetches.
s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_waitcnt lgkmcnt(0) ; BF8C007F
; The s_buffer_load_dword offsets appear to be in dwords: 0x3c * 4 = 240, the
; byte offset of the IR's %48.
s_buffer_load_dword s9, s[44:47], 0x3c ; C204AD3C
; Four vertex fetches.  The destination ranges overlap (v[2:5]/v[5:8]/v[8:11]),
; which is consistent with the IR using only xyz, xyz and w of inputs 0-2.
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800
buffer_load_format_xyzw v[12:15], v0, s[16:19], 0 idxen ; E00C2000 80040C00
; Scalar constant loads (first batch).
s_buffer_load_dword s48, s[44:47], 0x40 ; C2182D40
s_buffer_load_dword s49, s[44:47], 0x41 ; C218AD41
s_buffer_load_dword s8, s[44:47], 0x42 ; C2042D42
s_buffer_load_dword s10, s[44:47], 0x43 ; C2052D43
s_buffer_load_dword s0, s[44:47], 0x0 ; C2002D00
s_buffer_load_dword s1, s[44:47], 0x1 ; C200AD01
s_buffer_load_dword s2, s[44:47], 0x2 ; C2012D02
s_buffer_load_dword s3, s[44:47], 0x4 ; C201AD04
s_buffer_load_dword s4, s[44:47], 0x5 ; C2022D05
s_buffer_load_dword s5, s[44:47], 0x6 ; C202AD06
s_buffer_load_dword s6, s[44:47], 0x8 ; C2032D08
s_buffer_load_dword s7, s[44:47], 0x9 ; C203AD09
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; s8 and s10 are copied to VGPRs before the registers are reloaded with other
; constants; v0/v8 later accumulate into the first exported vector.
v_mov_b32_e32 v0, s8 ; 7E000208
s_buffer_load_dword s8, s[44:47], 0xa ; C2042D0A
v_mov_b32_e32 v8, s10 ; 7E10020A
; Scalar constant loads (second batch).
s_buffer_load_dword s50, s[44:47], 0x10 ; C2192D10
s_buffer_load_dword s51, s[44:47], 0x11 ; C219AD11
s_buffer_load_dword s52, s[44:47], 0x12 ; C21A2D12
s_buffer_load_dword s53, s[44:47], 0x14 ; C21AAD14
s_buffer_load_dword s54, s[44:47], 0x15 ; C21B2D15
s_buffer_load_dword s37, s[44:47], 0x16 ; C212AD16
s_buffer_load_dword s55, s[44:47], 0x18 ; C21BAD18
s_buffer_load_dword s56, s[44:47], 0x19 ; C21C2D19
s_buffer_load_dword s43, s[44:47], 0x1a ; C215AD1A
s_buffer_load_dword s57, s[44:47], 0x20 ; C21CAD20
s_buffer_load_dword s21, s[44:47], 0x21 ; C20AAD21
s_buffer_load_dword s26, s[44:47], 0x22 ; C20D2D22
s_buffer_load_dword s22, s[44:47], 0x24 ; C20B2D24
s_buffer_load_dword s23, s[44:47], 0x25 ; C20BAD25
s_buffer_load_dword s24, s[44:47], 0x26 ; C20C2D26
s_buffer_load_dword s28, s[44:47], 0x28 ; C20E2D28
s_buffer_load_dword s29, s[44:47], 0x29 ; C20EAD29
s_buffer_load_dword s30, s[44:47], 0x2a ; C20F2D2A
s_buffer_load_dword s31, s[44:47], 0x2c ; C20FAD2C
s_buffer_load_dword s32, s[44:47], 0x2d ; C2102D2D
s_buffer_load_dword s34, s[44:47], 0x2e ; C2112D2E
s_buffer_load_dword s10, s[44:47], 0x34 ; C2052D34
s_buffer_load_dword s12, s[44:47], 0x35 ; C2062D35
s_buffer_load_dword s14, s[44:47], 0x36 ; C2072D36
s_buffer_load_dword s19, s[44:47], 0x37 ; C209AD37
s_buffer_load_dword s11, s[44:47], 0x38 ; C205AD38
s_buffer_load_dword s58, s[44:47], 0x44 ; C21D2D44
s_buffer_load_dword s59, s[44:47], 0x45 ; C21DAD45
s_buffer_load_dword s13, s[44:47], 0x46 ; C206AD46
s_buffer_load_dword s15, s[44:47], 0x47 ; C207AD47
s_buffer_load_dword s33, s[44:47], 0x48 ; C210AD48
s_buffer_load_dword s35, s[44:47], 0x49 ; C211AD49
s_buffer_load_dword s36, s[44:47], 0x4a ; C2122D4A
s_buffer_load_dword s38, s[44:47], 0x4b ; C2132D4B
s_buffer_load_dword s39, s[44:47], 0x4c ; C213AD4C
s_waitcnt lgkmcnt(0) ; BF8C007F
; v9 = 1.0 - s11: the complementary weight of the second group of lrp calls.
v_sub_f32_e64 v9, 1.0, s11 ; D2080009 000016F2
s_buffer_load_dword s40, s[44:47], 0x4d ; C2142D4D
s_buffer_load_dword s41, s[44:47], 0x4e ; C214AD4E
v_mov_b32_e32 v10, s13 ; 7E14020D
s_buffer_load_dword s42, s[44:47], 0x4f ; C2152D4F
v_mov_b32_e32 v14, s15 ; 7E1C020F
s_buffer_load_dword s20, s[44:47], 0x50 ; C20A2D50
s_buffer_load_dword s25, s[44:47], 0x51 ; C20CAD51
s_buffer_load_dword s27, s[44:47], 0x52 ; C20DAD52
s_buffer_load_dword s13, s[44:47], 0x53 ; C206AD53
s_buffer_load_dword s15, s[44:47], 0x54 ; C207AD54
s_buffer_load_dword s16, s[44:47], 0x55 ; C2082D55
s_buffer_load_dword s17, s[44:47], 0x56 ; C2092D56
s_buffer_load_dword s18, s[44:47], 0x57 ; C2092D57
; ALU section.  v0/v8/v10/v14 accumulate the scale-and-offset of input 3 and
; are exported first (target 32).
v_mul_f32_e32 v2, s57, v2 ; 10040439
v_mul_f32_e32 v15, s50, v5 ; 101E0A32
v_mac_f32_e32 v15, s51, v6 ; 3E1E0C33
v_mac_f32_e32 v0, s48, v12 ; 3E001830
v_mac_f32_e32 v8, s49, v13 ; 3E101A31
v_mac_f32_e32 v10, s58, v12 ; 3E14183A
v_mac_f32_e32 v14, s59, v13 ; 3E1C1A3B
v_mul_f32_e32 v12, s53, v5 ; 10180A35
v_mul_f32_e32 v13, s55, v5 ; 101A0A37
v_mac_f32_e32 v12, s54, v6 ; 3E180C36
v_mac_f32_e32 v13, s56, v6 ; 3E1A0C38
v_mac_f32_e32 v15, s52, v7 ; 3E1E0E34
exp 15, 32, 0, 0, 0, v0, v8, v10, v14 ; F800020F 0E0A0800
v_mac_f32_e32 v12, s37, v7 ; 3E180E25
v_mac_f32_e32 v13, s43, v7 ; 3E1A0E2B
; Wait for the export before its source VGPRs (v0, v8, ...) are overwritten.
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
; Chain of cross products on input 1 (v5, v6, v7); see %135-%161 in the IR.
v_mul_f32_e32 v0, v1, v6 ; 10000D01
v_mac_f32_e32 v0, s9, v7 ; 3E000E09
v_mul_f32_e32 v8, s9, v5 ; 10100A09
v_mul_f32_e32 v1, v1, v7 ; 10020F01
v_mac_f32_e32 v1, 0, v5 ; 3E020A80
v_mad_f32 v8, 0, v6, -v8 ; D2820008 84220C80
v_mul_f32_e32 v10, v1, v7 ; 10140F01
v_mad_f32 v10, v6, v8, -v10 ; D282000A 842A1106
v_mul_f32_e32 v14, v8, v5 ; 101C0B08
v_mad_f32 v14, v7, v0, -v14 ; D282000E 843A0107
v_mul_f32_e32 v16, v0, v6 ; 10200D00
v_mad_f32 v16, v5, v1, -v16 ; D2820010 84420305
v_mul_f32_e32 v17, v14, v7 ; 10220F0E
v_mad_f32 v17, v6, v16, -v17 ; D2820011 84462106
v_mul_f32_e32 v18, v16, v5 ; 10240B10
v_mad_f32 v7, v7, v10, -v18 ; D2820007 844A1507
v_mul_f32_e32 v6, v10, v6 ; 100C0D0A
v_mad_f32 v5, v5, v14, -v6 ; D2820005 841A1D05
v_mul_f32_e32 v3, s21, v3 ; 10060615
v_mul_f32_e32 v4, s26, v4 ; 1008081A
; v6 = w*w and v11 = 1 - w*w (w is input 2's w in v11): the two weights of the
; first lrp group, expanded as weight * a + (1 - weight) * b.
v_mul_f32_e32 v6, v11, v11 ; 100C170B
v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B
v_mul_f32_e32 v18, s22, v2 ; 10240416
v_mac_f32_e32 v18, s28, v3 ; 3E24061C
v_mul_f32_e32 v19, s23, v2 ; 10260417
v_mac_f32_e32 v19, s29, v3 ; 3E26061D
v_mul_f32_e32 v20, s24, v2 ; 10280418
v_mac_f32_e32 v20, s30, v3 ; 3E28061E
v_mac_f32_e32 v18, s31, v4 ; 3E24081F
v_mac_f32_e32 v19, s32, v4 ; 3E260820
v_mac_f32_e32 v20, s34, v4 ; 3E280822
v_mul_f32_e32 v2, v2, v11 ; 10041702
v_mul_f32_e32 v4, v4, v11 ; 10081704
v_mac_f32_e32 v2, v18, v6 ; 3E040D12
v_mac_f32_e32 v4, v20, v6 ; 3E080D14
v_mul_f32_e32 v6, s0, v10 ; 100C1400
v_mul_f32_e32 v11, s1, v10 ; 10161401
v_mul_f32_e32 v10, s2, v10 ; 10141402
v_mac_f32_e32 v6, s3, v14 ; 3E0C1C03
v_mac_f32_e32 v11, s4, v14 ; 3E161C04
v_mac_f32_e32 v10, s5, v14 ; 3E141C05
v_mac_f32_e32 v6, s6, v16 ; 3E0C2006
v_mac_f32_e32 v11, s7, v16 ; 3E162007
v_mac_f32_e32 v10, s8, v16 ; 3E142008
; v0 = dot product whose sign is tested by v_cmp_gt_f32 further down.
v_mul_f32_e32 v0, v0, v17 ; 10002300
v_mac_f32_e32 v0, v1, v7 ; 3E000F01
v_mac_f32_e32 v0, v8, v5 ; 3E000B08
; The y-component lrp with weight 0.0, emitted as v3 += 0 * v19.
v_mac_f32_e32 v3, 0, v19 ; 3E062680
v_mul_f32_e32 v1, s10, v2 ; 1002040A
v_mac_f32_e32 v1, s12, v3 ; 3E02060C
v_mac_f32_e32 v1, s14, v4 ; 3E02080E
v_add_f32_e32 v1, s19, v1 ; 06020213
v_mad_f32 v5, -v1, s10, v2 ; D2820005 24081501
v_mad_f32 v7, -v1, s12, v3 ; D2820007 240C1901
v_mad_f32 v1, -v1, s14, v4 ; D2820001 24101D01
v_mul_f32_e32 v5, v5, v9 ; 100A1305
v_mul_f32_e32 v7, v7, v9 ; 100E1307
v_mul_f32_e32 v1, v1, v9 ; 10021301
v_mac_f32_e32 v5, s11, v2 ; 3E0A040B
v_mac_f32_e32 v7, s11, v3 ; 3E0E060B
v_mac_f32_e32 v1, s11, v4 ; 3E02080B
v_mul_f32_e32 v2, s33, v5 ; 10040A21
v_mul_f32_e32 v3, s35, v5 ; 10060A23
v_mul_f32_e32 v4, s36, v5 ; 10080A24
v_mul_f32_e32 v5, s38, v5 ; 100A0A26
v_mac_f32_e32 v2, s39, v7 ; 3E040E27
v_mac_f32_e32 v3, s40, v7 ; 3E060E28
v_mac_f32_e32 v4, s41, v7 ; 3E080E29
v_mac_f32_e32 v5, s42, v7 ; 3E0A0E2A
; v0 = (0 > v0) ? -1.0 : 1.0 -- the IR's "%. = select".
v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080
v_cndmask_b32_e64 v0, 1.0, -1.0, vcc ; D2000000 01A9E6F2
; Two normalisations via v_rsq_clamp_f32.
v_mul_f32_e32 v7, v15, v15 ; 100E1F0F
v_mac_f32_e32 v7, v12, v12 ; 3E0E190C
v_mac_f32_e32 v7, v13, v13 ; 3E0E1B0D
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mul_f32_e32 v8, v6, v6 ; 10100D06
v_mac_f32_e32 v8, v11, v11 ; 3E10170B
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v7, v15 ; 10121F07
v_mul_f32_e32 v12, v7, v12 ; 10181907
v_mul_f32_e32 v7, v7, v13 ; 100E1B07
v_mul_f32_e32 v6, v8, v6 ; 100C0D08
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v8, v8, v10 ; 10101508
v_mac_f32_e32 v2, s20, v1 ; 3E040214
v_mac_f32_e32 v3, s25, v1 ; 3E060219
v_mac_f32_e32 v4, s27, v1 ; 3E08021B
; Cross product of the two normalised vectors, scaled by the sign in v0 and
; interleaved with the exports to targets 33 and 34.
v_mul_f32_e32 v10, v11, v7 ; 10140F0B
v_mad_f32 v10, v12, v8, -v10 ; D282000A 842A110C
v_mul_f32_e32 v10, v0, v10 ; 10141500
exp 15, 33, 0, 0, 0, v6, v10, v9, v11 ; F800021F 0B090A06
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v10, v6, v12 ; 10141906
v_mad_f32 v10, v9, v11, -v10 ; D282000A 842A1709
v_mul_f32_e32 v9, v8, v9 ; 10121308
v_mad_f32 v6, v7, v6, -v9 ; D2820006 84260D07
v_mul_f32_e32 v6, v0, v6 ; 100C0D00
v_mul_f32_e32 v0, v0, v10 ; 10001500
exp 15, 34, 0, 0, 0, v6, v12, v8, v0 ; F800022F 00080C06
s_waitcnt expcnt(0) ; BF8C070F
; Export to target 35: x and z both come from v7 and w is the constant 0.
v_mov_b32_e32 v0, 0 ; 7E000280
exp 15, 35, 0, 0, 0, v7, v12, v7, v0 ; F800023F 00070C07
v_mac_f32_e32 v5, s13, v1 ; 3E0A020D
s_waitcnt expcnt(0) ; BF8C070F
; Final adds of the fourth constant vector, then the export to target 12
; (the only export here whose fourth operand is 1).
v_add_f32_e32 v0, s15, v2 ; 0600040F
v_add_f32_e32 v1, s16, v3 ; 06020610
v_add_f32_e32 v2, s17, v4 ; 06040811
v_add_f32_e32 v3, s18, v5 ; 06060A12
exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 24
Code Size: 936 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.5000}
IMM[1] FLT32 { 0.0078, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].yz, IN[2].yxyy
2: MOV TEMP[1].xy, IN[2].zwzz
3: MOV TEMP[1].z, IN[3].xxxx
4: MOV TEMP[2].xy, IN[0].zwww
5: TEX TEMP[2].yw, TEMP[2], SAMP[1], 2D
6: MAD TEMP[2].xy, TEMP[2].wyyy, IMM[0].xxxx, IMM[0].yyyy
7: DP2 TEMP[3].x, TEMP[2].xyyy, TEMP[2].xyyy
8: MOV_SAT TEMP[3].x, TEMP[3].xxxx
9: ADD TEMP[3].x, IMM[0].zzzz, -TEMP[3].xxxx
10: SQRT TEMP[3].x, TEMP[3].xxxx
11: MOV TEMP[2].z, TEMP[3].xxxx
12: MOV TEMP[3].xy, IN[0].xyyy
13: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
14: FSLT TEMP[3].x, TEMP[3].wwww, CONST[3].xxxx
15: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz
16: KILL_IF -TEMP[3].xxxx
17: DP3 TEMP[3].x, IN[1].xyzz, TEMP[2].xyzz
18: DP3 TEMP[0].x, TEMP[0].xyzz, TEMP[2].xyzz
19: MOV TEMP[3].y, TEMP[0].xxxx
20: DP3 TEMP[0].x, TEMP[1].xyzz, TEMP[2].xyzz
21: MOV TEMP[3].z, TEMP[0].xxxx
22: MAD TEMP[0].xyz, TEMP[3].xyzz, IMM[0].wwww, IMM[0].wwww
23: MUL TEMP[1].x, CONST[0].xxxx, IMM[1].xxxx
24: MOV TEMP[0].w, TEMP[1].xxxx
25: MOV OUT[0], TEMP[0]
26: END
; ModuleID = 'tgsi'
; Fragment shader entry point (attribute #0 carries "ShaderType"="0").
; Mirrors the TGSI FRAG listing printed just before this module: it samples
; two textures, discards the fragment when one sample's fourth component is
; below a constant, and exports three dot products remapped by *0.5+0.5 plus a
; scaled constant, packed as two pairs of 16-bit floats.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant descriptor (index 0 of %1) and the two scalars read through it:
; byte offset 0 (TGSI CONST[0].x) and byte offset 48 (TGSI CONST[3].x).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
; 32-byte descriptor from %3 and 16-byte descriptor from %2, element 0
; (passed to the second sample call, %70).
%26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0
%28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
; Same pair of descriptors, element 1 (passed to the first sample call, %53).
%30 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%31 = bitcast <8 x i32> addrspace(2)* %30 to <32 x i8> addrspace(2)*
%32 = load <32 x i8>, <32 x i8> addrspace(2)* %31, align 32, !tbaa !0
%33 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%34 = bitcast <4 x i32> addrspace(2)* %33 to <16 x i8> addrspace(2)*
%35 = load <16 x i8>, <16 x i8> addrspace(2)* %34, align 16, !tbaa !0
; Interpolate 13 scalars.  The first i32 operand cycles 0..3 and the second
; counts 0..3, matching (component, input index) of TGSI IN[0..3]; only
; component 0 of input 3 is read.
%36 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%41 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%42 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%43 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%44 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%45 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%47 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%48 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
; First sample: coordinates are input 0's z/w (%38, %39), bit-cast to i32 to
; build the <2 x i32> address operand; uses the element-1 descriptors.
%49 = bitcast float %38 to i32
%50 = bitcast float %39 to i32
%51 = insertelement <2 x i32> undef, i32 %49, i32 0
%52 = insertelement <2 x i32> %51, i32 %50, i32 1
%53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %52, <32 x i8> %32, <16 x i8> %35, i32 2)
; Only components 1 and 3 of the sample are used; each is remapped as 2*v - 1.
; %57 comes from component 3 and %59 from component 1.
%54 = extractelement <4 x float> %53, i32 1
%55 = extractelement <4 x float> %53, i32 3
%56 = fmul float %55, 2.000000e+00
%57 = fadd float %56, -1.000000e+00
%58 = fmul float %54, 2.000000e+00
%59 = fadd float %58, -1.000000e+00
; %65 = sqrt(1 - clamp(%57^2 + %59^2, 0, 1)): the third component of the
; vector (%57, %59, %65) used in the dot products below.
%60 = fmul float %57, %57
%61 = fmul float %59, %59
%62 = fadd float %60, %61
%63 = call float @llvm.AMDIL.clamp.(float %62, float 0.000000e+00, float 1.000000e+00)
%64 = fsub float 1.000000e+00, %63
%65 = call float @llvm.sqrt.f32(float %64)
; Second sample: coordinates are input 0's x/y (%36, %37); uses the element-0
; descriptors.  Only component 3 is kept.
%66 = bitcast float %36 to i32
%67 = bitcast float %37 to i32
%68 = insertelement <2 x i32> undef, i32 %66, i32 0
%69 = insertelement <2 x i32> %68, i32 %67, i32 1
%70 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %69, <32 x i8> %27, <16 x i8> %29, i32 2)
%71 = extractelement <4 x float> %70, i32 3
; Pass -1.0 to llvm.AMDGPU.kill when the sampled value is below constant %25,
; 0.0 otherwise (TGSI: FSLT / AND / KILL_IF).
%72 = fcmp olt float %71, %25
%73 = select i1 %72, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %73)
; Three dot products of (%57, %59, %65) with consecutive triples of the
; interpolated scalars: (%40,%41,%42), (%43,%44,%45), (%46,%47,%48).
%74 = fmul float %40, %57
%75 = fmul float %41, %59
%76 = fadd float %75, %74
%77 = fmul float %42, %65
%78 = fadd float %76, %77
%79 = fmul float %43, %57
%80 = fmul float %44, %59
%81 = fadd float %80, %79
%82 = fmul float %45, %65
%83 = fadd float %81, %82
%84 = fmul float %46, %57
%85 = fmul float %47, %59
%86 = fadd float %85, %84
%87 = fmul float %48, %65
%88 = fadd float %86, %87
; Remap each dot product as v * 0.5 + 0.5.
%89 = fmul float %78, 5.000000e-01
%90 = fadd float %89, 5.000000e-01
%91 = fmul float %83, 5.000000e-01
%92 = fadd float %91, 5.000000e-01
%93 = fmul float %88, 5.000000e-01
%94 = fadd float %93, 5.000000e-01
; Fourth output component: constant %24 times 7.8125e-03 (printed as 0.0078 in
; the TGSI IMM[1] line).
%95 = fmul float %24, 7.812500e-03
; Pack the four floats into two i32 values with llvm.SI.packf16 and export them;
; the same two packed values are passed twice.  This is the only export, with
; its third and fifth i32 operands set to 1 and its fourth set to 0.
%96 = call i32 @llvm.SI.packf16(float %90, float %92)
%97 = bitcast i32 %96 to float
%98 = call i32 @llvm.SI.packf16(float %94, float %95)
%99 = bitcast i32 %98 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the fragment-shader @main in the LLVM module
; above.  The trailing "; XXXXXXXX" on each line is the instruction encoding.
s_wqm_b64 exec, exec ; BEFE0A7E
; m0 is set from s9 and referenced by every v_interp_* below.
s_mov_b32 m0, s9 ; BEFC0309
; 13 p1/p2 interpolation pairs, one per llvm.SI.fs.interp call in the IR;
; results land in v2..v13 and finally v0.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
; Descriptor loads, placed in the middle of the interpolation sequence:
; s[8:11] and s[12:15] are the 4-dword operands of the two image_sample
; instructions, s[16:23] and s[24:31] the 8-dword ones, and s[0:3] is the
; operand of the s_buffer_load_dword constant reads.
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[24:31], s[6:7], 0x0 ; C0CC0700
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00
v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01
v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00
v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01
v_interp_p1_f32 v0, v0, 0, 3, [m0] ; C8000C00
v_interp_p2_f32 v0, [v0], v1, 0, 3, [m0] ; C8010C01
s_waitcnt lgkmcnt(0) ; BF8C007F
; First sample (coordinates v[4:5]) returns two values into v[4:5]; the second
; (coordinates v[2:3]) returns one value into v1.  The IR uses two components
; of the first sample and one of the second.
image_sample v[4:5], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[16:23], s[12:15] ; F0800A00 00640404
; Dword offset 0xc = byte offset 48 (the kill threshold, IR %25).
s_buffer_load_dword s4, s[0:3], 0xc ; C202010C
image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[24:31], s[8:11] ; F0800800 00460102
s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100
; Wait until at most one vector-memory operation is outstanding, i.e. the
; first sample has returned.
s_waitcnt vmcnt(1) ; BF8C0771
; Remap both sampled values as 2*v - 1, then v4 = sqrt(1 - clamp(x^2 + y^2)).
v_mad_f32 v2, 2.0, v5, -1.0 ; D2820002 03CE0AF4
v_mad_f32 v3, 2.0, v4, -1.0 ; D2820003 03CE08F4
v_mul_f32_e32 v4, v3, v3 ; 10080703
v_mac_f32_e32 v4, v2, v2 ; 3E080502
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_sub_f32_e32 v4, 1.0, v4 ; 080808F2
v_sqrt_f32_e32 v4, v4 ; 7E086704
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Kill test: v1 = (s4 > v1) ? -1.0 : 0, then v_cmpx_le_f32 on (0 <= v1)
; implements llvm.AMDGPU.kill (presumably by clearing exec for lanes where the
; compare fails).
v_cmp_gt_f32_e32 vcc, s4, v1 ; 7C080204
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
; Three dot products accumulated in v1, v5 and v2.
v_mul_f32_e32 v1, v2, v6 ; 10020D02
v_mac_f32_e32 v1, v3, v7 ; 3E020F03
v_mul_f32_e32 v5, v2, v9 ; 100A1302
v_mac_f32_e32 v5, v3, v10 ; 3E0A1503
v_mul_f32_e32 v2, v2, v12 ; 10041902
v_mac_f32_e32 v2, v3, v13 ; 3E041B03
v_mac_f32_e32 v1, v4, v8 ; 3E021104
v_mac_f32_e32 v5, v4, v11 ; 3E0A1704
v_mac_f32_e32 v2, v4, v0 ; 3E040104
; 0x3c000000 is the bit pattern of 7.8125e-03f; v0 = s0 * 7.8125e-03.
v_mov_b32_e32 v0, 0x3c000000 ; 7E0002FF 3C000000
v_mul_f32_e32 v0, s0, v0 ; 10000000
; Remap by *0.5 + 0.5, pack pairs to 16-bit floats, and export the two packed
; registers (each passed twice).
v_mad_f32 v1, 0.5, v1, 0.5 ; D2820001 03C202F0
v_mad_f32 v3, 0.5, v5, 0.5 ; D2820003 03C20AF0
v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701
v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0
v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 316 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..25]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[14].xyzz
1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww
2: MOV TEMP[2].y, IMM[0].xxxx
3: MOV TEMP[2].x, TEMP[1].xxxx
4: MOV TEMP[2].z, TEMP[1].xxxx
5: MUL TEMP[1], CONST[15], TEMP[0].xxxx
6: MAD TEMP[1], CONST[16], TEMP[0].yyyy, TEMP[1]
7: MAD TEMP[1].xyz, CONST[17], TEMP[0].zzzz, TEMP[1]
8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz
9: DP3 TEMP[1].x, CONST[19].xyzz, TEMP[0].xyzz
10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[19].wwww
11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[19].xyzz
12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
13: LRP TEMP[0].xyz, CONST[20].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
14: MAD TEMP[1].xy, IN[3].xyyy, CONST[21].xyyy, CONST[21].zwww
15: FSNE TEMP[2].x, CONST[1].zzzz, IMM[0].xxxx
16: UIF TEMP[2].xxxx :0
17: MUL TEMP[2], CONST[2], TEMP[0].xxxx
18: MAD TEMP[2], CONST[3], TEMP[0].yyyy, TEMP[2]
19: MAD TEMP[2], CONST[4], TEMP[0].zzzz, TEMP[2]
20: ADD TEMP[2].xyz, TEMP[2], CONST[5]
21: MOV TEMP[3].x, CONST[6].xxxx
22: MOV TEMP[3].y, CONST[7].xxxx
23: MOV TEMP[3].z, CONST[8].xxxx
24: MOV TEMP[4].x, CONST[6].yyyy
25: MOV TEMP[4].y, CONST[7].yyyy
26: MOV TEMP[4].z, CONST[8].yyyy
27: MOV TEMP[5].x, CONST[6].zzzz
28: MOV TEMP[5].y, CONST[7].zzzz
29: MOV TEMP[5].z, CONST[8].zzzz
30: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
31: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
32: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
34: RSQ TEMP[4].x, TEMP[4].xxxx
35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
36: MUL TEMP[4].xyz, TEMP[2].xyzz, CONST[0].wwww
37: ADD TEMP[4].xyz, CONST[0].xyzz, -TEMP[4].xyzz
38: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
39: RSQ TEMP[5].x, TEMP[5].xxxx
40: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
41: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[4].xyzz
42: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx
43: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
44: SQRT TEMP[4].x, TEMP[4].xxxx
45: MUL TEMP[4].x, CONST[1].zzzz, TEMP[4].xxxx
46: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
47: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xyzz
48: MUL TEMP[3], CONST[10], TEMP[2].xxxx
49: MAD TEMP[3], CONST[11], TEMP[2].yyyy, TEMP[3]
50: MAD TEMP[2], CONST[12], TEMP[2].zzzz, TEMP[3]
51: ADD TEMP[2], TEMP[2], CONST[13]
52: ELSE :0
53: MUL TEMP[3], CONST[22], TEMP[0].xxxx
54: MAD TEMP[3], CONST[23], TEMP[0].yyyy, TEMP[3]
55: MAD TEMP[0], CONST[24], TEMP[0].zzzz, TEMP[3]
56: ADD TEMP[2], TEMP[0], CONST[25]
57: ENDIF
58: MOV TEMP[0].xyw, TEMP[2].xyxw
59: RCP TEMP[3].x, TEMP[2].wwww
60: MUL TEMP[3].x, CONST[1].xxxx, TEMP[3].xxxx
61: MOV_SAT TEMP[3].x, TEMP[3].xxxx
62: ADD TEMP[3].x, TEMP[2].zzzz, TEMP[3].xxxx
63: MAX TEMP[2].x, TEMP[3].xxxx, -TEMP[2].wwww
64: LRP TEMP[2].x, CONST[1].yyyy, TEMP[2].xxxx, TEMP[3].xxxx
65: MOV TEMP[0].z, TEMP[2].xxxx
66: MOV TEMP[1].xy, TEMP[1].xyxx
67: MOV OUT[1], TEMP[1]
68: MOV OUT[0], TEMP[0]
69: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = add i32 %5, %7
%43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0
%49 = add i32 %5, %7
%50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49)
%51 = extractelement <4 x float> %50, i32 0
%52 = extractelement <4 x float> %50, i32 1
%53 = extractelement <4 x float> %50, i32 2
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 3
%59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0
%61 = add i32 %5, %7
%62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61)
%63 = extractelement <4 x float> %62, i32 0
%64 = extractelement <4 x float> %62, i32 1
%65 = fmul float %44, %17
%66 = fmul float %45, %18
%67 = fmul float %46, %19
%68 = fmul float %58, %58
%69 = fmul float %20, %65
%70 = fmul float %21, %65
%71 = fmul float %22, %65
%72 = fmul float %23, %65
%73 = fmul float %24, %66
%74 = fadd float %73, %69
%75 = fmul float %25, %66
%76 = fadd float %75, %70
%77 = fmul float %26, %66
%78 = fadd float %77, %71
%79 = fmul float %27, %66
%80 = fadd float %79, %72
%81 = fmul float %28, %67
%82 = fadd float %81, %74
%83 = fmul float %29, %67
%84 = fadd float %83, %76
%85 = fmul float %30, %67
%86 = fadd float %85, %78
%87 = call float @llvm.AMDGPU.lrp(float %68, float %82, float %65)
%88 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %84, float %66)
%89 = call float @llvm.AMDGPU.lrp(float %68, float %86, float %67)
%90 = fmul float %31, %87
%91 = fmul float %32, %88
%92 = fadd float %91, %90
%93 = fmul float %33, %89
%94 = fadd float %92, %93
%95 = fadd float %94, %34
%96 = fmul float %95, %31
%97 = fmul float %95, %32
%98 = fmul float %95, %33
%99 = fsub float %87, %96
%100 = fsub float %88, %97
%101 = fsub float %89, %98
%102 = call float @llvm.AMDGPU.lrp(float %35, float %87, float %99)
%103 = call float @llvm.AMDGPU.lrp(float %35, float %88, float %100)
%104 = call float @llvm.AMDGPU.lrp(float %35, float %89, float %101)
%105 = fmul float %63, %36
%106 = fadd float %105, %38
%107 = fmul float %64, %37
%108 = fadd float %107, %39
%109 = fcmp une float %16, 0.000000e+00
br i1 %109, label %IF, label %ELSE
IF: ; preds = %main_body
%110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%113 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%119 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%122 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%127 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%131 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%134 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%137 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%140 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%144 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%149 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%150 = fmul float %146, %102
%151 = fmul float %145, %102
%152 = fmul float %144, %102
%153 = fmul float %143, %103
%154 = fadd float %153, %150
%155 = fmul float %142, %103
%156 = fadd float %155, %151
%157 = fmul float %141, %103
%158 = fadd float %157, %152
%159 = fmul float %140, %104
%160 = fadd float %159, %154
%161 = fmul float %139, %104
%162 = fadd float %161, %156
%163 = fmul float %138, %104
%164 = fadd float %163, %158
%165 = fadd float %160, %137
%166 = fadd float %162, %136
%167 = fadd float %164, %135
%168 = fmul float %134, %51
%169 = fmul float %131, %51
%170 = fmul float %128, %51
%171 = fmul float %133, %52
%172 = fadd float %171, %168
%173 = fmul float %130, %52
%174 = fadd float %173, %169
%175 = fmul float %127, %52
%176 = fadd float %175, %170
%177 = fmul float %132, %53
%178 = fadd float %177, %172
%179 = fmul float %129, %53
%180 = fadd float %179, %174
%181 = fmul float %126, %53
%182 = fadd float %181, %176
%183 = fmul float %178, %178
%184 = fmul float %180, %180
%185 = fadd float %184, %183
%186 = fmul float %182, %182
%187 = fadd float %185, %186
%188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187)
%189 = fmul float %178, %188
%190 = fmul float %180, %188
%191 = fmul float %182, %188
%192 = fmul float %165, %13
%193 = fmul float %166, %13
%194 = fmul float %167, %13
%195 = fsub float %149, %192
%196 = fsub float %148, %193
%197 = fsub float %147, %194
%198 = fmul float %195, %195
%199 = fmul float %196, %196
%200 = fadd float %199, %198
%201 = fmul float %197, %197
%202 = fadd float %200, %201
%203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202)
%204 = fmul float %195, %203
%205 = fmul float %196, %203
%206 = fmul float %197, %203
%207 = fmul float %189, %204
%208 = fmul float %190, %205
%209 = fadd float %208, %207
%210 = fmul float %191, %206
%211 = fadd float %209, %210
%212 = fmul float %211, %211
%213 = fsub float 1.000000e+00, %212
%214 = call float @llvm.sqrt.f32(float %213)
%215 = fmul float %16, %214
%216 = fmul float %189, %215
%217 = fmul float %190, %215
%218 = fmul float %191, %215
%219 = fsub float %165, %216
%220 = fsub float %166, %217
%221 = fsub float %167, %218
%222 = fmul float %125, %219
%223 = fmul float %124, %219
%224 = fmul float %123, %219
%225 = fmul float %122, %219
%226 = fmul float %121, %220
%227 = fadd float %226, %222
%228 = fmul float %120, %220
%229 = fadd float %228, %223
%230 = fmul float %119, %220
%231 = fadd float %230, %224
%232 = fmul float %118, %220
%233 = fadd float %232, %225
%234 = fmul float %117, %221
%235 = fadd float %234, %227
%236 = fmul float %116, %221
%237 = fadd float %236, %229
%238 = fmul float %115, %221
%239 = fadd float %238, %231
%240 = fmul float %114, %221
%241 = fadd float %240, %233
%242 = fadd float %235, %113
%243 = fadd float %237, %112
%244 = fadd float %239, %111
%245 = fadd float %241, %110
br label %ENDIF
ELSE: ; preds = %main_body
%246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%247 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%248 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%250 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%251 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%253 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%254 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%256 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%257 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%260 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%262 = fmul float %261, %102
%263 = fmul float %260, %102
%264 = fmul float %259, %102
%265 = fmul float %258, %102
%266 = fmul float %257, %103
%267 = fadd float %266, %262
%268 = fmul float %256, %103
%269 = fadd float %268, %263
%270 = fmul float %255, %103
%271 = fadd float %270, %264
%272 = fmul float %254, %103
%273 = fadd float %272, %265
%274 = fmul float %253, %104
%275 = fadd float %274, %267
%276 = fmul float %252, %104
%277 = fadd float %276, %269
%278 = fmul float %251, %104
%279 = fadd float %278, %271
%280 = fmul float %250, %104
%281 = fadd float %280, %273
%282 = fadd float %275, %249
%283 = fadd float %277, %248
%284 = fadd float %279, %247
%285 = fadd float %281, %246
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp8.0 = phi float [ %242, %IF ], [ %282, %ELSE ]
%temp9.0 = phi float [ %243, %IF ], [ %283, %ELSE ]
%temp10.0 = phi float [ %244, %IF ], [ %284, %ELSE ]
%temp11.0 = phi float [ %245, %IF ], [ %285, %ELSE ]
%286 = fdiv float 1.000000e+00, %temp11.0
%287 = fmul float %14, %286
%288 = call float @llvm.AMDIL.clamp.(float %287, float 0.000000e+00, float 1.000000e+00)
%289 = fadd float %temp10.0, %288
%290 = fsub float -0.000000e+00, %temp11.0
%291 = call float @llvm.maxnum.f32(float %289, float %290)
%292 = call float @llvm.AMDGPU.lrp(float %15, float %291, float %289)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %106, float %108, float %101, float %80)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp8.0, float %temp9.0, float %292, float %temp11.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900
s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
buffer_load_format_xyzw v[11:14], v0, s[12:15], 0 idxen ; E00C2000 80030B00
buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700
s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00
buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000
s_buffer_load_dword s6, s[0:3], 0x39 ; C2030139
s_buffer_load_dword s7, s[0:3], 0x3a ; C203813A
s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C
s_buffer_load_dword s9, s[0:3], 0x3d ; C204813D
s_buffer_load_dword s10, s[0:3], 0x3e ; C205013E
s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140
s_buffer_load_dword s12, s[0:3], 0x41 ; C2060141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x44 ; C2080144
s_buffer_load_dword s17, s[0:3], 0x45 ; C2088145
s_buffer_load_dword s18, s[0:3], 0x46 ; C2090146
s_buffer_load_dword s19, s[0:3], 0x4c ; C209814C
s_buffer_load_dword s20, s[0:3], 0x4d ; C20A014D
s_buffer_load_dword s21, s[0:3], 0x4e ; C20A814E
s_buffer_load_dword s22, s[0:3], 0x4f ; C20B014F
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s13, s[0:3], 0x56 ; C2068156
s_buffer_load_dword s14, s[0:3], 0x57 ; C2070157
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_sub_f32_e64 v5, 1.0, s23 ; D2080005 00002EF2
v_mul_f32_e32 v3, s5, v11 ; 10061605
v_mul_f32_e32 v2, s6, v12 ; 10041806
v_mul_f32_e32 v4, s7, v13 ; 10081A07
v_mul_f32_e32 v6, v17, v17 ; 100C2311
v_mad_f32 v10, -v17, v17, 1.0 ; D282000A 23CA2311
v_mul_f32_e32 v11, s8, v3 ; 10160608
v_mac_f32_e32 v11, s11, v2 ; 3E16040B
v_mul_f32_e32 v12, s9, v3 ; 10180609
v_mac_f32_e32 v12, s12, v2 ; 3E18040C
v_mul_f32_e32 v13, s10, v3 ; 101A060A
v_mac_f32_e32 v13, s15, v2 ; 3E1A040F
v_mac_f32_e32 v11, s16, v4 ; 3E160810
v_mac_f32_e32 v12, s17, v4 ; 3E180811
v_mac_f32_e32 v13, s18, v4 ; 3E1A0812
v_mul_f32_e32 v14, v4, v10 ; 101C1504
v_mul_f32_e32 v10, v3, v10 ; 10141503
v_mac_f32_e32 v10, v11, v6 ; 3E140D0B
v_mac_f32_e32 v14, v13, v6 ; 3E1C0D0D
v_mad_f32 v6, 0, v12, v2 ; D2820006 040A1880
v_mul_f32_e32 v4, s19, v10 ; 10081413
v_mac_f32_e32 v4, s20, v6 ; 3E080C14
v_mac_f32_e32 v4, s21, v14 ; 3E081C15
v_add_f32_e32 v4, s22, v4 ; 06080816
v_mad_f32 v11, -v4, s19, v10 ; D282000B 24282704
v_mad_f32 v13, -v4, s20, v6 ; D282000D 24182904
v_mad_f32 v4, -v4, s21, v14 ; D2820004 24382B04
v_mul_f32_e32 v12, v11, v5 ; 10180B0B
v_mac_f32_e32 v12, s23, v10 ; 3E181417
v_mul_f32_e32 v11, v13, v5 ; 10160B0D
v_mac_f32_e32 v11, s23, v6 ; 3E160C17
v_mul_f32_e32 v10, v4, v5 ; 10140B04
v_mac_f32_e32 v10, s23, v14 ; 3E141C17
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
s_buffer_load_dword s5, s[0:3], 0x67 ; C2028167
s_buffer_load_dword s8, s[0:3], 0x62 ; C2040162
s_buffer_load_dword s9, s[0:3], 0x63 ; C2048163
s_buffer_load_dword s10, s[0:3], 0x64 ; C2050164
s_buffer_load_dword s11, s[0:3], 0x65 ; C2058165
s_buffer_load_dword s12, s[0:3], 0x66 ; C2060166
s_buffer_load_dword s15, s[0:3], 0x5d ; C207815D
s_buffer_load_dword s16, s[0:3], 0x5e ; C208015E
s_buffer_load_dword s17, s[0:3], 0x5f ; C208815F
s_buffer_load_dword s18, s[0:3], 0x60 ; C2090160
s_buffer_load_dword s19, s[0:3], 0x61 ; C2098161
s_buffer_load_dword s20, s[0:3], 0x58 ; C20A0158
s_buffer_load_dword s21, s[0:3], 0x59 ; C20A8159
s_buffer_load_dword s22, s[0:3], 0x5a ; C20B015A
s_buffer_load_dword s23, s[0:3], 0x5b ; C20B815B
s_buffer_load_dword s24, s[0:3], 0x5c ; C20C015C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s20, v12 ; 100A1814
v_mul_f32_e32 v6, s21, v12 ; 100C1815
v_mul_f32_e32 v15, s22, v12 ; 101E1816
v_mul_f32_e32 v17, s23, v12 ; 10221817
v_mac_f32_e32 v5, s24, v11 ; 3E0A1618
v_mac_f32_e32 v6, s15, v11 ; 3E0C160F
v_mac_f32_e32 v15, s16, v11 ; 3E1E1610
v_mac_f32_e32 v17, s17, v11 ; 3E221611
v_mac_f32_e32 v5, s18, v10 ; 3E0A1412
v_mac_f32_e32 v6, s19, v10 ; 3E0C1413
v_mac_f32_e32 v15, s8, v10 ; 3E1E1408
v_mac_f32_e32 v17, s9, v10 ; 3E221409
v_add_f32_e32 v13, s10, v5 ; 061A0A0A
v_add_f32_e32 v14, s11, v6 ; 061C0C0B
v_add_f32_e32 v16, s12, v15 ; 06201E0C
v_add_f32_e32 v15, s5, v17 ; 061E2205
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s12, s[0:3], 0x3f ; C206013F
s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143
s_buffer_load_dword s10, s[0:3], 0x54 ; C2050154
s_buffer_load_dword s11, s[0:3], 0x55 ; C2058155
v_mov_b32_e32 v5, s13 ; 7E0A020D
v_mov_b32_e32 v6, s14 ; 7E0C020E
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
s_buffer_load_dword s13, s[0:3], 0x37 ; C2068137
s_buffer_load_dword s14, s[0:3], 0x32 ; C2070132
s_buffer_load_dword s15, s[0:3], 0x33 ; C2078133
s_buffer_load_dword s16, s[0:3], 0x34 ; C2080134
s_buffer_load_dword s17, s[0:3], 0x35 ; C2088135
s_buffer_load_dword s18, s[0:3], 0x36 ; C2090136
s_buffer_load_dword s19, s[0:3], 0x2d ; C209812D
s_buffer_load_dword s20, s[0:3], 0x2e ; C20A012E
s_buffer_load_dword s21, s[0:3], 0x2f ; C20A812F
s_buffer_load_dword s22, s[0:3], 0x30 ; C20B0130
s_buffer_load_dword s23, s[0:3], 0x31 ; C20B8131
s_buffer_load_dword s24, s[0:3], 0x28 ; C20C0128
s_buffer_load_dword s25, s[0:3], 0x29 ; C20C8129
s_buffer_load_dword s26, s[0:3], 0x2a ; C20D012A
s_buffer_load_dword s27, s[0:3], 0x2b ; C20D812B
s_buffer_load_dword s28, s[0:3], 0x2c ; C20E012C
s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D
s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E
s_buffer_load_dword s31, s[0:3], 0x20 ; C20F8120
s_buffer_load_dword s32, s[0:3], 0x21 ; C2100121
s_buffer_load_dword s33, s[0:3], 0x22 ; C2108122
s_buffer_load_dword s34, s[0:3], 0x16 ; C2110116
s_buffer_load_dword s35, s[0:3], 0x18 ; C2118118
s_buffer_load_dword s36, s[0:3], 0x19 ; C2120119
s_buffer_load_dword s37, s[0:3], 0x1a ; C212811A
s_buffer_load_dword s38, s[0:3], 0x1c ; C213011C
s_buffer_load_dword s39, s[0:3], 0x10 ; C2138110
s_buffer_load_dword s40, s[0:3], 0x11 ; C2140111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v13, s31, v7 ; 101A0E1F
v_mac_f32_e32 v13, s32, v8 ; 3E1A1020
v_mac_f32_e32 v13, s33, v9 ; 3E1A1221
s_buffer_load_dword s31, s[0:3], 0x12 ; C20F8112
v_mul_f32_e32 v14, s35, v7 ; 101C0E23
v_mac_f32_e32 v14, s36, v8 ; 3E1C1024
v_mac_f32_e32 v14, s37, v9 ; 3E1C1225
v_mul_f32_e32 v7, s38, v7 ; 100E0E26
v_mac_f32_e32 v7, s29, v8 ; 3E0E101D
v_mac_f32_e32 v7, s30, v9 ; 3E0E121E
s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114
s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115
s_buffer_load_dword s32, s[0:3], 0x9 ; C2100109
s_buffer_load_dword s33, s[0:3], 0xa ; C210810A
s_buffer_load_dword s35, s[0:3], 0xc ; C211810C
s_buffer_load_dword s36, s[0:3], 0xd ; C212010D
s_buffer_load_dword s37, s[0:3], 0xe ; C212810E
s_buffer_load_dword s38, s[0:3], 0x0 ; C2130100
v_mul_f32_e32 v8, v14, v14 ; 10101D0E
v_mac_f32_e32 v8, v7, v7 ; 3E100F07
v_mac_f32_e32 v8, v13, v13 ; 3E101B0D
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v9, s32, v12 ; 10121820
v_mul_f32_e32 v15, s33, v12 ; 101E1821
s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101
v_mac_f32_e32 v9, s36, v11 ; 3E121624
v_mac_f32_e32 v15, s37, v11 ; 3E1E1625
v_mac_f32_e32 v9, s40, v10 ; 3E121428
v_mac_f32_e32 v15, s31, v10 ; 3E1E141F
v_add_f32_e32 v9, s30, v9 ; 0612121E
v_add_f32_e32 v15, s34, v15 ; 061E1E22
s_buffer_load_dword s30, s[0:3], 0x2 ; C20F0102
s_buffer_load_dword s31, s[0:3], 0x3 ; C20F8103
s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v16, s31 ; 7E20021F
v_mad_f32 v17, -v9, v16, s32 ; D2820011 20822109
v_mul_f32_e32 v12, s33, v12 ; 10181821
v_mac_f32_e32 v12, s35, v11 ; 3E181623
v_mac_f32_e32 v12, s39, v10 ; 3E181427
v_add_f32_e32 v10, s29, v12 ; 0614181D
v_mad_f32 v11, -v10, v16, s38 ; D282000B 209A210A
v_mad_f32 v12, -v15, v16, s30 ; D282000C 207A210F
v_mul_f32_e32 v16, v11, v11 ; 1020170B
v_mac_f32_e32 v16, v17, v17 ; 3E202311
v_mac_f32_e32 v16, v12, v12 ; 3E20190C
v_rsq_clamp_f32_e32 v16, v16 ; 7E205910
v_mul_f32_e32 v14, v8, v14 ; 101C1D08
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v8, v8, v13 ; 10101B08
v_mul_f32_e32 v11, v16, v11 ; 10161710
v_mul_f32_e32 v13, v16, v17 ; 101A2310
v_mul_f32_e32 v12, v16, v12 ; 10181910
v_mul_f32_e32 v11, v11, v14 ; 10161D0B
v_mac_f32_e32 v11, v13, v7 ; 3E160F0D
v_mac_f32_e32 v11, v12, v8 ; 3E16110C
v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B
v_sqrt_f32_e32 v11, v11 ; 7E16670B
v_mul_f32_e32 v11, s4, v11 ; 10161604
v_mad_f32 v10, -v14, v11, v10 ; D282000A 242A170E
v_mad_f32 v7, -v7, v11, v9 ; D2820007 24261707
v_mad_f32 v8, -v8, v11, v15 ; D2820008 243E1708
v_mul_f32_e32 v9, s24, v10 ; 10121418
v_mul_f32_e32 v11, s25, v10 ; 10161419
v_mul_f32_e32 v12, s26, v10 ; 1018141A
v_mul_f32_e32 v10, s27, v10 ; 1014141B
v_mac_f32_e32 v9, s28, v7 ; 3E120E1C
v_mac_f32_e32 v11, s19, v7 ; 3E160E13
v_mac_f32_e32 v12, s20, v7 ; 3E180E14
v_mac_f32_e32 v10, s21, v7 ; 3E140E15
v_mac_f32_e32 v9, s22, v8 ; 3E121016
v_mac_f32_e32 v11, s23, v8 ; 3E161017
v_mac_f32_e32 v12, s14, v8 ; 3E18100E
v_mac_f32_e32 v10, s15, v8 ; 3E14100F
v_add_f32_e32 v13, s16, v9 ; 061A1210
v_add_f32_e32 v14, s17, v11 ; 061C1611
v_add_f32_e32 v16, s18, v12 ; 06201812
v_add_f32_e32 v15, s13, v10 ; 061E140D
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_mul_f32_e32 v3, s12, v3 ; 1006060C
v_rcp_f32_e32 v7, v15 ; 7E0E550F
v_mac_f32_e32 v3, s9, v2 ; 3E060409
v_mac_f32_e32 v5, s10, v0 ; 3E0A000A
v_mac_f32_e32 v6, s11, v1 ; 3E0C020B
v_mul_f32_e32 v0, s8, v7 ; 10000E08
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_add_f32_e32 v0, v0, v16 ; 06002100
v_max_f32_e64 v1, v0, -v15 ; D2200001 40021F00
v_sub_f32_e64 v2, 1.0, s5 ; D2080002 00000AF2
v_mul_f32_e32 v0, v0, v2 ; 10000500
v_mac_f32_e32 v0, s5, v1 ; 3E000205
exp 15, 32, 0, 0, 0, v5, v6, v4, v3 ; F800020F 03040605
exp 15, 12, 0, 1, 0, v13, v14, v0, v15 ; F80008CF 0F000E0D
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 20
Code Size: 1048 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D
2: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1].xxxx
3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
4: KILL_IF -TEMP[0].xxxx
5: MOV OUT[0], IMM[0].yyyy
6: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0
%27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0
%29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%31 = bitcast float %29 to i32
%32 = bitcast float %30 to i32
%33 = insertelement <2 x i32> undef, i32 %31, i32 0
%34 = insertelement <2 x i32> %33, i32 %32, i32 1
%35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %26, <16 x i8> %28, i32 2)
%36 = extractelement <4 x float> %35, i32 3
%37 = fcmp olt float %36, %24
%38 = select i1 %37, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %38)
%39 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%40 = bitcast i32 %39 to float
%41 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%42 = bitcast i32 %41 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800800 00230002
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_cmp_gt_f32_e32 vcc, s0, v0 ; 7C080000
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 92 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..25]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[14].xyzz
1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww
2: MOV TEMP[2].y, IMM[0].xxxx
3: MOV TEMP[2].x, TEMP[1].xxxx
4: MOV TEMP[2].z, TEMP[1].xxxx
5: MUL TEMP[1], CONST[15], TEMP[0].xxxx
6: MAD TEMP[1], CONST[16], TEMP[0].yyyy, TEMP[1]
7: MAD TEMP[1].xyz, CONST[17], TEMP[0].zzzz, TEMP[1]
8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz
9: DP3 TEMP[1].x, CONST[19].xyzz, TEMP[0].xyzz
10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[19].wwww
11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[19].xyzz
12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
13: LRP TEMP[0].xyz, CONST[20].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
14: MAD TEMP[1].xy, IN[3].xyyy, CONST[21].xyyy, CONST[21].zwww
15: FSNE TEMP[2].x, CONST[1].zzzz, IMM[0].xxxx
16: UIF TEMP[2].xxxx :0
17: MUL TEMP[2], CONST[2], TEMP[0].xxxx
18: MAD TEMP[2], CONST[3], TEMP[0].yyyy, TEMP[2]
19: MAD TEMP[2], CONST[4], TEMP[0].zzzz, TEMP[2]
20: ADD TEMP[2].xyz, TEMP[2], CONST[5]
21: MOV TEMP[3].x, CONST[6].xxxx
22: MOV TEMP[3].y, CONST[7].xxxx
23: MOV TEMP[3].z, CONST[8].xxxx
24: MOV TEMP[4].x, CONST[6].yyyy
25: MOV TEMP[4].y, CONST[7].yyyy
26: MOV TEMP[4].z, CONST[8].yyyy
27: MOV TEMP[5].x, CONST[6].zzzz
28: MOV TEMP[5].y, CONST[7].zzzz
29: MOV TEMP[5].z, CONST[8].zzzz
30: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
31: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
32: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
33: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
34: RSQ TEMP[4].x, TEMP[4].xxxx
35: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
36: MUL TEMP[4].xyz, TEMP[2].xyzz, CONST[0].wwww
37: ADD TEMP[4].xyz, CONST[0].xyzz, -TEMP[4].xyzz
38: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
39: RSQ TEMP[5].x, TEMP[5].xxxx
40: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
41: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[4].xyzz
42: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[4].xxxx
43: ADD TEMP[4].x, IMM[0].yyyy, -TEMP[4].xxxx
44: SQRT TEMP[4].x, TEMP[4].xxxx
45: MUL TEMP[4].x, CONST[1].zzzz, TEMP[4].xxxx
46: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
47: ADD TEMP[2].xyz, TEMP[2].xyzz, -TEMP[3].xyzz
48: MUL TEMP[3], CONST[10], TEMP[2].xxxx
49: MAD TEMP[3], CONST[11], TEMP[2].yyyy, TEMP[3]
50: MAD TEMP[2], CONST[12], TEMP[2].zzzz, TEMP[3]
51: ADD TEMP[2], TEMP[2], CONST[13]
52: ELSE :0
53: MUL TEMP[3], CONST[22], TEMP[0].xxxx
54: MAD TEMP[3], CONST[23], TEMP[0].yyyy, TEMP[3]
55: MAD TEMP[0], CONST[24], TEMP[0].zzzz, TEMP[3]
56: ADD TEMP[2], TEMP[0], CONST[25]
57: ENDIF
58: MOV TEMP[0].xyw, TEMP[2].xyxw
59: RCP TEMP[3].x, TEMP[2].wwww
60: MUL TEMP[3].x, CONST[1].xxxx, TEMP[3].xxxx
61: MOV_SAT TEMP[3].x, TEMP[3].xxxx
62: ADD TEMP[3].x, TEMP[2].zzzz, TEMP[3].xxxx
63: MAX TEMP[2].x, TEMP[3].xxxx, -TEMP[2].wwww
64: LRP TEMP[2].x, CONST[1].yyyy, TEMP[2].xxxx, TEMP[3].xxxx
65: MOV TEMP[0].z, TEMP[2].xxxx
66: MOV TEMP[1].xy, TEMP[1].xyxx
67: MOV OUT[1], TEMP[1]
68: MOV OUT[0], TEMP[0]
69: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%40 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = add i32 %5, %7
%43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%48 = load <16 x i8>, <16 x i8> addrspace(2)* %47, align 16, !tbaa !0
%49 = add i32 %5, %7
%50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49)
%51 = extractelement <4 x float> %50, i32 0
%52 = extractelement <4 x float> %50, i32 1
%53 = extractelement <4 x float> %50, i32 2
%54 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = add i32 %5, %7
%57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56)
%58 = extractelement <4 x float> %57, i32 3
%59 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%60 = load <16 x i8>, <16 x i8> addrspace(2)* %59, align 16, !tbaa !0
%61 = add i32 %5, %7
%62 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %60, i32 0, i32 %61)
%63 = extractelement <4 x float> %62, i32 0
%64 = extractelement <4 x float> %62, i32 1
%65 = fmul float %44, %17
%66 = fmul float %45, %18
%67 = fmul float %46, %19
%68 = fmul float %58, %58
%69 = fmul float %20, %65
%70 = fmul float %21, %65
%71 = fmul float %22, %65
%72 = fmul float %23, %65
%73 = fmul float %24, %66
%74 = fadd float %73, %69
%75 = fmul float %25, %66
%76 = fadd float %75, %70
%77 = fmul float %26, %66
%78 = fadd float %77, %71
%79 = fmul float %27, %66
%80 = fadd float %79, %72
%81 = fmul float %28, %67
%82 = fadd float %81, %74
%83 = fmul float %29, %67
%84 = fadd float %83, %76
%85 = fmul float %30, %67
%86 = fadd float %85, %78
%87 = call float @llvm.AMDGPU.lrp(float %68, float %82, float %65)
%88 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %84, float %66)
%89 = call float @llvm.AMDGPU.lrp(float %68, float %86, float %67)
%90 = fmul float %31, %87
%91 = fmul float %32, %88
%92 = fadd float %91, %90
%93 = fmul float %33, %89
%94 = fadd float %92, %93
%95 = fadd float %94, %34
%96 = fmul float %95, %31
%97 = fmul float %95, %32
%98 = fmul float %95, %33
%99 = fsub float %87, %96
%100 = fsub float %88, %97
%101 = fsub float %89, %98
%102 = call float @llvm.AMDGPU.lrp(float %35, float %87, float %99)
%103 = call float @llvm.AMDGPU.lrp(float %35, float %88, float %100)
%104 = call float @llvm.AMDGPU.lrp(float %35, float %89, float %101)
%105 = fmul float %63, %36
%106 = fadd float %105, %38
%107 = fmul float %64, %37
%108 = fadd float %107, %39
%109 = fcmp une float %16, 0.000000e+00
br i1 %109, label %IF, label %ELSE
IF: ; preds = %main_body
%110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%112 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%113 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%118 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%119 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%121 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%122 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%127 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%131 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%132 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%134 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%135 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%137 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%140 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%141 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%144 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%149 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%150 = fmul float %146, %102
%151 = fmul float %145, %102
%152 = fmul float %144, %102
%153 = fmul float %143, %103
%154 = fadd float %153, %150
%155 = fmul float %142, %103
%156 = fadd float %155, %151
%157 = fmul float %141, %103
%158 = fadd float %157, %152
%159 = fmul float %140, %104
%160 = fadd float %159, %154
%161 = fmul float %139, %104
%162 = fadd float %161, %156
%163 = fmul float %138, %104
%164 = fadd float %163, %158
%165 = fadd float %160, %137
%166 = fadd float %162, %136
%167 = fadd float %164, %135
%168 = fmul float %134, %51
%169 = fmul float %131, %51
%170 = fmul float %128, %51
%171 = fmul float %133, %52
%172 = fadd float %171, %168
%173 = fmul float %130, %52
%174 = fadd float %173, %169
%175 = fmul float %127, %52
%176 = fadd float %175, %170
%177 = fmul float %132, %53
%178 = fadd float %177, %172
%179 = fmul float %129, %53
%180 = fadd float %179, %174
%181 = fmul float %126, %53
%182 = fadd float %181, %176
%183 = fmul float %178, %178
%184 = fmul float %180, %180
%185 = fadd float %184, %183
%186 = fmul float %182, %182
%187 = fadd float %185, %186
%188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187)
%189 = fmul float %178, %188
%190 = fmul float %180, %188
%191 = fmul float %182, %188
%192 = fmul float %165, %13
%193 = fmul float %166, %13
%194 = fmul float %167, %13
%195 = fsub float %149, %192
%196 = fsub float %148, %193
%197 = fsub float %147, %194
%198 = fmul float %195, %195
%199 = fmul float %196, %196
%200 = fadd float %199, %198
%201 = fmul float %197, %197
%202 = fadd float %200, %201
%203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202)
%204 = fmul float %195, %203
%205 = fmul float %196, %203
%206 = fmul float %197, %203
%207 = fmul float %189, %204
%208 = fmul float %190, %205
%209 = fadd float %208, %207
%210 = fmul float %191, %206
%211 = fadd float %209, %210
%212 = fmul float %211, %211
%213 = fsub float 1.000000e+00, %212
%214 = call float @llvm.sqrt.f32(float %213)
%215 = fmul float %16, %214
%216 = fmul float %189, %215
%217 = fmul float %190, %215
%218 = fmul float %191, %215
%219 = fsub float %165, %216
%220 = fsub float %166, %217
%221 = fsub float %167, %218
%222 = fmul float %125, %219
%223 = fmul float %124, %219
%224 = fmul float %123, %219
%225 = fmul float %122, %219
%226 = fmul float %121, %220
%227 = fadd float %226, %222
%228 = fmul float %120, %220
%229 = fadd float %228, %223
%230 = fmul float %119, %220
%231 = fadd float %230, %224
%232 = fmul float %118, %220
%233 = fadd float %232, %225
%234 = fmul float %117, %221
%235 = fadd float %234, %227
%236 = fmul float %116, %221
%237 = fadd float %236, %229
%238 = fmul float %115, %221
%239 = fadd float %238, %231
%240 = fmul float %114, %221
%241 = fadd float %240, %233
%242 = fadd float %235, %113
%243 = fadd float %237, %112
%244 = fadd float %239, %111
%245 = fadd float %241, %110
br label %ENDIF
ELSE: ; preds = %main_body
%246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%247 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%248 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%250 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%251 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%253 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%254 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%256 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%257 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%260 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%262 = fmul float %261, %102
%263 = fmul float %260, %102
%264 = fmul float %259, %102
%265 = fmul float %258, %102
%266 = fmul float %257, %103
%267 = fadd float %266, %262
%268 = fmul float %256, %103
%269 = fadd float %268, %263
%270 = fmul float %255, %103
%271 = fadd float %270, %264
%272 = fmul float %254, %103
%273 = fadd float %272, %265
%274 = fmul float %253, %104
%275 = fadd float %274, %267
%276 = fmul float %252, %104
%277 = fadd float %276, %269
%278 = fmul float %251, %104
%279 = fadd float %278, %271
%280 = fmul float %250, %104
%281 = fadd float %280, %273
%282 = fadd float %275, %249
%283 = fadd float %277, %248
%284 = fadd float %279, %247
%285 = fadd float %281, %246
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp8.0 = phi float [ %242, %IF ], [ %282, %ELSE ]
%temp9.0 = phi float [ %243, %IF ], [ %283, %ELSE ]
%temp10.0 = phi float [ %244, %IF ], [ %284, %ELSE ]
%temp11.0 = phi float [ %245, %IF ], [ %285, %ELSE ]
%286 = fdiv float 1.000000e+00, %temp11.0
%287 = fmul float %14, %286
%288 = call float @llvm.AMDIL.clamp.(float %287, float 0.000000e+00, float 1.000000e+00)
%289 = fadd float %temp10.0, %288
%290 = fsub float -0.000000e+00, %temp11.0
%291 = call float @llvm.maxnum.f32(float %289, float %290)
%292 = call float @llvm.AMDGPU.lrp(float %15, float %291, float %289)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %106, float %108, float %101, float %80)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp8.0, float %temp9.0, float %292, float %temp11.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900
s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
buffer_load_format_xyzw v[11:14], v0, s[12:15], 0 idxen ; E00C2000 80030B00
buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700
s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
buffer_load_format_xyzw v[14:17], v0, s[20:23], 0 idxen ; E00C2000 80050E00
buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000
s_buffer_load_dword s6, s[0:3], 0x39 ; C2030139
s_buffer_load_dword s7, s[0:3], 0x3a ; C203813A
s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C
s_buffer_load_dword s9, s[0:3], 0x3d ; C204813D
s_buffer_load_dword s10, s[0:3], 0x3e ; C205013E
s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140
s_buffer_load_dword s12, s[0:3], 0x41 ; C2060141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x44 ; C2080144
s_buffer_load_dword s17, s[0:3], 0x45 ; C2088145
s_buffer_load_dword s18, s[0:3], 0x46 ; C2090146
s_buffer_load_dword s19, s[0:3], 0x4c ; C209814C
s_buffer_load_dword s20, s[0:3], 0x4d ; C20A014D
s_buffer_load_dword s21, s[0:3], 0x4e ; C20A814E
s_buffer_load_dword s22, s[0:3], 0x4f ; C20B014F
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s13, s[0:3], 0x56 ; C2068156
s_buffer_load_dword s14, s[0:3], 0x57 ; C2070157
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_sub_f32_e64 v5, 1.0, s23 ; D2080005 00002EF2
v_mul_f32_e32 v3, s5, v11 ; 10061605
v_mul_f32_e32 v2, s6, v12 ; 10041806
v_mul_f32_e32 v4, s7, v13 ; 10081A07
v_mul_f32_e32 v6, v17, v17 ; 100C2311
v_mad_f32 v10, -v17, v17, 1.0 ; D282000A 23CA2311
v_mul_f32_e32 v11, s8, v3 ; 10160608
v_mac_f32_e32 v11, s11, v2 ; 3E16040B
v_mul_f32_e32 v12, s9, v3 ; 10180609
v_mac_f32_e32 v12, s12, v2 ; 3E18040C
v_mul_f32_e32 v13, s10, v3 ; 101A060A
v_mac_f32_e32 v13, s15, v2 ; 3E1A040F
v_mac_f32_e32 v11, s16, v4 ; 3E160810
v_mac_f32_e32 v12, s17, v4 ; 3E180811
v_mac_f32_e32 v13, s18, v4 ; 3E1A0812
v_mul_f32_e32 v14, v4, v10 ; 101C1504
v_mul_f32_e32 v10, v3, v10 ; 10141503
v_mac_f32_e32 v10, v11, v6 ; 3E140D0B
v_mac_f32_e32 v14, v13, v6 ; 3E1C0D0D
v_mad_f32 v6, 0, v12, v2 ; D2820006 040A1880
v_mul_f32_e32 v4, s19, v10 ; 10081413
v_mac_f32_e32 v4, s20, v6 ; 3E080C14
v_mac_f32_e32 v4, s21, v14 ; 3E081C15
v_add_f32_e32 v4, s22, v4 ; 06080816
v_mad_f32 v11, -v4, s19, v10 ; D282000B 24282704
v_mad_f32 v13, -v4, s20, v6 ; D282000D 24182904
v_mad_f32 v4, -v4, s21, v14 ; D2820004 24382B04
v_mul_f32_e32 v12, v11, v5 ; 10180B0B
v_mac_f32_e32 v12, s23, v10 ; 3E181417
v_mul_f32_e32 v11, v13, v5 ; 10160B0D
v_mac_f32_e32 v11, s23, v6 ; 3E160C17
v_mul_f32_e32 v10, v4, v5 ; 10140B04
v_mac_f32_e32 v10, s23, v14 ; 3E141C17
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
s_buffer_load_dword s5, s[0:3], 0x67 ; C2028167
s_buffer_load_dword s8, s[0:3], 0x62 ; C2040162
s_buffer_load_dword s9, s[0:3], 0x63 ; C2048163
s_buffer_load_dword s10, s[0:3], 0x64 ; C2050164
s_buffer_load_dword s11, s[0:3], 0x65 ; C2058165
s_buffer_load_dword s12, s[0:3], 0x66 ; C2060166
s_buffer_load_dword s15, s[0:3], 0x5d ; C207815D
s_buffer_load_dword s16, s[0:3], 0x5e ; C208015E
s_buffer_load_dword s17, s[0:3], 0x5f ; C208815F
s_buffer_load_dword s18, s[0:3], 0x60 ; C2090160
s_buffer_load_dword s19, s[0:3], 0x61 ; C2098161
s_buffer_load_dword s20, s[0:3], 0x58 ; C20A0158
s_buffer_load_dword s21, s[0:3], 0x59 ; C20A8159
s_buffer_load_dword s22, s[0:3], 0x5a ; C20B015A
s_buffer_load_dword s23, s[0:3], 0x5b ; C20B815B
s_buffer_load_dword s24, s[0:3], 0x5c ; C20C015C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s20, v12 ; 100A1814
v_mul_f32_e32 v6, s21, v12 ; 100C1815
v_mul_f32_e32 v15, s22, v12 ; 101E1816
v_mul_f32_e32 v17, s23, v12 ; 10221817
v_mac_f32_e32 v5, s24, v11 ; 3E0A1618
v_mac_f32_e32 v6, s15, v11 ; 3E0C160F
v_mac_f32_e32 v15, s16, v11 ; 3E1E1610
v_mac_f32_e32 v17, s17, v11 ; 3E221611
v_mac_f32_e32 v5, s18, v10 ; 3E0A1412
v_mac_f32_e32 v6, s19, v10 ; 3E0C1413
v_mac_f32_e32 v15, s8, v10 ; 3E1E1408
v_mac_f32_e32 v17, s9, v10 ; 3E221409
v_add_f32_e32 v13, s10, v5 ; 061A0A0A
v_add_f32_e32 v14, s11, v6 ; 061C0C0B
v_add_f32_e32 v16, s12, v15 ; 06201E0C
v_add_f32_e32 v15, s5, v17 ; 061E2205
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s12, s[0:3], 0x3f ; C206013F
s_buffer_load_dword s9, s[0:3], 0x43 ; C2048143
s_buffer_load_dword s10, s[0:3], 0x54 ; C2050154
s_buffer_load_dword s11, s[0:3], 0x55 ; C2058155
v_mov_b32_e32 v5, s13 ; 7E0A020D
v_mov_b32_e32 v6, s14 ; 7E0C020E
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
s_buffer_load_dword s13, s[0:3], 0x37 ; C2068137
s_buffer_load_dword s14, s[0:3], 0x32 ; C2070132
s_buffer_load_dword s15, s[0:3], 0x33 ; C2078133
s_buffer_load_dword s16, s[0:3], 0x34 ; C2080134
s_buffer_load_dword s17, s[0:3], 0x35 ; C2088135
s_buffer_load_dword s18, s[0:3], 0x36 ; C2090136
s_buffer_load_dword s19, s[0:3], 0x2d ; C209812D
s_buffer_load_dword s20, s[0:3], 0x2e ; C20A012E
s_buffer_load_dword s21, s[0:3], 0x2f ; C20A812F
s_buffer_load_dword s22, s[0:3], 0x30 ; C20B0130
s_buffer_load_dword s23, s[0:3], 0x31 ; C20B8131
s_buffer_load_dword s24, s[0:3], 0x28 ; C20C0128
s_buffer_load_dword s25, s[0:3], 0x29 ; C20C8129
s_buffer_load_dword s26, s[0:3], 0x2a ; C20D012A
s_buffer_load_dword s27, s[0:3], 0x2b ; C20D812B
s_buffer_load_dword s28, s[0:3], 0x2c ; C20E012C
s_buffer_load_dword s29, s[0:3], 0x1d ; C20E811D
s_buffer_load_dword s30, s[0:3], 0x1e ; C20F011E
s_buffer_load_dword s31, s[0:3], 0x20 ; C20F8120
s_buffer_load_dword s32, s[0:3], 0x21 ; C2100121
s_buffer_load_dword s33, s[0:3], 0x22 ; C2108122
s_buffer_load_dword s34, s[0:3], 0x16 ; C2110116
s_buffer_load_dword s35, s[0:3], 0x18 ; C2118118
s_buffer_load_dword s36, s[0:3], 0x19 ; C2120119
s_buffer_load_dword s37, s[0:3], 0x1a ; C212811A
s_buffer_load_dword s38, s[0:3], 0x1c ; C213011C
s_buffer_load_dword s39, s[0:3], 0x10 ; C2138110
s_buffer_load_dword s40, s[0:3], 0x11 ; C2140111
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v13, s31, v7 ; 101A0E1F
v_mac_f32_e32 v13, s32, v8 ; 3E1A1020
v_mac_f32_e32 v13, s33, v9 ; 3E1A1221
s_buffer_load_dword s31, s[0:3], 0x12 ; C20F8112
v_mul_f32_e32 v14, s35, v7 ; 101C0E23
v_mac_f32_e32 v14, s36, v8 ; 3E1C1024
v_mac_f32_e32 v14, s37, v9 ; 3E1C1225
v_mul_f32_e32 v7, s38, v7 ; 100E0E26
v_mac_f32_e32 v7, s29, v8 ; 3E0E101D
v_mac_f32_e32 v7, s30, v9 ; 3E0E121E
s_buffer_load_dword s29, s[0:3], 0x14 ; C20E8114
s_buffer_load_dword s30, s[0:3], 0x15 ; C20F0115
s_buffer_load_dword s32, s[0:3], 0x9 ; C2100109
s_buffer_load_dword s33, s[0:3], 0xa ; C210810A
s_buffer_load_dword s35, s[0:3], 0xc ; C211810C
s_buffer_load_dword s36, s[0:3], 0xd ; C212010D
s_buffer_load_dword s37, s[0:3], 0xe ; C212810E
s_buffer_load_dword s38, s[0:3], 0x0 ; C2130100
v_mul_f32_e32 v8, v14, v14 ; 10101D0E
v_mac_f32_e32 v8, v7, v7 ; 3E100F07
v_mac_f32_e32 v8, v13, v13 ; 3E101B0D
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v9, s32, v12 ; 10121820
v_mul_f32_e32 v15, s33, v12 ; 101E1821
s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101
v_mac_f32_e32 v9, s36, v11 ; 3E121624
v_mac_f32_e32 v15, s37, v11 ; 3E1E1625
v_mac_f32_e32 v9, s40, v10 ; 3E121428
v_mac_f32_e32 v15, s31, v10 ; 3E1E141F
v_add_f32_e32 v9, s30, v9 ; 0612121E
v_add_f32_e32 v15, s34, v15 ; 061E1E22
s_buffer_load_dword s30, s[0:3], 0x2 ; C20F0102
s_buffer_load_dword s31, s[0:3], 0x3 ; C20F8103
s_buffer_load_dword s33, s[0:3], 0x8 ; C2108108
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v16, s31 ; 7E20021F
v_mad_f32 v17, -v9, v16, s32 ; D2820011 20822109
v_mul_f32_e32 v12, s33, v12 ; 10181821
v_mac_f32_e32 v12, s35, v11 ; 3E181623
v_mac_f32_e32 v12, s39, v10 ; 3E181427
v_add_f32_e32 v10, s29, v12 ; 0614181D
v_mad_f32 v11, -v10, v16, s38 ; D282000B 209A210A
v_mad_f32 v12, -v15, v16, s30 ; D282000C 207A210F
v_mul_f32_e32 v16, v11, v11 ; 1020170B
v_mac_f32_e32 v16, v17, v17 ; 3E202311
v_mac_f32_e32 v16, v12, v12 ; 3E20190C
v_rsq_clamp_f32_e32 v16, v16 ; 7E205910
v_mul_f32_e32 v14, v8, v14 ; 101C1D08
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v8, v8, v13 ; 10101B08
v_mul_f32_e32 v11, v16, v11 ; 10161710
v_mul_f32_e32 v13, v16, v17 ; 101A2310
v_mul_f32_e32 v12, v16, v12 ; 10181910
v_mul_f32_e32 v11, v11, v14 ; 10161D0B
v_mac_f32_e32 v11, v13, v7 ; 3E160F0D
v_mac_f32_e32 v11, v12, v8 ; 3E16110C
v_mad_f32 v11, -v11, v11, 1.0 ; D282000B 23CA170B
v_sqrt_f32_e32 v11, v11 ; 7E16670B
v_mul_f32_e32 v11, s4, v11 ; 10161604
v_mad_f32 v10, -v14, v11, v10 ; D282000A 242A170E
v_mad_f32 v7, -v7, v11, v9 ; D2820007 24261707
v_mad_f32 v8, -v8, v11, v15 ; D2820008 243E1708
v_mul_f32_e32 v9, s24, v10 ; 10121418
v_mul_f32_e32 v11, s25, v10 ; 10161419
v_mul_f32_e32 v12, s26, v10 ; 1018141A
v_mul_f32_e32 v10, s27, v10 ; 1014141B
v_mac_f32_e32 v9, s28, v7 ; 3E120E1C
v_mac_f32_e32 v11, s19, v7 ; 3E160E13
v_mac_f32_e32 v12, s20, v7 ; 3E180E14
v_mac_f32_e32 v10, s21, v7 ; 3E140E15
v_mac_f32_e32 v9, s22, v8 ; 3E121016
v_mac_f32_e32 v11, s23, v8 ; 3E161017
v_mac_f32_e32 v12, s14, v8 ; 3E18100E
v_mac_f32_e32 v10, s15, v8 ; 3E14100F
v_add_f32_e32 v13, s16, v9 ; 061A1210
v_add_f32_e32 v14, s17, v11 ; 061C1611
v_add_f32_e32 v16, s18, v12 ; 06201812
v_add_f32_e32 v15, s13, v10 ; 061E140D
s_or_b64 exec, exec, s[6:7] ; 88FE067E
v_mul_f32_e32 v3, s12, v3 ; 1006060C
v_rcp_f32_e32 v7, v15 ; 7E0E550F
v_mac_f32_e32 v3, s9, v2 ; 3E060409
v_mac_f32_e32 v5, s10, v0 ; 3E0A000A
v_mac_f32_e32 v6, s11, v1 ; 3E0C020B
v_mul_f32_e32 v0, s8, v7 ; 10000E08
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_add_f32_e32 v0, v0, v16 ; 06002100
v_max_f32_e64 v1, v0, -v15 ; D2200001 40021F00
v_sub_f32_e64 v2, 1.0, s5 ; D2080002 00000AF2
v_mul_f32_e32 v0, v0, v2 ; 10000500
v_mac_f32_e32 v0, s5, v1 ; 3E000205
exp 15, 32, 0, 0, 0, v5, v6, v4, v3 ; F800020F 03040605
exp 15, 12, 0, 1, 0, v13, v14, v0, v15 ; F80008CF 0F000E0D
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 20
Code Size: 1048 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1]
DCL TEMP[0], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D
2: FSLT TEMP[0].x, TEMP[0].wwww, CONST[1].xxxx
3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
4: KILL_IF -TEMP[0].xxxx
5: MOV OUT[0], IMM[0].yyyy
6: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%26 = load <32 x i8>, <32 x i8> addrspace(2)* %25, align 32, !tbaa !0
%27 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%28 = load <16 x i8>, <16 x i8> addrspace(2)* %27, align 16, !tbaa !0
%29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%31 = bitcast float %29 to i32
%32 = bitcast float %30 to i32
%33 = insertelement <2 x i32> undef, i32 %31, i32 0
%34 = insertelement <2 x i32> %33, i32 %32, i32 1
%35 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %26, <16 x i8> %28, i32 2)
%36 = extractelement <4 x float> %35, i32 3
%37 = fcmp olt float %36, %24
%38 = select i1 %37, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %38)
%39 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%40 = bitcast i32 %39 to float
%41 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%42 = bitcast i32 %41 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800800 00230002
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_cmp_gt_f32_e32 vcc, s0, v0 ; 7C080000
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 92 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..24]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, 0.0000}
0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[13].xyzz
1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww
2: MOV TEMP[2].y, IMM[0].xxxx
3: MOV TEMP[2].x, TEMP[1].xxxx
4: MOV TEMP[2].z, TEMP[1].xxxx
5: MUL TEMP[1], CONST[14], TEMP[0].xxxx
6: MAD TEMP[1], CONST[15], TEMP[0].yyyy, TEMP[1]
7: MAD TEMP[1].xyz, CONST[16], TEMP[0].zzzz, TEMP[1]
8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz
9: DP3 TEMP[1].x, CONST[18].xyzz, TEMP[0].xyzz
10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[18].wwww
11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[18].xyzz
12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
13: LRP TEMP[0].xyz, CONST[19].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
14: MUL TEMP[1], CONST[21], TEMP[0].xxxx
15: MAD TEMP[1], CONST[22], TEMP[0].yyyy, TEMP[1]
16: MAD TEMP[0], CONST[23], TEMP[0].zzzz, TEMP[1]
17: ADD TEMP[0], TEMP[0], CONST[24]
18: MUL TEMP[1].xyw, TEMP[0], IMM[0].zzzz
19: MOV TEMP[2].x, TEMP[1].xxxx
20: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
21: MOV TEMP[2].y, TEMP[3].xxxx
22: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
23: MOV TEMP[1].zw, TEMP[0].wwzw
24: MOV TEMP[2].x, CONST[8].xxxx
25: MOV TEMP[2].y, CONST[9].xxxx
26: MOV TEMP[2].z, CONST[10].xxxx
27: MOV TEMP[3].x, CONST[8].yyyy
28: MOV TEMP[3].y, CONST[9].yyyy
29: MOV TEMP[3].z, CONST[10].yyyy
30: MOV TEMP[4].x, CONST[8].zzzz
31: MOV TEMP[4].y, CONST[9].zzzz
32: MOV TEMP[4].z, CONST[10].zzzz
33: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx
34: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz
35: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz
36: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
37: RSQ TEMP[3].x, TEMP[3].xxxx
38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
39: MOV TEMP[3].w, IMM[0].yyyy
40: MOV TEMP[3].xyz, TEMP[2].xyzx
41: DP4 TEMP[4].x, CONST[1], TEMP[3]
42: DP4 TEMP[5].x, CONST[2], TEMP[3]
43: MOV TEMP[4].y, TEMP[5].xxxx
44: DP4 TEMP[3].x, CONST[3], TEMP[3]
45: MOV TEMP[4].z, TEMP[3].xxxx
46: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx
47: DP4 TEMP[5].x, CONST[4], TEMP[3]
48: DP4 TEMP[6].x, CONST[5], TEMP[3]
49: MOV TEMP[5].y, TEMP[6].xxxx
50: DP4 TEMP[3].x, CONST[6], TEMP[3]
51: MOV TEMP[5].z, TEMP[3].xxxx
52: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy
53: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx
54: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz
55: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz
56: MAD TEMP[3].xy, IN[3].xyyy, CONST[20].xyyy, CONST[20].zwww
57: MOV TEMP[3].w, TEMP[2].xxxx
58: MOV TEMP[2].xy, TEMP[2].yzyy
59: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww
60: MOV TEMP[3].z, TEMP[4].xxxx
61: MOV OUT[1], TEMP[1]
62: MOV OUT[3], TEMP[2]
63: MOV OUT[0], TEMP[0]
64: MOV OUT[2], TEMP[3]
65: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI "VERT" listing that
; precedes this module in the dump.  The body loads shader constants and four
; vertex inputs, evaluates the TGSI arithmetic as scalar float operations, and
; finishes with four llvm.SI.export calls.
;
; Arguments used by the body:
;   %1       - array of <16 x i8> values; element 0 is passed to every
;              llvm.SI.load.const call
;   %4       - array of <16 x i8> values; elements 0..3 are passed to
;              llvm.SI.vs.load.input (one per TGSI input IN[0]..IN[3])
;   %5, %7   - added together to form the index given to llvm.SI.vs.load.input
; The remaining arguments are not referenced in this function.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Constant loads.  Comparing with the TGSI listing, the byte offset appears to
; be 16 * n + 4 * c for component c of CONST[n] (e.g. offset 208 is used where
; TGSI reads CONST[13].x) - NOTE(review): inferred from the uses below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
; Vertex inputs.  Each input is fetched from element k of %4 with index
; %5 + %7 (the same sum is recomputed for every fetch).
; IN[0]: x, y, z are used.
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
; IN[1]: x, y, z are used.
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = extractelement <4 x float> %99, i32 2
; IN[2]: only w is used.
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 3
; IN[3]: x and y are used.
%108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0
%110 = add i32 %5, %7
%111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
; TGSI 0: IN[0].xyz * CONST[13].xyz.
%114 = fmul float %93, %52
%115 = fmul float %94, %53
%116 = fmul float %95, %54
; TGSI 1: IN[2].w squared; used as the x and z weight of the first LRP.
%117 = fmul float %107, %107
; TGSI 5-7: multiply-accumulate of the scaled input with CONST[14..16].xyz.
%118 = fmul float %55, %114
%119 = fmul float %56, %114
%120 = fmul float %57, %114
%121 = fmul float %58, %115
%122 = fadd float %121, %118
%123 = fmul float %59, %115
%124 = fadd float %123, %119
%125 = fmul float %60, %115
%126 = fadd float %125, %120
%127 = fmul float %61, %116
%128 = fadd float %127, %122
%129 = fmul float %62, %116
%130 = fadd float %129, %124
%131 = fmul float %63, %116
%132 = fadd float %131, %126
; TGSI 8: LRP between the transformed and the scaled input.  The y weight is
; the constant 0.0 (TEMP[2].y was set from IMM[0].x).
%133 = call float @llvm.AMDGPU.lrp(float %117, float %128, float %114)
%134 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %130, float %115)
%135 = call float @llvm.AMDGPU.lrp(float %117, float %132, float %116)
; TGSI 9-10: dot product with CONST[18].xyz plus CONST[18].w.
%136 = fmul float %64, %133
%137 = fmul float %65, %134
%138 = fadd float %137, %136
%139 = fmul float %66, %135
%140 = fadd float %138, %139
%141 = fadd float %140, %67
; TGSI 11-12: subtract that value times CONST[18].xyz from the vector.
%142 = fmul float %141, %64
%143 = fmul float %141, %65
%144 = fmul float %141, %66
%145 = fsub float %133, %142
%146 = fsub float %134, %143
%147 = fsub float %135, %144
; TGSI 13: LRP weighted by CONST[19].x.
%148 = call float @llvm.AMDGPU.lrp(float %68, float %133, float %145)
%149 = call float @llvm.AMDGPU.lrp(float %68, float %134, float %146)
%150 = call float @llvm.AMDGPU.lrp(float %68, float %135, float %147)
; TGSI 14-17: four-component multiply-accumulate with CONST[21..23], then add
; CONST[24].  The result (%171..%174) is what TGSI writes to OUT[0].
%151 = fmul float %73, %148
%152 = fmul float %74, %148
%153 = fmul float %75, %148
%154 = fmul float %76, %148
%155 = fmul float %77, %149
%156 = fadd float %155, %151
%157 = fmul float %78, %149
%158 = fadd float %157, %152
%159 = fmul float %79, %149
%160 = fadd float %159, %153
%161 = fmul float %80, %149
%162 = fadd float %161, %154
%163 = fmul float %81, %150
%164 = fadd float %163, %156
%165 = fmul float %82, %150
%166 = fadd float %165, %158
%167 = fmul float %83, %150
%168 = fadd float %167, %160
%169 = fmul float %84, %150
%170 = fadd float %169, %162
%171 = fadd float %164, %85
%172 = fadd float %166, %86
%173 = fadd float %168, %87
%174 = fadd float %170, %88
; TGSI 18-22: scale x, y and w by 0.5, multiply the y term by CONST[0].x and
; add the halved w to both.
%175 = fmul float %171, 5.000000e-01
%176 = fmul float %172, 5.000000e-01
%177 = fmul float %174, 5.000000e-01
%178 = fmul float %176, %13
%179 = fadd float %175, %177
%180 = fadd float %178, %177
; TGSI 24-35: combine IN[1].xyz with the xyz components of CONST[8..10].
%181 = fmul float %41, %100
%182 = fmul float %44, %100
%183 = fmul float %47, %100
%184 = fmul float %42, %101
%185 = fadd float %184, %181
%186 = fmul float %45, %101
%187 = fadd float %186, %182
%188 = fmul float %48, %101
%189 = fadd float %188, %183
%190 = fmul float %43, %102
%191 = fadd float %190, %185
%192 = fmul float %46, %102
%193 = fadd float %192, %187
%194 = fmul float %49, %102
%195 = fadd float %194, %189
; TGSI 36-38: scale the vector by the reciprocal square root of its squared
; length.
%196 = fmul float %191, %191
%197 = fmul float %193, %193
%198 = fadd float %197, %196
%199 = fmul float %195, %195
%200 = fadd float %198, %199
%201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200)
%202 = fmul float %191, %201
%203 = fmul float %193, %201
%204 = fmul float %195, %201
; TGSI 41-45: DP4 with CONST[1], CONST[2], CONST[3].  The w operand is 1.0 in
; TGSI, so the .w constant is simply added.
%205 = fmul float %14, %202
%206 = fmul float %15, %203
%207 = fadd float %205, %206
%208 = fmul float %16, %204
%209 = fadd float %207, %208
%210 = fadd float %209, %17
%211 = fmul float %18, %202
%212 = fmul float %19, %203
%213 = fadd float %211, %212
%214 = fmul float %20, %204
%215 = fadd float %213, %214
%216 = fadd float %215, %21
%217 = fmul float %22, %202
%218 = fmul float %23, %203
%219 = fadd float %217, %218
%220 = fmul float %24, %204
%221 = fadd float %219, %220
%222 = fadd float %221, %25
; TGSI 46: component products x*y, y*z, z*z, z*x of the scaled vector.
%223 = fmul float %202, %203
%224 = fmul float %203, %204
%225 = fmul float %204, %204
%226 = fmul float %204, %202
; TGSI 47-51: DP4 of those products with CONST[4], CONST[5], CONST[6].
%227 = fmul float %26, %223
%228 = fmul float %27, %224
%229 = fadd float %227, %228
%230 = fmul float %28, %225
%231 = fadd float %229, %230
%232 = fmul float %29, %226
%233 = fadd float %231, %232
%234 = fmul float %30, %223
%235 = fmul float %31, %224
%236 = fadd float %234, %235
%237 = fmul float %32, %225
%238 = fadd float %236, %237
%239 = fmul float %33, %226
%240 = fadd float %238, %239
%241 = fmul float %34, %223
%242 = fmul float %35, %224
%243 = fadd float %241, %242
%244 = fmul float %36, %225
%245 = fadd float %243, %244
%246 = fmul float %37, %226
%247 = fadd float %245, %246
; TGSI 52-54: (x*x - y*y) times CONST[7].xyz, added to the previous sums.
%248 = fmul float %203, %203
%249 = fmul float %202, %202
%250 = fsub float %249, %248
%251 = fmul float %38, %250
%252 = fadd float %251, %233
%253 = fmul float %39, %250
%254 = fadd float %253, %240
%255 = fmul float %40, %250
%256 = fadd float %255, %247
; TGSI 55: add the DP4 results from TGSI 41-45.
%257 = fadd float %252, %210
%258 = fadd float %254, %216
%259 = fadd float %256, %222
; TGSI 56: IN[3].xy * CONST[20].xy + CONST[20].zw.
%260 = fmul float %112, %69
%261 = fadd float %260, %71
%262 = fmul float %113, %70
%263 = fadd float %262, %72
; TGSI 59: OUT[0] z value * CONST[12].z + CONST[12].w.
%264 = fmul float %173, %50
%265 = fadd float %264, %51
; Exports.  Judging by the values passed, targets 32, 33 and 34 carry TGSI
; OUT[1], OUT[2] and OUT[3], and target 12 carries OUT[0] (POSITION).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %179, float %180, float %173, float %174)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %261, float %263, float %265, float %257)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %258, float %259, float %259, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %171, float %172, float %173, float %174)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code emitted for the vertex shader @main defined in the LLVM module
; directly above.  The trailing "; XXXXXXXX" on each line is the instruction
; encoding printed by the disassembler.
;
; v0 = s10 + v0; v0 is then the idxen index of every buffer_load_format_xyzw
; (presumably the "add i32 %5, %7" of the IR - TODO confirm).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[68:71] (loaded via s[2:3]) is the base of every s_buffer_load_dword below.
; The four values loaded via s[8:9] are the resources of the four
; buffer_load_format_xyzw instructions.
s_load_dwordx4 s[68:71], s[2:3], 0x0 ; C0A20300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
; The s_buffer_load_dword immediates look like dword indices, i.e. the byte
; offsets of the IR llvm.SI.load.const calls divided by 4 (0x63 = 99 <-> 396)
; - NOTE(review): inferred from the matching offset sets.
s_buffer_load_dword s35, s[68:71], 0x29 ; C211C529
; Input fetches.  The destination ranges overlap (v[1:4], v[4:7], v[7:10]), so
; v4 and v7 are overwritten by the following fetch.
buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s25, s[68:71], 0x2a ; C20CC52A
s_buffer_load_dword s0, s[68:71], 0x32 ; C2004532
s_buffer_load_dword s4, s[68:71], 0x33 ; C2024533
s_buffer_load_dword s67, s[68:71], 0x34 ; C221C534
s_buffer_load_dword s2, s[68:71], 0x1c ; C201451C
s_buffer_load_dword s3, s[68:71], 0x1d ; C201C51D
s_buffer_load_dword s1, s[68:71], 0x1e ; C200C51E
s_buffer_load_dword s72, s[68:71], 0x20 ; C2244520
s_buffer_load_dword s45, s[68:71], 0x21 ; C216C521
s_buffer_load_dword s73, s[68:71], 0x35 ; C224C535
s_buffer_load_dword s74, s[68:71], 0x36 ; C2254536
s_buffer_load_dword s51, s[68:71], 0x38 ; C219C538
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Copy s4 (constant dword 0x33) into v0 before s4 is reloaded further down;
; v0 is the accumulator of the later "v_mac_f32 v0, s0, v3".
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s56, s[68:71], 0x39 ; C21C4539
s_buffer_load_dword s53, s[68:71], 0x3a ; C21AC53A
s_buffer_load_dword s44, s[68:71], 0x22 ; C2164522
s_buffer_load_dword s75, s[68:71], 0x24 ; C225C524
s_buffer_load_dword s55, s[68:71], 0x25 ; C21BC525
s_buffer_load_dword s46, s[68:71], 0x26 ; C2174526
s_buffer_load_dword s76, s[68:71], 0x28 ; C2264528
s_buffer_load_dword s57, s[68:71], 0x3c ; C21CC53C
s_buffer_load_dword s58, s[68:71], 0x3d ; C21D453D
s_buffer_load_dword s54, s[68:71], 0x3e ; C21B453E
s_buffer_load_dword s50, s[68:71], 0x40 ; C2194540
s_buffer_load_dword s47, s[68:71], 0x41 ; C217C541
s_buffer_load_dword s48, s[68:71], 0x42 ; C2184542
s_buffer_load_dword s38, s[68:71], 0x48 ; C2134548
s_buffer_load_dword s39, s[68:71], 0x49 ; C213C549
s_buffer_load_dword s36, s[68:71], 0x4a ; C212454A
s_buffer_load_dword s41, s[68:71], 0x4b ; C214C54B
s_buffer_load_dword s24, s[68:71], 0x4c ; C20C454C
s_buffer_load_dword s60, s[68:71], 0x50 ; C21E4550
s_buffer_load_dword s59, s[68:71], 0x51 ; C21DC551
s_buffer_load_dword s5, s[68:71], 0x52 ; C202C552
s_buffer_load_dword s6, s[68:71], 0x53 ; C2034553
s_buffer_load_dword s33, s[68:71], 0x54 ; C210C554
s_buffer_load_dword s28, s[68:71], 0x55 ; C20E4555
s_buffer_load_dword s29, s[68:71], 0x56 ; C20EC556
s_buffer_load_dword s26, s[68:71], 0x57 ; C20D4557
s_buffer_load_dword s27, s[68:71], 0x58 ; C20DC558
s_buffer_load_dword s9, s[68:71], 0xd ; C204C50D
s_buffer_load_dword s8, s[68:71], 0xe ; C204450E
s_buffer_load_dword s4, s[68:71], 0xf ; C202450F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Copy s5 and s6 (constant dwords 0x52, 0x53) into v7/v8 before both SGPRs are
; reloaded below; v7/v8 are the accumulators of the v_mac with v11/v12.
v_mov_b32_e32 v7, s5 ; 7E0E0205
s_buffer_load_dword s17, s[68:71], 0x10 ; C208C510
v_mov_b32_e32 v8, s6 ; 7E100206
s_buffer_load_dword s22, s[68:71], 0x11 ; C20B4511
s_buffer_load_dword s34, s[68:71], 0x59 ; C2114559
s_buffer_load_dword s30, s[68:71], 0x5a ; C20F455A
s_buffer_load_dword s31, s[68:71], 0x5b ; C20FC55B
s_buffer_load_dword s19, s[68:71], 0x5c ; C209C55C
s_buffer_load_dword s20, s[68:71], 0x5d ; C20A455D
s_buffer_load_dword s6, s[68:71], 0x0 ; C2034500
s_buffer_load_dword s11, s[68:71], 0x4 ; C205C504
s_buffer_load_dword s13, s[68:71], 0x5 ; C206C505
s_buffer_load_dword s10, s[68:71], 0x6 ; C2054506
s_buffer_load_dword s7, s[68:71], 0x7 ; C203C507
s_buffer_load_dword s14, s[68:71], 0x8 ; C2074508
s_buffer_load_dword s15, s[68:71], 0x9 ; C207C509
s_buffer_load_dword s12, s[68:71], 0xa ; C206450A
s_buffer_load_dword s5, s[68:71], 0xb ; C202C50B
s_buffer_load_dword s16, s[68:71], 0xc ; C208450C
s_buffer_load_dword s32, s[68:71], 0x12 ; C2104512
s_buffer_load_dword s18, s[68:71], 0x13 ; C2094513
s_buffer_load_dword s42, s[68:71], 0x14 ; C2154514
s_buffer_load_dword s49, s[68:71], 0x15 ; C218C515
s_buffer_load_dword s37, s[68:71], 0x16 ; C212C516
s_buffer_load_dword s21, s[68:71], 0x17 ; C20AC517
s_buffer_load_dword s43, s[68:71], 0x18 ; C215C518
s_buffer_load_dword s52, s[68:71], 0x19 ; C21A4519
s_buffer_load_dword s40, s[68:71], 0x1a ; C214451A
s_buffer_load_dword s23, s[68:71], 0x1b ; C20BC51B
s_buffer_load_dword s65, s[68:71], 0x5e ; C220C55E
s_buffer_load_dword s66, s[68:71], 0x5f ; C221455F
s_buffer_load_dword s62, s[68:71], 0x60 ; C21F4560
s_buffer_load_dword s63, s[68:71], 0x61 ; C21FC561
s_buffer_load_dword s61, s[68:71], 0x62 ; C21EC562
s_buffer_load_dword s64, s[68:71], 0x63 ; C2204563
; Scale the first input (v1..v3) by constant dwords 0x34..0x36.
v_mul_f32_e32 v1, s67, v1 ; 10020243
v_mul_f32_e32 v2, s73, v2 ; 10040449
v_mul_f32_e32 v3, s74, v3 ; 1006064A
; Start of the three sums over the second input (v4..v6).
v_mul_f32_e32 v9, s72, v4 ; 10120848
v_mul_f32_e32 v13, s75, v4 ; 101A084B
v_mul_f32_e32 v4, s76, v4 ; 1008084C
; v7/v8 += constant * fourth input (x, y).
v_mac_f32_e32 v7, s60, v11 ; 3E0E163C
v_mac_f32_e32 v8, s59, v12 ; 3E10183B
v_mul_f32_e32 v11, s51, v1 ; 10160233
v_mul_f32_e32 v12, s56, v1 ; 10180238
v_mul_f32_e32 v14, s53, v1 ; 101C0235
v_mac_f32_e32 v9, s45, v5 ; 3E120A2D
v_mac_f32_e32 v13, s55, v5 ; 3E1A0A37
v_mac_f32_e32 v4, s35, v5 ; 3E080A23
v_mac_f32_e32 v11, s57, v2 ; 3E160439
v_mac_f32_e32 v12, s58, v2 ; 3E18043A
v_mac_f32_e32 v14, s54, v2 ; 3E1C0436
v_mac_f32_e32 v9, s44, v6 ; 3E120C2C
v_mac_f32_e32 v13, s46, v6 ; 3E1A0C2E
v_mac_f32_e32 v4, s25, v6 ; 3E080C19
v_mac_f32_e32 v11, s50, v3 ; 3E160632
v_mac_f32_e32 v12, s47, v3 ; 3E18062F
v_mac_f32_e32 v14, s48, v3 ; 3E1C0630
; v5 = v10 * v10 and v6 = 1 - v10 * v10 weight the blend of v1/v3 with
; v11/v14; the y lane (v2) accumulates v12 with the inline constant 0.
v_mul_f32_e32 v5, v10, v10 ; 100A150A
v_mad_f32 v6, -v10, v10, 1.0 ; D2820006 23CA150A
v_mul_f32_e32 v1, v1, v6 ; 10020D01
v_mul_f32_e32 v3, v3, v6 ; 10060D03
v_mac_f32_e32 v1, v11, v5 ; 3E020B0B
v_mac_f32_e32 v3, v14, v5 ; 3E060B0E
v_mac_f32_e32 v2, 0, v12 ; 3E041880
; v5 = s38*v1 + s39*v2 + s36*v3 + s41, then subtract v5 * (s38, s39, s36)
; from (v1, v2, v3).
v_mul_f32_e32 v5, s38, v1 ; 100A0226
v_mac_f32_e32 v5, s39, v2 ; 3E0A0427
v_mac_f32_e32 v5, s36, v3 ; 3E0A0624
v_add_f32_e32 v5, s41, v5 ; 060A0A29
v_mad_f32 v6, -v5, s38, v1 ; D2820006 24044D05
v_mad_f32 v10, -v5, s39, v2 ; D282000A 24084F05
v_mad_f32 v5, -v5, s36, v3 ; D2820005 240C4905
; Blend with weight s24: result = previous * (1 - s24) + s24 * (v1, v2, v3).
v_sub_f32_e64 v11, 1.0, s24 ; D208000B 000030F2
v_mul_f32_e32 v6, v6, v11 ; 100C1706
v_mul_f32_e32 v10, v10, v11 ; 1014170A
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mac_f32_e32 v6, s24, v1 ; 3E0C0218
v_mac_f32_e32 v10, s24, v2 ; 3E140418
v_mac_f32_e32 v5, s24, v3 ; 3E0A0618
; Four-component multiply-accumulate into v1, v2, v3, v6.
v_mul_f32_e32 v1, s33, v6 ; 10020C21
v_mul_f32_e32 v2, s28, v6 ; 10040C1C
v_mul_f32_e32 v3, s29, v6 ; 10060C1D
v_mul_f32_e32 v6, s26, v6 ; 100C0C1A
v_mac_f32_e32 v1, s27, v10 ; 3E02141B
; v11 = rsq_clamp(v9*v9 + v13*v13 + v4*v4).
v_mul_f32_e32 v11, v9, v9 ; 10161309
v_mac_f32_e32 v11, v13, v13 ; 3E161B0D
v_mac_f32_e32 v11, v4, v4 ; 3E160904
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v2, s34, v10 ; 3E041422
v_mac_f32_e32 v3, s30, v10 ; 3E06141E
v_mac_f32_e32 v6, s31, v10 ; 3E0C141F
; Scale v13, v4 and v9 by v11 (results in v10, v4, v9) and form their
; pairwise products for the sums accumulated in v13, v14, v12.
v_mul_f32_e32 v10, v11, v13 ; 10141B0B
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mul_f32_e32 v12, v4, v10 ; 10181504
v_mul_f32_e32 v13, s22, v12 ; 101A1816
v_mul_f32_e32 v14, s49, v12 ; 101C1831
v_mul_f32_e32 v12, s52, v12 ; 10181834
v_mul_f32_e32 v9, v11, v9 ; 1012130B
v_mul_f32_e32 v11, v10, v9 ; 1016130A
v_mac_f32_e32 v13, s17, v11 ; 3E1A1611
v_mac_f32_e32 v14, s42, v11 ; 3E1C162A
v_mac_f32_e32 v12, s43, v11 ; 3E18162B
v_mac_f32_e32 v1, s19, v5 ; 3E020A13
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mac_f32_e32 v3, s65, v5 ; 3E060A41
v_mac_f32_e32 v6, s66, v5 ; 3E0C0A42
v_mul_f32_e32 v5, v4, v4 ; 100A0904
v_mac_f32_e32 v13, s32, v5 ; 3E1A0A20
v_mac_f32_e32 v14, s37, v5 ; 3E1C0A25
v_mac_f32_e32 v12, s40, v5 ; 3E180A28
v_mul_f32_e32 v5, s13, v10 ; 100A140D
v_mac_f32_e32 v5, s11, v9 ; 3E0A120B
v_mul_f32_e32 v11, s15, v10 ; 1016140F
v_mac_f32_e32 v11, s14, v9 ; 3E16120E
v_mul_f32_e32 v15, s9, v10 ; 101E1409
v_mac_f32_e32 v15, s16, v9 ; 3E1E1210
v_mac_f32_e32 v5, s10, v4 ; 3E0A080A
v_mac_f32_e32 v11, s12, v4 ; 3E16080C
v_mac_f32_e32 v15, s8, v4 ; 3E1E0808
v_mul_f32_e32 v4, v9, v4 ; 10080909
v_mac_f32_e32 v13, s18, v4 ; 3E1A0812
v_mac_f32_e32 v14, s21, v4 ; 3E1C0815
v_mac_f32_e32 v12, s23, v4 ; 3E180817
; v4 = v9*v9 - v10*v10, accumulated with s2, s3, s1.
v_mul_f32_e32 v4, v10, v10 ; 1008150A
v_mad_f32 v4, v9, v9, -v4 ; D2820004 84121309
v_mac_f32_e32 v13, s2, v4 ; 3E1A0802
v_mac_f32_e32 v14, s3, v4 ; 3E1C0803
v_mac_f32_e32 v12, s1, v4 ; 3E180801
; Final additive constants; v1, v2, v3, v4 are the values exported to target
; 12 at the end.
v_add_f32_e32 v1, s62, v1 ; 0602023E
v_add_f32_e32 v2, s63, v2 ; 0604043F
v_add_f32_e32 v4, s64, v6 ; 06080C40
; v10 = 0.5*v1 + 0.5*v4 and v9 = 0.5*v4 + s6 * (0.5*v2).
v_mul_f32_e32 v6, 0.5, v2 ; 100C04F0
v_mul_f32_e32 v9, 0.5, v4 ; 101208F0
v_mad_f32 v10, 0.5, v1, v9 ; D282000A 042602F0
v_mac_f32_e32 v9, s6, v6 ; 3E120C06
v_add_f32_e32 v5, s7, v5 ; 060A0A07
v_add_f32_e32 v3, s61, v3 ; 0606063D
v_mac_f32_e32 v0, s0, v3 ; 3E000600
v_add_f32_e32 v5, v5, v13 ; 060A1B05
exp 15, 32, 0, 0, 0, v10, v9, v3, v4 ; F800020F 0403090A
exp 15, 33, 0, 0, 0, v7, v8, v0, v5 ; F800021F 05000807
; v0 and v5 are sources of the export above and are rewritten right after
; this wait.
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, s5, v11 ; 06001605
v_add_f32_e32 v5, s4, v15 ; 060A1E04
v_add_f32_e32 v0, v0, v14 ; 06001D00
v_add_f32_e32 v5, v5, v12 ; 060A1905
v_mov_b32_e32 v6, 0 ; 7E0C0280
exp 15, 34, 0, 0, 0, v0, v5, v5, v6 ; F800022F 06050500
exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 80
VGPRS: 16
Code Size: 900 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..2]
DCL CONST[4]
DCL CONST[6]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].yz, IN[2].yxyy
2: MOV TEMP[1].xy, IN[1].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: FSLT TEMP[2].x, TEMP[1].wwww, CONST[6].xxxx
5: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx
6: KILL_IF -TEMP[2].xxxx
7: MOV TEMP[2].xy, IN[0].xyyy
8: MOV TEMP[2].w, IN[0].wwww
9: TXP TEMP[2], TEMP[2], SAMP[1], 2D
10: LG2 TEMP[3].x, TEMP[2].xxxx
11: LG2 TEMP[3].y, TEMP[2].yyyy
12: LG2 TEMP[3].z, TEMP[2].zzzz
13: LG2 TEMP[3].w, TEMP[2].wwww
14: MOV TEMP[2], -TEMP[3]
15: ADD TEMP[0].xyz, TEMP[2].xyzz, TEMP[0].xyzz
16: MUL TEMP[2].xyz, TEMP[2].wwww, CONST[2]
17: MUL TEMP[3].xyz, TEMP[1].xyzz, CONST[4].xyzz
18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[0].xyzz
19: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xyzz, TEMP[3].xyzz
20: MAD TEMP[1].x, TEMP[2].xyzz, CONST[1].wwww, TEMP[1].wwww
21: MOV TEMP[1].w, TEMP[1].xxxx
22: MOV_SAT TEMP[2].x, IN[1].zzzz
23: LRP TEMP[1].xyz, TEMP[2].xxxx, TEMP[0].xyzz, CONST[0].xyzz
24: MOV OUT[0], TEMP[1]
25: END
; ModuleID = 'tgsi'
; Fragment shader entry point generated from the TGSI "FRAG" listing that
; precedes this module in the dump.  It samples two textures, discards the
; fragment when the first sample's w component is below CONST[6].x, combines
; the samples with interpolated inputs and constants, and exports the result
; packed by llvm.SI.packf16.
;
; Arguments used by the body:
;   %1  - array of <16 x i8>; element 0 feeds every llvm.SI.load.const call
;   %2  - array of <4 x i32>; elements 0 and 1 are the <16 x i8> operands of
;         the two sample calls
;   %3  - array of <8 x i32>; elements 0 and 1 are the <32 x i8> operands of
;         the two sample calls
;   %5, %7 - passed unchanged to every llvm.SI.fs.interp call
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant loads.  Comparing with the TGSI listing, the byte offsets appear to
; map to CONST[0].xyz (0..8), CONST[1].w (28), CONST[2].xyz (32..40),
; CONST[4].xyz (64..72) and CONST[6].x (96) - NOTE(review): inferred from the
; uses below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
; Operands of the first sample call: element 0 of %3 and element 0 of %2.
%35 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%36 = load <32 x i8>, <32 x i8> addrspace(2)* %35, align 32, !tbaa !0
%37 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
; Operands of the second sample call: element 1 of %3 and element 1 of %2.
%39 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%40 = bitcast <8 x i32> addrspace(2)* %39 to <32 x i8> addrspace(2)*
%41 = load <32 x i8>, <32 x i8> addrspace(2)* %40, align 32, !tbaa !0
%42 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%43 = bitcast <4 x i32> addrspace(2)* %42 to <16 x i8> addrspace(2)*
%44 = load <16 x i8>, <16 x i8> addrspace(2)* %43, align 16, !tbaa !0
; Interpolated inputs.  The first two immediates appear to be (component,
; input index): IN[0].x/.y/.w, IN[1].x/.y/.z/.w, IN[2].x/.y.
%45 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%46 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%47 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%48 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%49 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%50 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%51 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%52 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%53 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
; TGSI 2-3: first texture sample at IN[1].xy (coordinates passed as bitcast
; i32 lanes).
%54 = bitcast float %48 to i32
%55 = bitcast float %49 to i32
%56 = insertelement <2 x i32> undef, i32 %54, i32 0
%57 = insertelement <2 x i32> %56, i32 %55, i32 1
%58 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %36, <16 x i8> %38, i32 2)
%59 = extractelement <4 x float> %58, i32 0
%60 = extractelement <4 x float> %58, i32 1
%61 = extractelement <4 x float> %58, i32 2
%62 = extractelement <4 x float> %58, i32 3
; TGSI 4-6: pass -1.0 to llvm.AMDGPU.kill when sample.w < CONST[6].x,
; otherwise 0.0.
%63 = fcmp olt float %62, %34
%64 = select i1 %63, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %64)
; TGSI 7-9 (TXP): divide IN[0].xy by IN[0].w, then take the second sample.
%65 = fdiv float %45, %47
%66 = fdiv float %46, %47
%67 = bitcast float %65 to i32
%68 = bitcast float %66 to i32
%69 = insertelement <2 x i32> undef, i32 %67, i32 0
%70 = insertelement <2 x i32> %69, i32 %68, i32 1
%71 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %70, <32 x i8> %41, <16 x i8> %44, i32 2)
%72 = extractelement <4 x float> %71, i32 0
%73 = extractelement <4 x float> %71, i32 1
%74 = extractelement <4 x float> %71, i32 2
%75 = extractelement <4 x float> %71, i32 3
; TGSI 10-13: log2 of every component of the second sample.
%76 = call float @llvm.log2.f32(float %72)
%77 = call float @llvm.log2.f32(float %73)
%78 = call float @llvm.log2.f32(float %74)
%79 = call float @llvm.log2.f32(float %75)
; TGSI 14-15: negate the logs and add (IN[1].w, IN[2].x, IN[2].y); for xyz the
; negate and add are expressed as a single fsub.
%80 = fsub float -0.000000e+00, %79
%81 = fsub float %51, %76
%82 = fsub float %52, %77
%83 = fsub float %53, %78
; TGSI 16: -log2(sample1.w) * CONST[2].xyz.
%84 = fmul float %28, %80
%85 = fmul float %29, %80
%86 = fmul float %30, %80
; TGSI 17-18: sample0.xyz * CONST[4].xyz * the sums from TGSI 15.
%87 = fmul float %59, %31
%88 = fmul float %60, %32
%89 = fmul float %61, %33
%90 = fmul float %87, %81
%91 = fmul float %88, %82
%92 = fmul float %89, %83
; TGSI 19: multiply-add of the two products above.
%93 = fmul float %81, %84
%94 = fadd float %93, %90
%95 = fmul float %82, %85
%96 = fadd float %95, %91
%97 = fmul float %83, %86
%98 = fadd float %97, %92
; TGSI 20-21: output w = %84 * CONST[1].w + sample0.w.
%99 = fmul float %84, %27
%100 = fadd float %99, %62
; TGSI 22-23: clamp IN[1].z to [0, 1] and use it as the LRP weight between the
; computed xyz and CONST[0].xyz.
%101 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00)
%102 = call float @llvm.AMDGPU.lrp(float %101, float %94, float %24)
%103 = call float @llvm.AMDGPU.lrp(float %101, float %96, float %25)
%104 = call float @llvm.AMDGPU.lrp(float %101, float %98, float %26)
; TGSI 24: pack (x, y) and (z, w) with llvm.SI.packf16 and export the two
; packed values, each passed twice.
%105 = call i32 @llvm.SI.packf16(float %102, float %103)
%106 = bitcast i32 %105 to float
%107 = call i32 @llvm.SI.packf16(float %104, float %100)
%108 = bitcast i32 %107 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %106, float %108, float %106, float %108)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600
v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118
v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700
v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900
v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901
s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504
s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[12:19], s[8:11] ; F0800F00 00430A05
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_cmp_gt_f32_e32 vcc, s20, v13 ; 7C081A14
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
v_mov_b32_e32 v1, 0x6f800000 ; 7E0202FF 6F800000
v_cmp_gt_f32_e64 vcc, |v4|, v1 ; D008016A 00020304
v_mov_b32_e32 v1, 0x2f800000 ; 7E0202FF 2F800000
v_cndmask_b32_e32 v1, 1.0, v1 ; 000202F2
v_mul_f32_e32 v4, v1, v4 ; 10080901
v_rcp_f32_e32 v4, v4 ; 7E085504
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v4, v2, v1 ; 10080302
v_mul_f32_e32 v5, v3, v1 ; 100A0303
image_sample v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[28:35], s[24:27] ; F0800F00 00C70104
s_buffer_load_dword s4, s[0:3], 0xa ; C202010A
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v3, v3 ; 7E064F03
s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112
v_subrev_f32_e32 v0, v3, v0 ; 0A000103
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
v_log_f32_e32 v3, v4 ; 7E064F04
s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110
s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111
s_buffer_load_dword s9, s[0:3], 0x8 ; C2048108
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s4, v3 ; 10080604
v_mul_f32_e32 v5, s5, v12 ; 100A1805
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
v_mul_f32_e32 v4, v4, v0 ; 10080104
v_mad_f32 v0, v5, v0, -v4 ; D2820000 84120105
v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v6, s6, v5 ; 100C0A06
v_mac_f32_e32 v6, v0, v4 ; 3E0C0900
v_mul_f32_e64 v0, s9, -v3 ; D2100000 40020609
v_mul_f32_e32 v7, s7, v10 ; 100E1407
v_mul_f32_e32 v10, s8, v11 ; 10141608
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v13, s4, v0 ; 3E1A0004
s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109
v_cvt_pkrtz_f16_f32_e32 v6, v6, v13 ; 5E0C1B06
v_log_f32_e32 v1, v1 ; 7E024F01
s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100
s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101
v_log_f32_e32 v2, v2 ; 7E044F02
v_subrev_f32_e32 v1, v1, v8 ; 0A021101
v_mul_f32_e32 v7, v1, v7 ; 100E0F01
v_mac_f32_e32 v7, v0, v1 ; 3E0E0300
v_subrev_f32_e32 v0, v2, v9 ; 0A001302
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s4, v3 ; 10020604
v_mul_f32_e32 v1, v1, v0 ; 10020101
v_mad_f32 v0, v10, v0, -v1 ; D2820000 8406010A
v_mul_f32_e32 v1, s5, v5 ; 10020A05
v_mac_f32_e32 v1, v7, v4 ; 3E020907
v_mul_f32_e32 v2, s0, v5 ; 10040A00
v_mac_f32_e32 v2, v0, v4 ; 3E040900
v_cvt_pkrtz_f16_f32_e32 v0, v1, v2 ; 5E000501
exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 400 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL CONST[0..24]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, 0.0000}
0: MUL TEMP[0].xyz, IN[0].xyzz, CONST[13].xyzz
1: MUL TEMP[1].x, IN[2].wwww, IN[2].wwww
2: MOV TEMP[2].y, IMM[0].xxxx
3: MOV TEMP[2].x, TEMP[1].xxxx
4: MOV TEMP[2].z, TEMP[1].xxxx
5: MUL TEMP[1], CONST[14], TEMP[0].xxxx
6: MAD TEMP[1], CONST[15], TEMP[0].yyyy, TEMP[1]
7: MAD TEMP[1].xyz, CONST[16], TEMP[0].zzzz, TEMP[1]
8: LRP TEMP[0].xyz, TEMP[2].xyzz, TEMP[1].xyzz, TEMP[0].xyzz
9: DP3 TEMP[1].x, CONST[18].xyzz, TEMP[0].xyzz
10: ADD TEMP[1].x, TEMP[1].xxxx, CONST[18].wwww
11: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[18].xyzz
12: ADD TEMP[1].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
13: LRP TEMP[0].xyz, CONST[19].xxxx, TEMP[0].xyzz, TEMP[1].xyzz
14: MUL TEMP[1], CONST[21], TEMP[0].xxxx
15: MAD TEMP[1], CONST[22], TEMP[0].yyyy, TEMP[1]
16: MAD TEMP[0], CONST[23], TEMP[0].zzzz, TEMP[1]
17: ADD TEMP[0], TEMP[0], CONST[24]
18: MUL TEMP[1].xyw, TEMP[0], IMM[0].zzzz
19: MOV TEMP[2].x, TEMP[1].xxxx
20: MUL TEMP[3].x, TEMP[1].yyyy, CONST[0].xxxx
21: MOV TEMP[2].y, TEMP[3].xxxx
22: ADD TEMP[1].xy, TEMP[2].xyyy, TEMP[1].wwww
23: MOV TEMP[1].zw, TEMP[0].wwzw
24: MOV TEMP[2].x, CONST[8].xxxx
25: MOV TEMP[2].y, CONST[9].xxxx
26: MOV TEMP[2].z, CONST[10].xxxx
27: MOV TEMP[3].x, CONST[8].yyyy
28: MOV TEMP[3].y, CONST[9].yyyy
29: MOV TEMP[3].z, CONST[10].yyyy
30: MOV TEMP[4].x, CONST[8].zzzz
31: MOV TEMP[4].y, CONST[9].zzzz
32: MOV TEMP[4].z, CONST[10].zzzz
33: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[1].xxxx
34: MAD TEMP[2].xyz, TEMP[3].xyzz, IN[1].yyyy, TEMP[2].xyzz
35: MAD TEMP[2].xyz, TEMP[4].xyzz, IN[1].zzzz, TEMP[2].xyzz
36: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
37: RSQ TEMP[3].x, TEMP[3].xxxx
38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
39: MOV TEMP[3].w, IMM[0].yyyy
40: MOV TEMP[3].xyz, TEMP[2].xyzx
41: DP4 TEMP[4].x, CONST[1], TEMP[3]
42: DP4 TEMP[5].x, CONST[2], TEMP[3]
43: MOV TEMP[4].y, TEMP[5].xxxx
44: DP4 TEMP[3].x, CONST[3], TEMP[3]
45: MOV TEMP[4].z, TEMP[3].xxxx
46: MUL TEMP[3], TEMP[2].xyzz, TEMP[2].yzzx
47: DP4 TEMP[5].x, CONST[4], TEMP[3]
48: DP4 TEMP[6].x, CONST[5], TEMP[3]
49: MOV TEMP[5].y, TEMP[6].xxxx
50: DP4 TEMP[3].x, CONST[6], TEMP[3]
51: MOV TEMP[5].z, TEMP[3].xxxx
52: MUL TEMP[3].x, TEMP[2].yyyy, TEMP[2].yyyy
53: MAD TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx, -TEMP[3].xxxx
54: MAD TEMP[2].xyz, CONST[7].xyzz, TEMP[2].xxxx, TEMP[5].xyzz
55: ADD TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xyzz
56: MAD TEMP[3].xy, IN[3].xyyy, CONST[20].xyyy, CONST[20].zwww
57: MOV TEMP[3].w, TEMP[2].xxxx
58: MOV TEMP[2].xy, TEMP[2].yzyy
59: MAD TEMP[4].x, TEMP[0].zzzz, CONST[12].zzzz, CONST[12].wwww
60: MOV TEMP[3].z, TEMP[4].xxxx
61: MOV OUT[1], TEMP[1]
62: MOV OUT[3], TEMP[2]
63: MOV OUT[0], TEMP[0]
64: MOV OUT[2], TEMP[3]
65: END
; ModuleID = 'tgsi'
; Vertex-shader entry point. Its arithmetic lines up step by step with the TGSI
; VERT listing printed just before this module; the "TGSI n" notes below refer
; to the instruction numbers of that listing.
;
; Arguments actually referenced in the body:
;   %1      - element 0 is the only source passed to llvm.SI.load.const
;   %4      - elements 0..3 are the descriptors passed to llvm.SI.vs.load.input
;   %5, %7  - added together to form the index of every vertex fetch
; Arguments %0, %2, %3, %6, %8, %9 and %10 are not referenced.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Scalar constant loads, all hoisted to the top. The i32 operand is a byte
; offset: 16*i + 4*c lines up with component c of CONST[i] in the TGSI listing
; (e.g. offset 208 -> CONST[13].x, consumed by %114 below).
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
; Vertex input fetches, one per element 0..3 of %4, each indexed by %5 + %7.
; Only the components the shader reads are extracted:
; IN[0].xyz, IN[1].xyz, IN[2].w and IN[3].xy.
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = extractelement <4 x float> %99, i32 2
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 3
%108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0
%110 = add i32 %5, %7
%111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
; TGSI 0: IN[0].xyz scaled component-wise by CONST[13].xyz.
%114 = fmul float %93, %52
%115 = fmul float %94, %53
%116 = fmul float %95, %54
; TGSI 1: IN[2].w squared; used as the x/z interpolation weight below.
%117 = fmul float %107, %107
; TGSI 5-7: scaled.x*CONST[14].xyz + scaled.y*CONST[15].xyz + scaled.z*CONST[16].xyz.
%118 = fmul float %55, %114
%119 = fmul float %56, %114
%120 = fmul float %57, %114
%121 = fmul float %58, %115
%122 = fadd float %121, %118
%123 = fmul float %59, %115
%124 = fadd float %123, %119
%125 = fmul float %60, %115
%126 = fadd float %125, %120
%127 = fmul float %61, %116
%128 = fadd float %127, %122
%129 = fmul float %62, %116
%130 = fadd float %129, %124
%131 = fmul float %63, %116
%132 = fadd float %131, %126
; TGSI 8: per-component LRP between the transformed and the merely scaled
; position. Weights are (%117, 0.0, %117): y uses the literal 0.0 that TGSI 2
; moved into TEMP[2].y.
%133 = call float @llvm.AMDGPU.lrp(float %117, float %128, float %114)
%134 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %130, float %115)
%135 = call float @llvm.AMDGPU.lrp(float %117, float %132, float %116)
; TGSI 9-10: d = dot(CONST[18].xyz, p) + CONST[18].w.
%136 = fmul float %64, %133
%137 = fmul float %65, %134
%138 = fadd float %137, %136
%139 = fmul float %66, %135
%140 = fadd float %138, %139
%141 = fadd float %140, %67
; TGSI 11-12: q = p - d * CONST[18].xyz.
%142 = fmul float %141, %64
%143 = fmul float %141, %65
%144 = fmul float %141, %66
%145 = fsub float %133, %142
%146 = fsub float %134, %143
%147 = fsub float %135, %144
; TGSI 13: LRP between p and q with weight CONST[19].x (%68).
%148 = call float @llvm.AMDGPU.lrp(float %68, float %133, float %145)
%149 = call float @llvm.AMDGPU.lrp(float %68, float %134, float %146)
%150 = call float @llvm.AMDGPU.lrp(float %68, float %135, float %147)
; TGSI 14-17: x*CONST[21] + y*CONST[22] + z*CONST[23] + CONST[24], all four
; components. The result %171..%174 is what TGSI 63 writes to OUT[0].
%151 = fmul float %73, %148
%152 = fmul float %74, %148
%153 = fmul float %75, %148
%154 = fmul float %76, %148
%155 = fmul float %77, %149
%156 = fadd float %155, %151
%157 = fmul float %78, %149
%158 = fadd float %157, %152
%159 = fmul float %79, %149
%160 = fadd float %159, %153
%161 = fmul float %80, %149
%162 = fadd float %161, %154
%163 = fmul float %81, %150
%164 = fadd float %163, %156
%165 = fmul float %82, %150
%166 = fadd float %165, %158
%167 = fmul float %83, %150
%168 = fadd float %167, %160
%169 = fmul float %84, %150
%170 = fadd float %169, %162
%171 = fadd float %164, %85
%172 = fadd float %166, %86
%173 = fadd float %168, %87
%174 = fadd float %170, %88
; TGSI 18-22: x, y and w are halved, the halved y is multiplied by CONST[0].x,
; and the halved w is added to both: %179 = 0.5*x + 0.5*w,
; %180 = 0.5*y*CONST[0].x + 0.5*w.
%175 = fmul float %171, 5.000000e-01
%176 = fmul float %172, 5.000000e-01
%177 = fmul float %174, 5.000000e-01
%178 = fmul float %176, %13
%179 = fadd float %175, %177
%180 = fadd float %178, %177
; TGSI 24-35: IN[1].xyz combined with CONST[8..10].xyz; output component k is
; dot(CONST[8+k].xyz, IN[1].xyz).
%181 = fmul float %41, %100
%182 = fmul float %44, %100
%183 = fmul float %47, %100
%184 = fmul float %42, %101
%185 = fadd float %184, %181
%186 = fmul float %45, %101
%187 = fadd float %186, %182
%188 = fmul float %48, %101
%189 = fadd float %188, %183
%190 = fmul float %43, %102
%191 = fadd float %190, %185
%192 = fmul float %46, %102
%193 = fadd float %192, %187
%194 = fmul float %49, %102
%195 = fadd float %194, %189
; TGSI 36-38: normalise that vector (DP3 with itself, RSQ, scale). RSQ is
; emitted as the clamped reciprocal-square-root intrinsic.
%196 = fmul float %191, %191
%197 = fmul float %193, %193
%198 = fadd float %197, %196
%199 = fmul float %195, %195
%200 = fadd float %198, %199
%201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200)
%202 = fmul float %191, %201
%203 = fmul float %193, %201
%204 = fmul float %195, %201
; TGSI 39-45: three DP4s of CONST[1], CONST[2], CONST[3] against (n.xyz, 1.0);
; the 1.0 w term appears as the plain add of the constant's w component.
%205 = fmul float %14, %202
%206 = fmul float %15, %203
%207 = fadd float %205, %206
%208 = fmul float %16, %204
%209 = fadd float %207, %208
%210 = fadd float %209, %17
%211 = fmul float %18, %202
%212 = fmul float %19, %203
%213 = fadd float %211, %212
%214 = fmul float %20, %204
%215 = fadd float %213, %214
%216 = fadd float %215, %21
%217 = fmul float %22, %202
%218 = fmul float %23, %203
%219 = fadd float %217, %218
%220 = fmul float %24, %204
%221 = fadd float %219, %220
%222 = fadd float %221, %25
; TGSI 46-51: the products (x*y, y*z, z*z, z*x) of the normalised vector,
; followed by three DP4s against CONST[4], CONST[5], CONST[6].
%223 = fmul float %202, %203
%224 = fmul float %203, %204
%225 = fmul float %204, %204
%226 = fmul float %204, %202
%227 = fmul float %26, %223
%228 = fmul float %27, %224
%229 = fadd float %227, %228
%230 = fmul float %28, %225
%231 = fadd float %229, %230
%232 = fmul float %29, %226
%233 = fadd float %231, %232
%234 = fmul float %30, %223
%235 = fmul float %31, %224
%236 = fadd float %234, %235
%237 = fmul float %32, %225
%238 = fadd float %236, %237
%239 = fmul float %33, %226
%240 = fadd float %238, %239
%241 = fmul float %34, %223
%242 = fmul float %35, %224
%243 = fadd float %241, %242
%244 = fmul float %36, %225
%245 = fadd float %243, %244
%246 = fmul float %37, %226
%247 = fadd float %245, %246
; TGSI 52-54: (x*x - y*y) * CONST[7].xyz added to the three sums above.
%248 = fmul float %203, %203
%249 = fmul float %202, %202
%250 = fsub float %249, %248
%251 = fmul float %38, %250
%252 = fadd float %251, %233
%253 = fmul float %39, %250
%254 = fadd float %253, %240
%255 = fmul float %40, %250
%256 = fadd float %255, %247
; TGSI 55: add the CONST[1..3] DP4 results to obtain the final three values.
%257 = fadd float %252, %210
%258 = fadd float %254, %216
%259 = fadd float %256, %222
; TGSI 56: IN[3].xy * CONST[20].xy + CONST[20].zw.
%260 = fmul float %112, %69
%261 = fadd float %260, %71
%262 = fmul float %113, %70
%263 = fadd float %262, %72
; TGSI 59: position.z * CONST[12].z + CONST[12].w.
%264 = fmul float %173, %50
%265 = fadd float %264, %51
; Exports. The fourth i32 operand selects the destination: 32, 33 and 34 carry
; the values TGSI assigns to OUT[1], OUT[2] and OUT[3] (GENERIC[0..2]); 12
; carries OUT[0] (POSITION) and is the only call whose third i32 operand is 1
; (NOTE(review): presumably the "final export" flag - confirm against the
; intrinsic's definition).
; In the OUT[3] export, z repeats %259 and w is the literal 0.0: TGSI 58 only
; rewrites TEMP[2].xy, and TEMP[2].w is never written by the listing.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %179, float %180, float %173, float %174)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %261, float %263, float %265, float %257)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %258, float %259, float %259, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %171, float %172, float %173, float %174)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose TGSI and LLVM IR are printed above.
; The hex words after ';' on each instruction line are part of the dump.
;
; v0 += s10: the sum is the idxen index of all four buffer_load_format_xyzw
; instructions below (the IR computes the fetch index as %5 + %7).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[68:71] is the descriptor read by every s_buffer_load_dword in this shader.
; s[0:3], s[4:7], s[12:15] and s[8:11] are the four vertex-fetch descriptors,
; loaded from s[8:9] at offsets 0x0, 0x4, 0x8 and 0xc.
s_load_dwordx4 s[68:71], s[2:3], 0x0 ; C0A20300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
; s_buffer_load_dword offsets are dword indices into the constant data: for
; example 0x34 = 52 is byte offset 208 in the IR's llvm.SI.load.const calls.
s_buffer_load_dword s35, s[68:71], 0x29 ; C211C529
; Vertex fetches. The destination ranges v[1:4], v[4:7], v[7:10] chain on
; v4 and v7, so the fourth component of the first two fetches (not extracted
; in the IR) is overwritten by the next fetch; an s_waitcnt vmcnt(0) sits
; between each overlapping pair. v[11:14] does not overlap and has no wait.
buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
; Scalar constant loads, issued in batches separated by s_waitcnt lgkmcnt(0).
s_buffer_load_dword s25, s[68:71], 0x2a ; C20CC52A
s_buffer_load_dword s0, s[68:71], 0x32 ; C2004532
s_buffer_load_dword s4, s[68:71], 0x33 ; C2024533
s_buffer_load_dword s67, s[68:71], 0x34 ; C221C534
s_buffer_load_dword s2, s[68:71], 0x1c ; C201451C
s_buffer_load_dword s3, s[68:71], 0x1d ; C201C51D
s_buffer_load_dword s1, s[68:71], 0x1e ; C200C51E
s_buffer_load_dword s72, s[68:71], 0x20 ; C2244520
s_buffer_load_dword s45, s[68:71], 0x21 ; C216C521
s_buffer_load_dword s73, s[68:71], 0x35 ; C224C535
s_buffer_load_dword s74, s[68:71], 0x36 ; C2254536
s_buffer_load_dword s51, s[68:71], 0x38 ; C219C538
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v0 (the fetch index, no longer needed) is reused: it is seeded with constant
; dword 0x33 and later accumulated into by "v_mac_f32 v0, s0, v3". s4 is
; reloaded with a different constant (0xf) further down.
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s56, s[68:71], 0x39 ; C21C4539
s_buffer_load_dword s53, s[68:71], 0x3a ; C21AC53A
s_buffer_load_dword s44, s[68:71], 0x22 ; C2164522
s_buffer_load_dword s75, s[68:71], 0x24 ; C225C524
s_buffer_load_dword s55, s[68:71], 0x25 ; C21BC525
s_buffer_load_dword s46, s[68:71], 0x26 ; C2174526
s_buffer_load_dword s76, s[68:71], 0x28 ; C2264528
s_buffer_load_dword s57, s[68:71], 0x3c ; C21CC53C
s_buffer_load_dword s58, s[68:71], 0x3d ; C21D453D
s_buffer_load_dword s54, s[68:71], 0x3e ; C21B453E
s_buffer_load_dword s50, s[68:71], 0x40 ; C2194540
s_buffer_load_dword s47, s[68:71], 0x41 ; C217C541
s_buffer_load_dword s48, s[68:71], 0x42 ; C2184542
s_buffer_load_dword s38, s[68:71], 0x48 ; C2134548
s_buffer_load_dword s39, s[68:71], 0x49 ; C213C549
s_buffer_load_dword s36, s[68:71], 0x4a ; C212454A
s_buffer_load_dword s41, s[68:71], 0x4b ; C214C54B
s_buffer_load_dword s24, s[68:71], 0x4c ; C20C454C
s_buffer_load_dword s60, s[68:71], 0x50 ; C21E4550
s_buffer_load_dword s59, s[68:71], 0x51 ; C21DC551
s_buffer_load_dword s5, s[68:71], 0x52 ; C202C552
s_buffer_load_dword s6, s[68:71], 0x53 ; C2034553
s_buffer_load_dword s33, s[68:71], 0x54 ; C210C554
s_buffer_load_dword s28, s[68:71], 0x55 ; C20E4555
s_buffer_load_dword s29, s[68:71], 0x56 ; C20EC556
s_buffer_load_dword s26, s[68:71], 0x57 ; C20D4557
s_buffer_load_dword s27, s[68:71], 0x58 ; C20DC558
s_buffer_load_dword s9, s[68:71], 0xd ; C204C50D
s_buffer_load_dword s8, s[68:71], 0xe ; C204450E
s_buffer_load_dword s4, s[68:71], 0xf ; C202450F
s_waitcnt lgkmcnt(0) ; BF8C007F
; v7 and v8 are seeded with constant dwords 0x52 and 0x53; the v_mac_f32
; instructions on v7/v8 further down add input3.x * dword 0x50 and
; input3.y * dword 0x51 (IR values %261 and %263). s5 and s6 are reloaded
; with other constants right after being copied.
v_mov_b32_e32 v7, s5 ; 7E0E0205
s_buffer_load_dword s17, s[68:71], 0x10 ; C208C510
v_mov_b32_e32 v8, s6 ; 7E100206
s_buffer_load_dword s22, s[68:71], 0x11 ; C20B4511
s_buffer_load_dword s34, s[68:71], 0x59 ; C2114559
s_buffer_load_dword s30, s[68:71], 0x5a ; C20F455A
s_buffer_load_dword s31, s[68:71], 0x5b ; C20FC55B
s_buffer_load_dword s19, s[68:71], 0x5c ; C209C55C
s_buffer_load_dword s20, s[68:71], 0x5d ; C20A455D
s_buffer_load_dword s6, s[68:71], 0x0 ; C2034500
s_buffer_load_dword s11, s[68:71], 0x4 ; C205C504
s_buffer_load_dword s13, s[68:71], 0x5 ; C206C505
s_buffer_load_dword s10, s[68:71], 0x6 ; C2054506
s_buffer_load_dword s7, s[68:71], 0x7 ; C203C507
s_buffer_load_dword s14, s[68:71], 0x8 ; C2074508
s_buffer_load_dword s15, s[68:71], 0x9 ; C207C509
s_buffer_load_dword s12, s[68:71], 0xa ; C206450A
s_buffer_load_dword s5, s[68:71], 0xb ; C202C50B
s_buffer_load_dword s16, s[68:71], 0xc ; C208450C
s_buffer_load_dword s32, s[68:71], 0x12 ; C2104512
s_buffer_load_dword s18, s[68:71], 0x13 ; C2094513
s_buffer_load_dword s42, s[68:71], 0x14 ; C2154514
s_buffer_load_dword s49, s[68:71], 0x15 ; C218C515
s_buffer_load_dword s37, s[68:71], 0x16 ; C212C516
s_buffer_load_dword s21, s[68:71], 0x17 ; C20AC517
s_buffer_load_dword s43, s[68:71], 0x18 ; C215C518
s_buffer_load_dword s52, s[68:71], 0x19 ; C21A4519
s_buffer_load_dword s40, s[68:71], 0x1a ; C214451A
s_buffer_load_dword s23, s[68:71], 0x1b ; C20BC51B
s_buffer_load_dword s65, s[68:71], 0x5e ; C220C55E
s_buffer_load_dword s66, s[68:71], 0x5f ; C221455F
s_buffer_load_dword s62, s[68:71], 0x60 ; C21F4560
s_buffer_load_dword s63, s[68:71], 0x61 ; C21FC561
s_buffer_load_dword s61, s[68:71], 0x62 ; C21EC562
s_buffer_load_dword s64, s[68:71], 0x63 ; C2204563
; ALU work starts here. It uses only SGPRs loaded before the last
; s_waitcnt lgkmcnt(0) above; the SGPRs loaded after that wait are first read
; after the s_waitcnt lgkmcnt(0) that follows v_rsq_clamp_f32.
; v1..v3 = input0.xyz scaled by constant dwords 0x34..0x36 (IR %114..%116).
v_mul_f32_e32 v1, s67, v1 ; 10020243
v_mul_f32_e32 v2, s73, v2 ; 10040449
v_mul_f32_e32 v3, s74, v3 ; 1006064A
v_mul_f32_e32 v9, s72, v4 ; 10120848
v_mul_f32_e32 v13, s75, v4 ; 101A084B
v_mul_f32_e32 v4, s76, v4 ; 1008084C
v_mac_f32_e32 v7, s60, v11 ; 3E0E163C
v_mac_f32_e32 v8, s59, v12 ; 3E10183B
v_mul_f32_e32 v11, s51, v1 ; 10160233
v_mul_f32_e32 v12, s56, v1 ; 10180238
v_mul_f32_e32 v14, s53, v1 ; 101C0235
v_mac_f32_e32 v9, s45, v5 ; 3E120A2D
v_mac_f32_e32 v13, s55, v5 ; 3E1A0A37
v_mac_f32_e32 v4, s35, v5 ; 3E080A23
v_mac_f32_e32 v11, s57, v2 ; 3E160439
v_mac_f32_e32 v12, s58, v2 ; 3E18043A
v_mac_f32_e32 v14, s54, v2 ; 3E1C0436
v_mac_f32_e32 v9, s44, v6 ; 3E120C2C
v_mac_f32_e32 v13, s46, v6 ; 3E1A0C2E
v_mac_f32_e32 v4, s25, v6 ; 3E080C19
v_mac_f32_e32 v11, s50, v3 ; 3E160632
v_mac_f32_e32 v12, s47, v3 ; 3E18062F
v_mac_f32_e32 v14, s48, v3 ; 3E1C0630
; Expansion of the first three llvm.AMDGPU.lrp calls: v5 = w*w and
; v6 = 1 - w*w (w = v10, the fourth component of input 2) blend x and z.
; The y lerp has weight 0.0 in the IR and shows up as "v_mac_f32 v2, 0, v12".
v_mul_f32_e32 v5, v10, v10 ; 100A150A
v_mad_f32 v6, -v10, v10, 1.0 ; D2820006 23CA150A
v_mul_f32_e32 v1, v1, v6 ; 10020D01
v_mul_f32_e32 v3, v3, v6 ; 10060D03
v_mac_f32_e32 v1, v11, v5 ; 3E020B0B
v_mac_f32_e32 v3, v14, v5 ; 3E060B0E
v_mac_f32_e32 v2, 0, v12 ; 3E041880
; Dot product with constant dwords 0x48..0x4a plus 0x4b (IR %141), then the
; result times the same three constants is subtracted from v1..v3
; (IR %145..%147).
v_mul_f32_e32 v5, s38, v1 ; 100A0226
v_mac_f32_e32 v5, s39, v2 ; 3E0A0427
v_mac_f32_e32 v5, s36, v3 ; 3E0A0624
v_add_f32_e32 v5, s41, v5 ; 060A0A29
v_mad_f32 v6, -v5, s38, v1 ; D2820006 24044D05
v_mad_f32 v10, -v5, s39, v2 ; D282000A 24084F05
v_mad_f32 v5, -v5, s36, v3 ; D2820005 240C4905
; Second group of lrp calls, weight s24 (constant dword 0x4c): v11 = 1 - s24.
v_sub_f32_e64 v11, 1.0, s24 ; D208000B 000030F2
v_mul_f32_e32 v6, v6, v11 ; 100C1706
v_mul_f32_e32 v10, v10, v11 ; 1014170A
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mac_f32_e32 v6, s24, v1 ; 3E0C0218
v_mac_f32_e32 v10, s24, v2 ; 3E140418
v_mac_f32_e32 v5, s24, v3 ; 3E0A0618
v_mul_f32_e32 v1, s33, v6 ; 10020C21
v_mul_f32_e32 v2, s28, v6 ; 10040C1C
v_mul_f32_e32 v3, s29, v6 ; 10060C1D
v_mul_f32_e32 v6, s26, v6 ; 100C0C1A
v_mac_f32_e32 v1, s27, v10 ; 3E02141B
; Squared length of (v9, v13, v4) followed by the clamped reciprocal square
; root (llvm.AMDGPU.rsq.clamped.f32 in the IR).
v_mul_f32_e32 v11, v9, v9 ; 10161309
v_mac_f32_e32 v11, v13, v13 ; 3E161B0D
v_mac_f32_e32 v11, v4, v4 ; 3E160904
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v2, s34, v10 ; 3E041422
v_mac_f32_e32 v3, s30, v10 ; 3E06141E
v_mac_f32_e32 v6, s31, v10 ; 3E0C141F
; Normalised vector: v9 = x, v10 = y, v4 = z (each multiplied by v11).
v_mul_f32_e32 v10, v11, v13 ; 10141B0B
v_mul_f32_e32 v4, v11, v4 ; 1008090B
v_mul_f32_e32 v12, v4, v10 ; 10181504
v_mul_f32_e32 v13, s22, v12 ; 101A1816
v_mul_f32_e32 v14, s49, v12 ; 101C1831
v_mul_f32_e32 v12, s52, v12 ; 10181834
v_mul_f32_e32 v9, v11, v9 ; 1012130B
v_mul_f32_e32 v11, v10, v9 ; 1016130A
v_mac_f32_e32 v13, s17, v11 ; 3E1A1611
v_mac_f32_e32 v14, s42, v11 ; 3E1C162A
v_mac_f32_e32 v12, s43, v11 ; 3E18162B
v_mac_f32_e32 v1, s19, v5 ; 3E020A13
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mac_f32_e32 v3, s65, v5 ; 3E060A41
v_mac_f32_e32 v6, s66, v5 ; 3E0C0A42
v_mul_f32_e32 v5, v4, v4 ; 100A0904
v_mac_f32_e32 v13, s32, v5 ; 3E1A0A20
v_mac_f32_e32 v14, s37, v5 ; 3E1C0A25
v_mac_f32_e32 v12, s40, v5 ; 3E180A28
v_mul_f32_e32 v5, s13, v10 ; 100A140D
v_mac_f32_e32 v5, s11, v9 ; 3E0A120B
v_mul_f32_e32 v11, s15, v10 ; 1016140F
v_mac_f32_e32 v11, s14, v9 ; 3E16120E
v_mul_f32_e32 v15, s9, v10 ; 101E1409
v_mac_f32_e32 v15, s16, v9 ; 3E1E1210
v_mac_f32_e32 v5, s10, v4 ; 3E0A080A
v_mac_f32_e32 v11, s12, v4 ; 3E16080C
v_mac_f32_e32 v15, s8, v4 ; 3E1E0808
v_mul_f32_e32 v4, v9, v4 ; 10080909
v_mac_f32_e32 v13, s18, v4 ; 3E1A0812
v_mac_f32_e32 v14, s21, v4 ; 3E1C0815
v_mac_f32_e32 v12, s23, v4 ; 3E180817
; v4 = x*x - y*y (IR %250), accumulated with constant dwords 0x1c..0x1e.
v_mul_f32_e32 v4, v10, v10 ; 1008150A
v_mad_f32 v4, v9, v9, -v4 ; D2820004 84121309
v_mac_f32_e32 v13, s2, v4 ; 3E1A0802
v_mac_f32_e32 v14, s3, v4 ; 3E1C0803
v_mac_f32_e32 v12, s1, v4 ; 3E180801
; Final position in v1, v2, v3, v4 (v3 is completed two instructions before the
; first exp), then the half-scaled x/y pair in v10 / v9 (IR %179 / %180).
v_add_f32_e32 v1, s62, v1 ; 0602023E
v_add_f32_e32 v2, s63, v2 ; 0604043F
v_add_f32_e32 v4, s64, v6 ; 06080C40
v_mul_f32_e32 v6, 0.5, v2 ; 100C04F0
v_mul_f32_e32 v9, 0.5, v4 ; 101208F0
v_mad_f32 v10, 0.5, v1, v9 ; D282000A 042602F0
v_mac_f32_e32 v9, s6, v6 ; 3E120C06
v_add_f32_e32 v5, s7, v5 ; 060A0A07
v_add_f32_e32 v3, s61, v3 ; 0606063D
v_mac_f32_e32 v0, s0, v3 ; 3E000600
v_add_f32_e32 v5, v5, v13 ; 060A1B05
; The four exp instructions (targets 32, 33, 34, 12) correspond to the four
; llvm.SI.export calls in the IR, in the same order.
exp 15, 32, 0, 0, 0, v10, v9, v3, v4 ; F800020F 0403090A
exp 15, 33, 0, 0, 0, v7, v8, v0, v5 ; F800021F 05000807
; v0 and v5 were sources of the previous exp and are rewritten immediately
; below; the s_waitcnt expcnt(0) sits between the export and those writes.
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, s5, v11 ; 06001605
v_add_f32_e32 v5, s4, v15 ; 060A1E04
v_add_f32_e32 v0, v0, v14 ; 06001D00
v_add_f32_e32 v5, v5, v12 ; 060A1905
; v6 = 0 supplies the literal 0.0 fourth component of the target-34 export.
v_mov_b32_e32 v6, 0 ; 7E0C0280
exp 15, 34, 0, 0, 0, v0, v5, v5, v6 ; F800022F 06050500
exp 15, 12, 0, 1, 0, v1, v2, v3, v4 ; F80008CF 04030201
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 80
VGPRS: 16
Code Size: 900 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL CONST[0..3]
DCL CONST[7]
DCL CONST[9]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 0.0000, 0.7000, 32.0000, 0.5000}
IMM[1] FLT32 { 2.3000, 0.1000, 1.0000, 140.0000}
IMM[2] FLT32 { 0.3500, 3.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].yz, IN[2].yxyy
2: MOV TEMP[1].xy, IN[1].xyyy
3: TEX TEMP[1], TEMP[1], SAMP[0], 2D
4: MUL TEMP[2].xyz, TEMP[1].xyzz, CONST[7].xyzz
5: MOV TEMP[3].x, IMM[0].xxxx
6: MUL TEMP[4].x, CONST[0].xxxx, IMM[0].yyyy
7: MOV TEMP[3].y, TEMP[4].xxxx
8: MOV TEMP[5].x, IMM[0].xxxx
9: MOV TEMP[5].y, TEMP[4].xxxx
10: MOV TEMP[4].xy, IN[1].xyyy
11: TEX TEMP[4].x, TEMP[4], SAMP[1], 2D
12: MAD TEMP[3].xy, IN[1].xyyy, IMM[0].zzzz, TEMP[3].xyyy
13: MOV TEMP[3].xy, TEMP[3].xyyy
14: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D
15: MAD TEMP[5].xy, -IN[1].xyyy, IMM[0].zzzz, TEMP[5].xyyy
16: MOV TEMP[5].xy, TEMP[5].xyyy
17: TEX TEMP[5].x, TEMP[5], SAMP[2], 2D
18: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[5].xxxx
19: MUL TEMP[3].x, TEMP[3].xxxx, IMM[0].wwww
20: POW TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx
21: MUL TEMP[3].x, TEMP[4].xxxx, TEMP[3].xxxx
22: MUL TEMP[3].x, TEMP[3].xxxx, IMM[1].yyyy
23: FSLT TEMP[4].x, TEMP[1].wwww, CONST[9].xxxx
24: AND TEMP[4].x, TEMP[4].xxxx, IMM[1].zzzz
25: KILL_IF -TEMP[4].xxxx
26: MOV TEMP[4].xy, IN[0].xyyy
27: MOV TEMP[4].w, IN[0].wwww
28: TXP TEMP[4], TEMP[4], SAMP[3], 2D
29: LG2 TEMP[5].x, TEMP[4].xxxx
30: LG2 TEMP[5].y, TEMP[4].yyyy
31: LG2 TEMP[5].z, TEMP[4].zzzz
32: LG2 TEMP[5].w, TEMP[4].wwww
33: MOV TEMP[4], -TEMP[5]
34: ADD TEMP[0].xyz, TEMP[4].xyzz, TEMP[0].xyzz
35: MUL TEMP[4].xyz, TEMP[4].wwww, CONST[3]
36: MUL TEMP[5].xyz, TEMP[2].xyzz, TEMP[0].xyzz
37: MAD TEMP[5].xyz, TEMP[0].xyzz, TEMP[4].xyzz, TEMP[5].xyzz
38: MAD TEMP[1].x, TEMP[4].xyzz, CONST[2].wwww, TEMP[1].wwww
39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
40: ADD TEMP[0].xyz, IMM[2].xxxx, -TEMP[0].xyzz
41: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[2].yyyy
42: MOV_SAT TEMP[0].xyz, TEMP[0].xyzz
43: MUL TEMP[0].xyz, IMM[1].wwww, TEMP[0].xyzz
44: MAD TEMP[5].xyz, TEMP[2].xyzz, TEMP[0].xyzz, TEMP[5].xyzz
45: MOV TEMP[0].w, TEMP[1].xxxx
46: ADD TEMP[0].xyz, TEMP[5].xyzz, TEMP[3].xxxx
47: MOV_SAT TEMP[1].x, IN[1].zzzz
48: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[1].xyzz
49: MOV OUT[0], TEMP[0]
50: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%36 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%37 = load <32 x i8>, <32 x i8> addrspace(2)* %36, align 32, !tbaa !0
%38 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%39 = load <16 x i8>, <16 x i8> addrspace(2)* %38, align 16, !tbaa !0
%40 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%41 = bitcast <8 x i32> addrspace(2)* %40 to <32 x i8> addrspace(2)*
%42 = load <32 x i8>, <32 x i8> addrspace(2)* %41, align 32, !tbaa !0
%43 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%44 = bitcast <4 x i32> addrspace(2)* %43 to <16 x i8> addrspace(2)*
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%47 = load <8 x i32>, <8 x i32> addrspace(2)* %46, align 32, !tbaa !0
%48 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%49 = load <4 x i32>, <4 x i32> addrspace(2)* %48, align 16, !tbaa !0
%50 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%51 = bitcast <8 x i32> addrspace(2)* %50 to <32 x i8> addrspace(2)*
%52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0
%53 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%54 = bitcast <4 x i32> addrspace(2)* %53 to <16 x i8> addrspace(2)*
%55 = load <16 x i8>, <16 x i8> addrspace(2)* %54, align 16, !tbaa !0
%56 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%57 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%58 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%65 = bitcast float %59 to i32
%66 = bitcast float %60 to i32
%67 = insertelement <2 x i32> undef, i32 %65, i32 0
%68 = insertelement <2 x i32> %67, i32 %66, i32 1
%69 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %68, <32 x i8> %37, <16 x i8> %39, i32 2)
%70 = extractelement <4 x float> %69, i32 0
%71 = extractelement <4 x float> %69, i32 1
%72 = extractelement <4 x float> %69, i32 2
%73 = extractelement <4 x float> %69, i32 3
%74 = fmul float %70, %32
%75 = fmul float %71, %33
%76 = fmul float %72, %34
%77 = fmul float %24, 0x3FE6666660000000
%78 = bitcast float %59 to i32
%79 = bitcast float %60 to i32
%80 = insertelement <2 x i32> undef, i32 %78, i32 0
%81 = insertelement <2 x i32> %80, i32 %79, i32 1
%82 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %81, <32 x i8> %42, <16 x i8> %45, i32 2)
%83 = extractelement <4 x float> %82, i32 0
%84 = fmul float %59, 3.200000e+01
%85 = fadd float %84, 0.000000e+00
%86 = fmul float %60, 3.200000e+01
%87 = fadd float %86, %77
%88 = bitcast float %85 to i32
%89 = bitcast float %87 to i32
%90 = insertelement <2 x i32> undef, i32 %88, i32 0
%91 = insertelement <2 x i32> %90, i32 %89, i32 1
%92 = bitcast <8 x i32> %47 to <32 x i8>
%93 = bitcast <4 x i32> %49 to <16 x i8>
%94 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %91, <32 x i8> %92, <16 x i8> %93, i32 2)
%95 = extractelement <4 x float> %94, i32 0
%96 = fmul float %59, -3.200000e+01
%97 = fadd float %96, 0.000000e+00
%98 = fmul float %60, -3.200000e+01
%99 = fadd float %98, %77
%100 = bitcast float %97 to i32
%101 = bitcast float %99 to i32
%102 = insertelement <2 x i32> undef, i32 %100, i32 0
%103 = insertelement <2 x i32> %102, i32 %101, i32 1
%104 = bitcast <8 x i32> %47 to <32 x i8>
%105 = bitcast <4 x i32> %49 to <16 x i8>
%106 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %103, <32 x i8> %104, <16 x i8> %105, i32 2)
%107 = extractelement <4 x float> %106, i32 0
%108 = fadd float %95, %107
%109 = fmul float %108, 5.000000e-01
%110 = call float @llvm.pow.f32(float %109, float 0x4002666660000000)
%111 = fmul float %83, %110
%112 = fmul float %111, 0x3FB99999A0000000
%113 = fcmp olt float %73, %35
%114 = select i1 %113, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %114)
%115 = fdiv float %56, %58
%116 = fdiv float %57, %58
%117 = bitcast float %115 to i32
%118 = bitcast float %116 to i32
%119 = insertelement <2 x i32> undef, i32 %117, i32 0
%120 = insertelement <2 x i32> %119, i32 %118, i32 1
%121 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %52, <16 x i8> %55, i32 2)
%122 = extractelement <4 x float> %121, i32 0
%123 = extractelement <4 x float> %121, i32 1
%124 = extractelement <4 x float> %121, i32 2
%125 = extractelement <4 x float> %121, i32 3
%126 = call float @llvm.log2.f32(float %122)
%127 = call float @llvm.log2.f32(float %123)
%128 = call float @llvm.log2.f32(float %124)
%129 = call float @llvm.log2.f32(float %125)
%130 = fsub float -0.000000e+00, %129
%131 = fsub float %62, %126
%132 = fsub float %63, %127
%133 = fsub float %64, %128
%134 = fmul float %29, %130
%135 = fmul float %30, %130
%136 = fmul float %31, %130
%137 = fmul float %74, %131
%138 = fmul float %75, %132
%139 = fmul float %76, %133
%140 = fmul float %131, %134
%141 = fadd float %140, %137
%142 = fmul float %132, %135
%143 = fadd float %142, %138
%144 = fmul float %133, %136
%145 = fadd float %144, %139
%146 = fmul float %134, %28
%147 = fadd float %146, %73
%148 = fmul float %74, %112
%149 = fmul float %75, %112
%150 = fmul float %76, %112
%151 = fsub float 0x3FD6666660000000, %131
%152 = fsub float 0x3FD6666660000000, %132
%153 = fsub float 0x3FD6666660000000, %133
%154 = fmul float %151, 3.000000e+00
%155 = fmul float %152, 3.000000e+00
%156 = fmul float %153, 3.000000e+00
%157 = call float @llvm.AMDIL.clamp.(float %154, float 0.000000e+00, float 1.000000e+00)
%158 = call float @llvm.AMDIL.clamp.(float %155, float 0.000000e+00, float 1.000000e+00)
%159 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00)
%160 = fmul float %157, 1.400000e+02
%161 = fmul float %158, 1.400000e+02
%162 = fmul float %159, 1.400000e+02
%163 = fmul float %148, %160
%164 = fadd float %163, %141
%165 = fmul float %149, %161
%166 = fadd float %165, %143
%167 = fmul float %150, %162
%168 = fadd float %167, %145
%169 = fadd float %164, %112
%170 = fadd float %166, %112
%171 = fadd float %168, %112
%172 = call float @llvm.AMDIL.clamp.(float %61, float 0.000000e+00, float 1.000000e+00)
%173 = call float @llvm.AMDGPU.lrp(float %172, float %169, float %25)
%174 = call float @llvm.AMDGPU.lrp(float %172, float %170, float %26)
%175 = call float @llvm.AMDGPU.lrp(float %172, float %171, float %27)
%176 = call i32 @llvm.SI.packf16(float %173, float %174)
%177 = bitcast i32 %176 to float
%178 = call i32 @llvm.SI.packf16(float %175, float %147)
%179 = bitcast i32 %178 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %177, float %179, float %177, float %179)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300
v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s48, s[0:3], 0x0 ; C2180100
v_interp_p1_f32 v7, v0, 2, 1, [m0] ; C81C0600
v_interp_p2_f32 v7, [v7], v1, 2, 1, [m0] ; C81D0601
v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700
v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v0, v0, 1, 2, [m0] ; C8000900
v_interp_p2_f32 v0, [v0], v1, 1, 2, [m0] ; C8010901
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
s_load_dwordx4 s[20:23], s[4:5], 0x8 ; C08A0508
s_load_dwordx8 s[52:59], s[6:7], 0x0 ; C0DA0700
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
s_load_dwordx8 s[12:19], s[6:7], 0x18 ; C0C60718
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[52:59], s[32:35] ; F0800F00 010D0A05
image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[40:47], s[36:39] ; F0800100 012A0105
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105
s_buffer_load_dword s6, s[0:3], 0x6 ; C2030106
s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
v_mov_b32_e32 v14, 0x3f333333 ; 7E1C02FF 3F333333
v_mul_f32_e32 v15, s48, v14 ; 101E1C30
v_mov_b32_e32 v14, 0x42000000 ; 7E1C02FF 42000000
v_mad_f32 v16, v5, v14, 0 ; D2820010 02021D05
v_mad_f32 v17, v14, v6, v15 ; D2820011 043E0D0E
v_mov_b32_e32 v18, 0xc2000000 ; 7E2402FF C2000000
v_mad_f32 v14, v5, v18, 0 ; D282000E 02022505
v_mac_f32_e32 v15, v18, v6 ; 3E1E0D12
s_buffer_load_dword s32, s[0:3], 0x24 ; C2100124
image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[20:23] ; F0800100 00A60510
image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[14:15], s[24:31], s[20:23] ; F0800100 00A6060E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_add_f32_e32 v5, v6, v5 ; 060A0B06
v_mul_f32_e32 v5, 0.5, v5 ; 100A0AF0
s_buffer_load_dword s20, s[0:3], 0x1c ; C20A011C
s_buffer_load_dword s21, s[0:3], 0x1d ; C20A811D
s_buffer_load_dword s22, s[0:3], 0x1e ; C20B011E
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_cmp_gt_f32_e32 vcc, s32, v13 ; 7C081A20
v_cndmask_b32_e64 v6, 0, -1.0, vcc ; D2000006 01A9E680
v_mul_legacy_f32_e32 v5, 0x40133333, v5 ; 0E0A0AFF 40133333
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_mul_f32_e32 v1, v5, v1 ; 10020305
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v5, s20, v10 ; 100A1414
v_mul_f32_e32 v10, s21, v11 ; 10141615
v_mul_f32_e32 v11, s22, v12 ; 10161816
v_mov_b32_e32 v12, 0x3dcccccd ; 7E1802FF 3DCCCCCD
v_mul_f32_e32 v14, v12, v1 ; 101C030C
v_cmpx_le_f32_e32 vcc, 0, v6 ; 7C260C80
v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v4|, v6 ; D008016A 00020D04
v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000
v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2
v_mul_f32_e32 v4, v6, v4 ; 10080906
v_rcp_f32_e32 v4, v4 ; 7E085504
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v3, v4, v3 ; 10060704
v_mul_f32_e32 v15, v2, v6 ; 101E0D02
s_buffer_load_dword s20, s[0:3], 0xc ; C20A010C
v_mul_f32_e32 v16, v3, v6 ; 10200D03
image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800F00 00430F0F
s_buffer_load_dword s8, s[0:3], 0xd ; C204010D
s_buffer_load_dword s0, s[0:3], 0xe ; C200010E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_log_f32_e32 v2, v15 ; 7E044F0F
v_log_f32_e32 v3, v18 ; 7E064F12
v_log_f32_e32 v4, v16 ; 7E084F10
v_log_f32_e32 v6, v17 ; 7E0C4F11
v_subrev_f32_e32 v2, v2, v8 ; 0A041102
v_mul_f32_e64 v8, s20, -v3 ; D2100008 40020614
v_mac_f32_e32 v13, s7, v8 ; 3E1A1007
v_mul_f32_e32 v15, v2, v5 ; 101E0B02
v_mac_f32_e32 v15, v8, v2 ; 3E1E0508
v_mul_f32_e32 v5, v14, v5 ; 100A0B0E
v_mov_b32_e32 v8, 0x3eb33333 ; 7E1002FF 3EB33333
v_subrev_f32_e32 v2, v2, v8 ; 0A041102
v_mov_b32_e32 v16, 0x40400000 ; 7E2002FF 40400000
v_mul_f32_e32 v2, v16, v2 ; 10040510
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mov_b32_e32 v17, 0x430c0000 ; 7E2202FF 430C0000
v_mul_f32_e32 v2, v17, v2 ; 10040511
v_mac_f32_e32 v15, v2, v5 ; 3E1E0B02
v_subrev_f32_e32 v2, v4, v9 ; 0A041304
v_mul_f32_e32 v4, s8, v3 ; 10080608
v_mul_f32_e32 v4, v4, v2 ; 10080504
v_mad_f32 v4, v10, v2, -v4 ; D2820004 8412050A
v_mul_f32_e32 v5, v14, v10 ; 100A150E
v_subrev_f32_e32 v2, v2, v8 ; 0A041102
v_mul_f32_e32 v2, v16, v2 ; 10040510
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mul_f32_e32 v2, v17, v2 ; 10040511
v_mac_f32_e32 v4, v2, v5 ; 3E080B02
v_subrev_f32_e32 v0, v6, v0 ; 0A000106
v_mul_f32_e32 v2, s0, v3 ; 10040600
v_mul_f32_e32 v2, v2, v0 ; 10040102
v_mad_f32 v2, v11, v0, -v2 ; D2820002 840A010B
v_mul_f32_e32 v3, v14, v11 ; 1006170E
v_subrev_f32_e32 v0, v0, v8 ; 0A001100
v_mul_f32_e32 v0, v16, v0 ; 10000110
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_mul_f32_e32 v0, v17, v0 ; 10000111
v_mac_f32_e32 v2, v0, v3 ; 3E040700
v_mac_f32_e32 v15, v12, v1 ; 3E1E030C
v_mac_f32_e32 v4, v12, v1 ; 3E08030C
v_mac_f32_e32 v2, v12, v1 ; 3E04030C
v_add_f32_e64 v0, 0, v7 clamp ; D2060800 00020E80
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v3, s4, v1 ; 10060204
v_mac_f32_e32 v3, v15, v0 ; 3E06010F
v_mul_f32_e32 v5, s5, v1 ; 100A0205
v_mac_f32_e32 v5, v4, v0 ; 3E0A0104
v_mul_f32_e32 v1, s6, v1 ; 10020206
v_mac_f32_e32 v1, v2, v0 ; 3E020102
v_cvt_pkrtz_f16_f32_e32 v0, v1, v13 ; 5E001B01
v_cvt_pkrtz_f16_f32_e32 v1, v3, v5 ; 5E020B03
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 20
Code Size: 656 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..8]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[3].xyxx
1: MOV TEMP[1].x, IN[2].yyyy
2: ADD TEMP[2].xyz, IN[0].xyzz, -CONST[3].xyzz
3: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
4: FSLT TEMP[3].x, CONST[4].xxxx, TEMP[3].xxxx
5: UIF TEMP[3].xxxx :0
6: MOV TEMP[1].x, IMM[0].xxxx
7: MOV TEMP[0].xy, IMM[0].xxxx
8: ENDIF
9: MAD TEMP[2].xyz, CONST[0].xyzz, TEMP[0].xxxx, IN[0].xyzz
10: LRP TEMP[1].x, CONST[3].wwww, TEMP[1].xxxx, TEMP[0].yyyy
11: MAD TEMP[2].xyz, CONST[1].xyzz, TEMP[1].xxxx, TEMP[2].xyzz
12: ABS TEMP[0].x, TEMP[0].xxxx
13: MUL TEMP[0].xyz, CONST[2].xyzz, TEMP[0].xxxx
14: MAD TEMP[2].xyz, TEMP[0].xyzz, CONST[1].wwww, TEMP[2].xyzz
15: MUL TEMP[0], CONST[5], TEMP[2].xxxx
16: MAD TEMP[0], CONST[6], TEMP[2].yyyy, TEMP[0]
17: MAD TEMP[2], CONST[7], TEMP[2].zzzz, TEMP[0]
18: MAD TEMP[2], CONST[8], IN[0].wwww, TEMP[2]
19: MOV TEMP[0].x, IN[2].xxxx
20: FSLT TEMP[1].x, IMM[0].xxxx, IN[2].yyyy
21: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy
22: MOV TEMP[0].y, TEMP[1].xxxx
23: MOV TEMP[0].xy, TEMP[0].xyxx
24: MOV TEMP[0].z, TEMP[2].zzzz
25: MOV OUT[2], TEMP[0]
26: MOV OUT[0], TEMP[2]
27: MOV OUT[1], IN[1]
28: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%45 = load <16 x i8>, <16 x i8> addrspace(2)* %44, align 16, !tbaa !0
%46 = add i32 %5, %7
%47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46)
%48 = extractelement <4 x float> %47, i32 0
%49 = extractelement <4 x float> %47, i32 1
%50 = extractelement <4 x float> %47, i32 2
%51 = extractelement <4 x float> %47, i32 3
%52 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0
%54 = add i32 %5, %7
%55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54)
%56 = extractelement <4 x float> %55, i32 0
%57 = extractelement <4 x float> %55, i32 1
%58 = extractelement <4 x float> %55, i32 2
%59 = extractelement <4 x float> %55, i32 3
%60 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%61 = load <16 x i8>, <16 x i8> addrspace(2)* %60, align 16, !tbaa !0
%62 = add i32 %5, %7
%63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62)
%64 = extractelement <4 x float> %63, i32 0
%65 = extractelement <4 x float> %63, i32 1
%66 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%67 = load <16 x i8>, <16 x i8> addrspace(2)* %66, align 16, !tbaa !0
%68 = add i32 %5, %7
%69 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %67, i32 0, i32 %68)
%70 = extractelement <4 x float> %69, i32 0
%71 = extractelement <4 x float> %69, i32 1
%72 = fsub float %48, %23
%73 = fsub float %49, %24
%74 = fsub float %50, %25
%75 = fmul float %72, %72
%76 = fmul float %73, %73
%77 = fadd float %76, %75
%78 = fmul float %74, %74
%79 = fadd float %77, %78
%80 = fcmp olt float %27, %79
%. = select i1 %80, float 0.000000e+00, float %70
%.16 = select i1 %80, float 0.000000e+00, float %71
%.17 = select i1 %80, float 0.000000e+00, float %65
%81 = fmul float %13, %.
%82 = fadd float %81, %48
%83 = fmul float %14, %.
%84 = fadd float %83, %49
%85 = fmul float %15, %.
%86 = fadd float %85, %50
%87 = call float @llvm.AMDGPU.lrp(float %26, float %.17, float %.16)
%88 = fmul float %16, %87
%89 = fadd float %88, %82
%90 = fmul float %17, %87
%91 = fadd float %90, %84
%92 = fmul float %18, %87
%93 = fadd float %92, %86
%94 = call float @llvm.fabs.f32(float %.)
%95 = fmul float %20, %94
%96 = fmul float %21, %94
%97 = fmul float %22, %94
%98 = fmul float %95, %19
%99 = fadd float %98, %89
%100 = fmul float %96, %19
%101 = fadd float %100, %91
%102 = fmul float %97, %19
%103 = fadd float %102, %93
%104 = fmul float %28, %99
%105 = fmul float %29, %99
%106 = fmul float %30, %99
%107 = fmul float %31, %99
%108 = fmul float %32, %101
%109 = fadd float %108, %104
%110 = fmul float %33, %101
%111 = fadd float %110, %105
%112 = fmul float %34, %101
%113 = fadd float %112, %106
%114 = fmul float %35, %101
%115 = fadd float %114, %107
%116 = fmul float %36, %103
%117 = fadd float %116, %109
%118 = fmul float %37, %103
%119 = fadd float %118, %111
%120 = fmul float %38, %103
%121 = fadd float %120, %113
%122 = fmul float %39, %103
%123 = fadd float %122, %115
%124 = fmul float %40, %51
%125 = fadd float %124, %117
%126 = fmul float %41, %51
%127 = fadd float %126, %119
%128 = fmul float %42, %51
%129 = fadd float %128, %121
%130 = fmul float %43, %51
%131 = fadd float %130, %123
%132 = fcmp ogt float %65, 0.000000e+00
%133 = select i1 %132, float 1.000000e+00, float 0.000000e+00
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float %58, float %59)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %64, float %133, float %129, float %115)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %125, float %127, float %129, float %131)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
buffer_load_format_xyzw v[9:12], v0, s[16:19], 0 idxen ; E00C2000 80040900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[20:23], 0 idxen ; E00C2000 80050B00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106
s_buffer_load_dword s10, s[0:3], 0x7 ; C2050107
s_buffer_load_dword s11, s[0:3], 0x8 ; C2058108
s_buffer_load_dword s12, s[0:3], 0x9 ; C2060109
s_buffer_load_dword s13, s[0:3], 0xa ; C206810A
s_buffer_load_dword s14, s[0:3], 0xc ; C207010C
s_buffer_load_dword s15, s[0:3], 0xd ; C207810D
s_buffer_load_dword s16, s[0:3], 0xe ; C208010E
s_buffer_load_dword s17, s[0:3], 0xf ; C208810F
s_buffer_load_dword s18, s[0:3], 0x10 ; C2090110
s_buffer_load_dword s19, s[0:3], 0x14 ; C2098114
s_buffer_load_dword s20, s[0:3], 0x15 ; C20A0115
s_buffer_load_dword s21, s[0:3], 0x16 ; C20A8116
s_buffer_load_dword s22, s[0:3], 0x17 ; C20B0117
s_buffer_load_dword s23, s[0:3], 0x18 ; C20B8118
s_buffer_load_dword s24, s[0:3], 0x19 ; C20C0119
s_buffer_load_dword s25, s[0:3], 0x1a ; C20C811A
s_buffer_load_dword s26, s[0:3], 0x1b ; C20D011B
s_buffer_load_dword s27, s[0:3], 0x1c ; C20D811C
s_buffer_load_dword s28, s[0:3], 0x1d ; C20E011D
s_buffer_load_dword s29, s[0:3], 0x1e ; C20E811E
s_buffer_load_dword s30, s[0:3], 0x1f ; C20F011F
s_buffer_load_dword s31, s[0:3], 0x20 ; C20F8120
s_buffer_load_dword s32, s[0:3], 0x21 ; C2100121
s_buffer_load_dword s33, s[0:3], 0x22 ; C2108122
s_buffer_load_dword s0, s[0:3], 0x23 ; C2000123
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v0, s14, v1 ; 0A00020E
v_subrev_f32_e32 v13, s15, v2 ; 0A1A040F
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v13, v13 ; 3E001B0D
v_subrev_f32_e32 v13, s16, v3 ; 0A1A0610
v_mac_f32_e32 v0, v13, v13 ; 3E001B0D
v_cmp_lt_f32_e32 vcc, s18, v0 ; 7C020012
v_cndmask_b32_e64 v0, v11, 0, vcc ; D2000000 01A9010B
v_cndmask_b32_e64 v11, v12, 0, vcc ; D200000B 01A9010C
v_cndmask_b32_e64 v12, v10, 0, vcc ; D200000C 01A9010A
v_sub_f32_e64 v13, 1.0, s17 ; D208000D 000022F2
v_mul_f32_e32 v11, v11, v13 ; 10161B0B
v_mac_f32_e32 v11, s17, v12 ; 3E161811
v_mad_f32 v1, s4, v0, v1 ; D2820001 04060004
v_mac_f32_e32 v1, s7, v11 ; 3E021607
v_mad_f32 v2, s5, v0, v2 ; D2820002 040A0005
v_mac_f32_e32 v2, s8, v11 ; 3E041608
v_mad_f32 v3, s6, v0, v3 ; D2820003 040E0006
v_mac_f32_e32 v3, s9, v11 ; 3E061609
v_mul_f32_e64 v11, s11, |v0| ; D210020B 0002000B
v_mul_f32_e64 v12, s12, |v0| ; D210020C 0002000C
v_mul_f32_e64 v0, s13, |v0| ; D2100200 0002000D
v_mac_f32_e32 v1, s10, v11 ; 3E02160A
v_mac_f32_e32 v2, s10, v12 ; 3E04180A
v_mac_f32_e32 v3, s10, v0 ; 3E06000A
v_mul_f32_e32 v0, s19, v1 ; 10000213
v_mul_f32_e32 v11, s20, v1 ; 10160214
v_mul_f32_e32 v12, s21, v1 ; 10180215
v_mul_f32_e32 v1, s22, v1 ; 10020216
v_mac_f32_e32 v0, s23, v2 ; 3E000417
v_mac_f32_e32 v11, s24, v2 ; 3E160418
v_mac_f32_e32 v12, s25, v2 ; 3E180419
v_mac_f32_e32 v1, s26, v2 ; 3E02041A
v_mac_f32_e32 v0, s27, v3 ; 3E00061B
v_mac_f32_e32 v11, s28, v3 ; 3E16061C
v_mac_f32_e32 v12, s29, v3 ; 3E18061D
v_mad_f32 v2, s30, v3, v1 ; D2820002 0406061E
v_mac_f32_e32 v0, s31, v4 ; 3E00081F
v_mac_f32_e32 v11, s32, v4 ; 3E160820
v_mac_f32_e32 v12, s33, v4 ; 3E180821
v_mac_f32_e32 v2, s0, v4 ; 3E040800
v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480
v_cndmask_b32_e64 v3, 0, 1.0, vcc ; D2000003 01A9E480
exp 15, 32, 0, 0, 0, v5, v6, v7, v8 ; F800020F 08070605
exp 15, 33, 0, 0, 0, v9, v3, v12, v1 ; F800021F 010C0309
exp 15, 12, 0, 1, 0, v0, v11, v12, v2 ; F80008CF 020C0B00
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 16
Code Size: 444 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..1]
DCL TEMP[0..1], LOCAL
IMM[0] FLT32 { 0.7000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[1].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MOV TEMP[1].w, TEMP[0].wwww
3: MUL TEMP[1].xyz, TEMP[0].xyzz, IN[0].xyzz
4: FSLT TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx
5: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy
6: KILL_IF -TEMP[0].xxxx
7: MAD TEMP[0].x, IN[1].zzzz, CONST[1].zzzz, CONST[1].wwww
8: MOV_SAT TEMP[0].x, TEMP[0].xxxx
9: LRP TEMP[1].xyz, TEMP[0].xxxx, TEMP[1].xyzz, CONST[0].xyzz
10: MOV OUT[0], TEMP[1]
11: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%29 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%30 = load <32 x i8>, <32 x i8> addrspace(2)* %29, align 32, !tbaa !0
%31 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%32 = load <16 x i8>, <16 x i8> addrspace(2)* %31, align 16, !tbaa !0
%33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%39 = bitcast float %36 to i32
%40 = bitcast float %37 to i32
%41 = insertelement <2 x i32> undef, i32 %39, i32 0
%42 = insertelement <2 x i32> %41, i32 %40, i32 1
%43 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %30, <16 x i8> %32, i32 2)
%44 = extractelement <4 x float> %43, i32 0
%45 = extractelement <4 x float> %43, i32 1
%46 = extractelement <4 x float> %43, i32 2
%47 = extractelement <4 x float> %43, i32 3
%48 = fmul float %44, %33
%49 = fmul float %45, %34
%50 = fmul float %46, %35
%51 = fcmp olt float %47, 0x3FE6666660000000
%52 = select i1 %51, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %52)
%53 = fmul float %38, %27
%54 = fadd float %53, %28
%55 = call float @llvm.AMDIL.clamp.(float %54, float 0.000000e+00, float 1.000000e+00)
%56 = call float @llvm.AMDGPU.lrp(float %55, float %48, float %24)
%57 = call float @llvm.AMDGPU.lrp(float %55, float %49, float %25)
%58 = call float @llvm.AMDGPU.lrp(float %55, float %50, float %26)
%59 = call i32 @llvm.SI.packf16(float %56, float %57)
%60 = bitcast i32 %59 to float
%61 = call i32 @llvm.SI.packf16(float %58, float %47)
%62 = bitcast i32 %61 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %60, float %62, float %60, float %62)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400
v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401
v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500
v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501
v_interp_p1_f32 v0, v0, 2, 1, [m0] ; C8000600
v_interp_p2_f32 v0, [v0], v1, 2, 1, [m0] ; C8010601
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[8:15], s[4:7] ; F0800F00 00220505
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s7, s[0:3], 0x1 ; C2038101
s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v1, v2, v5 ; 10020B02
v_mul_f32_e32 v2, v3, v6 ; 10040D03
v_mul_f32_e32 v3, v4, v7 ; 10060F04
v_mov_b32_e32 v4, 0x3f333333 ; 7E0802FF 3F333333
v_cmp_gt_f32_e32 vcc, v4, v8 ; 7C081104
v_cndmask_b32_e64 v4, 0, -1.0, vcc ; D2000004 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v4, s5 ; 7E080205
v_mac_f32_e32 v4, s4, v0 ; 3E080004
v_add_f32_e64 v0, 0, v4 clamp ; D2060800 00020880
v_sub_f32_e32 v4, 1.0, v0 ; 080800F2
v_mul_f32_e32 v5, s6, v4 ; 100A0806
v_mac_f32_e32 v5, v1, v0 ; 3E0A0101
v_mul_f32_e32 v1, s7, v4 ; 10020807
v_mac_f32_e32 v1, v2, v0 ; 3E020102
v_mul_f32_e32 v2, s0, v4 ; 10040800
v_mac_f32_e32 v2, v3, v0 ; 3E040103
v_cvt_pkrtz_f16_f32_e32 v0, v2, v8 ; 5E001102
v_cvt_pkrtz_f16_f32_e32 v1, v5, v1 ; 5E020305
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 208 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL CONST[0..13]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[2].xyxx
1: MOV TEMP[1].x, IN[1].yyyy
2: ADD TEMP[2].xyz, IN[0].xyzz, -CONST[4].xyzz
3: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
4: FSLT TEMP[3].x, CONST[5].xxxx, TEMP[3].xxxx
5: UIF TEMP[3].xxxx :0
6: MOV TEMP[1].x, IMM[0].xxxx
7: MOV TEMP[0].xy, IMM[0].xxxx
8: ENDIF
9: MAD TEMP[2].xyz, CONST[1].xyzz, TEMP[0].xxxx, IN[0].xyzz
10: LRP TEMP[1].x, CONST[4].wwww, TEMP[1].xxxx, TEMP[0].yyyy
11: MAD TEMP[2].xyz, CONST[2].xyzz, TEMP[1].xxxx, TEMP[2].xyzz
12: ABS TEMP[0].x, TEMP[0].xxxx
13: MUL TEMP[0].xyz, CONST[3].xyzz, TEMP[0].xxxx
14: MAD TEMP[2].xyz, TEMP[0].xyzz, CONST[2].wwww, TEMP[2].xyzz
15: MOV TEMP[0].x, IN[1].xxxx
16: FSLT TEMP[1].x, IMM[0].xxxx, IN[1].yyyy
17: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy
18: MOV TEMP[0].y, TEMP[1].xxxx
19: MOV TEMP[1].xyz, IMM[0].xxyx
20: MUL TEMP[3], CONST[10], TEMP[2].xxxx
21: MAD TEMP[3], CONST[11], TEMP[2].yyyy, TEMP[3]
22: MAD TEMP[3], CONST[12], TEMP[2].zzzz, TEMP[3]
23: MAD TEMP[3].z, CONST[13], IN[0].wwww, TEMP[3]
24: MUL TEMP[3].x, TEMP[3].zzzz, CONST[0].wwww
25: MOV TEMP[1].w, -TEMP[3].xxxx
26: MUL TEMP[3], CONST[6], TEMP[2].xxxx
27: MAD TEMP[3], CONST[7], TEMP[2].yyyy, TEMP[3]
28: MAD TEMP[2], CONST[8], TEMP[2].zzzz, TEMP[3]
29: MAD TEMP[2], CONST[9], IN[0].wwww, TEMP[2]
30: MOV TEMP[0].xy, TEMP[0].xyxx
31: MOV OUT[1], TEMP[1]
32: MOV OUT[2], TEMP[0]
33: MOV OUT[0], TEMP[2]
34: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%49 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%50 = load <16 x i8>, <16 x i8> addrspace(2)* %49, align 16, !tbaa !0
%51 = add i32 %5, %7
%52 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %50, i32 0, i32 %51)
%53 = extractelement <4 x float> %52, i32 0
%54 = extractelement <4 x float> %52, i32 1
%55 = extractelement <4 x float> %52, i32 2
%56 = extractelement <4 x float> %52, i32 3
%57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0
%59 = add i32 %5, %7
%60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59)
%61 = extractelement <4 x float> %60, i32 0
%62 = extractelement <4 x float> %60, i32 1
%63 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%64 = load <16 x i8>, <16 x i8> addrspace(2)* %63, align 16, !tbaa !0
%65 = add i32 %5, %7
%66 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %64, i32 0, i32 %65)
%67 = extractelement <4 x float> %66, i32 0
%68 = extractelement <4 x float> %66, i32 1
%69 = fsub float %53, %24
%70 = fsub float %54, %25
%71 = fsub float %55, %26
%72 = fmul float %69, %69
%73 = fmul float %70, %70
%74 = fadd float %73, %72
%75 = fmul float %71, %71
%76 = fadd float %74, %75
%77 = fcmp olt float %28, %76
%. = select i1 %77, float 0.000000e+00, float %67
%.16 = select i1 %77, float 0.000000e+00, float %68
%.17 = select i1 %77, float 0.000000e+00, float %62
%78 = fmul float %14, %.
%79 = fadd float %78, %53
%80 = fmul float %15, %.
%81 = fadd float %80, %54
%82 = fmul float %16, %.
%83 = fadd float %82, %55
%84 = call float @llvm.AMDGPU.lrp(float %27, float %.17, float %.16)
%85 = fmul float %17, %84
%86 = fadd float %85, %79
%87 = fmul float %18, %84
%88 = fadd float %87, %81
%89 = fmul float %19, %84
%90 = fadd float %89, %83
%91 = call float @llvm.fabs.f32(float %.)
%92 = fmul float %21, %91
%93 = fmul float %22, %91
%94 = fmul float %23, %91
%95 = fmul float %92, %20
%96 = fadd float %95, %86
%97 = fmul float %93, %20
%98 = fadd float %97, %88
%99 = fmul float %94, %20
%100 = fadd float %99, %90
%101 = fcmp ogt float %62, 0.000000e+00
%102 = select i1 %101, float 1.000000e+00, float 0.000000e+00
%103 = fmul float %45, %96
%104 = fmul float %46, %98
%105 = fadd float %104, %103
%106 = fmul float %47, %100
%107 = fadd float %106, %105
%108 = fmul float %48, %56
%109 = fadd float %108, %107
%110 = fmul float %109, %13
%111 = fsub float -0.000000e+00, %110
%112 = fmul float %29, %96
%113 = fmul float %30, %96
%114 = fmul float %31, %96
%115 = fmul float %32, %96
%116 = fmul float %33, %98
%117 = fadd float %116, %112
%118 = fmul float %34, %98
%119 = fadd float %118, %113
%120 = fmul float %35, %98
%121 = fadd float %120, %114
%122 = fmul float %36, %98
%123 = fadd float %122, %115
%124 = fmul float %37, %100
%125 = fadd float %124, %117
%126 = fmul float %38, %100
%127 = fadd float %126, %119
%128 = fmul float %39, %100
%129 = fadd float %128, %121
%130 = fmul float %40, %100
%131 = fadd float %130, %123
%132 = fmul float %41, %56
%133 = fadd float %132, %125
%134 = fmul float %42, %56
%135 = fadd float %134, %127
%136 = fmul float %43, %56
%137 = fadd float %136, %129
%138 = fmul float %44, %56
%139 = fadd float %138, %131
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float %111)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %61, float %102, float %94, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %133, float %135, float %137, float %139)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0xe ; C204010E
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700
s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110
s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111
s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112
s_buffer_load_dword s7, s[0:3], 0x13 ; C2038113
s_buffer_load_dword s9, s[0:3], 0x14 ; C2048114
s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118
s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119
s_buffer_load_dword s12, s[0:3], 0x1a ; C206011A
s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B
s_buffer_load_dword s14, s[0:3], 0x9 ; C2070109
s_buffer_load_dword s15, s[0:3], 0xa ; C207810A
s_buffer_load_dword s16, s[0:3], 0xb ; C208010B
s_buffer_load_dword s17, s[0:3], 0xc ; C208810C
s_buffer_load_dword s18, s[0:3], 0xd ; C209010D
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_sub_f32_e64 v0, 1.0, s7 ; D2080000 00000EF2
s_buffer_load_dword s19, s[0:3], 0x4 ; C2098104
s_buffer_load_dword s20, s[0:3], 0x5 ; C20A0105
s_buffer_load_dword s21, s[0:3], 0x6 ; C20A8106
s_buffer_load_dword s22, s[0:3], 0x8 ; C20B0108
v_subrev_f32_e32 v9, s4, v1 ; 0A120204
v_subrev_f32_e32 v10, s5, v2 ; 0A140405
v_mul_f32_e32 v9, v9, v9 ; 10121309
v_mac_f32_e32 v9, v10, v10 ; 3E12150A
v_subrev_f32_e32 v10, s6, v3 ; 0A140606
v_mac_f32_e32 v9, v10, v10 ; 3E12150A
v_cmp_lt_f32_e32 vcc, s9, v9 ; 7C021209
v_cndmask_b32_e64 v7, v7, 0, vcc ; D2000007 01A90107
v_cndmask_b32_e64 v8, v8, 0, vcc ; D2000008 01A90108
v_cndmask_b32_e64 v9, v6, 0, vcc ; D2000009 01A90106
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mac_f32_e32 v0, s7, v9 ; 3E001207
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mad_f32 v1, s19, v7, v1 ; D2820001 04060E13
v_mac_f32_e32 v1, s22, v0 ; 3E020016
v_mad_f32 v2, s20, v7, v2 ; D2820002 040A0E14
v_mac_f32_e32 v2, s14, v0 ; 3E04000E
v_mad_f32 v3, s21, v7, v3 ; D2820003 040E0E15
v_mac_f32_e32 v3, s15, v0 ; 3E06000F
v_mul_f32_e64 v0, s17, |v7| ; D2100200 00020E11
s_buffer_load_dword s4, s[0:3], 0x2a ; C202012A
s_buffer_load_dword s5, s[0:3], 0x2e ; C202812E
s_buffer_load_dword s6, s[0:3], 0x32 ; C2030132
s_buffer_load_dword s7, s[0:3], 0x36 ; C2038136
s_buffer_load_dword s9, s[0:3], 0x3 ; C2048103
v_mul_f32_e64 v8, s18, |v7| ; D2100208 00020E12
v_mul_f32_e64 v7, s8, |v7| ; D2100207 00020E08
v_mac_f32_e32 v1, s16, v0 ; 3E020010
v_mac_f32_e32 v2, s16, v8 ; 3E041010
v_mac_f32_e32 v3, s16, v7 ; 3E060E10
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v0, s4, v1 ; 10000204
v_mac_f32_e32 v0, s5, v2 ; 3E000405
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mac_f32_e32 v0, s7, v4 ; 3E000807
v_mul_f32_e32 v0, s9, v0 ; 10000009
v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000
v_mov_b32_e32 v8, 0 ; 7E100280
v_mov_b32_e32 v9, 1.0 ; 7E1202F2
exp 15, 32, 0, 0, 0, v8, v8, v9, v0 ; F800020F 00090808
v_cmp_lt_f32_e32 vcc, 0, v6 ; 7C020C80
s_waitcnt expcnt(0) ; BF8C070F
v_cndmask_b32_e64 v0, 0, 1.0, vcc ; D2000000 01A9E480
exp 15, 33, 0, 0, 0, v5, v0, v7, v8 ; F800021F 08070005
s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126
s_buffer_load_dword s5, s[0:3], 0x27 ; C2028127
s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C
s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D
s_buffer_load_dword s8, s[0:3], 0x1e ; C204011E
s_buffer_load_dword s9, s[0:3], 0x1f ; C204811F
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x21 ; C2078121
s_buffer_load_dword s16, s[0:3], 0x22 ; C2080122
s_buffer_load_dword s17, s[0:3], 0x23 ; C2088123
s_buffer_load_dword s18, s[0:3], 0x24 ; C2090124
s_buffer_load_dword s0, s[0:3], 0x25 ; C2000125
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s10, v1 ; 1000020A
v_mul_f32_e32 v5, s11, v1 ; 100A020B
v_mul_f32_e32 v6, s12, v1 ; 100C020C
v_mul_f32_e32 v1, s13, v1 ; 1002020D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v0, s6, v2 ; 3E000406
v_mac_f32_e32 v5, s7, v2 ; 3E0A0407
v_mac_f32_e32 v6, s8, v2 ; 3E0C0408
v_mac_f32_e32 v1, s9, v2 ; 3E020409
v_mac_f32_e32 v0, s14, v3 ; 3E00060E
v_mac_f32_e32 v5, s15, v3 ; 3E0A060F
v_mac_f32_e32 v6, s16, v3 ; 3E0C0610
v_mac_f32_e32 v1, s17, v3 ; 3E020611
v_mac_f32_e32 v0, s18, v4 ; 3E000812
v_mac_f32_e32 v5, s0, v4 ; 3E0A0800
v_mac_f32_e32 v6, s4, v4 ; 3E0C0804
v_mac_f32_e32 v1, s5, v4 ; 3E020805
exp 15, 12, 0, 1, 0, v0, v5, v6, v1 ; F80008CF 01060500
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 12
Code Size: 500 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0010, 1.0000, 0.2813, 0.5000}
IMM[1] FLT32 { 1.0000, 255.0000, 0.0039, 0.0000}
0: MOV TEMP[0].xy, IN[1].xyyy
1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D
2: FSLT TEMP[0].x, TEMP[0].wwww, IMM[0].xxxx
3: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].yyyy
4: KILL_IF -TEMP[0].xxxx
5: ADD TEMP[0].x, IN[0].zzzz, IMM[0].yyyy
6: RCP TEMP[0].x, TEMP[0].xxxx
7: MUL TEMP[0].xy, IN[0].xyyy, TEMP[0].xxxx
8: MAD TEMP[0].xy, IMM[0].zzzz, TEMP[0].xyyy, IMM[0].wwww
9: MUL TEMP[1].xy, IMM[1].xyyy, IN[0].wwww
10: FRC TEMP[1].xy, TEMP[1].xyyy
11: MOV TEMP[2].y, TEMP[1].yyyy
12: MUL TEMP[3].x, TEMP[1].yyyy, IMM[1].zzzz
13: ADD TEMP[2].x, TEMP[1].xxxx, -TEMP[3].xxxx
14: MOV TEMP[0].zw, TEMP[2].yyxy
15: MOV OUT[0], TEMP[0]
16: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
%24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
%26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%32 = bitcast float %30 to i32
%33 = bitcast float %31 to i32
%34 = insertelement <2 x i32> undef, i32 %32, i32 0
%35 = insertelement <2 x i32> %34, i32 %33, i32 1
%36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %23, <16 x i8> %25, i32 2)
%37 = extractelement <4 x float> %36, i32 3
%38 = fcmp olt float %37, 0x3F50624DE0000000
%39 = select i1 %38, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %39)
%40 = fadd float %28, 1.000000e+00
%41 = fdiv float 1.000000e+00, %40
%42 = fmul float %26, %41
%43 = fmul float %27, %41
%44 = fmul float %42, 0x3FD20033A0000000
%45 = fadd float %44, 5.000000e-01
%46 = fmul float %43, 0x3FD20033A0000000
%47 = fadd float %46, 5.000000e-01
%48 = fmul float %29, 2.550000e+02
%49 = call float @llvm.floor.f32(float %29)
%50 = fsub float %29, %49
%51 = call float @llvm.floor.f32(float %48)
%52 = fsub float %48, %51
%53 = fmul float %52, 0x3F70101020000000
%54 = fsub float %50, %53
%55 = call i32 @llvm.SI.packf16(float %45, float %47)
%56 = bitcast i32 %55 to float
%57 = call i32 @llvm.SI.packf16(float %54, float %52)
%58 = bitcast i32 %57 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %56, float %58, float %56, float %58)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[8:15], s[0:3] ; F0800800 00020006
v_mov_b32_e32 v1, 0x3a83126f ; 7E0202FF 3A83126F
s_waitcnt vmcnt(0) ; BF8C0770
v_cmp_gt_f32_e32 vcc, v1, v0 ; 7C080101
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
v_add_f32_e32 v0, 1.0, v4 ; 060008F2
v_rcp_f32_e32 v0, v0 ; 7E005500
v_mul_f32_e32 v1, v0, v2 ; 10020500
v_mul_f32_e32 v0, v0, v3 ; 10000700
v_mov_b32_e32 v2, 0x3e90019d ; 7E0402FF 3E90019D
v_mad_f32 v1, v1, v2, 0.5 ; D2820001 03C20501
v_mad_f32 v0, v0, v2, 0.5 ; D2820000 03C20500
v_mov_b32_e32 v2, 0x437f0000 ; 7E0402FF 437F0000
v_mul_f32_e32 v3, v2, v5 ; 10060B02
v_floor_f32_e32 v4, v5 ; 7E084905
v_subrev_f32_e32 v4, v4, v5 ; 0A080B04
v_floor_f32_e32 v3, v3 ; 7E064903
v_mad_f32 v2, v5, v2, -v3 ; D2820002 840E0505
v_madmk_f32_e32 v3, v2, v4, 0xbb808081 ; 40060902 BB808081
v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101
v_cvt_pkrtz_f16_f32_e32 v1, v3, v2 ; 5E020503
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Code Size: 204 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..18]
DCL TEMP[0..3], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000}
0: FSNE TEMP[0].x, CONST[1].zzzz, IMM[0].xxxx
1: UIF TEMP[0].xxxx :0
2: MUL TEMP[0], CONST[2], IN[0].xxxx
3: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
4: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
5: ADD TEMP[0].xyz, TEMP[0], CONST[5]
6: MOV TEMP[1].x, CONST[6].xxxx
7: MOV TEMP[1].y, CONST[7].xxxx
8: MOV TEMP[1].z, CONST[8].xxxx
9: MOV TEMP[2].x, CONST[6].yyyy
10: MOV TEMP[2].y, CONST[7].yyyy
11: MOV TEMP[2].z, CONST[8].yyyy
12: MOV TEMP[3].x, CONST[6].zzzz
13: MOV TEMP[3].y, CONST[7].zzzz
14: MOV TEMP[3].z, CONST[8].zzzz
15: MUL TEMP[1].xyz, TEMP[1].xyzz, IN[1].xxxx
16: MAD TEMP[1].xyz, TEMP[2].xyzz, IN[1].yyyy, TEMP[1].xyzz
17: MAD TEMP[1].xyz, TEMP[3].xyzz, IN[1].zzzz, TEMP[1].xyzz
18: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
19: RSQ TEMP[2].x, TEMP[2].xxxx
20: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
21: MUL TEMP[2].xyz, TEMP[0].xyzz, CONST[0].wwww
22: ADD TEMP[2].xyz, CONST[0].xyzz, -TEMP[2].xyzz
23: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
24: RSQ TEMP[3].x, TEMP[3].xxxx
25: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
26: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[2].xyzz
27: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[2].xxxx
28: ADD TEMP[2].x, IMM[0].yyyy, -TEMP[2].xxxx
29: SQRT TEMP[2].x, TEMP[2].xxxx
30: MUL TEMP[2].x, CONST[1].zzzz, TEMP[2].xxxx
31: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
32: ADD TEMP[0].xyz, TEMP[0].xyzz, -TEMP[1].xyzz
33: MUL TEMP[1], CONST[10], TEMP[0].xxxx
34: MAD TEMP[1], CONST[11], TEMP[0].yyyy, TEMP[1]
35: MAD TEMP[0], CONST[12], TEMP[0].zzzz, TEMP[1]
36: ADD TEMP[0], TEMP[0], CONST[13]
37: ELSE :0
38: MUL TEMP[1], CONST[15], IN[0].xxxx
39: MAD TEMP[1], CONST[16], IN[0].yyyy, TEMP[1]
40: MAD TEMP[1], CONST[17], IN[0].zzzz, TEMP[1]
41: ADD TEMP[0], TEMP[1], CONST[18]
42: ENDIF
43: MOV TEMP[1].xyw, TEMP[0].xyxw
44: RCP TEMP[2].x, TEMP[0].wwww
45: MUL TEMP[2].x, CONST[1].xxxx, TEMP[2].xxxx
46: MOV_SAT TEMP[2].x, TEMP[2].xxxx
47: ADD TEMP[2].x, TEMP[0].zzzz, TEMP[2].xxxx
48: MAX TEMP[0].x, TEMP[2].xxxx, -TEMP[0].wwww
49: LRP TEMP[0].x, CONST[1].yyyy, TEMP[0].xxxx, TEMP[2].xxxx
50: MOV TEMP[1].z, TEMP[0].xxxx
51: MAD TEMP[0].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
52: MOV OUT[1], TEMP[0]
53: MOV OUT[0], TEMP[1]
54: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%21 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%22 = load <16 x i8>, <16 x i8> addrspace(2)* %21, align 16, !tbaa !0
%23 = add i32 %5, %7
%24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23)
%25 = extractelement <4 x float> %24, i32 0
%26 = extractelement <4 x float> %24, i32 1
%27 = extractelement <4 x float> %24, i32 2
%28 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
%30 = add i32 %5, %7
%31 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %29, i32 0, i32 %30)
%32 = extractelement <4 x float> %31, i32 0
%33 = extractelement <4 x float> %31, i32 1
%34 = extractelement <4 x float> %31, i32 2
%35 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%36 = load <16 x i8>, <16 x i8> addrspace(2)* %35, align 16, !tbaa !0
%37 = add i32 %5, %7
%38 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %36, i32 0, i32 %37)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = fcmp une float %16, 0.000000e+00
br i1 %41, label %IF, label %ELSE
IF: ; preds = %main_body
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%82 = fmul float %78, %25
%83 = fmul float %77, %25
%84 = fmul float %76, %25
%85 = fmul float %75, %26
%86 = fadd float %85, %82
%87 = fmul float %74, %26
%88 = fadd float %87, %83
%89 = fmul float %73, %26
%90 = fadd float %89, %84
%91 = fmul float %72, %27
%92 = fadd float %91, %86
%93 = fmul float %71, %27
%94 = fadd float %93, %88
%95 = fmul float %70, %27
%96 = fadd float %95, %90
%97 = fadd float %92, %69
%98 = fadd float %94, %68
%99 = fadd float %96, %67
%100 = fmul float %66, %32
%101 = fmul float %63, %32
%102 = fmul float %60, %32
%103 = fmul float %65, %33
%104 = fadd float %103, %100
%105 = fmul float %62, %33
%106 = fadd float %105, %101
%107 = fmul float %59, %33
%108 = fadd float %107, %102
%109 = fmul float %64, %34
%110 = fadd float %109, %104
%111 = fmul float %61, %34
%112 = fadd float %111, %106
%113 = fmul float %58, %34
%114 = fadd float %113, %108
%115 = fmul float %110, %110
%116 = fmul float %112, %112
%117 = fadd float %116, %115
%118 = fmul float %114, %114
%119 = fadd float %117, %118
%120 = call float @llvm.AMDGPU.rsq.clamped.f32(float %119)
%121 = fmul float %110, %120
%122 = fmul float %112, %120
%123 = fmul float %114, %120
%124 = fmul float %97, %13
%125 = fmul float %98, %13
%126 = fmul float %99, %13
%127 = fsub float %81, %124
%128 = fsub float %80, %125
%129 = fsub float %79, %126
%130 = fmul float %127, %127
%131 = fmul float %128, %128
%132 = fadd float %131, %130
%133 = fmul float %129, %129
%134 = fadd float %132, %133
%135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134)
%136 = fmul float %127, %135
%137 = fmul float %128, %135
%138 = fmul float %129, %135
%139 = fmul float %121, %136
%140 = fmul float %122, %137
%141 = fadd float %140, %139
%142 = fmul float %123, %138
%143 = fadd float %141, %142
%144 = fmul float %143, %143
%145 = fsub float 1.000000e+00, %144
%146 = call float @llvm.sqrt.f32(float %145)
%147 = fmul float %16, %146
%148 = fmul float %121, %147
%149 = fmul float %122, %147
%150 = fmul float %123, %147
%151 = fsub float %97, %148
%152 = fsub float %98, %149
%153 = fsub float %99, %150
%154 = fmul float %57, %151
%155 = fmul float %56, %151
%156 = fmul float %55, %151
%157 = fmul float %54, %151
%158 = fmul float %53, %152
%159 = fadd float %158, %154
%160 = fmul float %52, %152
%161 = fadd float %160, %155
%162 = fmul float %51, %152
%163 = fadd float %162, %156
%164 = fmul float %50, %152
%165 = fadd float %164, %157
%166 = fmul float %49, %153
%167 = fadd float %166, %159
%168 = fmul float %48, %153
%169 = fadd float %168, %161
%170 = fmul float %47, %153
%171 = fadd float %170, %163
%172 = fmul float %46, %153
%173 = fadd float %172, %165
%174 = fadd float %167, %45
%175 = fadd float %169, %44
%176 = fadd float %171, %43
%177 = fadd float %173, %42
br label %ENDIF
ELSE: ; preds = %main_body
%178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%183 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%186 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%188 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%189 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%191 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%192 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%193 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%194 = fmul float %193, %25
%195 = fmul float %192, %25
%196 = fmul float %191, %25
%197 = fmul float %190, %25
%198 = fmul float %189, %26
%199 = fadd float %198, %194
%200 = fmul float %188, %26
%201 = fadd float %200, %195
%202 = fmul float %187, %26
%203 = fadd float %202, %196
%204 = fmul float %186, %26
%205 = fadd float %204, %197
%206 = fmul float %185, %27
%207 = fadd float %206, %199
%208 = fmul float %184, %27
%209 = fadd float %208, %201
%210 = fmul float %183, %27
%211 = fadd float %210, %203
%212 = fmul float %182, %27
%213 = fadd float %212, %205
%214 = fadd float %207, %181
%215 = fadd float %209, %180
%216 = fadd float %211, %179
%217 = fadd float %213, %178
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp.0 = phi float [ %174, %IF ], [ %214, %ELSE ]
%temp1.0 = phi float [ %175, %IF ], [ %215, %ELSE ]
%temp2.0 = phi float [ %176, %IF ], [ %216, %ELSE ]
%temp3.0 = phi float [ %177, %IF ], [ %217, %ELSE ]
%218 = fdiv float 1.000000e+00, %temp3.0
%219 = fmul float %14, %218
%220 = call float @llvm.AMDIL.clamp.(float %219, float 0.000000e+00, float 1.000000e+00)
%221 = fadd float %temp2.0, %220
%222 = fsub float -0.000000e+00, %temp3.0
%223 = call float @llvm.maxnum.f32(float %221, float %222)
%224 = call float @llvm.AMDGPU.lrp(float %15, float %223, float %221)
%225 = fmul float %39, %17
%226 = fadd float %225, %19
%227 = fmul float %40, %18
%228 = fadd float %227, %20
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %226, float %228, float %temp2.0, float %temp3.0)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %temp.0, float %temp1.0, float %224, float %temp3.0)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex-shader @main whose LLVM IR ends just above.
; Overall shape: fetch three vertex attributes, evaluate one of two
; position paths selected by a constant, then run a shared tail and export.
;
; Descriptor setup: s[0:3] is the descriptor used by every
; s_buffer_load_dword below; s[4:7], s[12:15] and s[16:19] are the descriptors
; used by the three buffer_load_format_xyzw fetches.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
; v0 = s10 + v0 is the index used by all three idxen fetches.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; First attribute -> v[4:7]. Only v4..v6 are read afterwards; v7 is
; overwritten by the next fetch, whose destination range v[7:10] overlaps it.
; NOTE(review): the vmcnt(0) wait between the two fetches is presumably there
; because of that overlapping destination register - confirm.
buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[7:10], v0, s[12:15], 0 idxen ; E00C2000 80030700
buffer_load_format_xyzw v[0:3], v0, s[16:19], 0 idxen ; E00C2000 80040000
; s4 = constant dword 0x6 (the branch selector and, later, a scale factor);
; s12/s11 = constant dwords 0x3a/0x3b (additive terms of the first export).
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s12, s[0:3], 0x3a ; C206013A
s_buffer_load_dword s11, s[0:3], 0x3b ; C205813B
s_waitcnt vmcnt(1) ; BF8C0771
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Divergent branch on (s4 == 0.0): exec is narrowed to the lanes where the
; compare holds, and s[6:7] is left holding the remaining lanes for the
; second path further down.
v_cmp_eq_f32_e64 s[6:7], 0, s4 ; D0040006 00000880
s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406
s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E
s_cbranch_execz BB0_1 ; BF880000
; --- Path taken when s4 == 0.0 ---
; (v10, v11, v12, v13) = dwords 0x3c..0x3f * v4 + dwords 0x40..0x43 * v5
;                      + dwords 0x44..0x47 * v6 + dwords 0x48..0x4b.
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
s_buffer_load_dword s8, s[0:3], 0x46 ; C2040146
s_buffer_load_dword s9, s[0:3], 0x47 ; C2048147
s_buffer_load_dword s10, s[0:3], 0x48 ; C2050148
s_buffer_load_dword s13, s[0:3], 0x49 ; C2068149
s_buffer_load_dword s14, s[0:3], 0x4a ; C207014A
s_buffer_load_dword s15, s[0:3], 0x41 ; C2078141
s_buffer_load_dword s16, s[0:3], 0x42 ; C2080142
s_buffer_load_dword s17, s[0:3], 0x43 ; C2088143
s_buffer_load_dword s18, s[0:3], 0x44 ; C2090144
s_buffer_load_dword s19, s[0:3], 0x45 ; C2098145
s_buffer_load_dword s20, s[0:3], 0x3c ; C20A013C
s_buffer_load_dword s21, s[0:3], 0x3d ; C20A813D
s_buffer_load_dword s22, s[0:3], 0x3e ; C20B013E
s_buffer_load_dword s23, s[0:3], 0x3f ; C20B813F
s_buffer_load_dword s24, s[0:3], 0x40 ; C20C0140
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s20, v4 ; 10040814
v_mul_f32_e32 v3, s21, v4 ; 10060815
v_mul_f32_e32 v12, s22, v4 ; 10180816
v_mul_f32_e32 v13, s23, v4 ; 101A0817
v_mac_f32_e32 v2, s24, v5 ; 3E040A18
v_mac_f32_e32 v3, s15, v5 ; 3E060A0F
v_mac_f32_e32 v12, s16, v5 ; 3E180A10
v_mac_f32_e32 v13, s17, v5 ; 3E1A0A11
v_mac_f32_e32 v2, s18, v6 ; 3E040C12
v_mac_f32_e32 v3, s19, v6 ; 3E060C13
v_mac_f32_e32 v12, s8, v6 ; 3E180C08
v_mac_f32_e32 v13, s9, v6 ; 3E1A0C09
v_add_f32_e32 v10, s10, v2 ; 0614040A
v_add_f32_e32 v11, s13, v3 ; 0616060D
v_add_f32_e32 v12, s14, v12 ; 0618180E
v_add_f32_e32 v13, s5, v13 ; 061A1A05
; NOTE(review): the branch target BB0_1 is not printed as a label in this
; dump; it is presumably this point - confirm.
; exec is widened to cover both paths while the values shared by the tail are
; loaded (dwords 0x4, 0x5, 0x38, 0x39) and v2/v3 are seeded from s12/s11;
; the s_xor below then leaves only the lanes of the second path enabled.
s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506
s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s8, s[0:3], 0x38 ; C2040138
s_buffer_load_dword s5, s[0:3], 0x39 ; C2028139
v_mov_b32_e32 v2, s12 ; 7E04020C
v_mov_b32_e32 v3, s11 ; 7E06020B
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[6:7] ; 89FE067E
s_cbranch_execz BB0_4 ; BF880000
; --- Path taken when s4 != 0.0 ---
s_buffer_load_dword s11, s[0:3], 0x37 ; C2058137
s_buffer_load_dword s12, s[0:3], 0x32 ; C2060132
s_buffer_load_dword s13, s[0:3], 0x33 ; C2068133
s_buffer_load_dword s14, s[0:3], 0x34 ; C2070134
s_buffer_load_dword s15, s[0:3], 0x35 ; C2078135
s_buffer_load_dword s16, s[0:3], 0x36 ; C2080136
s_buffer_load_dword s17, s[0:3], 0x2d ; C208812D
s_buffer_load_dword s18, s[0:3], 0x2e ; C209012E
s_buffer_load_dword s19, s[0:3], 0x2f ; C209812F
s_buffer_load_dword s20, s[0:3], 0x30 ; C20A0130
s_buffer_load_dword s21, s[0:3], 0x31 ; C20A8131
s_buffer_load_dword s22, s[0:3], 0x28 ; C20B0128
s_buffer_load_dword s23, s[0:3], 0x29 ; C20B8129
s_buffer_load_dword s24, s[0:3], 0x2a ; C20C012A
s_buffer_load_dword s25, s[0:3], 0x2b ; C20C812B
s_buffer_load_dword s26, s[0:3], 0x2c ; C20D012C
s_buffer_load_dword s27, s[0:3], 0x1d ; C20D811D
s_buffer_load_dword s28, s[0:3], 0x1e ; C20E011E
s_buffer_load_dword s29, s[0:3], 0x20 ; C20E8120
s_buffer_load_dword s30, s[0:3], 0x21 ; C20F0121
s_buffer_load_dword s31, s[0:3], 0x22 ; C20F8122
s_buffer_load_dword s32, s[0:3], 0x16 ; C2100116
s_buffer_load_dword s33, s[0:3], 0x18 ; C2108118
s_buffer_load_dword s34, s[0:3], 0x19 ; C2110119
s_buffer_load_dword s35, s[0:3], 0x1a ; C211811A
s_buffer_load_dword s36, s[0:3], 0x1c ; C212011C
s_buffer_load_dword s37, s[0:3], 0x10 ; C2128110
s_buffer_load_dword s38, s[0:3], 0x11 ; C2130111
s_waitcnt lgkmcnt(0) ; BF8C007F
; Transform the second attribute (v7, v8, v9) by three constant rows:
;   v10 = dot(dwords 0x20..0x22, v7..v9)
;   v11 = dot(dwords 0x18..0x1a, v7..v9)
;   v7  = dot(dwords 0x1c..0x1e, v7..v9)
; Scalar registers are reloaded with new constants as soon as their previous
; value has been consumed.
v_mul_f32_e32 v10, s29, v7 ; 10140E1D
v_mac_f32_e32 v10, s30, v8 ; 3E14101E
v_mac_f32_e32 v10, s31, v9 ; 3E14121F
s_buffer_load_dword s29, s[0:3], 0x12 ; C20E8112
v_mul_f32_e32 v11, s33, v7 ; 10160E21
v_mac_f32_e32 v11, s34, v8 ; 3E161022
v_mac_f32_e32 v11, s35, v9 ; 3E161223
v_mul_f32_e32 v7, s36, v7 ; 100E0E24
v_mac_f32_e32 v7, s27, v8 ; 3E0E101B
v_mac_f32_e32 v7, s28, v9 ; 3E0E121C
s_buffer_load_dword s27, s[0:3], 0x14 ; C20D8114
s_buffer_load_dword s28, s[0:3], 0x15 ; C20E0115
s_buffer_load_dword s30, s[0:3], 0x9 ; C20F0109
s_buffer_load_dword s31, s[0:3], 0xa ; C20F810A
s_buffer_load_dword s33, s[0:3], 0xc ; C210810C
s_buffer_load_dword s34, s[0:3], 0xd ; C211010D
s_buffer_load_dword s35, s[0:3], 0xe ; C211810E
s_buffer_load_dword s36, s[0:3], 0x0 ; C2120100
; v8 = clamped reciprocal square root of the squared length of (v11, v7, v10).
v_mul_f32_e32 v8, v11, v11 ; 1010170B
v_mac_f32_e32 v8, v7, v7 ; 3E100F07
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
s_waitcnt lgkmcnt(0) ; BF8C007F
; Transform the first attribute (v4, v5, v6) by constants 0x8..0x16:
;   v9  = 0x9*v4 + 0xd*v5 + 0x11*v6 + 0x15
;   v12 = 0xa*v4 + 0xe*v5 + 0x12*v6 + 0x16
;   v4  = 0x8*v4 + 0xc*v5 + 0x10*v6 + 0x14   (computed a few lines below)
v_mul_f32_e32 v9, s30, v4 ; 1012081E
v_mul_f32_e32 v12, s31, v4 ; 1018081F
s_buffer_load_dword s30, s[0:3], 0x1 ; C20F0101
v_mac_f32_e32 v9, s34, v5 ; 3E120A22
v_mac_f32_e32 v12, s35, v5 ; 3E180A23
v_mac_f32_e32 v9, s38, v6 ; 3E120C26
v_mac_f32_e32 v12, s29, v6 ; 3E180C1D
v_add_f32_e32 v9, s28, v9 ; 0612121C
v_add_f32_e32 v12, s32, v12 ; 06181820
s_buffer_load_dword s28, s[0:3], 0x2 ; C20E0102
s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103
s_buffer_load_dword s31, s[0:3], 0x8 ; C20F8108
s_waitcnt lgkmcnt(0) ; BF8C007F
; (v5, v14, v6) = dwords (0x0, 0x1, 0x2) - (v4, v9, v12) * dword 0x3,
; then normalised with a second clamped rsq.
v_mov_b32_e32 v13, s29 ; 7E1A021D
v_mad_f32 v14, -v9, v13, s30 ; D282000E 207A1B09
v_mul_f32_e32 v4, s31, v4 ; 1008081F
v_mac_f32_e32 v4, s33, v5 ; 3E080A21
v_mac_f32_e32 v4, s37, v6 ; 3E080C25
v_add_f32_e32 v4, s27, v4 ; 0608081B
v_mad_f32 v5, -v4, v13, s36 ; D2820005 20921B04
v_mad_f32 v6, -v12, v13, s28 ; D2820006 20721B0C
v_mul_f32_e32 v13, v5, v5 ; 101A0B05
v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E
v_mac_f32_e32 v13, v6, v6 ; 3E1A0D06
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
; Normalise both vectors, take their dot product d, and form
; s4 * sqrt(1 - d*d) in v5.
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v8, v8, v10 ; 10101508
v_mul_f32_e32 v5, v13, v5 ; 100A0B0D
v_mul_f32_e32 v10, v13, v14 ; 10141D0D
v_mul_f32_e32 v6, v13, v6 ; 100C0D0D
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mac_f32_e32 v5, v10, v7 ; 3E0A0F0A
v_mac_f32_e32 v5, v6, v8 ; 3E0A1106
v_mad_f32 v5, -v5, v5, 1.0 ; D2820005 23CA0B05
v_sqrt_f32_e32 v5, v5 ; 7E0A6705
v_mul_f32_e32 v5, s4, v5 ; 100A0A04
; Subtract that amount along the normalised second vector from the
; transformed first attribute: results in (v4, v6, v5).
v_mad_f32 v4, -v11, v5, v4 ; D2820004 24120B0B
v_mad_f32 v6, -v7, v5, v9 ; D2820006 24260B07
v_mad_f32 v5, -v8, v5, v12 ; D2820005 24320B08
; (v10, v11, v12, v13) = dwords 0x28..0x2b * v4 + dwords 0x2c..0x2f * v6
;                      + dwords 0x30..0x33 * v5 + dwords 0x34..0x37.
v_mul_f32_e32 v7, s22, v4 ; 100E0816
v_mul_f32_e32 v8, s23, v4 ; 10100817
v_mul_f32_e32 v9, s24, v4 ; 10120818
v_mul_f32_e32 v4, s25, v4 ; 10080819
v_mac_f32_e32 v7, s26, v6 ; 3E0E0C1A
v_mac_f32_e32 v8, s17, v6 ; 3E100C11
v_mac_f32_e32 v9, s18, v6 ; 3E120C12
v_mac_f32_e32 v4, s19, v6 ; 3E080C13
v_mac_f32_e32 v7, s20, v5 ; 3E0E0A14
v_mac_f32_e32 v8, s21, v5 ; 3E100A15
v_mac_f32_e32 v9, s12, v5 ; 3E120A0C
v_mac_f32_e32 v4, s13, v5 ; 3E080A0D
v_add_f32_e32 v10, s14, v7 ; 06140E0E
v_add_f32_e32 v11, s15, v8 ; 0616100F
v_add_f32_e32 v12, s16, v9 ; 06181210
v_add_f32_e32 v13, s11, v4 ; 061A080B
; --- Join: re-enable the lanes saved in s[6:7] ---
; NOTE(review): presumably the unprinted label BB0_4 - confirm.
; Both paths leave their four results in v10..v13 (the IR's %temp.0..%temp3.0).
s_or_b64 exec, exec, s[6:7] ; 88FE067E
; Shared tail, matching the IR after ENDIF:
;   t  = v12 + clamp(dword 0x4 / v13, 0, 1)
;   v4 = (1 - dword 0x5) * t + dword 0x5 * max(t, -v13)
v_rcp_f32_e32 v4, v13 ; 7E08550D
v_sub_f32_e64 v5, 1.0, s9 ; D2080005 000012F2
v_mul_f32_e32 v4, s10, v4 ; 1008080A
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_add_f32_e32 v4, v4, v12 ; 06081904
v_max_f32_e64 v6, v4, -v13 ; D2200006 40021B04
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mac_f32_e32 v4, s9, v6 ; 3E080C09
; v2 = dword 0x3a + dword 0x38 * v0, v3 = dword 0x3b + dword 0x39 * v1,
; using the x/y components of the third attribute.
v_mac_f32_e32 v2, s8, v0 ; 3E040008
v_mac_f32_e32 v3, s5, v1 ; 3E060205
; Two exports, in the same order as the two llvm.SI.export calls in the IR:
; target 32 first, then target 12 with the adjusted third component v4.
exp 15, 32, 0, 0, 0, v2, v3, v12, v13 ; F800020F 0D0C0302
exp 15, 12, 0, 1, 0, v10, v11, v4, v13 ; F80008CF 0D040B0A
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 16
Code Size: 800 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..1]
DCL TEMP[0], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D
2: MUL TEMP[0].x, TEMP[0].wwww, CONST[0].wwww
3: FSLT TEMP[0].x, TEMP[0].xxxx, CONST[1].xxxx
4: AND TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx
5: KILL_IF -TEMP[0].xxxx
6: MOV OUT[0], IMM[0].yyyy
7: END
; ModuleID = 'tgsi'
; Fragment-shader entry point (attribute group #0 sets "ShaderType"="0"),
; lowered from the TGSI FRAG program printed above. It samples a texture at
; the interpolated GENERIC[0] coordinate, multiplies the fourth sampled
; channel by a constant, passes a negative value to llvm.AMDGPU.kill when the
; product is below a second constant, and exports packed zeros as the colour.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of argument %1 is the descriptor handed to llvm.SI.load.const.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Byte offsets 12 and 16 line up with CONST[0].w and CONST[1].x, the two
; constants the TGSI listing reads (instructions 2 and 3).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
; First elements of arguments %3 and %2, passed to the sample intrinsic as its
; <32 x i8> and <16 x i8> operands.
%26 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%27 = load <32 x i8>, <32 x i8> addrspace(2)* %26, align 32, !tbaa !0
%28 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%29 = load <16 x i8>, <16 x i8> addrspace(2)* %28, align 16, !tbaa !0
; Interpolate two values that differ only in the first argument (0 and 1);
; they become the two texture coordinates (TGSI: IN[0].xy).
%30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
; The sample intrinsic takes its coordinates as a <2 x i32>, so the floats are
; bit-cast (not converted) before being packed into the vector.
%32 = bitcast float %30 to i32
%33 = bitcast float %31 to i32
%34 = insertelement <2 x i32> undef, i32 %32, i32 0
%35 = insertelement <2 x i32> %34, i32 %33, i32 1
%36 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %27, <16 x i8> %29, i32 2)
; Only element 3 of the sample result is used (TGSI: TEX TEMP[0].w).
%37 = extractelement <4 x float> %36, i32 3
; Kill test: %40 is -1.0 when (sampled.w * %24) < %25 and 0.0 otherwise.
; This is the lowering of TGSI instructions 2-5 (MUL, FSLT, AND, KILL_IF).
%38 = fmul float %37, %24
%39 = fcmp olt float %38, %25
%40 = select i1 %39, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %40)
; The output is IMM[0].yyyy = 0.0 in every channel. The two packf16 calls are
; identical; the disassembly below shows a single v_cvt_pkrtz_f16_f32 feeding
; all four export operands.
; NOTE(review): the packed 16-bit export presumably follows from
; export_16bpc = 0x3 in the shader key - confirm.
%41 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%42 = bitcast i32 %41 to float
%43 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00)
%44 = bitcast i32 %43 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment-shader @main above.
; s_wqm_b64 rewrites exec from itself (whole-quad-mode form of the mask)
; before any per-pixel work starts.
s_wqm_b64 exec, exec ; BEFE0A7E
; s[0:3]: descriptor for the two s_buffer_load_dword constant reads.
; s[12:19] and s[4:7]: the two descriptor operands of image_sample.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700
s_load_dwordx4 s[4:7], s[4:5], 0x0 ; C0820500
s_waitcnt lgkmcnt(0) ; BF8C007F
; Dword offsets 0x3 and 0x4 are byte offsets 12 and 16, i.e. the IR's %24
; (scale) and %25 (threshold). s0 is overwritten by the second load, which is
; the last read through s[0:3].
s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
; m0 = s9 for the interpolation instructions; each coordinate is produced by
; a p1/p2 pair from v0 and v1. Results: v2 and v3.
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
; Sample at (v2, v3) into the single register v0. The first immediate is 8,
; i.e. only bit 3 set, which corresponds to the IR using element 3 only.
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[4:7] ; F0800800 00230002
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; v0 = sampled value * scale; vcc = (threshold > v0), the same condition as
; the IR's fcmp olt with its operands swapped.
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_cmp_gt_f32_e32 vcc, s0, v0 ; 7C080000
; v0 = -1.0 where vcc is set, else 0; v_cmpx_le then keeps only lanes with
; 0 <= v0 in exec, so the lanes that received -1.0 are disabled.
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
; Pack (0, 0) once and export that register in all four operand slots.
v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080
exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 4
Code Size: 100 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy
34: MOV TEMP[6].x, TEMP[5].xxxx
35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx
36: MOV TEMP[6].y, TEMP[7].xxxx
37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww
38: MOV TEMP[5].zw, TEMP[1].wwzw
39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx
40: DP4 TEMP[7].x, CONST[2], TEMP[6]
41: DP4 TEMP[8].x, CONST[3], TEMP[6]
42: MOV TEMP[7].y, TEMP[8].xxxx
43: DP4 TEMP[6].x, CONST[4], TEMP[6]
44: MOV TEMP[7].z, TEMP[6].xxxx
45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy
46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx
47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz
48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz
49: MOV TEMP[6].yzw, TEMP[6].yxyz
50: MOV TEMP[6].x, TEMP[1].zzzz
51: MOV TEMP[0].xyz, TEMP[0].xyzx
52: MOV OUT[6], TEMP[0]
53: MOV OUT[1], TEMP[2]
54: MOV OUT[2], TEMP[4]
55: MOV OUT[3], TEMP[3]
56: MOV OUT[4], TEMP[5]
57: MOV OUT[0], TEMP[1]
58: MOV OUT[5], TEMP[6]
59: END
; ModuleID = 'tgsi'
; Vertex-shader entry point (attribute group #0 sets "ShaderType"="1"),
; lowered from the 60-instruction TGSI VERT program printed above. It loads
; the scalar constants and four vertex attributes it needs, evaluates the
; TGSI arithmetic as straight-line scalar float code (the TGSI UIF/ELSE is
; flattened into two selects), and finishes with seven exports: six generic
; outputs (targets 32..37) and the position (target 12).
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Element 0 of argument %1 is the descriptor for every llvm.SI.load.const.
; Offsets are in bytes; with 16 bytes per CONST[n] the loads below map to:
;   %13..%15 CONST[0].xyz     %16      CONST[1].x
;   %17..%20 CONST[2].xyzw    %21..%24 CONST[3].xyzw   %25..%28 CONST[4].xyzw
;   %29..%31 CONST[5].xyz     %32..%43 CONST[6..8].xyzw %44..%46 CONST[9].xyz
;   %47..%55 CONST[10..12].xyz
;   %56..%59 CONST[14].xyzw   %60..%63 CONST[15].xyzw  %64      CONST[16].x
;   %65..%80 CONST[17..20].xyzw
; Components the TGSI program never reads are not loaded.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; Vertex attribute fetches. Elements 0..3 of argument %4 are the descriptors
; for IN[0]..IN[3]; every fetch uses the same index %5 + %7 (recomputed each
; time). Only the components the TGSI program reads are extracted:
; IN[0].xyzw, IN[1].xyz, IN[2].xy, IN[3].xy.
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
; TGSI 0-3: TEMP[0] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z,
; then only .xyz receives the CONST[9]*IN[0].w term.
; Results: x = %129, y = %131, z = %133; w = %127 (without the CONST[9] term).
%108 = fmul float %32, %85
%109 = fmul float %33, %85
%110 = fmul float %34, %85
%111 = fmul float %35, %85
%112 = fmul float %36, %86
%113 = fadd float %112, %108
%114 = fmul float %37, %86
%115 = fadd float %114, %109
%116 = fmul float %38, %86
%117 = fadd float %116, %110
%118 = fmul float %39, %86
%119 = fadd float %118, %111
%120 = fmul float %40, %87
%121 = fadd float %120, %113
%122 = fmul float %41, %87
%123 = fadd float %122, %115
%124 = fmul float %42, %87
%125 = fadd float %124, %117
%126 = fmul float %43, %87
%127 = fadd float %126, %119
%128 = fmul float %44, %88
%129 = fadd float %128, %121
%130 = fmul float %45, %88
%131 = fadd float %130, %123
%132 = fmul float %46, %88
%133 = fadd float %132, %125
; TGSI 4-7: TEMP[1] = CONST[17]*IN[0].x + CONST[18]*IN[0].y
;                   + CONST[19]*IN[0].z + CONST[20]*IN[0].w (all four lanes).
; Results: x = %155, y = %157, z = %159, w = %161; exported as the position.
%134 = fmul float %65, %85
%135 = fmul float %66, %85
%136 = fmul float %67, %85
%137 = fmul float %68, %85
%138 = fmul float %69, %86
%139 = fadd float %138, %134
%140 = fmul float %70, %86
%141 = fadd float %140, %135
%142 = fmul float %71, %86
%143 = fadd float %142, %136
%144 = fmul float %72, %86
%145 = fadd float %144, %137
%146 = fmul float %73, %87
%147 = fadd float %146, %139
%148 = fmul float %74, %87
%149 = fadd float %148, %141
%150 = fmul float %75, %87
%151 = fadd float %150, %143
%152 = fmul float %76, %87
%153 = fadd float %152, %145
%154 = fmul float %77, %88
%155 = fadd float %154, %147
%156 = fmul float %78, %88
%157 = fadd float %156, %149
%158 = fmul float %79, %88
%159 = fadd float %158, %151
%160 = fmul float %80, %88
%161 = fadd float %160, %153
; TGSI 8: TEMP[2].xy = IN[2].xy * CONST[14].xy + CONST[14].zw.
%162 = fmul float %100, %56
%163 = fadd float %162, %58
%164 = fmul float %101, %57
%165 = fadd float %164, %59
; TGSI 9-14: the UIF/ELSE on (CONST[16].x == 0.0) becomes two selects that
; pick IN[2].xy when the compare holds and IN[3].xy otherwise.
%166 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %166, float %100, float %106
%.36 = select i1 %166, float %101, float %107
; TGSI 15-16: selected.xy * CONST[15].xy + CONST[15].zw, stored in TEMP[2].zw.
%167 = fmul float %., %60
%168 = fadd float %167, %62
%169 = fmul float %.36, %61
%170 = fadd float %169, %63
; TGSI 17-28: three dot products of IN[1].xyz with the .xyz of CONST[10],
; CONST[11] and CONST[12]: %181, %183 and %185 respectively.
%171 = fmul float %47, %93
%172 = fmul float %50, %93
%173 = fmul float %53, %93
%174 = fmul float %48, %94
%175 = fadd float %174, %171
%176 = fmul float %51, %94
%177 = fadd float %176, %172
%178 = fmul float %54, %94
%179 = fadd float %178, %173
%180 = fmul float %49, %95
%181 = fadd float %180, %175
%182 = fmul float %52, %95
%183 = fadd float %182, %177
%184 = fmul float %55, %95
%185 = fadd float %184, %179
; TGSI 29-31: scale that vector by the clamped reciprocal square root of its
; squared length. Normalised components: %192 (x), %193 (y), %194 (z).
%186 = fmul float %181, %181
%187 = fmul float %183, %183
%188 = fadd float %187, %186
%189 = fmul float %185, %185
%190 = fadd float %188, %189
%191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190)
%192 = fmul float %181, %191
%193 = fmul float %183, %191
%194 = fmul float %185, %191
; TGSI 33-37: halve TEMP[1].x/y/w, multiply the halved y by CONST[1].x, then
; add the halved w to both: %199 = 0.5*x + 0.5*w, %200 = 0.5*y*CONST[1].x + 0.5*w.
%195 = fmul float %155, 5.000000e-01
%196 = fmul float %157, 5.000000e-01
%197 = fmul float %161, 5.000000e-01
%198 = fmul float %196, %16
%199 = fadd float %195, %197
%200 = fadd float %198, %197
; TGSI 39: pairwise products of the normalised vector n:
; %201 = n.x*n.y, %202 = n.y*n.z, %203 = n.z*n.z, %204 = n.z*n.x.
%201 = fmul float %192, %193
%202 = fmul float %193, %194
%203 = fmul float %194, %194
%204 = fmul float %194, %192
; TGSI 40-44: three 4-component dot products of those products with
; CONST[2], CONST[3] and CONST[4]: %211, %218 and %225.
%205 = fmul float %17, %201
%206 = fmul float %18, %202
%207 = fadd float %205, %206
%208 = fmul float %19, %203
%209 = fadd float %207, %208
%210 = fmul float %20, %204
%211 = fadd float %209, %210
%212 = fmul float %21, %201
%213 = fmul float %22, %202
%214 = fadd float %212, %213
%215 = fmul float %23, %203
%216 = fadd float %214, %215
%217 = fmul float %24, %204
%218 = fadd float %216, %217
%219 = fmul float %25, %201
%220 = fmul float %26, %202
%221 = fadd float %219, %220
%222 = fmul float %27, %203
%223 = fadd float %221, %222
%224 = fmul float %28, %204
%225 = fadd float %223, %224
; TGSI 45-47: %228 = n.x*n.x - n.y*n.y, then
; (%230, %232, %234) = CONST[5].xyz * %228 + (%211, %218, %225).
%226 = fmul float %193, %193
%227 = fmul float %192, %192
%228 = fsub float %227, %226
%229 = fmul float %29, %228
%230 = fadd float %229, %211
%231 = fmul float %30, %228
%232 = fadd float %231, %218
%233 = fmul float %31, %228
%234 = fadd float %233, %225
; TGSI 48: TEMP[0].xyz - CONST[0].xyz.
%235 = fsub float %129, %13
%236 = fsub float %131, %14
%237 = fsub float %133, %15
; Exports, fourth argument = target. Targets 32..37 carry the values the TGSI
; program assigns to OUT[1]..OUT[6]; target 12 carries OUT[0] (POSITION) and
; is the only call whose third argument is 1.
;   32: TEMP[2]                                   (OUT[1])
;   33: normalised vector, w = 0.0                (OUT[2])
;   34: (%230, %232, %234), w = 0.0               (OUT[3])
;   35: (%199, %200, TEMP[1].z, TEMP[1].w)        (OUT[4])
;   36: (TEMP[1].z, TEMP[0].xyz - CONST[0].xyz)   (OUT[5])
;   37: TEMP[0]                                   (OUT[6])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524
s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525
s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526
s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528
s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529
s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A
s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C
s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D
s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E
s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530
s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531
s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532
s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538
s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539
s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509
s_buffer_load_dword s5, s[20:23], 0xa ; C202950A
s_buffer_load_dword s3, s[20:23], 0xb ; C201950B
s_buffer_load_dword s9, s[20:23], 0xc ; C204950C
s_buffer_load_dword s12, s[20:23], 0xd ; C206150D
s_buffer_load_dword s7, s[20:23], 0xe ; C203950E
s_buffer_load_dword s4, s[20:23], 0xf ; C202150F
s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510
s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511
s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512
s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F
s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540
s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544
s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545
s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546
s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547
s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548
s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549
s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A
s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s0 ; 7E000200
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508
s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A
s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B
s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C
s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D
s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E
s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513
s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514
s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515
s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516
s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v13, s46 ; 7E1A022E
s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519
s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A
s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B
s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C
s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D
s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E
s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F
s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520
s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521
s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522
s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C
s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D
s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E
s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F
s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550
s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551
s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552
s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553
v_mac_f32_e32 v13, s36, v9 ; 3E1A1224
v_mov_b32_e32 v14, s47 ; 7E1C022F
v_mul_f32_e32 v15, s51, v2 ; 101E0433
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v16, s46, v2 ; 1020042E
v_mul_f32_e32 v17, s27, v6 ; 10220C1B
v_mul_f32_e32 v18, s30, v6 ; 10240C1E
v_mul_f32_e32 v6, s33, v6 ; 100C0C21
v_mac_f32_e32 v17, s28, v7 ; 3E220E1C
v_mac_f32_e32 v18, s31, v7 ; 3E240E1F
v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22
v_mac_f32_e32 v17, s29, v8 ; 3E22101D
v_mac_f32_e32 v18, s32, v8 ; 3E241020
v_mac_f32_e32 v6, s35, v8 ; 3E0C1023
v_mul_f32_e32 v7, s52, v2 ; 100E0434
v_mul_f32_e32 v8, s53, v2 ; 10100435
v_mul_f32_e32 v19, s38, v2 ; 10260426
v_mac_f32_e32 v14, s37, v10 ; 3E1C1425
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v15, s54, v3 ; 3E1E0636
v_mac_f32_e32 v16, s55, v3 ; 3E200637
v_mac_f32_e32 v7, s56, v3 ; 3E0E0638
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mac_f32_e32 v19, s42, v3 ; 3E26062A
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s43, v3 ; 3E16062B
v_mul_f32_e32 v12, s40, v2 ; 10180428
v_mac_f32_e32 v12, s44, v3 ; 3E18062C
v_mul_f32_e32 v2, s41, v2 ; 10040429
v_mac_f32_e32 v2, s45, v3 ; 3E04062D
v_mac_f32_e32 v15, s58, v4 ; 3E1E083A
v_mac_f32_e32 v16, s59, v4 ; 3E20083B
v_mac_f32_e32 v7, s60, v4 ; 3E0E083C
v_mac_f32_e32 v8, s19, v4 ; 3E100813
v_mac_f32_e32 v19, s61, v4 ; 3E26083D
v_mac_f32_e32 v11, s62, v4 ; 3E16083E
v_mac_f32_e32 v12, s63, v4 ; 3E18083F
v_mac_f32_e32 v2, s64, v4 ; 3E040840
v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18
v_mac_f32_e32 v16, s25, v5 ; 3E200A19
v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A
v_mac_f32_e32 v19, s65, v5 ; 3E260A41
v_mac_f32_e32 v11, s66, v5 ; 3E160A42
v_mac_f32_e32 v12, s67, v5 ; 3E180A43
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mov_b32_e32 v3, s50 ; 7E060232
v_mul_f32_e32 v4, v17, v17 ; 10082311
v_mac_f32_e32 v4, v18, v18 ; 3E082512
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
v_mac_f32_e32 v3, s48, v9 ; 3E061230
v_mac_f32_e32 v0, s49, v10 ; 3E001431
exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v4, v17 ; 10002304
v_mul_f32_e32 v3, v4, v18 ; 10062504
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v6, s11, v5 ; 100C0A0B
v_mul_f32_e32 v9, s12, v5 ; 10120A0C
v_mul_f32_e32 v5, s13, v5 ; 100A0A0D
v_mul_f32_e32 v10, v3, v0 ; 10140103
v_mac_f32_e32 v6, s14, v10 ; 3E0C140E
v_mac_f32_e32 v9, s9, v10 ; 3E121409
v_mac_f32_e32 v5, s10, v10 ; 3E0A140A
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mac_f32_e32 v6, s5, v10 ; 3E0C1405
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v0, v4 ; 10140900
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s4, v10 ; 3E121404
v_mac_f32_e32 v5, s18, v10 ; 3E0A1412
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100
v_mac_f32_e32 v6, s15, v0 ; 3E0C000F
v_mac_f32_e32 v9, s16, v0 ; 3E120010
v_mac_f32_e32 v5, s17, v0 ; 3E0A0011
v_mul_f32_e32 v0, 0.5, v11 ; 100016F0
v_mul_f32_e32 v3, 0.5, v2 ; 100604F0
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
s_waitcnt expcnt(0) ; BF8C070F
v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0
v_mac_f32_e32 v3, s6, v0 ; 3E060006
exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301
v_subrev_f32_e32 v0, s0, v15 ; 0A001E00
s_waitcnt expcnt(0) ; BF8C070F
v_subrev_f32_e32 v1, s1, v16 ; 0A022001
v_subrev_f32_e32 v3, s2, v7 ; 0A060E02
exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C
exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F
exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 20
Code Size: 788 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..20]
DCL CONST[23..24]
DCL CONST[26]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: DP3 TEMP[1].x, IN[4].yzww, IN[4].yzww
4: RSQ TEMP[1].x, TEMP[1].xxxx
5: MUL TEMP[1].xyz, IN[4].yzww, TEMP[1].xxxx
6: MOV TEMP[2].xy, IN[0].xyyy
7: TEX TEMP[2].x, TEMP[2], SAMP[3], 2D
8: MOV TEMP[3].xyz, IMM[0].xxxx
9: FSLT TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
10: UIF TEMP[2].xxxx :0
11: MUL TEMP[2].xyz, CONST[20].xyzz, CONST[19].xyzz
12: MOV TEMP[4].xy, IN[0].xyyy
13: TEX TEMP[4].xyz, TEMP[4], SAMP[2], 2D
14: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[4].xyzz
15: ELSE :0
16: MOV TEMP[2].xy, IN[0].xyyy
17: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
18: MUL TEMP[3].xyz, CONST[19].xyzz, TEMP[2].xyzz
19: ENDIF
20: LRP TEMP[2].xyz, CONST[23].xxxx, TEMP[3].xyzz, CONST[16].xyzz
21: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww
22: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
23: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
24: MOV TEMP[5].xy, IN[0].xyyy
25: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
26: ADD TEMP[6].x, IMM[0].xxxx, -CONST[26].xxxx
27: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx
28: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
29: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
30: MOV TEMP[7].xyz, IMM[0].yyyy
31: MOV TEMP[8].w, IMM[0].xxxx
32: MOV TEMP[8].xyz, TEMP[0].xyzx
33: DP4 TEMP[9].x, CONST[1], TEMP[8]
34: DP4 TEMP[10].x, CONST[2], TEMP[8]
35: MOV TEMP[9].y, TEMP[10].xxxx
36: DP4 TEMP[8].x, CONST[3], TEMP[8]
37: MOV TEMP[9].z, TEMP[8].xxxx
38: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
39: MOV TEMP[9].xy, IN[3].xyyy
40: MOV TEMP[9].w, IN[3].wwww
41: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D
42: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
43: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
44: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
45: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
46: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz
47: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
48: MOV TEMP[11].xyz, TEMP[10].xyzx
49: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww
50: UIF TEMP[12].xxxx :0
51: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
52: RSQ TEMP[12].x, TEMP[12].xxxx
53: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
54: MOV TEMP[13].xyz, -IN[5].xyzx
55: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
56: RCP TEMP[15].x, TEMP[12].xxxx
57: RCP TEMP[15].y, TEMP[12].yyyy
58: RCP TEMP[15].z, TEMP[12].zzzz
59: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
60: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
61: RCP TEMP[15].x, TEMP[12].xxxx
62: RCP TEMP[15].y, TEMP[12].yyyy
63: RCP TEMP[15].z, TEMP[12].zzzz
64: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
65: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz
66: UIF TEMP[15].xxxx :0
67: MOV TEMP[16].x, TEMP[14].xxxx
68: ELSE :0
69: MOV TEMP[16].x, TEMP[13].xxxx
70: ENDIF
71: UIF TEMP[15].yyyy :0
72: MOV TEMP[17].x, TEMP[14].yyyy
73: ELSE :0
74: MOV TEMP[17].x, TEMP[13].yyyy
75: ENDIF
76: UIF TEMP[15].zzzz :0
77: MOV TEMP[14].x, TEMP[14].zzzz
78: ELSE :0
79: MOV TEMP[14].x, TEMP[13].zzzz
80: ENDIF
81: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
82: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
83: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
84: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
85: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
86: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
87: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
88: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
89: ENDIF
90: ADD TEMP[12].x, IMM[0].xxxx, -CONST[24].xxxx
91: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx
92: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
93: MOV TEMP[11].xyz, TEMP[11].xyzz
94: MOV TEMP[11].w, TEMP[12].xxxx
95: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
96: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
97: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
98: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
99: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz
100: UIF TEMP[12].xxxx :0
101: MOV TEMP[12].xyz, TEMP[10].xyzx
102: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww
103: UIF TEMP[13].xxxx :0
104: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
105: RSQ TEMP[13].x, TEMP[13].xxxx
106: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
107: MOV TEMP[13].xyz, -IN[5].xyzx
108: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
109: RCP TEMP[15].x, TEMP[10].xxxx
110: RCP TEMP[15].y, TEMP[10].yyyy
111: RCP TEMP[15].z, TEMP[10].zzzz
112: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
113: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
114: RCP TEMP[15].x, TEMP[10].xxxx
115: RCP TEMP[15].y, TEMP[10].yyyy
116: RCP TEMP[15].z, TEMP[10].zzzz
117: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
118: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz
119: UIF TEMP[15].xxxx :0
120: MOV TEMP[16].x, TEMP[14].xxxx
121: ELSE :0
122: MOV TEMP[16].x, TEMP[13].xxxx
123: ENDIF
124: UIF TEMP[15].yyyy :0
125: MOV TEMP[17].x, TEMP[14].yyyy
126: ELSE :0
127: MOV TEMP[17].x, TEMP[13].yyyy
128: ENDIF
129: UIF TEMP[15].zzzz :0
130: MOV TEMP[14].x, TEMP[14].zzzz
131: ELSE :0
132: MOV TEMP[14].x, TEMP[13].zzzz
133: ENDIF
134: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
135: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
136: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
137: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
138: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
139: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
140: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
141: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
142: ENDIF
143: ADD TEMP[10].x, IMM[0].xxxx, -CONST[24].xxxx
144: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx
145: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
146: MOV TEMP[12].xyz, TEMP[12].xyzz
147: MOV TEMP[12].w, TEMP[10].xxxx
148: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
149: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
150: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
151: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
152: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
153: ELSE :0
154: MOV TEMP[7].xyz, TEMP[11].xyzx
155: ENDIF
156: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
157: MOV TEMP[1].xyz, -TEMP[1].xyzx
158: ADD TEMP[5].x, IMM[0].xxxx, -CONST[24].xxxx
159: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
160: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
161: RSQ TEMP[11].x, TEMP[11].xxxx
162: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
163: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
164: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
165: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
166: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx
167: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
168: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
169: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx
170: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy
171: LG2 TEMP[13].x, TEMP[13].xxxx
172: RCP TEMP[13].x, TEMP[13].xxxx
173: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx
174: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
175: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx
176: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
177: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx
178: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
179: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww
180: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx
181: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx
182: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
183: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx
184: MOV_SAT TEMP[4].x, TEMP[4].xxxx
185: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
186: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
187: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
188: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
189: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[2].xyzz
190: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx
191: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx
192: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].zzzz
193: RCP TEMP[1].x, TEMP[1].xxxx
194: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
195: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
196: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
197: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx
198: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
199: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
200: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
201: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
202: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
203: MAX TEMP[1].x, IMM[0].yyyy, TEMP[1].xxxx
204: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz
205: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[2].xyzz
206: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
207: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
208: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
209: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
210: MAD TEMP[2].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[2].xyzz
211: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww
212: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
213: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
214: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
215: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
216: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx
217: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
218: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
219: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
220: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
221: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
222: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx
223: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
224: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
225: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
226: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz
227: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xyzz, TEMP[3].xyzz
228: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
229: MOV TEMP[0].xyz, TEMP[0].xyzx
230: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww
231: MOV_SAT TEMP[1].x, TEMP[1].xxxx
232: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
233: MOV TEMP[0].xyz, TEMP[0].xyzx
234: MOV TEMP[0].w, IMM[0].xxxx
235: MOV OUT[0], TEMP[0]
236: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
%85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0
%87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%96 = load <8 x i32>, <8 x i32> addrspace(2)* %95, align 32, !tbaa !0
%97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%98 = load <4 x i32>, <4 x i32> addrspace(2)* %97, align 16, !tbaa !0
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)*
%107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0
%108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)*
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%112 = bitcast <8 x i32> addrspace(2)* %111 to <32 x i8> addrspace(2)*
%113 = load <32 x i8>, <32 x i8> addrspace(2)* %112, align 32, !tbaa !0
%114 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%115 = bitcast <4 x i32> addrspace(2)* %114 to <16 x i8> addrspace(2)*
%116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !0
%117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%134 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%135 = fmul float %119, %119
%136 = fmul float %120, %120
%137 = fadd float %136, %135
%138 = fmul float %121, %121
%139 = fadd float %137, %138
%140 = call float @llvm.AMDGPU.rsq.clamped.f32(float %139)
%141 = fmul float %119, %140
%142 = fmul float %120, %140
%143 = fmul float %121, %140
%144 = fmul float %129, %129
%145 = fmul float %130, %130
%146 = fadd float %145, %144
%147 = fmul float %131, %131
%148 = fadd float %146, %147
%149 = call float @llvm.AMDGPU.rsq.clamped.f32(float %148)
%150 = fmul float %129, %149
%151 = fmul float %130, %149
%152 = fmul float %131, %149
%153 = bitcast float %117 to i32
%154 = bitcast float %118 to i32
%155 = insertelement <2 x i32> undef, i32 %153, i32 0
%156 = insertelement <2 x i32> %155, i32 %154, i32 1
%157 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %156, <32 x i8> %101, <16 x i8> %104, i32 2)
%158 = extractelement <4 x float> %157, i32 0
%159 = fcmp ogt float %158, 0.000000e+00
br i1 %159, label %IF, label %ELSE
IF: ; preds = %main_body
%160 = call float @llvm.SI.load.const(<16 x i8> %23, i32 328)
%161 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324)
%162 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%163 = fmul float %162, %79
%164 = fmul float %161, %80
%165 = fmul float %160, %81
%166 = bitcast float %117 to i32
%167 = bitcast float %118 to i32
%168 = insertelement <2 x i32> undef, i32 %166, i32 0
%169 = insertelement <2 x i32> %168, i32 %167, i32 1
%170 = bitcast <8 x i32> %96 to <32 x i8>
%171 = bitcast <4 x i32> %98 to <16 x i8>
%172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2)
%173 = extractelement <4 x float> %172, i32 0
%174 = extractelement <4 x float> %172, i32 1
%175 = extractelement <4 x float> %172, i32 2
%176 = fmul float %163, %173
%177 = fmul float %164, %174
%178 = fmul float %165, %175
br label %ENDIF
ELSE: ; preds = %main_body
%179 = bitcast float %117 to i32
%180 = bitcast float %118 to i32
%181 = insertelement <2 x i32> undef, i32 %179, i32 0
%182 = insertelement <2 x i32> %181, i32 %180, i32 1
%183 = bitcast <8 x i32> %96 to <32 x i8>
%184 = bitcast <4 x i32> %98 to <16 x i8>
%185 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %182, <32 x i8> %183, <16 x i8> %184, i32 2)
%186 = extractelement <4 x float> %185, i32 0
%187 = extractelement <4 x float> %185, i32 1
%188 = extractelement <4 x float> %185, i32 2
%189 = fmul float %79, %186
%190 = fmul float %80, %187
%191 = fmul float %81, %188
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp14.0 = phi float [ %178, %IF ], [ %191, %ELSE ]
%temp13.0 = phi float [ %177, %IF ], [ %190, %ELSE ]
%temp12.0 = phi float [ %176, %IF ], [ %189, %ELSE ]
%192 = call float @llvm.AMDGPU.lrp(float %82, float %temp12.0, float %69)
%193 = call float @llvm.AMDGPU.lrp(float %82, float %temp13.0, float %70)
%194 = call float @llvm.AMDGPU.lrp(float %82, float %temp14.0, float %71)
%195 = fmul float %82, %72
%196 = fsub float %72, %195
%197 = fmul float %temp12.0, %196
%198 = fmul float %temp13.0, %196
%199 = fmul float %temp14.0, %196
%200 = bitcast float %117 to i32
%201 = bitcast float %118 to i32
%202 = insertelement <2 x i32> undef, i32 %200, i32 0
%203 = insertelement <2 x i32> %202, i32 %201, i32 1
%204 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %203, <32 x i8> %107, <16 x i8> %110, i32 2)
%205 = extractelement <4 x float> %204, i32 1
%206 = fsub float 1.000000e+00, %84
%207 = fmul float %205, %84
%208 = fadd float %207, %206
%209 = fmul float %141, %24
%210 = fmul float %142, %25
%211 = fadd float %210, %209
%212 = fmul float %143, %26
%213 = fadd float %211, %212
%214 = call float @llvm.maxnum.f32(float %213, float 0.000000e+00)
%215 = fmul float %27, %141
%216 = fmul float %28, %142
%217 = fadd float %215, %216
%218 = fmul float %29, %143
%219 = fadd float %217, %218
%220 = fadd float %219, %30
%221 = fmul float %31, %141
%222 = fmul float %32, %142
%223 = fadd float %221, %222
%224 = fmul float %33, %143
%225 = fadd float %223, %224
%226 = fadd float %225, %34
%227 = fmul float %35, %141
%228 = fmul float %36, %142
%229 = fadd float %227, %228
%230 = fmul float %37, %143
%231 = fadd float %229, %230
%232 = fadd float %231, %38
%233 = fadd float %122, %220
%234 = fadd float %123, %226
%235 = fadd float %124, %232
%236 = fdiv float %125, %127
%237 = fdiv float %126, %127
%238 = bitcast float %236 to i32
%239 = bitcast float %237 to i32
%240 = insertelement <2 x i32> undef, i32 %238, i32 0
%241 = insertelement <2 x i32> %240, i32 %239, i32 1
%242 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %241, <32 x i8> %113, <16 x i8> %116, i32 2)
%243 = extractelement <4 x float> %242, i32 0
%244 = fmul float %73, %243
%245 = fmul float %74, %243
%246 = fmul float %75, %243
%247 = fmul float %233, %208
%248 = fmul float %234, %208
%249 = fmul float %235, %208
%250 = fmul float %141, %150
%251 = fmul float %142, %151
%252 = fadd float %251, %250
%253 = fmul float %143, %152
%254 = fadd float %252, %253
%255 = fmul float %254, %141
%256 = fmul float %254, %142
%257 = fmul float %254, %143
%258 = fmul float %255, 2.000000e+00
%259 = fmul float %256, 2.000000e+00
%260 = fmul float %257, 2.000000e+00
%261 = fsub float %150, %258
%262 = fsub float %151, %259
%263 = fsub float %152, %260
%264 = fcmp ogt float %54, 0.000000e+00
br i1 %264, label %IF77, label %ENDIF76
IF77: ; preds = %ENDIF
%265 = fmul float %261, %261
%266 = fmul float %262, %262
%267 = fadd float %266, %265
%268 = fmul float %263, %263
%269 = fadd float %267, %268
%270 = call float @llvm.AMDGPU.rsq.clamped.f32(float %269)
%271 = fmul float %261, %270
%272 = fmul float %262, %270
%273 = fmul float %263, %270
%274 = fsub float %44, %132
%275 = fsub float %45, %133
%276 = fsub float %46, %134
%277 = fdiv float 1.000000e+00, %271
%278 = fdiv float 1.000000e+00, %272
%279 = fdiv float 1.000000e+00, %273
%280 = fmul float %274, %277
%281 = fmul float %275, %278
%282 = fmul float %276, %279
%283 = fsub float %47, %132
%284 = fsub float %48, %133
%285 = fsub float %49, %134
%286 = fdiv float 1.000000e+00, %271
%287 = fdiv float 1.000000e+00, %272
%288 = fdiv float 1.000000e+00, %273
%289 = fmul float %283, %286
%290 = fmul float %284, %287
%291 = fmul float %285, %288
%292 = fcmp ogt float %271, 0.000000e+00
%293 = fcmp ogt float %272, 0.000000e+00
%294 = fcmp ogt float %273, 0.000000e+00
%. = select i1 %292, float %280, float %289
%temp68.0 = select i1 %293, float %281, float %290
%.103 = select i1 %294, float %282, float %291
%295 = fadd float %44, %47
%296 = fadd float %45, %48
%297 = fadd float %46, %49
%298 = fmul float %295, 5.000000e-01
%299 = fmul float %296, 5.000000e-01
%300 = fmul float %297, 5.000000e-01
%301 = call float @llvm.minnum.f32(float %., float %temp68.0)
%302 = call float @llvm.minnum.f32(float %301, float %.103)
%303 = fsub float %298, %51
%304 = fsub float %299, %52
%305 = fsub float %300, %53
%306 = fadd float %303, %132
%307 = fadd float %304, %133
%308 = fadd float %305, %134
%309 = fmul float %271, %302
%310 = fadd float %309, %306
%311 = fmul float %272, %302
%312 = fadd float %311, %307
%313 = fmul float %273, %302
%314 = fadd float %313, %308
%315 = fsub float %310, %298
%316 = fsub float %312, %299
%317 = fsub float %314, %300
br label %ENDIF76
ENDIF76: ; preds = %ENDIF, %IF77
%temp44.0 = phi float [ %315, %IF77 ], [ %261, %ENDIF ]
%temp45.0 = phi float [ %316, %IF77 ], [ %262, %ENDIF ]
%temp46.0 = phi float [ %317, %IF77 ], [ %263, %ENDIF ]
%318 = fsub float 1.000000e+00, %83
%319 = call float @llvm.pow.f32(float %318, float 7.500000e-01)
%320 = fmul float %319, 7.000000e+00
%321 = insertelement <4 x float> undef, float %temp44.0, i32 0
%322 = insertelement <4 x float> %321, float %temp45.0, i32 1
%323 = insertelement <4 x float> %322, float %temp46.0, i32 2
%324 = insertelement <4 x float> %323, float %320, i32 3
%325 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %324)
%326 = extractelement <4 x float> %325, i32 0
%327 = extractelement <4 x float> %325, i32 1
%328 = extractelement <4 x float> %325, i32 2
%329 = extractelement <4 x float> %325, i32 3
%330 = call float @llvm.fabs.f32(float %328)
%331 = fdiv float 1.000000e+00, %330
%332 = fmul float %326, %331
%333 = fadd float %332, 1.500000e+00
%334 = fmul float %327, %331
%335 = fadd float %334, 1.500000e+00
%336 = bitcast float %335 to i32
%337 = bitcast float %333 to i32
%338 = bitcast float %329 to i32
%339 = bitcast float %320 to i32
%340 = insertelement <4 x i32> undef, i32 %336, i32 0
%341 = insertelement <4 x i32> %340, i32 %337, i32 1
%342 = insertelement <4 x i32> %341, i32 %338, i32 2
%343 = insertelement <4 x i32> %342, i32 %339, i32 3
%344 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %343, <32 x i8> %86, <16 x i8> %88, i32 4)
%345 = extractelement <4 x float> %344, i32 0
%346 = extractelement <4 x float> %344, i32 1
%347 = extractelement <4 x float> %344, i32 2
%348 = extractelement <4 x float> %344, i32 3
%349 = call float @llvm.pow.f32(float %348, float %56)
%350 = fmul float %55, %349
%351 = fmul float %350, %345
%352 = fmul float %350, %346
%353 = fmul float %350, %347
%354 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %354, label %IF89, label %ENDIF88
IF89: ; preds = %ENDIF76
%355 = fcmp ogt float %66, 0.000000e+00
br i1 %355, label %IF92, label %ENDIF91
ENDIF88: ; preds = %ENDIF76, %ENDIF91
%temp28.0 = phi float [ %583, %ENDIF91 ], [ %351, %ENDIF76 ]
%temp29.0 = phi float [ %584, %ENDIF91 ], [ %352, %ENDIF76 ]
%temp30.0 = phi float [ %585, %ENDIF91 ], [ %353, %ENDIF76 ]
%356 = fmul float %temp28.0, %208
%357 = fmul float %temp29.0, %208
%358 = fmul float %temp30.0, %208
%359 = fsub float 1.000000e+00, %83
%360 = fsub float %24, %150
%361 = fsub float %25, %151
%362 = fsub float %26, %152
%363 = fmul float %360, %360
%364 = fmul float %361, %361
%365 = fadd float %364, %363
%366 = fmul float %362, %362
%367 = fadd float %365, %366
%368 = call float @llvm.AMDGPU.rsq.clamped.f32(float %367)
%369 = fmul float %360, %368
%370 = fmul float %361, %368
%371 = fmul float %362, %368
%372 = fmul float %150, %141
%373 = fsub float -0.000000e+00, %372
%374 = fmul float %151, %142
%375 = fsub float %373, %374
%376 = fmul float %152, %143
%377 = fsub float %375, %376
%378 = call float @llvm.maxnum.f32(float %377, float 0.000000e+00)
%379 = fmul float %24, %369
%380 = fmul float %25, %370
%381 = fadd float %380, %379
%382 = fmul float %26, %371
%383 = fadd float %381, %382
%384 = call float @llvm.maxnum.f32(float %383, float 0.000000e+00)
%385 = fmul float %359, %359
%386 = fmul float %385, %78
%387 = fsub float 1.000000e+00, %359
%388 = fmul float %387, 0x3FEEF9DB20000000
%389 = fadd float %388, 0x3F9EB851E0000000
%390 = call float @llvm.log2.f32(float %389)
%391 = fdiv float 1.000000e+00, %390
%392 = fmul float %391, 1.000000e+01
%393 = fmul float %392, %392
%394 = fsub float 1.000000e+00, %214
%395 = fsub float 1.000000e+00, %378
%396 = fmul float %384, 2.000000e+00
%397 = fmul float %384, %359
%398 = fmul float %396, %397
%399 = fadd float %398, 5.000000e-01
%400 = fsub float 1.000000e+00, %384
%401 = fsub float 1.000000e+00, %378
%402 = fsub float 1.000000e+00, %196
%403 = fadd float %83, %402
%404 = call float @llvm.AMDIL.clamp.(float %403, float 0.000000e+00, float 1.000000e+00)
%405 = fmul float %401, %401
%406 = fmul float %401, %401
%407 = fmul float %406, %401
%408 = fmul float %405, %407
%409 = call float @llvm.AMDGPU.lrp(float %408, float %404, float %192)
%410 = call float @llvm.AMDGPU.lrp(float %408, float %404, float %193)
%411 = call float @llvm.AMDGPU.lrp(float %408, float %404, float %194)
%412 = call float @llvm.AMDGPU.lrp(float %214, float 1.000000e+00, float %386)
%413 = call float @llvm.AMDGPU.lrp(float %378, float 1.000000e+00, float %386)
%414 = fmul float %412, %413
%415 = fadd float %414, 0x3F1A36E2E0000000
%416 = fdiv float 1.000000e+00, %415
%417 = fmul float %141, %369
%418 = fmul float %142, %370
%419 = fadd float %418, %417
%420 = fmul float %143, %371
%421 = fadd float %419, %420
%422 = call float @llvm.maxnum.f32(float %421, float 0.000000e+00)
%423 = call float @llvm.pow.f32(float %422, float %393)
%424 = fadd float %393, 1.000000e+00
%425 = fmul float %424, %77
%426 = fmul float %423, %425
%427 = fmul float %416, %426
%428 = fmul float %427, %214
%429 = fmul float %428, %76
%430 = call float @llvm.maxnum.f32(float %429, float 0.000000e+00)
%431 = fmul float %430, %244
%432 = fmul float %430, %245
%433 = fmul float %430, %246
%434 = fsub float 1.000000e+00, %192
%435 = fsub float 1.000000e+00, %193
%436 = fsub float 1.000000e+00, %194
%437 = fmul float %400, %400
%438 = fmul float %400, %400
%439 = fmul float %438, %400
%440 = fmul float %437, %439
%441 = fmul float %434, %440
%442 = fadd float %441, %192
%443 = fmul float %435, %440
%444 = fadd float %443, %193
%445 = fmul float %436, %440
%446 = fadd float %445, %194
%447 = fadd float %399, -1.000000e+00
%448 = fmul float %394, %394
%449 = fmul float %394, %394
%450 = fmul float %449, %394
%451 = fmul float %448, %450
%452 = fmul float %447, %451
%453 = fadd float %452, 1.000000e+00
%454 = fadd float %399, -1.000000e+00
%455 = fmul float %395, %395
%456 = fmul float %395, %395
%457 = fmul float %456, %395
%458 = fmul float %455, %457
%459 = fmul float %454, %458
%460 = fadd float %459, 1.000000e+00
%461 = fmul float %453, %460
%462 = fmul float %461, %214
%463 = fmul float %244, %462
%464 = fadd float %463, %247
%465 = fmul float %245, %462
%466 = fadd float %465, %248
%467 = fmul float %246, %462
%468 = fadd float %467, %249
%469 = fmul float %197, %464
%470 = fmul float %198, %466
%471 = fmul float %199, %468
%472 = fmul float %431, %442
%473 = fadd float %472, %469
%474 = fmul float %432, %444
%475 = fadd float %474, %470
%476 = fmul float %433, %446
%477 = fadd float %476, %471
%478 = fmul float %356, %409
%479 = fadd float %478, %473
%480 = fmul float %357, %410
%481 = fadd float %480, %475
%482 = fmul float %358, %411
%483 = fadd float %482, %477
%484 = fmul float %128, %42
%485 = fadd float %484, %43
%486 = call float @llvm.AMDIL.clamp.(float %485, float 0.000000e+00, float 1.000000e+00)
%487 = call float @llvm.AMDGPU.lrp(float %486, float %479, float %39)
%488 = call float @llvm.AMDGPU.lrp(float %486, float %481, float %40)
%489 = call float @llvm.AMDGPU.lrp(float %486, float %483, float %41)
%490 = call i32 @llvm.SI.packf16(float %487, float %488)
%491 = bitcast i32 %490 to float
%492 = call i32 @llvm.SI.packf16(float %489, float 1.000000e+00)
%493 = bitcast i32 %492 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %491, float %493, float %491, float %493)
ret void
IF92: ; preds = %IF89
%494 = fmul float %261, %261
%495 = fmul float %262, %262
%496 = fadd float %495, %494
%497 = fmul float %263, %263
%498 = fadd float %496, %497
%499 = call float @llvm.AMDGPU.rsq.clamped.f32(float %498)
%500 = fmul float %261, %499
%501 = fmul float %262, %499
%502 = fmul float %263, %499
%503 = fsub float %57, %132
%504 = fsub float %58, %133
%505 = fsub float %59, %134
%506 = fdiv float 1.000000e+00, %500
%507 = fdiv float 1.000000e+00, %501
%508 = fdiv float 1.000000e+00, %502
%509 = fmul float %503, %506
%510 = fmul float %504, %507
%511 = fmul float %505, %508
%512 = fsub float %60, %132
%513 = fsub float %61, %133
%514 = fsub float %62, %134
%515 = fdiv float 1.000000e+00, %500
%516 = fdiv float 1.000000e+00, %501
%517 = fdiv float 1.000000e+00, %502
%518 = fmul float %512, %515
%519 = fmul float %513, %516
%520 = fmul float %514, %517
%521 = fcmp ogt float %500, 0.000000e+00
%522 = fcmp ogt float %501, 0.000000e+00
%523 = fcmp ogt float %502, 0.000000e+00
%.104 = select i1 %521, float %509, float %518
%temp68.1 = select i1 %522, float %510, float %519
%.105 = select i1 %523, float %511, float %520
%524 = fadd float %57, %60
%525 = fadd float %58, %61
%526 = fadd float %59, %62
%527 = fmul float %524, 5.000000e-01
%528 = fmul float %525, 5.000000e-01
%529 = fmul float %526, 5.000000e-01
%530 = call float @llvm.minnum.f32(float %.104, float %temp68.1)
%531 = call float @llvm.minnum.f32(float %530, float %.105)
%532 = fsub float %527, %63
%533 = fsub float %528, %64
%534 = fsub float %529, %65
%535 = fadd float %532, %132
%536 = fadd float %533, %133
%537 = fadd float %534, %134
%538 = fmul float %500, %531
%539 = fadd float %538, %535
%540 = fmul float %501, %531
%541 = fadd float %540, %536
%542 = fmul float %502, %531
%543 = fadd float %542, %537
%544 = fsub float %539, %527
%545 = fsub float %541, %528
%546 = fsub float %543, %529
br label %ENDIF91
ENDIF91: ; preds = %IF89, %IF92
%temp48.0 = phi float [ %544, %IF92 ], [ %261, %IF89 ]
%temp49.0 = phi float [ %545, %IF92 ], [ %262, %IF89 ]
%temp50.0 = phi float [ %546, %IF92 ], [ %263, %IF89 ]
%547 = fsub float 1.000000e+00, %83
%548 = call float @llvm.pow.f32(float %547, float 7.500000e-01)
%549 = fmul float %548, 7.000000e+00
%550 = insertelement <4 x float> undef, float %temp48.0, i32 0
%551 = insertelement <4 x float> %550, float %temp49.0, i32 1
%552 = insertelement <4 x float> %551, float %temp50.0, i32 2
%553 = insertelement <4 x float> %552, float %549, i32 3
%554 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %553)
%555 = extractelement <4 x float> %554, i32 0
%556 = extractelement <4 x float> %554, i32 1
%557 = extractelement <4 x float> %554, i32 2
%558 = extractelement <4 x float> %554, i32 3
%559 = call float @llvm.fabs.f32(float %557)
%560 = fdiv float 1.000000e+00, %559
%561 = fmul float %555, %560
%562 = fadd float %561, 1.500000e+00
%563 = fmul float %556, %560
%564 = fadd float %563, 1.500000e+00
%565 = bitcast float %564 to i32
%566 = bitcast float %562 to i32
%567 = bitcast float %558 to i32
%568 = bitcast float %549 to i32
%569 = insertelement <4 x i32> undef, i32 %565, i32 0
%570 = insertelement <4 x i32> %569, i32 %566, i32 1
%571 = insertelement <4 x i32> %570, i32 %567, i32 2
%572 = insertelement <4 x i32> %571, i32 %568, i32 3
%573 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %572, <32 x i8> %91, <16 x i8> %94, i32 4)
%574 = extractelement <4 x float> %573, i32 0
%575 = extractelement <4 x float> %573, i32 1
%576 = extractelement <4 x float> %573, i32 2
%577 = extractelement <4 x float> %573, i32 3
%578 = call float @llvm.pow.f32(float %577, float %68)
%579 = fmul float %67, %578
%580 = fmul float %579, %574
%581 = fmul float %579, %575
%582 = fmul float %579, %576
%583 = call float @llvm.AMDGPU.lrp(float %50, float %351, float %580)
%584 = call float @llvm.AMDGPU.lrp(float %50, float %352, float %581)
%585 = call float @llvm.AMDGPU.lrp(float %50, float %353, float %582)
br label %ENDIF88
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000
v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001
v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100
v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101
v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400
v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401
v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500
v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501
v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600
v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601
v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800
v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801
v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900
v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901
v_interp_p1_f32 v4, v0, 2, 2, [m0] ; C8100A00
v_interp_p2_f32 v4, [v4], v1, 2, 2, [m0] ; C8110A01
v_interp_p1_f32 v6, v0, 0, 3, [m0] ; C8180C00
v_interp_p2_f32 v6, [v6], v1, 0, 3, [m0] ; C8190C01
v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00
v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01
v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00
v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01
v_interp_p1_f32 v5, v0, 0, 4, [m0] ; C8141000
v_interp_p2_f32 v5, [v5], v1, 0, 4, [m0] ; C8151001
v_interp_p1_f32 v20, v0, 1, 4, [m0] ; C8501100
v_interp_p2_f32 v20, [v20], v1, 1, 4, [m0] ; C8511101
v_interp_p1_f32 v23, v0, 2, 4, [m0] ; C85C1200
v_interp_p2_f32 v23, [v23], v1, 2, 4, [m0] ; C85D1201
v_interp_p1_f32 v24, v0, 3, 4, [m0] ; C8601300
v_interp_p2_f32 v24, [v24], v1, 3, 4, [m0] ; C8611301
v_interp_p1_f32 v21, v0, 0, 5, [m0] ; C8541400
v_interp_p2_f32 v21, [v21], v1, 0, 5, [m0] ; C8551401
v_interp_p1_f32 v16, v0, 1, 5, [m0] ; C8401500
v_interp_p2_f32 v16, [v16], v1, 1, 5, [m0] ; C8411501
v_interp_p1_f32 v22, v0, 2, 5, [m0] ; C8581600
v_mul_f32_e32 v0, v7, v7 ; 10000F07
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x4c ; C204014C
s_buffer_load_dword s9, s[0:3], 0x4d ; C204814D
s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C
s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718
s_buffer_load_dword s18, s[0:3], 0x4e ; C209014E
s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710
v_mac_f32_e32 v0, v11, v11 ; 3E00170B
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v8, v20, v20 ; 10102914
v_mac_f32_e32 v8, v23, v23 ; 3E102F17
v_mac_f32_e32 v8, v24, v24 ; 3E103118
v_rsq_clamp_f32_e32 v25, v8 ; 7E325908
v_interp_p2_f32 v22, [v22], v1, 2, 5, [m0] ; C8591601
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[20:23] ; F0800100 00A90111
s_waitcnt vmcnt(0) ; BF8C0770
v_cmp_nlt_f32_e32 vcc, 0, v1 ; 7C1C0280
s_and_saveexec_b64 s[20:21], vcc ; BE94246A
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[28:35], s[12:15] ; F0800700 00671A11
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v13, s8, v26 ; 101A3408
v_mul_f32_e32 v14, s9, v27 ; 101C3609
v_mul_f32_e32 v15, s18, v28 ; 101E3812
s_or_saveexec_b64 s[20:21], s[20:21] ; BE942514
s_buffer_load_dword s10, s[0:3], 0x2b ; C205012B
s_buffer_load_dword s11, s[0:3], 0x40 ; C2058140
s_buffer_load_dword s16, s[0:3], 0x41 ; C2080141
s_buffer_load_dword s17, s[0:3], 0x42 ; C2088142
s_buffer_load_dword s26, s[0:3], 0x5c ; C20D015C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_xor_b64 exec, exec, s[20:21] ; 89FE147E
s_cbranch_execz BB0_4 ; BF880000
v_mov_b32_e32 v1, s8 ; 7E020208
v_mov_b32_e32 v8, s9 ; 7E100209
v_mov_b32_e32 v12, s18 ; 7E180212
s_buffer_load_dword s19, s[0:3], 0x50 ; C2098150
s_buffer_load_dword s22, s[0:3], 0x51 ; C20B0151
s_buffer_load_dword s23, s[0:3], 0x52 ; C20B8152
image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[28:35], s[12:15] ; F0800700 00671A11
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v1, s19, v1 ; 10020213
v_mul_f32_e32 v8, s22, v8 ; 10101016
v_mul_f32_e32 v12, s23, v12 ; 10181817
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v13, v26, v1 ; 101A031A
v_mul_f32_e32 v14, v27, v8 ; 101C111B
v_mul_f32_e32 v15, v28, v12 ; 101E191C
s_or_b64 exec, exec, s[20:21] ; 88FE147E
s_buffer_load_dword s9, s[0:3], 0x27 ; C2048127
v_mul_f32_e32 v8, v0, v7 ; 10100F00
v_mul_f32_e32 v7, v0, v10 ; 100E1500
v_mul_f32_e32 v0, v0, v11 ; 10001700
s_buffer_load_dword s31, s[0:3], 0x2c ; C20F812C
s_buffer_load_dword s32, s[0:3], 0x2d ; C210012D
s_buffer_load_dword s8, s[0:3], 0x60 ; C2040160
v_mul_f32_e32 v12, v25, v20 ; 10182919
v_mul_f32_e32 v11, v25, v23 ; 10162F19
v_mul_f32_e32 v10, v25, v24 ; 10143119
v_sub_f32_e64 v1, 1.0, s26 ; D2080001 000034F2
v_mov_b32_e32 v26, 0x6f800000 ; 7E3402FF 6F800000
v_cmp_gt_f32_e64 vcc, |v19|, v26 ; D008016A 00023513
v_mov_b32_e32 v26, 0x2f800000 ; 7E3402FF 2F800000
v_cndmask_b32_e32 v26, 1.0, v26 ; 003434F2
v_mul_f32_e32 v29, v12, v8 ; 103A110C
v_mac_f32_e32 v29, v11, v7 ; 3E3A0F0B
v_mac_f32_e32 v29, v10, v0 ; 3E3A010A
v_mul_f32_e32 v27, v8, v29 ; 10363B08
v_mac_f32_e32 v27, v8, v29 ; 3E363B08
v_mul_f32_e32 v28, v7, v29 ; 10383B07
v_mac_f32_e32 v28, v7, v29 ; 3E383B07
v_mad_f32 v27, v20, v25, -v27 ; D282001B 846E3314
v_mad_f32 v28, v23, v25, -v28 ; D282001C 84723317
v_mul_f32_e32 v19, v26, v19 ; 1026271A
v_rcp_f32_e32 v19, v19 ; 7E265513
v_mul_f32_e32 v20, v0, v29 ; 10283B00
v_mac_f32_e32 v20, v0, v29 ; 3E283B00
v_mad_f32 v29, v24, v25, -v20 ; D282001D 84523318
v_mul_f32_e32 v6, v19, v6 ; 100C0D13
v_mul_f32_e32 v9, v19, v9 ; 10121313
v_mul_f32_e32 v19, v6, v26 ; 10263506
s_load_dwordx4 s[12:15], s[4:5], 0x10 ; C0860510
s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514
s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720
s_load_dwordx8 s[44:51], s[6:7], 0x28 ; C0D60728
v_mul_f32_e32 v20, v9, v26 ; 10283509
v_mul_f32_e32 v9, s11, v1 ; 1012020B
v_mul_f32_e32 v6, s16, v1 ; 100C0210
v_mul_f32_e32 v1, s17, v1 ; 10020211
v_mac_f32_e32 v9, s26, v13 ; 3E121A1A
v_mov_b32_e32 v30, v27 ; 7E3C031B
v_mac_f32_e32 v6, s26, v14 ; 3E0C1C1A
v_mov_b32_e32 v31, v28 ; 7E3E031C
v_mac_f32_e32 v1, s26, v15 ; 3E021E1A
v_mov_b32_e32 v32, v29 ; 7E40031D
v_cmp_lt_f32_e64 s[10:11], 0, s10 ; D002000A 00001480
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[36:43], s[12:15] ; F0800F00 00691711
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[44:51], s[20:23] ; F0800F00 00AB1113
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[14:15], s[10:11] ; BE8E240A
s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s10, s[0:3], 0x26 ; C2050126
s_buffer_load_dword s11, s[0:3], 0x28 ; C2058128
s_buffer_load_dword s12, s[0:3], 0x29 ; C2060129
s_buffer_load_dword s13, s[0:3], 0x2a ; C206812A
s_buffer_load_dword s16, s[0:3], 0x20 ; C2080120
s_buffer_load_dword s17, s[0:3], 0x21 ; C2088121
s_buffer_load_dword s18, s[0:3], 0x22 ; C2090122
s_buffer_load_dword s19, s[0:3], 0x24 ; C2098124
s_buffer_load_dword s20, s[0:3], 0x25 ; C20A0125
v_mul_f32_e32 v18, v27, v27 ; 1024371B
v_mac_f32_e32 v18, v28, v28 ; 3E24391C
v_mac_f32_e32 v18, v29, v29 ; 3E243B1D
v_rsq_clamp_f32_e32 v18, v18 ; 7E245912
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v19, s10, v22 ; 08262C0A
v_mov_b32_e32 v20, s10 ; 7E28020A
v_sub_f32_e32 v23, s16, v21 ; 082E2A10
v_sub_f32_e32 v25, s17, v16 ; 08322011
v_add_f32_e32 v20, s18, v20 ; 06282812
v_sub_f32_e32 v26, s18, v22 ; 08342C12
v_mad_f32 v30, 0.5, v20, -s13 ; D282001E 803628F0
v_add_f32_e32 v32, v22, v30 ; 06403D16
v_mul_f32_e32 v30, v18, v27 ; 103C3712
v_mul_f32_e32 v31, v18, v28 ; 103E3912
v_mul_f32_e32 v18, v18, v29 ; 10243B12
v_rcp_f32_e32 v33, v30 ; 7E42551E
v_rcp_f32_e32 v34, v31 ; 7E44551F
v_rcp_f32_e32 v35, v18 ; 7E465512
v_sub_f32_e32 v36, s19, v21 ; 08482A13
v_mov_b32_e32 v37, s19 ; 7E4A0213
v_add_f32_e32 v37, s16, v37 ; 064A4A10
v_mul_f32_e32 v23, v33, v23 ; 102E2F21
v_mul_f32_e32 v25, v34, v25 ; 10323322
v_mul_f32_e32 v26, v35, v26 ; 10343523
v_mul_f32_e32 v33, v33, v36 ; 10424921
v_sub_f32_e32 v36, s20, v16 ; 08482014
v_mov_b32_e32 v38, s20 ; 7E4C0214
v_mul_f32_e32 v34, v34, v36 ; 10444922
v_mul_f32_e32 v19, v35, v19 ; 10262723
v_cmp_lt_f32_e32 vcc, 0, v30 ; 7C023C80
v_cndmask_b32_e32 v23, v33, v23 ; 002E2F21
v_cmp_lt_f32_e32 vcc, 0, v31 ; 7C023E80
v_cndmask_b32_e32 v25, v34, v25 ; 00323322
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v19, v19, v26 ; 00263513
v_add_f32_e32 v26, s17, v38 ; 06344C11
v_min3_f32 v19, v23, v25, v19 ; D2A20013 044E3317
v_mad_f32 v23, 0.5, v37, -s11 ; D2820017 802E4AF0
v_mad_f32 v25, 0.5, v26, -s12 ; D2820019 803234F0
v_add_f32_e32 v23, v21, v23 ; 062E2F15
v_add_f32_e32 v25, v16, v25 ; 06323310
v_mac_f32_e32 v23, v19, v30 ; 3E2E3D13
v_mac_f32_e32 v25, v19, v31 ; 3E323F13
v_mac_f32_e32 v32, v19, v18 ; 3E402513
v_mad_f32 v30, 0.5, -v37, v23 ; D282001E 445E4AF0
v_mad_f32 v31, 0.5, -v26, v25 ; D282001F 446634F0
v_mad_f32 v32, 0.5, -v20, v32 ; D2820020 448228F0
s_or_b64 exec, exec, s[14:15] ; 88FE0E7E
s_buffer_load_dword s28, s[0:3], 0x17 ; C20E0117
s_buffer_load_dword s29, s[0:3], 0x43 ; C20E8143
s_buffer_load_dword s27, s[0:3], 0x44 ; C20D8144
s_buffer_load_dword s19, s[0:3], 0x45 ; C2098145
s_buffer_load_dword s16, s[0:3], 0x46 ; C2080146
s_buffer_load_dword s11, s[0:3], 0x0 ; C2058100
s_buffer_load_dword s12, s[0:3], 0x1 ; C2060101
s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102
s_buffer_load_dword s13, s[0:3], 0x4 ; C2068104
s_buffer_load_dword s14, s[0:3], 0x5 ; C2070105
s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106
s_buffer_load_dword s17, s[0:3], 0x7 ; C2088107
s_buffer_load_dword s18, s[0:3], 0x8 ; C2090108
s_buffer_load_dword s20, s[0:3], 0x9 ; C20A0109
s_buffer_load_dword s21, s[0:3], 0xa ; C20A810A
s_buffer_load_dword s22, s[0:3], 0xb ; C20B010B
s_buffer_load_dword s23, s[0:3], 0xc ; C20B810C
s_buffer_load_dword s24, s[0:3], 0xd ; C20C010D
s_buffer_load_dword s25, s[0:3], 0xe ; C20C810E
v_sub_f32_e64 v18, 1.0, s8 ; D2080012 000010F2
v_log_f32_e32 v18, v18 ; 7E244F12
v_mul_legacy_f32_e32 v18, 0x3f400000, v18 ; 0E2424FF 3F400000
v_exp_f32_e32 v18, v18 ; 7E244B12
v_mul_f32_e32 v33, 0x40e00000, v18 ; 104224FF 40E00000
v_cubeid_f32 v37, v30, v31, v32 ; D2880025 04823F1E
v_cubema_f32 v36, v30, v31, v32 ; D28E0024 04823F1E
s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500
s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700
v_cubesc_f32 v35, v30, v31, v32 ; D28A0023 04823F1E
v_cubetc_f32 v34, v30, v31, v32 ; D28C0022 04823F1E
v_rcp_f32_e64 v18, |v36| ; D3540112 00000124
v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000
v_mad_f32 v31, v18, v34, v30 ; D282001F 047A4512
v_mac_f32_e32 v30, v18, v35 ; 3E3C4712
v_mov_b32_e32 v32, v37 ; 7E400325
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v18, v33 ; 7E244F21
s_buffer_load_dword s30, s[0:3], 0xf ; C20F010F
s_buffer_load_dword s33, s[0:3], 0x68 ; C2108168
v_mul_legacy_f32_e32 v18, s32, v18 ; 0E242420
v_exp_f32_e32 v18, v18 ; 7E244B12
v_mul_f32_e32 v18, s31, v18 ; 1024241F
v_mul_f32_e32 v20, v30, v18 ; 1028251E
v_mul_f32_e32 v19, v31, v18 ; 1026251F
v_mul_f32_e32 v18, v32, v18 ; 10242520
v_mov_b32_e32 v23, s26 ; 7E2E021A
v_mov_b32_e32 v25, 0x3f7fff58 ; 7E3202FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s9, v25 ; 7C023209
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[34:35], vcc ; BEA2246A
s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E
s_cbranch_execz BB0_10 ; BF880000
s_buffer_load_dword s32, s[0:3], 0x3b ; C210013B
s_buffer_load_dword s26, s[0:3], 0x3c ; C20D013C
s_buffer_load_dword s31, s[0:3], 0x3d ; C20F813D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s32 ; D0020024 00004080
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_11 ; BF880000
s_buffer_load_dword s32, s[0:3], 0x36 ; C2100136
s_buffer_load_dword s38, s[0:3], 0x38 ; C2130138
s_buffer_load_dword s39, s[0:3], 0x39 ; C2138139
s_buffer_load_dword s40, s[0:3], 0x3a ; C214013A
s_buffer_load_dword s41, s[0:3], 0x30 ; C2148130
s_buffer_load_dword s42, s[0:3], 0x31 ; C2150131
s_buffer_load_dword s43, s[0:3], 0x32 ; C2158132
s_buffer_load_dword s44, s[0:3], 0x34 ; C2160134
s_buffer_load_dword s45, s[0:3], 0x35 ; C2168135
v_mul_f32_e32 v25, v27, v27 ; 1032371B
v_mac_f32_e32 v25, v28, v28 ; 3E32391C
v_mac_f32_e32 v25, v29, v29 ; 3E323B1D
v_rsq_clamp_f32_e32 v25, v25 ; 7E325919
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v26, s32, v22 ; 08342C20
v_mov_b32_e32 v30, s32 ; 7E3C0220
v_sub_f32_e32 v31, s41, v21 ; 083E2A29
v_sub_f32_e32 v32, s42, v16 ; 0840202A
v_add_f32_e32 v30, s43, v30 ; 063C3C2B
v_sub_f32_e32 v33, s43, v22 ; 08422C2B
v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0
v_add_f32_e32 v22, v22, v34 ; 062C4516
v_mul_f32_e32 v27, v25, v27 ; 10363719
v_mul_f32_e32 v28, v25, v28 ; 10383919
v_mul_f32_e32 v25, v25, v29 ; 10323B19
v_rcp_f32_e32 v29, v27 ; 7E3A551B
v_rcp_f32_e32 v34, v28 ; 7E44551C
v_rcp_f32_e32 v35, v25 ; 7E465519
v_sub_f32_e32 v36, s44, v21 ; 08482A2C
v_mov_b32_e32 v37, s44 ; 7E4A022C
v_add_f32_e32 v37, s41, v37 ; 064A4A29
v_mul_f32_e32 v31, v29, v31 ; 103E3F1D
v_mul_f32_e32 v29, v29, v36 ; 103A491D
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v26, v35, v26 ; 10343523
v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0
v_add_f32_e32 v21, v21, v35 ; 062A4715
v_sub_f32_e32 v35, s45, v16 ; 0846202D
v_mov_b32_e32 v36, s45 ; 7E48022D
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s42, v36 ; 0646482A
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D
v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v26, v26, v33 ; 0034431A
v_min3_f32 v26, v29, v31, v26 ; D2A2001A 046A3F1D
v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0
v_add_f32_e32 v16, v16, v29 ; 06203B10
v_mac_f32_e32 v21, v26, v27 ; 3E2A371A
v_mac_f32_e32 v16, v26, v28 ; 3E20391A
v_mac_f32_e32 v22, v26, v25 ; 3E2C331A
v_mad_f32 v27, 0.5, -v37, v21 ; D282001B 44564AF0
v_mad_f32 v28, 0.5, -v35, v16 ; D282001C 444246F0
v_mad_f32 v29, 0.5, -v30, v22 ; D282001D 445A3CF0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v16, 1.0, s8 ; D2080010 000010F2
v_log_f32_e32 v16, v16 ; 7E204F10
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v30, 0x40e00000, v16 ; 103C20FF 40E00000
v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B
v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B
v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B
v_rcp_f32_e64 v16, |v33| ; D3540110 00000121
v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000
v_mad_f32 v28, v16, v31, v27 ; D282001C 046E3F10
v_mac_f32_e32 v27, v16, v32 ; 3E364110
v_mov_b32_e32 v29, v34 ; 7E3A0322
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[25:28], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A191B
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v16, v28 ; 7E204F1C
v_sub_f32_e64 v21, 1.0, s9 ; D2080015 000012F2
v_mul_legacy_f32_e32 v16, s31, v16 ; 0E20201F
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v16, s26, v16 ; 1020201A
v_mul_f32_e32 v22, v25, v16 ; 102C2119
v_mul_f32_e32 v25, v26, v16 ; 1032211A
v_mul_f32_e32 v16, v27, v16 ; 1020211B
v_mul_f32_e32 v22, v22, v21 ; 102C2B16
v_mul_f32_e32 v25, v25, v21 ; 10322B19
v_mul_f32_e32 v16, v16, v21 ; 10202B10
v_mac_f32_e32 v22, s9, v20 ; 3E2C2809
v_mac_f32_e32 v25, s9, v19 ; 3E322609
v_mac_f32_e32 v16, s9, v18 ; 3E202409
v_mov_b32_e32 v18, v16 ; 7E240310
v_mov_b32_e32 v19, v25 ; 7E260319
v_mov_b32_e32 v20, v22 ; 7E280316
s_or_b64 exec, exec, s[34:35] ; 88FE227E
v_mad_f32 v25, -v23, s29, s29 ; D2820019 20743B17
v_mov_b32_e32 v16, s28 ; 7E20021C
v_mul_f32_e32 v21, v25, v13 ; 102A1B19
v_mul_f32_e32 v14, v25, v14 ; 101C1D19
v_mul_f32_e32 v13, v25, v15 ; 101A1F19
v_mul_f32_e32 v15, s27, v17 ; 101E221B
v_sub_f32_e64 v26, 1.0, s33 ; D208001A 000042F2
v_mac_f32_e32 v26, s33, v24 ; 3E343021
v_mul_f32_e32 v22, s19, v17 ; 102C2213
v_mul_f32_e32 v17, s16, v17 ; 10222210
s_buffer_load_dword s6, s[0:3], 0x10 ; C2030110
s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111
s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112
s_buffer_load_dword s16, s[0:3], 0x16 ; C2080116
s_buffer_load_dword s7, s[0:3], 0x48 ; C2038148
s_buffer_load_dword s9, s[0:3], 0x49 ; C2048149
s_buffer_load_dword s0, s[0:3], 0x4b ; C200014B
v_mul_f32_e32 v23, s14, v7 ; 102E0E0E
v_mac_f32_e32 v23, s13, v8 ; 3E2E100D
v_mac_f32_e32 v23, s15, v0 ; 3E2E000F
v_add_f32_e32 v23, s17, v23 ; 062E2E11
v_mul_f32_e32 v24, s20, v7 ; 10300E14
v_mac_f32_e32 v24, s18, v8 ; 3E301012
v_mac_f32_e32 v24, s21, v0 ; 3E300015
v_add_f32_e32 v24, s22, v24 ; 06303016
v_mul_f32_e32 v27, s24, v7 ; 10360E18
v_mac_f32_e32 v27, s23, v8 ; 3E361017
v_mac_f32_e32 v27, s25, v0 ; 3E360019
v_add_f32_e32 v27, s30, v27 ; 0636361E
v_add_f32_e32 v2, v23, v2 ; 06040517
v_add_f32_e32 v3, v24, v3 ; 06060718
v_add_f32_e32 v24, v27, v4 ; 0630091B
v_mul_f32_e32 v4, s11, v8 ; 1008100B
v_mac_f32_e32 v4, s12, v7 ; 3E080E0C
v_mac_f32_e32 v4, s10, v0 ; 3E08000A
v_max_f32_e32 v23, 0, v4 ; 202E0880
v_mul_f32_e32 v4, v26, v2 ; 1008051A
v_mul_f32_e32 v2, v26, v3 ; 1004071A
v_mul_f32_e32 v3, v26, v24 ; 1006311A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v16, s16, v5 ; 3E200A10
v_mul_f32_e32 v5, v26, v20 ; 100A291A
v_mul_f32_e32 v19, v26, v19 ; 1026271A
v_mul_f32_e32 v18, v26, v18 ; 1024251A
v_sub_f32_e32 v20, 1.0, v25 ; 082832F2
v_add_f32_e32 v20, s8, v20 ; 06282808
v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880
v_sub_f32_e32 v24, s11, v12 ; 0830180B
v_sub_f32_e32 v25, s12, v11 ; 0832160C
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_sub_f32_e32 v27, s10, v10 ; 0836140A
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_mul_f32_e32 v26, v26, v27 ; 1034371A
v_mul_f32_e32 v12, v12, v8 ; 1018110C
v_mad_f32 v11, -v11, v7, -v12 ; D282000B A4320F0B
v_mad_f32 v10, -v10, v0, v11 ; D282000A 242E010A
v_mul_f32_e32 v8, v24, v8 ; 10101118
v_mac_f32_e32 v8, v25, v7 ; 3E100F19
v_mul_f32_e32 v7, s11, v24 ; 100E300B
v_mac_f32_e32 v7, s12, v25 ; 3E0E320C
v_mac_f32_e32 v8, v26, v0 ; 3E10011A
v_mac_f32_e32 v7, s10, v26 ; 3E0E340A
v_max_f32_e32 v0, 0, v7 ; 20000E80
v_sub_f32_e32 v7, 1.0, v0 ; 080E00F2
v_mul_f32_e32 v11, v7, v7 ; 10160F07
v_mul_f32_e32 v7, v7, v11 ; 100E1707
v_mul_f32_e32 v7, v7, v11 ; 100E1707
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v24, v11, v12 ; 1030190B
v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C
v_mul_f32_e32 v26, v9, v25 ; 10343309
v_sub_f32_e32 v27, 1.0, v9 ; 083612F2
v_mac_f32_e32 v9, v7, v27 ; 3E123707
v_mul_f32_e32 v27, v6, v25 ; 10363306
v_sub_f32_e32 v28, 1.0, v6 ; 08380CF2
v_mac_f32_e32 v6, v7, v28 ; 3E0C3907
v_mul_f32_e32 v25, v1, v25 ; 10323301
v_sub_f32_e32 v28, 1.0, v1 ; 083802F2
v_mac_f32_e32 v1, v7, v28 ; 3E023907
v_sub_f32_e64 v7, 1.0, s8 ; D2080007 000010F2
v_sub_f32_e32 v28, 1.0, v7 ; 08380EF2
v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F
v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9
v_add_f32_e32 v29, v0, v0 ; 063A0100
v_mul_f32_e32 v0, v7, v0 ; 10000107
v_mad_f32 v0, v29, v0, 0.5 ; D2820000 03C2011D
v_mul_f32_e32 v12, v24, v12 ; 10181918
v_mac_f32_e32 v26, v20, v12 ; 3E341914
v_mac_f32_e32 v27, v20, v12 ; 3E361914
v_mac_f32_e32 v25, v20, v12 ; 3E321914
v_mul_f32_e32 v7, v7, v7 ; 100E0F07
v_log_f32_e32 v20, v28 ; 7E284F1C
v_mul_f32_e32 v7, s0, v7 ; 100E0E00
v_mul_f32_e32 v11, v7, v11 ; 10161707
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v20 ; 7E145514
v_sub_f32_e32 v20, 1.0, v23 ; 08282EF2
v_mul_f32_e32 v7, v7, v20 ; 100E2907
v_mac_f32_e32 v7, 1.0, v23 ; 3E0E2EF2
v_max_f32_e32 v8, 0, v8 ; 20101080
v_log_f32_e32 v8, v8 ; 7E104F08
v_madak_f32_e32 v7, v7, v11, 0x38d1b717 ; 420E1707 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v8, v11, v8 ; 0E10110B
v_rcp_f32_e32 v7, v7 ; 7E0E5507
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s9, v10 ; 10141409
v_exp_f32_e32 v8, v8 ; 7E104B08
v_mul_f32_e32 v8, v10, v8 ; 1010110A
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v8, v20, v20 ; 10102914
v_mul_f32_e32 v10, v20, v8 ; 10141114
v_mul_f32_e32 v8, v10, v8 ; 1010110A
v_add_f32_e32 v0, -1.0, v0 ; 060000F3
v_mad_f32 v8, v0, v8, 1.0 ; D2820008 03CA1100
v_mad_f32 v0, v0, v12, 1.0 ; D2820000 03CA1900
v_mul_f32_e32 v0, v0, v8 ; 10001100
v_mul_f32_e32 v7, v23, v7 ; 100E0F17
v_mul_f32_e32 v7, s7, v7 ; 100E0E07
v_mul_f32_e32 v0, v23, v0 ; 10000117
v_mac_f32_e32 v4, v0, v15 ; 3E081F00
v_mul_f32_e32 v4, v4, v21 ; 10082B04
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_mul_f32_e32 v8, v15, v7 ; 10100F0F
v_mac_f32_e32 v4, v9, v8 ; 3E081109
v_mac_f32_e32 v2, v0, v22 ; 3E042D00
v_mac_f32_e32 v3, v0, v17 ; 3E062300
v_mul_f32_e32 v0, v22, v7 ; 10000F16
v_mul_f32_e32 v7, v17, v7 ; 100E0F11
v_mul_f32_e32 v2, v2, v14 ; 10041D02
v_mul_f32_e32 v3, v3, v13 ; 10061B03
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mac_f32_e32 v3, v1, v7 ; 3E060F01
v_mac_f32_e32 v4, v26, v5 ; 3E080B1A
v_mac_f32_e32 v2, v27, v19 ; 3E04271B
v_mac_f32_e32 v3, v25, v18 ; 3E062519
v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v5, s6, v1 ; 100A0206
v_mac_f32_e32 v5, v4, v0 ; 3E0A0104
v_mul_f32_e32 v4, s5, v1 ; 10080205
v_mac_f32_e32 v4, v2, v0 ; 3E080102
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v3, v0 ; 3E020103
v_cvt_pkrtz_f16_f32_e32 v0, v5, v4 ; 5E000905
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 56
VGPRS: 40
Code Size: 2340 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL OUT[8], GENERIC[7]
DCL CONST[0..20]
DCL TEMP[0..10], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[6].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[8].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7].xyw, TEMP[1], IMM[0].yyyy
45: MOV TEMP[8].x, TEMP[7].xxxx
46: MUL TEMP[9].x, TEMP[7].yyyy, CONST[1].xxxx
47: MOV TEMP[8].y, TEMP[9].xxxx
48: ADD TEMP[7].xy, TEMP[8].xyyy, TEMP[7].wwww
49: MOV TEMP[7].zw, TEMP[1].wwzw
50: MUL TEMP[8], TEMP[3].xyzz, TEMP[3].yzzx
51: DP4 TEMP[9].x, CONST[2], TEMP[8]
52: DP4 TEMP[10].x, CONST[3], TEMP[8]
53: MOV TEMP[9].y, TEMP[10].xxxx
54: DP4 TEMP[8].x, CONST[4], TEMP[8]
55: MOV TEMP[9].z, TEMP[8].xxxx
56: MUL TEMP[8].x, TEMP[3].yyyy, TEMP[3].yyyy
57: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[8].xxxx
58: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[9].xyzz
59: ADD TEMP[8].xyz, TEMP[0].xyzz, -CONST[0].xyzz
60: MOV TEMP[8].yzw, TEMP[8].yxyz
61: MOV TEMP[8].x, TEMP[1].zzzz
62: MOV TEMP[0].xyz, TEMP[0].xyzx
63: MOV OUT[8], TEMP[0]
64: MOV OUT[1], TEMP[2]
65: MOV OUT[3], TEMP[5]
66: MOV OUT[2], TEMP[4]
67: MOV OUT[4], TEMP[6]
68: MOV OUT[5], TEMP[3]
69: MOV OUT[6], TEMP[7]
70: MOV OUT[0], TEMP[1]
71: MOV OUT[7], TEMP[8]
72: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI VERT listing above.
; It reads shader constants through %1, vertex attributes through %4, computes
; the TGSI temporaries as scalar float operations, and finishes with nine
; llvm.SI.export calls (targets 32-39 and 12).
; NOTE(review): the roles of the other arguments are not visible in this
; function beyond %5 + %7 forming the index passed to llvm.SI.vs.load.input.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Slot 0 of %1 is the first operand of every llvm.SI.load.const below
; (presumably the constant-buffer descriptor - confirm against the driver).
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Constant loads. Matched against the TGSI listing, the byte offset is
; 16 * i + 4 * component for CONST[i] (e.g. offset 16 is CONST[1].x, which
; TGSI instruction 46 multiplies into TEMP[7].y).
; CONST[0].xyz and CONST[1].x
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
; CONST[2], CONST[3], CONST[4] (xyzw each)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
; CONST[5].xyz
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
; CONST[6], CONST[7], CONST[8] (xyzw each) and CONST[9].xyz
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
; CONST[10].xyz, CONST[11].xyz, CONST[12].xyz
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
; CONST[14] and CONST[15] (xyzw each)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
; CONST[16].x
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
; CONST[17], CONST[18], CONST[19], CONST[20] (xyzw each)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; Vertex inputs IN[0]..IN[4]: input k loads slot k of %4 and fetches it with
; index %5 + %7. Only the components the shader actually reads are extracted.
; IN[0].xyzw
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
; IN[1].xyz
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
; IN[2].xy
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
; IN[3].xy
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
; IN[4].xyzw
%108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0
%110 = add i32 %5, %7
%111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
%114 = extractelement <4 x float> %111, i32 2
%115 = extractelement <4 x float> %111, i32 3
; TGSI 0-3: TEMP[0] = CONST[6]*IN[0].x + CONST[7]*IN[0].y + CONST[8]*IN[0].z,
; then CONST[9]*IN[0].w is added to xyz only. Results: xyz = %137/%139/%141,
; w = %135 (the w lane never receives the CONST[9] term).
%116 = fmul float %32, %85
%117 = fmul float %33, %85
%118 = fmul float %34, %85
%119 = fmul float %35, %85
%120 = fmul float %36, %86
%121 = fadd float %120, %116
%122 = fmul float %37, %86
%123 = fadd float %122, %117
%124 = fmul float %38, %86
%125 = fadd float %124, %118
%126 = fmul float %39, %86
%127 = fadd float %126, %119
%128 = fmul float %40, %87
%129 = fadd float %128, %121
%130 = fmul float %41, %87
%131 = fadd float %130, %123
%132 = fmul float %42, %87
%133 = fadd float %132, %125
%134 = fmul float %43, %87
%135 = fadd float %134, %127
%136 = fmul float %44, %88
%137 = fadd float %136, %129
%138 = fmul float %45, %88
%139 = fadd float %138, %131
%140 = fmul float %46, %88
%141 = fadd float %140, %133
; TGSI 4-7: TEMP[1] = CONST[17]*IN[0].x + CONST[18]*IN[0].y + CONST[19]*IN[0].z
; + CONST[20]*IN[0].w. Results: %163/%165/%167/%169 (exported with target 12).
%142 = fmul float %65, %85
%143 = fmul float %66, %85
%144 = fmul float %67, %85
%145 = fmul float %68, %85
%146 = fmul float %69, %86
%147 = fadd float %146, %142
%148 = fmul float %70, %86
%149 = fadd float %148, %143
%150 = fmul float %71, %86
%151 = fadd float %150, %144
%152 = fmul float %72, %86
%153 = fadd float %152, %145
%154 = fmul float %73, %87
%155 = fadd float %154, %147
%156 = fmul float %74, %87
%157 = fadd float %156, %149
%158 = fmul float %75, %87
%159 = fadd float %158, %151
%160 = fmul float %76, %87
%161 = fadd float %160, %153
%162 = fmul float %77, %88
%163 = fadd float %162, %155
%164 = fmul float %78, %88
%165 = fadd float %164, %157
%166 = fmul float %79, %88
%167 = fadd float %166, %159
%168 = fmul float %80, %88
%169 = fadd float %168, %161
; TGSI 8: TEMP[2].xy = IN[2].xy * CONST[14].xy + CONST[14].zw
%170 = fmul float %100, %56
%171 = fadd float %170, %58
%172 = fmul float %101, %57
%173 = fadd float %172, %59
; TGSI 9-16: the UIF/ELSE on CONST[16].x == 0 is lowered to two selects that
; pick IN[2].xy (true) or IN[3].xy (false); the pick is then scaled and offset
; by CONST[15] and becomes TEMP[2].zw (%176, %178).
%174 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %174, float %100, float %106
%.44 = select i1 %174, float %101, float %107
%175 = fmul float %., %60
%176 = fadd float %175, %62
%177 = fmul float %.44, %61
%178 = fadd float %177, %63
; TGSI 17-28: TEMP[3].xyz = per-component dot of CONST[10..12].xyz with
; IN[1].xyz (x uses CONST[10], y uses CONST[11], z uses CONST[12]).
%179 = fmul float %47, %93
%180 = fmul float %50, %93
%181 = fmul float %53, %93
%182 = fmul float %48, %94
%183 = fadd float %182, %179
%184 = fmul float %51, %94
%185 = fadd float %184, %180
%186 = fmul float %54, %94
%187 = fadd float %186, %181
%188 = fmul float %49, %95
%189 = fadd float %188, %183
%190 = fmul float %52, %95
%191 = fadd float %190, %185
%192 = fmul float %55, %95
%193 = fadd float %192, %187
; TGSI 29-31: scale TEMP[3].xyz by rsq of its squared length -> %200/%201/%202
%194 = fmul float %189, %189
%195 = fmul float %191, %191
%196 = fadd float %195, %194
%197 = fmul float %193, %193
%198 = fadd float %196, %197
%199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198)
%200 = fmul float %189, %199
%201 = fmul float %191, %199
%202 = fmul float %193, %199
; TGSI 32-34: TEMP[4].xyz = CONST[6].xyz*IN[4].x + CONST[7].xyz*IN[4].y
; + CONST[8].xyz*IN[4].z
%203 = fmul float %32, %112
%204 = fmul float %33, %112
%205 = fmul float %34, %112
%206 = fmul float %36, %113
%207 = fadd float %206, %203
%208 = fmul float %37, %113
%209 = fadd float %208, %204
%210 = fmul float %38, %113
%211 = fadd float %210, %205
%212 = fmul float %40, %114
%213 = fadd float %212, %207
%214 = fmul float %41, %114
%215 = fadd float %214, %209
%216 = fmul float %42, %114
%217 = fadd float %216, %211
; TGSI 35-37: scale TEMP[4].xyz by rsq of its squared length -> %224/%225/%226
%218 = fmul float %213, %213
%219 = fmul float %215, %215
%220 = fadd float %219, %218
%221 = fmul float %217, %217
%222 = fadd float %220, %221
%223 = call float @llvm.AMDGPU.rsq.clamped.f32(float %222)
%224 = fmul float %213, %223
%225 = fmul float %215, %223
%226 = fmul float %217, %223
; TGSI 38-39: TEMP[5].xyz = TEMP[3].yzx * TEMP[4].zxy - TEMP[3].zxy * TEMP[4].yzx
; (the cross product of TEMP[3] and TEMP[4]) -> %231/%233/%235
%227 = fmul float %202, %225
%228 = fmul float %200, %226
%229 = fmul float %201, %224
%230 = fmul float %201, %226
%231 = fsub float %230, %227
%232 = fmul float %202, %224
%233 = fsub float %232, %228
%234 = fmul float %200, %225
%235 = fsub float %234, %229
; TGSI 40: TEMP[5].xyz *= IN[4].w
%236 = fmul float %231, %115
%237 = fmul float %233, %115
%238 = fmul float %235, %115
; TGSI 44-48: TEMP[7].xy = (TEMP[1].x * 0.5, TEMP[1].y * 0.5 * CONST[1].x)
; + TEMP[1].w * 0.5 -> %243/%244
%239 = fmul float %163, 5.000000e-01
%240 = fmul float %165, 5.000000e-01
%241 = fmul float %169, 5.000000e-01
%242 = fmul float %240, %16
%243 = fadd float %239, %241
%244 = fadd float %242, %241
; TGSI 50: TEMP[8] = TEMP[3].xyzz * TEMP[3].yzzx = (x*y, y*z, z*z, z*x)
%245 = fmul float %200, %201
%246 = fmul float %201, %202
%247 = fmul float %202, %202
%248 = fmul float %202, %200
; TGSI 51-55: DP4 of TEMP[8] with CONST[2], CONST[3], CONST[4]
; -> %255/%262/%269
%249 = fmul float %17, %245
%250 = fmul float %18, %246
%251 = fadd float %249, %250
%252 = fmul float %19, %247
%253 = fadd float %251, %252
%254 = fmul float %20, %248
%255 = fadd float %253, %254
%256 = fmul float %21, %245
%257 = fmul float %22, %246
%258 = fadd float %256, %257
%259 = fmul float %23, %247
%260 = fadd float %258, %259
%261 = fmul float %24, %248
%262 = fadd float %260, %261
%263 = fmul float %25, %245
%264 = fmul float %26, %246
%265 = fadd float %263, %264
%266 = fmul float %27, %247
%267 = fadd float %265, %266
%268 = fmul float %28, %248
%269 = fadd float %267, %268
; TGSI 56-58: TEMP[3].xyz = CONST[5].xyz * (x*x - y*y) + the three DP4 results,
; where x, y are the normalized TEMP[3].x and TEMP[3].y -> %274/%276/%278
%270 = fmul float %201, %201
%271 = fmul float %200, %200
%272 = fsub float %271, %270
%273 = fmul float %29, %272
%274 = fadd float %273, %255
%275 = fmul float %30, %272
%276 = fadd float %275, %262
%277 = fmul float %31, %272
%278 = fadd float %277, %269
; TGSI 59: TEMP[0].xyz - CONST[0].xyz (becomes TEMP[8].yzw after TGSI 60)
%279 = fsub float %137, %13
%280 = fsub float %139, %14
%281 = fsub float %141, %15
; Exports. Matching operands against the TGSI MOV OUT[...] instructions:
;   32 -> OUT[1] (TEMP[2])        33 -> OUT[2] (TEMP[4])
;   34 -> OUT[3] (TEMP[5])        35 -> OUT[4] (TEMP[6], the normalized TEMP[3])
;   36 -> OUT[5] (TEMP[3])        37 -> OUT[6] (TEMP[7])
;   38 -> OUT[7] (TEMP[8])        39 -> OUT[8] (TEMP[0])
;   12 -> OUT[0], POSITION (TEMP[1])
; Components the TGSI never writes (w of OUT[2]..OUT[5]) are exported as 0.0.
; NOTE(review): the fourth i32 appears to be the export target and the third
; i32 is 1 only on the final export - confirm both against the backend.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %171, float %173, float %176, float %178)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %224, float %225, float %226, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %236, float %237, float %238, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %200, float %201, float %202, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %274, float %276, float %278, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %243, float %244, float %167, float %169)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %167, float %279, float %280, float %281)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %137, float %139, float %141, float %135)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %163, float %165, float %167, float %169)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader whose LLVM IR is listed above.
; Register roles below were derived by matching operands and offsets against
; that IR; CONST[]/IN[]/TEMP[] names refer to the TGSI listing.
; v1 = 0: used as the fourth source of exports 33-36 (the 0.0 operands in the IR).
v_mov_b32_e32 v1, 0 ; 7E020280
; v0 = s10 + v0: index operand of the buffer_load_format_xyzw fetches below
; (corresponds to `add i32 %5, %7` in the IR).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[28:31]: base operand of every s_buffer_load_dword (the constant loads).
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
; Five 4-dword values read via s[8:9]; each is the resource operand of one
; buffer_load_format_xyzw below (s[8:9] itself is overwritten by the last load).
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
; s_buffer_load_dword immediates are dword offsets: 0x23 is byte offset 140,
; the `i32 140` load (CONST[8].w) in the IR.
s_buffer_load_dword s32, s[28:31], 0x23 ; C2101D23
; Vertex fetches: v[2:5] = IN[0], v[6:9] = IN[1], v[9:12] = IN[2],
; v[11:14] = IN[3], v[13:16] = IN[4]. The destination ranges overlap, so only
; the components the shader reads survive (IN[1].xyz in v6-v8, IN[2].xy in
; v9-v10, IN[3].xy in v11-v12); an s_waitcnt separates each overlapping pair.
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
; First batch of scalar constant loads (dword offset = IR byte offset / 4).
s_buffer_load_dword s33, s[28:31], 0x24 ; C2109D24
s_buffer_load_dword s34, s[28:31], 0x25 ; C2111D25
s_buffer_load_dword s35, s[28:31], 0x26 ; C2119D26
s_buffer_load_dword s36, s[28:31], 0x28 ; C2121D28
s_buffer_load_dword s3, s[28:31], 0x13 ; C2019D13
s_buffer_load_dword s2, s[28:31], 0x14 ; C2011D14
s_buffer_load_dword s0, s[28:31], 0x15 ; C2001D15
s_buffer_load_dword s1, s[28:31], 0x16 ; C2009D16
s_buffer_load_dword s18, s[28:31], 0x18 ; C2091D18
s_buffer_load_dword s37, s[28:31], 0x29 ; C2129D29
s_buffer_load_dword s38, s[28:31], 0x2a ; C2131D2A
s_buffer_load_dword s39, s[28:31], 0x2c ; C2139D2C
s_buffer_load_dword s40, s[28:31], 0x2d ; C2141D2D
s_buffer_load_dword s41, s[28:31], 0x2e ; C2149D2E
s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19
s_buffer_load_dword s19, s[28:31], 0x1a ; C2099D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s22, s[28:31], 0x1d ; C20B1D1D
s_buffer_load_dword s43, s[28:31], 0x30 ; C2159D30
s_buffer_load_dword s44, s[28:31], 0x31 ; C2161D31
s_buffer_load_dword s45, s[28:31], 0x32 ; C2169D32
s_buffer_load_dword s46, s[28:31], 0x38 ; C2171D38
s_buffer_load_dword s47, s[28:31], 0x39 ; C2179D39
s_buffer_load_dword s24, s[28:31], 0x1e ; C20C1D1E
s_buffer_load_dword s48, s[28:31], 0x1f ; C2181D1F
s_buffer_load_dword s26, s[28:31], 0x20 ; C20D1D20
s_buffer_load_dword s27, s[28:31], 0x21 ; C20D9D21
s_buffer_load_dword s25, s[28:31], 0x22 ; C20C9D22
s_buffer_load_dword s4, s[28:31], 0x3f ; C2021D3F
s_buffer_load_dword s5, s[28:31], 0x40 ; C2029D40
s_buffer_load_dword s49, s[28:31], 0x44 ; C2189D44
s_buffer_load_dword s50, s[28:31], 0x45 ; C2191D45
s_buffer_load_dword s51, s[28:31], 0x46 ; C2199D46
s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A
s_buffer_load_dword s8, s[28:31], 0x3b ; C2041D3B
s_buffer_load_dword s52, s[28:31], 0x3c ; C21A1D3C
s_buffer_load_dword s53, s[28:31], 0x3d ; C21A9D3D
s_buffer_load_dword s14, s[28:31], 0x3e ; C2071D3E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Second batch of constant loads, interleaved with copies/compares that consume
; s4, s5, s6, s8 and s14 before those SGPRs are reloaded further down:
;   v0  = CONST[15].w (0x3f)      vcc = (CONST[16].x (0x40) == 0)
;   v17 = CONST[14].z (0x3a)      v18 = CONST[14].w (0x3b)
;   v19 = CONST[15].z (0x3e)
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s13, s[28:31], 0x9 ; C2069D09
v_cmp_eq_f32_e64 vcc, 0, s5 ; D004006A 00000A80
s_buffer_load_dword s9, s[28:31], 0xa ; C2049D0A
s_buffer_load_dword s7, s[28:31], 0xb ; C2039D0B
s_buffer_load_dword s12, s[28:31], 0xc ; C2061D0C
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s16, s[28:31], 0xd ; C2081D0D
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s11, s[28:31], 0xe ; C2059D0E
s_buffer_load_dword s10, s[28:31], 0xf ; C2051D0F
s_buffer_load_dword s15, s[28:31], 0x10 ; C2079D10
v_mov_b32_e32 v19, s14 ; 7E26020E
s_buffer_load_dword s17, s[28:31], 0x11 ; C2089D11
s_buffer_load_dword s14, s[28:31], 0x12 ; C2071D12
s_buffer_load_dword s54, s[28:31], 0x47 ; C21B1D47
s_buffer_load_dword s55, s[28:31], 0x48 ; C21B9D48
s_buffer_load_dword s56, s[28:31], 0x49 ; C21C1D49
s_buffer_load_dword s57, s[28:31], 0x4a ; C21C9D4A
s_buffer_load_dword s58, s[28:31], 0x4b ; C21D1D4B
s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00
s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01
s_buffer_load_dword s6, s[28:31], 0x2 ; C2031D02
s_buffer_load_dword s8, s[28:31], 0x4 ; C2041D04
s_buffer_load_dword s21, s[28:31], 0x8 ; C20A9D08
s_buffer_load_dword s59, s[28:31], 0x4c ; C21D9D4C
s_buffer_load_dword s60, s[28:31], 0x4d ; C21E1D4D
s_buffer_load_dword s61, s[28:31], 0x4e ; C21E9D4E
s_buffer_load_dword s62, s[28:31], 0x4f ; C21F1D4F
s_buffer_load_dword s63, s[28:31], 0x50 ; C21F9D50
s_buffer_load_dword s64, s[28:31], 0x51 ; C2201D51
s_buffer_load_dword s65, s[28:31], 0x52 ; C2209D52
s_buffer_load_dword s28, s[28:31], 0x53 ; C20E1D53
; v20 = TEMP[0].w = CONST[6].w*IN[0].x + CONST[7].w*IN[0].y + CONST[8].w*IN[0].z
v_mul_f32_e32 v20, s42, v2 ; 1028042A
v_mac_f32_e32 v20, s48, v3 ; 3E280630
v_mac_f32_e32 v20, s32, v4 ; 3E280820
; v17/v18 = TEMP[2].xy = IN[2].xy * CONST[14].xy + CONST[14].zw
v_mac_f32_e32 v17, s46, v9 ; 3E22122E
v_mac_f32_e32 v18, s47, v10 ; 3E24142F
; v21..v24 = TEMP[1].xyzw = CONST[17..20] applied to IN[0] (position).
v_mul_f32_e32 v21, s49, v2 ; 102A0431
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s55, v3 ; 3E2A0637
v_mac_f32_e32 v21, s59, v4 ; 3E2A083B
v_mac_f32_e32 v21, s63, v5 ; 3E2A0A3F
v_mul_f32_e32 v22, s50, v2 ; 102C0432
v_mac_f32_e32 v22, s56, v3 ; 3E2C0638
v_mac_f32_e32 v22, s60, v4 ; 3E2C083C
v_mac_f32_e32 v22, s64, v5 ; 3E2C0A40
v_mul_f32_e32 v23, s51, v2 ; 102E0433
v_mac_f32_e32 v23, s57, v3 ; 3E2E0639
v_mac_f32_e32 v23, s61, v4 ; 3E2E083D
v_mac_f32_e32 v23, s65, v5 ; 3E2E0A41
v_mul_f32_e32 v24, s54, v2 ; 10300436
v_mac_f32_e32 v24, s58, v3 ; 3E30063A
v_mac_f32_e32 v24, s62, v4 ; 3E30083E
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
; The two selects from the IR: pick IN[2].xy (v9, v10) or IN[3].xy (v11, v12)
; according to vcc set by the v_cmp_eq_f32 above.
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
; v11/v12/v6 = unnormalized TEMP[3].xyz (CONST[10..12].xyz dotted with IN[1].xyz)
v_mul_f32_e32 v11, s36, v6 ; 10160C24
v_mac_f32_e32 v11, s37, v7 ; 3E160E25
v_mul_f32_e32 v12, s39, v6 ; 10180C27
v_mac_f32_e32 v12, s40, v7 ; 3E180E28
v_mul_f32_e32 v6, s43, v6 ; 100C0C2B
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s38, v8 ; 3E161026
v_mac_f32_e32 v12, s41, v8 ; 3E181029
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
; v7/v8/v2 = TEMP[0].xyz (CONST[6..9].xyz applied to IN[0]); v2 (IN[0].x) is
; overwritten last, after its final use as an input.
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s23, v3 ; 3E0E0617
v_mac_f32_e32 v7, s26, v4 ; 3E0E081A
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v8, s20, v2 ; 10100414
v_mac_f32_e32 v8, s22, v3 ; 3E100616
v_mac_f32_e32 v8, s27, v4 ; 3E10081B
v_mac_f32_e32 v8, s34, v5 ; 3E100A22
v_mul_f32_e32 v2, s19, v2 ; 10040413
v_mac_f32_e32 v2, s24, v3 ; 3E040618
v_mac_f32_e32 v2, s25, v4 ; 3E040819
v_mac_f32_e32 v2, s35, v5 ; 3E040A23
; v19/v0 = TEMP[2].zw = selected texcoord * CONST[15].xy + CONST[15].zw
v_mac_f32_e32 v19, s52, v9 ; 3E261234
v_mac_f32_e32 v0, s53, v10 ; 3E001435
; Export 32 = OUT[1] (TEMP[2]).
exp 15, 32, 0, 0, 0, v17, v18, v19, v0 ; F800020F 00131211
; v0 was an export source and is overwritten by the next instruction.
s_waitcnt expcnt(0) ; BF8C070F
; v0/v3/v4 = unnormalized TEMP[4].xyz (CONST[6..8].xyz applied to IN[4].xyz)
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s23, v14 ; 3E001C17
v_mul_f32_e32 v3, s20, v13 ; 10061A14
v_mac_f32_e32 v3, s22, v14 ; 3E061C16
v_mul_f32_e32 v4, s19, v13 ; 10081A13
v_mac_f32_e32 v4, s24, v14 ; 3E081C18
v_mac_f32_e32 v0, s26, v15 ; 3E001E1A
v_mac_f32_e32 v3, s27, v15 ; 3E061E1B
v_mac_f32_e32 v4, s25, v15 ; 3E081E19
; v5 = rsq(|TEMP[3]|^2), v9 = rsq(|TEMP[4]|^2)
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; Normalized TEMP[3] -> v10 (x), v11 (y), v5 (z); normalized TEMP[4] -> v0, v3, v4.
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
; v6/v9/v12 = cross product of TEMP[3] and TEMP[4], then scaled by IN[4].w (v16)
; = TEMP[5].xyz.
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
; Export 33 = OUT[2] (TEMP[4]); export 34 = OUT[3] (TEMP[5]); w = v1 = 0.
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
s_waitcnt expcnt(0) ; BF8C070F
; The three DP4s of (x*y, y*z, z*z, z*x) with CONST[2], CONST[3], CONST[4],
; accumulated into v3, v4, v0; v0 and v6 hold each product in turn.
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s13, v0 ; 1006000D
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s17, v0 ; 10000011
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s21, v6 ; 3E060C15
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s11, v6 ; 3E080C0B
v_mac_f32_e32 v0, s14, v6 ; 3E000C0E
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s3, v6 ; 3E000C03
; Export 35 = OUT[4] (normalized TEMP[3]).
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
s_waitcnt expcnt(0) ; BF8C070F
; v5 = x*x - y*y; add CONST[5].xyz * v5 to the DP4 results.
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mad_f32 v5, v10, v10, -v5 ; D2820005 8416150A
v_mac_f32_e32 v3, s2, v5 ; 3E060A02
v_mac_f32_e32 v4, s0, v5 ; 3E080A00
v_mac_f32_e32 v0, s1, v5 ; 3E000A01
; Start of TEMP[7]: v5 = 0.5 * TEMP[1].y, v6 = 0.5 * TEMP[1].w
v_mul_f32_e32 v5, 0.5, v22 ; 100A2CF0
v_mul_f32_e32 v6, 0.5, v24 ; 100C30F0
; Export 36 = OUT[5].
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
s_waitcnt expcnt(0) ; BF8C070F
; v0 = 0.5*TEMP[1].x + 0.5*TEMP[1].w; v6 += CONST[1].x * 0.5*TEMP[1].y
v_mad_f32 v0, 0.5, v21, v6 ; D2820000 041A2AF0
v_mac_f32_e32 v6, s8, v5 ; 3E0C0A08
; Export 37 = OUT[6] (TEMP[7]).
exp 15, 37, 0, 0, 0, v0, v6, v23, v24 ; F800025F 18170600
s_waitcnt expcnt(0) ; BF8C070F
; v0/v1/v3 = TEMP[0].xyz - CONST[0].xyz
v_subrev_f32_e32 v0, s4, v7 ; 0A000E04
v_subrev_f32_e32 v1, s5, v8 ; 0A021005
v_subrev_f32_e32 v3, s6, v2 ; 0A060406
; Export 38 = OUT[7], export 39 = OUT[8], export 12 = OUT[0] (POSITION).
; The fourth exp field is 1 only on this final export, matching the third i32
; of the last llvm.SI.export call in the IR.
exp 15, 38, 0, 0, 0, v23, v0, v1, v3 ; F800026F 03010017
exp 15, 39, 0, 0, 0, v7, v8, v2, v20 ; F800027F 14020807
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 932 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL IN[7], GENERIC[7], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..20]
DCL CONST[23..25]
DCL CONST[27]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3], TEMP[3], SAMP[2], 2D
11: MUL TEMP[4].x, TEMP[3].wwww, CONST[19].wwww
12: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[20].xxxx
13: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx
14: KILL_IF -TEMP[4].xxxx
15: MOV TEMP[4].xy, IN[0].xyyy
16: TEX TEMP[4].yw, TEMP[4], SAMP[3], 2D
17: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].yyyy, IMM[0].zzzz
18: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[23].xxxx
19: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy
20: MOV_SAT TEMP[5].x, TEMP[5].xxxx
21: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx
22: SQRT TEMP[5].x, TEMP[5].xxxx
23: MOV TEMP[4].z, TEMP[5].xxxx
24: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[0].xyzz
25: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[1].xyzz
26: MOV TEMP[0].y, TEMP[1].xxxx
27: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[2].xyzz
28: MOV TEMP[0].z, TEMP[1].xxxx
29: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
30: RSQ TEMP[1].x, TEMP[1].xxxx
31: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
32: DP3 TEMP[1].x, IN[6].yzww, IN[6].yzww
33: RSQ TEMP[1].x, TEMP[1].xxxx
34: MUL TEMP[1].xyz, IN[6].yzww, TEMP[1].xxxx
35: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[3].xyzz
36: LRP TEMP[3].xyz, CONST[24].xxxx, TEMP[2].xyzz, CONST[16].xyzz
37: MUL TEMP[4].x, CONST[24].xxxx, CONST[16].wwww
38: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
40: MOV TEMP[5].xy, IN[0].xyyy
41: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
42: ADD TEMP[6].x, IMM[0].xxxx, -CONST[27].xxxx
43: MAD TEMP[5].x, TEMP[5].yyyy, CONST[27].xxxx, TEMP[6].xxxx
44: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
45: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
46: MOV TEMP[7].xyz, IMM[0].wwww
47: MOV TEMP[8].w, IMM[0].xxxx
48: MOV TEMP[8].xyz, TEMP[0].xyzx
49: DP4 TEMP[9].x, CONST[1], TEMP[8]
50: DP4 TEMP[10].x, CONST[2], TEMP[8]
51: MOV TEMP[9].y, TEMP[10].xxxx
52: DP4 TEMP[8].x, CONST[3], TEMP[8]
53: MOV TEMP[9].z, TEMP[8].xxxx
54: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz
55: MOV TEMP[9].xy, IN[5].xyyy
56: MOV TEMP[9].w, IN[5].wwww
57: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D
58: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
59: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
60: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
61: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
62: MUL TEMP[10].xyz, IMM[0].yyyy, TEMP[10].xyzz
63: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
64: MOV TEMP[11].xyz, TEMP[10].xyzx
65: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww
66: UIF TEMP[12].xxxx :0
67: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
68: RSQ TEMP[12].x, TEMP[12].xxxx
69: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
70: MOV TEMP[13].xyz, -IN[7].xyzx
71: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
72: RCP TEMP[15].x, TEMP[12].xxxx
73: RCP TEMP[15].y, TEMP[12].yyyy
74: RCP TEMP[15].z, TEMP[12].zzzz
75: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
76: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
77: RCP TEMP[15].x, TEMP[12].xxxx
78: RCP TEMP[15].y, TEMP[12].yyyy
79: RCP TEMP[15].z, TEMP[12].zzzz
80: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
81: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz
82: UIF TEMP[15].xxxx :0
83: MOV TEMP[16].x, TEMP[14].xxxx
84: ELSE :0
85: MOV TEMP[16].x, TEMP[13].xxxx
86: ENDIF
87: UIF TEMP[15].yyyy :0
88: MOV TEMP[17].x, TEMP[14].yyyy
89: ELSE :0
90: MOV TEMP[17].x, TEMP[13].yyyy
91: ENDIF
92: UIF TEMP[15].zzzz :0
93: MOV TEMP[14].x, TEMP[14].zzzz
94: ELSE :0
95: MOV TEMP[14].x, TEMP[13].zzzz
96: ENDIF
97: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
98: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
99: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
100: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
101: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
102: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz
103: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
104: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
105: ENDIF
106: ADD TEMP[12].x, IMM[0].xxxx, -CONST[25].xxxx
107: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
108: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
109: MOV TEMP[11].xyz, TEMP[11].xyzz
110: MOV TEMP[11].w, TEMP[12].xxxx
111: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
112: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
113: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
114: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
115: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww
116: UIF TEMP[12].xxxx :0
117: MOV TEMP[12].xyz, TEMP[10].xyzx
118: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww
119: UIF TEMP[13].xxxx :0
120: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
121: RSQ TEMP[13].x, TEMP[13].xxxx
122: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
123: MOV TEMP[13].xyz, -IN[7].xyzx
124: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
125: RCP TEMP[15].x, TEMP[10].xxxx
126: RCP TEMP[15].y, TEMP[10].yyyy
127: RCP TEMP[15].z, TEMP[10].zzzz
128: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
129: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
130: RCP TEMP[15].x, TEMP[10].xxxx
131: RCP TEMP[15].y, TEMP[10].yyyy
132: RCP TEMP[15].z, TEMP[10].zzzz
133: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
134: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz
135: UIF TEMP[15].xxxx :0
136: MOV TEMP[16].x, TEMP[14].xxxx
137: ELSE :0
138: MOV TEMP[16].x, TEMP[13].xxxx
139: ENDIF
140: UIF TEMP[15].yyyy :0
141: MOV TEMP[17].x, TEMP[14].yyyy
142: ELSE :0
143: MOV TEMP[17].x, TEMP[13].yyyy
144: ENDIF
145: UIF TEMP[15].zzzz :0
146: MOV TEMP[14].x, TEMP[14].zzzz
147: ELSE :0
148: MOV TEMP[14].x, TEMP[13].zzzz
149: ENDIF
150: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
151: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
152: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
153: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
154: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
155: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz
156: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
157: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
158: ENDIF
159: ADD TEMP[10].x, IMM[0].xxxx, -CONST[25].xxxx
160: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
161: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
162: MOV TEMP[12].xyz, TEMP[12].xyzz
163: MOV TEMP[12].w, TEMP[10].xxxx
164: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
165: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
166: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
167: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
168: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
169: ELSE :0
170: MOV TEMP[7].xyz, TEMP[11].xyzx
171: ENDIF
172: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
173: MOV TEMP[1].xyz, -TEMP[1].xyzx
174: ADD TEMP[5].x, IMM[0].xxxx, -CONST[25].xxxx
175: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
176: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
177: RSQ TEMP[11].x, TEMP[11].xxxx
178: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
179: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
180: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
181: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
182: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx
183: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
184: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
185: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx
186: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz
187: LG2 TEMP[13].x, TEMP[13].xxxx
188: RCP TEMP[13].x, TEMP[13].xxxx
189: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx
190: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
191: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx
192: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
193: MUL TEMP[16].x, IMM[0].yyyy, TEMP[11].xxxx
194: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
195: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[1].xxxx
196: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx
197: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[1].xxxx
198: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
199: ADD TEMP[4].x, CONST[25].xxxx, TEMP[4].xxxx
200: MOV_SAT TEMP[4].x, TEMP[4].xxxx
201: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
202: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
203: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
204: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
205: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
206: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx
207: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[12].xxxx
208: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww
209: RCP TEMP[1].x, TEMP[1].xxxx
210: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
211: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
212: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
213: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx
214: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
215: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
216: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
217: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
218: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
219: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
220: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz
221: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz
222: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
223: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
224: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
225: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
226: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
227: ADD TEMP[10].x, TEMP[5].xxxx, IMM[0].zzzz
228: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
229: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
230: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
231: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
232: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx
233: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz
234: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
235: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
236: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
237: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
238: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx
239: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
240: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
241: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
242: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
243: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
244: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
245: MOV TEMP[0].xyz, TEMP[0].xyzx
246: MAD TEMP[1].x, IN[6].xxxx, CONST[5].zzzz, CONST[5].wwww
247: MOV_SAT TEMP[1].x, TEMP[1].xxxx
248: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
249: MOV TEMP[0].xyz, TEMP[0].xyzx
250: MOV TEMP[0].w, IMM[0].xxxx
251: MOV OUT[0], TEMP[0]
252: END
; ModuleID = 'tgsi'
; Shader entry point generated from the TGSI listing printed directly above
; (the tail of this function lines up with TGSI instructions 107-252).
; Arguments actually referenced in the body:
;   %1     - array whose element 0 is the descriptor used by every llvm.SI.load.const
;   %2, %3 - arrays from which <16 x i8> / <32 x i8> values are loaded for the sample calls
;   %5, %7 - forwarded unchanged to every llvm.SI.fs.interp call
; The function produces no return value; its only outputs are the llvm.AMDGPU.kill
; call and the single llvm.SI.export call before `ret void`.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Element 0 of %1: the descriptor operand shared by all constant loads below.
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
; Scalar constant loads, all hoisted to the top of the function. The i32 operand
; appears to be a byte offset of the form 16*N + 4*component for TGSI CONST[N]
; (e.g. offsets 88/92 feed the multiply-add of TGSI 246, which reads CONST[5].zw).
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432)
; Load six <32 x i8> / <16 x i8> pairs: elements 0-5 of %3 and %2 respectively.
; Each pair is later passed as the 2nd/3rd operand of a sample intrinsic
; (presumably image resource + sampler state - confirm against the backend).
%85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0
%87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)*
%97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0
%98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)*
%100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0
%101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)*
%103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0
%104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)*
%106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0
%107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)*
%109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0
%110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)*
%112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0
%113 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%114 = bitcast <8 x i32> addrspace(2)* %113 to <32 x i8> addrspace(2)*
%115 = load <32 x i8>, <32 x i8> addrspace(2)* %114, align 32, !tbaa !0
%116 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%117 = bitcast <4 x i32> addrspace(2)* %116 to <16 x i8> addrspace(2)*
%118 = load <16 x i8>, <16 x i8> addrspace(2)* %117, align 16, !tbaa !0
; Interpolated inputs. Judging by how the results are consumed (e.g. %136 is used
; where TGSI 246 reads IN[6].x, and %140-%142 where TGSI 123 reads IN[7].xyz),
; the first i32 is the component and the second i32 is the input index.
%119 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%134 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%135 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%136 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%137 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%138 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
%139 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7)
%140 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7)
%141 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7)
%142 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7)
; Sample pair #2 at the 2-component coordinate (%119, %120) and keep all four channels.
%143 = bitcast float %119 to i32
%144 = bitcast float %120 to i32
%145 = insertelement <2 x i32> undef, i32 %143, i32 0
%146 = insertelement <2 x i32> %145, i32 %144, i32 1
%147 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %146, <32 x i8> %97, <16 x i8> %100, i32 2)
%148 = extractelement <4 x float> %147, i32 0
%149 = extractelement <4 x float> %147, i32 1
%150 = extractelement <4 x float> %147, i32 2
%151 = extractelement <4 x float> %147, i32 3
; Threshold test: (channel 3 * %79) < %80 passes -1.0 to llvm.AMDGPU.kill, otherwise 0.0
; (presumably an alpha-test discard - the kill semantics are not visible in this dump).
%152 = fmul float %151, %79
%153 = fcmp olt float %152, %80
%154 = select i1 %153, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %154)
; Sample pair #3 at the same coordinate; only channels 1 and 3 are used.
%155 = bitcast float %119 to i32
%156 = bitcast float %120 to i32
%157 = insertelement <2 x i32> undef, i32 %155, i32 0
%158 = insertelement <2 x i32> %157, i32 %156, i32 1
%159 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %103, <16 x i8> %106, i32 2)
%160 = extractelement <4 x float> %159, i32 1
%161 = extractelement <4 x float> %159, i32 3
; Remap both channels with v*2-1, scale by %81, and derive a third component as
; sqrt(1 - clamp(x*x + y*y, 0, 1)) (looks like a two-channel normal map - unconfirmed).
%162 = fmul float %161, 2.000000e+00
%163 = fadd float %162, -1.000000e+00
%164 = fmul float %160, 2.000000e+00
%165 = fadd float %164, -1.000000e+00
%166 = fmul float %163, %81
%167 = fmul float %165, %81
%168 = fmul float %166, %166
%169 = fmul float %167, %167
%170 = fadd float %168, %169
%171 = call float @llvm.AMDIL.clamp.(float %170, float 0.000000e+00, float 1.000000e+00)
%172 = fsub float 1.000000e+00, %171
%173 = call float @llvm.sqrt.f32(float %172)
; Combine (%166, %167, %173) with inputs 1, 2 and 3 as a 3x3 linear combination,
; then scale the result by the reciprocal square root of its squared length.
; The unit-length result (%195, %196, %197) is referred to as N in later comments.
%174 = fmul float %166, %121
%175 = fmul float %167, %124
%176 = fadd float %175, %174
%177 = fmul float %173, %127
%178 = fadd float %176, %177
%179 = fmul float %166, %122
%180 = fmul float %167, %125
%181 = fadd float %180, %179
%182 = fmul float %173, %128
%183 = fadd float %181, %182
%184 = fmul float %166, %123
%185 = fmul float %167, %126
%186 = fadd float %185, %184
%187 = fmul float %173, %129
%188 = fadd float %186, %187
%189 = fmul float %178, %178
%190 = fmul float %183, %183
%191 = fadd float %190, %189
%192 = fmul float %188, %188
%193 = fadd float %191, %192
%194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193)
%195 = fmul float %178, %194
%196 = fmul float %183, %194
%197 = fmul float %188, %194
; Normalize components 1-3 of input 6 the same way; result (%204, %205, %206) is V below.
%198 = fmul float %137, %137
%199 = fmul float %138, %138
%200 = fadd float %199, %198
%201 = fmul float %139, %139
%202 = fadd float %200, %201
%203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202)
%204 = fmul float %137, %203
%205 = fmul float %138, %203
%206 = fmul float %139, %203
; Tint the first sample's channels 0-2 by constants %76-%78, then build two derived
; colors: an lrp against %66-%68 controlled by %82, and the tinted color scaled by
; %214 = %69 - %82*%69.
%207 = fmul float %76, %148
%208 = fmul float %77, %149
%209 = fmul float %78, %150
%210 = call float @llvm.AMDGPU.lrp(float %82, float %207, float %66)
%211 = call float @llvm.AMDGPU.lrp(float %82, float %208, float %67)
%212 = call float @llvm.AMDGPU.lrp(float %82, float %209, float %68)
%213 = fmul float %82, %69
%214 = fsub float %69, %213
%215 = fmul float %207, %214
%216 = fmul float %208, %214
%217 = fmul float %209, %214
; Sample pair #4 at the same coordinate; channel 1 is blended towards 1.0:
; %226 = sample.y * %84 + (1 - %84).
%218 = bitcast float %119 to i32
%219 = bitcast float %120 to i32
%220 = insertelement <2 x i32> undef, i32 %218, i32 0
%221 = insertelement <2 x i32> %220, i32 %219, i32 1
%222 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %221, <32 x i8> %109, <16 x i8> %112, i32 2)
%223 = extractelement <4 x float> %222, i32 1
%224 = fsub float 1.000000e+00, %84
%225 = fmul float %223, %84
%226 = fadd float %225, %224
; %232 = max(dot(N, (%24, %25, %26)), 0).
%227 = fmul float %195, %24
%228 = fmul float %196, %25
%229 = fadd float %228, %227
%230 = fmul float %197, %26
%231 = fadd float %229, %230
%232 = call float @llvm.maxnum.f32(float %231, float 0.000000e+00)
; Three 4-term expressions: dot(row, N) + constant, with rows taken from
; constants %27-%30, %31-%34 and %35-%38.
%233 = fmul float %27, %195
%234 = fmul float %28, %196
%235 = fadd float %233, %234
%236 = fmul float %29, %197
%237 = fadd float %235, %236
%238 = fadd float %237, %30
%239 = fmul float %31, %195
%240 = fmul float %32, %196
%241 = fadd float %239, %240
%242 = fmul float %33, %197
%243 = fadd float %241, %242
%244 = fadd float %243, %34
%245 = fmul float %35, %195
%246 = fmul float %36, %196
%247 = fadd float %245, %246
%248 = fmul float %37, %197
%249 = fadd float %247, %248
%250 = fadd float %249, %38
; Add input 4 (components 0-2) to the three results above.
%251 = fadd float %130, %238
%252 = fadd float %131, %244
%253 = fadd float %132, %250
; Divide input 5's components 0 and 1 by its component 3, sample pair #5 there,
; and use channel 0 of the result to scale constants %70-%72.
%254 = fdiv float %133, %135
%255 = fdiv float %134, %135
%256 = bitcast float %254 to i32
%257 = bitcast float %255 to i32
%258 = insertelement <2 x i32> undef, i32 %256, i32 0
%259 = insertelement <2 x i32> %258, i32 %257, i32 1
%260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %259, <32 x i8> %115, <16 x i8> %118, i32 2)
%261 = extractelement <4 x float> %260, i32 0
%262 = fmul float %70, %261
%263 = fmul float %71, %261
%264 = fmul float %72, %261
; Scale (%251, %252, %253) by %226.
%265 = fmul float %251, %226
%266 = fmul float %252, %226
%267 = fmul float %253, %226
; R = V - 2*dot(N, V)*N, stored in (%279, %280, %281).
%268 = fmul float %195, %204
%269 = fmul float %196, %205
%270 = fadd float %269, %268
%271 = fmul float %197, %206
%272 = fadd float %270, %271
%273 = fmul float %272, %195
%274 = fmul float %272, %196
%275 = fmul float %272, %197
%276 = fmul float %273, 2.000000e+00
%277 = fmul float %274, 2.000000e+00
%278 = fmul float %275, 2.000000e+00
%279 = fsub float %204, %276
%280 = fsub float %205, %277
%281 = fsub float %206, %278
; Take the IF block only when constant %51 (byte offset 172) is > 0; otherwise R is
; used unmodified as the first cube lookup direction.
%282 = fcmp ogt float %51, 0.000000e+00
br i1 %282, label %IF, label %ENDIF
IF: ; preds = %main_body
; Direction adjustment for the first cube lookup. Same arithmetic shape as the IF89
; block further down (TGSI 120-157), but using constants at offsets 128-168
; (looks like a box-projected reflection - unconfirmed).
%283 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%284 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%285 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
; D = R scaled by rsq(dot(R, R)).
%286 = fmul float %279, %279
%287 = fmul float %280, %280
%288 = fadd float %287, %286
%289 = fmul float %281, %281
%290 = fadd float %288, %289
%291 = call float @llvm.AMDGPU.rsq.clamped.f32(float %290)
%292 = fmul float %279, %291
%293 = fmul float %280, %291
%294 = fmul float %281, %291
; Per component: (%44..%46 - input 7) / D and (%47..%49 - input 7) / D.
; The reciprocals are computed twice, mirroring the duplicated RCPs in the TGSI.
%295 = fsub float %44, %140
%296 = fsub float %45, %141
%297 = fsub float %46, %142
%298 = fdiv float 1.000000e+00, %292
%299 = fdiv float 1.000000e+00, %293
%300 = fdiv float 1.000000e+00, %294
%301 = fmul float %295, %298
%302 = fmul float %296, %299
%303 = fmul float %297, %300
%304 = fsub float %47, %140
%305 = fsub float %48, %141
%306 = fsub float %49, %142
%307 = fdiv float 1.000000e+00, %292
%308 = fdiv float 1.000000e+00, %293
%309 = fdiv float 1.000000e+00, %294
%310 = fmul float %304, %307
%311 = fmul float %305, %308
%312 = fmul float %306, %309
; Pick the first quotient where D's component is > 0, else the second one.
%313 = fcmp ogt float %292, 0.000000e+00
%314 = fcmp ogt float %293, 0.000000e+00
%315 = fcmp ogt float %294, 0.000000e+00
%. = select i1 %313, float %301, float %310
%temp68.0 = select i1 %314, float %302, float %311
%.100 = select i1 %315, float %303, float %312
; Midpoint of the two constant triples: (%44..%46 + %47..%49) * 0.5.
%316 = fadd float %44, %47
%317 = fadd float %45, %48
%318 = fadd float %46, %49
%319 = fmul float %316, 5.000000e-01
%320 = fmul float %317, 5.000000e-01
%321 = fmul float %318, 5.000000e-01
; t = minimum of the three selected quotients.
%322 = call float @llvm.minnum.f32(float %., float %temp68.0)
%323 = call float @llvm.minnum.f32(float %322, float %.100)
; Result = D*t + (midpoint - (%285, %284, %283) + input 7) - midpoint.
%324 = fsub float %319, %285
%325 = fsub float %320, %284
%326 = fsub float %321, %283
%327 = fadd float %324, %140
%328 = fadd float %325, %141
%329 = fadd float %326, %142
%330 = fmul float %292, %323
%331 = fadd float %330, %327
%332 = fmul float %293, %323
%333 = fadd float %332, %328
%334 = fmul float %294, %323
%335 = fadd float %334, %329
%336 = fsub float %331, %319
%337 = fsub float %333, %320
%338 = fsub float %335, %321
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
; Lookup direction: adjusted vector when coming from IF, plain R otherwise.
%temp44.0 = phi float [ %336, %IF ], [ %279, %main_body ]
%temp45.0 = phi float [ %337, %IF ], [ %280, %main_body ]
%temp46.0 = phi float [ %338, %IF ], [ %281, %main_body ]
; 4th coordinate = pow(1 - %83, 0.75) * 7.0 (TGSI 107-108 with CONST[25].x).
%339 = fsub float 1.000000e+00, %83
%340 = call float @llvm.pow.f32(float %339, float 7.500000e-01)
%341 = fmul float %340, 7.000000e+00
; Run the 4-vector through llvm.AMDGPU.cube, divide outputs 0 and 1 by |output 2|,
; add 1.5, and assemble the samplel coordinate as (y', x', output 3, %341).
%342 = insertelement <4 x float> undef, float %temp44.0, i32 0
%343 = insertelement <4 x float> %342, float %temp45.0, i32 1
%344 = insertelement <4 x float> %343, float %temp46.0, i32 2
%345 = insertelement <4 x float> %344, float %341, i32 3
%346 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %345)
%347 = extractelement <4 x float> %346, i32 0
%348 = extractelement <4 x float> %346, i32 1
%349 = extractelement <4 x float> %346, i32 2
%350 = extractelement <4 x float> %346, i32 3
%351 = call float @llvm.fabs.f32(float %349)
%352 = fdiv float 1.000000e+00, %351
%353 = fmul float %347, %352
%354 = fadd float %353, 1.500000e+00
%355 = fmul float %348, %352
%356 = fadd float %355, 1.500000e+00
%357 = bitcast float %356 to i32
%358 = bitcast float %354 to i32
%359 = bitcast float %350 to i32
%360 = bitcast float %341 to i32
%361 = insertelement <4 x i32> undef, i32 %357, i32 0
%362 = insertelement <4 x i32> %361, i32 %358, i32 1
%363 = insertelement <4 x i32> %362, i32 %359, i32 2
%364 = insertelement <4 x i32> %363, i32 %360, i32 3
; First cube lookup (pair #0; TGSI 111: TXL ... SAMP[0], CUBE).
%365 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %364, <32 x i8> %86, <16 x i8> %88, i32 4)
%366 = extractelement <4 x float> %365, i32 0
%367 = extractelement <4 x float> %365, i32 1
%368 = extractelement <4 x float> %365, i32 2
%369 = extractelement <4 x float> %365, i32 3
; rgb * (%52 * pow(alpha, %53)) (TGSI 112-114).
%370 = call float @llvm.pow.f32(float %369, float %53)
%371 = fmul float %52, %370
%372 = fmul float %371, %366
%373 = fmul float %371, %367
%374 = fmul float %371, %368
; TGSI 115-116: only when constant %50 is below approximately 0.99999 is the second
; cube lookup performed and blended in; otherwise the first result is used as-is.
%375 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %375, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
; TGSI 118-119: adjust the direction for the second lookup only when %63 > 0.
%376 = fcmp ogt float %63, 0.000000e+00
br i1 %376, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
; Final shading tail (TGSI 172-252). The phis select either the blended two-lookup
; color (from ENDIF88) or the single-lookup color (from ENDIF).
%temp28.0 = phi float [ %604, %ENDIF88 ], [ %372, %ENDIF ]
%temp29.0 = phi float [ %605, %ENDIF88 ], [ %373, %ENDIF ]
%temp30.0 = phi float [ %606, %ENDIF88 ], [ %374, %ENDIF ]
; TGSI 172: scale the lookup color by %226.
%377 = fmul float %temp28.0, %226
%378 = fmul float %temp29.0, %226
%379 = fmul float %temp30.0, %226
; TGSI 174: %380 = 1 - %83.
%380 = fsub float 1.000000e+00, %83
; TGSI 175-178: H = (%24, %25, %26) - V, scaled by rsq of its squared length.
%381 = fsub float %24, %204
%382 = fsub float %25, %205
%383 = fsub float %26, %206
%384 = fmul float %381, %381
%385 = fmul float %382, %382
%386 = fadd float %385, %384
%387 = fmul float %383, %383
%388 = fadd float %386, %387
%389 = call float @llvm.AMDGPU.rsq.clamped.f32(float %388)
%390 = fmul float %381, %389
%391 = fmul float %382, %389
%392 = fmul float %383, %389
; TGSI 179-180: %399 = max(dot(N, -V), 0).
%393 = fmul float %204, %195
%394 = fsub float -0.000000e+00, %393
%395 = fmul float %205, %196
%396 = fsub float %394, %395
%397 = fmul float %206, %197
%398 = fsub float %396, %397
%399 = call float @llvm.maxnum.f32(float %398, float 0.000000e+00)
; TGSI 181-182: %405 = max(dot((%24, %25, %26), H), 0).
%400 = fmul float %24, %390
%401 = fmul float %25, %391
%402 = fadd float %401, %400
%403 = fmul float %26, %392
%404 = fadd float %402, %403
%405 = call float @llvm.maxnum.f32(float %404, float 0.000000e+00)
; TGSI 183-184: %407 = %380 * %380 * %75.
%406 = fmul float %380, %380
%407 = fmul float %406, %75
; TGSI 185-190: %414 = (10 / log2((1 - %380) * ~0.968 + ~0.03))^2, later used as a
; pow() exponent.
%408 = fsub float 1.000000e+00, %380
%409 = fmul float %408, 0x3FEEF9DB20000000
%410 = fadd float %409, 0x3F9EB851E0000000
%411 = call float @llvm.log2.f32(float %410)
%412 = fdiv float 1.000000e+00, %411
%413 = fmul float %412, 1.000000e+01
%414 = fmul float %413, %413
; TGSI 191-197: complements of the clamped dot products, and
; %420 = 2 * %405 * (%405 * %380) + 0.5.
%415 = fsub float 1.000000e+00, %232
%416 = fsub float 1.000000e+00, %399
%417 = fmul float %405, 2.000000e+00
%418 = fmul float %405, %380
%419 = fmul float %417, %418
%420 = fadd float %419, 5.000000e-01
%421 = fsub float 1.000000e+00, %405
%422 = fsub float 1.000000e+00, %399
; TGSI 198-200: %425 = clamp(%83 + (1 - %214), 0, 1).
%423 = fsub float 1.000000e+00, %214
%424 = fadd float %83, %423
%425 = call float @llvm.AMDIL.clamp.(float %424, float 0.000000e+00, float 1.000000e+00)
; TGSI 201-205: fifth power of %422, used as the lrp weight between %425 and
; the color (%210, %211, %212).
%426 = fmul float %422, %422
%427 = fmul float %422, %422
%428 = fmul float %427, %422
%429 = fmul float %426, %428
%430 = call float @llvm.AMDGPU.lrp(float %429, float %425, float %210)
%431 = call float @llvm.AMDGPU.lrp(float %429, float %425, float %211)
%432 = call float @llvm.AMDGPU.lrp(float %429, float %425, float %212)
; TGSI 206-209: %437 = 1 / (lrp(%232, 1, %407) * lrp(%399, 1, %407) + ~1e-4).
%433 = call float @llvm.AMDGPU.lrp(float %232, float 1.000000e+00, float %407)
%434 = call float @llvm.AMDGPU.lrp(float %399, float 1.000000e+00, float %407)
%435 = fmul float %433, %434
%436 = fadd float %435, 0x3F1A36E2E0000000
%437 = fdiv float 1.000000e+00, %436
; TGSI 210-212: pow(max(dot(N, H), 0), %414).
%438 = fmul float %195, %390
%439 = fmul float %196, %391
%440 = fadd float %439, %438
%441 = fmul float %197, %392
%442 = fadd float %440, %441
%443 = call float @llvm.maxnum.f32(float %442, float 0.000000e+00)
%444 = call float @llvm.pow.f32(float %443, float %414)
; TGSI 213-220: multiply by (%414 + 1) * %74, by %437, by %232 and by %73, clamp at
; zero from below, then scale the color (%262, %263, %264).
%445 = fadd float %414, 1.000000e+00
%446 = fmul float %445, %74
%447 = fmul float %444, %446
%448 = fmul float %437, %447
%449 = fmul float %448, %232
%450 = fmul float %449, %73
%451 = call float @llvm.maxnum.f32(float %450, float 0.000000e+00)
%452 = fmul float %451, %262
%453 = fmul float %451, %263
%454 = fmul float %451, %264
; TGSI 221-226: c + (1 - c) * (%421)^5 for c = (%210, %211, %212).
%455 = fsub float 1.000000e+00, %210
%456 = fsub float 1.000000e+00, %211
%457 = fsub float 1.000000e+00, %212
%458 = fmul float %421, %421
%459 = fmul float %421, %421
%460 = fmul float %459, %421
%461 = fmul float %458, %460
%462 = fmul float %455, %461
%463 = fadd float %462, %210
%464 = fmul float %456, %461
%465 = fadd float %464, %211
%466 = fmul float %457, %461
%467 = fadd float %466, %212
; TGSI 227-232: %474 = 1 + (%420 - 1) * (%415)^5.
%468 = fadd float %420, -1.000000e+00
%469 = fmul float %415, %415
%470 = fmul float %415, %415
%471 = fmul float %470, %415
%472 = fmul float %469, %471
%473 = fmul float %468, %472
%474 = fadd float %473, 1.000000e+00
; TGSI 233-238: %481 = 1 + (%420 - 1) * (%416)^5.
%475 = fadd float %420, -1.000000e+00
%476 = fmul float %416, %416
%477 = fmul float %416, %416
%478 = fmul float %477, %416
%479 = fmul float %476, %478
%480 = fmul float %475, %479
%481 = fadd float %480, 1.000000e+00
; TGSI 239-241: (%262..%264) * (%474 * %481 * %232) + (%265..%267).
%482 = fmul float %474, %481
%483 = fmul float %482, %232
%484 = fmul float %262, %483
%485 = fadd float %484, %265
%486 = fmul float %263, %483
%487 = fadd float %486, %266
%488 = fmul float %264, %483
%489 = fadd float %488, %267
; TGSI 242-244: sum the three contributions:
; (%215..%217) * above  +  (%452..%454) * (%463, %465, %467)  +  (%377..%379) * (%430..%432).
%490 = fmul float %215, %485
%491 = fmul float %216, %487
%492 = fmul float %217, %489
%493 = fmul float %452, %463
%494 = fadd float %493, %490
%495 = fmul float %453, %465
%496 = fadd float %495, %491
%497 = fmul float %454, %467
%498 = fadd float %497, %492
%499 = fmul float %377, %430
%500 = fadd float %499, %494
%501 = fmul float %378, %431
%502 = fadd float %501, %496
%503 = fmul float %379, %432
%504 = fadd float %503, %498
; TGSI 246-248: weight = clamp(input6.x * %42 + %43, 0, 1); lrp between the shaded
; color and constants %39-%41 (presumably a fog blend - unconfirmed).
%505 = fmul float %136, %42
%506 = fadd float %505, %43
%507 = call float @llvm.AMDIL.clamp.(float %506, float 0.000000e+00, float 1.000000e+00)
%508 = call float @llvm.AMDGPU.lrp(float %507, float %500, float %39)
%509 = call float @llvm.AMDGPU.lrp(float %507, float %502, float %40)
%510 = call float @llvm.AMDGPU.lrp(float %507, float %504, float %41)
; TGSI 250-251: 4th component fixed at 1.0. The four values are packed pairwise by
; llvm.SI.packf16 (presumably two half floats per i32) and handed to the export.
%511 = call i32 @llvm.SI.packf16(float %508, float %509)
%512 = bitcast i32 %511 to float
%513 = call i32 @llvm.SI.packf16(float %510, float 1.000000e+00)
%514 = bitcast i32 %513 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %512, float %514, float %512, float %514)
ret void
IF89: ; preds = %IF86
; Direction adjustment for the second cube lookup (TGSI 120-157). Same arithmetic as
; the IF block above, with constants %54-%62 in place of offsets 128-168.
%515 = fmul float %279, %279
%516 = fmul float %280, %280
%517 = fadd float %516, %515
%518 = fmul float %281, %281
%519 = fadd float %517, %518
%520 = call float @llvm.AMDGPU.rsq.clamped.f32(float %519)
%521 = fmul float %279, %520
%522 = fmul float %280, %520
%523 = fmul float %281, %520
; TGSI 123-133: (%54..%56 - input 7) / D and (%57..%59 - input 7) / D.
%524 = fsub float %54, %140
%525 = fsub float %55, %141
%526 = fsub float %56, %142
%527 = fdiv float 1.000000e+00, %521
%528 = fdiv float 1.000000e+00, %522
%529 = fdiv float 1.000000e+00, %523
%530 = fmul float %524, %527
%531 = fmul float %525, %528
%532 = fmul float %526, %529
%533 = fsub float %57, %140
%534 = fsub float %58, %141
%535 = fsub float %59, %142
%536 = fdiv float 1.000000e+00, %521
%537 = fdiv float 1.000000e+00, %522
%538 = fdiv float 1.000000e+00, %523
%539 = fmul float %533, %536
%540 = fmul float %534, %537
%541 = fmul float %535, %538
; TGSI 134-149: the three UIF/ELSE selections, emitted here as selects.
%542 = fcmp ogt float %521, 0.000000e+00
%543 = fcmp ogt float %522, 0.000000e+00
%544 = fcmp ogt float %523, 0.000000e+00
%.101 = select i1 %542, float %530, float %539
%temp68.1 = select i1 %543, float %531, float %540
%.102 = select i1 %544, float %532, float %541
; TGSI 150-151: midpoint of the two constant triples.
%545 = fadd float %54, %57
%546 = fadd float %55, %58
%547 = fadd float %56, %59
%548 = fmul float %545, 5.000000e-01
%549 = fmul float %546, 5.000000e-01
%550 = fmul float %547, 5.000000e-01
; TGSI 152-153: t = minimum of the three selected quotients.
%551 = call float @llvm.minnum.f32(float %.101, float %temp68.1)
%552 = call float @llvm.minnum.f32(float %551, float %.102)
; TGSI 154-157: D*t + (midpoint - (%60, %61, %62) + input 7) - midpoint.
%553 = fsub float %548, %60
%554 = fsub float %549, %61
%555 = fsub float %550, %62
%556 = fadd float %553, %140
%557 = fadd float %554, %141
%558 = fadd float %555, %142
%559 = fmul float %521, %552
%560 = fadd float %559, %556
%561 = fmul float %522, %552
%562 = fadd float %561, %557
%563 = fmul float %523, %552
%564 = fadd float %563, %558
%565 = fsub float %560, %548
%566 = fsub float %562, %549
%567 = fsub float %564, %550
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
; Second lookup direction: adjusted vector from IF89, or plain R straight from IF86.
%temp48.0 = phi float [ %565, %IF89 ], [ %279, %IF86 ]
%temp49.0 = phi float [ %566, %IF89 ], [ %280, %IF86 ]
%temp50.0 = phi float [ %567, %IF89 ], [ %281, %IF86 ]
; TGSI 159-161: 4th coordinate = pow(1 - %83, 0.75) * 7.0 (recomputed, same as %341).
%568 = fsub float 1.000000e+00, %83
%569 = call float @llvm.pow.f32(float %568, float 7.500000e-01)
%570 = fmul float %569, 7.000000e+00
; Same cube coordinate preparation as for the first lookup.
%571 = insertelement <4 x float> undef, float %temp48.0, i32 0
%572 = insertelement <4 x float> %571, float %temp49.0, i32 1
%573 = insertelement <4 x float> %572, float %temp50.0, i32 2
%574 = insertelement <4 x float> %573, float %570, i32 3
%575 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %574)
%576 = extractelement <4 x float> %575, i32 0
%577 = extractelement <4 x float> %575, i32 1
%578 = extractelement <4 x float> %575, i32 2
%579 = extractelement <4 x float> %575, i32 3
%580 = call float @llvm.fabs.f32(float %578)
%581 = fdiv float 1.000000e+00, %580
%582 = fmul float %576, %581
%583 = fadd float %582, 1.500000e+00
%584 = fmul float %577, %581
%585 = fadd float %584, 1.500000e+00
%586 = bitcast float %585 to i32
%587 = bitcast float %583 to i32
%588 = bitcast float %579 to i32
%589 = bitcast float %570 to i32
%590 = insertelement <4 x i32> undef, i32 %586, i32 0
%591 = insertelement <4 x i32> %590, i32 %587, i32 1
%592 = insertelement <4 x i32> %591, i32 %588, i32 2
%593 = insertelement <4 x i32> %592, i32 %589, i32 3
; Second cube lookup (pair #1; TGSI 164: TXL ... SAMP[1], CUBE).
%594 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %593, <32 x i8> %91, <16 x i8> %94, i32 4)
%595 = extractelement <4 x float> %594, i32 0
%596 = extractelement <4 x float> %594, i32 1
%597 = extractelement <4 x float> %594, i32 2
%598 = extractelement <4 x float> %594, i32 3
; TGSI 165-167: rgb * (%64 * pow(alpha, %65)).
%599 = call float @llvm.pow.f32(float %598, float %65)
%600 = fmul float %64, %599
%601 = fmul float %600, %595
%602 = fmul float %600, %596
%603 = fmul float %600, %597
; TGSI 168: lrp with weight %50 between the first and the second lookup result.
%604 = call float @llvm.AMDGPU.lrp(float %50, float %372, float %601)
%605 = call float @llvm.AMDGPU.lrp(float %50, float %373, float %602)
%606 = call float @llvm.AMDGPU.lrp(float %50, float %374, float %603)
br label %ENDIF85
}
; External declarations for every intrinsic called by @main above. The trailing
; #1 / #2 on a declaration refers to the attribute groups defined at the bottom;
; llvm.AMDGPU.kill and llvm.SI.export are declared without an attribute group.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute groups: #0 is attached to @main itself, #1 and #2 to the declarations above.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node referenced as !tbaa !0 by the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000
v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001
v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100
v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900
v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00
v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01
v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00
v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01
v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000
v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001
v_interp_p1_f32 v3, v0, 1, 4, [m0] ; C80C1100
v_interp_p2_f32 v3, [v3], v1, 1, 4, [m0] ; C80D1101
v_interp_p1_f32 v4, v0, 2, 4, [m0] ; C8101200
v_interp_p2_f32 v4, [v4], v1, 2, 4, [m0] ; C8111201
v_interp_p1_f32 v21, v0, 0, 5, [m0] ; C8541400
v_interp_p2_f32 v21, [v21], v1, 0, 5, [m0] ; C8551401
v_interp_p1_f32 v22, v0, 1, 5, [m0] ; C8581500
v_interp_p2_f32 v22, [v22], v1, 1, 5, [m0] ; C8591501
v_interp_p1_f32 v23, v0, 3, 5, [m0] ; C85C1700
v_interp_p2_f32 v23, [v23], v1, 3, 5, [m0] ; C85D1701
v_interp_p1_f32 v5, v0, 0, 6, [m0] ; C8141800
v_interp_p2_f32 v5, [v5], v1, 0, 6, [m0] ; C8151801
v_interp_p1_f32 v24, v0, 1, 6, [m0] ; C8601900
v_interp_p2_f32 v24, [v24], v1, 1, 6, [m0] ; C8611901
v_interp_p1_f32 v28, v0, 2, 6, [m0] ; C8701A00
v_interp_p2_f32 v28, [v28], v1, 2, 6, [m0] ; C8711A01
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p1_f32 v29, v0, 3, 6, [m0] ; C8741B00
v_interp_p2_f32 v29, [v29], v1, 3, 6, [m0] ; C8751B01
v_interp_p1_f32 v25, v0, 0, 7, [m0] ; C8641C00
v_interp_p2_f32 v25, [v25], v1, 0, 7, [m0] ; C8651C01
v_interp_p1_f32 v20, v0, 1, 7, [m0] ; C8501D00
v_interp_p2_f32 v20, [v20], v1, 1, 7, [m0] ; C8511D01
v_interp_p1_f32 v26, v0, 2, 7, [m0] ; C8681E00
s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
v_interp_p2_f32 v26, [v26], v1, 2, 7, [m0] ; C8691E01
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[8:11], 0x4f ; C206094F
s_buffer_load_dword s13, s[8:11], 0x50 ; C2068950
s_load_dwordx4 s[32:35], s[4:5], 0xc ; C090050C
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514
s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718
s_load_dwordx8 s[36:43], s[6:7], 0x20 ; C0D20720
s_load_dwordx8 s[44:51], s[6:7], 0x28 ; C0D60728
image_sample v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[0:3] ; F0800F00 00061E10
s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C
s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D
s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E
s_buffer_load_dword s14, s[8:11], 0x5c ; C207095C
s_buffer_load_dword s26, s[8:11], 0x60 ; C20D0960
s_buffer_load_dword s0, s[8:11], 0x64 ; C2000964
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s12, v33 ; 1000420C
v_cmp_gt_f32_e32 vcc, s13, v0 ; 7C08000D
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
s_cbranch_execnz 3 ; BF890003
exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000
s_endpgm ; BF810000
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[52:59], s[32:35] ; F0800A00 010D0010
v_mul_f32_e32 v13, s1, v30 ; 101A3C01
v_mul_f32_e32 v14, s2, v31 ; 101C3E02
v_mul_f32_e32 v15, s3, v32 ; 101E4003
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mul_f32_e32 v0, s14, v0 ; 1000000E
v_mul_f32_e32 v6, v6, v1 ; 100C0306
v_mac_f32_e32 v6, v9, v0 ; 3E0C0109
v_mul_f32_e32 v7, v7, v1 ; 100E0307
v_mac_f32_e32 v7, v10, v0 ; 3E0E010A
v_mul_f32_e32 v10, v8, v1 ; 10140308
v_mac_f32_e32 v10, v11, v0 ; 3E14010B
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v6, v12, v0 ; 3E0C010C
v_mac_f32_e32 v7, v18, v0 ; 3E0E0112
v_mac_f32_e32 v10, v19, v0 ; 3E140113
v_mul_f32_e32 v0, v6, v6 ; 10000D06
v_mac_f32_e32 v0, v7, v7 ; 3E000F07
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v24, v24 ; 10023118
v_mac_f32_e32 v1, v28, v28 ; 3E02391C
v_mac_f32_e32 v1, v29, v29 ; 3E023B1D
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v9, v0, v6 ; 10120D00
v_mul_f32_e32 v8, v0, v7 ; 10100F00
v_mul_f32_e32 v7, v0, v10 ; 100E1500
v_mul_f32_e32 v11, v1, v24 ; 10163101
v_mul_f32_e32 v10, v1, v28 ; 10143901
v_mul_f32_e32 v0, v11, v9 ; 1000130B
v_mac_f32_e32 v0, v10, v8 ; 3E00110A
v_mul_f32_e32 v12, v1, v29 ; 10183B01
v_mac_f32_e32 v0, v12, v7 ; 3E000F0C
v_mul_f32_e32 v6, v9, v0 ; 100C0109
v_mac_f32_e32 v6, v9, v0 ; 3E0C0109
v_mul_f32_e32 v18, v8, v0 ; 10240108
v_mac_f32_e32 v18, v8, v0 ; 3E240108
v_mad_f32 v27, v24, v1, -v6 ; D282001B 841A0318
v_mad_f32 v28, v28, v1, -v18 ; D282001C 844A031C
v_mov_b32_e32 v6, 0x6f800000 ; 7E0C02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v23|, v6 ; D008016A 00020D17
v_mov_b32_e32 v6, 0x2f800000 ; 7E0C02FF 2F800000
v_cndmask_b32_e32 v6, 1.0, v6 ; 000C0CF2
v_mul_f32_e32 v18, v6, v23 ; 10242F06
v_rcp_f32_e32 v18, v18 ; 7E245512
v_mul_f32_e32 v19, v7, v0 ; 10260107
v_mac_f32_e32 v19, v7, v0 ; 3E260107
v_mad_f32 v29, v29, v1, -v19 ; D282001D 844E031D
v_mul_f32_e32 v0, v18, v21 ; 10002B12
v_mul_f32_e32 v1, v18, v22 ; 10022D12
s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940
s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941
s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942
v_mul_f32_e32 v18, v0, v6 ; 10240D00
v_mul_f32_e32 v19, v1, v6 ; 10260D01
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C
s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D
v_sub_f32_e64 v0, 1.0, s26 ; D2080000 000034F2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s1, v0 ; 100C0001
v_mul_f32_e32 v1, s2, v0 ; 10020002
v_mul_f32_e32 v0, s3, v0 ; 10000003
v_mac_f32_e32 v6, s26, v13 ; 3E0C1A1A
v_mov_b32_e32 v30, v27 ; 7E3C031B
v_mac_f32_e32 v1, s26, v14 ; 3E021C1A
v_mov_b32_e32 v31, v28 ; 7E3E031C
v_mac_f32_e32 v0, s26, v15 ; 3E001E1A
v_mov_b32_e32 v32, v29 ; 7E40031D
v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80
image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[36:43], s[16:19] ; F0800F00 00891510
image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[44:51], s[20:23] ; F0800F00 00AB1012
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402
s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925
v_mul_f32_e32 v17, v27, v27 ; 1022371B
v_mac_f32_e32 v17, v28, v28 ; 3E22391C
v_mac_f32_e32 v17, v29, v29 ; 3E223B1D
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926
s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928
s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929
s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A
v_mul_f32_e32 v18, v17, v27 ; 10243711
v_mul_f32_e32 v19, v17, v28 ; 10263911
v_mul_f32_e32 v17, v17, v29 ; 10223B11
v_rcp_f32_e32 v21, v18 ; 7E2A5512
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v23, s1, v25 ; 082E3201
v_sub_f32_e32 v24, s2, v20 ; 08302802
v_rcp_f32_e32 v30, v19 ; 7E3C5513
v_mul_f32_e32 v23, v21, v23 ; 102E2F15
v_sub_f32_e32 v31, s13, v25 ; 083E320D
v_mul_f32_e32 v21, v21, v31 ; 102A3F15
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v21, v21, v23 ; 002A2F15
v_rcp_f32_e32 v23, v17 ; 7E2E5511
v_mul_f32_e32 v24, v30, v24 ; 1030311E
v_sub_f32_e32 v31, s16, v20 ; 083E2810
v_mul_f32_e32 v30, v30, v31 ; 103C3F1E
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v24, v30, v24 ; 0030311E
v_sub_f32_e32 v30, s3, v26 ; 083C3403
v_mul_f32_e32 v30, v23, v30 ; 103C3D17
v_sub_f32_e32 v31, s17, v26 ; 083E3411
v_mul_f32_e32 v23, v23, v31 ; 102E3F17
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v23, v23, v30 ; 002E3D17
v_min3_f32 v21, v21, v24, v23 ; D2A20015 045E3115
v_mov_b32_e32 v23, s13 ; 7E2E020D
v_add_f32_e32 v23, s1, v23 ; 062E2E01
v_mov_b32_e32 v24, s16 ; 7E300210
v_add_f32_e32 v24, s2, v24 ; 06303002
v_mov_b32_e32 v30, s17 ; 7E3C0211
v_add_f32_e32 v32, s3, v30 ; 06403C03
v_mad_f32 v30, 0.5, v23, -s18 ; D282001E 804A2EF0
v_add_f32_e32 v30, v25, v30 ; 063C3D19
v_mac_f32_e32 v30, v21, v18 ; 3E3C2515
v_mad_f32 v18, 0.5, v24, -s19 ; D2820012 804E30F0
v_add_f32_e32 v18, v20, v18 ; 06242514
v_mac_f32_e32 v18, v21, v19 ; 3E242715
v_mad_f32 v19, 0.5, v32, -s20 ; D2820013 805240F0
v_add_f32_e32 v19, v26, v19 ; 0626271A
v_mac_f32_e32 v19, v21, v17 ; 3E262315
v_mad_f32 v30, 0.5, -v23, v30 ; D282001E 447A2EF0
v_mad_f32 v31, 0.5, -v24, v18 ; D282001F 444A30F0
v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0
s_or_b64 exec, exec, s[14:15] ; 88FE0E7E
s_buffer_load_dword s28, s[8:11], 0x17 ; C20E0917
s_buffer_load_dword s29, s[8:11], 0x43 ; C20E8943
s_buffer_load_dword s27, s[8:11], 0x44 ; C20D8944
s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945
s_buffer_load_dword s16, s[8:11], 0x46 ; C2080946
s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900
s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901
s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902
s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904
s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905
s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906
s_buffer_load_dword s17, s[8:11], 0x7 ; C2088907
s_buffer_load_dword s18, s[8:11], 0x8 ; C2090908
s_buffer_load_dword s20, s[8:11], 0x9 ; C20A0909
s_buffer_load_dword s21, s[8:11], 0xa ; C20A890A
s_buffer_load_dword s22, s[8:11], 0xb ; C20B090B
s_buffer_load_dword s23, s[8:11], 0xc ; C20B890C
s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D
s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000
v_cubeid_f32 v37, v30, v31, v32 ; D2880025 04823F1E
v_cubema_f32 v36, v30, v31, v32 ; D28E0024 04823F1E
s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500
s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700
v_cubesc_f32 v35, v30, v31, v32 ; D28A0023 04823F1E
v_cubetc_f32 v34, v30, v31, v32 ; D28C0022 04823F1E
v_rcp_f32_e64 v17, |v36| ; D3540111 00000124
v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000
v_mad_f32 v31, v17, v34, v30 ; D282001F 047A4511
v_mac_f32_e32 v30, v17, v35 ; 3E3C4711
v_mov_b32_e32 v32, v37 ; 7E400325
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v33 ; 7E224F21
s_buffer_load_dword s30, s[8:11], 0xf ; C20F090F
s_buffer_load_dword s31, s[8:11], 0x6c ; C20F896C
v_mul_legacy_f32_e32 v17, s33, v17 ; 0E222221
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s32, v17 ; 10222220
v_mul_f32_e32 v19, v30, v17 ; 1026231E
v_mul_f32_e32 v18, v31, v17 ; 1024231F
v_mul_f32_e32 v17, v32, v17 ; 10222320
v_mov_b32_e32 v21, s26 ; 7E2A021A
v_mov_b32_e32 v23, 0x3f7fff58 ; 7E2E02FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v23 ; 7C022E0C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B
s_buffer_load_dword s26, s[8:11], 0x3c ; C20D093C
s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936
s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938
s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939
s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A
s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930
s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931
s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932
s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934
s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935
v_mul_f32_e32 v23, v27, v27 ; 102E371B
v_mac_f32_e32 v23, v28, v28 ; 3E2E391C
v_mac_f32_e32 v23, v29, v29 ; 3E2E3B1D
v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v24, s35, v26 ; 08303423
v_mov_b32_e32 v30, s35 ; 7E3C0223
v_sub_f32_e32 v31, s41, v25 ; 083E3229
v_sub_f32_e32 v32, s42, v20 ; 0840282A
v_add_f32_e32 v30, s43, v30 ; 063C3C2B
v_sub_f32_e32 v33, s43, v26 ; 0842342B
v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0
v_add_f32_e32 v26, v26, v34 ; 0634451A
v_mul_f32_e32 v27, v23, v27 ; 10363717
v_mul_f32_e32 v28, v23, v28 ; 10383917
v_mul_f32_e32 v23, v23, v29 ; 102E3B17
v_rcp_f32_e32 v29, v27 ; 7E3A551B
v_rcp_f32_e32 v34, v28 ; 7E44551C
v_rcp_f32_e32 v35, v23 ; 7E465517
v_sub_f32_e32 v36, s44, v25 ; 0848322C
v_mov_b32_e32 v37, s44 ; 7E4A022C
v_add_f32_e32 v37, s41, v37 ; 064A4A29
v_mul_f32_e32 v31, v29, v31 ; 103E3F1D
v_mul_f32_e32 v29, v29, v36 ; 103A491D
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v24, v35, v24 ; 10303123
v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0
v_add_f32_e32 v25, v25, v35 ; 06324719
v_sub_f32_e32 v35, s45, v20 ; 0846282D
v_mov_b32_e32 v36, s45 ; 7E48022D
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s42, v36 ; 0646482A
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D
v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v24, v24, v33 ; 00304318
v_min3_f32 v24, v29, v31, v24 ; D2A20018 04623F1D
v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0
v_add_f32_e32 v20, v20, v29 ; 06283B14
v_mac_f32_e32 v25, v24, v27 ; 3E323718
v_mac_f32_e32 v20, v24, v28 ; 3E283918
v_mac_f32_e32 v26, v24, v23 ; 3E342F18
v_mad_f32 v27, 0.5, -v37, v25 ; D282001B 44664AF0
v_mad_f32 v28, 0.5, -v35, v20 ; D282001C 445246F0
v_mad_f32 v29, 0.5, -v30, v26 ; D282001D 446A3CF0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2
v_log_f32_e32 v20, v20 ; 7E284F14
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v20, 0x3f400000, v20 ; 0E2828FF 3F400000
v_exp_f32_e32 v20, v20 ; 7E284B14
v_mul_f32_e32 v30, 0x40e00000, v20 ; 103C28FF 40E00000
v_cubeid_f32 v26, v27, v28, v29 ; D288001A 0476391B
v_cubema_f32 v25, v27, v28, v29 ; D28E0019 0476391B
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v24, v27, v28, v29 ; D28A0018 0476391B
v_cubetc_f32 v23, v27, v28, v29 ; D28C0017 0476391B
v_rcp_f32_e64 v20, |v25| ; D3540114 00000119
v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000
v_mad_f32 v28, v20, v23, v27 ; D282001C 046E2F14
v_mac_f32_e32 v27, v20, v24 ; 3E363114
v_mov_b32_e32 v29, v26 ; 7E3A031A
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A171B
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v20, v26 ; 7E284F1A
v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2
v_mul_legacy_f32_e32 v20, s34, v20 ; 0E282822
v_exp_f32_e32 v20, v20 ; 7E284B14
v_mul_f32_e32 v20, s26, v20 ; 1028281A
v_mul_f32_e32 v23, v23, v20 ; 102E2917
v_mul_f32_e32 v24, v24, v20 ; 10302918
v_mul_f32_e32 v20, v25, v20 ; 10282919
v_mul_f32_e32 v23, v23, v26 ; 102E3517
v_mul_f32_e32 v24, v24, v26 ; 10303518
v_mul_f32_e32 v20, v20, v26 ; 10283514
v_mac_f32_e32 v23, s12, v19 ; 3E2E260C
v_mac_f32_e32 v24, s12, v18 ; 3E30240C
v_mac_f32_e32 v20, s12, v17 ; 3E28220C
v_mov_b32_e32 v17, v20 ; 7E220314
v_mov_b32_e32 v18, v24 ; 7E240318
v_mov_b32_e32 v19, v23 ; 7E260317
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mad_f32 v24, -v21, s29, s29 ; D2820018 20743B15
v_mov_b32_e32 v20, s28 ; 7E28021C
v_mul_f32_e32 v21, v24, v13 ; 102A1B18
v_mul_f32_e32 v14, v24, v14 ; 101C1D18
v_mul_f32_e32 v13, v24, v15 ; 101A1F18
v_mul_f32_e32 v15, s27, v16 ; 101E201B
v_sub_f32_e64 v25, 1.0, s31 ; D2080019 00003EF2
v_mac_f32_e32 v25, s31, v22 ; 3E322C1F
v_mul_f32_e32 v22, s19, v16 ; 102C2013
v_mul_f32_e32 v16, s16, v16 ; 10202010
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916
s_buffer_load_dword s7, s[8:11], 0x48 ; C2038948
s_buffer_load_dword s12, s[8:11], 0x49 ; C2060949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_mul_f32_e32 v23, s14, v8 ; 102E100E
v_mac_f32_e32 v23, s13, v9 ; 3E2E120D
v_mac_f32_e32 v23, s15, v7 ; 3E2E0E0F
v_add_f32_e32 v23, s17, v23 ; 062E2E11
v_mul_f32_e32 v26, s20, v8 ; 10341014
v_mac_f32_e32 v26, s18, v9 ; 3E341212
v_mac_f32_e32 v26, s21, v7 ; 3E340E15
v_add_f32_e32 v26, s22, v26 ; 06343416
v_mul_f32_e32 v27, s24, v8 ; 10361018
v_mac_f32_e32 v27, s23, v9 ; 3E361217
v_mac_f32_e32 v27, s25, v7 ; 3E360E19
v_add_f32_e32 v27, s30, v27 ; 0636361E
v_add_f32_e32 v2, v23, v2 ; 06040517
v_add_f32_e32 v3, v26, v3 ; 0606071A
v_add_f32_e32 v26, v27, v4 ; 0634091B
v_mul_f32_e32 v4, s2, v9 ; 10081202
v_mac_f32_e32 v4, s3, v8 ; 3E081003
v_mac_f32_e32 v4, s1, v7 ; 3E080E01
v_max_f32_e32 v23, 0, v4 ; 202E0880
v_mul_f32_e32 v4, v25, v2 ; 10080519
v_mul_f32_e32 v2, v25, v3 ; 10040719
v_mul_f32_e32 v3, v25, v26 ; 10063519
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v20, s16, v5 ; 3E280A10
v_mul_f32_e32 v5, v25, v19 ; 100A2719
v_mul_f32_e32 v18, v25, v18 ; 10242519
v_mul_f32_e32 v17, v25, v17 ; 10222319
v_sub_f32_e32 v19, 1.0, v24 ; 082630F2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v24, s2, v11 ; 08301602
v_sub_f32_e32 v25, s3, v10 ; 08321403
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_sub_f32_e32 v27, s1, v12 ; 08361801
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_mul_f32_e32 v26, v26, v27 ; 1034371A
v_mul_f32_e32 v11, v11, v9 ; 1016130B
v_mad_f32 v10, -v10, v8, -v11 ; D282000A A42E110A
v_mad_f32 v10, -v12, v7, v10 ; D282000A 242A0F0C
v_mul_f32_e32 v9, v24, v9 ; 10121318
v_mac_f32_e32 v9, v25, v8 ; 3E121119
v_mul_f32_e32 v8, s2, v24 ; 10103002
v_mac_f32_e32 v8, s3, v25 ; 3E103203
v_mac_f32_e32 v9, v26, v7 ; 3E120F1A
v_mac_f32_e32 v8, s1, v26 ; 3E103401
v_max_f32_e32 v7, 0, v8 ; 200E1080
v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2
v_mul_f32_e32 v11, v8, v8 ; 10161108
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v24, v11, v12 ; 1030190B
v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C
v_mul_f32_e32 v26, v6, v25 ; 10343306
v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2
v_mac_f32_e32 v6, v8, v27 ; 3E0C3708
v_mul_f32_e32 v27, v1, v25 ; 10363301
v_sub_f32_e32 v28, 1.0, v1 ; 083802F2
v_mac_f32_e32 v1, v8, v28 ; 3E023908
v_mul_f32_e32 v25, v0, v25 ; 10323300
v_sub_f32_e32 v28, 1.0, v0 ; 083800F2
v_mac_f32_e32 v0, v8, v28 ; 3E003908
v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2
v_sub_f32_e32 v28, 1.0, v8 ; 083810F2
v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F
v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9
v_add_f32_e32 v29, v7, v7 ; 063A0F07
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mad_f32 v7, v29, v7, 0.5 ; D2820007 03C20F1D
v_mul_f32_e32 v12, v24, v12 ; 10181918
v_mac_f32_e32 v26, v19, v12 ; 3E341913
v_mac_f32_e32 v27, v19, v12 ; 3E361913
v_mac_f32_e32 v25, v19, v12 ; 3E321913
v_mul_f32_e32 v8, v8, v8 ; 10101108
v_log_f32_e32 v19, v28 ; 7E264F1C
v_mul_f32_e32 v8, s8, v8 ; 10101008
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v19 ; 7E145513
v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2
v_mul_f32_e32 v8, v8, v19 ; 10102708
v_mac_f32_e32 v8, 1.0, v23 ; 3E102EF2
v_max_f32_e32 v9, 0, v9 ; 20121280
v_log_f32_e32 v9, v9 ; 7E124F09
v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B
v_rcp_f32_e32 v8, v8 ; 7E105508
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s12, v10 ; 1014140C
v_exp_f32_e32 v9, v9 ; 7E124B09
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v8, v9, v8 ; 10101109
v_mul_f32_e32 v9, v19, v19 ; 10122713
v_mul_f32_e32 v10, v19, v9 ; 10141313
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307
v_mad_f32 v7, v7, v12, 1.0 ; D2820007 03CA1907
v_mul_f32_e32 v7, v7, v9 ; 100E1307
v_mul_f32_e32 v8, v23, v8 ; 10101117
v_mul_f32_e32 v8, s7, v8 ; 10101007
v_mul_f32_e32 v7, v23, v7 ; 100E0F17
v_mac_f32_e32 v4, v7, v15 ; 3E081F07
v_mul_f32_e32 v4, v4, v21 ; 10082B04
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v9, v15, v8 ; 1012110F
v_mac_f32_e32 v4, v6, v9 ; 3E081306
v_mac_f32_e32 v2, v7, v22 ; 3E042D07
v_mac_f32_e32 v3, v7, v16 ; 3E062107
v_mul_f32_e32 v6, v22, v8 ; 100C1116
v_mul_f32_e32 v7, v16, v8 ; 100E1110
v_mul_f32_e32 v2, v2, v14 ; 10041D02
v_mul_f32_e32 v3, v3, v13 ; 10061B03
v_mac_f32_e32 v2, v1, v6 ; 3E040D01
v_mac_f32_e32 v3, v0, v7 ; 3E060F00
v_mac_f32_e32 v4, v26, v5 ; 3E080B1A
v_mac_f32_e32 v2, v27, v18 ; 3E04251B
v_mac_f32_e32 v3, v25, v17 ; 3E062319
v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v5, s6, v1 ; 100A0206
v_mac_f32_e32 v5, v4, v0 ; 3E0A0104
v_mul_f32_e32 v4, s5, v1 ; 10080205
v_mac_f32_e32 v4, v2, v0 ; 3E080102
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v3, v0 ; 3E020103
v_cvt_pkrtz_f16_f32_e32 v0, v5, v4 ; 5E000905
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 40
Code Size: 2420 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MOV TEMP[4].xyz, TEMP[3].xyzx
33: MUL TEMP[5].xyw, TEMP[1], IMM[0].yyyy
34: MOV TEMP[6].x, TEMP[5].xxxx
35: MUL TEMP[7].x, TEMP[5].yyyy, CONST[1].xxxx
36: MOV TEMP[6].y, TEMP[7].xxxx
37: ADD TEMP[5].xy, TEMP[6].xyyy, TEMP[5].wwww
38: MOV TEMP[5].zw, TEMP[1].wwzw
39: MUL TEMP[6], TEMP[3].xyzz, TEMP[3].yzzx
40: DP4 TEMP[7].x, CONST[2], TEMP[6]
41: DP4 TEMP[8].x, CONST[3], TEMP[6]
42: MOV TEMP[7].y, TEMP[8].xxxx
43: DP4 TEMP[6].x, CONST[4], TEMP[6]
44: MOV TEMP[7].z, TEMP[6].xxxx
45: MUL TEMP[6].x, TEMP[3].yyyy, TEMP[3].yyyy
46: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[6].xxxx
47: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[7].xyzz
48: ADD TEMP[6].xyz, TEMP[0].xyzz, -CONST[0].xyzz
49: MOV TEMP[6].yzw, TEMP[6].yxyz
50: MOV TEMP[6].x, TEMP[1].zzzz
51: MOV TEMP[0].xyz, TEMP[0].xyzx
52: MOV OUT[6], TEMP[0]
53: MOV OUT[1], TEMP[2]
54: MOV OUT[2], TEMP[4]
55: MOV OUT[3], TEMP[3]
56: MOV OUT[4], TEMP[5]
57: MOV OUT[0], TEMP[1]
58: MOV OUT[5], TEMP[6]
59: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
%108 = fmul float %32, %85
%109 = fmul float %33, %85
%110 = fmul float %34, %85
%111 = fmul float %35, %85
%112 = fmul float %36, %86
%113 = fadd float %112, %108
%114 = fmul float %37, %86
%115 = fadd float %114, %109
%116 = fmul float %38, %86
%117 = fadd float %116, %110
%118 = fmul float %39, %86
%119 = fadd float %118, %111
%120 = fmul float %40, %87
%121 = fadd float %120, %113
%122 = fmul float %41, %87
%123 = fadd float %122, %115
%124 = fmul float %42, %87
%125 = fadd float %124, %117
%126 = fmul float %43, %87
%127 = fadd float %126, %119
%128 = fmul float %44, %88
%129 = fadd float %128, %121
%130 = fmul float %45, %88
%131 = fadd float %130, %123
%132 = fmul float %46, %88
%133 = fadd float %132, %125
%134 = fmul float %65, %85
%135 = fmul float %66, %85
%136 = fmul float %67, %85
%137 = fmul float %68, %85
%138 = fmul float %69, %86
%139 = fadd float %138, %134
%140 = fmul float %70, %86
%141 = fadd float %140, %135
%142 = fmul float %71, %86
%143 = fadd float %142, %136
%144 = fmul float %72, %86
%145 = fadd float %144, %137
%146 = fmul float %73, %87
%147 = fadd float %146, %139
%148 = fmul float %74, %87
%149 = fadd float %148, %141
%150 = fmul float %75, %87
%151 = fadd float %150, %143
%152 = fmul float %76, %87
%153 = fadd float %152, %145
%154 = fmul float %77, %88
%155 = fadd float %154, %147
%156 = fmul float %78, %88
%157 = fadd float %156, %149
%158 = fmul float %79, %88
%159 = fadd float %158, %151
%160 = fmul float %80, %88
%161 = fadd float %160, %153
%162 = fmul float %100, %56
%163 = fadd float %162, %58
%164 = fmul float %101, %57
%165 = fadd float %164, %59
%166 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %166, float %100, float %106
%.36 = select i1 %166, float %101, float %107
%167 = fmul float %., %60
%168 = fadd float %167, %62
%169 = fmul float %.36, %61
%170 = fadd float %169, %63
%171 = fmul float %47, %93
%172 = fmul float %50, %93
%173 = fmul float %53, %93
%174 = fmul float %48, %94
%175 = fadd float %174, %171
%176 = fmul float %51, %94
%177 = fadd float %176, %172
%178 = fmul float %54, %94
%179 = fadd float %178, %173
%180 = fmul float %49, %95
%181 = fadd float %180, %175
%182 = fmul float %52, %95
%183 = fadd float %182, %177
%184 = fmul float %55, %95
%185 = fadd float %184, %179
%186 = fmul float %181, %181
%187 = fmul float %183, %183
%188 = fadd float %187, %186
%189 = fmul float %185, %185
%190 = fadd float %188, %189
%191 = call float @llvm.AMDGPU.rsq.clamped.f32(float %190)
%192 = fmul float %181, %191
%193 = fmul float %183, %191
%194 = fmul float %185, %191
%195 = fmul float %155, 5.000000e-01
%196 = fmul float %157, 5.000000e-01
%197 = fmul float %161, 5.000000e-01
%198 = fmul float %196, %16
%199 = fadd float %195, %197
%200 = fadd float %198, %197
%201 = fmul float %192, %193
%202 = fmul float %193, %194
%203 = fmul float %194, %194
%204 = fmul float %194, %192
%205 = fmul float %17, %201
%206 = fmul float %18, %202
%207 = fadd float %205, %206
%208 = fmul float %19, %203
%209 = fadd float %207, %208
%210 = fmul float %20, %204
%211 = fadd float %209, %210
%212 = fmul float %21, %201
%213 = fmul float %22, %202
%214 = fadd float %212, %213
%215 = fmul float %23, %203
%216 = fadd float %214, %215
%217 = fmul float %24, %204
%218 = fadd float %216, %217
%219 = fmul float %25, %201
%220 = fmul float %26, %202
%221 = fadd float %219, %220
%222 = fmul float %27, %203
%223 = fadd float %221, %222
%224 = fmul float %28, %204
%225 = fadd float %223, %224
%226 = fmul float %193, %193
%227 = fmul float %192, %192
%228 = fsub float %227, %226
%229 = fmul float %29, %228
%230 = fadd float %229, %211
%231 = fmul float %30, %228
%232 = fadd float %231, %218
%233 = fmul float %31, %228
%234 = fadd float %233, %225
%235 = fsub float %129, %13
%236 = fsub float %131, %14
%237 = fsub float %133, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %163, float %165, float %168, float %170)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %192, float %193, float %194, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %230, float %232, float %234, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %159, float %161)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %159, float %235, float %236, float %237)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %129, float %131, float %133, float %127)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %155, float %157, float %159, float %161)
ret void
}
; Declarations, attribute groups and metadata referenced by the @main body that ends just above.
; Function Attrs: nounwind readnone
; Called above as (<16 x i8> descriptor, i32 offset); every offset passed in this shader is a multiple of 4.
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
; Called above as (<16 x i8> descriptor loaded from argument %4, i32 0, i32 index) with index = add i32 %5, %7.
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
; Applied above to the sum of three squares before scaling the three components by the result.
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Five i32 operands followed by four float payload values. In the calls above the first operand is always 15,
; the fourth takes the values 32..37 and 12, and the third is 1 only on the final call.
; NOTE(review): the meaning of each i32 operand is not shown in this dump - confirm against the backend before relying on it.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0: the same group text also appears on the VERT shader's @main at the top of this dump.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
; Attribute group #1 is attached to the three value-returning intrinsic declarations above.
attributes #1 = { nounwind readnone }
; Metadata node referenced as !tbaa !0 on the descriptor loads in the function body.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the LLVM IR @main directly above. The trailing hex on each line is the instruction encoding.
; v1 = 0 is later used as the fourth payload of the "exp 15, 33" and "exp 15, 34" exports (float 0.0 in the IR).
v_mov_b32_e32 v1, 0 ; 7E020280
; v0 = s10 + v0: the index passed to all four vertex input loads (add i32 %5, %7 in the IR).
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; s[20:23]: the descriptor used as the base of every s_buffer_load_dword below.
s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300
; Four descriptors read through s[8:9] at offsets 0x0, 0x4, 0x8, 0xc; each one feeds one buffer_load_format_xyzw below.
; The last load overwrites s[8:11], i.e. the pointer pair itself and s10 (already consumed by the v_add above).
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s19, s[20:23], 0x23 ; C2099523
; Vertex input fetches. Destination ranges overlap on purpose: only x,y,z of the second load (v6..v8) and
; x,y of the third (v9,v10) and fourth (v11,v12) are extracted in the IR, so the unused upper components are overwritten.
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[8:11], 0 idxen ; E00C2000 80020B00
; First batch of scalar constant loads. The offsets here are the IR load.const byte offsets divided by 4
; (e.g. 0x3f <-> 252, 0x40 <-> 256, 0x44..0x4b <-> 272..300).
s_buffer_load_dword s24, s[20:23], 0x24 ; C20C1524
s_buffer_load_dword s25, s[20:23], 0x25 ; C20C9525
s_buffer_load_dword s26, s[20:23], 0x26 ; C20D1526
s_buffer_load_dword s27, s[20:23], 0x28 ; C20D9528
s_buffer_load_dword s28, s[20:23], 0x29 ; C20E1529
s_buffer_load_dword s29, s[20:23], 0x2a ; C20E952A
s_buffer_load_dword s30, s[20:23], 0x2c ; C20F152C
s_buffer_load_dword s31, s[20:23], 0x2d ; C20F952D
s_buffer_load_dword s32, s[20:23], 0x2e ; C210152E
s_buffer_load_dword s33, s[20:23], 0x30 ; C2109530
s_buffer_load_dword s34, s[20:23], 0x31 ; C2111531
s_buffer_load_dword s35, s[20:23], 0x32 ; C2119532
s_buffer_load_dword s36, s[20:23], 0x38 ; C2121538
s_buffer_load_dword s37, s[20:23], 0x39 ; C2129539
s_buffer_load_dword s11, s[20:23], 0x9 ; C2059509
s_buffer_load_dword s5, s[20:23], 0xa ; C202950A
s_buffer_load_dword s3, s[20:23], 0xb ; C201950B
s_buffer_load_dword s9, s[20:23], 0xc ; C204950C
s_buffer_load_dword s12, s[20:23], 0xd ; C206150D
s_buffer_load_dword s7, s[20:23], 0xe ; C203950E
s_buffer_load_dword s4, s[20:23], 0xf ; C202150F
s_buffer_load_dword s10, s[20:23], 0x10 ; C2051510
s_buffer_load_dword s13, s[20:23], 0x11 ; C2069511
s_buffer_load_dword s8, s[20:23], 0x12 ; C2041512
s_buffer_load_dword s0, s[20:23], 0x3f ; C200153F
s_buffer_load_dword s1, s[20:23], 0x40 ; C2009540
s_buffer_load_dword s38, s[20:23], 0x44 ; C2131544
s_buffer_load_dword s39, s[20:23], 0x45 ; C2139545
s_buffer_load_dword s40, s[20:23], 0x46 ; C2141546
s_buffer_load_dword s41, s[20:23], 0x47 ; C2149547
s_buffer_load_dword s42, s[20:23], 0x48 ; C2151548
s_buffer_load_dword s43, s[20:23], 0x49 ; C2159549
s_buffer_load_dword s44, s[20:23], 0x4a ; C216154A
s_buffer_load_dword s45, s[20:23], 0x4b ; C216954B
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; s0 (constant 0x3f, IR %63) is copied to v0 and s1 (constant 0x40, IR %64) is compared against 0 into vcc
; (IR %166 = fcmp oeq %64, 0.0) before both SGPRs are reloaded with constants 0x0 and 0x1.
v_mov_b32_e32 v0, s0 ; 7E000200
s_buffer_load_dword s0, s[20:23], 0x0 ; C2001500
v_cmp_eq_f32_e64 vcc, 0, s1 ; D004006A 00000280
s_buffer_load_dword s1, s[20:23], 0x1 ; C2009501
s_buffer_load_dword s2, s[20:23], 0x2 ; C2011502
s_buffer_load_dword s6, s[20:23], 0x4 ; C2031504
s_buffer_load_dword s14, s[20:23], 0x8 ; C2071508
s_buffer_load_dword s46, s[20:23], 0x3a ; C217153A
s_buffer_load_dword s47, s[20:23], 0x3b ; C217953B
s_buffer_load_dword s48, s[20:23], 0x3c ; C218153C
s_buffer_load_dword s49, s[20:23], 0x3d ; C218953D
s_buffer_load_dword s50, s[20:23], 0x3e ; C219153E
s_buffer_load_dword s18, s[20:23], 0x13 ; C2091513
s_buffer_load_dword s15, s[20:23], 0x14 ; C2079514
s_buffer_load_dword s16, s[20:23], 0x15 ; C2081515
s_buffer_load_dword s17, s[20:23], 0x16 ; C2089516
s_buffer_load_dword s51, s[20:23], 0x18 ; C2199518
s_waitcnt lgkmcnt(0) ; BF8C007F
; s46 (constant 0x3a) is copied into v13 as an accumulator seed before s46 is reloaded with constant 0x19.
v_mov_b32_e32 v13, s46 ; 7E1A022E
s_buffer_load_dword s46, s[20:23], 0x19 ; C2171519
s_buffer_load_dword s52, s[20:23], 0x1a ; C21A151A
s_buffer_load_dword s53, s[20:23], 0x1b ; C21A951B
s_buffer_load_dword s54, s[20:23], 0x1c ; C21B151C
s_buffer_load_dword s55, s[20:23], 0x1d ; C21B951D
s_buffer_load_dword s56, s[20:23], 0x1e ; C21C151E
s_buffer_load_dword s57, s[20:23], 0x1f ; C21C951F
s_buffer_load_dword s58, s[20:23], 0x20 ; C21D1520
s_buffer_load_dword s59, s[20:23], 0x21 ; C21D9521
s_buffer_load_dword s60, s[20:23], 0x22 ; C21E1522
s_buffer_load_dword s61, s[20:23], 0x4c ; C21E954C
s_buffer_load_dword s62, s[20:23], 0x4d ; C21F154D
s_buffer_load_dword s63, s[20:23], 0x4e ; C21F954E
s_buffer_load_dword s64, s[20:23], 0x4f ; C220154F
s_buffer_load_dword s65, s[20:23], 0x50 ; C2209550
s_buffer_load_dword s66, s[20:23], 0x51 ; C2211551
s_buffer_load_dword s67, s[20:23], 0x52 ; C2219552
; Final constant load: its destination s20 is the first dword of the descriptor s[20:23] itself.
s_buffer_load_dword s20, s[20:23], 0x53 ; C20A1553
; Multiply-accumulate chains. Register roles, matched against the IR above:
;   v13, v14          = input2.xy * const + const              (IR %163, %165)
;   v17, v18, v6      = 3-term sums over input1.xyz            (IR %181, %183, %185)
;   v15, v16, v7, v8  = sums over input0 (v2..v5); v8 has no v5 term (IR %129, %131, %133, %127)
;   v19, v11, v12, v2 = 4-term sums over input0                (IR %155, %157, %159, %161)
v_mac_f32_e32 v13, s36, v9 ; 3E1A1224
v_mov_b32_e32 v14, s47 ; 7E1C022F
v_mul_f32_e32 v15, s51, v2 ; 101E0433
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v16, s46, v2 ; 1020042E
v_mul_f32_e32 v17, s27, v6 ; 10220C1B
v_mul_f32_e32 v18, s30, v6 ; 10240C1E
v_mul_f32_e32 v6, s33, v6 ; 100C0C21
v_mac_f32_e32 v17, s28, v7 ; 3E220E1C
v_mac_f32_e32 v18, s31, v7 ; 3E240E1F
v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22
v_mac_f32_e32 v17, s29, v8 ; 3E22101D
v_mac_f32_e32 v18, s32, v8 ; 3E241020
v_mac_f32_e32 v6, s35, v8 ; 3E0C1023
v_mul_f32_e32 v7, s52, v2 ; 100E0434
v_mul_f32_e32 v8, s53, v2 ; 10100435
v_mul_f32_e32 v19, s38, v2 ; 10260426
v_mac_f32_e32 v14, s37, v10 ; 3E1C1425
; vcc-controlled selects between input2.xy (v9,v10) and input3.xy (v11,v12): the IR selects %. and %.36.
; They are placed after the v13/v14 accumulations, which read the unselected v9/v10.
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mac_f32_e32 v15, s54, v3 ; 3E1E0636
v_mac_f32_e32 v16, s55, v3 ; 3E200637
v_mac_f32_e32 v7, s56, v3 ; 3E0E0638
v_mac_f32_e32 v8, s57, v3 ; 3E100639
v_mac_f32_e32 v19, s42, v3 ; 3E26062A
v_mul_f32_e32 v11, s39, v2 ; 10160427
v_mac_f32_e32 v11, s43, v3 ; 3E16062B
v_mul_f32_e32 v12, s40, v2 ; 10180428
v_mac_f32_e32 v12, s44, v3 ; 3E18062C
v_mul_f32_e32 v2, s41, v2 ; 10040429
v_mac_f32_e32 v2, s45, v3 ; 3E04062D
v_mac_f32_e32 v15, s58, v4 ; 3E1E083A
v_mac_f32_e32 v16, s59, v4 ; 3E20083B
v_mac_f32_e32 v7, s60, v4 ; 3E0E083C
v_mac_f32_e32 v8, s19, v4 ; 3E100813
v_mac_f32_e32 v19, s61, v4 ; 3E26083D
v_mac_f32_e32 v11, s62, v4 ; 3E16083E
v_mac_f32_e32 v12, s63, v4 ; 3E18083F
v_mac_f32_e32 v2, s64, v4 ; 3E040840
v_mac_f32_e32 v15, s24, v5 ; 3E1E0A18
v_mac_f32_e32 v16, s25, v5 ; 3E200A19
v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A
v_mac_f32_e32 v19, s65, v5 ; 3E260A41
v_mac_f32_e32 v11, s66, v5 ; 3E160A42
v_mac_f32_e32 v12, s67, v5 ; 3E180A43
v_mac_f32_e32 v2, s20, v5 ; 3E040A14
v_mov_b32_e32 v3, s50 ; 7E060232
; v4 = v17^2 + v18^2 + v6^2, then rsq_clamp (IR %190, %191).
v_mul_f32_e32 v4, v17, v17 ; 10082311
v_mac_f32_e32 v4, v18, v18 ; 3E082512
v_mac_f32_e32 v4, v6, v6 ; 3E080D06
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
; v3 and v0 complete the pair computed from the selected v9/v10 (IR %168, %170).
v_mac_f32_e32 v3, s48, v9 ; 3E061230
v_mac_f32_e32 v0, s49, v10 ; 3E001431
; Export with second operand 32: payload = IR %163, %165, %168, %170.
exp 15, 32, 0, 0, 0, v13, v14, v3, v0 ; F800020F 00030E0D
; NOTE(review): each s_waitcnt expcnt(0) below precedes writes to VGPRs that were just used as export payload;
; presumably it protects those sources until the export has read them - confirm against the ISA manual.
s_waitcnt expcnt(0) ; BF8C070F
; v0, v3, v4 = (v17, v18, v6) scaled by the rsq result (IR %192, %193, %194).
v_mul_f32_e32 v0, v4, v17 ; 10002304
v_mul_f32_e32 v3, v4, v18 ; 10062504
v_mul_f32_e32 v4, v4, v6 ; 10080D04
; v6, v9, v5 accumulate constant-weighted pairwise products of v0, v3, v4
; (IR %201..%204 feeding %211, %218, %225).
v_mul_f32_e32 v5, v4, v3 ; 100A0704
v_mul_f32_e32 v6, s11, v5 ; 100C0A0B
v_mul_f32_e32 v9, s12, v5 ; 10120A0C
v_mul_f32_e32 v5, s13, v5 ; 100A0A0D
v_mul_f32_e32 v10, v3, v0 ; 10140103
v_mac_f32_e32 v6, s14, v10 ; 3E0C140E
v_mac_f32_e32 v9, s9, v10 ; 3E121409
v_mac_f32_e32 v5, s10, v10 ; 3E0A140A
v_mul_f32_e32 v10, v4, v4 ; 10140904
v_mac_f32_e32 v6, s5, v10 ; 3E0C1405
v_mac_f32_e32 v9, s7, v10 ; 3E121407
v_mac_f32_e32 v5, s8, v10 ; 3E0A1408
v_mul_f32_e32 v10, v0, v4 ; 10140900
v_mac_f32_e32 v6, s3, v10 ; 3E0C1403
v_mac_f32_e32 v9, s4, v10 ; 3E121404
v_mac_f32_e32 v5, s18, v10 ; 3E0A1412
; Export with second operand 33: payload = IR %192, %193, %194, 0.0 (v1).
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
s_waitcnt expcnt(0) ; BF8C070F
; v0 = v0*v0 - v3*v3 (IR %228), added into v6, v9, v5 with constants s15..s17 (IR %230, %232, %234).
v_mul_f32_e32 v3, v3, v3 ; 10060703
v_mad_f32 v0, v0, v0, -v3 ; D2820000 840E0100
v_mac_f32_e32 v6, s15, v0 ; 3E0C000F
v_mac_f32_e32 v9, s16, v0 ; 3E120010
v_mac_f32_e32 v5, s17, v0 ; 3E0A0011
; v0 = 0.5 * v11 and v3 = 0.5 * v2 (IR %196, %197).
v_mul_f32_e32 v0, 0.5, v11 ; 100016F0
v_mul_f32_e32 v3, 0.5, v2 ; 100604F0
; Export with second operand 34: payload = IR %230, %232, %234, 0.0 (v1).
exp 15, 34, 0, 0, 0, v6, v9, v5, v1 ; F800022F 01050906
s_waitcnt expcnt(0) ; BF8C070F
; v1 = 0.5 * v19 + v3 (IR %199); v3 += s6 * v0 (IR %200).
v_mad_f32 v1, 0.5, v19, v3 ; D2820001 040E26F0
v_mac_f32_e32 v3, s6, v0 ; 3E060006
; Export with second operand 35: payload = IR %199, %200, %159, %161.
exp 15, 35, 0, 0, 0, v1, v3, v12, v2 ; F800023F 020C0301
; v_subrev computes src1 - src0: v0, v1, v3 = (v15, v16, v7) - (s0, s1, s2) (IR %235, %236, %237).
v_subrev_f32_e32 v0, s0, v15 ; 0A001E00
s_waitcnt expcnt(0) ; BF8C070F
v_subrev_f32_e32 v1, s1, v16 ; 0A022001
v_subrev_f32_e32 v3, s2, v7 ; 0A060E02
; Remaining exports: second operands 36 and 37 carry IR (%159, %235, %236, %237) and (%129, %131, %133, %127);
; the final export (second operand 12, fourth operand 1) carries IR %155, %157, %159, %161.
exp 15, 36, 0, 0, 0, v12, v0, v1, v3 ; F800024F 0301000C
exp 15, 37, 0, 0, 0, v15, v16, v7, v8 ; F800025F 0807100F
exp 15, 12, 0, 1, 0, v19, v11, v12, v2 ; F80008CF 020C0B13
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 20
Code Size: 788 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..20]
DCL CONST[22..23]
DCL CONST[25]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 1.0000, 0.0000, 2.0000, 0.5000}
IMM[1] FLT32 { 0.7500, 7.0000, 1.0000, 10.0000}
IMM[2] FLT32 { 0.9680, 0.0300, 0.0001, -1.0000}
0: DP3 TEMP[0].x, IN[1].xyzz, IN[1].xyzz
1: RSQ TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].xyz, IN[1].xyzz, TEMP[0].xxxx
3: MOV TEMP[1].xy, IN[0].xyyy
4: TEX TEMP[1], TEMP[1], SAMP[2], 2D
5: MUL TEMP[2].x, TEMP[1].wwww, CONST[19].wwww
6: FSLT TEMP[2].x, TEMP[2].xxxx, CONST[20].xxxx
7: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx
8: KILL_IF -TEMP[2].xxxx
9: DP3 TEMP[2].x, IN[4].yzww, IN[4].yzww
10: RSQ TEMP[2].x, TEMP[2].xxxx
11: MUL TEMP[2].xyz, IN[4].yzww, TEMP[2].xxxx
12: MUL TEMP[1].xyz, CONST[19].xyzz, TEMP[1].xyzz
13: LRP TEMP[3].xyz, CONST[22].xxxx, TEMP[1].xyzz, CONST[16].xyzz
14: MUL TEMP[4].x, CONST[22].xxxx, CONST[16].wwww
15: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
16: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[4].xxxx
17: MOV TEMP[5].xy, IN[0].xyyy
18: TEX TEMP[5].y, TEMP[5], SAMP[3], 2D
19: ADD TEMP[6].x, IMM[0].xxxx, -CONST[25].xxxx
20: MAD TEMP[5].x, TEMP[5].yyyy, CONST[25].xxxx, TEMP[6].xxxx
21: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
22: MAX TEMP[6].x, IMM[0].yyyy, TEMP[6].xxxx
23: MOV TEMP[7].xyz, IMM[0].yyyy
24: MOV TEMP[8].w, IMM[0].xxxx
25: MOV TEMP[8].xyz, TEMP[0].xyzx
26: DP4 TEMP[9].x, CONST[1], TEMP[8]
27: DP4 TEMP[10].x, CONST[2], TEMP[8]
28: MOV TEMP[9].y, TEMP[10].xxxx
29: DP4 TEMP[8].x, CONST[3], TEMP[8]
30: MOV TEMP[9].z, TEMP[8].xxxx
31: ADD TEMP[8].xyz, IN[2].xyzz, TEMP[9].xyzz
32: MOV TEMP[9].xy, IN[3].xyyy
33: MOV TEMP[9].w, IN[3].wwww
34: TXP TEMP[9].x, TEMP[9], SAMP[4], 2D
35: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
36: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
37: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[2].xyzz
38: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
39: MUL TEMP[10].xyz, IMM[0].zzzz, TEMP[10].xyzz
40: ADD TEMP[10].xyz, TEMP[2].xyzz, -TEMP[10].xyzz
41: MOV TEMP[11].xyz, TEMP[10].xyzx
42: FSLT TEMP[12].x, IMM[0].yyyy, CONST[10].wwww
43: UIF TEMP[12].xxxx :0
44: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
45: RSQ TEMP[12].x, TEMP[12].xxxx
46: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
47: MOV TEMP[13].xyz, -IN[5].xyzx
48: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
49: RCP TEMP[15].x, TEMP[12].xxxx
50: RCP TEMP[15].y, TEMP[12].yyyy
51: RCP TEMP[15].z, TEMP[12].zzzz
52: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
53: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
54: RCP TEMP[15].x, TEMP[12].xxxx
55: RCP TEMP[15].y, TEMP[12].yyyy
56: RCP TEMP[15].z, TEMP[12].zzzz
57: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
58: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[12].xyzz
59: UIF TEMP[15].xxxx :0
60: MOV TEMP[16].x, TEMP[14].xxxx
61: ELSE :0
62: MOV TEMP[16].x, TEMP[13].xxxx
63: ENDIF
64: UIF TEMP[15].yyyy :0
65: MOV TEMP[17].x, TEMP[14].yyyy
66: ELSE :0
67: MOV TEMP[17].x, TEMP[13].yyyy
68: ENDIF
69: UIF TEMP[15].zzzz :0
70: MOV TEMP[14].x, TEMP[14].zzzz
71: ELSE :0
72: MOV TEMP[14].x, TEMP[13].zzzz
73: ENDIF
74: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
75: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
76: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
77: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
78: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
79: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
80: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
81: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
82: ENDIF
83: ADD TEMP[12].x, IMM[0].xxxx, -CONST[23].xxxx
84: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].xxxx
85: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
86: MOV TEMP[11].xyz, TEMP[11].xyzz
87: MOV TEMP[11].w, TEMP[12].xxxx
88: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
89: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
90: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
91: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
92: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].zzzz
93: UIF TEMP[12].xxxx :0
94: MOV TEMP[12].xyz, TEMP[10].xyzx
95: FSLT TEMP[13].x, IMM[0].yyyy, CONST[14].wwww
96: UIF TEMP[13].xxxx :0
97: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
98: RSQ TEMP[13].x, TEMP[13].xxxx
99: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
100: MOV TEMP[13].xyz, -IN[5].xyzx
101: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
102: RCP TEMP[15].x, TEMP[10].xxxx
103: RCP TEMP[15].y, TEMP[10].yyyy
104: RCP TEMP[15].z, TEMP[10].zzzz
105: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
106: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
107: RCP TEMP[15].x, TEMP[10].xxxx
108: RCP TEMP[15].y, TEMP[10].yyyy
109: RCP TEMP[15].z, TEMP[10].zzzz
110: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
111: FSLT TEMP[15].xyz, IMM[0].yyyy, TEMP[10].xyzz
112: UIF TEMP[15].xxxx :0
113: MOV TEMP[16].x, TEMP[14].xxxx
114: ELSE :0
115: MOV TEMP[16].x, TEMP[13].xxxx
116: ENDIF
117: UIF TEMP[15].yyyy :0
118: MOV TEMP[17].x, TEMP[14].yyyy
119: ELSE :0
120: MOV TEMP[17].x, TEMP[13].yyyy
121: ENDIF
122: UIF TEMP[15].zzzz :0
123: MOV TEMP[14].x, TEMP[14].zzzz
124: ELSE :0
125: MOV TEMP[14].x, TEMP[13].zzzz
126: ENDIF
127: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
128: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[0].wwww
129: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
130: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
131: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
132: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[5].xyzz
133: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
134: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
135: ENDIF
136: ADD TEMP[10].x, IMM[0].xxxx, -CONST[23].xxxx
137: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].xxxx
138: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
139: MOV TEMP[12].xyz, TEMP[12].xyzz
140: MOV TEMP[12].w, TEMP[10].xxxx
141: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
142: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
143: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
144: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
145: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
146: ELSE :0
147: MOV TEMP[7].xyz, TEMP[11].xyzx
148: ENDIF
149: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
150: MOV TEMP[2].xyz, -TEMP[2].xyzx
151: ADD TEMP[5].x, IMM[0].xxxx, -CONST[23].xxxx
152: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[2].xyzz
153: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
154: RSQ TEMP[11].x, TEMP[11].xxxx
155: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
156: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[2].xyzz
157: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
158: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
159: MAX TEMP[11].x, IMM[0].yyyy, TEMP[11].xxxx
160: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
161: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
162: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[5].xxxx
163: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].xxxx, IMM[2].yyyy
164: LG2 TEMP[13].x, TEMP[13].xxxx
165: RCP TEMP[13].x, TEMP[13].xxxx
166: MUL TEMP[13].x, IMM[1].wwww, TEMP[13].xxxx
167: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
168: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[6].xxxx
169: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[2].xxxx
170: MUL TEMP[16].x, IMM[0].zzzz, TEMP[11].xxxx
171: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
172: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[0].wwww
173: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[11].xxxx
174: ADD TEMP[16].x, IMM[0].xxxx, -TEMP[2].xxxx
175: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
176: ADD TEMP[4].x, CONST[23].xxxx, TEMP[4].xxxx
177: MOV_SAT TEMP[4].x, TEMP[4].xxxx
178: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
179: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
180: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
181: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
182: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
183: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[12].xxxx
184: LRP TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx, TEMP[12].xxxx
185: MAD TEMP[2].x, TEMP[16].xxxx, TEMP[2].xxxx, IMM[2].zzzz
186: RCP TEMP[2].x, TEMP[2].xxxx
187: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
188: MAX TEMP[10].x, IMM[0].yyyy, TEMP[10].xxxx
189: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
190: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].xxxx
191: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
192: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
193: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[10].xxxx
194: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[6].xxxx
195: MUL TEMP[2].x, TEMP[2].xxxx, CONST[18].xxxx
196: MAX TEMP[2].x, IMM[0].yyyy, TEMP[2].xxxx
197: MUL TEMP[2].xyz, TEMP[2].xxxx, TEMP[9].xyzz
198: ADD TEMP[10].xyz, IMM[0].xxxx, -TEMP[3].xyzz
199: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
200: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
201: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
202: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
203: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
204: ADD TEMP[10].x, TEMP[5].xxxx, IMM[2].wwww
205: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
206: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
207: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
208: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
209: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].xxxx
210: ADD TEMP[5].x, TEMP[5].xxxx, IMM[2].wwww
211: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
212: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
213: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
214: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
215: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].xxxx
216: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
217: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
218: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
219: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[5].xyzz
220: MAD TEMP[1].xyz, TEMP[2].xyzz, TEMP[3].xyzz, TEMP[1].xyzz
221: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
222: MOV TEMP[0].xyz, TEMP[0].xyzx
223: MAD TEMP[1].x, IN[4].xxxx, CONST[5].zzzz, CONST[5].wwww
224: MOV_SAT TEMP[1].x, TEMP[1].xxxx
225: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
226: MOV TEMP[0].xyz, TEMP[0].xyzx
227: MOV TEMP[0].w, IMM[0].xxxx
228: MOV OUT[0], TEMP[0]
229: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400)
%84 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%85 = load <32 x i8>, <32 x i8> addrspace(2)* %84, align 32, !tbaa !0
%86 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%87 = load <16 x i8>, <16 x i8> addrspace(2)* %86, align 16, !tbaa !0
%88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%89 = bitcast <8 x i32> addrspace(2)* %88 to <32 x i8> addrspace(2)*
%90 = load <32 x i8>, <32 x i8> addrspace(2)* %89, align 32, !tbaa !0
%91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%92 = bitcast <4 x i32> addrspace(2)* %91 to <16 x i8> addrspace(2)*
%93 = load <16 x i8>, <16 x i8> addrspace(2)* %92, align 16, !tbaa !0
%94 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%95 = bitcast <8 x i32> addrspace(2)* %94 to <32 x i8> addrspace(2)*
%96 = load <32 x i8>, <32 x i8> addrspace(2)* %95, align 32, !tbaa !0
%97 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%98 = bitcast <4 x i32> addrspace(2)* %97 to <16 x i8> addrspace(2)*
%99 = load <16 x i8>, <16 x i8> addrspace(2)* %98, align 16, !tbaa !0
%100 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%101 = bitcast <8 x i32> addrspace(2)* %100 to <32 x i8> addrspace(2)*
%102 = load <32 x i8>, <32 x i8> addrspace(2)* %101, align 32, !tbaa !0
%103 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%104 = bitcast <4 x i32> addrspace(2)* %103 to <16 x i8> addrspace(2)*
%105 = load <16 x i8>, <16 x i8> addrspace(2)* %104, align 16, !tbaa !0
%106 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%107 = bitcast <8 x i32> addrspace(2)* %106 to <32 x i8> addrspace(2)*
%108 = load <32 x i8>, <32 x i8> addrspace(2)* %107, align 32, !tbaa !0
%109 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%110 = bitcast <4 x i32> addrspace(2)* %109 to <16 x i8> addrspace(2)*
%111 = load <16 x i8>, <16 x i8> addrspace(2)* %110, align 16, !tbaa !0
%112 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%130 = fmul float %114, %114
%131 = fmul float %115, %115
%132 = fadd float %131, %130
%133 = fmul float %116, %116
%134 = fadd float %132, %133
%135 = call float @llvm.AMDGPU.rsq.clamped.f32(float %134)
%136 = fmul float %114, %135
%137 = fmul float %115, %135
%138 = fmul float %116, %135
%139 = bitcast float %112 to i32
%140 = bitcast float %113 to i32
%141 = insertelement <2 x i32> undef, i32 %139, i32 0
%142 = insertelement <2 x i32> %141, i32 %140, i32 1
%143 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %142, <32 x i8> %96, <16 x i8> %99, i32 2)
%144 = extractelement <4 x float> %143, i32 0
%145 = extractelement <4 x float> %143, i32 1
%146 = extractelement <4 x float> %143, i32 2
%147 = extractelement <4 x float> %143, i32 3
%148 = fmul float %147, %79
%149 = fcmp olt float %148, %80
%150 = select i1 %149, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %150)
%151 = fmul float %124, %124
%152 = fmul float %125, %125
%153 = fadd float %152, %151
%154 = fmul float %126, %126
%155 = fadd float %153, %154
%156 = call float @llvm.AMDGPU.rsq.clamped.f32(float %155)
%157 = fmul float %124, %156
%158 = fmul float %125, %156
%159 = fmul float %126, %156
%160 = fmul float %76, %144
%161 = fmul float %77, %145
%162 = fmul float %78, %146
%163 = call float @llvm.AMDGPU.lrp(float %81, float %160, float %66)
%164 = call float @llvm.AMDGPU.lrp(float %81, float %161, float %67)
%165 = call float @llvm.AMDGPU.lrp(float %81, float %162, float %68)
%166 = fmul float %81, %69
%167 = fsub float %69, %166
%168 = fmul float %160, %167
%169 = fmul float %161, %167
%170 = fmul float %162, %167
%171 = bitcast float %112 to i32
%172 = bitcast float %113 to i32
%173 = insertelement <2 x i32> undef, i32 %171, i32 0
%174 = insertelement <2 x i32> %173, i32 %172, i32 1
%175 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %174, <32 x i8> %102, <16 x i8> %105, i32 2)
%176 = extractelement <4 x float> %175, i32 1
%177 = fsub float 1.000000e+00, %83
%178 = fmul float %176, %83
%179 = fadd float %178, %177
%180 = fmul float %136, %24
%181 = fmul float %137, %25
%182 = fadd float %181, %180
%183 = fmul float %138, %26
%184 = fadd float %182, %183
%185 = call float @llvm.maxnum.f32(float %184, float 0.000000e+00)
%186 = fmul float %27, %136
%187 = fmul float %28, %137
%188 = fadd float %186, %187
%189 = fmul float %29, %138
%190 = fadd float %188, %189
%191 = fadd float %190, %30
%192 = fmul float %31, %136
%193 = fmul float %32, %137
%194 = fadd float %192, %193
%195 = fmul float %33, %138
%196 = fadd float %194, %195
%197 = fadd float %196, %34
%198 = fmul float %35, %136
%199 = fmul float %36, %137
%200 = fadd float %198, %199
%201 = fmul float %37, %138
%202 = fadd float %200, %201
%203 = fadd float %202, %38
%204 = fadd float %117, %191
%205 = fadd float %118, %197
%206 = fadd float %119, %203
%207 = fdiv float %120, %122
%208 = fdiv float %121, %122
%209 = bitcast float %207 to i32
%210 = bitcast float %208 to i32
%211 = insertelement <2 x i32> undef, i32 %209, i32 0
%212 = insertelement <2 x i32> %211, i32 %210, i32 1
%213 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %108, <16 x i8> %111, i32 2)
%214 = extractelement <4 x float> %213, i32 0
%215 = fmul float %70, %214
%216 = fmul float %71, %214
%217 = fmul float %72, %214
%218 = fmul float %204, %179
%219 = fmul float %205, %179
%220 = fmul float %206, %179
%221 = fmul float %136, %157
%222 = fmul float %137, %158
%223 = fadd float %222, %221
%224 = fmul float %138, %159
%225 = fadd float %223, %224
%226 = fmul float %225, %136
%227 = fmul float %225, %137
%228 = fmul float %225, %138
%229 = fmul float %226, 2.000000e+00
%230 = fmul float %227, 2.000000e+00
%231 = fmul float %228, 2.000000e+00
%232 = fsub float %157, %229
%233 = fsub float %158, %230
%234 = fsub float %159, %231
%235 = fcmp ogt float %51, 0.000000e+00
br i1 %235, label %IF, label %ENDIF
IF: ; preds = %main_body
%236 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%237 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%238 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%239 = fmul float %232, %232
%240 = fmul float %233, %233
%241 = fadd float %240, %239
%242 = fmul float %234, %234
%243 = fadd float %241, %242
%244 = call float @llvm.AMDGPU.rsq.clamped.f32(float %243)
%245 = fmul float %232, %244
%246 = fmul float %233, %244
%247 = fmul float %234, %244
%248 = fsub float %44, %127
%249 = fsub float %45, %128
%250 = fsub float %46, %129
%251 = fdiv float 1.000000e+00, %245
%252 = fdiv float 1.000000e+00, %246
%253 = fdiv float 1.000000e+00, %247
%254 = fmul float %248, %251
%255 = fmul float %249, %252
%256 = fmul float %250, %253
%257 = fsub float %47, %127
%258 = fsub float %48, %128
%259 = fsub float %49, %129
%260 = fdiv float 1.000000e+00, %245
%261 = fdiv float 1.000000e+00, %246
%262 = fdiv float 1.000000e+00, %247
%263 = fmul float %257, %260
%264 = fmul float %258, %261
%265 = fmul float %259, %262
%266 = fcmp ogt float %245, 0.000000e+00
%267 = fcmp ogt float %246, 0.000000e+00
%268 = fcmp ogt float %247, 0.000000e+00
%. = select i1 %266, float %254, float %263
%temp68.0 = select i1 %267, float %255, float %264
%.100 = select i1 %268, float %256, float %265
%269 = fadd float %44, %47
%270 = fadd float %45, %48
%271 = fadd float %46, %49
%272 = fmul float %269, 5.000000e-01
%273 = fmul float %270, 5.000000e-01
%274 = fmul float %271, 5.000000e-01
%275 = call float @llvm.minnum.f32(float %., float %temp68.0)
%276 = call float @llvm.minnum.f32(float %275, float %.100)
%277 = fsub float %272, %238
%278 = fsub float %273, %237
%279 = fsub float %274, %236
%280 = fadd float %277, %127
%281 = fadd float %278, %128
%282 = fadd float %279, %129
%283 = fmul float %245, %276
%284 = fadd float %283, %280
%285 = fmul float %246, %276
%286 = fadd float %285, %281
%287 = fmul float %247, %276
%288 = fadd float %287, %282
%289 = fsub float %284, %272
%290 = fsub float %286, %273
%291 = fsub float %288, %274
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp44.0 = phi float [ %289, %IF ], [ %232, %main_body ]
%temp45.0 = phi float [ %290, %IF ], [ %233, %main_body ]
%temp46.0 = phi float [ %291, %IF ], [ %234, %main_body ]
%292 = fsub float 1.000000e+00, %82
%293 = call float @llvm.pow.f32(float %292, float 7.500000e-01)
%294 = fmul float %293, 7.000000e+00
%295 = insertelement <4 x float> undef, float %temp44.0, i32 0
%296 = insertelement <4 x float> %295, float %temp45.0, i32 1
%297 = insertelement <4 x float> %296, float %temp46.0, i32 2
%298 = insertelement <4 x float> %297, float %294, i32 3
%299 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %298)
%300 = extractelement <4 x float> %299, i32 0
%301 = extractelement <4 x float> %299, i32 1
%302 = extractelement <4 x float> %299, i32 2
%303 = extractelement <4 x float> %299, i32 3
%304 = call float @llvm.fabs.f32(float %302)
%305 = fdiv float 1.000000e+00, %304
%306 = fmul float %300, %305
%307 = fadd float %306, 1.500000e+00
%308 = fmul float %301, %305
%309 = fadd float %308, 1.500000e+00
%310 = bitcast float %309 to i32
%311 = bitcast float %307 to i32
%312 = bitcast float %303 to i32
%313 = bitcast float %294 to i32
%314 = insertelement <4 x i32> undef, i32 %310, i32 0
%315 = insertelement <4 x i32> %314, i32 %311, i32 1
%316 = insertelement <4 x i32> %315, i32 %312, i32 2
%317 = insertelement <4 x i32> %316, i32 %313, i32 3
%318 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %317, <32 x i8> %85, <16 x i8> %87, i32 4)
%319 = extractelement <4 x float> %318, i32 0
%320 = extractelement <4 x float> %318, i32 1
%321 = extractelement <4 x float> %318, i32 2
%322 = extractelement <4 x float> %318, i32 3
%323 = call float @llvm.pow.f32(float %322, float %53)
%324 = fmul float %52, %323
%325 = fmul float %324, %319
%326 = fmul float %324, %320
%327 = fmul float %324, %321
%328 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %328, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
%329 = fcmp ogt float %63, 0.000000e+00
br i1 %329, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
%temp28.0 = phi float [ %557, %ENDIF88 ], [ %325, %ENDIF ]
%temp29.0 = phi float [ %558, %ENDIF88 ], [ %326, %ENDIF ]
%temp30.0 = phi float [ %559, %ENDIF88 ], [ %327, %ENDIF ]
%330 = fmul float %temp28.0, %179
%331 = fmul float %temp29.0, %179
%332 = fmul float %temp30.0, %179
%333 = fsub float 1.000000e+00, %82
%334 = fsub float %24, %157
%335 = fsub float %25, %158
%336 = fsub float %26, %159
%337 = fmul float %334, %334
%338 = fmul float %335, %335
%339 = fadd float %338, %337
%340 = fmul float %336, %336
%341 = fadd float %339, %340
%342 = call float @llvm.AMDGPU.rsq.clamped.f32(float %341)
%343 = fmul float %334, %342
%344 = fmul float %335, %342
%345 = fmul float %336, %342
%346 = fmul float %157, %136
%347 = fsub float -0.000000e+00, %346
%348 = fmul float %158, %137
%349 = fsub float %347, %348
%350 = fmul float %159, %138
%351 = fsub float %349, %350
%352 = call float @llvm.maxnum.f32(float %351, float 0.000000e+00)
%353 = fmul float %24, %343
%354 = fmul float %25, %344
%355 = fadd float %354, %353
%356 = fmul float %26, %345
%357 = fadd float %355, %356
%358 = call float @llvm.maxnum.f32(float %357, float 0.000000e+00)
%359 = fmul float %333, %333
%360 = fmul float %359, %75
%361 = fsub float 1.000000e+00, %333
%362 = fmul float %361, 0x3FEEF9DB20000000
%363 = fadd float %362, 0x3F9EB851E0000000
%364 = call float @llvm.log2.f32(float %363)
%365 = fdiv float 1.000000e+00, %364
%366 = fmul float %365, 1.000000e+01
%367 = fmul float %366, %366
%368 = fsub float 1.000000e+00, %185
%369 = fsub float 1.000000e+00, %352
%370 = fmul float %358, 2.000000e+00
%371 = fmul float %358, %333
%372 = fmul float %370, %371
%373 = fadd float %372, 5.000000e-01
%374 = fsub float 1.000000e+00, %358
%375 = fsub float 1.000000e+00, %352
%376 = fsub float 1.000000e+00, %167
%377 = fadd float %82, %376
%378 = call float @llvm.AMDIL.clamp.(float %377, float 0.000000e+00, float 1.000000e+00)
%379 = fmul float %375, %375
%380 = fmul float %375, %375
%381 = fmul float %380, %375
%382 = fmul float %379, %381
%383 = call float @llvm.AMDGPU.lrp(float %382, float %378, float %163)
%384 = call float @llvm.AMDGPU.lrp(float %382, float %378, float %164)
%385 = call float @llvm.AMDGPU.lrp(float %382, float %378, float %165)
%386 = call float @llvm.AMDGPU.lrp(float %185, float 1.000000e+00, float %360)
%387 = call float @llvm.AMDGPU.lrp(float %352, float 1.000000e+00, float %360)
%388 = fmul float %386, %387
%389 = fadd float %388, 0x3F1A36E2E0000000
%390 = fdiv float 1.000000e+00, %389
%391 = fmul float %136, %343
%392 = fmul float %137, %344
%393 = fadd float %392, %391
%394 = fmul float %138, %345
%395 = fadd float %393, %394
%396 = call float @llvm.maxnum.f32(float %395, float 0.000000e+00)
%397 = call float @llvm.pow.f32(float %396, float %367)
%398 = fadd float %367, 1.000000e+00
%399 = fmul float %398, %74
%400 = fmul float %397, %399
%401 = fmul float %390, %400
%402 = fmul float %401, %185
%403 = fmul float %402, %73
%404 = call float @llvm.maxnum.f32(float %403, float 0.000000e+00)
%405 = fmul float %404, %215
%406 = fmul float %404, %216
%407 = fmul float %404, %217
%408 = fsub float 1.000000e+00, %163
%409 = fsub float 1.000000e+00, %164
%410 = fsub float 1.000000e+00, %165
%411 = fmul float %374, %374
%412 = fmul float %374, %374
%413 = fmul float %412, %374
%414 = fmul float %411, %413
%415 = fmul float %408, %414
%416 = fadd float %415, %163
%417 = fmul float %409, %414
%418 = fadd float %417, %164
%419 = fmul float %410, %414
%420 = fadd float %419, %165
%421 = fadd float %373, -1.000000e+00
%422 = fmul float %368, %368
%423 = fmul float %368, %368
%424 = fmul float %423, %368
%425 = fmul float %422, %424
%426 = fmul float %421, %425
%427 = fadd float %426, 1.000000e+00
%428 = fadd float %373, -1.000000e+00
%429 = fmul float %369, %369
%430 = fmul float %369, %369
%431 = fmul float %430, %369
%432 = fmul float %429, %431
%433 = fmul float %428, %432
%434 = fadd float %433, 1.000000e+00
%435 = fmul float %427, %434
%436 = fmul float %435, %185
%437 = fmul float %215, %436
%438 = fadd float %437, %218
%439 = fmul float %216, %436
%440 = fadd float %439, %219
%441 = fmul float %217, %436
%442 = fadd float %441, %220
%443 = fmul float %168, %438
%444 = fmul float %169, %440
%445 = fmul float %170, %442
%446 = fmul float %405, %416
%447 = fadd float %446, %443
%448 = fmul float %406, %418
%449 = fadd float %448, %444
%450 = fmul float %407, %420
%451 = fadd float %450, %445
%452 = fmul float %330, %383
%453 = fadd float %452, %447
%454 = fmul float %331, %384
%455 = fadd float %454, %449
%456 = fmul float %332, %385
%457 = fadd float %456, %451
%458 = fmul float %123, %42
%459 = fadd float %458, %43
%460 = call float @llvm.AMDIL.clamp.(float %459, float 0.000000e+00, float 1.000000e+00)
%461 = call float @llvm.AMDGPU.lrp(float %460, float %453, float %39)
%462 = call float @llvm.AMDGPU.lrp(float %460, float %455, float %40)
%463 = call float @llvm.AMDGPU.lrp(float %460, float %457, float %41)
%464 = call i32 @llvm.SI.packf16(float %461, float %462)
%465 = bitcast i32 %464 to float
%466 = call i32 @llvm.SI.packf16(float %463, float 1.000000e+00)
%467 = bitcast i32 %466 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %465, float %467, float %465, float %467)
ret void
IF89: ; preds = %IF86
%468 = fmul float %232, %232
%469 = fmul float %233, %233
%470 = fadd float %469, %468
%471 = fmul float %234, %234
%472 = fadd float %470, %471
%473 = call float @llvm.AMDGPU.rsq.clamped.f32(float %472)
%474 = fmul float %232, %473
%475 = fmul float %233, %473
%476 = fmul float %234, %473
%477 = fsub float %54, %127
%478 = fsub float %55, %128
%479 = fsub float %56, %129
%480 = fdiv float 1.000000e+00, %474
%481 = fdiv float 1.000000e+00, %475
%482 = fdiv float 1.000000e+00, %476
%483 = fmul float %477, %480
%484 = fmul float %478, %481
%485 = fmul float %479, %482
%486 = fsub float %57, %127
%487 = fsub float %58, %128
%488 = fsub float %59, %129
%489 = fdiv float 1.000000e+00, %474
%490 = fdiv float 1.000000e+00, %475
%491 = fdiv float 1.000000e+00, %476
%492 = fmul float %486, %489
%493 = fmul float %487, %490
%494 = fmul float %488, %491
%495 = fcmp ogt float %474, 0.000000e+00
%496 = fcmp ogt float %475, 0.000000e+00
%497 = fcmp ogt float %476, 0.000000e+00
%.101 = select i1 %495, float %483, float %492
%temp68.1 = select i1 %496, float %484, float %493
%.102 = select i1 %497, float %485, float %494
%498 = fadd float %54, %57
%499 = fadd float %55, %58
%500 = fadd float %56, %59
%501 = fmul float %498, 5.000000e-01
%502 = fmul float %499, 5.000000e-01
%503 = fmul float %500, 5.000000e-01
%504 = call float @llvm.minnum.f32(float %.101, float %temp68.1)
%505 = call float @llvm.minnum.f32(float %504, float %.102)
%506 = fsub float %501, %60
%507 = fsub float %502, %61
%508 = fsub float %503, %62
%509 = fadd float %506, %127
%510 = fadd float %507, %128
%511 = fadd float %508, %129
%512 = fmul float %474, %505
%513 = fadd float %512, %509
%514 = fmul float %475, %505
%515 = fadd float %514, %510
%516 = fmul float %476, %505
%517 = fadd float %516, %511
%518 = fsub float %513, %501
%519 = fsub float %515, %502
%520 = fsub float %517, %503
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
%temp48.0 = phi float [ %518, %IF89 ], [ %232, %IF86 ]
%temp49.0 = phi float [ %519, %IF89 ], [ %233, %IF86 ]
%temp50.0 = phi float [ %520, %IF89 ], [ %234, %IF86 ]
%521 = fsub float 1.000000e+00, %82
%522 = call float @llvm.pow.f32(float %521, float 7.500000e-01)
%523 = fmul float %522, 7.000000e+00
%524 = insertelement <4 x float> undef, float %temp48.0, i32 0
%525 = insertelement <4 x float> %524, float %temp49.0, i32 1
%526 = insertelement <4 x float> %525, float %temp50.0, i32 2
%527 = insertelement <4 x float> %526, float %523, i32 3
%528 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %527)
%529 = extractelement <4 x float> %528, i32 0
%530 = extractelement <4 x float> %528, i32 1
%531 = extractelement <4 x float> %528, i32 2
%532 = extractelement <4 x float> %528, i32 3
%533 = call float @llvm.fabs.f32(float %531)
%534 = fdiv float 1.000000e+00, %533
%535 = fmul float %529, %534
%536 = fadd float %535, 1.500000e+00
%537 = fmul float %530, %534
%538 = fadd float %537, 1.500000e+00
%539 = bitcast float %538 to i32
%540 = bitcast float %536 to i32
%541 = bitcast float %532 to i32
%542 = bitcast float %523 to i32
%543 = insertelement <4 x i32> undef, i32 %539, i32 0
%544 = insertelement <4 x i32> %543, i32 %540, i32 1
%545 = insertelement <4 x i32> %544, i32 %541, i32 2
%546 = insertelement <4 x i32> %545, i32 %542, i32 3
%547 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %546, <32 x i8> %90, <16 x i8> %93, i32 4)
%548 = extractelement <4 x float> %547, i32 0
%549 = extractelement <4 x float> %547, i32 1
%550 = extractelement <4 x float> %547, i32 2
%551 = extractelement <4 x float> %547, i32 3
%552 = call float @llvm.pow.f32(float %551, float %65)
%553 = fmul float %64, %552
%554 = fmul float %553, %548
%555 = fmul float %553, %549
%556 = fmul float %553, %550
%557 = call float @llvm.AMDGPU.lrp(float %50, float %325, float %554)
%558 = call float @llvm.AMDGPU.lrp(float %50, float %326, float %555)
%559 = call float @llvm.AMDGPU.lrp(float %50, float %327, float %556)
br label %ENDIF85
}
; ---------------------------------------------------------------------------
; External declarations, attribute groups and metadata for the IR above.
; Every function here is declared without a body; the shader body above only
; calls them. The trailing "#N" on a declaration selects one of the attribute
; groups defined at the end of this block:
;   #1 = nounwind readnone,  #2 = readnone.
; llvm.AMDGPU.kill and llvm.SI.export carry no attribute group at all.
; ---------------------------------------------------------------------------
; Called above with a <16 x i8> value and a constant i32 (160/164/168);
; returns one float. NOTE(review): the i32 looks like a byte offset - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Called above with two constant i32 selectors followed by %5 and %7;
; returns one float per call.
; NOTE(review): selectors are presumably (channel, attribute index) - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Applied above to x*x + y*y + z*z sums; the result then scales x, y and z.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Operands as used above: <2 x i32> built from bitcast floats, a <32 x i8>
; value, a <16 x i8> value, and the constant i32 2. Returns <4 x float>.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Called above with a select that yields -1.0 or 0.0. No attribute group.
declare void @llvm.AMDGPU.kill(float)
; Three-float helper returning one float.
; NOTE(review): presumably a linear interpolation; operand order not
; established by this dump - confirm.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Takes and returns <4 x float>; above, all four result lanes are extracted
; and used to build the operand vector for llvm.SI.samplel.v4i32.
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Operands as used above: <4 x i32> built from bitcast floats, a <32 x i8>
; value, a <16 x i8> value, and the constant i32 4. Returns <4 x float>.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Called above as (x, 0.0, 1.0). The trailing '.' is part of the symbol name.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Takes two floats and returns an i32; above, the result is bitcast to float
; and passed to llvm.SI.export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Five i32 operands followed by four float operands. No attribute group.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute groups. #0 is not referenced by any declaration in this block.
; NOTE(review): #0 is presumably attached to this shader's function
; definition, which lies outside this excerpt - confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node referenced as "!tbaa !0" by the loads in the function above.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000
v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001
v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100
v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500
v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501
v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600
v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601
v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800
v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801
v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900
v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901
v_interp_p1_f32 v8, v0, 2, 2, [m0] ; C8200A00
v_interp_p2_f32 v8, [v8], v1, 2, 2, [m0] ; C8210A01
v_interp_p1_f32 v6, v0, 0, 3, [m0] ; C8180C00
v_interp_p2_f32 v6, [v6], v1, 0, 3, [m0] ; C8190C01
v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00
v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01
v_interp_p1_f32 v19, v0, 3, 3, [m0] ; C84C0F00
v_interp_p2_f32 v19, [v19], v1, 3, 3, [m0] ; C84D0F01
v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000
v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001
v_interp_p1_f32 v21, v0, 1, 4, [m0] ; C8541100
v_interp_p2_f32 v21, [v21], v1, 1, 4, [m0] ; C8551101
v_interp_p1_f32 v22, v0, 2, 4, [m0] ; C8581200
v_interp_p2_f32 v22, [v22], v1, 2, 4, [m0] ; C8591201
v_interp_p1_f32 v23, v0, 3, 4, [m0] ; C85C1300
v_interp_p2_f32 v23, [v23], v1, 3, 4, [m0] ; C85D1301
v_interp_p1_f32 v25, v0, 0, 5, [m0] ; C8641400
v_interp_p2_f32 v25, [v25], v1, 0, 5, [m0] ; C8651401
v_interp_p1_f32 v20, v0, 1, 5, [m0] ; C8501500
v_interp_p2_f32 v20, [v20], v1, 1, 5, [m0] ; C8511501
v_interp_p1_f32 v26, v0, 2, 5, [m0] ; C8681600
v_interp_p2_f32 v26, [v26], v1, 2, 5, [m0] ; C8691601
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[24:31], s[6:7], 0x10 ; C0CC0710
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_mul_f32_e32 v0, v2, v2 ; 10000502
v_mac_f32_e32 v0, v3, v3 ; 3E000703
v_mac_f32_e32 v0, v4, v4 ; 3E000904
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
s_load_dwordx4 s[20:23], s[4:5], 0xc ; C08A050C
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx8 s[36:43], s[6:7], 0x18 ; C0D20718
s_load_dwordx8 s[44:51], s[6:7], 0x20 ; C0D60720
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[12:15] ; F0800F00 00661B10
s_buffer_load_dword s1, s[8:11], 0x4f ; C200894F
s_buffer_load_dword s2, s[8:11], 0x50 ; C2010950
v_mul_f32_e32 v2, v0, v2 ; 10040500
v_mul_f32_e32 v1, v0, v3 ; 10020700
v_mul_f32_e32 v0, v0, v4 ; 10000900
s_buffer_load_dword s3, s[8:11], 0x4c ; C201894C
s_buffer_load_dword s12, s[8:11], 0x4d ; C206094D
s_buffer_load_dword s13, s[8:11], 0x4e ; C206894E
s_buffer_load_dword s30, s[8:11], 0x58 ; C20F0958
s_buffer_load_dword s0, s[8:11], 0x5c ; C200095C
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v3, s1, v30 ; 10063C01
v_cmp_gt_f32_e32 vcc, s2, v3 ; 7C080602
v_cndmask_b32_e64 v3, 0, -1.0, vcc ; D2000003 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680
s_cbranch_execnz 3 ; BF890003
exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000
s_endpgm ; BF810000
v_mul_f32_e32 v3, v21, v21 ; 10062B15
v_mac_f32_e32 v3, v22, v22 ; 3E062D16
v_mac_f32_e32 v3, v23, v23 ; 3E062F17
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mul_f32_e32 v13, s3, v27 ; 101A3603
v_mul_f32_e32 v14, s12, v28 ; 101C380C
v_mul_f32_e32 v15, s13, v29 ; 101E3A0D
v_mul_f32_e32 v11, v3, v21 ; 10162B03
v_mul_f32_e32 v10, v3, v22 ; 10142D03
v_mul_f32_e32 v4, v11, v2 ; 1008050B
v_mac_f32_e32 v4, v10, v1 ; 3E08030A
v_mul_f32_e32 v12, v3, v23 ; 10182F03
v_mac_f32_e32 v4, v12, v0 ; 3E08010C
v_mul_f32_e32 v24, v2, v4 ; 10300902
v_mac_f32_e32 v24, v2, v4 ; 3E300902
v_mul_f32_e32 v28, v1, v4 ; 10380901
v_mac_f32_e32 v28, v1, v4 ; 3E380901
v_mad_f32 v27, v21, v3, -v24 ; D282001B 84620715
v_mad_f32 v28, v22, v3, -v28 ; D282001C 84720716
v_mov_b32_e32 v21, 0x6f800000 ; 7E2A02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v19|, v21 ; D008016A 00022B13
v_mov_b32_e32 v21, 0x2f800000 ; 7E2A02FF 2F800000
v_cndmask_b32_e32 v21, 1.0, v21 ; 002A2AF2
v_mul_f32_e32 v19, v21, v19 ; 10262715
v_rcp_f32_e32 v19, v19 ; 7E265513
v_mul_f32_e32 v22, v0, v4 ; 102C0900
v_mac_f32_e32 v22, v0, v4 ; 3E2C0900
v_mad_f32 v29, v23, v3, -v22 ; D282001D 845A0717
v_mul_f32_e32 v3, v19, v6 ; 10060D13
v_mul_f32_e32 v4, v19, v18 ; 10082513
s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940
s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941
s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942
v_mul_f32_e32 v18, v3, v21 ; 10242B03
v_mul_f32_e32 v19, v4, v21 ; 10262B04
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s32, s[8:11], 0x2c ; C210092C
s_buffer_load_dword s33, s[8:11], 0x2d ; C210892D
v_sub_f32_e64 v3, 1.0, s30 ; D2080003 00003CF2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s1, v3 ; 100C0601
v_mul_f32_e32 v4, s2, v3 ; 10080602
v_mul_f32_e32 v3, s3, v3 ; 10060603
v_mac_f32_e32 v6, s30, v13 ; 3E0C1A1E
v_mov_b32_e32 v30, v27 ; 7E3C031B
v_mac_f32_e32 v4, s30, v14 ; 3E081C1E
v_mov_b32_e32 v31, v28 ; 7E3E031C
v_mac_f32_e32 v3, s30, v15 ; 3E061E1E
v_mov_b32_e32 v32, v29 ; 7E40031D
v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80
image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[36:43], s[20:23] ; F0800F00 00A91510
image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[44:51], s[16:19] ; F0800F00 008B1012
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[14:15], s[2:3] ; BE8E2402
s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s16, s[8:11], 0x25 ; C2080925
v_mul_f32_e32 v17, v27, v27 ; 1022371B
v_mac_f32_e32 v17, v28, v28 ; 3E22391C
v_mac_f32_e32 v17, v29, v29 ; 3E223B1D
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
s_buffer_load_dword s17, s[8:11], 0x26 ; C2088926
s_buffer_load_dword s18, s[8:11], 0x28 ; C2090928
s_buffer_load_dword s19, s[8:11], 0x29 ; C2098929
s_buffer_load_dword s20, s[8:11], 0x2a ; C20A092A
v_mul_f32_e32 v18, v17, v27 ; 10243711
v_mul_f32_e32 v19, v17, v28 ; 10263911
v_mul_f32_e32 v17, v17, v29 ; 10223B11
v_rcp_f32_e32 v21, v18 ; 7E2A5512
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v23, s1, v25 ; 082E3201
v_sub_f32_e32 v24, s2, v20 ; 08302802
v_rcp_f32_e32 v30, v19 ; 7E3C5513
v_mul_f32_e32 v23, v21, v23 ; 102E2F15
v_sub_f32_e32 v31, s13, v25 ; 083E320D
v_mul_f32_e32 v21, v21, v31 ; 102A3F15
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v21, v21, v23 ; 002A2F15
v_rcp_f32_e32 v23, v17 ; 7E2E5511
v_mul_f32_e32 v24, v30, v24 ; 1030311E
v_sub_f32_e32 v31, s16, v20 ; 083E2810
v_mul_f32_e32 v30, v30, v31 ; 103C3F1E
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v24, v30, v24 ; 0030311E
v_sub_f32_e32 v30, s3, v26 ; 083C3403
v_mul_f32_e32 v30, v23, v30 ; 103C3D17
v_sub_f32_e32 v31, s17, v26 ; 083E3411
v_mul_f32_e32 v23, v23, v31 ; 102E3F17
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v23, v23, v30 ; 002E3D17
v_min3_f32 v21, v21, v24, v23 ; D2A20015 045E3115
v_mov_b32_e32 v23, s13 ; 7E2E020D
v_add_f32_e32 v23, s1, v23 ; 062E2E01
v_mov_b32_e32 v24, s16 ; 7E300210
v_add_f32_e32 v24, s2, v24 ; 06303002
v_mov_b32_e32 v30, s17 ; 7E3C0211
v_add_f32_e32 v32, s3, v30 ; 06403C03
v_mad_f32 v30, 0.5, v23, -s18 ; D282001E 804A2EF0
v_add_f32_e32 v30, v25, v30 ; 063C3D19
v_mac_f32_e32 v30, v21, v18 ; 3E3C2515
v_mad_f32 v18, 0.5, v24, -s19 ; D2820012 804E30F0
v_add_f32_e32 v18, v20, v18 ; 06242514
v_mac_f32_e32 v18, v21, v19 ; 3E242715
v_mad_f32 v19, 0.5, v32, -s20 ; D2820013 805240F0
v_add_f32_e32 v19, v26, v19 ; 0626271A
v_mac_f32_e32 v19, v21, v17 ; 3E262315
v_mad_f32 v30, 0.5, -v23, v30 ; D282001E 447A2EF0
v_mad_f32 v31, 0.5, -v24, v18 ; D282001F 444A30F0
v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0
s_or_b64 exec, exec, s[14:15] ; 88FE0E7E
s_buffer_load_dword s27, s[8:11], 0x17 ; C20D8917
s_buffer_load_dword s28, s[8:11], 0x43 ; C20E0943
s_buffer_load_dword s26, s[8:11], 0x44 ; C20D0944
s_buffer_load_dword s19, s[8:11], 0x45 ; C2098945
s_buffer_load_dword s16, s[8:11], 0x46 ; C2080946
s_buffer_load_dword s2, s[8:11], 0x0 ; C2010900
s_buffer_load_dword s3, s[8:11], 0x1 ; C2018901
s_buffer_load_dword s1, s[8:11], 0x2 ; C2008902
s_buffer_load_dword s13, s[8:11], 0x4 ; C2068904
s_buffer_load_dword s14, s[8:11], 0x5 ; C2070905
s_buffer_load_dword s15, s[8:11], 0x6 ; C2078906
s_buffer_load_dword s17, s[8:11], 0x7 ; C2088907
s_buffer_load_dword s18, s[8:11], 0x8 ; C2090908
s_buffer_load_dword s20, s[8:11], 0x9 ; C20A0909
s_buffer_load_dword s21, s[8:11], 0xa ; C20A890A
s_buffer_load_dword s22, s[8:11], 0xb ; C20B090B
s_buffer_load_dword s23, s[8:11], 0xc ; C20B890C
s_buffer_load_dword s24, s[8:11], 0xd ; C20C090D
s_buffer_load_dword s25, s[8:11], 0xe ; C20C890E
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000
v_cubeid_f32 v37, v30, v31, v32 ; D2880025 04823F1E
v_cubema_f32 v36, v30, v31, v32 ; D28E0024 04823F1E
s_load_dwordx4 s[36:39], s[4:5], 0x0 ; C0920500
s_load_dwordx8 s[40:47], s[6:7], 0x0 ; C0D40700
v_cubesc_f32 v35, v30, v31, v32 ; D28A0023 04823F1E
v_cubetc_f32 v34, v30, v31, v32 ; D28C0022 04823F1E
v_rcp_f32_e64 v17, |v36| ; D3540111 00000124
v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000
v_mad_f32 v31, v17, v34, v30 ; D282001F 047A4511
v_mac_f32_e32 v30, v17, v35 ; 3E3C4711
v_mov_b32_e32 v32, v37 ; 7E400325
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[40:47], s[36:39] ; F0900F00 012A1E1E
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v33 ; 7E224F21
s_buffer_load_dword s29, s[8:11], 0xf ; C20E890F
s_buffer_load_dword s31, s[8:11], 0x64 ; C20F8964
v_mul_legacy_f32_e32 v17, s33, v17 ; 0E222221
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s32, v17 ; 10222220
v_mul_f32_e32 v19, v30, v17 ; 1026231E
v_mul_f32_e32 v18, v31, v17 ; 1024231F
v_mul_f32_e32 v17, v32, v17 ; 10222320
v_mov_b32_e32 v21, s30 ; 7E2A021E
v_mov_b32_e32 v23, 0x3f7fff58 ; 7E2E02FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v23 ; 7C022E0C
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x3b ; C211893B
s_buffer_load_dword s30, s[8:11], 0x3c ; C20F093C
s_buffer_load_dword s34, s[8:11], 0x3d ; C211093D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s35, s[8:11], 0x36 ; C2118936
s_buffer_load_dword s38, s[8:11], 0x38 ; C2130938
s_buffer_load_dword s39, s[8:11], 0x39 ; C2138939
s_buffer_load_dword s40, s[8:11], 0x3a ; C214093A
s_buffer_load_dword s41, s[8:11], 0x30 ; C2148930
s_buffer_load_dword s42, s[8:11], 0x31 ; C2150931
s_buffer_load_dword s43, s[8:11], 0x32 ; C2158932
s_buffer_load_dword s44, s[8:11], 0x34 ; C2160934
s_buffer_load_dword s45, s[8:11], 0x35 ; C2168935
v_mul_f32_e32 v23, v27, v27 ; 102E371B
v_mac_f32_e32 v23, v28, v28 ; 3E2E391C
v_mac_f32_e32 v23, v29, v29 ; 3E2E3B1D
v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v24, s35, v26 ; 08303423
v_mov_b32_e32 v30, s35 ; 7E3C0223
v_sub_f32_e32 v31, s41, v25 ; 083E3229
v_sub_f32_e32 v32, s42, v20 ; 0840282A
v_add_f32_e32 v30, s43, v30 ; 063C3C2B
v_sub_f32_e32 v33, s43, v26 ; 0842342B
v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0
v_add_f32_e32 v26, v26, v34 ; 0634451A
v_mul_f32_e32 v27, v23, v27 ; 10363717
v_mul_f32_e32 v28, v23, v28 ; 10383917
v_mul_f32_e32 v23, v23, v29 ; 102E3B17
v_rcp_f32_e32 v29, v27 ; 7E3A551B
v_rcp_f32_e32 v34, v28 ; 7E44551C
v_rcp_f32_e32 v35, v23 ; 7E465517
v_sub_f32_e32 v36, s44, v25 ; 0848322C
v_mov_b32_e32 v37, s44 ; 7E4A022C
v_add_f32_e32 v37, s41, v37 ; 064A4A29
v_mul_f32_e32 v31, v29, v31 ; 103E3F1D
v_mul_f32_e32 v29, v29, v36 ; 103A491D
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v24, v35, v24 ; 10303123
v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0
v_add_f32_e32 v25, v25, v35 ; 06324719
v_sub_f32_e32 v35, s45, v20 ; 0846282D
v_mov_b32_e32 v36, s45 ; 7E48022D
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s42, v36 ; 0646482A
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D
v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v24, v24, v33 ; 00304318
v_min3_f32 v24, v29, v31, v24 ; D2A20018 04623F1D
v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0
v_add_f32_e32 v20, v20, v29 ; 06283B14
v_mac_f32_e32 v25, v24, v27 ; 3E323718
v_mac_f32_e32 v20, v24, v28 ; 3E283918
v_mac_f32_e32 v26, v24, v23 ; 3E342F18
v_mad_f32 v27, 0.5, -v37, v25 ; D282001B 44664AF0
v_mad_f32 v28, 0.5, -v35, v20 ; D282001C 445246F0
v_mad_f32 v29, 0.5, -v30, v26 ; D282001D 446A3CF0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2
v_log_f32_e32 v20, v20 ; 7E284F14
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v20, 0x3f400000, v20 ; 0E2828FF 3F400000
v_exp_f32_e32 v20, v20 ; 7E284B14
v_mul_f32_e32 v30, 0x40e00000, v20 ; 103C28FF 40E00000
v_cubeid_f32 v26, v27, v28, v29 ; D288001A 0476391B
v_cubema_f32 v25, v27, v28, v29 ; D28E0019 0476391B
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v24, v27, v28, v29 ; D28A0018 0476391B
v_cubetc_f32 v23, v27, v28, v29 ; D28C0017 0476391B
v_rcp_f32_e64 v20, |v25| ; D3540114 00000119
v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000
v_mad_f32 v28, v20, v23, v27 ; D282001C 046E2F14
v_mac_f32_e32 v27, v20, v24 ; 3E363114
v_mov_b32_e32 v29, v26 ; 7E3A031A
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[23:26], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A171B
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v20, v26 ; 7E284F1A
v_sub_f32_e64 v26, 1.0, s12 ; D208001A 000018F2
v_mul_legacy_f32_e32 v20, s34, v20 ; 0E282822
v_exp_f32_e32 v20, v20 ; 7E284B14
v_mul_f32_e32 v20, s30, v20 ; 1028281E
v_mul_f32_e32 v23, v23, v20 ; 102E2917
v_mul_f32_e32 v24, v24, v20 ; 10302918
v_mul_f32_e32 v20, v25, v20 ; 10282919
v_mul_f32_e32 v23, v23, v26 ; 102E3517
v_mul_f32_e32 v24, v24, v26 ; 10303518
v_mul_f32_e32 v20, v20, v26 ; 10283514
v_mac_f32_e32 v23, s12, v19 ; 3E2E260C
v_mac_f32_e32 v24, s12, v18 ; 3E30240C
v_mac_f32_e32 v20, s12, v17 ; 3E28220C
v_mov_b32_e32 v17, v20 ; 7E220314
v_mov_b32_e32 v18, v24 ; 7E240318
v_mov_b32_e32 v19, v23 ; 7E260317
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mad_f32 v24, -v21, s28, s28 ; D2820018 20703915
v_mov_b32_e32 v20, s27 ; 7E28021B
v_mul_f32_e32 v21, v24, v13 ; 102A1B18
v_mul_f32_e32 v14, v24, v14 ; 101C1D18
v_mul_f32_e32 v13, v24, v15 ; 101A1F18
v_mul_f32_e32 v15, s26, v16 ; 101E201A
v_sub_f32_e64 v25, 1.0, s31 ; D2080019 00003EF2
v_mac_f32_e32 v25, s31, v22 ; 3E322C1F
v_mul_f32_e32 v22, s19, v16 ; 102C2013
v_mul_f32_e32 v16, s16, v16 ; 10202010
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s16, s[8:11], 0x16 ; C2080916
s_buffer_load_dword s7, s[8:11], 0x48 ; C2038948
s_buffer_load_dword s12, s[8:11], 0x49 ; C2060949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
v_mul_f32_e32 v23, s14, v1 ; 102E020E
v_mac_f32_e32 v23, s13, v2 ; 3E2E040D
v_mac_f32_e32 v23, s15, v0 ; 3E2E000F
v_add_f32_e32 v23, s17, v23 ; 062E2E11
v_mul_f32_e32 v26, s20, v1 ; 10340214
v_mac_f32_e32 v26, s18, v2 ; 3E340412
v_mac_f32_e32 v26, s21, v0 ; 3E340015
v_add_f32_e32 v26, s22, v26 ; 06343416
v_mul_f32_e32 v27, s24, v1 ; 10360218
v_mac_f32_e32 v27, s23, v2 ; 3E360417
v_mac_f32_e32 v27, s25, v0 ; 3E360019
v_add_f32_e32 v27, s29, v27 ; 0636361D
v_add_f32_e32 v5, v23, v5 ; 060A0B17
v_add_f32_e32 v7, v26, v7 ; 060E0F1A
v_add_f32_e32 v26, v27, v8 ; 0634111B
v_mul_f32_e32 v8, s2, v2 ; 10100402
v_mac_f32_e32 v8, s3, v1 ; 3E100203
v_mac_f32_e32 v8, s1, v0 ; 3E100001
v_max_f32_e32 v23, 0, v8 ; 202E1080
v_mul_f32_e32 v8, v25, v5 ; 10100B19
v_mul_f32_e32 v5, v25, v7 ; 100A0F19
v_mul_f32_e32 v7, v25, v26 ; 100E3519
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v20, s16, v9 ; 3E281210
v_mul_f32_e32 v9, v25, v19 ; 10122719
v_mul_f32_e32 v18, v25, v18 ; 10242519
v_mul_f32_e32 v17, v25, v17 ; 10222319
v_sub_f32_e32 v19, 1.0, v24 ; 082630F2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v24, s2, v11 ; 08301602
v_sub_f32_e32 v25, s3, v10 ; 08321403
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_sub_f32_e32 v27, s1, v12 ; 08361801
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_mul_f32_e32 v26, v26, v27 ; 1034371A
v_mul_f32_e32 v11, v11, v2 ; 1016050B
v_mad_f32 v10, -v10, v1, -v11 ; D282000A A42E030A
v_mad_f32 v10, -v12, v0, v10 ; D282000A 242A010C
v_mul_f32_e32 v2, v24, v2 ; 10040518
v_mac_f32_e32 v2, v25, v1 ; 3E040319
v_mul_f32_e32 v1, s2, v24 ; 10023002
v_mac_f32_e32 v1, s3, v25 ; 3E023203
v_mac_f32_e32 v2, v26, v0 ; 3E04011A
v_mac_f32_e32 v1, s1, v26 ; 3E023401
v_max_f32_e32 v0, 0, v1 ; 20000280
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v11, v1, v1 ; 10160301
v_mul_f32_e32 v1, v1, v11 ; 10021701
v_mul_f32_e32 v1, v1, v11 ; 10021701
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v24, v11, v12 ; 1030190B
v_mad_f32 v25, -v12, v24, 1.0 ; D2820019 23CA310C
v_mul_f32_e32 v26, v6, v25 ; 10343306
v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2
v_mac_f32_e32 v6, v1, v27 ; 3E0C3701
v_mul_f32_e32 v27, v4, v25 ; 10363304
v_sub_f32_e32 v28, 1.0, v4 ; 083808F2
v_mac_f32_e32 v4, v1, v28 ; 3E083901
v_mul_f32_e32 v25, v3, v25 ; 10323303
v_sub_f32_e32 v28, 1.0, v3 ; 083806F2
v_mac_f32_e32 v3, v1, v28 ; 3E063901
v_sub_f32_e64 v1, 1.0, s0 ; D2080001 000000F2
v_sub_f32_e32 v28, 1.0, v1 ; 083802F2
v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F
v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9
v_add_f32_e32 v29, v0, v0 ; 063A0100
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mad_f32 v0, v29, v0, 0.5 ; D2820000 03C2011D
v_mul_f32_e32 v12, v24, v12 ; 10181918
v_mac_f32_e32 v26, v19, v12 ; 3E341913
v_mac_f32_e32 v27, v19, v12 ; 3E361913
v_mac_f32_e32 v25, v19, v12 ; 3E321913
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_log_f32_e32 v19, v28 ; 7E264F1C
v_mul_f32_e32 v1, s8, v1 ; 10020208
v_mul_f32_e32 v11, v1, v11 ; 10161701
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v19 ; 7E145513
v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2
v_mul_f32_e32 v1, v1, v19 ; 10022701
v_mac_f32_e32 v1, 1.0, v23 ; 3E022EF2
v_max_f32_e32 v2, 0, v2 ; 20040480
v_log_f32_e32 v2, v2 ; 7E044F02
v_madak_f32_e32 v1, v1, v11, 0x38d1b717 ; 42021701 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v2, v11, v2 ; 0E04050B
v_rcp_f32_e32 v1, v1 ; 7E025501
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s12, v10 ; 1014140C
v_exp_f32_e32 v2, v2 ; 7E044B02
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mul_f32_e32 v1, v2, v1 ; 10020302
v_mul_f32_e32 v2, v19, v19 ; 10042713
v_mul_f32_e32 v10, v19, v2 ; 10140513
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_add_f32_e32 v0, -1.0, v0 ; 060000F3
v_mad_f32 v2, v0, v2, 1.0 ; D2820002 03CA0500
v_mad_f32 v0, v0, v12, 1.0 ; D2820000 03CA1900
v_mul_f32_e32 v0, v0, v2 ; 10000500
v_mul_f32_e32 v1, v23, v1 ; 10020317
v_mul_f32_e32 v1, s7, v1 ; 10020207
v_mul_f32_e32 v0, v23, v0 ; 10000117
v_mac_f32_e32 v8, v0, v15 ; 3E101F00
v_mul_f32_e32 v2, v8, v21 ; 10042B08
v_max_f32_e32 v1, 0, v1 ; 20020280
v_mul_f32_e32 v8, v15, v1 ; 1010030F
v_mac_f32_e32 v2, v6, v8 ; 3E041106
v_mac_f32_e32 v5, v0, v22 ; 3E0A2D00
v_mac_f32_e32 v7, v0, v16 ; 3E0E2100
v_mul_f32_e32 v0, v22, v1 ; 10000316
v_mul_f32_e32 v1, v16, v1 ; 10020310
v_mul_f32_e32 v5, v5, v14 ; 100A1D05
v_mul_f32_e32 v6, v7, v13 ; 100C1B07
v_mac_f32_e32 v5, v4, v0 ; 3E0A0104
v_mac_f32_e32 v6, v3, v1 ; 3E0C0303
v_mac_f32_e32 v2, v26, v9 ; 3E04131A
v_mac_f32_e32 v5, v27, v18 ; 3E0A251B
v_mac_f32_e32 v6, v25, v17 ; 3E0C2319
v_add_f32_e64 v0, 0, v20 clamp ; D2060800 00022880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v3, s6, v1 ; 10060206
v_mac_f32_e32 v3, v2, v0 ; 3E060102
v_mul_f32_e32 v2, s5, v1 ; 10040205
v_mac_f32_e32 v2, v5, v0 ; 3E040105
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v6, v0 ; 3E020106
v_cvt_pkrtz_f16_f32_e32 v0, v3, v2 ; 5E000503
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 56
VGPRS: 40
Code Size: 2264 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL CONST[0..19]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[5], IN[0].xxxx
1: MAD TEMP[0], CONST[6], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[7], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[8], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[16], IN[0].xxxx
5: MAD TEMP[1], CONST[17], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[18], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[19], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[13].xyyy, CONST[13].zwww
9: FSEQ TEMP[3].x, CONST[15].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[14].xyyy, CONST[14].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[9].xxxx
18: MOV TEMP[3].y, CONST[10].xxxx
19: MOV TEMP[3].z, CONST[11].xxxx
20: MOV TEMP[4].x, CONST[9].yyyy
21: MOV TEMP[4].y, CONST[10].yyyy
22: MOV TEMP[4].z, CONST[11].yyyy
23: MOV TEMP[5].x, CONST[9].zzzz
24: MOV TEMP[5].y, CONST[10].zzzz
25: MOV TEMP[5].z, CONST[11].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[5].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[6].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7], TEMP[3].xyzz, TEMP[3].yzzx
45: DP4 TEMP[8].x, CONST[1], TEMP[7]
46: DP4 TEMP[9].x, CONST[2], TEMP[7]
47: MOV TEMP[8].y, TEMP[9].xxxx
48: DP4 TEMP[7].x, CONST[3], TEMP[7]
49: MOV TEMP[8].z, TEMP[7].xxxx
50: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy
51: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx
52: MAD TEMP[3].xyz, CONST[4].xyzz, TEMP[3].xxxx, TEMP[8].xyzz
53: ADD TEMP[7].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[7].yzw, TEMP[7].yxyz
55: MOV TEMP[7].x, TEMP[1].zzzz
56: MOV TEMP[0].xyz, TEMP[0].xyzx
57: MOV OUT[7], TEMP[0]
58: MOV OUT[1], TEMP[2]
59: MOV OUT[3], TEMP[5]
60: MOV OUT[2], TEMP[4]
61: MOV OUT[4], TEMP[6]
62: MOV OUT[5], TEMP[3]
63: MOV OUT[0], TEMP[1]
64: MOV OUT[6], TEMP[7]
65: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 268)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%80 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
%82 = add i32 %5, %7
%83 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %81, i32 0, i32 %82)
%84 = extractelement <4 x float> %83, i32 0
%85 = extractelement <4 x float> %83, i32 1
%86 = extractelement <4 x float> %83, i32 2
%87 = extractelement <4 x float> %83, i32 3
%88 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%89 = load <16 x i8>, <16 x i8> addrspace(2)* %88, align 16, !tbaa !0
%90 = add i32 %5, %7
%91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90)
%92 = extractelement <4 x float> %91, i32 0
%93 = extractelement <4 x float> %91, i32 1
%94 = extractelement <4 x float> %91, i32 2
%95 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%96 = load <16 x i8>, <16 x i8> addrspace(2)* %95, align 16, !tbaa !0
%97 = add i32 %5, %7
%98 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %96, i32 0, i32 %97)
%99 = extractelement <4 x float> %98, i32 0
%100 = extractelement <4 x float> %98, i32 1
%101 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%102 = load <16 x i8>, <16 x i8> addrspace(2)* %101, align 16, !tbaa !0
%103 = add i32 %5, %7
%104 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %102, i32 0, i32 %103)
%105 = extractelement <4 x float> %104, i32 0
%106 = extractelement <4 x float> %104, i32 1
%107 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%108 = load <16 x i8>, <16 x i8> addrspace(2)* %107, align 16, !tbaa !0
%109 = add i32 %5, %7
%110 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %108, i32 0, i32 %109)
%111 = extractelement <4 x float> %110, i32 0
%112 = extractelement <4 x float> %110, i32 1
%113 = extractelement <4 x float> %110, i32 2
%114 = extractelement <4 x float> %110, i32 3
%115 = fmul float %31, %84
%116 = fmul float %32, %84
%117 = fmul float %33, %84
%118 = fmul float %34, %84
%119 = fmul float %35, %85
%120 = fadd float %119, %115
%121 = fmul float %36, %85
%122 = fadd float %121, %116
%123 = fmul float %37, %85
%124 = fadd float %123, %117
%125 = fmul float %38, %85
%126 = fadd float %125, %118
%127 = fmul float %39, %86
%128 = fadd float %127, %120
%129 = fmul float %40, %86
%130 = fadd float %129, %122
%131 = fmul float %41, %86
%132 = fadd float %131, %124
%133 = fmul float %42, %86
%134 = fadd float %133, %126
%135 = fmul float %43, %87
%136 = fadd float %135, %128
%137 = fmul float %44, %87
%138 = fadd float %137, %130
%139 = fmul float %45, %87
%140 = fadd float %139, %132
%141 = fmul float %64, %84
%142 = fmul float %65, %84
%143 = fmul float %66, %84
%144 = fmul float %67, %84
%145 = fmul float %68, %85
%146 = fadd float %145, %141
%147 = fmul float %69, %85
%148 = fadd float %147, %142
%149 = fmul float %70, %85
%150 = fadd float %149, %143
%151 = fmul float %71, %85
%152 = fadd float %151, %144
%153 = fmul float %72, %86
%154 = fadd float %153, %146
%155 = fmul float %73, %86
%156 = fadd float %155, %148
%157 = fmul float %74, %86
%158 = fadd float %157, %150
%159 = fmul float %75, %86
%160 = fadd float %159, %152
%161 = fmul float %76, %87
%162 = fadd float %161, %154
%163 = fmul float %77, %87
%164 = fadd float %163, %156
%165 = fmul float %78, %87
%166 = fadd float %165, %158
%167 = fmul float %79, %87
%168 = fadd float %167, %160
%169 = fmul float %99, %55
%170 = fadd float %169, %57
%171 = fmul float %100, %56
%172 = fadd float %171, %58
%173 = fcmp oeq float %63, 0.000000e+00
%. = select i1 %173, float %99, float %105
%.40 = select i1 %173, float %100, float %106
%174 = fmul float %., %59
%175 = fadd float %174, %61
%176 = fmul float %.40, %60
%177 = fadd float %176, %62
%178 = fmul float %46, %92
%179 = fmul float %49, %92
%180 = fmul float %52, %92
%181 = fmul float %47, %93
%182 = fadd float %181, %178
%183 = fmul float %50, %93
%184 = fadd float %183, %179
%185 = fmul float %53, %93
%186 = fadd float %185, %180
%187 = fmul float %48, %94
%188 = fadd float %187, %182
%189 = fmul float %51, %94
%190 = fadd float %189, %184
%191 = fmul float %54, %94
%192 = fadd float %191, %186
%193 = fmul float %188, %188
%194 = fmul float %190, %190
%195 = fadd float %194, %193
%196 = fmul float %192, %192
%197 = fadd float %195, %196
%198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197)
%199 = fmul float %188, %198
%200 = fmul float %190, %198
%201 = fmul float %192, %198
%202 = fmul float %31, %111
%203 = fmul float %32, %111
%204 = fmul float %33, %111
%205 = fmul float %35, %112
%206 = fadd float %205, %202
%207 = fmul float %36, %112
%208 = fadd float %207, %203
%209 = fmul float %37, %112
%210 = fadd float %209, %204
%211 = fmul float %39, %113
%212 = fadd float %211, %206
%213 = fmul float %40, %113
%214 = fadd float %213, %208
%215 = fmul float %41, %113
%216 = fadd float %215, %210
%217 = fmul float %212, %212
%218 = fmul float %214, %214
%219 = fadd float %218, %217
%220 = fmul float %216, %216
%221 = fadd float %219, %220
%222 = call float @llvm.AMDGPU.rsq.clamped.f32(float %221)
%223 = fmul float %212, %222
%224 = fmul float %214, %222
%225 = fmul float %216, %222
%226 = fmul float %201, %224
%227 = fmul float %199, %225
%228 = fmul float %200, %223
%229 = fmul float %200, %225
%230 = fsub float %229, %226
%231 = fmul float %201, %223
%232 = fsub float %231, %227
%233 = fmul float %199, %224
%234 = fsub float %233, %228
%235 = fmul float %230, %114
%236 = fmul float %232, %114
%237 = fmul float %234, %114
%238 = fmul float %199, %200
%239 = fmul float %200, %201
%240 = fmul float %201, %201
%241 = fmul float %201, %199
%242 = fmul float %16, %238
%243 = fmul float %17, %239
%244 = fadd float %242, %243
%245 = fmul float %18, %240
%246 = fadd float %244, %245
%247 = fmul float %19, %241
%248 = fadd float %246, %247
%249 = fmul float %20, %238
%250 = fmul float %21, %239
%251 = fadd float %249, %250
%252 = fmul float %22, %240
%253 = fadd float %251, %252
%254 = fmul float %23, %241
%255 = fadd float %253, %254
%256 = fmul float %24, %238
%257 = fmul float %25, %239
%258 = fadd float %256, %257
%259 = fmul float %26, %240
%260 = fadd float %258, %259
%261 = fmul float %27, %241
%262 = fadd float %260, %261
%263 = fmul float %200, %200
%264 = fmul float %199, %199
%265 = fsub float %264, %263
%266 = fmul float %28, %265
%267 = fadd float %266, %248
%268 = fmul float %29, %265
%269 = fadd float %268, %255
%270 = fmul float %30, %265
%271 = fadd float %270, %262
%272 = fsub float %136, %13
%273 = fsub float %138, %14
%274 = fsub float %140, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %170, float %172, float %175, float %177)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %223, float %224, float %225, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %235, float %236, float %237, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %199, float %200, float %201, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %267, float %269, float %271, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %166, float %272, float %273, float %274)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %136, float %138, float %140, float %134)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s27, s[28:31], 0x20 ; C20D9D20
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
s_buffer_load_dword s32, s[28:31], 0x21 ; C2101D21
s_buffer_load_dword s33, s[28:31], 0x22 ; C2109D22
s_buffer_load_dword s34, s[28:31], 0x24 ; C2111D24
s_buffer_load_dword s35, s[28:31], 0x25 ; C2119D25
s_buffer_load_dword s1, s[28:31], 0x10 ; C2009D10
s_buffer_load_dword s2, s[28:31], 0x11 ; C2011D11
s_buffer_load_dword s0, s[28:31], 0x12 ; C2001D12
s_buffer_load_dword s18, s[28:31], 0x14 ; C2091D14
s_buffer_load_dword s17, s[28:31], 0x15 ; C2089D15
s_buffer_load_dword s36, s[28:31], 0x26 ; C2121D26
s_buffer_load_dword s37, s[28:31], 0x28 ; C2129D28
s_buffer_load_dword s38, s[28:31], 0x29 ; C2131D29
s_buffer_load_dword s39, s[28:31], 0x2a ; C2139D2A
s_buffer_load_dword s40, s[28:31], 0x2c ; C2141D2C
s_buffer_load_dword s20, s[28:31], 0x16 ; C20A1D16
s_buffer_load_dword s41, s[28:31], 0x17 ; C2149D17
s_buffer_load_dword s26, s[28:31], 0x18 ; C20D1D18
s_buffer_load_dword s25, s[28:31], 0x19 ; C20C9D19
s_buffer_load_dword s21, s[28:31], 0x1a ; C20A9D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s24, s[28:31], 0x1d ; C20C1D1D
s_buffer_load_dword s22, s[28:31], 0x1e ; C20B1D1E
s_buffer_load_dword s43, s[28:31], 0x1f ; C2159D1F
s_buffer_load_dword s44, s[28:31], 0x2d ; C2161D2D
s_buffer_load_dword s45, s[28:31], 0x2e ; C2169D2E
s_buffer_load_dword s46, s[28:31], 0x34 ; C2171D34
s_buffer_load_dword s47, s[28:31], 0x35 ; C2179D35
s_buffer_load_dword s3, s[28:31], 0x36 ; C2019D36
s_buffer_load_dword s4, s[28:31], 0x3c ; C2021D3C
s_buffer_load_dword s48, s[28:31], 0x40 ; C2181D40
s_buffer_load_dword s49, s[28:31], 0x41 ; C2189D41
s_buffer_load_dword s50, s[28:31], 0x42 ; C2191D42
s_buffer_load_dword s51, s[28:31], 0x43 ; C2199D43
s_buffer_load_dword s6, s[28:31], 0x37 ; C2031D37
s_buffer_load_dword s52, s[28:31], 0x38 ; C21A1D38
s_buffer_load_dword s53, s[28:31], 0x39 ; C21A9D39
s_buffer_load_dword s8, s[28:31], 0x3a ; C2041D3A
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mov_b32_e32 v0, s3 ; 7E000203
s_buffer_load_dword s10, s[28:31], 0x3b ; C2051D3B
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
s_buffer_load_dword s3, s[28:31], 0x0 ; C2019D00
s_buffer_load_dword s4, s[28:31], 0x1 ; C2021D01
s_buffer_load_dword s5, s[28:31], 0x2 ; C2029D02
s_buffer_load_dword s9, s[28:31], 0x4 ; C2049D04
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s14, s[28:31], 0x5 ; C2071D05
s_buffer_load_dword s7, s[28:31], 0x6 ; C2039D06
s_buffer_load_dword s6, s[28:31], 0x7 ; C2031D07
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s12, s[28:31], 0x8 ; C2061D08
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v19, s10 ; 7E26020A
s_buffer_load_dword s16, s[28:31], 0x9 ; C2081D09
s_buffer_load_dword s10, s[28:31], 0xa ; C2051D0A
s_buffer_load_dword s8, s[28:31], 0xb ; C2041D0B
s_buffer_load_dword s15, s[28:31], 0xc ; C2079D0C
s_buffer_load_dword s19, s[28:31], 0xd ; C2099D0D
s_buffer_load_dword s13, s[28:31], 0xe ; C2069D0E
s_buffer_load_dword s11, s[28:31], 0xf ; C2059D0F
s_buffer_load_dword s54, s[28:31], 0x44 ; C21B1D44
s_buffer_load_dword s55, s[28:31], 0x45 ; C21B9D45
s_buffer_load_dword s56, s[28:31], 0x46 ; C21C1D46
s_buffer_load_dword s57, s[28:31], 0x47 ; C21C9D47
s_buffer_load_dword s58, s[28:31], 0x48 ; C21D1D48
s_buffer_load_dword s59, s[28:31], 0x49 ; C21D9D49
s_buffer_load_dword s60, s[28:31], 0x4a ; C21E1D4A
s_buffer_load_dword s61, s[28:31], 0x4b ; C21E9D4B
s_buffer_load_dword s62, s[28:31], 0x4c ; C21F1D4C
s_buffer_load_dword s63, s[28:31], 0x4d ; C21F9D4D
s_buffer_load_dword s64, s[28:31], 0x4e ; C2201D4E
s_buffer_load_dword s28, s[28:31], 0x4f ; C20E1D4F
v_mul_f32_e32 v20, s41, v2 ; 10280429
v_mac_f32_e32 v20, s42, v3 ; 3E28062A
v_mac_f32_e32 v20, s43, v4 ; 3E28082B
v_mac_f32_e32 v0, s46, v9 ; 3E00122E
v_mac_f32_e32 v17, s47, v10 ; 3E22142F
v_mul_f32_e32 v21, s48, v2 ; 102A0430
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s54, v3 ; 3E2A0636
v_mac_f32_e32 v21, s58, v4 ; 3E2A083A
v_mac_f32_e32 v21, s62, v5 ; 3E2A0A3E
v_mul_f32_e32 v22, s49, v2 ; 102C0431
v_mac_f32_e32 v22, s55, v3 ; 3E2C0637
v_mac_f32_e32 v22, s59, v4 ; 3E2C083B
v_mac_f32_e32 v22, s63, v5 ; 3E2C0A3F
v_mul_f32_e32 v23, s50, v2 ; 102E0432
v_mac_f32_e32 v23, s56, v3 ; 3E2E0638
v_mac_f32_e32 v23, s60, v4 ; 3E2E083C
v_mac_f32_e32 v23, s64, v5 ; 3E2E0A40
v_mul_f32_e32 v24, s51, v2 ; 10300433
v_mac_f32_e32 v24, s57, v3 ; 3E300639
v_mac_f32_e32 v24, s61, v4 ; 3E30083D
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
v_mul_f32_e32 v11, s34, v6 ; 10160C22
v_mac_f32_e32 v11, s35, v7 ; 3E160E23
v_mul_f32_e32 v12, s37, v6 ; 10180C25
v_mac_f32_e32 v12, s38, v7 ; 3E180E26
v_mul_f32_e32 v6, s40, v6 ; 100C0C28
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s36, v8 ; 3E161024
v_mac_f32_e32 v12, s39, v8 ; 3E181027
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s26, v3 ; 3E0E061A
v_mac_f32_e32 v7, s23, v4 ; 3E0E0817
v_mac_f32_e32 v7, s27, v5 ; 3E0E0A1B
v_mul_f32_e32 v8, s17, v2 ; 10100411
v_mac_f32_e32 v8, s25, v3 ; 3E100619
v_mac_f32_e32 v8, s24, v4 ; 3E100818
v_mac_f32_e32 v8, s32, v5 ; 3E100A20
v_mul_f32_e32 v2, s20, v2 ; 10040414
v_mac_f32_e32 v2, s21, v3 ; 3E040615
v_mac_f32_e32 v2, s22, v4 ; 3E040816
v_mac_f32_e32 v2, s33, v5 ; 3E040A21
v_mac_f32_e32 v18, s52, v9 ; 3E241234
v_mac_f32_e32 v19, s53, v10 ; 3E261435
exp 15, 32, 0, 0, 0, v0, v17, v18, v19 ; F800020F 13121100
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s26, v14 ; 3E001C1A
v_mul_f32_e32 v3, s17, v13 ; 10061A11
v_mac_f32_e32 v3, s25, v14 ; 3E061C19
v_mul_f32_e32 v4, s20, v13 ; 10081A14
v_mac_f32_e32 v4, s21, v14 ; 3E081C15
v_mac_f32_e32 v0, s23, v15 ; 3E001E17
v_mac_f32_e32 v3, s24, v15 ; 3E061E18
v_mac_f32_e32 v4, s22, v15 ; 3E081E16
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s14, v0 ; 1006000E
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s19, v0 ; 10000013
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s13, v6 ; 3E000C0D
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s6, v6 ; 3E060C06
v_mac_f32_e32 v4, s8, v6 ; 3E080C08
v_mac_f32_e32 v0, s11, v6 ; 3E000C0B
v_mul_f32_e32 v6, v11, v11 ; 100C170B
v_mad_f32 v6, v10, v10, -v6 ; D2820006 841A150A
v_mac_f32_e32 v3, s1, v6 ; 3E060C01
v_mac_f32_e32 v4, s2, v6 ; 3E080C02
v_mac_f32_e32 v0, s0, v6 ; 3E000C00
v_subrev_f32_e32 v6, s3, v7 ; 0A0C0E03
v_subrev_f32_e32 v9, s4, v8 ; 0A121004
v_subrev_f32_e32 v12, s5, v2 ; 0A180405
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
exp 15, 37, 0, 0, 0, v23, v6, v9, v12 ; F800025F 0C090617
exp 15, 38, 0, 0, 0, v7, v8, v2, v20 ; F800026F 14020807
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 892 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..20]
DCL CONST[23..25]
DCL CONST[27]
DCL TEMP[0..17], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3], TEMP[3], SAMP[2], 2D
11: MUL TEMP[4].x, TEMP[3].wwww, CONST[19].wwww
12: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[20].xxxx
13: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx
14: KILL_IF -TEMP[4].xxxx
15: MOV TEMP[4].xy, IN[0].xyyy
16: TEX TEMP[4].yw, TEMP[4], SAMP[3], 2D
17: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].yyyy, IMM[0].zzzz
18: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[23].xxxx
19: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy
20: MOV_SAT TEMP[5].x, TEMP[5].xxxx
21: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx
22: SQRT TEMP[5].x, TEMP[5].xxxx
23: MOV TEMP[4].z, TEMP[5].xxxx
24: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[0].xyzz
25: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[1].xyzz
26: MOV TEMP[0].y, TEMP[1].xxxx
27: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[2].xyzz
28: MOV TEMP[0].z, TEMP[1].xxxx
29: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
30: RSQ TEMP[1].x, TEMP[1].xxxx
31: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
32: DP3 TEMP[1].x, IN[5].yzww, IN[5].yzww
33: RSQ TEMP[1].x, TEMP[1].xxxx
34: MUL TEMP[1].xyz, IN[5].yzww, TEMP[1].xxxx
35: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[3].xyzz
36: LRP TEMP[3].xyz, CONST[24].xxxx, TEMP[2].xyzz, CONST[16].xyzz
37: MUL TEMP[4].x, CONST[24].xxxx, CONST[16].wwww
38: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
39: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
40: MOV TEMP[5].xy, IN[0].xyyy
41: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
42: ADD TEMP[6].x, IMM[0].xxxx, -CONST[27].xxxx
43: MAD TEMP[5].x, TEMP[5].yyyy, CONST[27].xxxx, TEMP[6].xxxx
44: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
45: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
46: MOV TEMP[7].xyz, IMM[0].wwww
47: MOV TEMP[8].w, IMM[0].xxxx
48: MOV TEMP[8].xyz, TEMP[0].xyzx
49: DP4 TEMP[9].x, CONST[1], TEMP[8]
50: DP4 TEMP[10].x, CONST[2], TEMP[8]
51: MOV TEMP[9].y, TEMP[10].xxxx
52: DP4 TEMP[8].x, CONST[3], TEMP[8]
53: MOV TEMP[9].z, TEMP[8].xxxx
54: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz
55: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
56: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[1].xyzz
57: MUL TEMP[9].xyz, TEMP[9].xxxx, TEMP[0].xyzz
58: MUL TEMP[9].xyz, IMM[0].yyyy, TEMP[9].xyzz
59: ADD TEMP[9].xyz, TEMP[1].xyzz, -TEMP[9].xyzz
60: MOV TEMP[10].xyz, TEMP[9].xyzx
61: FSLT TEMP[11].x, IMM[0].wwww, CONST[10].wwww
62: UIF TEMP[11].xxxx :0
63: DP3 TEMP[11].x, TEMP[9].xyzz, TEMP[9].xyzz
64: RSQ TEMP[11].x, TEMP[11].xxxx
65: MUL TEMP[11].xyz, TEMP[9].xyzz, TEMP[11].xxxx
66: MOV TEMP[12].xyz, -IN[6].xyzx
67: ADD TEMP[13].xyz, CONST[8].xyzz, TEMP[12].xyzz
68: RCP TEMP[14].x, TEMP[11].xxxx
69: RCP TEMP[14].y, TEMP[11].yyyy
70: RCP TEMP[14].z, TEMP[11].zzzz
71: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
72: ADD TEMP[12].xyz, CONST[9].xyzz, TEMP[12].xyzz
73: RCP TEMP[14].x, TEMP[11].xxxx
74: RCP TEMP[14].y, TEMP[11].yyyy
75: RCP TEMP[14].z, TEMP[11].zzzz
76: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
77: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[11].xyzz
78: UIF TEMP[14].xxxx :0
79: MOV TEMP[15].x, TEMP[13].xxxx
80: ELSE :0
81: MOV TEMP[15].x, TEMP[12].xxxx
82: ENDIF
83: UIF TEMP[14].yyyy :0
84: MOV TEMP[16].x, TEMP[13].yyyy
85: ELSE :0
86: MOV TEMP[16].x, TEMP[12].yyyy
87: ENDIF
88: UIF TEMP[14].zzzz :0
89: MOV TEMP[13].x, TEMP[13].zzzz
90: ELSE :0
91: MOV TEMP[13].x, TEMP[12].zzzz
92: ENDIF
93: ADD TEMP[12].xyz, CONST[8].xyzz, CONST[9].xyzz
94: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx
95: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
96: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
97: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[10].xyzz
98: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz
99: MAD TEMP[11].xyz, TEMP[11].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
100: ADD TEMP[10].xyz, TEMP[11].xyzz, -TEMP[12].xyzz
101: ENDIF
102: ADD TEMP[11].x, IMM[0].xxxx, -CONST[25].xxxx
103: POW TEMP[11].x, TEMP[11].xxxx, IMM[1].yyyy
104: MUL TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
105: MOV TEMP[10].xyz, TEMP[10].xyzz
106: MOV TEMP[10].w, TEMP[11].xxxx
107: TXL TEMP[10], TEMP[10], SAMP[0], CUBE
108: POW TEMP[11].x, TEMP[10].wwww, CONST[11].yyyy
109: MUL TEMP[11].x, CONST[11].xxxx, TEMP[11].xxxx
110: MUL TEMP[10].xyz, TEMP[11].xxxx, TEMP[10].xyzz
111: FSLT TEMP[11].x, CONST[9].wwww, IMM[1].wwww
112: UIF TEMP[11].xxxx :0
113: MOV TEMP[11].xyz, TEMP[9].xyzx
114: FSLT TEMP[12].x, IMM[0].wwww, CONST[14].wwww
115: UIF TEMP[12].xxxx :0
116: DP3 TEMP[12].x, TEMP[9].xyzz, TEMP[9].xyzz
117: RSQ TEMP[12].x, TEMP[12].xxxx
118: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[12].xxxx
119: MOV TEMP[12].xyz, -IN[6].xyzx
120: ADD TEMP[13].xyz, CONST[12].xyzz, TEMP[12].xyzz
121: RCP TEMP[14].x, TEMP[9].xxxx
122: RCP TEMP[14].y, TEMP[9].yyyy
123: RCP TEMP[14].z, TEMP[9].zzzz
124: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[14].xyzz
125: ADD TEMP[12].xyz, CONST[13].xyzz, TEMP[12].xyzz
126: RCP TEMP[14].x, TEMP[9].xxxx
127: RCP TEMP[14].y, TEMP[9].yyyy
128: RCP TEMP[14].z, TEMP[9].zzzz
129: MUL TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xyzz
130: FSLT TEMP[14].xyz, IMM[0].wwww, TEMP[9].xyzz
131: UIF TEMP[14].xxxx :0
132: MOV TEMP[15].x, TEMP[13].xxxx
133: ELSE :0
134: MOV TEMP[15].x, TEMP[12].xxxx
135: ENDIF
136: UIF TEMP[14].yyyy :0
137: MOV TEMP[16].x, TEMP[13].yyyy
138: ELSE :0
139: MOV TEMP[16].x, TEMP[12].yyyy
140: ENDIF
141: UIF TEMP[14].zzzz :0
142: MOV TEMP[13].x, TEMP[13].zzzz
143: ELSE :0
144: MOV TEMP[13].x, TEMP[12].zzzz
145: ENDIF
146: ADD TEMP[12].xyz, CONST[12].xyzz, CONST[13].xyzz
147: MUL TEMP[12].xyz, TEMP[12].xyzz, IMM[1].xxxx
148: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[16].xxxx
149: MIN TEMP[13].x, TEMP[14].xxxx, TEMP[13].xxxx
150: ADD TEMP[14].xyz, TEMP[12].xyzz, -CONST[14].xyzz
151: ADD TEMP[14].xyz, TEMP[14].xyzz, IN[6].xyzz
152: MAD TEMP[9].xyz, TEMP[9].xyzz, TEMP[13].xxxx, TEMP[14].xyzz
153: ADD TEMP[11].xyz, TEMP[9].xyzz, -TEMP[12].xyzz
154: ENDIF
155: ADD TEMP[9].x, IMM[0].xxxx, -CONST[25].xxxx
156: POW TEMP[9].x, TEMP[9].xxxx, IMM[1].yyyy
157: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
158: MOV TEMP[11].xyz, TEMP[11].xyzz
159: MOV TEMP[11].w, TEMP[9].xxxx
160: TXL TEMP[9], TEMP[11], SAMP[1], CUBE
161: POW TEMP[11].x, TEMP[9].wwww, CONST[15].yyyy
162: MUL TEMP[11].x, CONST[15].xxxx, TEMP[11].xxxx
163: MUL TEMP[9].xyz, TEMP[11].xxxx, TEMP[9].xyzz
164: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[10].xyzz, TEMP[9].xyzz
165: ELSE :0
166: MOV TEMP[7].xyz, TEMP[10].xyzx
167: ENDIF
168: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
169: MOV TEMP[1].xyz, -TEMP[1].xyzx
170: ADD TEMP[5].x, IMM[0].xxxx, -CONST[25].xxxx
171: ADD TEMP[9].xyz, CONST[0].xyzz, TEMP[1].xyzz
172: DP3 TEMP[10].x, TEMP[9].xyzz, TEMP[9].xyzz
173: RSQ TEMP[10].x, TEMP[10].xxxx
174: MUL TEMP[9].xyz, TEMP[9].xyzz, TEMP[10].xxxx
175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
176: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
177: DP3 TEMP[10].x, CONST[0].xyzz, TEMP[9].xyzz
178: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
179: MUL TEMP[11].x, TEMP[5].xxxx, TEMP[5].xxxx
180: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].wwww
181: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[5].xxxx
182: MAD TEMP[12].x, TEMP[12].xxxx, IMM[2].yyyy, IMM[2].zzzz
183: LG2 TEMP[12].x, TEMP[12].xxxx
184: RCP TEMP[12].x, TEMP[12].xxxx
185: MUL TEMP[12].x, IMM[2].xxxx, TEMP[12].xxxx
186: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx
187: ADD TEMP[13].x, IMM[0].xxxx, -TEMP[6].xxxx
188: ADD TEMP[14].x, IMM[0].xxxx, -TEMP[1].xxxx
189: MUL TEMP[15].x, IMM[0].yyyy, TEMP[10].xxxx
190: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
191: MAD TEMP[5].x, TEMP[15].xxxx, TEMP[5].xxxx, IMM[1].xxxx
192: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[10].xxxx
193: ADD TEMP[15].x, IMM[0].xxxx, -TEMP[1].xxxx
194: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
195: ADD TEMP[4].x, CONST[25].xxxx, TEMP[4].xxxx
196: MOV_SAT TEMP[4].x, TEMP[4].xxxx
197: MUL TEMP[16].x, TEMP[15].xxxx, TEMP[15].xxxx
198: MUL TEMP[17].x, TEMP[15].xxxx, TEMP[15].xxxx
199: MUL TEMP[15].x, TEMP[17].xxxx, TEMP[15].xxxx
200: MUL TEMP[15].x, TEMP[16].xxxx, TEMP[15].xxxx
201: LRP TEMP[4].xyz, TEMP[15].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
202: LRP TEMP[15].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[11].xxxx
203: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx, TEMP[11].xxxx
204: MAD TEMP[1].x, TEMP[15].xxxx, TEMP[1].xxxx, IMM[2].wwww
205: RCP TEMP[1].x, TEMP[1].xxxx
206: DP3 TEMP[9].x, TEMP[0].xyzz, TEMP[9].xyzz
207: MAX TEMP[9].x, IMM[0].wwww, TEMP[9].xxxx
208: POW TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
209: ADD TEMP[11].x, TEMP[12].xxxx, IMM[0].xxxx
210: MUL TEMP[11].x, TEMP[11].xxxx, CONST[18].yyyy
211: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
212: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[9].xxxx
213: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
214: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
215: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
216: MUL TEMP[1].xyz, TEMP[1].xxxx, CONST[17].xyzz
217: ADD TEMP[9].xyz, IMM[0].xxxx, -TEMP[3].xyzz
218: MUL TEMP[11].x, TEMP[10].xxxx, TEMP[10].xxxx
219: MUL TEMP[12].x, TEMP[10].xxxx, TEMP[10].xxxx
220: MUL TEMP[10].x, TEMP[12].xxxx, TEMP[10].xxxx
221: MUL TEMP[10].x, TEMP[11].xxxx, TEMP[10].xxxx
222: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[10].xxxx, TEMP[3].xyzz
223: ADD TEMP[9].x, TEMP[5].xxxx, IMM[0].zzzz
224: MUL TEMP[10].x, TEMP[13].xxxx, TEMP[13].xxxx
225: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[13].xxxx
226: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[13].xxxx
227: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
228: MAD TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx, IMM[0].xxxx
229: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].zzzz
230: MUL TEMP[10].x, TEMP[14].xxxx, TEMP[14].xxxx
231: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
232: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[14].xxxx
233: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx
234: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[10].xxxx, IMM[0].xxxx
235: MUL TEMP[5].x, TEMP[9].xxxx, TEMP[5].xxxx
236: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
237: MAD TEMP[5].xyz, CONST[17].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
238: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
239: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
240: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
241: MOV TEMP[0].xyz, TEMP[0].xyzx
242: MAD TEMP[1].x, IN[5].xxxx, CONST[5].zzzz, CONST[5].wwww
243: MOV_SAT TEMP[1].x, TEMP[1].xxxx
244: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
245: MOV TEMP[0].xyz, TEMP[0].xyzx
246: MOV TEMP[0].w, IMM[0].xxxx
247: MOV OUT[0], TEMP[0]
248: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 316)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400)
%84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432)
%85 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%86 = load <32 x i8>, <32 x i8> addrspace(2)* %85, align 32, !tbaa !0
%87 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%88 = load <16 x i8>, <16 x i8> addrspace(2)* %87, align 16, !tbaa !0
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%90 = bitcast <8 x i32> addrspace(2)* %89 to <32 x i8> addrspace(2)*
%91 = load <32 x i8>, <32 x i8> addrspace(2)* %90, align 32, !tbaa !0
%92 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%93 = bitcast <4 x i32> addrspace(2)* %92 to <16 x i8> addrspace(2)*
%94 = load <16 x i8>, <16 x i8> addrspace(2)* %93, align 16, !tbaa !0
%95 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%96 = bitcast <8 x i32> addrspace(2)* %95 to <32 x i8> addrspace(2)*
%97 = load <32 x i8>, <32 x i8> addrspace(2)* %96, align 32, !tbaa !0
%98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%99 = bitcast <4 x i32> addrspace(2)* %98 to <16 x i8> addrspace(2)*
%100 = load <16 x i8>, <16 x i8> addrspace(2)* %99, align 16, !tbaa !0
%101 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%102 = bitcast <8 x i32> addrspace(2)* %101 to <32 x i8> addrspace(2)*
%103 = load <32 x i8>, <32 x i8> addrspace(2)* %102, align 32, !tbaa !0
%104 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%105 = bitcast <4 x i32> addrspace(2)* %104 to <16 x i8> addrspace(2)*
%106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0
%107 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%108 = bitcast <8 x i32> addrspace(2)* %107 to <32 x i8> addrspace(2)*
%109 = load <32 x i8>, <32 x i8> addrspace(2)* %108, align 32, !tbaa !0
%110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%111 = bitcast <4 x i32> addrspace(2)* %110 to <16 x i8> addrspace(2)*
%112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0
%113 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
%134 = bitcast float %113 to i32
%135 = bitcast float %114 to i32
%136 = insertelement <2 x i32> undef, i32 %134, i32 0
%137 = insertelement <2 x i32> %136, i32 %135, i32 1
%138 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %137, <32 x i8> %97, <16 x i8> %100, i32 2)
%139 = extractelement <4 x float> %138, i32 0
%140 = extractelement <4 x float> %138, i32 1
%141 = extractelement <4 x float> %138, i32 2
%142 = extractelement <4 x float> %138, i32 3
%143 = fmul float %142, %79
%144 = fcmp olt float %143, %80
%145 = select i1 %144, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %145)
%146 = bitcast float %113 to i32
%147 = bitcast float %114 to i32
%148 = insertelement <2 x i32> undef, i32 %146, i32 0
%149 = insertelement <2 x i32> %148, i32 %147, i32 1
%150 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %149, <32 x i8> %103, <16 x i8> %106, i32 2)
%151 = extractelement <4 x float> %150, i32 1
%152 = extractelement <4 x float> %150, i32 3
%153 = fmul float %152, 2.000000e+00
%154 = fadd float %153, -1.000000e+00
%155 = fmul float %151, 2.000000e+00
%156 = fadd float %155, -1.000000e+00
%157 = fmul float %154, %81
%158 = fmul float %156, %81
%159 = fmul float %157, %157
%160 = fmul float %158, %158
%161 = fadd float %159, %160
%162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00)
%163 = fsub float 1.000000e+00, %162
%164 = call float @llvm.sqrt.f32(float %163)
%165 = fmul float %157, %115
%166 = fmul float %158, %118
%167 = fadd float %166, %165
%168 = fmul float %164, %121
%169 = fadd float %167, %168
%170 = fmul float %157, %116
%171 = fmul float %158, %119
%172 = fadd float %171, %170
%173 = fmul float %164, %122
%174 = fadd float %172, %173
%175 = fmul float %157, %117
%176 = fmul float %158, %120
%177 = fadd float %176, %175
%178 = fmul float %164, %123
%179 = fadd float %177, %178
%180 = fmul float %169, %169
%181 = fmul float %174, %174
%182 = fadd float %181, %180
%183 = fmul float %179, %179
%184 = fadd float %182, %183
%185 = call float @llvm.AMDGPU.rsq.clamped.f32(float %184)
%186 = fmul float %169, %185
%187 = fmul float %174, %185
%188 = fmul float %179, %185
%189 = fmul float %128, %128
%190 = fmul float %129, %129
%191 = fadd float %190, %189
%192 = fmul float %130, %130
%193 = fadd float %191, %192
%194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193)
%195 = fmul float %128, %194
%196 = fmul float %129, %194
%197 = fmul float %130, %194
%198 = fmul float %76, %139
%199 = fmul float %77, %140
%200 = fmul float %78, %141
%201 = call float @llvm.AMDGPU.lrp(float %82, float %198, float %66)
%202 = call float @llvm.AMDGPU.lrp(float %82, float %199, float %67)
%203 = call float @llvm.AMDGPU.lrp(float %82, float %200, float %68)
%204 = fmul float %82, %69
%205 = fsub float %69, %204
%206 = fmul float %198, %205
%207 = fmul float %199, %205
%208 = fmul float %200, %205
%209 = bitcast float %113 to i32
%210 = bitcast float %114 to i32
%211 = insertelement <2 x i32> undef, i32 %209, i32 0
%212 = insertelement <2 x i32> %211, i32 %210, i32 1
%213 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %212, <32 x i8> %109, <16 x i8> %112, i32 2)
%214 = extractelement <4 x float> %213, i32 1
%215 = fsub float 1.000000e+00, %84
%216 = fmul float %214, %84
%217 = fadd float %216, %215
%218 = fmul float %186, %24
%219 = fmul float %187, %25
%220 = fadd float %219, %218
%221 = fmul float %188, %26
%222 = fadd float %220, %221
%223 = call float @llvm.maxnum.f32(float %222, float 0.000000e+00)
%224 = fmul float %27, %186
%225 = fmul float %28, %187
%226 = fadd float %224, %225
%227 = fmul float %29, %188
%228 = fadd float %226, %227
%229 = fadd float %228, %30
%230 = fmul float %31, %186
%231 = fmul float %32, %187
%232 = fadd float %230, %231
%233 = fmul float %33, %188
%234 = fadd float %232, %233
%235 = fadd float %234, %34
%236 = fmul float %35, %186
%237 = fmul float %36, %187
%238 = fadd float %236, %237
%239 = fmul float %37, %188
%240 = fadd float %238, %239
%241 = fadd float %240, %38
%242 = fadd float %124, %229
%243 = fadd float %125, %235
%244 = fadd float %126, %241
%245 = fmul float %242, %217
%246 = fmul float %243, %217
%247 = fmul float %244, %217
%248 = fmul float %186, %195
%249 = fmul float %187, %196
%250 = fadd float %249, %248
%251 = fmul float %188, %197
%252 = fadd float %250, %251
%253 = fmul float %252, %186
%254 = fmul float %252, %187
%255 = fmul float %252, %188
%256 = fmul float %253, 2.000000e+00
%257 = fmul float %254, 2.000000e+00
%258 = fmul float %255, 2.000000e+00
%259 = fsub float %195, %256
%260 = fsub float %196, %257
%261 = fsub float %197, %258
%262 = fcmp ogt float %51, 0.000000e+00
br i1 %262, label %IF, label %ENDIF
IF: ; preds = %main_body
%263 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%264 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%265 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%266 = fmul float %259, %259
%267 = fmul float %260, %260
%268 = fadd float %267, %266
%269 = fmul float %261, %261
%270 = fadd float %268, %269
%271 = call float @llvm.AMDGPU.rsq.clamped.f32(float %270)
%272 = fmul float %259, %271
%273 = fmul float %260, %271
%274 = fmul float %261, %271
%275 = fsub float %44, %131
%276 = fsub float %45, %132
%277 = fsub float %46, %133
%278 = fdiv float 1.000000e+00, %272
%279 = fdiv float 1.000000e+00, %273
%280 = fdiv float 1.000000e+00, %274
%281 = fmul float %275, %278
%282 = fmul float %276, %279
%283 = fmul float %277, %280
%284 = fsub float %47, %131
%285 = fsub float %48, %132
%286 = fsub float %49, %133
%287 = fdiv float 1.000000e+00, %272
%288 = fdiv float 1.000000e+00, %273
%289 = fdiv float 1.000000e+00, %274
%290 = fmul float %284, %287
%291 = fmul float %285, %288
%292 = fmul float %286, %289
%293 = fcmp ogt float %272, 0.000000e+00
%294 = fcmp ogt float %273, 0.000000e+00
%295 = fcmp ogt float %274, 0.000000e+00
%. = select i1 %293, float %281, float %290
%temp64.0 = select i1 %294, float %282, float %291
%.96 = select i1 %295, float %283, float %292
%296 = fadd float %44, %47
%297 = fadd float %45, %48
%298 = fadd float %46, %49
%299 = fmul float %296, 5.000000e-01
%300 = fmul float %297, 5.000000e-01
%301 = fmul float %298, 5.000000e-01
%302 = call float @llvm.minnum.f32(float %., float %temp64.0)
%303 = call float @llvm.minnum.f32(float %302, float %.96)
%304 = fsub float %299, %265
%305 = fsub float %300, %264
%306 = fsub float %301, %263
%307 = fadd float %304, %131
%308 = fadd float %305, %132
%309 = fadd float %306, %133
%310 = fmul float %272, %303
%311 = fadd float %310, %307
%312 = fmul float %273, %303
%313 = fadd float %312, %308
%314 = fmul float %274, %303
%315 = fadd float %314, %309
%316 = fsub float %311, %299
%317 = fsub float %313, %300
%318 = fsub float %315, %301
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp40.0 = phi float [ %316, %IF ], [ %259, %main_body ]
%temp41.0 = phi float [ %317, %IF ], [ %260, %main_body ]
%temp42.0 = phi float [ %318, %IF ], [ %261, %main_body ]
%319 = fsub float 1.000000e+00, %83
%320 = call float @llvm.pow.f32(float %319, float 7.500000e-01)
%321 = fmul float %320, 7.000000e+00
%322 = insertelement <4 x float> undef, float %temp40.0, i32 0
%323 = insertelement <4 x float> %322, float %temp41.0, i32 1
%324 = insertelement <4 x float> %323, float %temp42.0, i32 2
%325 = insertelement <4 x float> %324, float %321, i32 3
%326 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %325)
%327 = extractelement <4 x float> %326, i32 0
%328 = extractelement <4 x float> %326, i32 1
%329 = extractelement <4 x float> %326, i32 2
%330 = extractelement <4 x float> %326, i32 3
%331 = call float @llvm.fabs.f32(float %329)
%332 = fdiv float 1.000000e+00, %331
%333 = fmul float %327, %332
%334 = fadd float %333, 1.500000e+00
%335 = fmul float %328, %332
%336 = fadd float %335, 1.500000e+00
%337 = bitcast float %336 to i32
%338 = bitcast float %334 to i32
%339 = bitcast float %330 to i32
%340 = bitcast float %321 to i32
%341 = insertelement <4 x i32> undef, i32 %337, i32 0
%342 = insertelement <4 x i32> %341, i32 %338, i32 1
%343 = insertelement <4 x i32> %342, i32 %339, i32 2
%344 = insertelement <4 x i32> %343, i32 %340, i32 3
%345 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %344, <32 x i8> %86, <16 x i8> %88, i32 4)
%346 = extractelement <4 x float> %345, i32 0
%347 = extractelement <4 x float> %345, i32 1
%348 = extractelement <4 x float> %345, i32 2
%349 = extractelement <4 x float> %345, i32 3
%350 = call float @llvm.pow.f32(float %349, float %53)
%351 = fmul float %52, %350
%352 = fmul float %351, %346
%353 = fmul float %351, %347
%354 = fmul float %351, %348
%355 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %355, label %IF82, label %ENDIF81
IF82: ; preds = %ENDIF
%356 = fcmp ogt float %63, 0.000000e+00
br i1 %356, label %IF85, label %ENDIF84
ENDIF81: ; preds = %ENDIF, %ENDIF84
%temp28.0 = phi float [ %584, %ENDIF84 ], [ %352, %ENDIF ]
%temp29.0 = phi float [ %585, %ENDIF84 ], [ %353, %ENDIF ]
%temp30.0 = phi float [ %586, %ENDIF84 ], [ %354, %ENDIF ]
%357 = fmul float %temp28.0, %217
%358 = fmul float %temp29.0, %217
%359 = fmul float %temp30.0, %217
%360 = fsub float 1.000000e+00, %83
%361 = fsub float %24, %195
%362 = fsub float %25, %196
%363 = fsub float %26, %197
%364 = fmul float %361, %361
%365 = fmul float %362, %362
%366 = fadd float %365, %364
%367 = fmul float %363, %363
%368 = fadd float %366, %367
%369 = call float @llvm.AMDGPU.rsq.clamped.f32(float %368)
%370 = fmul float %361, %369
%371 = fmul float %362, %369
%372 = fmul float %363, %369
%373 = fmul float %195, %186
%374 = fsub float -0.000000e+00, %373
%375 = fmul float %196, %187
%376 = fsub float %374, %375
%377 = fmul float %197, %188
%378 = fsub float %376, %377
%379 = call float @llvm.maxnum.f32(float %378, float 0.000000e+00)
%380 = fmul float %24, %370
%381 = fmul float %25, %371
%382 = fadd float %381, %380
%383 = fmul float %26, %372
%384 = fadd float %382, %383
%385 = call float @llvm.maxnum.f32(float %384, float 0.000000e+00)
%386 = fmul float %360, %360
%387 = fmul float %386, %75
%388 = fsub float 1.000000e+00, %360
%389 = fmul float %388, 0x3FEEF9DB20000000
%390 = fadd float %389, 0x3F9EB851E0000000
%391 = call float @llvm.log2.f32(float %390)
%392 = fdiv float 1.000000e+00, %391
%393 = fmul float %392, 1.000000e+01
%394 = fmul float %393, %393
%395 = fsub float 1.000000e+00, %223
%396 = fsub float 1.000000e+00, %379
%397 = fmul float %385, 2.000000e+00
%398 = fmul float %385, %360
%399 = fmul float %397, %398
%400 = fadd float %399, 5.000000e-01
%401 = fsub float 1.000000e+00, %385
%402 = fsub float 1.000000e+00, %379
%403 = fsub float 1.000000e+00, %205
%404 = fadd float %83, %403
%405 = call float @llvm.AMDIL.clamp.(float %404, float 0.000000e+00, float 1.000000e+00)
%406 = fmul float %402, %402
%407 = fmul float %402, %402
%408 = fmul float %407, %402
%409 = fmul float %406, %408
%410 = call float @llvm.AMDGPU.lrp(float %409, float %405, float %201)
%411 = call float @llvm.AMDGPU.lrp(float %409, float %405, float %202)
%412 = call float @llvm.AMDGPU.lrp(float %409, float %405, float %203)
%413 = call float @llvm.AMDGPU.lrp(float %223, float 1.000000e+00, float %387)
%414 = call float @llvm.AMDGPU.lrp(float %379, float 1.000000e+00, float %387)
%415 = fmul float %413, %414
%416 = fadd float %415, 0x3F1A36E2E0000000
%417 = fdiv float 1.000000e+00, %416
%418 = fmul float %186, %370
%419 = fmul float %187, %371
%420 = fadd float %419, %418
%421 = fmul float %188, %372
%422 = fadd float %420, %421
%423 = call float @llvm.maxnum.f32(float %422, float 0.000000e+00)
%424 = call float @llvm.pow.f32(float %423, float %394)
%425 = fadd float %394, 1.000000e+00
%426 = fmul float %425, %74
%427 = fmul float %424, %426
%428 = fmul float %417, %427
%429 = fmul float %428, %223
%430 = fmul float %429, %73
%431 = call float @llvm.maxnum.f32(float %430, float 0.000000e+00)
%432 = fmul float %431, %70
%433 = fmul float %431, %71
%434 = fmul float %431, %72
%435 = fsub float 1.000000e+00, %201
%436 = fsub float 1.000000e+00, %202
%437 = fsub float 1.000000e+00, %203
%438 = fmul float %401, %401
%439 = fmul float %401, %401
%440 = fmul float %439, %401
%441 = fmul float %438, %440
%442 = fmul float %435, %441
%443 = fadd float %442, %201
%444 = fmul float %436, %441
%445 = fadd float %444, %202
%446 = fmul float %437, %441
%447 = fadd float %446, %203
%448 = fadd float %400, -1.000000e+00
%449 = fmul float %395, %395
%450 = fmul float %395, %395
%451 = fmul float %450, %395
%452 = fmul float %449, %451
%453 = fmul float %448, %452
%454 = fadd float %453, 1.000000e+00
%455 = fadd float %400, -1.000000e+00
%456 = fmul float %396, %396
%457 = fmul float %396, %396
%458 = fmul float %457, %396
%459 = fmul float %456, %458
%460 = fmul float %455, %459
%461 = fadd float %460, 1.000000e+00
%462 = fmul float %454, %461
%463 = fmul float %462, %223
%464 = fmul float %70, %463
%465 = fadd float %464, %245
%466 = fmul float %71, %463
%467 = fadd float %466, %246
%468 = fmul float %72, %463
%469 = fadd float %468, %247
%470 = fmul float %206, %465
%471 = fmul float %207, %467
%472 = fmul float %208, %469
%473 = fmul float %432, %443
%474 = fadd float %473, %470
%475 = fmul float %433, %445
%476 = fadd float %475, %471
%477 = fmul float %434, %447
%478 = fadd float %477, %472
%479 = fmul float %357, %410
%480 = fadd float %479, %474
%481 = fmul float %358, %411
%482 = fadd float %481, %476
%483 = fmul float %359, %412
%484 = fadd float %483, %478
%485 = fmul float %127, %42
%486 = fadd float %485, %43
%487 = call float @llvm.AMDIL.clamp.(float %486, float 0.000000e+00, float 1.000000e+00)
%488 = call float @llvm.AMDGPU.lrp(float %487, float %480, float %39)
%489 = call float @llvm.AMDGPU.lrp(float %487, float %482, float %40)
%490 = call float @llvm.AMDGPU.lrp(float %487, float %484, float %41)
%491 = call i32 @llvm.SI.packf16(float %488, float %489)
%492 = bitcast i32 %491 to float
%493 = call i32 @llvm.SI.packf16(float %490, float 1.000000e+00)
%494 = bitcast i32 %493 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %492, float %494, float %492, float %494)
ret void
IF85: ; preds = %IF82
%495 = fmul float %259, %259
%496 = fmul float %260, %260
%497 = fadd float %496, %495
%498 = fmul float %261, %261
%499 = fadd float %497, %498
%500 = call float @llvm.AMDGPU.rsq.clamped.f32(float %499)
%501 = fmul float %259, %500
%502 = fmul float %260, %500
%503 = fmul float %261, %500
%504 = fsub float %54, %131
%505 = fsub float %55, %132
%506 = fsub float %56, %133
%507 = fdiv float 1.000000e+00, %501
%508 = fdiv float 1.000000e+00, %502
%509 = fdiv float 1.000000e+00, %503
%510 = fmul float %504, %507
%511 = fmul float %505, %508
%512 = fmul float %506, %509
%513 = fsub float %57, %131
%514 = fsub float %58, %132
%515 = fsub float %59, %133
%516 = fdiv float 1.000000e+00, %501
%517 = fdiv float 1.000000e+00, %502
%518 = fdiv float 1.000000e+00, %503
%519 = fmul float %513, %516
%520 = fmul float %514, %517
%521 = fmul float %515, %518
%522 = fcmp ogt float %501, 0.000000e+00
%523 = fcmp ogt float %502, 0.000000e+00
%524 = fcmp ogt float %503, 0.000000e+00
%.97 = select i1 %522, float %510, float %519
%temp64.1 = select i1 %523, float %511, float %520
%.98 = select i1 %524, float %512, float %521
%525 = fadd float %54, %57
%526 = fadd float %55, %58
%527 = fadd float %56, %59
%528 = fmul float %525, 5.000000e-01
%529 = fmul float %526, 5.000000e-01
%530 = fmul float %527, 5.000000e-01
%531 = call float @llvm.minnum.f32(float %.97, float %temp64.1)
%532 = call float @llvm.minnum.f32(float %531, float %.98)
%533 = fsub float %528, %60
%534 = fsub float %529, %61
%535 = fsub float %530, %62
%536 = fadd float %533, %131
%537 = fadd float %534, %132
%538 = fadd float %535, %133
%539 = fmul float %501, %532
%540 = fadd float %539, %536
%541 = fmul float %502, %532
%542 = fadd float %541, %537
%543 = fmul float %503, %532
%544 = fadd float %543, %538
%545 = fsub float %540, %528
%546 = fsub float %542, %529
%547 = fsub float %544, %530
br label %ENDIF84
ENDIF84: ; preds = %IF82, %IF85
%temp44.0 = phi float [ %545, %IF85 ], [ %259, %IF82 ]
%temp45.0 = phi float [ %546, %IF85 ], [ %260, %IF82 ]
%temp46.0 = phi float [ %547, %IF85 ], [ %261, %IF82 ]
%548 = fsub float 1.000000e+00, %83
%549 = call float @llvm.pow.f32(float %548, float 7.500000e-01)
%550 = fmul float %549, 7.000000e+00
%551 = insertelement <4 x float> undef, float %temp44.0, i32 0
%552 = insertelement <4 x float> %551, float %temp45.0, i32 1
%553 = insertelement <4 x float> %552, float %temp46.0, i32 2
%554 = insertelement <4 x float> %553, float %550, i32 3
%555 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %554)
%556 = extractelement <4 x float> %555, i32 0
%557 = extractelement <4 x float> %555, i32 1
%558 = extractelement <4 x float> %555, i32 2
%559 = extractelement <4 x float> %555, i32 3
%560 = call float @llvm.fabs.f32(float %558)
%561 = fdiv float 1.000000e+00, %560
%562 = fmul float %556, %561
%563 = fadd float %562, 1.500000e+00
%564 = fmul float %557, %561
%565 = fadd float %564, 1.500000e+00
%566 = bitcast float %565 to i32
%567 = bitcast float %563 to i32
%568 = bitcast float %559 to i32
%569 = bitcast float %550 to i32
%570 = insertelement <4 x i32> undef, i32 %566, i32 0
%571 = insertelement <4 x i32> %570, i32 %567, i32 1
%572 = insertelement <4 x i32> %571, i32 %568, i32 2
%573 = insertelement <4 x i32> %572, i32 %569, i32 3
%574 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %573, <32 x i8> %91, <16 x i8> %94, i32 4)
%575 = extractelement <4 x float> %574, i32 0
%576 = extractelement <4 x float> %574, i32 1
%577 = extractelement <4 x float> %574, i32 2
%578 = extractelement <4 x float> %574, i32 3
%579 = call float @llvm.pow.f32(float %578, float %65)
%580 = fmul float %64, %579
%581 = fmul float %580, %575
%582 = fmul float %580, %576
%583 = fmul float %580, %577
%584 = call float @llvm.AMDGPU.lrp(float %50, float %352, float %581)
%585 = call float @llvm.AMDGPU.lrp(float %50, float %353, float %582)
%586 = call float @llvm.AMDGPU.lrp(float %50, float %354, float %583)
br label %ENDIF81
}
; ---------------------------------------------------------------------------
; External declarations for the module that ends here: every intrinsic listed
; below is only declared (no body), followed by the attribute groups (#0-#2)
; and the single metadata node (!0) of the module.
; The trailing "#1" / "#2" on a declaration selects one of the attribute
; groups defined at the bottom of this section; declarations without a suffix
; (llvm.AMDGPU.kill, llvm.SI.export) carry no attribute group.
; ---------------------------------------------------------------------------
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute groups.
; #0 is not used by any declaration in this section; NOTE(review): presumably
; it is the group attached to this module's "define void @main(...) #0" line,
; as in the other modules of this dump - confirm against the definition.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 / #2 are the groups referenced by the declarations above.
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node !0. NOTE(review): other modules in this dump reference it as
; "!tbaa !0" on their descriptor loads; presumably the same applies here.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; ---------------------------------------------------------------------------
; Prologue: s_wqm_b64 switches EXEC to whole-quad mode and s9 is copied to m0,
; which every v_interp_* instruction below names as its [m0] operand.
; ---------------------------------------------------------------------------
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Input interpolation. Each v_interp_p1/v_interp_p2 pair fills one VGPR from
; v0 and v1. The two immediates look like (channel, attribute): attribute 0
; uses channels 0-1 (v17, v18), attributes 1-4 and 6 use channels 0-2, and
; attribute 5 uses channels 0-3 (v3, v23, v24, v25).
v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000
v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001
v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100
v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101
v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400
v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401
v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500
v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501
v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600
v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601
v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800
v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801
v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900
v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v19, v0, 1, 3, [m0] ; C84C0D00
v_interp_p2_f32 v19, [v19], v1, 1, 3, [m0] ; C84D0D01
v_interp_p1_f32 v20, v0, 2, 3, [m0] ; C8500E00
v_interp_p2_f32 v20, [v20], v1, 2, 3, [m0] ; C8510E01
v_interp_p1_f32 v7, v0, 0, 4, [m0] ; C81C1000
v_interp_p2_f32 v7, [v7], v1, 0, 4, [m0] ; C81D1001
v_interp_p1_f32 v8, v0, 1, 4, [m0] ; C8201100
v_interp_p2_f32 v8, [v8], v1, 1, 4, [m0] ; C8211101
v_interp_p1_f32 v9, v0, 2, 4, [m0] ; C8241200
v_interp_p2_f32 v9, [v9], v1, 2, 4, [m0] ; C8251201
v_interp_p1_f32 v3, v0, 0, 5, [m0] ; C80C1400
v_interp_p2_f32 v3, [v3], v1, 0, 5, [m0] ; C80D1401
v_interp_p1_f32 v23, v0, 1, 5, [m0] ; C85C1500
v_interp_p2_f32 v23, [v23], v1, 1, 5, [m0] ; C85D1501
v_interp_p1_f32 v24, v0, 2, 5, [m0] ; C8601600
v_interp_p2_f32 v24, [v24], v1, 2, 5, [m0] ; C8611601
v_interp_p1_f32 v25, v0, 3, 5, [m0] ; C8641700
; s[8:11] is the 4-dword descriptor that all later s_buffer_load_dword
; instructions in this program read from. The scalar loads are interleaved
; with the remaining interpolation instructions.
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p2_f32 v25, [v25], v1, 3, 5, [m0] ; C8651701
v_interp_p1_f32 v21, v0, 0, 6, [m0] ; C8541800
v_interp_p2_f32 v21, [v21], v1, 0, 6, [m0] ; C8551801
v_interp_p1_f32 v16, v0, 1, 6, [m0] ; C8401900
; s[0:3] and s[40:47] are the last two operands of the first image_sample
; below. Note that this load overwrites s[2:3], the pointer used just above.
s_load_dwordx4 s[0:3], s[4:5], 0x8 ; C0800508
s_load_dwordx8 s[40:47], s[6:7], 0x10 ; C0D40710
v_interp_p2_f32 v16, [v16], v1, 1, 6, [m0] ; C8411901
v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00
v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01
; Wait for the scalar loads above before s[8:11], s[0:3], s[40:47] are used.
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[8:11], 0x4f ; C206094F
s_buffer_load_dword s13, s[8:11], 0x50 ; C2068950
; Descriptors for the two later image_sample instructions are fetched early:
; s[32:39]/s[28:31] and s[16:23]/s[24:27].
s_load_dwordx4 s[28:31], s[4:5], 0xc ; C08E050C
s_load_dwordx8 s[32:39], s[6:7], 0x18 ; C0D00718
s_load_dwordx8 s[16:23], s[6:7], 0x20 ; C0C80720
s_load_dwordx4 s[24:27], s[4:5], 0x10 ; C08C0510
; First texture fetch: four result channels into v[26:29], addressed by the
; interpolated pair v[17:18].
image_sample v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[40:47], s[0:3] ; F0800F00 000A1A11
s_buffer_load_dword s1, s[8:11], 0x4c ; C200894C
s_buffer_load_dword s2, s[8:11], 0x4d ; C201094D
s_buffer_load_dword s3, s[8:11], 0x4e ; C201894E
s_buffer_load_dword s15, s[8:11], 0x5c ; C207895C
s_buffer_load_dword s14, s[8:11], 0x60 ; C2070960
s_buffer_load_dword s0, s[8:11], 0x64 ; C2000964
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Discard test on the 4th sampled channel: v0 = s12 * v29, and lanes where
; s13 > v0 get -1.0 (others 0). v_cmpx_le then keeps only lanes with 0 <= v0
; in EXEC. NOTE(review): this looks like an alpha-test / kill - confirm
; against the llvm.AMDGPU.kill call in the IR.
v_mul_f32_e32 v0, s12, v29 ; 10003A0C
v_cmp_gt_f32_e32 vcc, s13, v0 ; 7C08000D
v_cndmask_b32_e64 v0, 0, -1.0, vcc ; D2000000 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v0 ; 7C260080
; If any lane is still active, branch over the next two instructions;
; otherwise issue the export below and end the program immediately.
s_cbranch_execnz 3 ; BF890003
exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000
s_endpgm ; BF810000
; ---------------------------------------------------------------------------
; Scale the first three sampled channels (v26..v28) by the constants in
; s1..s3; results are kept in v13..v15 for the rest of the program.
; ---------------------------------------------------------------------------
v_mul_f32_e32 v13, s1, v26 ; 101A3401
v_mul_f32_e32 v14, s2, v27 ; 101C3602
v_mul_f32_e32 v15, s3, v28 ; 101E3803
; Second texture fetch: only two channels are returned (v0, v1), using the
; same coordinates v[17:18].
; NOTE(review): the 2*x-1 remap, the scale by s15 and the sqrt(1 - clamp(x*x +
; y*y)) that follow look like a two-channel normal-map decode - confirm.
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[32:39], s[28:31] ; F0800A00 00E80011
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
v_mul_f32_e32 v1, s15, v1 ; 1002020F
v_mul_f32_e32 v0, s15, v0 ; 1000000F
; Start accumulating a 3-vector in (v2, v4, v10): interpolated attribute 1
; (v2, v4, v5) weighted by v1 plus attribute 2 (v6, v10, v11) weighted by v0.
v_mul_f32_e32 v2, v2, v1 ; 10040302
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v4, v4, v1 ; 10080304
v_mac_f32_e32 v4, v10, v0 ; 3E08010A
v_mul_f32_e32 v10, v5, v1 ; 10140305
v_mac_f32_e32 v10, v11, v0 ; 3E14010B
; Third weight: v0 = sqrt(1 - clamp(v0*v0 + v1*v1)).
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
; Add attribute 3 (v12, v19, v20) weighted by the third weight.
v_mac_f32_e32 v2, v12, v0 ; 3E04010C
v_mac_f32_e32 v4, v19, v0 ; 3E080113
v_mac_f32_e32 v10, v20, v0 ; 3E140114
; v0 = rsq(dot(vec, vec)) for (v2, v4, v10); v1 = the same for (v23, v24, v25).
v_mul_f32_e32 v0, v2, v2 ; 10000502
v_mac_f32_e32 v0, v4, v4 ; 3E000904
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v23, v23 ; 10022F17
v_mac_f32_e32 v1, v24, v24 ; 3E023118
v_mac_f32_e32 v1, v25, v25 ; 3E023319
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
; Normalised vectors: (v6, v5, v4) from (v2, v4, v10) and (v12, v11, v10)
; from (v23, v24, v25). v0 then becomes their dot product.
v_mul_f32_e32 v6, v0, v2 ; 100C0500
v_mul_f32_e32 v5, v0, v4 ; 100A0900
v_mul_f32_e32 v4, v0, v10 ; 10081500
v_mul_f32_e32 v12, v1, v23 ; 10182F01
v_mul_f32_e32 v11, v1, v24 ; 10163101
v_mul_f32_e32 v0, v12, v6 ; 10000D0C
v_mac_f32_e32 v0, v11, v5 ; 3E000B0B
v_mul_f32_e32 v10, v1, v25 ; 10143301
v_mac_f32_e32 v0, v10, v4 ; 3E00090A
; Per component: v23..v25 = v23..v25 * v1 - 2 * (v6|v5|v4) * v0. The factor
; of two is formed by a mul followed by a mac of the same product.
; NOTE(review): this has the shape of a reflection vector I - 2*N*dot(N, I).
v_mul_f32_e32 v2, v6, v0 ; 10040106
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v19, v5, v0 ; 10260105
v_mac_f32_e32 v19, v5, v0 ; 3E260105
v_mad_f32 v23, v23, v1, -v2 ; D2820017 840A0317
v_mad_f32 v24, v24, v1, -v19 ; D2820018 844E0318
s_buffer_load_dword s1, s[8:11], 0x40 ; C2008940
s_buffer_load_dword s2, s[8:11], 0x41 ; C2010941
s_buffer_load_dword s3, s[8:11], 0x42 ; C2018942
v_mul_f32_e32 v2, v4, v0 ; 10040104
v_mac_f32_e32 v2, v4, v0 ; 3E040104
v_mad_f32 v25, v25, v1, -v2 ; D2820019 840A0319
s_buffer_load_dword s12, s[8:11], 0x27 ; C2060927
s_buffer_load_dword s13, s[8:11], 0x2b ; C206892B
s_buffer_load_dword s29, s[8:11], 0x2c ; C20E892C
s_buffer_load_dword s30, s[8:11], 0x2d ; C20F092D
; (v2, v1, v0) = (s1, s2, s3) * (1 - s14) + s14 * (v13, v14, v15), i.e. a
; linear blend between a constant triple and the scaled texture colour.
v_sub_f32_e64 v0, 1.0, s14 ; D2080000 00001CF2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s1, v0 ; 10040001
v_mul_f32_e32 v1, s2, v0 ; 10020002
v_mul_f32_e32 v0, s3, v0 ; 10000003
v_mac_f32_e32 v2, s14, v13 ; 3E041A0E
; v26..v28 default to the unmodified v23..v25; the conditional block that
; follows overwrites them only for lanes that execute it.
v_mov_b32_e32 v26, v23 ; 7E340317
v_mac_f32_e32 v1, s14, v14 ; 3E021C0E
v_mov_b32_e32 v27, v24 ; 7E360318
v_mac_f32_e32 v0, s14, v15 ; 3E001E0E
v_mov_b32_e32 v28, v25 ; 7E380319
; Branch condition: 0 < s13 (constant dword 0x2b).
v_cmp_lt_f32_e64 s[2:3], 0, s13 ; D0020002 00001A80
; Third texture fetch, again at v[17:18]; it overwrites v[17:20].
image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[16:23], s[24:27] ; F0800F00 00C41111
s_waitcnt vmcnt(0) ; BF8C0770
; Save EXEC in s[20:21], restrict it to the lanes passing the condition and
; skip the block when no lane remains. The BB0_2 label line itself is not
; printed in this dump.
s_and_saveexec_b64 s[20:21], s[2:3] ; BE942402
s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E
s_cbranch_execz BB0_2 ; BF880000
; ---------------------------------------------------------------------------
; Conditional block entered through the s_and_saveexec_b64 that precedes it;
; it ends at the s_or_b64 exec on the last line, which restores the lanes
; saved in s[20:21].
; Inputs: direction (v23, v24, v25), position-like attribute 6 (v21, v16, v22)
; and constant dwords 0x20-0x22, 0x24-0x26, 0x28-0x2a.
; Output: a replacement direction in (v26, v27, v28).
; NOTE(review): the shape (per-axis distance to two bounds, min of the three,
; re-centre) matches a box-projected reflection lookup - confirm with the
; shader author.
; ---------------------------------------------------------------------------
s_buffer_load_dword s1, s[8:11], 0x20 ; C2008920
s_buffer_load_dword s2, s[8:11], 0x21 ; C2010921
s_buffer_load_dword s3, s[8:11], 0x22 ; C2018922
s_buffer_load_dword s13, s[8:11], 0x24 ; C2068924
s_buffer_load_dword s15, s[8:11], 0x25 ; C2078925
; Normalise the direction: v17 = rsq(dot(d, d)), then (v19, v20, v17) = d * v17.
v_mul_f32_e32 v17, v23, v23 ; 10222F17
v_mac_f32_e32 v17, v24, v24 ; 3E223118
v_mac_f32_e32 v17, v25, v25 ; 3E223319
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
s_buffer_load_dword s16, s[8:11], 0x26 ; C2080926
s_buffer_load_dword s17, s[8:11], 0x28 ; C2088928
s_buffer_load_dword s18, s[8:11], 0x29 ; C2090929
s_buffer_load_dword s19, s[8:11], 0x2a ; C209892A
v_mul_f32_e32 v19, v17, v23 ; 10262F11
v_mul_f32_e32 v20, v17, v24 ; 10283111
v_mul_f32_e32 v17, v17, v25 ; 10223311
; Per axis: compute (boundA - pos) / dir and (boundB - pos) / dir using a
; reciprocal of the direction component, then pick one of the two with
; v_cndmask depending on whether 0 < dir.
v_rcp_f32_e32 v26, v19 ; 7E345513
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v27, s1, v21 ; 08362A01
v_sub_f32_e32 v28, s2, v16 ; 08382002
v_rcp_f32_e32 v29, v20 ; 7E3A5514
v_mul_f32_e32 v27, v26, v27 ; 1036371A
v_sub_f32_e32 v30, s13, v21 ; 083C2A0D
v_mul_f32_e32 v26, v26, v30 ; 10343D1A
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v26, v26, v27 ; 0034371A
v_rcp_f32_e32 v27, v17 ; 7E365511
v_mul_f32_e32 v28, v29, v28 ; 1038391D
v_sub_f32_e32 v30, s15, v16 ; 083C200F
v_mul_f32_e32 v29, v29, v30 ; 103A3D1D
v_cmp_lt_f32_e32 vcc, 0, v20 ; 7C022880
v_cndmask_b32_e32 v28, v29, v28 ; 0038391D
v_sub_f32_e32 v29, s3, v22 ; 083A2C03
v_mul_f32_e32 v29, v27, v29 ; 103A3B1B
v_sub_f32_e32 v30, s16, v22 ; 083C2C10
v_mul_f32_e32 v27, v27, v30 ; 10363D1B
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v27, v27, v29 ; 00363B1B
; v26 = smallest of the three per-axis values.
v_min3_f32 v26, v26, v28, v27 ; D2A2001A 046E391A
; (v27, v28, v29) = boundA + boundB per axis; the 0.5 factor that turns the
; sums into midpoints is applied inside the v_mad instructions below.
v_mov_b32_e32 v27, s13 ; 7E36020D
v_add_f32_e32 v27, s1, v27 ; 06363601
v_mov_b32_e32 v28, s15 ; 7E38020F
v_add_f32_e32 v28, s2, v28 ; 06383802
v_mov_b32_e32 v29, s16 ; 7E3A0210
v_add_f32_e32 v29, s3, v29 ; 063A3A03
; Per axis: t = 0.5 * sum - s17|s18|s19 + pos + v26 * dir.
v_mad_f32 v30, 0.5, v27, -s17 ; D282001E 804636F0
v_add_f32_e32 v30, v21, v30 ; 063C3D15
v_mac_f32_e32 v30, v26, v19 ; 3E3C271A
v_mad_f32 v19, 0.5, v28, -s18 ; D2820013 804A38F0
v_add_f32_e32 v19, v16, v19 ; 06262710
v_mac_f32_e32 v19, v26, v20 ; 3E26291A
v_mad_f32 v20, 0.5, v29, -s19 ; D2820014 804E3AF0
v_add_f32_e32 v20, v22, v20 ; 06282916
v_mac_f32_e32 v20, v26, v17 ; 3E28231A
; Result: (v26, v27, v28) = t - 0.5 * sum per axis.
v_mad_f32 v26, 0.5, -v27, v30 ; D282001A 447A36F0
v_mad_f32 v27, 0.5, -v28, v19 ; D282001B 444E38F0
v_mad_f32 v28, 0.5, -v29, v20 ; D282001C 44523AF0
; Join point: re-enable the lanes that skipped this block.
s_or_b64 exec, exec, s[20:21] ; 88FE147E
; ---------------------------------------------------------------------------
; Batch of constant loads from s[8:11]. Most of these (dwords 0x0-0xf, 0x17,
; 0x43, 0x6c) are not consumed in this section but by the arithmetic that
; follows the cube-map fetches.
; ---------------------------------------------------------------------------
s_buffer_load_dword s15, s[8:11], 0x17 ; C2078917
s_buffer_load_dword s16, s[8:11], 0x43 ; C2080943
s_buffer_load_dword s13, s[8:11], 0x6c ; C206896C
s_buffer_load_dword s1, s[8:11], 0x0 ; C2008900
s_buffer_load_dword s2, s[8:11], 0x1 ; C2010901
s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902
s_buffer_load_dword s17, s[8:11], 0x4 ; C2088904
s_buffer_load_dword s18, s[8:11], 0x5 ; C2090905
s_buffer_load_dword s19, s[8:11], 0x6 ; C2098906
s_buffer_load_dword s21, s[8:11], 0x7 ; C20A8907
s_buffer_load_dword s20, s[8:11], 0x8 ; C20A0908
s_buffer_load_dword s22, s[8:11], 0x9 ; C20B0909
s_buffer_load_dword s23, s[8:11], 0xa ; C20B890A
s_buffer_load_dword s24, s[8:11], 0xb ; C20C090B
s_buffer_load_dword s25, s[8:11], 0xc ; C20C890C
s_buffer_load_dword s26, s[8:11], 0xd ; C20D090D
s_buffer_load_dword s27, s[8:11], 0xe ; C20D890E
s_buffer_load_dword s28, s[8:11], 0xf ; C20E090F
; v29 = 7.0 * pow(1 - s0, 0.75), with pow expanded as exp(0.75 * log(x))
; (0x3f400000 = 0.75, 0x40e00000 = 7.0). v29 is the 4th address register of
; the image_sample_l below. NOTE(review): presumably the explicit LOD.
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v29, 0x40e00000, v17 ; 103A22FF 40E00000
; Cube-map coordinate setup from the direction in (v26, v27, v28):
; v33 = v_cubeid, v32 = v_cubema, v31 = v_cubesc, v30 = v_cubetc results.
v_cubeid_f32 v33, v26, v27, v28 ; D2880021 0472371A
v_cubema_f32 v32, v26, v27, v28 ; D28E0020 0472371A
v_cubesc_f32 v31, v26, v27, v28 ; D28A001F 0472371A
v_cubetc_f32 v30, v26, v27, v28 ; D28C001E 0472371A
; 0x3fc00000 = 1.5. v27 = tc / |ma| + 1.5, v26 = sc / |ma| + 1.5, v28 = id.
v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000
v_rcp_f32_e64 v17, |v32| ; D3540111 00000120
v_mad_f32 v27, v17, v30, v26 ; D282001B 046A3D11
v_mac_f32_e32 v26, v17, v31 ; 3E343F11
v_mov_b32_e32 v28, v33 ; 7E380321
s_waitcnt lgkmcnt(0) ; BF8C007F
; Fetch four channels into v[26:29] (overwriting the address registers).
image_sample_l v[26:29], 15, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[36:43], s[32:35] ; F0900F00 01091A1A
s_waitcnt vmcnt(0) ; BF8C0770
; v17 = s29 * pow(v29, s30) from the 4th channel, then the first three
; channels are scaled by it: (v20, v19, v17) = (v26, v27, v28) * v17.
v_log_f32_e32 v17, v29 ; 7E224F1D
v_mul_legacy_f32_e32 v17, s30, v17 ; 0E22221E
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s29, v17 ; 1022221D
v_mul_f32_e32 v20, v26, v17 ; 1028231A
v_mul_f32_e32 v19, v27, v17 ; 1026231B
v_mul_f32_e32 v17, v28, v17 ; 1022231C
; Keep a copy of s14 in v27: s14 is reloaded with a different constant inside
; the conditional block that follows, while v27 is read after that block.
v_mov_b32_e32 v27, s14 ; 7E36020E
; Branch condition: s12 < 0x3f7fff58 (just under 1.0). EXEC is saved in
; s[30:31]; the block is skipped when no lane passes. The BB0_6 label line is
; not printed in this dump.
v_mov_b32_e32 v26, 0x3f7fff58 ; 7E3402FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s12, v26 ; 7C02340C
s_and_saveexec_b64 s[30:31], vcc ; BE9E246A
s_xor_b64 s[30:31], exec, s[30:31] ; 899E1E7E
s_cbranch_execz BB0_6 ; BF880000
; ---------------------------------------------------------------------------
; Conditional block entered through the s_and_saveexec_b64 that precedes it
; (lanes saved in s[30:31]); it ends at the final s_or_b64 exec of this
; section. It performs a second cube-map fetch and blends the result with the
; colour already held in (v20, v19, v17), using s12 as the weight.
; ---------------------------------------------------------------------------
s_buffer_load_dword s32, s[8:11], 0x3b ; C210093B
s_buffer_load_dword s14, s[8:11], 0x3c ; C207093C
s_buffer_load_dword s29, s[8:11], 0x3d ; C20E893D
s_waitcnt lgkmcnt(0) ; BF8C007F
; Nested branch on 0 < s32 (constant dword 0x3b); EXEC is saved in s[32:33].
; The BB0_7 label line is not printed in this dump.
v_cmp_lt_f32_e64 s[32:33], 0, s32 ; D0020020 00004080
s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_7 ; BF880000
; Nested block: same per-axis bound/min/re-centre computation as the earlier
; conditional block, but using constant dwords 0x30-0x32, 0x34-0x36 and
; 0x38-0x3a, and writing its result back into (v23, v24, v25).
; NOTE(review): presumably the box projection for a second reflection probe.
s_buffer_load_dword s34, s[8:11], 0x36 ; C2110936
s_buffer_load_dword s35, s[8:11], 0x38 ; C2118938
s_buffer_load_dword s36, s[8:11], 0x39 ; C2120939
s_buffer_load_dword s37, s[8:11], 0x3a ; C212893A
s_buffer_load_dword s38, s[8:11], 0x30 ; C2130930
s_buffer_load_dword s39, s[8:11], 0x31 ; C2138931
s_buffer_load_dword s40, s[8:11], 0x32 ; C2140932
s_buffer_load_dword s41, s[8:11], 0x34 ; C2148934
s_buffer_load_dword s42, s[8:11], 0x35 ; C2150935
; v26 = rsq(dot(d, d)) for d = (v23, v24, v25).
v_mul_f32_e32 v26, v23, v23 ; 10342F17
v_mac_f32_e32 v26, v24, v24 ; 3E343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v28, s34, v22 ; 08382C22
v_mov_b32_e32 v29, s34 ; 7E3A0222
v_sub_f32_e32 v30, s38, v21 ; 083C2A26
v_sub_f32_e32 v31, s39, v16 ; 083E2027
v_add_f32_e32 v29, s40, v29 ; 063A3A28
v_sub_f32_e32 v32, s40, v22 ; 08402C28
v_mad_f32 v33, 0.5, v29, -s37 ; D2820021 80963AF0
v_add_f32_e32 v22, v22, v33 ; 062C4316
; Normalise d in place and take the reciprocal of each component.
v_mul_f32_e32 v23, v26, v23 ; 102E2F1A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_rcp_f32_e32 v26, v23 ; 7E345517
v_rcp_f32_e32 v33, v24 ; 7E425518
v_rcp_f32_e32 v34, v25 ; 7E445519
v_sub_f32_e32 v35, s41, v21 ; 08462A29
v_mov_b32_e32 v36, s41 ; 7E480229
v_add_f32_e32 v36, s38, v36 ; 06484826
v_mul_f32_e32 v30, v26, v30 ; 103C3D1A
v_mul_f32_e32 v26, v26, v35 ; 1034471A
v_mul_f32_e32 v31, v33, v31 ; 103E3F21
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v28, v34, v28 ; 10383922
v_mad_f32 v34, 0.5, v36, -s35 ; D2820022 808E48F0
v_add_f32_e32 v21, v21, v34 ; 062A4515
v_sub_f32_e32 v34, s42, v16 ; 0844202A
v_mov_b32_e32 v35, s42 ; 7E46022A
v_mul_f32_e32 v33, v33, v34 ; 10424521
v_add_f32_e32 v34, s39, v35 ; 06444627
; Per-axis select on 0 < dir, then the minimum of the three selected values.
v_cmp_lt_f32_e32 vcc, 0, v23 ; 7C022E80
v_cndmask_b32_e32 v26, v26, v30 ; 00343D1A
v_cmp_lt_f32_e32 vcc, 0, v24 ; 7C023080
v_cndmask_b32_e32 v30, v33, v31 ; 003C3F21
v_cmp_lt_f32_e32 vcc, 0, v25 ; 7C023280
v_cndmask_b32_e32 v28, v28, v32 ; 0038411C
v_min3_f32 v26, v26, v30, v28 ; D2A2001A 04723D1A
v_mad_f32 v28, 0.5, v34, -s36 ; D282001C 809244F0
v_add_f32_e32 v16, v16, v28 ; 06203910
v_mac_f32_e32 v21, v26, v23 ; 3E2A2F1A
v_mac_f32_e32 v16, v26, v24 ; 3E20311A
v_mac_f32_e32 v22, v26, v25 ; 3E2C331A
; New direction written back to (v23, v24, v25).
v_mad_f32 v23, 0.5, -v36, v21 ; D2820017 445648F0
v_mad_f32 v24, 0.5, -v34, v16 ; D2820018 444244F0
v_mad_f32 v25, 0.5, -v29, v22 ; D2820019 445A3AF0
; End of the nested block: restore the lanes saved in s[32:33].
s_or_b64 exec, exec, s[32:33] ; 88FE207E
; v26 = 7.0 * pow(1 - s0, 0.75) again (0x3f400000 = 0.75, 0x40e00000 = 7.0);
; it is the 4th address register of the image_sample_l below.
v_sub_f32_e64 v16, 1.0, s0 ; D2080010 000000F2
v_log_f32_e32 v16, v16 ; 7E204F10
s_load_dwordx4 s[32:35], s[4:5], 0x4 ; C0900504
v_mul_legacy_f32_e32 v16, 0x3f400000, v16 ; 0E2020FF 3F400000
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v26, 0x40e00000, v16 ; 103420FF 40E00000
; Cube-map coordinate setup from (v23, v24, v25).
v_cubeid_f32 v31, v23, v24, v25 ; D288001F 04663117
v_cubema_f32 v30, v23, v24, v25 ; D28E001E 04663117
s_load_dwordx8 s[36:43], s[6:7], 0x8 ; C0D20708
v_cubesc_f32 v29, v23, v24, v25 ; D28A001D 04663117
v_cubetc_f32 v28, v23, v24, v25 ; D28C001C 04663117
; 0x3fc00000 = 1.5. v24 = tc / |ma| + 1.5, v23 = sc / |ma| + 1.5, v25 = id.
v_rcp_f32_e64 v16, |v30| ; D3540110 0000011E
v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000
v_mad_f32 v24, v16, v28, v23 ; D2820018 045E3910
v_mac_f32_e32 v23, v16, v29 ; 3E2E3B10
v_mov_b32_e32 v25, v31 ; 7E32031F
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[36:43], s[32:35] ; F0900F00 01091517
s_waitcnt vmcnt(0) ; BF8C0770
; v16 = s14 * pow(v24, s29) from the 4th fetched channel; v24 is then reused
; for the blend weight 1 - s12.
v_log_f32_e32 v16, v24 ; 7E204F18
v_sub_f32_e64 v24, 1.0, s12 ; D2080018 000018F2
v_mul_legacy_f32_e32 v16, s29, v16 ; 0E20201D
v_exp_f32_e32 v16, v16 ; 7E204B10
v_mul_f32_e32 v16, s14, v16 ; 1020200E
v_mul_f32_e32 v21, v21, v16 ; 102A2115
v_mul_f32_e32 v22, v22, v16 ; 102C2116
v_mul_f32_e32 v16, v23, v16 ; 10202117
; Blend: new * (1 - s12) + s12 * previous, per channel.
v_mul_f32_e32 v21, v21, v24 ; 102A3115
v_mul_f32_e32 v22, v22, v24 ; 102C3116
v_mul_f32_e32 v16, v16, v24 ; 10203110
v_mac_f32_e32 v21, s12, v20 ; 3E2A280C
v_mac_f32_e32 v22, s12, v19 ; 3E2C260C
v_mac_f32_e32 v16, s12, v17 ; 3E20220C
; Copy the blended colour back into (v20, v19, v17), the registers read after
; the join point by both paths.
v_mov_b32_e32 v17, v16 ; 7E220310
v_mov_b32_e32 v19, v22 ; 7E260316
v_mov_b32_e32 v20, v21 ; 7E280315
; Join point: restore the lanes saved in s[30:31].
s_or_b64 exec, exec, s[30:31] ; 88FE1E7E
; ---------------------------------------------------------------------------
; Final combine and colour export. Register roles on entry, as set by the
; preceding instructions: (v6, v5, v4) normalised vector built from the second
; texture fetch, (v12, v11, v10) normalised attribute-5 vector, (v13, v14, v15)
; scaled first-texture colour, (v2, v1, v0) blended colour triple,
; (v20, v19, v17) cube-map colour, v18 second channel of the third texture
; fetch, v27 copy of constant dword 0x60.
; NOTE(review): the semantic labels in the comments below (fresnel-like,
; specular-like) are inferred from the arithmetic shape only - confirm
; against the original shader source.
; ---------------------------------------------------------------------------
; v22 = s16 - v27 * s16; v16 = s15 (constant dword 0x17, saved before s15 is
; reloaded further down).
v_mad_f32 v22, -v27, s16, s16 ; D2820016 2040211B
v_mov_b32_e32 v16, s15 ; 7E20020F
; (v21, v14, v13) = v22 * (v13, v14, v15).
v_mul_f32_e32 v21, v22, v13 ; 102A1B16
v_mul_f32_e32 v14, v22, v14 ; 101C1D16
v_mul_f32_e32 v13, v22, v15 ; 101A1F16
; Three rows of the form dot(constants, (v6, v5, v4)) + constant + attribute 4
; component, producing v15, v8 and v9 (constant dwords 0x4-0xf).
v_mul_f32_e32 v15, s18, v5 ; 101E0A12
v_mac_f32_e32 v15, s17, v6 ; 3E1E0C11
v_mac_f32_e32 v15, s19, v4 ; 3E1E0813
v_add_f32_e32 v15, s21, v15 ; 061E1E15
v_add_f32_e32 v15, v15, v7 ; 061E0F0F
v_mul_f32_e32 v7, s22, v5 ; 100E0A16
v_mac_f32_e32 v7, s20, v6 ; 3E0E0C14
v_mac_f32_e32 v7, s23, v4 ; 3E0E0817
v_add_f32_e32 v7, s24, v7 ; 060E0E18
v_add_f32_e32 v8, v7, v8 ; 06101107
v_mul_f32_e32 v7, s26, v5 ; 100E0A1A
v_mac_f32_e32 v7, s25, v6 ; 3E0E0C19
v_mac_f32_e32 v7, s27, v4 ; 3E0E081B
v_add_f32_e32 v7, s28, v7 ; 060E0E1C
v_add_f32_e32 v9, v7, v9 ; 06121307
; Last batch of constant loads. The final one writes s8, which is part of the
; s[8:11] descriptor itself; no s_buffer_load follows it in this program.
s_buffer_load_dword s6, s[8:11], 0x10 ; C2030910
s_buffer_load_dword s5, s[8:11], 0x11 ; C2028911
s_buffer_load_dword s4, s[8:11], 0x12 ; C2020912
s_buffer_load_dword s17, s[8:11], 0x16 ; C2088916
s_buffer_load_dword s14, s[8:11], 0x44 ; C2070944
s_buffer_load_dword s7, s[8:11], 0x45 ; C2038945
s_buffer_load_dword s12, s[8:11], 0x46 ; C2060946
s_buffer_load_dword s15, s[8:11], 0x48 ; C2078948
s_buffer_load_dword s16, s[8:11], 0x49 ; C2080949
s_buffer_load_dword s8, s[8:11], 0x4b ; C204094B
; v23 = (1 - s13) + s13 * v18.
v_sub_f32_e64 v23, 1.0, s13 ; D2080017 00001AF2
v_mac_f32_e32 v23, s13, v18 ; 3E2E240D
; v7 = max(0, dot((s1, s2, s3), (v6, v5, v4))) with constant dwords 0x0-0x2.
v_mul_f32_e32 v7, s1, v6 ; 100E0C01
v_mac_f32_e32 v7, s2, v5 ; 3E0E0A02
v_mac_f32_e32 v7, s3, v4 ; 3E0E0803
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_mul_f32_e32 v15, v23, v15 ; 101E1F17
v_mul_f32_e32 v8, v23, v8 ; 10101117
v_mul_f32_e32 v9, v23, v9 ; 10121317
s_waitcnt lgkmcnt(0) ; BF8C007F
; v16 = dword 0x17 + dword 0x16 * v3; it is clamped and used as the final
; blend factor near the end of the program.
v_mac_f32_e32 v16, s17, v3 ; 3E200611
; Cube-map colour scaled by v23 -> (v3, v18, v17).
v_mul_f32_e32 v3, v23, v20 ; 10062917
v_mul_f32_e32 v18, v23, v19 ; 10242717
v_mul_f32_e32 v17, v23, v17 ; 10222317
; v19 = clamp(s0 + (1 - v22)); v20 = 1 - s0.
v_sub_f32_e32 v19, 1.0, v22 ; 08262CF2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_sub_f32_e64 v20, 1.0, s0 ; D2080014 000000F2
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
; (v22, v23, v24) = normalize((s1, s2, s3) - (v12, v11, v10)).
v_sub_f32_e32 v22, s1, v12 ; 082C1801
v_sub_f32_e32 v23, s2, v11 ; 082E1602
v_mul_f32_e32 v24, v22, v22 ; 10302D16
v_mac_f32_e32 v24, v23, v23 ; 3E302F17
v_sub_f32_e32 v25, s3, v10 ; 08321403
v_mac_f32_e32 v24, v25, v25 ; 3E303319
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v24, v24, v25 ; 10303318
; v10 = -dot((v12, v11, v10), (v6, v5, v4)), clamped to >= 0 further below.
; v6  = dot((v22, v23, v24), (v6, v5, v4)).
; v4  = max(0, dot((s1, s2, s3), (v22, v23, v24))).
v_mul_f32_e32 v12, v12, v6 ; 10180D0C
v_mad_f32 v11, -v11, v5, -v12 ; D282000B A4320B0B
v_mul_f32_e32 v6, v22, v6 ; 100C0D16
v_mac_f32_e32 v6, v23, v5 ; 3E0C0B17
v_mul_f32_e32 v5, s1, v22 ; 100A2C01
v_mac_f32_e32 v5, s2, v23 ; 3E0A2E02
v_mad_f32 v10, -v10, v4, v11 ; D282000A 242E090A
v_mac_f32_e32 v5, s3, v24 ; 3E0A3003
v_mac_f32_e32 v6, v24, v4 ; 3E0C0918
v_max_f32_e32 v4, 0, v5 ; 20080A80
; v5 = (1 - v4)^5, built as x^2, then x^3, then x^3 * x^2.
v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2
v_mul_f32_e32 v11, v5, v5 ; 10160B05
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mul_f32_e32 v5, v5, v11 ; 100A1705
; v11 = 1 - max(0, v10); v12 = v11^2; v22 = v11^3; v23 = 1 - v11^5.
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v12, v11, v11 ; 1018170B
v_mul_f32_e32 v22, v11, v12 ; 102C190B
v_mad_f32 v23, -v12, v22, 1.0 ; D2820017 23CA2D0C
; Per channel c in (v2, v1, v0): (v24, v25, v23) = c * (1 - v11^5), and
; c = c + (1 - c) * v5 in place. NOTE(review): fresnel-like interpolation.
v_mul_f32_e32 v24, v2, v23 ; 10302F02
v_sub_f32_e32 v25, 1.0, v2 ; 083204F2
v_mac_f32_e32 v2, v5, v25 ; 3E043305
v_mul_f32_e32 v25, v1, v23 ; 10322F01
v_sub_f32_e32 v26, 1.0, v1 ; 083402F2
v_mac_f32_e32 v1, v5, v26 ; 3E023505
v_mul_f32_e32 v23, v0, v23 ; 102E2F00
v_sub_f32_e32 v26, 1.0, v0 ; 083400F2
v_mac_f32_e32 v0, v5, v26 ; 3E003505
; v5 combines (1 - v20) with the literals 0x3cf5c28f (about 0.03) and
; 0x3f77ced9 (about 0.968) through v_madmk; its log2 is taken further below.
v_sub_f32_e32 v5, 1.0, v20 ; 080A28F2
v_mov_b32_e32 v26, 0x3cf5c28f ; 7E3402FF 3CF5C28F
v_madmk_f32_e32 v5, v5, v26, 0x3f77ced9 ; 400A3505 3F77CED9
; v4 = 2 * v4 * (v20 * v4) + 0.5.
v_add_f32_e32 v26, v4, v4 ; 06340904
v_mul_f32_e32 v4, v20, v4 ; 10080914
v_mad_f32 v4, v26, v4, 0.5 ; D2820004 03C2091A
; v12 = v11^5; add v19 * v11^5 to each of (v24, v25, v23).
v_mul_f32_e32 v12, v22, v12 ; 10181916
v_mac_f32_e32 v24, v19, v12 ; 3E301913
v_mac_f32_e32 v25, v19, v12 ; 3E321913
v_mac_f32_e32 v23, v19, v12 ; 3E2E1913
; v19 = s8 * v20^2; v11 = v19 * (1 - v10) + v10 and, a few lines below,
; v19 = v19 * (1 - v7) + v7 (the same blend-towards-1 form for both terms).
v_mul_f32_e32 v19, v20, v20 ; 10262914
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_mul_f32_e32 v19, s8, v19 ; 10262608
v_mul_f32_e32 v11, v19, v11 ; 10161713
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v5, v5 ; 7E0A5505
v_sub_f32_e32 v10, 1.0, v7 ; 08140EF2
v_mul_f32_e32 v19, v19, v10 ; 10261513
v_mac_f32_e32 v19, 1.0, v7 ; 3E260EF2
v_max_f32_e32 v6, 0, v6 ; 200C0C80
v_log_f32_e32 v6, v6 ; 7E0C4F06
; v11 = v19 * v11 + 0x38d1b717 (about 1e-4); its reciprocal is taken below.
v_madak_f32_e32 v11, v19, v11, 0x38d1b717 ; 42161713 38D1B717
; v5 = 10.0 / log2(...) (0x41200000 = 10.0); v19 = v5^2 is the exponent of
; the pow(max(0, v6), v19) expanded as exp(v19 * log(v6)).
; NOTE(review): specular-like power term.
v_mul_f32_e32 v5, 0x41200000, v5 ; 100A0AFF 41200000
v_mul_f32_e32 v19, v5, v5 ; 10260B05
v_mul_legacy_f32_e32 v6, v19, v6 ; 0E0C0D13
v_rcp_f32_e32 v11, v11 ; 7E16550B
; v5 = s15 * v7 * (1 / v11) * pow(...) * s16 * (v5^2 + 1).
v_mad_f32 v5, v5, v5, 1.0 ; D2820005 03CA0B05
v_mul_f32_e32 v5, s16, v5 ; 100A0A10
v_exp_f32_e32 v6, v6 ; 7E0C4B06
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v5, v5, v11 ; 100A1705
v_mul_f32_e32 v5, v7, v5 ; 100A0B07
v_mul_f32_e32 v5, s15, v5 ; 100A0A0F
; v6 = (1 - v7)^5, then v4 = v7 * (1 + (v4 - 1) * v6) * (1 + (v4 - 1) * v12).
v_mul_f32_e32 v6, v10, v10 ; 100C150A
v_mul_f32_e32 v10, v10, v6 ; 10140D0A
v_mul_f32_e32 v6, v10, v6 ; 100C0D0A
v_add_f32_e32 v4, -1.0, v4 ; 060808F3
v_mad_f32 v6, v4, v6, 1.0 ; D2820006 03CA0D04
v_mad_f32 v4, v4, v12, 1.0 ; D2820004 03CA1904
v_mul_f32_e32 v4, v4, v6 ; 10080D04
v_mul_f32_e32 v4, v7, v4 ; 10080907
; Accumulate the three output channels in (v6, v5, v7): each is the sum of
; three products, using the per-channel constants s14, s7, s12.
v_mac_f32_e32 v15, s14, v4 ; 3E1E080E
v_mul_f32_e32 v6, v15, v21 ; 100C2B0F
v_max_f32_e32 v5, 0, v5 ; 200A0A80
v_mul_f32_e32 v7, s14, v5 ; 100E0A0E
v_mac_f32_e32 v6, v2, v7 ; 3E0C0F02
v_mac_f32_e32 v8, s7, v4 ; 3E100807
v_mac_f32_e32 v9, s12, v4 ; 3E12080C
v_mul_f32_e32 v2, s7, v5 ; 10040A07
v_mul_f32_e32 v4, s12, v5 ; 10080A0C
v_mul_f32_e32 v5, v8, v14 ; 100A1D08
v_mul_f32_e32 v7, v9, v13 ; 100E1B09
v_mac_f32_e32 v5, v1, v2 ; 3E0A0501
v_mac_f32_e32 v7, v0, v4 ; 3E0E0900
v_mac_f32_e32 v6, v24, v3 ; 3E0C0718
v_mac_f32_e32 v5, v25, v18 ; 3E0A2519
v_mac_f32_e32 v7, v23, v17 ; 3E0E2317
; Final blend: f = clamp(v16); out = (s6, s5, s4) * (1 - f) + colour * f.
v_add_f32_e64 v0, 0, v16 clamp ; D2060800 00022080
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v2, s6, v1 ; 10040206
v_mac_f32_e32 v2, v6, v0 ; 3E040106
v_mul_f32_e32 v3, s5, v1 ; 10060205
v_mac_f32_e32 v3, v5, v0 ; 3E060105
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mac_f32_e32 v1, v7, v0 ; 3E020107
; Pack (v2, v3) and (v1, 1.0) into two half-float pairs, export them and end
; the program.
v_cvt_pkrtz_f16_f32_e32 v0, v2, v3 ; 5E000702
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 56
VGPRS: 40
Code Size: 2312 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL CONST[0..20]
DCL TEMP[0..7], LOCAL
IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[2], IN[0].xxxx
1: MAD TEMP[0], CONST[3], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[4], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[5], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[10].xyyy, CONST[10].zwww
9: FSEQ TEMP[3].x, CONST[12].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[11].xyyy, CONST[11].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[6].xxxx
18: MOV TEMP[3].y, CONST[7].xxxx
19: MOV TEMP[3].z, CONST[8].xxxx
20: MOV TEMP[4].x, CONST[6].yyyy
21: MOV TEMP[4].y, CONST[7].yyyy
22: MOV TEMP[4].z, CONST[8].yyyy
23: MOV TEMP[5].x, CONST[6].zzzz
24: MOV TEMP[5].y, CONST[7].zzzz
25: MOV TEMP[5].z, CONST[8].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[2].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[3].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[4].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[3].xyz, TEMP[3].xyzx
44: MUL TEMP[6].xyz, TEMP[0].xyzz, CONST[1].wwww
45: ADD TEMP[6].xyz, CONST[1].xyzz, -TEMP[6].xyzz
46: MOV TEMP[4].w, TEMP[6].xxxx
47: MOV TEMP[5].w, TEMP[6].yyyy
48: MOV TEMP[3].w, TEMP[6].zzzz
49: MUL TEMP[6], CONST[2], IN[0].xxxx
50: MAD TEMP[6], CONST[3], IN[0].yyyy, TEMP[6]
51: MAD TEMP[6], CONST[4], IN[0].zzzz, TEMP[6]
52: MAD TEMP[6], CONST[5], IN[0].wwww, TEMP[6]
53: ADD TEMP[0].xyz, TEMP[0].xyzz, -CONST[0].xyzz
54: MOV TEMP[0].yzw, TEMP[0].yxyz
55: MUL TEMP[7], CONST[13], TEMP[6].xxxx
56: MAD TEMP[7], CONST[14], TEMP[6].yyyy, TEMP[7]
57: MAD TEMP[7], CONST[15], TEMP[6].zzzz, TEMP[7]
58: MAD TEMP[6].xyz, CONST[16], TEMP[6].wwww, TEMP[7]
59: MOV TEMP[6].xyz, TEMP[6].xyzx
60: MOV TEMP[0].x, TEMP[1].zzzz
61: MOV OUT[1], TEMP[2]
62: MOV OUT[3], TEMP[5]
63: MOV OUT[2], TEMP[4]
64: MOV OUT[4], TEMP[3]
65: MOV OUT[0], TEMP[1]
66: MOV OUT[6], TEMP[6]
67: MOV OUT[5], TEMP[0]
68: END
; ModuleID = 'tgsi'
; Vertex shader entry point generated from the TGSI listing printed directly
; above this module. Reading guide (all derived from the operands below):
;   * argument %1 -> array of 16-byte descriptors; element 0 is the constant
;     buffer read through llvm.SI.load.const (byte offsets, 16 bytes per CONST[n]).
;   * argument %4 -> array of 16-byte descriptors; elements 0..4 are the five
;     vertex inputs IN[0]..IN[4] fetched through llvm.SI.vs.load.input.
;   * %5 + %7 is the index handed to every vertex fetch.
;     NOTE(review): the meaning of %5/%7 (e.g. base vertex / vertex id) is not
;     visible in this dump - confirm against the driver.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; ---- Constant buffer descriptor (slot 0 of %1) ----
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; CONST[0].xyz (bytes 0..8)
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
; CONST[1].xyzw (bytes 16..28)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
; CONST[2..5].xyzw (bytes 32..92)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
; CONST[6].xyz, CONST[7].xyz, CONST[8].xyz (bytes 96.., 112.., 128..)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
; CONST[10].xyzw and CONST[11].xyzw (bytes 160..188): scale/offset pairs for TGSI 8 and 15
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
; CONST[12].x (byte 192): selector tested against 0.0 by TGSI 9 (FSEQ)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
; CONST[13..16].xyz (bytes 208.., 224.., 240.., 256..): used by TGSI 55-58
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
; CONST[17..20].xyzw (bytes 272..332): feed the value exported to target 12 below
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; ---- Vertex fetches: one descriptor load + one fetch per TGSI input ----
; IN[0] -> %86..%89 (xyzw)
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
; IN[1] -> %94..%96 (only xyz is extracted)
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
%96 = extractelement <4 x float> %93, i32 2
; IN[2] -> %101, %102 (only xy is extracted)
%97 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = add i32 %5, %7
%100 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %98, i32 0, i32 %99)
%101 = extractelement <4 x float> %100, i32 0
%102 = extractelement <4 x float> %100, i32 1
; IN[3] -> %107, %108 (only xy is extracted)
%103 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = add i32 %5, %7
%106 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %104, i32 0, i32 %105)
%107 = extractelement <4 x float> %106, i32 0
%108 = extractelement <4 x float> %106, i32 1
; IN[4] -> %113..%116 (xyzw)
%109 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = add i32 %5, %7
%112 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %110, i32 0, i32 %111)
%113 = extractelement <4 x float> %112, i32 0
%114 = extractelement <4 x float> %112, i32 1
%115 = extractelement <4 x float> %112, i32 2
%116 = extractelement <4 x float> %112, i32 3
; ---- %133/%135/%137 = xyz of CONST[2]*IN[0].x + CONST[3]*IN[0].y + CONST[4]*IN[0].z + CONST[5]*IN[0].w ----
; These are the values later consumed as TEMP[0].xyz by TGSI 44 and 53.
%117 = fmul float %20, %86
%118 = fmul float %21, %86
%119 = fmul float %22, %86
%120 = fmul float %24, %87
%121 = fadd float %120, %117
%122 = fmul float %25, %87
%123 = fadd float %122, %118
%124 = fmul float %26, %87
%125 = fadd float %124, %119
%126 = fmul float %28, %88
%127 = fadd float %126, %121
%128 = fmul float %29, %88
%129 = fadd float %128, %123
%130 = fmul float %30, %88
%131 = fadd float %130, %125
%132 = fmul float %32, %89
%133 = fadd float %132, %127
%134 = fmul float %33, %89
%135 = fadd float %134, %129
%136 = fmul float %34, %89
%137 = fadd float %136, %131
; ---- %159/%161/%163/%165 = CONST[17..20] combined with IN[0].xyzw (same pattern, all four components) ----
; This is the vector exported to target 12 (TGSI OUT[0] = TEMP[1]).
%138 = fmul float %66, %86
%139 = fmul float %67, %86
%140 = fmul float %68, %86
%141 = fmul float %69, %86
%142 = fmul float %70, %87
%143 = fadd float %142, %138
%144 = fmul float %71, %87
%145 = fadd float %144, %139
%146 = fmul float %72, %87
%147 = fadd float %146, %140
%148 = fmul float %73, %87
%149 = fadd float %148, %141
%150 = fmul float %74, %88
%151 = fadd float %150, %143
%152 = fmul float %75, %88
%153 = fadd float %152, %145
%154 = fmul float %76, %88
%155 = fadd float %154, %147
%156 = fmul float %77, %88
%157 = fadd float %156, %149
%158 = fmul float %78, %89
%159 = fadd float %158, %151
%160 = fmul float %79, %89
%161 = fadd float %160, %153
%162 = fmul float %80, %89
%163 = fadd float %162, %155
%164 = fmul float %81, %89
%165 = fadd float %164, %157
; ---- TGSI 8: TEMP[2].xy = IN[2].xy * CONST[10].xy + CONST[10].zw ----
%166 = fmul float %101, %45
%167 = fadd float %166, %47
%168 = fmul float %102, %46
%169 = fadd float %168, %48
; ---- TGSI 9-14: the UIF/ELSE/ENDIF was flattened into two selects:
;      CONST[12].x == 0.0 picks IN[2].xy, otherwise IN[3].xy ----
%170 = fcmp oeq float %53, 0.000000e+00
%. = select i1 %170, float %101, float %107
%.32 = select i1 %170, float %102, float %108
; ---- TGSI 15-16: TEMP[2].zw = selected.xy * CONST[11].xy + CONST[11].zw ----
%171 = fmul float %., %49
%172 = fadd float %171, %51
%173 = fmul float %.32, %50
%174 = fadd float %173, %52
; ---- TGSI 17-28: multiply IN[1].xyz by the matrix whose rows are CONST[6..8].xyz
;      (TGSI gathers the columns into TEMP[3..5] first) ----
%175 = fmul float %36, %94
%176 = fmul float %39, %94
%177 = fmul float %42, %94
%178 = fmul float %37, %95
%179 = fadd float %178, %175
%180 = fmul float %40, %95
%181 = fadd float %180, %176
%182 = fmul float %43, %95
%183 = fadd float %182, %177
%184 = fmul float %38, %96
%185 = fadd float %184, %179
%186 = fmul float %41, %96
%187 = fadd float %186, %181
%188 = fmul float %44, %96
%189 = fadd float %188, %183
; ---- TGSI 29-31: normalize that vector (DP3, RSQ, MUL) -> %196..%198 = TEMP[3].xyz ----
%190 = fmul float %185, %185
%191 = fmul float %187, %187
%192 = fadd float %191, %190
%193 = fmul float %189, %189
%194 = fadd float %192, %193
%195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194)
%196 = fmul float %185, %195
%197 = fmul float %187, %195
%198 = fmul float %189, %195
; ---- TGSI 32-34: CONST[2..4].xyz combined with IN[4].xyz ----
%199 = fmul float %20, %113
%200 = fmul float %21, %113
%201 = fmul float %22, %113
%202 = fmul float %24, %114
%203 = fadd float %202, %199
%204 = fmul float %25, %114
%205 = fadd float %204, %200
%206 = fmul float %26, %114
%207 = fadd float %206, %201
%208 = fmul float %28, %115
%209 = fadd float %208, %203
%210 = fmul float %29, %115
%211 = fadd float %210, %205
%212 = fmul float %30, %115
%213 = fadd float %212, %207
; ---- TGSI 35-37: normalize -> %220..%222 = TEMP[4].xyz ----
%214 = fmul float %209, %209
%215 = fmul float %211, %211
%216 = fadd float %215, %214
%217 = fmul float %213, %213
%218 = fadd float %216, %217
%219 = call float @llvm.AMDGPU.rsq.clamped.f32(float %218)
%220 = fmul float %209, %219
%221 = fmul float %211, %219
%222 = fmul float %213, %219
; ---- TGSI 38-39: cross product TEMP[3].yzx * TEMP[4].zxy - TEMP[3].zxy * TEMP[4].yzx ----
%223 = fmul float %198, %221
%224 = fmul float %196, %222
%225 = fmul float %197, %220
%226 = fmul float %197, %222
%227 = fsub float %226, %223
%228 = fmul float %198, %220
%229 = fsub float %228, %224
%230 = fmul float %196, %221
%231 = fsub float %230, %225
; ---- TGSI 40: scale the cross product by IN[4].w -> %232..%234 = TEMP[5].xyz ----
%232 = fmul float %227, %116
%233 = fmul float %229, %116
%234 = fmul float %231, %116
; ---- TGSI 44-45: TEMP[6].xyz = CONST[1].xyz - TEMP[0].xyz * CONST[1].w
;      (becomes the .w of the three exports 33/34/35 via TGSI 46-48) ----
%235 = fmul float %133, %19
%236 = fmul float %135, %19
%237 = fmul float %137, %19
%238 = fsub float %16, %235
%239 = fsub float %17, %236
%240 = fsub float %18, %237
; ---- TGSI 49-52: TEMP[6] = CONST[2..5] combined with IN[0] (all four components).
;      The xyz lanes repeat the exact expressions of %117..%137; only the
;      .w lane (%244/%252/%260/%268) is new work. ----
%241 = fmul float %20, %86
%242 = fmul float %21, %86
%243 = fmul float %22, %86
%244 = fmul float %23, %86
%245 = fmul float %24, %87
%246 = fadd float %245, %241
%247 = fmul float %25, %87
%248 = fadd float %247, %242
%249 = fmul float %26, %87
%250 = fadd float %249, %243
%251 = fmul float %27, %87
%252 = fadd float %251, %244
%253 = fmul float %28, %88
%254 = fadd float %253, %246
%255 = fmul float %29, %88
%256 = fadd float %255, %248
%257 = fmul float %30, %88
%258 = fadd float %257, %250
%259 = fmul float %31, %88
%260 = fadd float %259, %252
%261 = fmul float %32, %89
%262 = fadd float %261, %254
%263 = fmul float %33, %89
%264 = fadd float %263, %256
%265 = fmul float %34, %89
%266 = fadd float %265, %258
%267 = fmul float %35, %89
%268 = fadd float %267, %260
; ---- TGSI 53-54: TEMP[0].xyz - CONST[0].xyz, later swizzled into the .yzw of export 36 ----
%269 = fsub float %133, %13
%270 = fsub float %135, %14
%271 = fsub float %137, %15
; ---- TGSI 55-58: xyz of CONST[13]*T.x + CONST[14]*T.y + CONST[15]*T.z + CONST[16]*T.w,
;      with T = TEMP[6] from TGSI 49-52; T.w (%268) is kept as the output .w ----
%272 = fmul float %54, %262
%273 = fmul float %55, %262
%274 = fmul float %56, %262
%275 = fmul float %57, %264
%276 = fadd float %275, %272
%277 = fmul float %58, %264
%278 = fadd float %277, %273
%279 = fmul float %59, %264
%280 = fadd float %279, %274
%281 = fmul float %60, %266
%282 = fadd float %281, %276
%283 = fmul float %61, %266
%284 = fadd float %283, %278
%285 = fmul float %62, %266
%286 = fadd float %285, %280
%287 = fmul float %63, %268
%288 = fadd float %287, %282
%289 = fmul float %64, %268
%290 = fadd float %289, %284
%291 = fmul float %65, %268
%292 = fadd float %291, %286
; ---- Exports. The 4th i32 operand is the export target; only the final call
;      (target 12) sets the 3rd i32 operand to 1.
;      NOTE(review): the OUT[] declarations are outside this excerpt; the
;      target <-> OUT[n] pairing below is inferred from matching values. ----
; target 32 <- TEMP[2] (TGSI OUT[1])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %167, float %169, float %172, float %174)
; target 33 <- TEMP[4] (TGSI OUT[2])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %220, float %221, float %222, float %238)
; target 34 <- TEMP[5] (TGSI OUT[3])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %232, float %233, float %234, float %239)
; target 35 <- TEMP[3] (TGSI OUT[4])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %196, float %197, float %198, float %240)
; target 36 <- TEMP[0] (TGSI OUT[5]); .x is TEMP[1].z (%163) per TGSI 60
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %163, float %269, float %270, float %271)
; target 37 <- TEMP[6] (TGSI OUT[6])
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %288, float %290, float %292, float %268)
; target 12 <- TEMP[1] (TGSI OUT[0])
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %159, float %161, float %163, float %165)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the vertex shader @main defined in the IR above.
; s_buffer_load_dword offsets are dword indices into the constant buffer:
; offset N holds CONST[N / 4], component N % 4 (e.g. 0x30 = byte 192 = CONST[12].x,
; the value IR %53 compares against 0).
; ---- Load the five 4-dword vertex-buffer descriptors addressed by s[8:9] ----
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
; v0 = s10 + v0: the fetch index shared by all inputs (IR: add i32 %5, %7)
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
; ---- Fetch IN[0]..IN[4]. The destination ranges overlap (v[5:8], v[8:11],
;      v[10:13], v[12:15]) because the IR only uses xyz of IN[1] and xy of
;      IN[2]/IN[3]; each later fetch waits for the earlier ones (vmcnt(0)) ----
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00
; ---- Constant-buffer descriptor (first entry addressed by s[2:3]) ----
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; s4 = CONST[12].x, s5 = CONST[10].z, s6 = CONST[10].x, s7..s9 = CONST[13].xyz, s10 = CONST[14].x
s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130
s_buffer_load_dword s5, s[0:3], 0x2a ; C202812A
s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128
s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134
s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135
s_buffer_load_dword s9, s[0:3], 0x36 ; C2048136
s_buffer_load_dword s10, s[0:3], 0x38 ; C2050138
s_waitcnt lgkmcnt(0) ; BF8C007F
; ---- Flattened UIF: compare CONST[12].x with 0 and pick IN[2].xy (v8,v9)
;      or IN[3].xy (v10,v11) per component (IR selects %. and %.32) ----
v_cmp_eq_f32_e64 vcc, 0, s4 ; D004006A 00000880
v_cndmask_b32_e32 v0, v10, v8 ; 0000110A
v_cndmask_b32_e32 v10, v11, v9 ; 0014130B
; v11 = CONST[10].z + CONST[10].x * IN[2].x (export 32, component x)
v_mov_b32_e32 v11, s5 ; 7E160205
s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129
s_buffer_load_dword s5, s[0:3], 0x2b ; C202812B
v_mac_f32_e32 v11, s6, v8 ; 3E161006
s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118
s_buffer_load_dword s11, s[0:3], 0x19 ; C2058119
s_buffer_load_dword s12, s[0:3], 0x1c ; C206011C
s_buffer_load_dword s13, s[0:3], 0x1d ; C206811D
s_buffer_load_dword s14, s[0:3], 0x20 ; C2070120
s_buffer_load_dword s15, s[0:3], 0x2c ; C207812C
s_buffer_load_dword s16, s[0:3], 0x2d ; C208012D
s_buffer_load_dword s17, s[0:3], 0x2e ; C208812E
s_buffer_load_dword s18, s[0:3], 0x2f ; C209012F
s_waitcnt lgkmcnt(0) ; BF8C007F
; v8 = CONST[10].w + CONST[10].y * IN[2].y (export 32, component y)
v_mov_b32_e32 v8, s5 ; 7E100205
s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121
v_mac_f32_e32 v8, s4, v9 ; 3E101204
; ---- v9 / v16 / v5 accumulate CONST[6] / CONST[7] / CONST[8] .xyz dotted with IN[1].xyz (v5..v7) ----
v_mul_f32_e32 v9, s6, v5 ; 10120A06
v_mac_f32_e32 v9, s11, v6 ; 3E120C0B
s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A
s_buffer_load_dword s6, s[0:3], 0x1e ; C203011E
v_mul_f32_e32 v16, s12, v5 ; 10200A0C
v_mac_f32_e32 v16, s13, v6 ; 3E200C0D
v_mul_f32_e32 v5, s14, v5 ; 100A0A0E
s_buffer_load_dword s11, s[0:3], 0x22 ; C2058122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v5, s5, v6 ; 3E0A0C05
s_buffer_load_dword s5, s[0:3], 0x44 ; C2028144
s_buffer_load_dword s12, s[0:3], 0x48 ; C2060148
s_buffer_load_dword s13, s[0:3], 0x45 ; C2068145
s_buffer_load_dword s14, s[0:3], 0x49 ; C2070149
v_mac_f32_e32 v9, s4, v7 ; 3E120E04
s_buffer_load_dword s4, s[0:3], 0x46 ; C2020146
s_buffer_load_dword s19, s[0:3], 0x4a ; C209814A
s_buffer_load_dword s20, s[0:3], 0x47 ; C20A0147
v_mac_f32_e32 v16, s6, v7 ; 3E200E06
v_mac_f32_e32 v5, s11, v7 ; 3E0A0E0B
s_waitcnt lgkmcnt(0) ; BF8C007F
; ---- v6 / v7 / v17 / v18 accumulate CONST[17..20] combined with IN[0] (v1..v4):
;      the vector exported to target 12 at the end ----
v_mul_f32_e32 v6, s5, v1 ; 100C0205
v_mac_f32_e32 v6, s12, v2 ; 3E0C040C
v_mul_f32_e32 v7, s13, v1 ; 100E020D
v_mac_f32_e32 v7, s14, v2 ; 3E0E040E
s_buffer_load_dword s5, s[0:3], 0x4b ; C202814B
v_mul_f32_e32 v17, s4, v1 ; 10220204
v_mac_f32_e32 v17, s19, v2 ; 3E220413
v_mul_f32_e32 v18, s20, v1 ; 10240214
s_buffer_load_dword s4, s[0:3], 0xb ; C202010B
s_buffer_load_dword s6, s[0:3], 0xf ; C203010F
s_buffer_load_dword s11, s[0:3], 0x4c ; C205814C
s_buffer_load_dword s12, s[0:3], 0x4d ; C206014D
s_buffer_load_dword s13, s[0:3], 0x4e ; C206814E
s_buffer_load_dword s14, s[0:3], 0x4f ; C207014F
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v18, s5, v2 ; 3E240405
s_buffer_load_dword s5, s[0:3], 0xc ; C202810C
s_buffer_load_dword s19, s[0:3], 0xd ; C209810D
s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E
; ---- v19 accumulates the .w lane and v20 / v21 / v1 the .xyz lanes of
;      CONST[2..5] combined with IN[0] (IR %268 and %133/%135/%137) ----
v_mul_f32_e32 v19, s4, v1 ; 10260204
s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113
v_mac_f32_e32 v19, s6, v2 ; 3E260406
v_mac_f32_e32 v6, s11, v3 ; 3E0C060B
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
v_mac_f32_e32 v7, s12, v3 ; 3E0E060C
v_mac_f32_e32 v17, s13, v3 ; 3E22060D
s_buffer_load_dword s11, s[0:3], 0x10 ; C2058110
s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114
v_mac_f32_e32 v18, s14, v3 ; 3E24060E
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v19, s4, v3 ; 3E260604
s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
v_mul_f32_e32 v20, s6, v1 ; 10280206
v_mac_f32_e32 v20, s5, v2 ; 3E280405
s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112
v_mac_f32_e32 v20, s11, v3 ; 3E28060B
v_mac_f32_e32 v20, s12, v4 ; 3E28080C
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
v_mul_f32_e32 v21, s13, v1 ; 102A020D
v_mac_f32_e32 v21, s19, v2 ; 3E2A0413
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s4, v3 ; 3E2A0604
s_buffer_load_dword s22, s[0:3], 0x16 ; C20B0116
s_buffer_load_dword s23, s[0:3], 0x50 ; C20B8150
s_buffer_load_dword s24, s[0:3], 0x51 ; C20C0151
s_buffer_load_dword s25, s[0:3], 0x52 ; C20C8152
s_buffer_load_dword s26, s[0:3], 0x53 ; C20D0153
s_buffer_load_dword s27, s[0:3], 0x17 ; C20D8117
v_mac_f32_e32 v21, s12, v4 ; 3E2A080C
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mac_f32_e32 v1, s20, v2 ; 3E020414
v_mac_f32_e32 v1, s21, v3 ; 3E020615
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v1, s22, v4 ; 3E020816
v_mac_f32_e32 v6, s23, v4 ; 3E0C0817
v_mac_f32_e32 v7, s24, v4 ; 3E0E0818
v_mac_f32_e32 v17, s25, v4 ; 3E220819
v_mac_f32_e32 v18, s26, v4 ; 3E24081A
v_mac_f32_e32 v19, s27, v4 ; 3E26081B
; v2 / v0 = CONST[11].zw + CONST[11].xy * selected.xy (export 32, components z and w)
v_mov_b32_e32 v2, s17 ; 7E040211
v_mac_f32_e32 v2, s15, v0 ; 3E04000F
v_mov_b32_e32 v0, s18 ; 7E000212
v_mac_f32_e32 v0, s16, v10 ; 3E001410
; ---- Export target 32 (IR: %167, %169, %172, %174) ----
exp 15, 32, 0, 0, 0, v11, v8, v2, v0 ; F800020F 0002080B
s_waitcnt expcnt(0) ; BF8C070F
; ---- v0 / v2 / v3 = CONST[2..4].xyz combined with IN[4].xyz (v12..v14) ----
v_mul_f32_e32 v0, s6, v12 ; 10001806
v_mac_f32_e32 v0, s5, v13 ; 3E001A05
v_mul_f32_e32 v2, s13, v12 ; 1004180D
v_mac_f32_e32 v2, s19, v13 ; 3E041A13
v_mul_f32_e32 v3, s14, v12 ; 1006180E
v_mac_f32_e32 v3, s20, v13 ; 3E061A14
v_mac_f32_e32 v0, s11, v14 ; 3E001C0B
v_mac_f32_e32 v2, s4, v14 ; 3E041C04
v_mac_f32_e32 v3, s21, v14 ; 3E061C15
; ---- Squared lengths and reciprocal square roots for both vectors (v4 and v8) ----
v_mul_f32_e32 v4, v9, v9 ; 10081309
v_mac_f32_e32 v4, v16, v16 ; 3E082110
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v2, v2 ; 3E100502
v_mac_f32_e32 v4, v5, v5 ; 3E080B05
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104
s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107
v_mac_f32_e32 v8, v3, v3 ; 3E100703
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
; v9 / v10 / v4 = normalized CONST[6..8] * IN[1]; v0 / v2 / v3 = normalized CONST[2..4] * IN[4]
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mul_f32_e32 v10, v4, v16 ; 10142104
v_mul_f32_e32 v4, v4, v5 ; 10080B04
v_mul_f32_e32 v0, v8, v0 ; 10000108
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mul_f32_e32 v3, v8, v3 ; 10060708
s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105
s_waitcnt lgkmcnt(0) ; BF8C007F
; v5 = CONST[1].x - v20 * CONST[1].w (IR %238)
v_mov_b32_e32 v5, s4 ; 7E0A0204
v_mad_f32 v5, -v20, s5, v5 ; D2820005 24140B14
; ---- Export target 33 ----
exp 15, 33, 0, 0, 0, v0, v2, v3, v5 ; F800021F 05030200
s_waitcnt expcnt(0) ; BF8C070F
; ---- Cross product of the two normalized vectors, then scaled by IN[4].w (v15) ----
v_mul_f32_e32 v5, v2, v4 ; 100A0902
v_mad_f32 v5, v10, v3, -v5 ; D2820005 8416070A
v_mul_f32_e32 v3, v3, v9 ; 10061303
v_mad_f32 v3, v4, v0, -v3 ; D2820003 840E0104
v_mul_f32_e32 v0, v0, v10 ; 10001500
v_mad_f32 v0, v9, v2, -v0 ; D2820000 84020509
v_mul_f32_e32 v2, v15, v5 ; 10040B0F
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_f32_e32 v0, v15, v0 ; 1000010F
; v5 = CONST[1].y - v21 * CONST[1].w (IR %239)
v_mov_b32_e32 v5, s6 ; 7E0A0206
v_mad_f32 v5, -v21, s5, v5 ; D2820005 24140B15
; ---- Export target 34 ----
exp 15, 34, 0, 0, 0, v2, v3, v0, v5 ; F800022F 05000302
; Remaining constants: CONST[1].z, CONST[0].xyz, CONST[16].xyz, CONST[14].yz, CONST[15].xyz
s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106
s_buffer_load_dword s6, s[0:3], 0x0 ; C2030100
s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101
s_buffer_load_dword s12, s[0:3], 0x2 ; C2060102
s_buffer_load_dword s13, s[0:3], 0x40 ; C2068140
s_buffer_load_dword s14, s[0:3], 0x41 ; C2070141
s_buffer_load_dword s15, s[0:3], 0x42 ; C2078142
s_buffer_load_dword s16, s[0:3], 0x39 ; C2080139
s_buffer_load_dword s17, s[0:3], 0x3a ; C208813A
s_buffer_load_dword s18, s[0:3], 0x3c ; C209013C
s_buffer_load_dword s19, s[0:3], 0x3d ; C209813D
s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E
s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F
; v0 = CONST[1].z - v1 * CONST[1].w (IR %240)
v_mov_b32_e32 v0, s4 ; 7E000204
v_mad_f32 v0, -v1, s5, v0 ; D2820000 24000B01
; v2 / v11 / v1 = (v20, v21, v1) - CONST[0].xyz (IR %269..%271);
; v3 / v5 / v8 = CONST[13..16].xyz combined with (v20, v21, v1, v19) (IR %288/%290/%292)
v_subrev_f32_e32 v2, s6, v20 ; 0A042806
v_mul_f32_e32 v3, s7, v20 ; 10062807
v_mul_f32_e32 v5, s8, v20 ; 100A2808
v_mul_f32_e32 v8, s9, v20 ; 10102809
v_mac_f32_e32 v3, s10, v21 ; 3E062A0A
v_mac_f32_e32 v5, s16, v21 ; 3E0A2A10
v_mac_f32_e32 v8, s17, v21 ; 3E102A11
v_subrev_f32_e32 v11, s11, v21 ; 0A162A0B
v_mac_f32_e32 v3, s18, v1 ; 3E060212
v_mac_f32_e32 v5, s19, v1 ; 3E0A0213
v_mac_f32_e32 v8, s0, v1 ; 3E100200
v_subrev_f32_e32 v1, s12, v1 ; 0A02020C
v_mac_f32_e32 v3, s13, v19 ; 3E06260D
v_mac_f32_e32 v5, s14, v19 ; 3E0A260E
v_mac_f32_e32 v8, s15, v19 ; 3E10260F
; ---- Export targets 35, 36, 37 and finally target 12, the only export whose
;      fourth operand is 1 (matching the last llvm.SI.export call in the IR) ----
exp 15, 35, 0, 0, 0, v9, v10, v4, v0 ; F800023F 00040A09
exp 15, 36, 0, 0, 0, v17, v2, v11, v1 ; F800024F 010B0211
exp 15, 37, 0, 0, 0, v3, v5, v8, v19 ; F800025F 13080503
exp 15, 12, 0, 1, 0, v6, v7, v17, v18 ; F80008CF 12110706
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 24
Code Size: 920 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..10]
DCL TEMP[0..13], LOCAL
IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000}
IMM[1] FLT32 { 10.0000, 0.9680, 0.0300, 0.5000}
IMM[2] FLT32 { 0.0001, 0.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3], TEMP[3], SAMP[0], 2D
11: MUL TEMP[4].x, TEMP[3].wwww, CONST[4].wwww
12: FSLT TEMP[4].x, TEMP[4].xxxx, CONST[5].xxxx
13: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx
14: KILL_IF -TEMP[4].xxxx
15: MOV TEMP[4].xy, IN[0].xyyy
16: TEX TEMP[4].yw, TEMP[4], SAMP[1], 2D
17: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[0].yyyy, IMM[0].zzzz
18: MUL TEMP[4].xy, TEMP[4].xyyy, CONST[8].xxxx
19: DP2 TEMP[5].x, TEMP[4].xyyy, TEMP[4].xyyy
20: MOV_SAT TEMP[5].x, TEMP[5].xxxx
21: ADD TEMP[5].x, IMM[0].xxxx, -TEMP[5].xxxx
22: SQRT TEMP[5].x, TEMP[5].xxxx
23: MOV TEMP[4].z, TEMP[5].xxxx
24: DP3 TEMP[0].x, TEMP[4].xyzz, TEMP[0].xyzz
25: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[1].xyzz
26: MOV TEMP[0].y, TEMP[1].xxxx
27: DP3 TEMP[1].x, TEMP[4].xyzz, TEMP[2].xyzz
28: MOV TEMP[0].z, TEMP[1].xxxx
29: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
30: RSQ TEMP[1].x, TEMP[1].xxxx
31: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
32: MUL TEMP[1].xyz, CONST[4].xyzz, TEMP[3].xyzz
33: LRP TEMP[2].xyz, CONST[9].xxxx, TEMP[1].xyzz, CONST[1].xyzz
34: MOV TEMP[3].x, IN[1].wwww
35: MOV TEMP[3].y, IN[2].wwww
36: MOV TEMP[3].z, IN[3].wwww
37: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
38: RSQ TEMP[4].x, TEMP[4].xxxx
39: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
40: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[3].xyzz
41: MAX TEMP[4].x, IMM[0].wwww, TEMP[4].xxxx
42: DP3 TEMP[5].x, IN[5].xyzz, IN[5].xyzz
43: MOV TEMP[5].xy, TEMP[5].xxxx
44: TEX TEMP[5].w, TEMP[5], SAMP[2], 2D
45: MUL TEMP[5].xyz, CONST[2].xyzz, TEMP[5].wwww
46: DP3 TEMP[6].x, IN[4].yzww, IN[4].yzww
47: RSQ TEMP[6].x, TEMP[6].xxxx
48: MUL TEMP[6].xyz, IN[4].yzww, TEMP[6].xxxx
49: MOV TEMP[6].xyz, -TEMP[6].xyzx
50: ADD TEMP[7].x, IMM[0].xxxx, -CONST[10].xxxx
51: ADD TEMP[8].xyz, TEMP[3].xyzz, TEMP[6].xyzz
52: DP3 TEMP[9].x, TEMP[8].xyzz, TEMP[8].xyzz
53: RSQ TEMP[9].x, TEMP[9].xxxx
54: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[9].xxxx
55: DP3 TEMP[6].x, TEMP[0].xyzz, TEMP[6].xyzz
56: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
57: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[8].xyzz
58: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx
59: MUL TEMP[9].x, TEMP[7].xxxx, TEMP[7].xxxx
60: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].wwww
61: ADD TEMP[10].x, IMM[0].xxxx, -TEMP[7].xxxx
62: MAD TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy, IMM[1].zzzz
63: LG2 TEMP[10].x, TEMP[10].xxxx
64: RCP TEMP[10].x, TEMP[10].xxxx
65: MUL TEMP[10].x, IMM[1].xxxx, TEMP[10].xxxx
66: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[10].xxxx
67: ADD TEMP[11].x, IMM[0].xxxx, -TEMP[4].xxxx
68: ADD TEMP[12].x, IMM[0].xxxx, -TEMP[6].xxxx
69: MUL TEMP[13].x, IMM[0].yyyy, TEMP[3].xxxx
70: MUL TEMP[7].x, TEMP[3].xxxx, TEMP[7].xxxx
71: MAD TEMP[7].x, TEMP[13].xxxx, TEMP[7].xxxx, IMM[1].wwww
72: ADD TEMP[3].x, IMM[0].xxxx, -TEMP[3].xxxx
73: LRP TEMP[13].x, TEMP[4].xxxx, IMM[0].xxxx, TEMP[9].xxxx
74: LRP TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx, TEMP[9].xxxx
75: MAD TEMP[6].x, TEMP[13].xxxx, TEMP[6].xxxx, IMM[2].xxxx
76: RCP TEMP[6].x, TEMP[6].xxxx
77: DP3 TEMP[8].x, TEMP[0].xyzz, TEMP[8].xyzz
78: MAX TEMP[8].x, IMM[0].wwww, TEMP[8].xxxx
79: POW TEMP[8].x, TEMP[8].xxxx, TEMP[10].xxxx
80: ADD TEMP[9].x, TEMP[10].xxxx, IMM[0].xxxx
81: MUL TEMP[9].x, TEMP[9].xxxx, CONST[3].yyyy
82: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
83: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[8].xxxx
84: MUL TEMP[6].x, TEMP[6].xxxx, TEMP[4].xxxx
85: MUL TEMP[6].x, TEMP[6].xxxx, CONST[3].xxxx
86: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
87: MUL TEMP[6].xyz, TEMP[6].xxxx, TEMP[5].xyzz
88: ADD TEMP[8].xyz, IMM[0].xxxx, -TEMP[2].xyzz
89: MUL TEMP[9].x, TEMP[3].xxxx, TEMP[3].xxxx
90: MUL TEMP[10].x, TEMP[3].xxxx, TEMP[3].xxxx
91: MUL TEMP[3].x, TEMP[10].xxxx, TEMP[3].xxxx
92: MUL TEMP[3].x, TEMP[9].xxxx, TEMP[3].xxxx
93: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[3].xxxx, TEMP[2].xyzz
94: MUL TEMP[3].x, CONST[9].xxxx, CONST[1].wwww
95: ADD TEMP[3].x, CONST[1].wwww, -TEMP[3].xxxx
96: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xxxx
97: ADD TEMP[3].x, TEMP[7].xxxx, IMM[0].zzzz
98: MUL TEMP[8].x, TEMP[11].xxxx, TEMP[11].xxxx
99: MUL TEMP[9].x, TEMP[11].xxxx, TEMP[11].xxxx
100: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[11].xxxx
101: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
102: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[8].xxxx, IMM[0].xxxx
103: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz
104: MUL TEMP[8].x, TEMP[12].xxxx, TEMP[12].xxxx
105: MUL TEMP[9].x, TEMP[12].xxxx, TEMP[12].xxxx
106: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[12].xxxx
107: MUL TEMP[8].x, TEMP[8].xxxx, TEMP[9].xxxx
108: MAD TEMP[7].x, TEMP[7].xxxx, TEMP[8].xxxx, IMM[0].xxxx
109: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[7].xxxx
110: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[4].xxxx
111: MUL TEMP[3].xyz, TEMP[5].xyzz, TEMP[3].xxxx
112: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz
113: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
114: MAD TEMP[1].x, IN[4].xxxx, CONST[0].zzzz, CONST[0].wwww
115: MOV_SAT TEMP[1].x, TEMP[1].xxxx
116: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
117: MOV TEMP[0].xyz, TEMP[0].xyzx
118: MOV TEMP[0].w, IMM[0].xxxx
119: MOV OUT[0], TEMP[0]
120: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%44 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0
%46 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)*
%50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0
%51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)*
%53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0
%54 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%55 = bitcast <8 x i32> addrspace(2)* %54 to <32 x i8> addrspace(2)*
%56 = load <32 x i8>, <32 x i8> addrspace(2)* %55, align 32, !tbaa !0
%57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%58 = bitcast <4 x i32> addrspace(2)* %57 to <16 x i8> addrspace(2)*
%59 = load <16 x i8>, <16 x i8> addrspace(2)* %58, align 16, !tbaa !0
%60 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%74 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%75 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%76 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%77 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%78 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%79 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%80 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7)
%81 = bitcast float %60 to i32
%82 = bitcast float %61 to i32
%83 = insertelement <2 x i32> undef, i32 %81, i32 0
%84 = insertelement <2 x i32> %83, i32 %82, i32 1
%85 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %84, <32 x i8> %45, <16 x i8> %47, i32 2)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
%90 = fmul float %89, %39
%91 = fcmp olt float %90, %40
%92 = select i1 %91, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %92)
%93 = bitcast float %60 to i32
%94 = bitcast float %61 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %50, <16 x i8> %53, i32 2)
%98 = extractelement <4 x float> %97, i32 1
%99 = extractelement <4 x float> %97, i32 3
%100 = fmul float %99, 2.000000e+00
%101 = fadd float %100, -1.000000e+00
%102 = fmul float %98, 2.000000e+00
%103 = fadd float %102, -1.000000e+00
%104 = fmul float %101, %41
%105 = fmul float %103, %41
%106 = fmul float %104, %104
%107 = fmul float %105, %105
%108 = fadd float %106, %107
%109 = call float @llvm.AMDIL.clamp.(float %108, float 0.000000e+00, float 1.000000e+00)
%110 = fsub float 1.000000e+00, %109
%111 = call float @llvm.sqrt.f32(float %110)
%112 = fmul float %104, %62
%113 = fmul float %105, %66
%114 = fadd float %113, %112
%115 = fmul float %111, %70
%116 = fadd float %114, %115
%117 = fmul float %104, %63
%118 = fmul float %105, %67
%119 = fadd float %118, %117
%120 = fmul float %111, %71
%121 = fadd float %119, %120
%122 = fmul float %104, %64
%123 = fmul float %105, %68
%124 = fadd float %123, %122
%125 = fmul float %111, %72
%126 = fadd float %124, %125
%127 = fmul float %116, %116
%128 = fmul float %121, %121
%129 = fadd float %128, %127
%130 = fmul float %126, %126
%131 = fadd float %129, %130
%132 = call float @llvm.AMDGPU.rsq.clamped.f32(float %131)
%133 = fmul float %116, %132
%134 = fmul float %121, %132
%135 = fmul float %126, %132
%136 = fmul float %36, %86
%137 = fmul float %37, %87
%138 = fmul float %38, %88
%139 = call float @llvm.AMDGPU.lrp(float %42, float %136, float %26)
%140 = call float @llvm.AMDGPU.lrp(float %42, float %137, float %27)
%141 = call float @llvm.AMDGPU.lrp(float %42, float %138, float %28)
%142 = fmul float %65, %65
%143 = fmul float %69, %69
%144 = fadd float %143, %142
%145 = fmul float %73, %73
%146 = fadd float %144, %145
%147 = call float @llvm.AMDGPU.rsq.clamped.f32(float %146)
%148 = fmul float %65, %147
%149 = fmul float %69, %147
%150 = fmul float %73, %147
%151 = fmul float %133, %148
%152 = fmul float %134, %149
%153 = fadd float %152, %151
%154 = fmul float %135, %150
%155 = fadd float %153, %154
%156 = call float @llvm.maxnum.f32(float %155, float 0.000000e+00)
%157 = fmul float %78, %78
%158 = fmul float %79, %79
%159 = fadd float %158, %157
%160 = fmul float %80, %80
%161 = fadd float %159, %160
%162 = bitcast float %161 to i32
%163 = bitcast float %161 to i32
%164 = insertelement <2 x i32> undef, i32 %162, i32 0
%165 = insertelement <2 x i32> %164, i32 %163, i32 1
%166 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %165, <32 x i8> %56, <16 x i8> %59, i32 2)
%167 = extractelement <4 x float> %166, i32 3
%168 = fmul float %30, %167
%169 = fmul float %31, %167
%170 = fmul float %32, %167
%171 = fmul float %75, %75
%172 = fmul float %76, %76
%173 = fadd float %172, %171
%174 = fmul float %77, %77
%175 = fadd float %173, %174
%176 = call float @llvm.AMDGPU.rsq.clamped.f32(float %175)
%177 = fmul float %75, %176
%178 = fmul float %76, %176
%179 = fmul float %77, %176
%180 = fsub float 1.000000e+00, %43
%181 = fsub float %148, %177
%182 = fsub float %149, %178
%183 = fsub float %150, %179
%184 = fmul float %181, %181
%185 = fmul float %182, %182
%186 = fadd float %185, %184
%187 = fmul float %183, %183
%188 = fadd float %186, %187
%189 = call float @llvm.AMDGPU.rsq.clamped.f32(float %188)
%190 = fmul float %181, %189
%191 = fmul float %182, %189
%192 = fmul float %183, %189
%193 = fmul float %177, %133
%194 = fsub float -0.000000e+00, %193
%195 = fmul float %178, %134
%196 = fsub float %194, %195
%197 = fmul float %179, %135
%198 = fsub float %196, %197
%199 = call float @llvm.maxnum.f32(float %198, float 0.000000e+00)
%200 = fmul float %148, %190
%201 = fmul float %149, %191
%202 = fadd float %201, %200
%203 = fmul float %150, %192
%204 = fadd float %202, %203
%205 = call float @llvm.maxnum.f32(float %204, float 0.000000e+00)
%206 = fmul float %180, %180
%207 = fmul float %206, %35
%208 = fsub float 1.000000e+00, %180
%209 = fmul float %208, 0x3FEEF9DB20000000
%210 = fadd float %209, 0x3F9EB851E0000000
%211 = call float @llvm.log2.f32(float %210)
%212 = fdiv float 1.000000e+00, %211
%213 = fmul float %212, 1.000000e+01
%214 = fmul float %213, %213
%215 = fsub float 1.000000e+00, %156
%216 = fsub float 1.000000e+00, %199
%217 = fmul float %205, 2.000000e+00
%218 = fmul float %205, %180
%219 = fmul float %217, %218
%220 = fadd float %219, 5.000000e-01
%221 = fsub float 1.000000e+00, %205
%222 = call float @llvm.AMDGPU.lrp(float %156, float 1.000000e+00, float %207)
%223 = call float @llvm.AMDGPU.lrp(float %199, float 1.000000e+00, float %207)
%224 = fmul float %222, %223
%225 = fadd float %224, 0x3F1A36E2E0000000
%226 = fdiv float 1.000000e+00, %225
%227 = fmul float %133, %190
%228 = fmul float %134, %191
%229 = fadd float %228, %227
%230 = fmul float %135, %192
%231 = fadd float %229, %230
%232 = call float @llvm.maxnum.f32(float %231, float 0.000000e+00)
%233 = call float @llvm.pow.f32(float %232, float %214)
%234 = fadd float %214, 1.000000e+00
%235 = fmul float %234, %34
%236 = fmul float %233, %235
%237 = fmul float %226, %236
%238 = fmul float %237, %156
%239 = fmul float %238, %33
%240 = call float @llvm.maxnum.f32(float %239, float 0.000000e+00)
%241 = fmul float %240, %168
%242 = fmul float %240, %169
%243 = fmul float %240, %170
%244 = fsub float 1.000000e+00, %139
%245 = fsub float 1.000000e+00, %140
%246 = fsub float 1.000000e+00, %141
%247 = fmul float %221, %221
%248 = fmul float %221, %221
%249 = fmul float %248, %221
%250 = fmul float %247, %249
%251 = fmul float %244, %250
%252 = fadd float %251, %139
%253 = fmul float %245, %250
%254 = fadd float %253, %140
%255 = fmul float %246, %250
%256 = fadd float %255, %141
%257 = fmul float %42, %29
%258 = fsub float %29, %257
%259 = fmul float %136, %258
%260 = fmul float %137, %258
%261 = fmul float %138, %258
%262 = fadd float %220, -1.000000e+00
%263 = fmul float %215, %215
%264 = fmul float %215, %215
%265 = fmul float %264, %215
%266 = fmul float %263, %265
%267 = fmul float %262, %266
%268 = fadd float %267, 1.000000e+00
%269 = fadd float %220, -1.000000e+00
%270 = fmul float %216, %216
%271 = fmul float %216, %216
%272 = fmul float %271, %216
%273 = fmul float %270, %272
%274 = fmul float %269, %273
%275 = fadd float %274, 1.000000e+00
%276 = fmul float %268, %275
%277 = fmul float %276, %156
%278 = fmul float %168, %277
%279 = fmul float %169, %277
%280 = fmul float %170, %277
%281 = fmul float %259, %278
%282 = fmul float %260, %279
%283 = fmul float %261, %280
%284 = fmul float %241, %252
%285 = fadd float %284, %281
%286 = fmul float %242, %254
%287 = fadd float %286, %282
%288 = fmul float %243, %256
%289 = fadd float %288, %283
%290 = fmul float %74, %24
%291 = fadd float %290, %25
%292 = call float @llvm.AMDIL.clamp.(float %291, float 0.000000e+00, float 1.000000e+00)
%293 = fmul float %285, %292
%294 = fmul float %287, %292
%295 = fmul float %289, %292
%296 = call i32 @llvm.SI.packf16(float %293, float %294)
%297 = bitcast i32 %296 to float
%298 = call i32 @llvm.SI.packf16(float %295, float 1.000000e+00)
%299 = bitcast i32 %298 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %297, float %299, float %297, float %299)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400
v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401
v_interp_p1_f32 v5, v0, 1, 1, [m0] ; C8140500
v_interp_p2_f32 v5, [v5], v1, 1, 1, [m0] ; C8150501
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v7, v0, 3, 1, [m0] ; C81C0700
v_interp_p2_f32 v7, [v7], v1, 3, 1, [m0] ; C81D0701
v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800
v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801
v_interp_p1_f32 v9, v0, 1, 2, [m0] ; C8240900
v_interp_p2_f32 v9, [v9], v1, 1, 2, [m0] ; C8250901
v_interp_p1_f32 v10, v0, 2, 2, [m0] ; C8280A00
v_interp_p2_f32 v10, [v10], v1, 2, 2, [m0] ; C8290A01
v_interp_p1_f32 v11, v0, 3, 2, [m0] ; C82C0B00
v_interp_p2_f32 v11, [v11], v1, 3, 2, [m0] ; C82D0B01
v_interp_p1_f32 v12, v0, 0, 3, [m0] ; C8300C00
v_interp_p2_f32 v12, [v12], v1, 0, 3, [m0] ; C8310C01
v_interp_p1_f32 v13, v0, 1, 3, [m0] ; C8340D00
v_interp_p2_f32 v13, [v13], v1, 1, 3, [m0] ; C8350D01
v_interp_p1_f32 v14, v0, 2, 3, [m0] ; C8380E00
v_interp_p2_f32 v14, [v14], v1, 2, 3, [m0] ; C8390E01
v_interp_p1_f32 v15, v0, 3, 3, [m0] ; C83C0F00
v_interp_p2_f32 v15, [v15], v1, 3, 3, [m0] ; C83D0F01
v_interp_p1_f32 v16, v0, 0, 4, [m0] ; C8401000
v_interp_p2_f32 v16, [v16], v1, 0, 4, [m0] ; C8411001
v_interp_p1_f32 v17, v0, 1, 4, [m0] ; C8441100
v_interp_p2_f32 v17, [v17], v1, 1, 4, [m0] ; C8451101
v_interp_p1_f32 v18, v0, 2, 4, [m0] ; C8481200
v_interp_p2_f32 v18, [v18], v1, 2, 4, [m0] ; C8491201
v_interp_p1_f32 v19, v0, 3, 4, [m0] ; C84C1300
s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300
v_interp_p2_f32 v19, [v19], v1, 3, 4, [m0] ; C84D1301
v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400
v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401
v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500
v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501
v_interp_p1_f32 v0, v0, 2, 5, [m0] ; C8001600
s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500
s_load_dwordx8 s[36:43], s[6:7], 0x0 ; C0D20700
v_interp_p2_f32 v0, [v0], v1, 2, 5, [m0] ; C8011601
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s44, s[8:11], 0x10 ; C2160910
s_buffer_load_dword s45, s[8:11], 0x11 ; C2168911
s_buffer_load_dword s46, s[8:11], 0x13 ; C2170913
s_buffer_load_dword s47, s[8:11], 0x14 ; C2178914
s_load_dwordx4 s[24:27], s[4:5], 0x4 ; C08C0504
s_load_dwordx4 s[12:15], s[4:5], 0x8 ; C0860508
s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708
s_load_dwordx8 s[16:23], s[6:7], 0x10 ; C0C80710
image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[36:43], s[0:3] ; F0800F00 00091602
s_buffer_load_dword s3, s[8:11], 0x12 ; C2018912
s_buffer_load_dword s2, s[8:11], 0x20 ; C2010920
s_buffer_load_dword s0, s[8:11], 0x24 ; C2000924
s_buffer_load_dword s1, s[8:11], 0x28 ; C2008928
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v1, s46, v25 ; 1002322E
v_cmp_gt_f32_e32 vcc, s47, v1 ; 7C08022F
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
v_mul_f32_e32 v1, s44, v22 ; 10022C2C
v_mul_f32_e32 v22, s45, v23 ; 102C2E2D
v_mul_f32_e32 v23, s3, v24 ; 102E3003
s_buffer_load_dword s3, s[8:11], 0x2 ; C2018902
s_buffer_load_dword s4, s[8:11], 0x3 ; C2020903
v_mul_f32_e32 v24, v20, v20 ; 10302914
v_mac_f32_e32 v24, v21, v21 ; 3E302B15
v_mac_f32_e32 v24, v0, v0 ; 3E300100
image_sample v[2:3], 10, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[28:35], s[24:27] ; F0800A00 00C70202
v_mov_b32_e32 v25, v24 ; 7E320318
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800800 00640018
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mad_f32 v3, 2.0, v3, -1.0 ; D2820003 03CE06F4
v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4
s_buffer_load_dword s5, s[8:11], 0x4 ; C2028904
s_buffer_load_dword s6, s[8:11], 0x5 ; C2030905
s_buffer_load_dword s7, s[8:11], 0x6 ; C2038906
v_mov_b32_e32 v20, s4 ; 7E280204
v_mac_f32_e32 v20, s3, v16 ; 3E282003
s_buffer_load_dword s3, s[8:11], 0xd ; C201890D
s_buffer_load_dword s4, s[8:11], 0xf ; C202090F
s_buffer_load_dword s12, s[8:11], 0x7 ; C2060907
s_buffer_load_dword s13, s[8:11], 0x8 ; C2068908
s_buffer_load_dword s14, s[8:11], 0x9 ; C2070909
s_buffer_load_dword s15, s[8:11], 0xa ; C207890A
s_buffer_load_dword s8, s[8:11], 0xc ; C204090C
v_mul_f32_e32 v3, s2, v3 ; 10060602
v_mul_f32_e32 v2, s2, v2 ; 10040402
v_mul_f32_e32 v4, v4, v3 ; 10080704
v_mac_f32_e32 v4, v8, v2 ; 3E080508
v_mul_f32_e32 v5, v5, v3 ; 100A0705
v_mac_f32_e32 v5, v9, v2 ; 3E0A0509
v_mul_f32_e32 v6, v6, v3 ; 100C0706
v_mac_f32_e32 v6, v10, v2 ; 3E0C050A
v_mul_f32_e32 v2, v2, v2 ; 10040502
v_mac_f32_e32 v2, v3, v3 ; 3E040703
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_sub_f32_e32 v2, 1.0, v2 ; 080404F2
v_sqrt_f32_e32 v2, v2 ; 7E046702
v_mac_f32_e32 v4, v12, v2 ; 3E08050C
v_mac_f32_e32 v5, v13, v2 ; 3E0A050D
v_mac_f32_e32 v6, v14, v2 ; 3E0C050E
v_mul_f32_e32 v2, v4, v4 ; 10040904
v_mac_f32_e32 v2, v5, v5 ; 3E040B05
v_mac_f32_e32 v2, v6, v6 ; 3E040D06
v_rsq_clamp_f32_e32 v2, v2 ; 7E045902
v_mul_f32_e32 v3, v17, v17 ; 10062311
v_mac_f32_e32 v3, v18, v18 ; 3E062512
v_mac_f32_e32 v3, v19, v19 ; 3E062713
v_rsq_clamp_f32_e32 v3, v3 ; 7E065903
v_mul_f32_e32 v8, v7, v7 ; 10100F07
v_mac_f32_e32 v8, v11, v11 ; 3E10170B
v_mac_f32_e32 v8, v15, v15 ; 3E101F0F
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_mul_f32_e32 v9, v3, v17 ; 10122303
v_mul_f32_e32 v10, v3, v18 ; 10142503
v_mul_f32_e32 v3, v3, v19 ; 10062703
v_mul_f32_e32 v4, v2, v4 ; 10080902
v_mul_f32_e32 v5, v2, v5 ; 100A0B02
v_mul_f32_e32 v12, v8, v7 ; 10180F08
v_mad_f32 v7, v7, v8, -v9 ; D2820007 84261107
v_mul_f32_e32 v9, v4, v9 ; 10121304
v_mad_f32 v9, -v10, v5, -v9 ; D2820009 A4260B0A
v_mad_f32 v10, v11, v8, -v10 ; D282000A 842A110B
v_mul_f32_e32 v13, v7, v7 ; 101A0F07
v_mac_f32_e32 v13, v10, v10 ; 3E1A150A
v_mad_f32 v14, v15, v8, -v3 ; D282000E 840E110F
v_mac_f32_e32 v13, v14, v14 ; 3E1A1D0E
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mul_f32_e32 v16, v12, v4 ; 1020090C
v_mac_f32_e32 v16, v11, v5 ; 3E200B0B
v_mul_f32_e32 v7, v13, v7 ; 100E0F0D
v_mul_f32_e32 v10, v13, v10 ; 1014150D
v_mul_f32_e32 v12, v7, v12 ; 10181907
v_mac_f32_e32 v12, v10, v11 ; 3E18170A
v_mul_f32_e32 v4, v7, v4 ; 10080907
v_mac_f32_e32 v4, v10, v5 ; 3E080B0A
v_mul_f32_e32 v2, v2, v6 ; 10040D02
v_mul_f32_e32 v5, v8, v15 ; 100A1F08
v_mul_f32_e32 v6, v13, v14 ; 100C1D0D
v_mad_f32 v3, -v3, v2, v9 ; D2820003 24260503
v_mac_f32_e32 v16, v5, v2 ; 3E200505
v_mac_f32_e32 v12, v6, v5 ; 3E180B06
v_mac_f32_e32 v4, v6, v2 ; 3E080506
v_sub_f32_e64 v2, 1.0, s0 ; D2080002 000000F2
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v5, s5, v2 ; 100A0405
v_mad_f32 v6, -v2, s5, 1.0 ; D2820006 23C80B02
v_mul_f32_e32 v7, s6, v2 ; 100E0406
v_mad_f32 v8, -v2, s6, 1.0 ; D2820008 23C80D02
v_mul_f32_e32 v9, s7, v2 ; 10120407
v_mad_f32 v2, -v2, s7, 1.0 ; D2820002 23C80F02
v_sub_f32_e64 v10, 1.0, s1 ; D208000A 000002F2
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mov_b32_e32 v13, 0x3cf5c28f ; 7E1A02FF 3CF5C28F
v_madmk_f32_e32 v11, v11, v13, 0x3f77ced9 ; 40161B0B 3F77CED9
v_max_f32_e32 v12, 0, v12 ; 20181880
v_sub_f32_e32 v13, 1.0, v12 ; 081A18F2
v_mul_f32_e32 v14, v13, v13 ; 101C1B0D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
v_mul_f32_e32 v13, v13, v14 ; 101A1D0D
v_mac_f32_e32 v5, s0, v1 ; 3E0A0200
v_mad_f32 v6, -s0, v1, v6 ; D2820006 241A0200
v_mac_f32_e32 v5, v13, v6 ; 3E0A0D0D
v_mac_f32_e32 v7, s0, v22 ; 3E0E2C00
v_mad_f32 v6, -s0, v22, v8 ; D2820006 24222C00
v_mac_f32_e32 v7, v13, v6 ; 3E0E0D0D
v_mac_f32_e32 v9, s0, v23 ; 3E122E00
v_log_f32_e32 v6, v11 ; 7E0C4F0B
v_mad_f32 v2, -s0, v23, v2 ; D2820002 240A2E00
v_mac_f32_e32 v9, v13, v2 ; 3E12050D
v_mul_f32_e32 v2, s13, v0 ; 1004000D
v_mul_f32_e32 v8, s14, v0 ; 1010000E
v_rcp_f32_e32 v6, v6 ; 7E0C5506
v_mul_f32_e32 v0, s15, v0 ; 1000000F
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_f32_e32 v11, s4, v11 ; 10161604
v_mul_f32_e32 v6, 0x41200000, v6 ; 100C0CFF 41200000
v_mad_f32 v13, v6, v6, 1.0 ; D282000D 03CA0D06
v_mul_f32_e32 v13, s3, v13 ; 101A1A03
v_max_f32_e32 v3, 0, v3 ; 20060680
v_sub_f32_e32 v14, 1.0, v3 ; 081C06F2
v_mul_f32_e32 v15, v11, v14 ; 101E1D0B
v_mac_f32_e32 v15, 1.0, v3 ; 3E1E06F2
v_max_f32_e32 v3, 0, v16 ; 20062080
v_sub_f32_e32 v16, 1.0, v3 ; 082006F2
v_mul_f32_e32 v11, v11, v16 ; 1016210B
v_mac_f32_e32 v11, 1.0, v3 ; 3E1606F2
v_max_f32_e32 v4, 0, v4 ; 20080880
v_log_f32_e32 v4, v4 ; 7E084F04
v_madak_f32_e32 v11, v11, v15, 0x38d1b717 ; 42161F0B 38D1B717
v_mul_f32_e32 v6, v6, v6 ; 100C0D06
v_rcp_f32_e32 v11, v11 ; 7E16550B
v_mul_legacy_f32_e32 v4, v6, v4 ; 0E080906
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_f32_e32 v4, v13, v4 ; 1008090D
v_mul_f32_e32 v4, v4, v11 ; 10081704
v_mul_f32_e32 v4, v3, v4 ; 10080903
v_mul_f32_e32 v4, s8, v4 ; 10080808
v_mov_b32_e32 v6, s0 ; 7E0C0200
v_mad_f32 v6, -v6, s12, s12 ; D2820006 20301906
v_mul_f32_e32 v10, v10, v12 ; 1014190A
v_add_f32_e32 v11, v12, v12 ; 0616190C
v_mad_f32 v10, v11, v10, 0.5 ; D282000A 03C2150B
v_mul_f32_e32 v11, v16, v16 ; 10162110
v_mul_f32_e32 v12, v16, v11 ; 10181710
v_mul_f32_e32 v11, v12, v11 ; 1016170C
v_mul_f32_e32 v12, v14, v14 ; 10181D0E
v_mul_f32_e32 v13, v14, v12 ; 101A190E
v_mul_f32_e32 v12, v13, v12 ; 1018190D
v_add_f32_e32 v10, -1.0, v10 ; 061414F3
v_mad_f32 v11, v10, v11, 1.0 ; D282000B 03CA170A
v_mad_f32 v10, v10, v12, 1.0 ; D282000A 03CA190A
v_mul_f32_e32 v10, v10, v11 ; 1014170A
v_mul_f32_e32 v1, v6, v1 ; 10020306
v_mul_f32_e32 v3, v3, v10 ; 10061503
v_mul_f32_e32 v10, v3, v2 ; 10140503
v_mul_f32_e32 v1, v10, v1 ; 1002030A
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v2, v2, v4 ; 10040902
v_mac_f32_e32 v1, v5, v2 ; 3E020505
v_mul_f32_e32 v2, v6, v22 ; 10042D06
v_mul_f32_e32 v5, v3, v8 ; 100A1103
v_mul_f32_e32 v2, v5, v2 ; 10040505
v_mul_f32_e32 v5, v8, v4 ; 100A0908
v_mac_f32_e32 v2, v7, v5 ; 3E040B07
v_mul_f32_e32 v5, v6, v23 ; 100A2F06
v_mul_f32_e32 v4, v0, v4 ; 10080900
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_mul_f32_e32 v0, v0, v5 ; 10000B00
v_mac_f32_e32 v0, v9, v4 ; 3E000909
v_add_f32_e64 v3, 0, v20 clamp ; D2060803 00022880
v_mul_f32_e32 v1, v3, v1 ; 10020303
v_mul_f32_e32 v2, v3, v2 ; 10040503
v_mul_f32_e32 v0, v3, v0 ; 10000103
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 56
VGPRS: 28
Code Size: 1112 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..12]
DCL CONST[16..27]
DCL TEMP[0..14], LOCAL
IMM[0] FLT32 { 1.0000, 0.5000, 0.0000, 0.4990}
IMM[1] FLT32 { 1.4990, 2.0000, 0.8000, 1.5000}
IMM[2] FLT32 { -0.5000, -1.0000, 0.2000, 8.0000}
IMM[3] FLT32 { 0.1000, 0.0000, -4.0000, 0.0700}
IMM[4] FLT32 { -0.0150, 100.0000, 180.0000, 0.7000}
0: MUL TEMP[0].x, IN[3].xxxx, CONST[16].xxxx
1: FLR TEMP[0].x, TEMP[0].xxxx
2: ADD TEMP[1].xy, IN[0].xyyy, IMM[0].xxxx
3: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
4: MOV TEMP[1].z, TEMP[0].xxxx
5: RCP TEMP[2].x, CONST[16].xxxx
6: ADD TEMP[3].x, TEMP[0].xxxx, IMM[0].wwww
7: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[2].xxxx
8: RCP TEMP[4].x, CONST[16].yyyy
9: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx
10: MOV TEMP[3].y, TEMP[5].xxxx
11: MOV TEMP[3].xy, TEMP[3].xyyy
12: MOV TEMP[3].w, IMM[0].zzzz
13: TXL TEMP[3], TEMP[3], SAMP[0], 2D
14: ADD TEMP[5].x, TEMP[0].xxxx, IMM[0].wwww
15: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[2].xxxx
16: MUL TEMP[2].x, IMM[1].xxxx, TEMP[4].xxxx
17: MOV TEMP[0].y, TEMP[2].xxxx
18: MOV TEMP[0].xy, TEMP[0].xyyy
19: MOV TEMP[0].w, IMM[0].zzzz
20: TXL TEMP[0].xy, TEMP[0], SAMP[0], 2D
21: MUL TEMP[2].xy, TEMP[3].xyyy, IMM[1].yyyy
22: MUL TEMP[3].xy, TEMP[3].zwww, IMM[1].yyyy
23: MOV TEMP[4].xy, IN[1].xzzz
24: MOV TEMP[4].w, IMM[0].zzzz
25: TXL TEMP[4].xy, TEMP[4], SAMP[1], 2D
26: LRP TEMP[3].xy, TEMP[4].xyyy, TEMP[3].xyyy, TEMP[2].xyyy
27: MOV TEMP[2].x, IN[2].xxxx
28: MUL TEMP[4].x, IN[2].xxxx, IN[2].xxxx
29: MAD TEMP[4].x, IN[2].yyyy, IN[2].yyyy, TEMP[4].xxxx
30: SQRT TEMP[4].x, TEMP[4].xxxx
31: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
32: MOV TEMP[2].y, TEMP[4].xxxx
33: MOV TEMP[2].z, IN[2].yyyy
34: MOV TEMP[4].y, TEMP[4].xxxx
35: MOV TEMP[5].x, CONST[9].zzzz
36: MOV TEMP[5].y, CONST[10].zzzz
37: MOV TEMP[5].z, CONST[11].zzzz
38: MOV TEMP[5].xyz, -TEMP[5].xyzx
39: MUL TEMP[6].xyz, IMM[0].zzxx, TEMP[5].yzxx
40: MAD TEMP[6].xyz, IMM[0].xzzz, TEMP[5].zxyy, -TEMP[6].xyzz
41: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz
42: RSQ TEMP[7].x, TEMP[7].xxxx
43: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx
44: MUL TEMP[7].xyz, TEMP[5].zxyy, TEMP[6].yzxx
45: MAD TEMP[7].xyz, TEMP[5].yzxx, TEMP[6].zxyy, -TEMP[7].xyzz
46: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz
47: RSQ TEMP[8].x, TEMP[8].xxxx
48: MUL TEMP[7].xz, TEMP[7].xyzz, TEMP[8].xxxx
49: MOV TEMP[7].xz, TEMP[7].xxzx
50: MOV TEMP[7].y, IMM[0].xxxx
51: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[2].xyzz
52: ABS TEMP[5].x, TEMP[5].xxxx
53: MAD TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx, IMM[0].yyyy
54: LRP TEMP[2].xyz, TEMP[5].xxxx, TEMP[7].xyzz, TEMP[2].xyzz
55: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
56: RSQ TEMP[5].x, TEMP[5].xxxx
57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
58: MUL TEMP[5], CONST[19].xxyy, CONST[17].xxyy
59: MUL TEMP[5], TEMP[5], CONST[21]
60: MUL TEMP[7], CONST[19].zzww, CONST[17].zzww
61: MUL TEMP[7], TEMP[7], CONST[22]
62: DP2 TEMP[8].x, CONST[21].xyyy, IN[1].xzzz
63: DP2 TEMP[9].x, CONST[21].zwww, IN[1].xzzz
64: MOV TEMP[8].y, TEMP[9].xxxx
65: DP2 TEMP[9].x, CONST[22].xyyy, IN[1].xzzz
66: MOV TEMP[8].z, TEMP[9].xxxx
67: DP2 TEMP[9].x, CONST[22].zwww, IN[1].xzzz
68: MOV TEMP[8].w, TEMP[9].xxxx
69: MUL TEMP[8], CONST[18], TEMP[8]
70: MUL TEMP[9], CONST[0].yyyy, CONST[20]
71: ADD TEMP[8], TEMP[8], TEMP[9]
72: COS TEMP[10].x, TEMP[8].xxxx
73: COS TEMP[10].y, TEMP[8].yyyy
74: COS TEMP[10].z, TEMP[8].zzzz
75: COS TEMP[10].w, TEMP[8].wwww
76: MOV TEMP[8].xy, TEMP[5].xzxx
77: MOV TEMP[8].zw, TEMP[7].zzxz
78: DP4 TEMP[8].x, TEMP[10], TEMP[8]
79: MOV TEMP[5].xy, TEMP[5].ywyy
80: MOV TEMP[5].zw, TEMP[7].wwyw
81: DP4 TEMP[5].x, TEMP[10], TEMP[5]
82: MOV TEMP[8].z, TEMP[5].xxxx
83: ADD TEMP[5].xy, IN[1].xzzz, TEMP[8].xzzz
84: MOV TEMP[7].y, IMM[1].yyyy
85: MUL TEMP[8], CONST[18].xxyy, CONST[17].xxyy
86: MUL TEMP[8], TEMP[8], CONST[21]
87: MUL TEMP[10], CONST[18].zzww, CONST[17].zzww
88: MUL TEMP[10], TEMP[10], CONST[22]
89: DP2 TEMP[11].x, CONST[21].xyyy, TEMP[5].xyyy
90: DP2 TEMP[12].x, CONST[21].zwww, TEMP[5].xyyy
91: MOV TEMP[11].y, TEMP[12].xxxx
92: DP2 TEMP[12].x, CONST[22].xyyy, TEMP[5].xyyy
93: MOV TEMP[11].z, TEMP[12].xxxx
94: DP2 TEMP[5].x, CONST[22].zwww, TEMP[5].xyyy
95: MOV TEMP[11].w, TEMP[5].xxxx
96: MAD TEMP[5], CONST[18], TEMP[11], TEMP[9]
97: COS TEMP[9].x, TEMP[5].xxxx
98: COS TEMP[9].y, TEMP[5].yyyy
99: COS TEMP[9].z, TEMP[5].zzzz
100: COS TEMP[9].w, TEMP[5].wwww
101: MOV TEMP[5].xy, TEMP[8].xzxx
102: MOV TEMP[5].zw, TEMP[10].zzxz
103: DP4 TEMP[5].x, TEMP[9], TEMP[5]
104: MOV TEMP[7].x, -TEMP[5].xxxx
105: MOV TEMP[5].xy, TEMP[8].ywyy
106: MOV TEMP[5].zw, TEMP[10].wwyw
107: DP4 TEMP[5].x, TEMP[9], TEMP[5]
108: MOV TEMP[7].z, -TEMP[5].xxxx
109: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[7].xyzz
110: RSQ TEMP[5].x, TEMP[5].xxxx
111: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx
112: MUL TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy
113: MUL TEMP[5].xz, TEMP[5].xyzz, IMM[1].yyyy
114: MOV TEMP[8].xz, TEMP[5].xxzx
115: MOV TEMP[8].y, IMM[0].zzzz
116: ADD TEMP[10].xy, IN[1].xzzz, -CONST[23].xyyy
117: MAD TEMP[10].xy, CONST[23].zwww, IMM[0].yyyy, TEMP[10].xyyy
118: RCP TEMP[11].x, CONST[23].zzzz
119: RCP TEMP[11].y, CONST[23].wwww
120: MUL TEMP[9].xy, TEMP[10].xyyy, TEMP[11].xyyy
121: MOV TEMP[10].w, IMM[0].zzzz
122: MOV TEMP[10].xyz, IN[1].xyzx
123: MOV TEMP[11].w, IMM[0].xxxx
124: ADD TEMP[12].x, IN[0].yyyy, IMM[1].zzzz
125: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[3].yyyy
126: MUL TEMP[14].x, IN[0].xxxx, TEMP[3].xxxx
127: MUL TEMP[6].xyz, TEMP[14].xxxx, TEMP[6].xyzz
128: MAD TEMP[11].xyz, TEMP[13].xxxx, TEMP[2].xyzz, TEMP[6].xyzz
129: MOV TEMP[2].y, IMM[0].zzzz
130: MOV TEMP[6].xy, TEMP[9].xyyy
131: MOV TEMP[6].w, IMM[0].zzzz
132: TXL TEMP[6].xyz, TEMP[6], SAMP[2], 2D
133: ADD TEMP[6].xz, TEMP[6].xzyy, IMM[2].xyxx
134: MOV TEMP[2].xz, TEMP[6].xxzx
135: MOV TEMP[6].w, IMM[0].zzzz
136: ADD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].xyzz
137: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
138: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].yyyy
139: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[12].xxxx
140: MUL TEMP[3].xy, IN[2].xyyy, IMM[1].wwww
141: MOV TEMP[4].x, TEMP[3].xxyx
142: MOV TEMP[2].xy, TEMP[4].xyxx
143: MUL TEMP[4].xy, TEMP[5].xzzz, TEMP[7].xxxx
144: MUL TEMP[4].x, TEMP[4].xyyy, IMM[2].zzzz
145: ADD TEMP[3].x, TEMP[3].yyyy, TEMP[4].xxxx
146: MOV TEMP[2].z, TEMP[3].xxxx
147: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
148: RSQ TEMP[3].x, TEMP[3].xxxx
149: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
150: MUL TEMP[2].xyz, IMM[3].yyxx, TEMP[3].yzxx
151: MAD TEMP[2].xyz, IMM[3].xyyy, TEMP[3].zxyy, -TEMP[2].xyzz
152: MUL TEMP[4].xyz, TEMP[3].zxyy, TEMP[2].yzxx
153: MAD TEMP[4].xyz, TEMP[3].yzxx, TEMP[2].zxyy, -TEMP[4].xyzz
154: MOV TEMP[1].xyz, TEMP[1].xyzx
155: MAD TEMP[0].x, TEMP[0].yyyy, IMM[2].wwww, IMM[3].zzzz
156: MOV TEMP[1].w, TEMP[0].xxxx
157: MOV TEMP[0].x, CONST[5].xxxx
158: MOV TEMP[0].y, CONST[6].xxxx
159: MOV TEMP[0].z, CONST[7].xxxx
160: MOV TEMP[5].x, CONST[5].yyyy
161: MOV TEMP[5].y, CONST[6].yyyy
162: MOV TEMP[5].z, CONST[7].yyyy
163: MOV TEMP[7].x, CONST[5].zzzz
164: MOV TEMP[7].y, CONST[6].zzzz
165: MOV TEMP[7].z, CONST[7].zzzz
166: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx
167: MAD TEMP[0].xyz, TEMP[5].xyzz, TEMP[3].yyyy, TEMP[0].xyzz
168: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[3].zzzz, TEMP[0].xyzz
169: DP3 TEMP[5].x, TEMP[0].xyzz, TEMP[0].xyzz
170: RSQ TEMP[5].x, TEMP[5].xxxx
171: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[5].xxxx
172: MUL TEMP[5].xyz, CONST[1].xyzz, TEMP[4].xxxx
173: MAD TEMP[5].xyz, CONST[2].xyzz, TEMP[4].yyyy, TEMP[5].xyzz
174: MAD TEMP[5].xyz, CONST[3].xyzz, TEMP[4].zzzz, TEMP[5].xyzz
175: DP3 TEMP[7].x, TEMP[5].xyzz, TEMP[5].xyzz
176: RSQ TEMP[7].x, TEMP[7].xxxx
177: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[7].xxxx
178: MUL TEMP[7].xyz, TEMP[0].zxyy, TEMP[5].yzxx
179: MAD TEMP[7].xyz, TEMP[0].yzxx, TEMP[5].zxyy, -TEMP[7].xyzz
180: MUL TEMP[8].xyz, TEMP[3].zxyy, TEMP[4].yzxx
181: MAD TEMP[3].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[8].xyzz
182: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[2].xyzz
183: SSG TEMP[3].x, TEMP[3].xxxx
184: MUL TEMP[3].xyz, TEMP[7].xyzz, TEMP[3].xxxx
185: MOV TEMP[2].x, TEMP[5].xxxx
186: MOV TEMP[2].y, TEMP[3].xxxx
187: MOV TEMP[2].z, TEMP[0].xxxx
188: MOV TEMP[4].y, TEMP[3].yyyy
189: MOV TEMP[4].z, TEMP[0].yyyy
190: MOV TEMP[7].x, TEMP[5].zzzz
191: MOV TEMP[7].y, TEMP[3].zzzz
192: ADD TEMP[3], TEMP[10], TEMP[11]
193: SSG TEMP[8], TEMP[6]
194: ABS TEMP[9], TEMP[6]
195: MAD TEMP[9], TEMP[9], IMM[3].wwww, IMM[4].xxxx
196: MAX TEMP[9], TEMP[9], IMM[0].zzzz
197: MUL TEMP[8], TEMP[8], TEMP[9]
198: DP2 TEMP[9].x, IN[2].xyyy, IN[2].xyyy
199: SQRT TEMP[9].x, TEMP[9].xxxx
200: ADD TEMP[9].x, TEMP[9].xxxx, IMM[0].xxxx
201: ADD TEMP[10].x, CONST[0].xxxx, IMM[4].yyyy
202: MUL TEMP[9].x, TEMP[9].xxxx, TEMP[10].xxxx
203: MUL TEMP[9].x, TEMP[9].xxxx, IMM[4].zzzz
204: SIN TEMP[9].x, TEMP[9].xxxx
205: MUL TEMP[9].x, TEMP[9].xxxx, IMM[4].wwww
206: MAD TEMP[6], TEMP[8], TEMP[9].xxxx, TEMP[6]
207: ADD TEMP[3], TEMP[3], TEMP[6]
208: MUL TEMP[6], CONST[5], TEMP[3].xxxx
209: MAD TEMP[6], CONST[6], TEMP[3].yyyy, TEMP[6]
210: MAD TEMP[6], CONST[7], TEMP[3].zzzz, TEMP[6]
211: MAD TEMP[3], CONST[8], TEMP[3].wwww, TEMP[6]
212: MUL TEMP[6], CONST[24], TEMP[3].xxxx
213: MAD TEMP[6], CONST[25], TEMP[3].yyyy, TEMP[6]
214: MAD TEMP[6], CONST[26], TEMP[3].zzzz, TEMP[6]
215: MAD TEMP[3], CONST[27], TEMP[3].wwww, TEMP[6]
216: MOV TEMP[2].xyz, TEMP[2].xyzx
217: MOV TEMP[2].w, TEMP[5].yyyy
218: MOV TEMP[4].xy, TEMP[4].yzyy
219: MOV TEMP[4].zw, TEMP[7].yyxy
220: MOV TEMP[0].x, TEMP[0].zzzz
221: MOV OUT[4], TEMP[0]
222: MOV OUT[3], TEMP[4]
223: MOV OUT[1], TEMP[1]
224: MOV OUT[2], TEMP[2]
225: MOV OUT[0], TEMP[3]
226: END
; ModuleID = 'tgsi'
; Shader entry point in LLVM IR form (attribute group #0 marks "ShaderType"="1").
; It fetches four per-vertex inputs with llvm.SI.vs.load.input, reads scalar
; constants with llvm.SI.load.const, takes four texture samples with
; llvm.SI.samplel.v4i32 and finishes with five llvm.SI.export calls.
;
; Arguments as they are used in this body:
;   %1      - array of <16 x i8>; element 0 is the descriptor given to every
;             llvm.SI.load.const call.
;   %2      - array of <4 x i32>; elements 0..2 become the third operand of the
;             sample calls.
;   %3      - array of <8 x i32>; elements 0..2 become the second operand of the
;             sample calls.
;   %4      - array of <16 x i8>; elements 0..3 are the descriptors given to
;             llvm.SI.vs.load.input.
;   %5, %7  - added together to form the index operand of llvm.SI.vs.load.input.
;   %0, %6, %8, %9, %10 - do not appear as operands anywhere in this body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; --- Constant loads ---------------------------------------------------------
; All loads below go through the descriptor in element 0 of %1. The second
; operand is a constant offset; NOTE(review): it looks like a byte offset
; (the disassembly after this function uses matching dword offsets, e.g.
; 344 here vs 0x56 there) - confirm.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444)
; --- Sampling descriptors ---------------------------------------------------
; Elements 0, 1 and 2 of %3 and %2 are loaded in pairs; each pair is later
; passed as operands 2 and 3 of a llvm.SI.samplel.v4i32 call.
%89 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%90 = load <8 x i32>, <8 x i32> addrspace(2)* %89, align 32, !tbaa !0
%91 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%92 = load <4 x i32>, <4 x i32> addrspace(2)* %91, align 16, !tbaa !0
%93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)*
%95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0
%96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)*
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
; --- Vertex input fetch -----------------------------------------------------
; Four inputs are fetched through descriptors 0..3 of %4, each indexed by
; %5 + %7. Only the components extracted below are used:
;   input 0: x (%109), y (%110)
;   input 1: x (%115), y (%116), z (%117)
;   input 2: x (%122), y (%123)
;   input 3: x (%128)
%105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0
%107 = add i32 %5, %7
%108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %107)
%109 = extractelement <4 x float> %108, i32 0
%110 = extractelement <4 x float> %108, i32 1
%111 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%112 = load <16 x i8>, <16 x i8> addrspace(2)* %111, align 16, !tbaa !0
%113 = add i32 %5, %7
%114 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %112, i32 0, i32 %113)
%115 = extractelement <4 x float> %114, i32 0
%116 = extractelement <4 x float> %114, i32 1
%117 = extractelement <4 x float> %114, i32 2
%118 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%119 = load <16 x i8>, <16 x i8> addrspace(2)* %118, align 16, !tbaa !0
%120 = add i32 %5, %7
%121 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %119, i32 0, i32 %120)
%122 = extractelement <4 x float> %121, i32 0
%123 = extractelement <4 x float> %121, i32 1
%124 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%125 = load <16 x i8>, <16 x i8> addrspace(2)* %124, align 16, !tbaa !0
%126 = add i32 %5, %7
%127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %125, i32 0, i32 %126)
%128 = extractelement <4 x float> %127, i32 0
; %130 = floor(input3.x * const@256); exported later and used as a sample row/column base.
%129 = fmul float %128, %43
%130 = call float @llvm.floor.f32(float %129)
; %133, %134 = (input0.xy + 1) * 0.5; exported in the first export below.
%131 = fadd float %109, 1.000000e+00
%132 = fadd float %110, 1.000000e+00
%133 = fmul float %131, 5.000000e-01
%134 = fmul float %132, 5.000000e-01
; --- Samples 1 and 2 (descriptor pair 0) ------------------------------------
; Both use x = (%130 + ~0.499) / const@256. Sample 1 uses y = ~0.499 / const@260,
; sample 2 uses y = ~1.499 / const@260. The third coordinate is the integer 0.
%135 = fdiv float 1.000000e+00, %43
%136 = fadd float %130, 0x3FDFEF9DC0000000
%137 = fmul float %136, %135
%138 = fdiv float 1.000000e+00, %44
%139 = fmul float %138, 0x3FDFEF9DC0000000
%140 = bitcast float %137 to i32
%141 = bitcast float %139 to i32
%142 = insertelement <4 x i32> undef, i32 %140, i32 0
%143 = insertelement <4 x i32> %142, i32 %141, i32 1
%144 = insertelement <4 x i32> %143, i32 0, i32 2
%145 = bitcast <8 x i32> %90 to <32 x i8>
%146 = bitcast <4 x i32> %92 to <16 x i8>
%147 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %144, <32 x i8> %145, <16 x i8> %146, i32 2)
%148 = extractelement <4 x float> %147, i32 0
%149 = extractelement <4 x float> %147, i32 1
%150 = extractelement <4 x float> %147, i32 2
%151 = extractelement <4 x float> %147, i32 3
%152 = fadd float %130, 0x3FDFEF9DC0000000
%153 = fmul float %152, %135
%154 = fmul float %138, 0x3FF7FBE760000000
%155 = bitcast float %153 to i32
%156 = bitcast float %154 to i32
%157 = insertelement <4 x i32> undef, i32 %155, i32 0
%158 = insertelement <4 x i32> %157, i32 %156, i32 1
%159 = insertelement <4 x i32> %158, i32 0, i32 2
%160 = bitcast <8 x i32> %90 to <32 x i8>
%161 = bitcast <4 x i32> %92 to <16 x i8>
%162 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %159, <32 x i8> %160, <16 x i8> %161, i32 2)
%163 = extractelement <4 x float> %162, i32 0
%164 = extractelement <4 x float> %162, i32 1
; All four components of sample 1 are doubled.
%165 = fmul float %148, 2.000000e+00
%166 = fmul float %149, 2.000000e+00
%167 = fmul float %150, 2.000000e+00
%168 = fmul float %151, 2.000000e+00
; --- Sample 3 (descriptor pair 1) -------------------------------------------
; Coordinates are (input1.x, input1.z, 0). Its x/y results drive two lrp calls
; that blend the doubled sample-1 components (zw against xy).
%169 = bitcast float %115 to i32
%170 = bitcast float %117 to i32
%171 = insertelement <4 x i32> undef, i32 %169, i32 0
%172 = insertelement <4 x i32> %171, i32 %170, i32 1
%173 = insertelement <4 x i32> %172, i32 0, i32 2
%174 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %173, <32 x i8> %95, <16 x i8> %98, i32 2)
%175 = extractelement <4 x float> %174, i32 0
%176 = extractelement <4 x float> %174, i32 1
%177 = call float @llvm.AMDGPU.lrp(float %175, float %167, float %165)
%178 = call float @llvm.AMDGPU.lrp(float %176, float %168, float %166)
; %183 = 1 - sqrt(input2.x^2 + input2.y^2)
%179 = fmul float %122, %122
%180 = fmul float %123, %123
%181 = fadd float %180, %179
%182 = call float @llvm.sqrt.f32(float %181)
%183 = fsub float 1.000000e+00, %182
; --- Basis vectors from constants @152/@168/@184 ----------------------------
; %184..%186 are the negated constants. %190/%192/%194 combine them with -0.0
; multipliers and are normalized with rsq.clamped into %201..%203.
; NOTE(review): this has the shape of a cross product with a fixed axis - confirm.
%184 = fsub float -0.000000e+00, %40
%185 = fsub float -0.000000e+00, %41
%186 = fsub float -0.000000e+00, %42
%187 = fmul float %41, -0.000000e+00
%188 = fmul float %42, -0.000000e+00
%189 = fsub float -0.000000e+00, %187
%190 = fsub float %189, %42
%191 = fmul float %40, -0.000000e+00
%192 = fsub float %191, %188
%193 = fmul float %41, -0.000000e+00
%194 = fadd float %193, %40
%195 = fmul float %190, %190
%196 = fmul float %192, %192
%197 = fadd float %196, %195
%198 = fmul float %194, %194
%199 = fadd float %197, %198
%200 = call float @llvm.AMDGPU.rsq.clamped.f32(float %199)
%201 = fmul float %190, %200
%202 = fmul float %192, %200
%203 = fmul float %194, %200
; Cross-product-style combination of (%201,%202,%203) with (%184,%185,%186),
; scaled by rsq of its squared length; only two scaled components are kept
; (%219, %220).
%204 = fmul float %202, %186
%205 = fmul float %203, %184
%206 = fmul float %201, %185
%207 = fmul float %203, %185
%208 = fsub float %207, %204
%209 = fmul float %201, %186
%210 = fsub float %209, %205
%211 = fmul float %202, %184
%212 = fsub float %211, %206
%213 = fmul float %208, %208
%214 = fmul float %210, %210
%215 = fadd float %214, %213
%216 = fmul float %212, %212
%217 = fadd float %215, %216
%218 = call float @llvm.AMDGPU.rsq.clamped.f32(float %217)
%219 = fmul float %208, %218
%220 = fmul float %212, %218
; %228 = |dot((input2.x, %183, input2.y), (%184, %185, %186))| * 0.5 + 0.5,
; used as the lrp weight for the three blends below; the result is normalized
; into %238..%240.
%221 = fmul float %122, %184
%222 = fmul float %183, %185
%223 = fadd float %222, %221
%224 = fmul float %123, %186
%225 = fadd float %223, %224
%226 = call float @llvm.fabs.f32(float %225)
%227 = fmul float %226, 5.000000e-01
%228 = fadd float %227, 5.000000e-01
%229 = call float @llvm.AMDGPU.lrp(float %228, float %219, float %122)
%230 = call float @llvm.AMDGPU.lrp(float %228, float 1.000000e+00, float %183)
%231 = call float @llvm.AMDGPU.lrp(float %228, float %220, float %123)
%232 = fmul float %229, %229
%233 = fmul float %230, %230
%234 = fadd float %233, %232
%235 = fmul float %231, %231
%236 = fadd float %234, %235
%237 = call float @llvm.AMDGPU.rsq.clamped.f32(float %236)
%238 = fmul float %229, %237
%239 = fmul float %230, %237
%240 = fmul float %231, %237
; --- First pass of four cosine terms ----------------------------------------
; Per-term scale factors: const@304..316 * const@272..284 * const@336..364.
; Several products are computed twice (e.g. %241/%242); this is how the IR was
; emitted.
%241 = fmul float %53, %45
%242 = fmul float %53, %45
%243 = fmul float %54, %46
%244 = fmul float %54, %46
%245 = fmul float %241, %61
%246 = fmul float %242, %62
%247 = fmul float %243, %63
%248 = fmul float %244, %64
%249 = fmul float %55, %47
%250 = fmul float %55, %47
%251 = fmul float %56, %48
%252 = fmul float %56, %48
%253 = fmul float %249, %65
%254 = fmul float %250, %66
%255 = fmul float %251, %67
%256 = fmul float %252, %68
; Four 2-component dot products of constant pairs (@336..@364) with (input1.x, input1.z).
%257 = fmul float %61, %115
%258 = fmul float %62, %117
%259 = fadd float %257, %258
%260 = fmul float %63, %115
%261 = fmul float %64, %117
%262 = fadd float %260, %261
%263 = fmul float %65, %115
%264 = fmul float %66, %117
%265 = fadd float %263, %264
%266 = fmul float %67, %115
%267 = fmul float %68, %117
%268 = fadd float %266, %267
; Cosine arguments: const@288..300 * dot + const@4 * const@320..332.
%269 = fmul float %49, %259
%270 = fmul float %50, %262
%271 = fmul float %51, %265
%272 = fmul float %52, %268
%273 = fmul float %14, %57
%274 = fmul float %14, %58
%275 = fmul float %14, %59
%276 = fmul float %14, %60
%277 = fadd float %269, %273
%278 = fadd float %270, %274
%279 = fadd float %271, %275
%280 = fadd float %272, %276
%281 = call float @llvm.cos.f32(float %277)
%282 = call float @llvm.cos.f32(float %278)
%283 = call float @llvm.cos.f32(float %279)
%284 = call float @llvm.cos.f32(float %280)
; Weighted sums of the four cosines; added to input1.x / input1.z to give
; the displaced pair (%299, %300).
%285 = fmul float %281, %245
%286 = fmul float %282, %247
%287 = fadd float %285, %286
%288 = fmul float %283, %253
%289 = fadd float %287, %288
%290 = fmul float %284, %255
%291 = fadd float %289, %290
%292 = fmul float %281, %246
%293 = fmul float %282, %248
%294 = fadd float %292, %293
%295 = fmul float %283, %254
%296 = fadd float %294, %295
%297 = fmul float %284, %256
%298 = fadd float %296, %297
%299 = fadd float %115, %291
%300 = fadd float %117, %298
; --- Second pass of four cosine terms ---------------------------------------
; Same structure as the first pass, but the scale factors use const@288..300 in
; place of const@304..316 and the dot products use the displaced pair
; (%299, %300).
%301 = fmul float %49, %45
%302 = fmul float %49, %45
%303 = fmul float %50, %46
%304 = fmul float %50, %46
%305 = fmul float %301, %61
%306 = fmul float %302, %62
%307 = fmul float %303, %63
%308 = fmul float %304, %64
%309 = fmul float %51, %47
%310 = fmul float %51, %47
%311 = fmul float %52, %48
%312 = fmul float %52, %48
%313 = fmul float %309, %65
%314 = fmul float %310, %66
%315 = fmul float %311, %67
%316 = fmul float %312, %68
%317 = fmul float %61, %299
%318 = fmul float %62, %300
%319 = fadd float %317, %318
%320 = fmul float %63, %299
%321 = fmul float %64, %300
%322 = fadd float %320, %321
%323 = fmul float %65, %299
%324 = fmul float %66, %300
%325 = fadd float %323, %324
%326 = fmul float %67, %299
%327 = fmul float %68, %300
%328 = fadd float %326, %327
%329 = fmul float %49, %319
%330 = fadd float %329, %273
%331 = fmul float %50, %322
%332 = fadd float %331, %274
%333 = fmul float %51, %325
%334 = fadd float %333, %275
%335 = fmul float %52, %328
%336 = fadd float %335, %276
%337 = call float @llvm.cos.f32(float %330)
%338 = call float @llvm.cos.f32(float %332)
%339 = call float @llvm.cos.f32(float %334)
%340 = call float @llvm.cos.f32(float %336)
%341 = fmul float %337, %305
%342 = fmul float %338, %307
%343 = fadd float %341, %342
%344 = fmul float %339, %313
%345 = fadd float %343, %344
%346 = fmul float %340, %315
%347 = fadd float %345, %346
%348 = fmul float %337, %306
%349 = fmul float %338, %308
%350 = fadd float %348, %349
%351 = fmul float %339, %314
%352 = fadd float %350, %351
%353 = fmul float %340, %316
%354 = fadd float %352, %353
; Scale (%347, %354) by rsq(%347^2 + 4 + %354^2), then by -2 (%363, %364).
%355 = fmul float %347, %347
%356 = fadd float %355, 4.000000e+00
%357 = fmul float %354, %354
%358 = fadd float %356, %357
%359 = call float @llvm.AMDGPU.rsq.clamped.f32(float %358)
%360 = fmul float %347, %359
%361 = fmul float %354, %359
%362 = fmul float %163, 5.000000e-01
%363 = fmul float %360, -2.000000e+00
%364 = fmul float %361, -2.000000e+00
; Coordinates for sample 4:
;   ((input1.x - const@368) + 0.5 * const@376) / const@376
;   ((input1.z - const@372) + 0.5 * const@380) / const@380
%365 = fsub float %115, %69
%366 = fsub float %117, %70
%367 = fmul float %71, 5.000000e-01
%368 = fadd float %367, %365
%369 = fmul float %72, 5.000000e-01
%370 = fadd float %369, %366
%371 = fdiv float 1.000000e+00, %71
%372 = fdiv float 1.000000e+00, %72
%373 = fmul float %368, %371
%374 = fmul float %370, %372
; Offset vector (%382, %384, %386):
;   ((input0.y + ~0.8) * %178) * (%238..%240) + (input0.x * %177) * (%201..%203)
%375 = fadd float %110, 0x3FE99999A0000000
%376 = fmul float %375, %178
%377 = fmul float %109, %177
%378 = fmul float %377, %201
%379 = fmul float %377, %202
%380 = fmul float %377, %203
%381 = fmul float %376, %238
%382 = fadd float %381, %378
%383 = fmul float %376, %239
%384 = fadd float %383, %379
%385 = fmul float %376, %240
%386 = fadd float %385, %380
; --- Sample 4 (descriptor pair 2) -------------------------------------------
%387 = bitcast float %373 to i32
%388 = bitcast float %374 to i32
%389 = insertelement <4 x i32> undef, i32 %387, i32 0
%390 = insertelement <4 x i32> %389, i32 %388, i32 1
%391 = insertelement <4 x i32> %390, i32 0, i32 2
%392 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %391, <32 x i8> %101, <16 x i8> %104, i32 2)
%393 = extractelement <4 x float> %392, i32 0
%394 = extractelement <4 x float> %392, i32 1
; (sample4.xy - 0.5) is added to (%363, %364); the result is scaled by %362,
; %178 and %375 into (%405, %406, %407). The middle component starts as %362 * 0.
%395 = fadd float %393, -5.000000e-01
%396 = fadd float %394, -5.000000e-01
%397 = fadd float %363, %395
%398 = fadd float %364, %396
%399 = fmul float %397, %362
%400 = fmul float %362, 0.000000e+00
%401 = fmul float %398, %362
%402 = fmul float %399, %178
%403 = fmul float %400, %178
%404 = fmul float %401, %178
%405 = fmul float %402, %375
%406 = fmul float %403, %375
%407 = fmul float %404, %375
; --- Direction vector (%419, %420, %421) ------------------------------------
; Built from (1.5 * input2.x, %183, 1.5 * input2.y + ~0.2 * %363 * %362) and
; normalized with rsq.clamped.
%408 = fmul float %122, 1.500000e+00
%409 = fmul float %123, 1.500000e+00
%410 = fmul float %363, %362
%411 = fmul float %410, 0x3FC99999A0000000
%412 = fadd float %409, %411
%413 = fmul float %408, %408
%414 = fmul float %183, %183
%415 = fadd float %414, %413
%416 = fmul float %412, %412
%417 = fadd float %415, %416
%418 = call float @llvm.AMDGPU.rsq.clamped.f32(float %417)
%419 = fmul float %408, %418
%420 = fmul float %183, %418
%421 = fmul float %412, %418
; Two cross-product-style steps: first with constant factors 0.0 and ~0.1
; (%426, %428, %430), then of the direction vector with that result
; (%435, %437, %439).
%422 = fmul float %420, 0.000000e+00
%423 = fmul float %421, 0.000000e+00
%424 = fmul float %419, 0x3FB99999A0000000
%425 = fmul float %421, 0x3FB99999A0000000
%426 = fsub float %425, %422
%427 = fmul float %419, 0.000000e+00
%428 = fsub float %427, %423
%429 = fmul float %420, 0.000000e+00
%430 = fsub float %429, %424
%431 = fmul float %421, %428
%432 = fmul float %419, %430
%433 = fmul float %420, %426
%434 = fmul float %420, %430
%435 = fsub float %434, %431
%436 = fmul float %421, %426
%437 = fsub float %436, %432
%438 = fmul float %419, %428
%439 = fsub float %438, %433
; %441 = sample2.y * 8 - 4; exported as the fourth component of the first export.
%440 = fmul float %164, 8.000000e+00
%441 = fadd float %440, -4.000000e+00
; Direction vector combined with nine constants from @80..@120, then normalized
; into (%463, %464, %465).
%442 = fmul float %24, %419
%443 = fmul float %28, %419
%444 = fmul float %32, %419
%445 = fmul float %25, %420
%446 = fadd float %445, %442
%447 = fmul float %29, %420
%448 = fadd float %447, %443
%449 = fmul float %33, %420
%450 = fadd float %449, %444
%451 = fmul float %26, %421
%452 = fadd float %451, %446
%453 = fmul float %30, %421
%454 = fadd float %453, %448
%455 = fmul float %34, %421
%456 = fadd float %455, %450
%457 = fmul float %452, %452
%458 = fmul float %454, %454
%459 = fadd float %458, %457
%460 = fmul float %456, %456
%461 = fadd float %459, %460
%462 = call float @llvm.AMDGPU.rsq.clamped.f32(float %461)
%463 = fmul float %452, %462
%464 = fmul float %454, %462
%465 = fmul float %456, %462
; (%435, %437, %439) combined with nine constants from @16..@56, then normalized
; into (%487, %488, %489).
%466 = fmul float %15, %435
%467 = fmul float %16, %435
%468 = fmul float %17, %435
%469 = fmul float %18, %437
%470 = fadd float %469, %466
%471 = fmul float %19, %437
%472 = fadd float %471, %467
%473 = fmul float %20, %437
%474 = fadd float %473, %468
%475 = fmul float %21, %439
%476 = fadd float %475, %470
%477 = fmul float %22, %439
%478 = fadd float %477, %472
%479 = fmul float %23, %439
%480 = fadd float %479, %474
%481 = fmul float %476, %476
%482 = fmul float %478, %478
%483 = fadd float %482, %481
%484 = fmul float %480, %480
%485 = fadd float %483, %484
%486 = call float @llvm.AMDGPU.rsq.clamped.f32(float %485)
%487 = fmul float %476, %486
%488 = fmul float %478, %486
%489 = fmul float %480, %486
; Cross-product-style combination of the two normalized vectors (%494, %496, %498).
%490 = fmul float %465, %488
%491 = fmul float %463, %489
%492 = fmul float %464, %487
%493 = fmul float %464, %489
%494 = fsub float %493, %490
%495 = fmul float %465, %487
%496 = fsub float %495, %491
%497 = fmul float %463, %488
%498 = fsub float %497, %492
; %512 is a triple-product-style scalar; the two compare/select pairs map it to
; 1.0 when > 0, -1.0 when < 0 and leave 0 unchanged (%516). That value scales
; (%494, %496, %498).
%499 = fmul float %421, %437
%500 = fmul float %419, %439
%501 = fmul float %420, %435
%502 = fmul float %420, %439
%503 = fsub float %502, %499
%504 = fmul float %421, %435
%505 = fsub float %504, %500
%506 = fmul float %419, %437
%507 = fsub float %506, %501
%508 = fmul float %503, %426
%509 = fmul float %505, %428
%510 = fadd float %509, %508
%511 = fmul float %507, %430
%512 = fadd float %510, %511
%513 = fcmp ogt float %512, 0.000000e+00
%514 = select i1 %513, float 1.000000e+00, float %512
%515 = fcmp oge float %514, 0.000000e+00
%516 = select i1 %515, float %514, float -1.000000e+00
%517 = fmul float %494, %516
%518 = fmul float %496, %516
%519 = fmul float %498, %516
; --- Output position assembly -----------------------------------------------
; Base: input1.xyz plus the offset vector (%382, %384, %386).
%520 = fadd float %115, %382
%521 = fadd float %116, %384
%522 = fadd float %117, %386
; Sign (same compare/select pattern as above) of each of %405, %406, %407.
%523 = fcmp ogt float %405, 0.000000e+00
%524 = select i1 %523, float 1.000000e+00, float %405
%525 = fcmp oge float %524, 0.000000e+00
%526 = select i1 %525, float %524, float -1.000000e+00
%527 = fcmp ogt float %406, 0.000000e+00
%528 = select i1 %527, float 1.000000e+00, float %406
%529 = fcmp oge float %528, 0.000000e+00
%530 = select i1 %529, float %528, float -1.000000e+00
%531 = fcmp ogt float %407, 0.000000e+00
%532 = select i1 %531, float 1.000000e+00, float %407
%533 = fcmp oge float %532, 0.000000e+00
%534 = select i1 %533, float %532, float -1.000000e+00
; sign(v) * max(|v| * ~0.07 - ~0.015, 0) for each component.
%535 = call float @llvm.fabs.f32(float %405)
%536 = call float @llvm.fabs.f32(float %406)
%537 = call float @llvm.fabs.f32(float %407)
%538 = fmul float %535, 0x3FB1EB8520000000
%539 = fadd float %538, 0xBF8EB851E0000000
%540 = fmul float %536, 0x3FB1EB8520000000
%541 = fadd float %540, 0xBF8EB851E0000000
%542 = fmul float %537, 0x3FB1EB8520000000
%543 = fadd float %542, 0xBF8EB851E0000000
%544 = call float @llvm.maxnum.f32(float %539, float 0.000000e+00)
%545 = call float @llvm.maxnum.f32(float %541, float 0.000000e+00)
%546 = call float @llvm.maxnum.f32(float %543, float 0.000000e+00)
%547 = fmul float %526, %544
%548 = fmul float %530, %545
%549 = fmul float %534, %546
; %559 = sin((sqrt(input2.x^2 + input2.y^2) + 1) * (const@0 + 100) * 180) * ~0.7
%550 = fmul float %122, %122
%551 = fmul float %123, %123
%552 = fadd float %550, %551
%553 = call float @llvm.sqrt.f32(float %552)
%554 = fadd float %553, 1.000000e+00
%555 = fadd float %13, 1.000000e+02
%556 = fmul float %554, %555
%557 = fmul float %556, 1.800000e+02
%558 = call float @llvm.sin.f32(float %557)
%559 = fmul float %558, 0x3FE6666660000000
; The sin term modulates the clamped values, which are added back onto
; (%405, %406, %407) and then onto the base position. The fourth component
; (%571) is %559 * 0 + 0 + 1.
%560 = fmul float %547, %559
%561 = fadd float %560, %405
%562 = fmul float %548, %559
%563 = fadd float %562, %406
%564 = fmul float %549, %559
%565 = fadd float %564, %407
%566 = fmul float %559, 0.000000e+00
%567 = fadd float %566, 0.000000e+00
%568 = fadd float %520, %561
%569 = fadd float %521, %563
%570 = fadd float %522, %565
%571 = fadd float %567, 1.000000e+00
; First 4x4 multiply-accumulate: (%568, %569, %570, %571) with the sixteen
; constants @80..@140, giving (%593, %595, %597, %599).
%572 = fmul float %24, %568
%573 = fmul float %25, %568
%574 = fmul float %26, %568
%575 = fmul float %27, %568
%576 = fmul float %28, %569
%577 = fadd float %576, %572
%578 = fmul float %29, %569
%579 = fadd float %578, %573
%580 = fmul float %30, %569
%581 = fadd float %580, %574
%582 = fmul float %31, %569
%583 = fadd float %582, %575
%584 = fmul float %32, %570
%585 = fadd float %584, %577
%586 = fmul float %33, %570
%587 = fadd float %586, %579
%588 = fmul float %34, %570
%589 = fadd float %588, %581
%590 = fmul float %35, %570
%591 = fadd float %590, %583
%592 = fmul float %36, %571
%593 = fadd float %592, %585
%594 = fmul float %37, %571
%595 = fadd float %594, %587
%596 = fmul float %38, %571
%597 = fadd float %596, %589
%598 = fmul float %39, %571
%599 = fadd float %598, %591
; Second 4x4 multiply-accumulate with the sixteen constants @384..@444, giving
; the final (%621, %623, %625, %627).
%600 = fmul float %73, %593
%601 = fmul float %74, %593
%602 = fmul float %75, %593
%603 = fmul float %76, %593
%604 = fmul float %77, %595
%605 = fadd float %604, %600
%606 = fmul float %78, %595
%607 = fadd float %606, %601
%608 = fmul float %79, %595
%609 = fadd float %608, %602
%610 = fmul float %80, %595
%611 = fadd float %610, %603
%612 = fmul float %81, %597
%613 = fadd float %612, %605
%614 = fmul float %82, %597
%615 = fadd float %614, %607
%616 = fmul float %83, %597
%617 = fadd float %616, %609
%618 = fmul float %84, %597
%619 = fadd float %618, %611
%620 = fmul float %85, %599
%621 = fadd float %620, %613
%622 = fmul float %86, %599
%623 = fadd float %622, %615
%624 = fmul float %87, %599
%625 = fadd float %624, %617
%626 = fmul float %88, %599
%627 = fadd float %626, %619
; --- Exports ----------------------------------------------------------------
; Four exports with fourth operand 32..35 followed by one with fourth operand 12
; and third operand 1. NOTE(review): in the first shader of this dump the value
; written to the POSITION output goes through the export with operand 12 and
; the GENERIC output through 32, so 12 is presumably the position target and
; 32+ the generic/parameter targets - confirm against the backend.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %133, float %134, float %130, float %441)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %487, float %517, float %463, float %488)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %518, float %464, float %489, float %519)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %465, float %464, float %465, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %621, float %623, float %625, float %627)
ret void
}
; Declarations of the intrinsics called by @main, followed by the attribute
; groups and the metadata node that the function references.
; llvm.SI.load.const: takes a <16 x i8> descriptor and an i32 offset, returns one float.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; llvm.SI.vs.load.input: takes a <16 x i8> descriptor and two i32 operands, returns <4 x float>.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; llvm.SI.samplel.v4i32: takes a <4 x i32> coordinate vector, a <32 x i8> and a
; <16 x i8> descriptor plus an i32 operand, returns <4 x float>.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; llvm.AMDGPU.lrp is the only declaration using group #2 (readnone without nounwind).
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.cos.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sin.f32(float) #1
; llvm.SI.export carries no attribute group: five i32 operands followed by four floats.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is attached to @main; #1 and #2 are attached to the intrinsic declarations above.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node referenced as !tbaa !0 by the descriptor loads in @main.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[36:39], s[8:9], 0x0 ; C0920900
v_add_i32_e32 v15, s10, v0 ; 4A1E000A
v_mov_b32_e32 v0, 0x3e22f983 ; 7E0002FF 3E22F983
v_mov_b32_e32 v5, 0x80000000 ; 7E0A02FF 80000000
v_mov_b32_e32 v16, 0x3eff7cee ; 7E2002FF 3EFF7CEE
v_mov_b32_e32 v22, 0x3fc00000 ; 7E2C02FF 3FC00000
v_mov_b32_e32 v14, 0xbdcccccd ; 7E1C02FF BDCCCCCD
v_mov_b32_e32 v23, 0x41000000 ; 7E2E02FF 41000000
s_load_dwordx4 s[44:47], s[8:9], 0x4 ; C0960904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s43, s[0:3], 0x56 ; C2158156
s_buffer_load_dword s52, s[0:3], 0x57 ; C21A0157
s_buffer_load_dword s35, s[0:3], 0x58 ; C2118158
s_buffer_load_dword s42, s[0:3], 0x59 ; C2150159
s_buffer_load_dword s33, s[0:3], 0x5a ; C210815A
s_buffer_load_dword s10, s[0:3], 0x51 ; C2050151
s_buffer_load_dword s11, s[0:3], 0x52 ; C2058152
s_buffer_load_dword s12, s[0:3], 0x53 ; C2060153
s_buffer_load_dword s55, s[0:3], 0x54 ; C21B8154
s_buffer_load_dword s56, s[0:3], 0x55 ; C21C0155
s_buffer_load_dword s13, s[0:3], 0x47 ; C2068147
s_buffer_load_dword s54, s[0:3], 0x48 ; C21B0148
s_buffer_load_dword s53, s[0:3], 0x49 ; C21A8149
s_buffer_load_dword s40, s[0:3], 0x4a ; C214014A
s_buffer_load_dword s34, s[0:3], 0x4b ; C211014B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v6, s10 ; 7E0C020A
s_buffer_load_dword s10, s[0:3], 0x4c ; C205014C
v_mov_b32_e32 v7, s11 ; 7E0E020B
s_buffer_load_dword s11, s[0:3], 0x4d ; C205814D
v_mov_b32_e32 v8, s12 ; 7E10020C
s_buffer_load_dword s16, s[0:3], 0x4e ; C208014E
s_buffer_load_dword s12, s[0:3], 0x4f ; C206014F
s_buffer_load_dword s14, s[0:3], 0x50 ; C2070150
v_mov_b32_e32 v1, s13 ; 7E02020D
v_mov_b32_e32 v2, s13 ; 7E04020D
s_buffer_load_dword s41, s[0:3], 0x5b ; C214815B
s_buffer_load_dword s57, s[0:3], 0x5c ; C21C815C
s_buffer_load_dword s58, s[0:3], 0x5d ; C21D015D
s_buffer_load_dword s59, s[0:3], 0x5e ; C21D815E
v_mul_f32_e32 v1, s34, v1 ; 10020222
v_mul_f32_e32 v24, s33, v1 ; 10300221
s_buffer_load_dword s76, s[0:3], 0x5f ; C226015F
s_buffer_load_dword s77, s[0:3], 0x40 ; C2268140
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v2, s12, v2 ; 1004040C
v_mov_b32_e32 v9, s14 ; 7E12020E
s_buffer_load_dword s12, s[0:3], 0x41 ; C2060141
v_mul_f32_e32 v25, s41, v1 ; 10320229
s_buffer_load_dword s13, s[0:3], 0x44 ; C2068144
v_mul_f32_e32 v26, s41, v2 ; 10340429
v_mul_f32_e32 v27, s33, v2 ; 10360421
v_rcp_f32_e32 v28, s59 ; 7E38543B
s_buffer_load_dword s17, s[0:3], 0x45 ; C2088145
v_rcp_f32_e32 v29, s76 ; 7E3A544C
s_buffer_load_dword s18, s[0:3], 0x46 ; C2090146
v_rcp_f32_e32 v17, s77 ; 7E22544D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_rcp_f32_e32 v30, s12 ; 7E3C540C
s_buffer_load_dword s24, s[0:3], 0xe ; C20C010E
v_mov_b32_e32 v1, s13 ; 7E02020D
v_mov_b32_e32 v2, s13 ; 7E04020D
v_mul_f32_e32 v2, s10, v2 ; 1004040A
v_mul_f32_e32 v1, s54, v1 ; 10020236
s_buffer_load_dword s13, s[0:3], 0x14 ; C2068114
s_buffer_load_dword s12, s[0:3], 0x15 ; C2060115
v_mov_b32_e32 v3, s17 ; 7E060211
v_mul_f32_e32 v3, s53, v3 ; 10060635
s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116
s_buffer_load_dword s14, s[0:3], 0x17 ; C2070117
v_mov_b32_e32 v4, s17 ; 7E080211
v_mul_f32_e32 v4, s11, v4 ; 1008080B
s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108
s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109
v_mul_f32_e32 v31, s52, v4 ; 103E0834
v_mul_f32_e32 v32, s43, v4 ; 1040082B
v_mul_f32_e32 v33, s56, v2 ; 10420438
v_mul_f32_e32 v34, s55, v2 ; 10440437
v_mov_b32_e32 v2, s18 ; 7E040212
v_mul_f32_e32 v2, s40, v2 ; 10040428
v_mov_b32_e32 v4, s18 ; 7E080212
v_mul_f32_e32 v4, s16, v4 ; 10080810
s_buffer_load_dword s28, s[0:3], 0xa ; C20E010A
s_buffer_load_dword s27, s[0:3], 0xc ; C20D810C
v_mul_f32_e32 v35, s42, v4 ; 1046082A
v_mul_f32_e32 v36, s35, v4 ; 10480823
v_mul_f32_e32 v37, s43, v3 ; 104A062B
v_mul_f32_e32 v38, s52, v3 ; 104C0634
v_mul_f32_e32 v39, s55, v1 ; 104E0237
v_mul_f32_e32 v40, s56, v1 ; 10500238
v_mul_f32_e32 v41, s35, v2 ; 10520423
v_mul_f32_e32 v42, s42, v2 ; 1054042A
s_buffer_load_dword s29, s[0:3], 0xd ; C20E810D
s_buffer_load_dword s22, s[0:3], 0x18 ; C20B0118
s_buffer_load_dword s23, s[0:3], 0x19 ; C20B8119
s_buffer_load_dword s20, s[0:3], 0x1a ; C20A011A
s_buffer_load_dword s21, s[0:3], 0x1b ; C20A811B
s_buffer_load_dword s18, s[0:3], 0x1c ; C209011C
s_buffer_load_dword s19, s[0:3], 0x1d ; C209811D
s_buffer_load_dword s16, s[0:3], 0x1e ; C208011E
s_buffer_load_dword s17, s[0:3], 0x1f ; C208811F
s_buffer_load_dword s10, s[0:3], 0x20 ; C2050120
s_buffer_load_dword s11, s[0:3], 0x21 ; C2058121
s_buffer_load_dword s48, s[0:3], 0x1 ; C2180101
s_buffer_load_dword s30, s[0:3], 0x4 ; C20F0104
s_buffer_load_dword s31, s[0:3], 0x5 ; C20F8105
s_buffer_load_dword s32, s[0:3], 0x6 ; C2100106
s_load_dwordx4 s[68:71], s[8:9], 0x8 ; C0A20908
s_load_dwordx4 s[72:75], s[8:9], 0xc ; C0A4090C
buffer_load_format_xyzw v[10:13], v15, s[36:39], 0 idxen ; E00C2000 80090A0F
buffer_load_format_xyzw v[1:4], v15, s[44:47], 0 idxen ; E00C2000 800B010F
s_load_dwordx4 s[64:67], s[4:5], 0x0 ; C0A00500
s_load_dwordx4 s[60:63], s[4:5], 0x4 ; C09E0504
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v12, s48, v6 ; 10180C30
v_mul_f32_e32 v13, s48, v9 ; 101A1230
v_mul_f32_e32 v43, s48, v7 ; 10560E30
v_mul_f32_e32 v44, s48, v8 ; 10581030
buffer_load_format_xyzw v[6:9], v15, s[68:71], 0 idxen ; E00C2000 8011060F
buffer_load_format_xyzw v[18:21], v15, s[72:75], 0 idxen ; E00C2000 8012120F
s_load_dwordx4 s[36:39], s[4:5], 0x8 ; C0920508
s_load_dwordx8 s[80:87], s[6:7], 0x0 ; C0E80700
s_load_dwordx8 s[68:75], s[6:7], 0x8 ; C0E20708
s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710
s_waitcnt vmcnt(2) ; BF8C0772
v_add_f32_e32 v4, 1.0, v10 ; 060814F2
v_subrev_f32_e32 v45, s57, v1 ; 0A5A0239
v_mac_f32_e64 v45, 0.5, s59 ; D23E002D 000076F0
v_subrev_f32_e32 v46, s58, v3 ; 0A5C063A
v_mac_f32_e64 v46, 0.5, s76 ; D23E002E 000098F0
v_mul_f32_e32 v4, 0.5, v4 ; 100808F0
v_mul_f32_e32 v47, s56, v3 ; 105E0638
v_mac_f32_e32 v47, s55, v1 ; 3E5E0237
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v8, s77, v18 ; 1010244D
v_floor_f32_e32 v48, v8 ; 7E604908
v_add_f32_e32 v8, v16, v48 ; 06106110
v_mul_f32_e32 v15, v17, v8 ; 101E1111
v_mul_f32_e32 v16, v16, v30 ; 10203D10
v_mov_b32_e32 v17, 0 ; 7E220280
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[80:87], s[64:67] ; F0900F00 0214120F
v_mul_f32_e32 v16, 0x3fbfdf3b, v30 ; 10203CFF 3FBFDF3B
s_waitcnt vmcnt(0) ; BF8C0770
image_sample_l v[49:50], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[80:87], s[64:67] ; F0900300 0214310F
v_mov_b32_e32 v15, v1 ; 7E1E0301
v_mov_b32_e32 v16, v3 ; 7E200303
image_sample_l v[8:9], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[68:75], s[60:63] ; F0900300 01F1080F
v_mul_f32_e32 v15, s52, v3 ; 101E0634
v_mac_f32_e32 v15, s43, v1 ; 3E1E022B
v_mad_f32 v16, s54, v47, v13 ; D2820010 04365E36
v_mad_f32 v15, s53, v15, v12 ; D282000F 04321E35
v_mul_f32_e32 v16, v0, v16 ; 10202100
v_mul_f32_e32 v15, v0, v15 ; 101E1F00
v_fract_f32_e32 v16, v16 ; 7E204110
v_fract_f32_e32 v15, v15 ; 7E1E410F
v_cos_f32_e32 v16, v16 ; 7E206D10
v_cos_f32_e32 v15, v15 ; 7E1E6D0F
v_mul_f32_e32 v30, v31, v15 ; 103C1F1F
v_mac_f32_e32 v30, v33, v16 ; 3E3C2121
v_mul_f32_e32 v31, s42, v3 ; 103E062A
v_mac_f32_e32 v31, s35, v1 ; 3E3E0223
v_mad_f32 v31, s40, v31, v43 ; D282001F 04AE3E28
v_mul_f32_e32 v31, v0, v31 ; 103E3F00
v_fract_f32_e32 v31, v31 ; 7E3E411F
v_cos_f32_e32 v31, v31 ; 7E3E6D1F
v_mac_f32_e32 v30, v35, v31 ; 3E3C3F23
v_mul_f32_e32 v33, s41, v3 ; 10420629
v_mac_f32_e32 v33, s33, v1 ; 3E420221
v_mad_f32 v33, s34, v33, v44 ; D2820021 04B24222
v_mul_f32_e32 v33, v0, v33 ; 10424300
v_fract_f32_e32 v33, v33 ; 7E424121
v_cos_f32_e32 v33, v33 ; 7E426D21
v_mac_f32_e32 v30, v26, v33 ; 3E3C431A
v_add_f32_e32 v26, v30, v3 ; 0634071E
v_mul_f32_e32 v30, s56, v26 ; 103C3438
v_mul_f32_e32 v15, v32, v15 ; 101E1F20
v_mac_f32_e32 v15, v34, v16 ; 3E1E2122
v_mac_f32_e32 v15, v36, v31 ; 3E1E3F24
v_mac_f32_e32 v15, v27, v33 ; 3E1E431B
v_add_f32_e32 v16, v15, v1 ; 0620030F
v_mac_f32_e32 v30, s55, v16 ; 3E3C2037
v_mac_f32_e32 v13, s54, v30 ; 3E1A3C36
v_mul_f32_e32 v15, s52, v26 ; 101E3434
v_mac_f32_e32 v15, s43, v16 ; 3E1E202B
v_mac_f32_e32 v12, s53, v15 ; 3E181E35
v_mul_f32_e32 v27, s42, v26 ; 1036342A
v_mul_f32_e32 v15, v28, v45 ; 101E5B1C
v_mul_f32_e32 v12, v0, v12 ; 10181900
v_fract_f32_e32 v12, v12 ; 7E18410C
v_cos_f32_e32 v12, v12 ; 7E186D0C
v_mul_f32_e32 v28, v37, v12 ; 10381925
v_mul_f32_e32 v12, v38, v12 ; 10181926
v_mul_f32_e32 v13, v0, v13 ; 101A1B00
v_fract_f32_e32 v13, v13 ; 7E1A410D
v_cos_f32_e32 v13, v13 ; 7E1A6D0D
v_mac_f32_e32 v28, v39, v13 ; 3E381B27
v_mac_f32_e32 v12, v40, v13 ; 3E181B28
v_mac_f32_e32 v27, s35, v16 ; 3E362023
v_mac_f32_e32 v43, s40, v27 ; 3E563628
v_mul_f32_e32 v13, v0, v43 ; 101A5700
v_fract_f32_e32 v13, v13 ; 7E1A410D
v_cos_f32_e32 v13, v13 ; 7E1A6D0D
v_mac_f32_e32 v28, v41, v13 ; 3E381B29
v_mac_f32_e32 v12, v42, v13 ; 3E181B2A
v_mul_f32_e32 v13, s41, v26 ; 101A3429
v_mac_f32_e32 v13, s33, v16 ; 3E1A2021
v_mac_f32_e32 v44, s34, v13 ; 3E581A22
v_mul_f32_e32 v13, v0, v44 ; 101A5900
v_fract_f32_e32 v13, v13 ; 7E1A410D
v_cos_f32_e32 v13, v13 ; 7E1A6D0D
v_mac_f32_e32 v28, v24, v13 ; 3E381B18
v_mac_f32_e32 v12, v25, v13 ; 3E181B19
v_mad_f32 v13, v28, v28, 4.0 ; D282000D 03DA391C
v_mac_f32_e32 v13, v12, v12 ; 3E1A190C
v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D
v_mul_f32_e32 v16, v29, v46 ; 10205D1D
s_waitcnt vmcnt(1) ; BF8C0771
v_mad_f32 v23, v23, v50, -4.0 ; D2820017 03DE6517
v_mul_f32_e32 v24, 0.5, v49 ; 103062F0
v_mul_f32_e32 v25, v13, v28 ; 1032390D
v_mul_f32_e32 v26, -2.0, v25 ; 103432F5
v_mul_f32_e32 v26, v24, v26 ; 10343518
v_mul_f32_e32 v26, 0x3e4ccccd, v26 ; 103434FF 3E4CCCCD
v_mac_f32_e32 v26, v22, v7 ; 3E340F16
v_mul_f32_e32 v22, v22, v6 ; 102C0D16
v_mul_f32_e32 v27, v6, v6 ; 10360D06
v_mac_f32_e32 v27, v7, v7 ; 3E360F07
v_sqrt_f32_e32 v27, v27 ; 7E36671B
v_mul_f32_e32 v28, v22, v22 ; 10382D16
v_sub_f32_e32 v29, 1.0, v27 ; 083A36F2
v_mac_f32_e32 v28, v29, v29 ; 3E383B1D
v_mac_f32_e32 v28, v26, v26 ; 3E38351A
v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C
v_add_f32_e32 v30, 1.0, v11 ; 063C16F2
v_mul_f32_e32 v30, 0.5, v30 ; 103C3CF0
image_sample_l v[15:16], 3, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[44:51], s[36:39] ; F0900300 012B0F0F
exp 15, 32, 0, 0, 0, v4, v30, v48, v23 ; F800020F 17301E04
s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700
v_mul_f32_e32 v4, v28, v22 ; 10082D1C
v_mul_f32_e32 v22, v28, v26 ; 102C351C
v_mul_f32_e32 v23, v28, v29 ; 102E3B1C
v_mul_f32_e32 v14, v4, v14 ; 101C1D04
v_mul_f32_e32 v26, v5, v23 ; 10342F05
v_madmk_f32_e32 v26, v22, v26, 0x3dcccccd ; 40343516 3DCCCCCD
v_mul_f32_e32 v28, v5, v22 ; 10382D05
v_mac_f32_e32 v28, 0, v4 ; 3E380880
v_mac_f32_e32 v14, 0, v23 ; 3E1C2E80
v_mul_f32_e32 v30, v14, v4 ; 103C090E
v_mad_f32 v30, v22, v26, -v30 ; D282001E 847A3516
v_mul_f32_e32 v31, v26, v23 ; 103E2F1A
v_mad_f32 v31, v4, v28, -v31 ; D282001F 847E3904
v_mul_f32_e32 v32, v30, v22 ; 10402D1E
v_mad_f32 v32, v23, v31, -v32 ; D2820020 84823F17
v_mul_f32_e32 v26, v26, v32 ; 1034411A
v_mul_f32_e32 v32, v28, v22 ; 10402D1C
v_mad_f32 v32, v23, v14, -v32 ; D2820020 84821D17
v_mul_f32_e32 v33, v31, v4 ; 1042091F
v_mad_f32 v33, v22, v32, -v33 ; D2820021 84864116
v_mac_f32_e32 v26, v28, v33 ; 3E34431C
v_mul_f32_e32 v28, v32, v23 ; 10382F20
v_mad_f32 v28, v4, v30, -v28 ; D282001C 84723D04
v_mac_f32_e32 v26, v14, v28 ; 3E34390E
v_mul_f32_e32 v14, s13, v4 ; 101C080D
v_mul_f32_e32 v28, s22, v4 ; 10380816
v_mul_f32_e32 v4, s18, v4 ; 10080812
v_mac_f32_e32 v14, s12, v23 ; 3E1C2E0C
v_mac_f32_e32 v28, s23, v23 ; 3E382E17
v_mac_f32_e32 v4, s19, v23 ; 3E082E13
v_mac_f32_e32 v14, s15, v22 ; 3E1C2C0F
v_mac_f32_e32 v28, s20, v22 ; 3E382C14
v_mac_f32_e32 v4, s16, v22 ; 3E082C10
v_mul_f32_e32 v22, s30, v32 ; 102C401E
v_mul_f32_e32 v23, s31, v32 ; 102E401F
v_mul_f32_e32 v32, s32, v32 ; 10404020
v_mac_f32_e32 v22, s26, v30 ; 3E2C3C1A
v_mac_f32_e32 v23, s25, v30 ; 3E2E3C19
v_mac_f32_e32 v32, s28, v30 ; 3E403C1C
v_mac_f32_e32 v22, s27, v31 ; 3E2C3E1B
v_mac_f32_e32 v23, s29, v31 ; 3E2E3E1D
v_mac_f32_e32 v32, s24, v31 ; 3E403E18
v_mul_f32_e32 v30, v14, v14 ; 103C1D0E
v_mac_f32_e32 v30, v28, v28 ; 3E3C391C
v_mac_f32_e32 v30, v4, v4 ; 3E3C0904
v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E
v_mul_f32_e32 v31, v22, v22 ; 103E2D16
v_mac_f32_e32 v31, v23, v23 ; 3E3E2F17
v_mac_f32_e32 v31, v32, v32 ; 3E3E4120
v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F
v_mul_f32_e32 v14, v30, v14 ; 101C1D1E
v_mul_f32_e32 v28, v30, v28 ; 1038391E
v_mul_f32_e32 v4, v30, v4 ; 1008091E
v_mul_f32_e32 v22, v31, v22 ; 102C2D1F
v_mul_f32_e32 v23, v31, v23 ; 102E2F1F
v_mul_f32_e32 v30, v31, v32 ; 103C411F
v_cmp_lt_f32_e32 vcc, 0, v26 ; 7C023480
v_cndmask_b32_e64 v26, v26, 1.0, vcc ; D200001A 01A9E51A
v_cmp_le_f32_e32 vcc, 0, v26 ; 7C063480
v_cndmask_b32_e32 v26, -1.0, v26 ; 003434F3
v_mul_f32_e32 v31, v23, v4 ; 103E0917
v_mad_f32 v31, v28, v30, -v31 ; D282001F 847E3D1C
v_mul_f32_e32 v31, v26, v31 ; 103E3F1A
exp 15, 33, 0, 0, 0, v22, v31, v14, v23 ; F800021F 170E1F16
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v31, v22, v28 ; 103E3916
v_mad_f32 v23, v14, v23, -v31 ; D2820017 847E2F0E
v_mul_f32_e32 v14, v30, v14 ; 101C1D1E
v_mad_f32 v14, v4, v22, -v14 ; D282000E 843A2D04
v_mul_f32_e32 v14, v26, v14 ; 101C1D1A
v_mul_f32_e32 v22, v26, v23 ; 102C2F1A
exp 15, 34, 0, 0, 0, v14, v28, v30, v22 ; F800022F 161E1C0E
exp 15, 35, 0, 0, 0, v4, v28, v4, v17 ; F800023F 11041C04
s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E
s_buffer_load_dword s5, s[0:3], 0x26 ; C2028126
s_buffer_load_dword s6, s[0:3], 0x2a ; C203012A
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v4, v18, v18 ; 06082512
v_sub_f32_e32 v14, 1.0, v8 ; 081C10F2
v_mul_f32_e32 v4, v4, v14 ; 10081D04
v_add_f32_e32 v14, v20, v20 ; 061C2914
v_mac_f32_e32 v4, v14, v8 ; 3E08110E
v_mul_f32_e32 v4, v4, v10 ; 10081504
v_add_f32_e32 v8, 0x3f4ccccd, v11 ; 061016FF 3F4CCCCD
v_add_f32_e32 v10, v19, v19 ; 06142713
v_add_f32_e32 v11, v21, v21 ; 06162B15
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v14, s4 ; 7E1C0204
v_mad_f32 v14, -s6, v5, -v14 ; D282000E A43A0A06
v_mov_b32_e32 v17, s5 ; 7E220205
v_mad_f32 v17, s6, v5, v17 ; D2820011 04460A06
v_mul_f32_e64 v18, 0, s4 ; D2100012 00000880
v_mac_f32_e32 v18, s5, v5 ; 3E240A05
v_mul_f32_e32 v5, v14, v14 ; 100A1D0E
v_mac_f32_e32 v5, v18, v18 ; 3E0A2512
v_mac_f32_e32 v5, v17, v17 ; 3E0A2311
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v19, s5, v6 ; 10260C05
v_mad_f32 v19, v29, -s6, -v19 ; D2820013 C44C0D1D
v_mad_f32 v19, -v7, s4, v19 ; D2820013 244C0907
v_mad_f32 v19, 0.5, |v19|, 0.5 ; D2820213 03C226F0
v_sub_f32_e32 v20, 1.0, v19 ; 082826F2
v_mul_f32_e32 v6, v6, v20 ; 100C2906
v_mul_f32_e32 v7, v7, v20 ; 100E2907
v_mul_f32_e32 v14, v5, v14 ; 101C1D05
v_mul_f32_e32 v18, v5, v18 ; 10242505
v_mul_f32_e32 v5, v5, v17 ; 100A2305
v_mul_f32_e32 v17, s6, v5 ; 10220A06
v_mad_f32 v17, v18, s4, -v17 ; D2820011 84440912
v_mul_f32_e32 v21, s4, v14 ; 102A1C04
v_mad_f32 v21, v5, s5, -v21 ; D2820015 84540B05
v_mul_f32_e32 v22, v17, v17 ; 102C2311
v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15
v_mul_f32_e32 v21, s5, v18 ; 102A2405
v_mad_f32 v21, v14, s6, -v21 ; D2820015 84540D0E
v_mac_f32_e32 v22, v21, v21 ; 3E2C2B15
v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916
v_sub_f32_e32 v23, 1.0, v9 ; 082E12F2
v_mul_f32_e32 v10, v10, v23 ; 10142F0A
v_mac_f32_e32 v10, v11, v9 ; 3E14130B
v_mul_f32_e32 v9, v22, v17 ; 10122316
v_mul_f32_e32 v11, v22, v21 ; 10162B16
v_mul_f32_e32 v17, v29, v20 ; 1022291D
v_mac_f32_e32 v6, v9, v19 ; 3E0C2709
v_mac_f32_e32 v7, v11, v19 ; 3E0E270B
v_mac_f32_e32 v17, 1.0, v19 ; 3E2226F2
v_mul_f32_e32 v9, v6, v6 ; 10120D06
v_mac_f32_e32 v9, v17, v17 ; 3E122311
v_mac_f32_e32 v9, v7, v7 ; 3E120F07
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mad_f32 v1, v14, v4, v1 ; D2820001 0406090E
v_mad_f32 v2, v18, v4, v2 ; D2820002 040A0912
v_mac_f32_e32 v3, v5, v4 ; 3E060905
v_mul_f32_e32 v4, v9, v6 ; 10080D09
v_mul_f32_e32 v5, v9, v17 ; 100A2309
v_mul_f32_e32 v6, v9, v7 ; 100C0F09
v_mul_f32_e32 v7, v10, v8 ; 100E110A
v_mac_f32_e32 v1, v4, v7 ; 3E020F04
v_mul_f32_e32 v4, 0, v24 ; 10083080
v_mul_f32_e32 v4, v10, v4 ; 1008090A
v_mul_f32_e32 v9, v8, v4 ; 10120908
v_cmp_lt_f32_e32 vcc, 0, v9 ; 7C021280
v_cndmask_b32_e64 v11, v9, 1.0, vcc ; D200000B 01A9E509
v_cmp_le_f32_e32 vcc, 0, v11 ; 7C061680
v_cndmask_b32_e32 v11, -1.0, v11 ; 001616F3
v_mac_f32_e32 v2, v5, v7 ; 3E040F05
v_mac_f32_e32 v3, v6, v7 ; 3E060F06
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
v_add_f32_e32 v5, -0.5, v15 ; 060A1EF1
v_mac_f32_e32 v5, -2.0, v25 ; 3E0A32F5
v_mul_f32_e32 v5, v24, v5 ; 100A0B18
v_mul_f32_e32 v5, v10, v5 ; 100A0B0A
v_mul_f32_e32 v6, v8, v5 ; 100C0B08
v_cmp_lt_f32_e32 vcc, 0, v6 ; 7C020C80
v_cndmask_b32_e64 v7, v6, 1.0, vcc ; D2000007 01A9E506
v_cmp_le_f32_e32 vcc, 0, v7 ; 7C060E80
v_cndmask_b32_e32 v7, -1.0, v7 ; 000E0EF3
v_mov_b32_e32 v14, 0x42c80000 ; 7E1C02FF 42C80000
s_waitcnt lgkmcnt(0) ; BF8C007F
v_add_f32_e32 v14, s4, v14 ; 061C1C04
v_add_f32_e32 v15, 1.0, v27 ; 061E36F2
v_mul_f32_e32 v14, v14, v15 ; 101C1F0E
v_mul_f32_e32 v14, 0x43340000, v14 ; 101C1CFF 43340000
v_mul_f32_e32 v0, v0, v14 ; 10001D00
v_mov_b32_e32 v14, 0xbc75c28f ; 7E1C02FF BC75C28F
v_mov_b32_e32 v15, 0x3d8f5c29 ; 7E1E02FF 3D8F5C29
v_mad_f32 v9, |v9|, v15, v14 ; D2820109 043A1F09
v_max_f32_e32 v9, 0, v9 ; 20121280
v_mul_f32_e32 v9, v9, v11 ; 10121709
v_mac_f32_e32 v2, v8, v4 ; 3E040908
v_fract_f32_e32 v0, v0 ; 7E004100
v_sin_f32_e32 v0, v0 ; 7E006B00
v_mul_f32_e32 v0, 0x3f333333, v0 ; 100000FF 3F333333
v_mac_f32_e32 v2, v0, v9 ; 3E041300
v_mac_f32_e32 v1, v8, v5 ; 3E020B08
v_mad_f32 v4, |v6|, v15, v14 ; D2820104 043A1F06
v_max_f32_e32 v4, 0, v4 ; 20080880
v_mul_f32_e32 v4, v4, v7 ; 10080F04
v_mac_f32_e32 v1, v0, v4 ; 3E020900
v_mul_f32_e32 v4, s13, v1 ; 1008020D
v_mul_f32_e32 v5, s12, v1 ; 100A020C
v_mul_f32_e32 v6, s15, v1 ; 100C020F
v_mul_f32_e32 v1, s14, v1 ; 1002020E
v_mac_f32_e32 v4, s22, v2 ; 3E080416
v_mac_f32_e32 v5, s23, v2 ; 3E0A0417
v_mac_f32_e32 v6, s20, v2 ; 3E0C0414
v_mac_f32_e32 v1, s21, v2 ; 3E020415
v_add_f32_e32 v2, -0.5, v16 ; 060420F1
v_mul_f32_e32 v7, v13, v12 ; 100E190D
v_mac_f32_e32 v2, -2.0, v7 ; 3E040EF5
v_mul_f32_e32 v2, v24, v2 ; 10040518
v_mul_f32_e32 v2, v10, v2 ; 1004050A
v_mac_f32_e32 v3, v8, v2 ; 3E060508
v_mul_f32_e32 v2, v8, v2 ; 10040508
v_mad_f32 v7, |v2|, v15, v14 ; D2820107 043A1F02
v_cmp_lt_f32_e32 vcc, 0, v2 ; 7C020480
v_cndmask_b32_e64 v2, v2, 1.0, vcc ; D2000002 01A9E502
v_cmp_le_f32_e32 vcc, 0, v2 ; 7C060480
v_cndmask_b32_e32 v2, -1.0, v2 ; 000404F3
v_max_f32_e32 v7, 0, v7 ; 200E0E80
v_mul_f32_e32 v2, v7, v2 ; 10040507
v_mac_f32_e32 v3, v0, v2 ; 3E060500
v_mac_f32_e32 v4, s18, v3 ; 3E080612
v_mac_f32_e32 v5, s19, v3 ; 3E0A0613
v_mac_f32_e32 v6, s16, v3 ; 3E0C0610
v_mac_f32_e32 v1, s17, v3 ; 3E020611
s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122
s_buffer_load_dword s5, s[0:3], 0x23 ; C2028123
s_buffer_load_dword s6, s[0:3], 0x60 ; C2030160
s_buffer_load_dword s7, s[0:3], 0x61 ; C2038161
s_buffer_load_dword s8, s[0:3], 0x62 ; C2040162
s_buffer_load_dword s9, s[0:3], 0x63 ; C2048163
s_buffer_load_dword s12, s[0:3], 0x64 ; C2060164
s_buffer_load_dword s13, s[0:3], 0x65 ; C2068165
s_buffer_load_dword s14, s[0:3], 0x66 ; C2070166
s_buffer_load_dword s15, s[0:3], 0x67 ; C2078167
s_buffer_load_dword s16, s[0:3], 0x68 ; C2080168
s_buffer_load_dword s17, s[0:3], 0x69 ; C2088169
s_buffer_load_dword s18, s[0:3], 0x6a ; C209016A
s_buffer_load_dword s19, s[0:3], 0x6b ; C209816B
s_buffer_load_dword s20, s[0:3], 0x6c ; C20A016C
s_buffer_load_dword s21, s[0:3], 0x6d ; C20A816D
s_buffer_load_dword s22, s[0:3], 0x6e ; C20B016E
s_buffer_load_dword s0, s[0:3], 0x6f ; C200016F
v_mad_f32 v0, 0, v0, 0 ; D2820000 02020080
v_add_f32_e32 v0, 1.0, v0 ; 060000F2
v_mac_f32_e32 v4, s10, v0 ; 3E08000A
v_mac_f32_e32 v5, s11, v0 ; 3E0A000B
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v6, s4, v0 ; 3E0C0004
v_mac_f32_e32 v1, s5, v0 ; 3E020005
v_mul_f32_e32 v0, s6, v4 ; 10000806
v_mul_f32_e32 v2, s7, v4 ; 10040807
v_mul_f32_e32 v3, s8, v4 ; 10060808
v_mul_f32_e32 v4, s9, v4 ; 10080809
v_mac_f32_e32 v0, s12, v5 ; 3E000A0C
v_mac_f32_e32 v2, s13, v5 ; 3E040A0D
v_mac_f32_e32 v3, s14, v5 ; 3E060A0E
v_mac_f32_e32 v4, s15, v5 ; 3E080A0F
v_mac_f32_e32 v0, s16, v6 ; 3E000C10
v_mac_f32_e32 v2, s17, v6 ; 3E040C11
v_mac_f32_e32 v3, s18, v6 ; 3E060C12
v_mac_f32_e32 v4, s19, v6 ; 3E080C13
v_mac_f32_e32 v0, s20, v1 ; 3E000214
v_mac_f32_e32 v2, s21, v1 ; 3E040215
v_mac_f32_e32 v3, s22, v1 ; 3E060216
v_mac_f32_e32 v4, s0, v1 ; 3E080200
exp 15, 12, 0, 1, 0, v0, v2, v3, v4 ; F80008CF 04030200
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 96
VGPRS: 52
Code Size: 2212 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[1..3]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 0.0000, 5.0000}
IMM[1] FLT32 { 1.0000, 2.0000, 0.0000, 0.0000}
0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].xxxx
1: FLR TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].x, TEMP[0].xxxx, CONST[1].xxxx
3: FLR TEMP[1].x, TEMP[0].xxxx
4: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].yyyy
5: FRC TEMP[0].x, TEMP[0].xxxx
6: FRC TEMP[1].x, TEMP[1].xxxx
7: MOV TEMP[0].y, TEMP[1].xxxx
8: DDX TEMP[1].xy, IN[0].xyyy
9: MUL TEMP[2], CONST[3].xxxx, IN[0].xyyy
10: DDY TEMP[2].xy, TEMP[2]
11: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy
12: SQRT TEMP[1].x, TEMP[1].xxxx
13: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy
14: SQRT TEMP[2].x, TEMP[2].xxxx
15: MOV TEMP[1].y, TEMP[2].xxxx
16: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy
17: SQRT TEMP[1].x, TEMP[1].xxxx
18: MUL TEMP[1].x, TEMP[1].xxxx, CONST[1].wwww
19: LG2 TEMP[1].x, TEMP[1].xxxx
20: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy
21: ADD TEMP[1].x, TEMP[1].xxxx, IN[0].wwww
22: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz
23: MIN TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww
24: CEIL TEMP[2].x, TEMP[1].xxxx
25: EX2 TEMP[2].x, TEMP[2].xxxx
26: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1].zzzz
27: RCP TEMP[3].x, CONST[1].wwww
28: MUL TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx
29: FRC TEMP[4].xy, IN[0].xyyy
30: MUL TEMP[5].x, TEMP[2].xxxx, IMM[1].yyyy
31: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx
32: ADD TEMP[5].x, IMM[1].xxxx, -TEMP[5].xxxx
33: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx
34: MAD TEMP[2].xy, TEMP[4].xyyy, TEMP[5].xxxx, TEMP[2].xxxx
35: MAD TEMP[0].xy, TEMP[2].xyyy, CONST[1].xyyy, TEMP[0].xyyy
36: MOV TEMP[0].xy, TEMP[0].xyyy
37: MOV TEMP[0].w, TEMP[1].xxxx
38: TXL TEMP[0].w, TEMP[0], SAMP[0], 2D
39: FSLT TEMP[0].x, TEMP[0].wwww, CONST[2].xxxx
40: AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx
41: KILL_IF -TEMP[0].xxxx
42: MOV TEMP[0].x, IN[1].zzzz
43: MOV TEMP[0].y, IN[2].yyyy
44: MOV TEMP[0].z, IN[3].xxxx
45: MAD TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx, IMM[0].xxxx
46: MOV TEMP[0].w, IMM[0].zzzz
47: MOV OUT[0], TEMP[0]
48: END
; ModuleID = 'tgsi'
@ddxy_lds = external addrspace(3) global [64 x i32]
; Fragment-shader entry point (attributes #0 carries "ShaderType"="0"). This is the
; LLVM IR form of the FRAG TGSI listing dumped immediately above it in this log.
; Outline of what the body does:
;   1. loads six constants and two descriptors, interpolates seven input channels;
;   2. takes neighbour differences of two inputs through the @ddxy_lds scratch array;
;   3. turns the difference magnitudes into a value clamped to [0, 5];
;   4. builds two texture coordinates and samples with llvm.SI.samplel;
;   5. calls llvm.AMDGPU.kill when the sampled 4th channel is below a constant;
;   6. exports three inputs remapped by x*0.5+0.5 as packed f16.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant loads through element 0 of argument %1. The byte offsets 16/20/24/28,
; 32 and 48 line up with CONST[1].xyzw, CONST[2].x and CONST[3].x of the TGSI
; listing (assuming 16 bytes per CONST vector -- TODO confirm).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
; First element of argument %3 (32 bytes) and of argument %2 (16 bytes); both are
; only used as the 2nd and 3rd operands of the samplel call below (presumably the
; image and sampler descriptors for SAMP[0] -- confirm against the driver).
%30 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%31 = load <32 x i8>, <32 x i8> addrspace(2)* %30, align 32, !tbaa !0
%32 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !tbaa !0
; Interpolated inputs. Going by the TGSI uses, the first operand looks like the
; channel and the second the attribute index: %34..%37 = IN[0].xyzw,
; %38 = IN[1].z, %39 = IN[2].y, %40 = IN[3].x (operand meaning inferred -- confirm).
%34 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%35 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%36 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%37 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%38 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%39 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%40 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
; %43 = floor(%36 + 0.5) * %24; %45 = floor(%43) * %25.
; %47 and %49 are the fractional parts (x - floor(x)) of %43 and %45.
%41 = fadd float %36, 5.000000e-01
%42 = call float @llvm.floor.f32(float %41)
%43 = fmul float %42, %24
%44 = call float @llvm.floor.f32(float %43)
%45 = fmul float %44, %25
%46 = call float @llvm.floor.f32(float %43)
%47 = fsub float %43, %46
%48 = call float @llvm.floor.f32(float %45)
%49 = fsub float %45, %48
; First difference pair (TGSI instruction 8, DDX): each value is stored to
; ddxy_lds[tid] and then read back from indices (tid & -4) and (tid & -4) | 1;
; the difference of the two reads is the result.
; This store of %35 targets the same ddxy_lds[tid] slot as the store of %34 a few
; lines below (assuming both llvm.SI.tid calls return the same value) and no load
; sits in between.
%50 = call i32 @llvm.SI.tid()
%51 = sext i32 %50 to i64
%52 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %51
%53 = bitcast i32 addrspace(3)* %52 to float addrspace(3)*
store float %35, float addrspace(3)* %53, align 4
; %56 = &ddxy_lds[tid], %59 = &ddxy_lds[tid & -4], %62 = &ddxy_lds[(tid & -4) | 1]
%54 = call i32 @llvm.SI.tid()
%55 = sext i32 %54 to i64
%56 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %55
%57 = and i32 %54, -4
%58 = sext i32 %57 to i64
%59 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %58
%60 = or i32 %57, 1
%61 = sext i32 %60 to i64
%62 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %61
; %68 = difference of %34 between slot |1 and slot &-4
%63 = bitcast i32 addrspace(3)* %56 to float addrspace(3)*
store float %34, float addrspace(3)* %63, align 4
%64 = bitcast i32 addrspace(3)* %59 to float addrspace(3)*
%65 = load float, float addrspace(3)* %64, align 4
%66 = bitcast i32 addrspace(3)* %62 to float addrspace(3)*
%67 = load float, float addrspace(3)* %66, align 4
%68 = fsub float %67, %65
; %74 = the same difference taken for %35
%69 = bitcast i32 addrspace(3)* %56 to float addrspace(3)*
store float %35, float addrspace(3)* %69, align 4
%70 = bitcast i32 addrspace(3)* %59 to float addrspace(3)*
%71 = load float, float addrspace(3)* %70, align 4
%72 = bitcast i32 addrspace(3)* %62 to float addrspace(3)*
%73 = load float, float addrspace(3)* %72, align 4
%74 = fsub float %73, %71
; TGSI instruction 9: scale %34 and %35 by %29. %76 and %77 are the same product.
%75 = fmul float %29, %34
%76 = fmul float %29, %35
%77 = fmul float %29, %35
; Second difference pair (TGSI instruction 10, DDY): same store/reload scheme, but
; the second read index is (tid & -4) | 2 instead of | 1.
%78 = call i32 @llvm.SI.tid()
%79 = sext i32 %78 to i64
%80 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %79
%81 = bitcast i32 addrspace(3)* %80 to float addrspace(3)*
store float %77, float addrspace(3)* %81, align 4
; %84 = &ddxy_lds[tid], %87 = &ddxy_lds[tid & -4], %90 = &ddxy_lds[(tid & -4) | 2]
%82 = call i32 @llvm.SI.tid()
%83 = sext i32 %82 to i64
%84 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %83
%85 = and i32 %82, -4
%86 = sext i32 %85 to i64
%87 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %86
%88 = or i32 %85, 2
%89 = sext i32 %88 to i64
%90 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %89
; %96 = difference of %75 between slot |2 and slot &-4
%91 = bitcast i32 addrspace(3)* %84 to float addrspace(3)*
store float %75, float addrspace(3)* %91, align 4
%92 = bitcast i32 addrspace(3)* %87 to float addrspace(3)*
%93 = load float, float addrspace(3)* %92, align 4
%94 = bitcast i32 addrspace(3)* %90 to float addrspace(3)*
%95 = load float, float addrspace(3)* %94, align 4
%96 = fsub float %95, %93
; %102 = the same difference taken for %76
%97 = bitcast i32 addrspace(3)* %84 to float addrspace(3)*
store float %76, float addrspace(3)* %97, align 4
%98 = bitcast i32 addrspace(3)* %87 to float addrspace(3)*
%99 = load float, float addrspace(3)* %98, align 4
%100 = bitcast i32 addrspace(3)* %90 to float addrspace(3)*
%101 = load float, float addrspace(3)* %100, align 4
%102 = fsub float %101, %99
; Final store of %77; nothing in this function reads ddxy_lds after it.
%103 = bitcast i32 addrspace(3)* %84 to float addrspace(3)*
store float %77, float addrspace(3)* %103, align 4
; %107 = sqrt(%68^2 + %74^2), %111 = sqrt(%96^2 + %102^2),
; %115 = sqrt(%107^2 + %111^2)   (TGSI instructions 11-17)
%104 = fmul float %68, %68
%105 = fmul float %74, %74
%106 = fadd float %104, %105
%107 = call float @llvm.sqrt.f32(float %106)
%108 = fmul float %96, %96
%109 = fmul float %102, %102
%110 = fadd float %108, %109
%111 = call float @llvm.sqrt.f32(float %110)
%112 = fmul float %107, %107
%113 = fmul float %111, %111
%114 = fadd float %112, %113
%115 = call float @llvm.sqrt.f32(float %114)
; %121 = min(max(log2(%115 * %27) - 1.0 + %37, 0.0), 5.0); later used as the third
; coordinate component of the sample (the level for TXL, per the TGSI listing).
%116 = fmul float %115, %27
%117 = call float @llvm.log2.f32(float %116)
%118 = fadd float %117, -1.000000e+00
%119 = fadd float %118, %37
%120 = call float @llvm.maxnum.f32(float %119, float 0.000000e+00)
%121 = call float @llvm.minnum.f32(float %120, float 5.000000e+00)
; %124 = exp(ceil(%121)) * %26. The TGSI listing has EX2 at this position, so
; llvm.AMDIL.exp. is presumably a base-2 exponential -- confirm.
%122 = call float @llvm.ceil.f32(float %121)
%123 = call float @llvm.AMDIL.exp.(float %122)
%124 = fmul float %123, %26
; %126 = 0.5 / %27
%125 = fdiv float 1.000000e+00, %27
%126 = fmul float %125, 5.000000e-01
; %128, %130 = fractional parts of %34 and %35
%127 = call float @llvm.floor.f32(float %34)
%128 = fsub float %34, %127
%129 = call float @llvm.floor.f32(float %35)
%130 = fsub float %35, %129
; %133 = 1 - 2 * %124 * %126 (scale), %134 = %126 * %124 (offset);
; %136 / %138 = fract * scale + offset for the two components.
%131 = fmul float %124, 2.000000e+00
%132 = fmul float %131, %126
%133 = fsub float 1.000000e+00, %132
%134 = fmul float %126, %124
%135 = fmul float %128, %133
%136 = fadd float %135, %134
%137 = fmul float %130, %133
%138 = fadd float %137, %134
; Final coordinates: %140 = %136 * %24 + %47, %142 = %138 * %25 + %49
%139 = fmul float %136, %24
%140 = fadd float %139, %47
%141 = fmul float %138, %25
%142 = fadd float %141, %49
; Pack (%140, %142, %121) as raw bits into lanes 0..2 of the address vector;
; lane 3 is left undef.
%143 = bitcast float %140 to i32
%144 = bitcast float %142 to i32
%145 = bitcast float %121 to i32
%146 = insertelement <4 x i32> undef, i32 %143, i32 0
%147 = insertelement <4 x i32> %146, i32 %144, i32 1
%148 = insertelement <4 x i32> %147, i32 %145, i32 2
; Sample and keep only element 3 of the result (TEMP[0].w in the TGSI listing).
%149 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %148, <32 x i8> %31, <16 x i8> %33, i32 2)
%150 = extractelement <4 x float> %149, i32 3
; Pass -1.0 to the kill intrinsic when the sampled value is (ordered) less than
; %28, otherwise 0.0. The shader key above lists alpha_func = 7; whether this
; compare is related to it cannot be told from this dump.
%151 = fcmp olt float %150, %28
%152 = select i1 %151, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %152)
; Output colour: (%38, %39, %40) * 0.5 + 0.5, with 0.0 as the fourth component.
%153 = fmul float %38, 5.000000e-01
%154 = fadd float %153, 5.000000e-01
%155 = fmul float %39, 5.000000e-01
%156 = fadd float %155, 5.000000e-01
%157 = fmul float %40, 5.000000e-01
%158 = fadd float %157, 5.000000e-01
; Two packf16 results carry the four components; each packed word is passed twice
; to the export (consistent with export_16bpc = 0x3 in the shader key -- the exact
; meaning of the five leading integer operands is not shown here).
%159 = call i32 @llvm.SI.packf16(float %154, float %156)
%160 = bitcast i32 %159 to float
%161 = call i32 @llvm.SI.packf16(float %158, float 0.000000e+00)
%162 = bitcast i32 %161 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %160, float %162, float %160, float %162)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: readnone
declare i32 @llvm.SI.tid() #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.ceil.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the fragment shader whose TGSI and LLVM IR are dumped above.
; The trailing "; XXXXXXXX" on each instruction is its encoding as printed by the
; disassembler. Register roles below are read off the instruction operands.
;
; Setup: rewrite exec with s_wqm_b64, copy s9 into m0 for the interpolation
; instructions, and fetch the 4-dword descriptor that the s_buffer_load_dword
; instructions below use as their base (s[0:3]).
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
; Interpolation (p1/p2 pairs using v0 and v1). The trailing "channel, attribute"
; operands mirror the llvm.SI.fs.interp calls in the IR:
;   v2..v5 = attribute 0 channels 0..3, v6 = attribute 1 channel 2,
;   v7 = attribute 2 channel 1, v0 = attribute 3 channel 0 (v0 is overwritten last).
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 2, 1, [m0] ; C8180600
v_interp_p2_f32 v6, [v6], v1, 2, 1, [m0] ; C8190601
v_interp_p1_f32 v7, v0, 1, 2, [m0] ; C81C0900
v_interp_p2_f32 v7, [v7], v1, 1, 2, [m0] ; C81D0901
v_interp_p1_f32 v0, v0, 0, 3, [m0] ; C8000C00
v_interp_p2_f32 v0, [v0], v1, 0, 3, [m0] ; C8010C01
; v1 = result of the mbcnt pair (the counterpart of llvm.SI.tid in the IR);
; v8 = v1 << 2, the byte address of this lane's LDS slot.
v_mbcnt_lo_u32_b32_e64 v1, -1, 0 ; D2460001 000100C1
v_mbcnt_hi_u32_b32_e32 v1, -1, v1 ; 480202C1
v_lshlrev_b32_e32 v8, 2, v1 ; 34100282
; m0 = -1 before the first DS access (presumably the LDS bound -- confirm in the ISA manual).
s_mov_b32 m0, -1 ; BEFC03C1
; Two back-to-back writes to the same address: v3 first, then v2.
ds_write_b32 v8, v3 ; D8340000 00000308
ds_write_b32 v8, v2 ; D8340000 00000208
s_waitcnt lgkmcnt(0) ; BF8C007F
; s8 = constant dword 0xc (byte offset 48 in the IR).
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
; v1 = lane index & -4; v9 = byte address of slot (v1 | 1); v10 = byte address of slot v1.
v_and_b32_e32 v1, -4, v1 ; 360202C4
v_or_b32_e32 v9, 1, v1 ; 38120281
v_lshlrev_b32_e32 v9, 2, v9 ; 34121282
v_lshlrev_b32_e32 v10, 2, v1 ; 34140282
; First pair of reads sees v2 (v11 = base slot, v12 = slot |1); after v3 is
; written the second pair sees v3 (v9 = slot |1, v13 = base slot).
ds_read_b32 v11, v10 ; D8D80000 0B00000A
ds_read_b32 v12, v9 ; D8D80000 0C000009
ds_write_b32 v8, v3 ; D8340000 00000308
ds_read_b32 v9, v9 ; D8D80000 09000009
ds_read_b32 v13, v10 ; D8D80000 0D00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; Same scheme for the values scaled by s8: v14 = s8 * v3, v15 = s8 * v2.
v_mul_f32_e32 v14, s8, v3 ; 101C0608
ds_write_b32 v8, v14 ; D8340000 00000E08
v_mul_f32_e32 v15, s8, v2 ; 101E0408
ds_write_b32 v8, v15 ; D8340000 00000F08
; v1 = byte address of slot ((lane & -4) | 2)
v_or_b32_e32 v1, 2, v1 ; 38020282
v_lshlrev_b32_e32 v1, 2, v1 ; 34020282
s_waitcnt lgkmcnt(0) ; BF8C007F
; v15/v16 read the scaled v2 (base slot, slot |2); after v14 is written,
; v10/v1 read the scaled v3 (base slot, slot |2).
ds_read_b32 v15, v10 ; D8D80000 0F00000A
ds_read_b32 v16, v1 ; D8D80000 10000001
ds_write_b32 v8, v14 ; D8340000 00000E08
ds_read_b32 v10, v10 ; D8D80000 0A00000A
ds_read_b32 v1, v1 ; D8D80000 01000001
; s8..s11 = constant dwords 4..7 and s0 = dword 8 (byte offsets 16..28 and 32 in
; the IR). s0 overwrites the first dword of the descriptor it was loaded through.
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s0, s[0:3], 0x8 ; C2000108
; Last LDS write; no DS read follows it in this shader.
ds_write_b32 v8, v14 ; D8340000 00000E08
; v_subrev computes src1 - src0. v8 = v12 - v11, v9 = v9 - v13, then
; v9 = v9*v9 + v8*v8 (sum of squares of the first difference pair).
v_subrev_f32_e32 v8, v11, v12 ; 0A10190B
v_subrev_f32_e32 v9, v13, v9 ; 0A12130D
v_mul_f32_e32 v9, v9, v9 ; 10121309
v_mac_f32_e32 v9, v8, v8 ; 3E121108
s_waitcnt lgkmcnt(0) ; BF8C007F
; Same for the second pair: v1 = (v1 - v10)^2 + (v16 - v15)^2.
v_subrev_f32_e32 v8, v15, v16 ; 0A10210F
v_subrev_f32_e32 v1, v10, v1 ; 0A02030A
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v8, v8 ; 3E021108
; v4 = floor(v4 + 0.5)
v_add_f32_e32 v4, 0.5, v4 ; 060808F0
v_floor_f32_e32 v4, v4 ; 7E084904
; v1 = sqrt(v1)^2 + sqrt(v9)^2
v_sqrt_f32_e32 v8, v9 ; 7E106709
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v8, v8 ; 3E021108
; v8 = floor(s8 * v4)
v_mul_f32_e32 v8, s8, v4 ; 10100808
v_floor_f32_e32 v8, v8 ; 7E104908
; v1 = log(s11 * sqrt(v1))
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, s11, v1 ; 1002020B
v_log_f32_e32 v1, v1 ; 7E024F01
; v9 = floor(s9 * v8); v10 = v4*s8 - v8 and v11 = v8*s9 - v9 are the fractional
; parts that seed the two texture coordinates.
v_mul_f32_e32 v9, s9, v8 ; 10121009
v_floor_f32_e32 v9, v9 ; 7E124909
v_mad_f32 v10, v4, s8, -v8 ; D282000A 84201104
v_mad_f32 v11, v8, s9, -v9 ; D282000B 84241308
; v12 = min(max(v1 - 1.0 + v5, 0), 5.0)   (0x40a00000 is 5.0 as an IEEE-754 float)
v_add_f32_e32 v1, -1.0, v1 ; 060202F3
v_add_f32_e32 v1, v5, v1 ; 06020305
v_max_f32_e32 v1, 0, v1 ; 20020280
v_min_f32_e32 v12, 0x40a00000, v1 ; 1E1802FF 40A00000
; v1 = s10 * exp(ceil(v12))
v_ceil_f32_e32 v1, v12 ; 7E02450C
v_exp_f32_e32 v1, v1 ; 7E024B01
v_mul_f32_e32 v1, s10, v1 ; 1002020A
; v2 = v2 - floor(v2); v4 = 1 / s11; v5 = floor(v3)
v_floor_f32_e32 v4, v2 ; 7E084902
v_subrev_f32_e32 v2, v4, v2 ; 0A040504
v_rcp_f32_e32 v4, s11 ; 7E08540B
v_floor_f32_e32 v5, v3 ; 7E0A4903
; Descriptor loads consumed by image_sample_l below: s[12:15] (4 dwords via
; s[4:5]) and s[16:23] (8 dwords via s[6:7]).
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
; v3 = v3 - floor(v3); v4 = 0.5 / s11
v_subrev_f32_e32 v3, v5, v3 ; 0A060705
v_mul_f32_e32 v4, 0.5, v4 ; 100808F0
; v5 = 1 - 2*v1*v4 (scale); v1 = v1*v4 (offset)
v_mul_f32_e32 v5, -2.0, v1 ; 100A02F5
v_mad_f32 v5, v5, v4, 1.0 ; D2820005 03CA0905
v_mul_f32_e32 v1, v1, v4 ; 10020901
; v2 = v5*v2 + v1; v1 = v1 + v5*v3
v_mad_f32 v2, v5, v2, v1 ; D2820002 04060505
v_mac_f32_e32 v1, v5, v3 ; 3E020705
; Final coordinates accumulate onto the fractional parts: v10 += s8*v2, v11 += s9*v1.
v_mac_f32_e32 v10, s8, v2 ; 3E140408
v_mac_f32_e32 v11, s9, v1 ; 3E160209
s_waitcnt lgkmcnt(0) ; BF8C007F
; Sample with address registers v[10:13] (v10, v11 coordinates, v12 the clamped
; level; v13 is not written after its earlier use). The leading operand 8 is
; presumably a channel mask selecting only the 4th component, matching the
; extractelement ..., i32 3 in the IR -- confirm.
image_sample_l v1, 8, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900800 0064010A
s_waitcnt vmcnt(0) ; BF8C0770
; Kill sequence: v1 = (s0 > sample) ? -1.0 : 0, then v_cmpx_le 0 <= v1 (the cmpx
; form also updates exec, per the ISA manual -- this is the llvm.AMDGPU.kill in the IR).
v_cmp_gt_f32_e32 vcc, s0, v1 ; 7C080200
v_cndmask_b32_e64 v1, 0, -1.0, vcc ; D2000001 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v1 ; 7C260280
; Output colour: 0.5*x + 0.5 for v6, v7 and v0.
v_mad_f32 v1, 0.5, v6, 0.5 ; D2820001 03C20CF0
v_mad_f32 v2, 0.5, v7, 0.5 ; D2820002 03C20EF0
v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0
; Pack to f16 pairs (v1 = {v1, v2}, v0 = {v0, 0}) and export each packed word twice.
v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501
v_cvt_pkrtz_f16_f32_e64 v0, v0, 0 ; D25E0000 00010100
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 32
VGPRS: 20
Code Size: 560 bytes
LDS: 1 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL CONST[0..16]
DCL CONST[20..31]
DCL TEMP[0..14], LOCAL
IMM[0] FLT32 { 1.0000, 0.5000, 0.0000, 0.4990}
IMM[1] FLT32 { 1.4990, 2.0000, 0.8000, 1.5000}
IMM[2] FLT32 { -0.5000, -1.0000, 0.2000, 8.0000}
IMM[3] FLT32 { -4.0000, 0.0700, -0.0150, 100.0000}
IMM[4] FLT32 { 180.0000, 0.7000, 0.0000, 0.0000}
0: MUL TEMP[0].x, IN[4].xxxx, CONST[20].xxxx
1: FLR TEMP[0].x, TEMP[0].xxxx
2: ADD TEMP[1].xy, IN[0].xyyy, IMM[0].xxxx
3: MUL TEMP[1].xy, TEMP[1].xyyy, IMM[0].yyyy
4: MOV TEMP[1].z, TEMP[0].xxxx
5: RCP TEMP[2].x, CONST[20].xxxx
6: ADD TEMP[3].x, TEMP[0].xxxx, IMM[0].wwww
7: MUL TEMP[3].x, TEMP[3].xxxx, TEMP[2].xxxx
8: RCP TEMP[4].x, CONST[20].yyyy
9: MUL TEMP[5].x, IMM[0].wwww, TEMP[4].xxxx
10: MOV TEMP[3].y, TEMP[5].xxxx
11: MOV TEMP[3].xy, TEMP[3].xyyy
12: MOV TEMP[3].w, IMM[0].zzzz
13: TXL TEMP[3], TEMP[3], SAMP[0], 2D
14: ADD TEMP[5].x, TEMP[0].xxxx, IMM[0].wwww
15: MUL TEMP[0].x, TEMP[5].xxxx, TEMP[2].xxxx
16: MUL TEMP[2].x, IMM[1].xxxx, TEMP[4].xxxx
17: MOV TEMP[0].y, TEMP[2].xxxx
18: MOV TEMP[0].xy, TEMP[0].xyyy
19: MOV TEMP[0].w, IMM[0].zzzz
20: TXL TEMP[0].xy, TEMP[0], SAMP[0], 2D
21: MUL TEMP[2].xy, TEMP[3].xyyy, IMM[1].yyyy
22: MUL TEMP[3].xy, TEMP[3].zwww, IMM[1].yyyy
23: MOV TEMP[4].xy, IN[1].xzzz
24: MOV TEMP[4].w, IMM[0].zzzz
25: TXL TEMP[4].xy, TEMP[4], SAMP[1], 2D
26: LRP TEMP[3].xy, TEMP[4].xyyy, TEMP[3].xyyy, TEMP[2].xyyy
27: MOV TEMP[2].x, IN[3].xxxx
28: MUL TEMP[4].x, IN[3].xxxx, IN[3].xxxx
29: MAD TEMP[4].x, IN[3].yyyy, IN[3].yyyy, TEMP[4].xxxx
30: SQRT TEMP[4].x, TEMP[4].xxxx
31: ADD TEMP[4].x, IMM[0].xxxx, -TEMP[4].xxxx
32: MOV TEMP[2].y, TEMP[4].xxxx
33: MOV TEMP[2].z, IN[3].yyyy
34: MOV TEMP[4].y, TEMP[4].xxxx
35: MOV TEMP[5].x, CONST[13].zzzz
36: MOV TEMP[5].y, CONST[14].zzzz
37: MOV TEMP[5].z, CONST[15].zzzz
38: MOV TEMP[5].xyz, -TEMP[5].xyzx
39: MUL TEMP[6].xyz, IMM[0].zzxx, TEMP[5].yzxx
40: MAD TEMP[6].xyz, IMM[0].xzzz, TEMP[5].zxyy, -TEMP[6].xyzz
41: DP3 TEMP[7].x, TEMP[6].xyzz, TEMP[6].xyzz
42: RSQ TEMP[7].x, TEMP[7].xxxx
43: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[7].xxxx
44: MUL TEMP[7].xyz, TEMP[5].zxyy, TEMP[6].yzxx
45: MAD TEMP[7].xyz, TEMP[5].yzxx, TEMP[6].zxyy, -TEMP[7].xyzz
46: DP3 TEMP[8].x, TEMP[7].xyzz, TEMP[7].xyzz
47: RSQ TEMP[8].x, TEMP[8].xxxx
48: MUL TEMP[7].xz, TEMP[7].xyzz, TEMP[8].xxxx
49: MOV TEMP[7].xz, TEMP[7].xxzx
50: MOV TEMP[7].y, IMM[0].xxxx
51: DP3 TEMP[5].x, TEMP[5].xyzz, TEMP[2].xyzz
52: ABS TEMP[5].x, TEMP[5].xxxx
53: MAD TEMP[5].x, IMM[0].yyyy, TEMP[5].xxxx, IMM[0].yyyy
54: LRP TEMP[2].xyz, TEMP[5].xxxx, TEMP[7].xyzz, TEMP[2].xyzz
55: DP3 TEMP[5].x, TEMP[2].xyzz, TEMP[2].xyzz
56: RSQ TEMP[5].x, TEMP[5].xxxx
57: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xxxx
58: MUL TEMP[5], CONST[23].xxyy, CONST[21].xxyy
59: MUL TEMP[5], TEMP[5], CONST[25]
60: MUL TEMP[7], CONST[23].zzww, CONST[21].zzww
61: MUL TEMP[7], TEMP[7], CONST[26]
62: DP2 TEMP[8].x, CONST[25].xyyy, IN[1].xzzz
63: DP2 TEMP[9].x, CONST[25].zwww, IN[1].xzzz
64: MOV TEMP[8].y, TEMP[9].xxxx
65: DP2 TEMP[9].x, CONST[26].xyyy, IN[1].xzzz
66: MOV TEMP[8].z, TEMP[9].xxxx
67: DP2 TEMP[9].x, CONST[26].zwww, IN[1].xzzz
68: MOV TEMP[8].w, TEMP[9].xxxx
69: MUL TEMP[8], CONST[22], TEMP[8]
70: MUL TEMP[9], CONST[0].yyyy, CONST[24]
71: ADD TEMP[8], TEMP[8], TEMP[9]
72: COS TEMP[10].x, TEMP[8].xxxx
73: COS TEMP[10].y, TEMP[8].yyyy
74: COS TEMP[10].z, TEMP[8].zzzz
75: COS TEMP[10].w, TEMP[8].wwww
76: MOV TEMP[8].xy, TEMP[5].xzxx
77: MOV TEMP[8].zw, TEMP[7].zzxz
78: DP4 TEMP[8].x, TEMP[10], TEMP[8]
79: MOV TEMP[5].xy, TEMP[5].ywyy
80: MOV TEMP[5].zw, TEMP[7].wwyw
81: DP4 TEMP[5].x, TEMP[10], TEMP[5]
82: MOV TEMP[8].z, TEMP[5].xxxx
83: ADD TEMP[5].xy, IN[1].xzzz, TEMP[8].xzzz
84: MOV TEMP[7].y, IMM[1].yyyy
85: MUL TEMP[8], CONST[22].xxyy, CONST[21].xxyy
86: MUL TEMP[8], TEMP[8], CONST[25]
87: MUL TEMP[10], CONST[22].zzww, CONST[21].zzww
88: MUL TEMP[10], TEMP[10], CONST[26]
89: DP2 TEMP[11].x, CONST[25].xyyy, TEMP[5].xyyy
90: DP2 TEMP[12].x, CONST[25].zwww, TEMP[5].xyyy
91: MOV TEMP[11].y, TEMP[12].xxxx
92: DP2 TEMP[12].x, CONST[26].xyyy, TEMP[5].xyyy
93: MOV TEMP[11].z, TEMP[12].xxxx
94: DP2 TEMP[5].x, CONST[26].zwww, TEMP[5].xyyy
95: MOV TEMP[11].w, TEMP[5].xxxx
96: MAD TEMP[5], CONST[22], TEMP[11], TEMP[9]
97: COS TEMP[9].x, TEMP[5].xxxx
98: COS TEMP[9].y, TEMP[5].yyyy
99: COS TEMP[9].z, TEMP[5].zzzz
100: COS TEMP[9].w, TEMP[5].wwww
101: MOV TEMP[5].xy, TEMP[8].xzxx
102: MOV TEMP[5].zw, TEMP[10].zzxz
103: DP4 TEMP[5].x, TEMP[9], TEMP[5]
104: MOV TEMP[7].x, -TEMP[5].xxxx
105: MOV TEMP[5].xy, TEMP[8].ywyy
106: MOV TEMP[5].zw, TEMP[10].wwyw
107: DP4 TEMP[5].x, TEMP[9], TEMP[5]
108: MOV TEMP[7].z, -TEMP[5].xxxx
109: DP3 TEMP[5].x, TEMP[7].xyzz, TEMP[7].xyzz
110: RSQ TEMP[5].x, TEMP[5].xxxx
111: MUL TEMP[5].xyz, TEMP[7].xyzz, TEMP[5].xxxx
112: MUL TEMP[7].x, TEMP[0].xxxx, IMM[0].yyyy
113: MUL TEMP[5].xz, TEMP[5].xyzz, IMM[1].yyyy
114: MOV TEMP[8].xz, TEMP[5].xxzx
115: MOV TEMP[8].y, IMM[0].zzzz
116: ADD TEMP[10].xy, IN[1].xzzz, -CONST[27].xyyy
117: MAD TEMP[10].xy, CONST[27].zwww, IMM[0].yyyy, TEMP[10].xyyy
118: RCP TEMP[11].x, CONST[27].zzzz
119: RCP TEMP[11].y, CONST[27].wwww
120: MUL TEMP[9].xy, TEMP[10].xyyy, TEMP[11].xyyy
121: MOV TEMP[10].w, IMM[0].zzzz
122: MOV TEMP[10].xyz, IN[1].xyzx
123: MOV TEMP[11].w, IMM[0].xxxx
124: ADD TEMP[12].x, IN[0].yyyy, IMM[1].zzzz
125: MUL TEMP[13].x, TEMP[12].xxxx, TEMP[3].yyyy
126: MUL TEMP[14].x, IN[0].xxxx, TEMP[3].xxxx
127: MUL TEMP[6].xyz, TEMP[14].xxxx, TEMP[6].xyzz
128: MAD TEMP[11].xyz, TEMP[13].xxxx, TEMP[2].xyzz, TEMP[6].xyzz
129: MOV TEMP[2].y, IMM[0].zzzz
130: MOV TEMP[6].xy, TEMP[9].xyyy
131: MOV TEMP[6].w, IMM[0].zzzz
132: TXL TEMP[6].xyz, TEMP[6], SAMP[2], 2D
133: ADD TEMP[6].xz, TEMP[6].xzyy, IMM[2].xyxx
134: MOV TEMP[2].xz, TEMP[6].xxzx
135: MOV TEMP[6].w, IMM[0].zzzz
136: ADD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].xyzz
137: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[7].xxxx
138: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].yyyy
139: MUL TEMP[6].xyz, TEMP[3].xyzz, TEMP[12].xxxx
140: MUL TEMP[3].xy, IN[3].xyyy, IMM[1].wwww
141: MOV TEMP[4].x, TEMP[3].xxyx
142: MOV TEMP[2].xy, TEMP[4].xyxx
143: MUL TEMP[4].xy, TEMP[5].xzzz, TEMP[7].xxxx
144: MUL TEMP[4].x, TEMP[4].xyyy, IMM[2].zzzz
145: ADD TEMP[3].x, TEMP[3].yyyy, TEMP[4].xxxx
146: MOV TEMP[2].z, TEMP[3].xxxx
147: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
148: RSQ TEMP[3].x, TEMP[3].xxxx
149: MUL TEMP[3].xyz, TEMP[2].xyzz, TEMP[3].xxxx
150: MOV TEMP[1].xyz, TEMP[1].xyzx
151: MAD TEMP[0].x, TEMP[0].yyyy, IMM[2].wwww, IMM[3].xxxx
152: MOV TEMP[1].w, TEMP[0].xxxx
153: ADD TEMP[0], TEMP[10], TEMP[11]
154: SSG TEMP[2], TEMP[6]
155: ABS TEMP[4], TEMP[6]
156: MAD TEMP[4], TEMP[4], IMM[3].yyyy, IMM[3].zzzz
157: MAX TEMP[4], TEMP[4], IMM[0].zzzz
158: MUL TEMP[2], TEMP[2], TEMP[4]
159: DP2 TEMP[4].x, IN[3].xyyy, IN[3].xyyy
160: SQRT TEMP[4].x, TEMP[4].xxxx
161: ADD TEMP[4].x, TEMP[4].xxxx, IMM[0].xxxx
162: ADD TEMP[5].x, CONST[0].xxxx, IMM[3].wwww
163: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx
164: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].xxxx
165: SIN TEMP[4].x, TEMP[4].xxxx
166: MUL TEMP[4].x, TEMP[4].xxxx, IMM[4].yyyy
167: MAD TEMP[2], TEMP[2], TEMP[4].xxxx, TEMP[6]
168: ADD TEMP[0], TEMP[0], TEMP[2]
169: MUL TEMP[2], CONST[9], TEMP[0].xxxx
170: MAD TEMP[2], CONST[10], TEMP[0].yyyy, TEMP[2]
171: MAD TEMP[2], CONST[11], TEMP[0].zzzz, TEMP[2]
172: MAD TEMP[0], CONST[12], TEMP[0].wwww, TEMP[2]
173: MUL TEMP[2], CONST[28], TEMP[0].xxxx
174: MAD TEMP[2], CONST[29], TEMP[0].yyyy, TEMP[2]
175: MAD TEMP[2], CONST[30], TEMP[0].zzzz, TEMP[2]
176: MAD TEMP[0], CONST[31], TEMP[0].wwww, TEMP[2]
177: MUL TEMP[2].xyw, TEMP[0], IMM[0].yyyy
178: MOV TEMP[4].x, TEMP[2].xxxx
179: MUL TEMP[5].x, TEMP[2].yyyy, CONST[1].xxxx
180: MOV TEMP[4].y, TEMP[5].xxxx
181: ADD TEMP[2].xy, TEMP[4].xyyy, TEMP[2].wwww
182: MOV TEMP[2].zw, TEMP[0].wwzw
183: MOV TEMP[4].x, CONST[9].xxxx
184: MOV TEMP[4].y, CONST[10].xxxx
185: MOV TEMP[4].z, CONST[11].xxxx
186: MOV TEMP[5].x, CONST[9].yyyy
187: MOV TEMP[5].y, CONST[10].yyyy
188: MOV TEMP[5].z, CONST[11].yyyy
189: MOV TEMP[6].x, CONST[9].zzzz
190: MOV TEMP[6].y, CONST[10].zzzz
191: MOV TEMP[6].z, CONST[11].zzzz
192: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[3].xxxx
193: MAD TEMP[4].xyz, TEMP[5].xyzz, TEMP[3].yyyy, TEMP[4].xyzz
194: MAD TEMP[3].xyz, TEMP[6].xyzz, TEMP[3].zzzz, TEMP[4].xyzz
195: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
196: RSQ TEMP[4].x, TEMP[4].xxxx
197: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
198: MOV TEMP[4].w, IMM[0].xxxx
199: MOV TEMP[4].xyz, TEMP[3].xyzx
200: DP4 TEMP[5].x, CONST[2], TEMP[4]
201: DP4 TEMP[6].x, CONST[3], TEMP[4]
202: MOV TEMP[5].y, TEMP[6].xxxx
203: DP4 TEMP[4].x, CONST[4], TEMP[4]
204: MOV TEMP[5].z, TEMP[4].xxxx
205: MUL TEMP[4], TEMP[3].xyzz, TEMP[3].yzzx
206: DP4 TEMP[6].x, CONST[5], TEMP[4]
207: DP4 TEMP[7].x, CONST[6], TEMP[4]
208: MOV TEMP[6].y, TEMP[7].xxxx
209: DP4 TEMP[4].x, CONST[7], TEMP[4]
210: MOV TEMP[6].z, TEMP[4].xxxx
211: MOV TEMP[4].yzw, IN[2].yxyz
212: MUL TEMP[7].x, TEMP[3].yyyy, TEMP[3].yyyy
213: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[7].xxxx
214: MAD TEMP[3].xyz, CONST[8].xyzz, TEMP[3].xxxx, TEMP[6].xyzz
215: ADD TEMP[3].xyz, TEMP[3].xyzz, TEMP[5].xyzz
216: MOV TEMP[4].x, TEMP[0].zzzz
217: MOV OUT[1], TEMP[1]
218: MOV OUT[2], TEMP[2]
219: MOV OUT[3], TEMP[4]
220: MOV OUT[4], TEMP[3]
221: MOV OUT[0], TEMP[0]
222: END
; ModuleID = 'tgsi'
; Entry point emitted for the TGSI program listed above (attribute group #0
; sets "ShaderType"="1"). Arguments %0-%4 are byval pointers to arrays in
; addrspace(2); %5 and %6 are inreg i32 values and %7-%10 are plain i32 values.
; Only %1, %2, %3, %4, %5 and %7 are referenced in the body.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Scalar constant loads from the 16-byte value in slot 0 of array %1. The i32
; operand appears to be a byte offset (16 * CONST index + 4 * component): e.g.
; offsets 216/232/248 feed the negations below that match CONST[13..15].z in
; TGSI instructions 35-38.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 372)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 376)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432)
%89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436)
%90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440)
%91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444)
%92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448)
%93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452)
%94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456)
%95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 460)
%96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 464)
%97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 468)
%98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 472)
%99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 476)
%100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 480)
%101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 484)
%102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 488)
%103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 492)
%104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 496)
%105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 500)
%106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 504)
%107 = call float @llvm.SI.load.const(<16 x i8> %12, i32 508)
; Slots 0..2 of arrays %3 (<8 x i32>) and %2 (<4 x i32>) are loaded in pairs;
; each pair is later passed as the 2nd/3rd operands of llvm.SI.samplel.v4i32
; (presumably the image and sampler state for SAMP[0..2] -- TODO confirm).
%108 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 0
%109 = load <8 x i32>, <8 x i32> addrspace(2)* %108, align 32, !tbaa !0
%110 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 0
%111 = load <4 x i32>, <4 x i32> addrspace(2)* %110, align 16, !tbaa !0
%112 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%113 = bitcast <8 x i32> addrspace(2)* %112 to <32 x i8> addrspace(2)*
%114 = load <32 x i8>, <32 x i8> addrspace(2)* %113, align 32, !tbaa !0
%115 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%116 = bitcast <4 x i32> addrspace(2)* %115 to <16 x i8> addrspace(2)*
%117 = load <16 x i8>, <16 x i8> addrspace(2)* %116, align 16, !tbaa !0
%118 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%119 = bitcast <8 x i32> addrspace(2)* %118 to <32 x i8> addrspace(2)*
%120 = load <32 x i8>, <32 x i8> addrspace(2)* %119, align 32, !tbaa !0
%121 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%122 = bitcast <4 x i32> addrspace(2)* %121 to <16 x i8> addrspace(2)*
%123 = load <16 x i8>, <16 x i8> addrspace(2)* %122, align 16, !tbaa !0
; Fetch five vertex inputs from slots 0..4 of array %4. Every fetch uses the
; index %5 + %7, recomputed each time. Components kept: input 0 -> x,y;
; input 1 -> x,y,z; input 2 -> x,y,z; input 3 -> x,y; input 4 -> x.
%124 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%125 = load <16 x i8>, <16 x i8> addrspace(2)* %124, align 16, !tbaa !0
%126 = add i32 %5, %7
%127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %125, i32 0, i32 %126)
%128 = extractelement <4 x float> %127, i32 0
%129 = extractelement <4 x float> %127, i32 1
%130 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%131 = load <16 x i8>, <16 x i8> addrspace(2)* %130, align 16, !tbaa !0
%132 = add i32 %5, %7
%133 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %131, i32 0, i32 %132)
%134 = extractelement <4 x float> %133, i32 0
%135 = extractelement <4 x float> %133, i32 1
%136 = extractelement <4 x float> %133, i32 2
%137 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%138 = load <16 x i8>, <16 x i8> addrspace(2)* %137, align 16, !tbaa !0
%139 = add i32 %5, %7
%140 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %138, i32 0, i32 %139)
%141 = extractelement <4 x float> %140, i32 0
%142 = extractelement <4 x float> %140, i32 1
%143 = extractelement <4 x float> %140, i32 2
%144 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%145 = load <16 x i8>, <16 x i8> addrspace(2)* %144, align 16, !tbaa !0
%146 = add i32 %5, %7
%147 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %145, i32 0, i32 %146)
%148 = extractelement <4 x float> %147, i32 0
%149 = extractelement <4 x float> %147, i32 1
%150 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%151 = load <16 x i8>, <16 x i8> addrspace(2)* %150, align 16, !tbaa !0
%152 = add i32 %5, %7
%153 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %151, i32 0, i32 %152)
%154 = extractelement <4 x float> %153, i32 0
; %156 = floor(input4.x * %62); %159/%160 = (input0.xy + 1) * 0.5 (exported
; later through target 32); %161/%164 = reciprocals of %62/%63.
%155 = fmul float %154, %62
%156 = call float @llvm.floor.f32(float %155)
%157 = fadd float %128, 1.000000e+00
%158 = fadd float %129, 1.000000e+00
%159 = fmul float %157, 5.000000e-01
%160 = fmul float %158, 5.000000e-01
%161 = fdiv float 1.000000e+00, %62
%162 = fadd float %156, 0x3FDFEF9DC0000000
%163 = fmul float %162, %161
%164 = fdiv float 1.000000e+00, %63
%165 = fmul float %164, 0x3FDFEF9DC0000000
; First lookup with the slot-0 pair (%109/%111) at coordinates (%163, %165, 0);
; all four result components are kept.
%166 = bitcast float %163 to i32
%167 = bitcast float %165 to i32
%168 = insertelement <4 x i32> undef, i32 %166, i32 0
%169 = insertelement <4 x i32> %168, i32 %167, i32 1
%170 = insertelement <4 x i32> %169, i32 0, i32 2
%171 = bitcast <8 x i32> %109 to <32 x i8>
%172 = bitcast <4 x i32> %111 to <16 x i8>
%173 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %170, <32 x i8> %171, <16 x i8> %172, i32 2)
%174 = extractelement <4 x float> %173, i32 0
%175 = extractelement <4 x float> %173, i32 1
%176 = extractelement <4 x float> %173, i32 2
%177 = extractelement <4 x float> %173, i32 3
; Second lookup with the same pair: same first coordinate (recomputed as %179),
; second coordinate %180 uses a different multiplier; only x and y are kept
; (TGSI instruction 20).
%178 = fadd float %156, 0x3FDFEF9DC0000000
%179 = fmul float %178, %161
%180 = fmul float %164, 0x3FF7FBE760000000
%181 = bitcast float %179 to i32
%182 = bitcast float %180 to i32
%183 = insertelement <4 x i32> undef, i32 %181, i32 0
%184 = insertelement <4 x i32> %183, i32 %182, i32 1
%185 = insertelement <4 x i32> %184, i32 0, i32 2
%186 = bitcast <8 x i32> %109 to <32 x i8>
%187 = bitcast <4 x i32> %111 to <16 x i8>
%188 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %185, <32 x i8> %186, <16 x i8> %187, i32 2)
%189 = extractelement <4 x float> %188, i32 0
%190 = extractelement <4 x float> %188, i32 1
; Scale the first lookup by 2.0, then blend its zw against its xy with
; llvm.AMDGPU.lrp, using the x/y result of a lookup through the slot-1 pair at
; (input1.x, input1.z, 0) as the blend factors (TGSI instructions 21-26).
%191 = fmul float %174, 2.000000e+00
%192 = fmul float %175, 2.000000e+00
%193 = fmul float %176, 2.000000e+00
%194 = fmul float %177, 2.000000e+00
%195 = bitcast float %134 to i32
%196 = bitcast float %136 to i32
%197 = insertelement <4 x i32> undef, i32 %195, i32 0
%198 = insertelement <4 x i32> %197, i32 %196, i32 1
%199 = insertelement <4 x i32> %198, i32 0, i32 2
%200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %199, <32 x i8> %114, <16 x i8> %117, i32 2)
%201 = extractelement <4 x float> %200, i32 0
%202 = extractelement <4 x float> %200, i32 1
%203 = call float @llvm.AMDGPU.lrp(float %201, float %193, float %191)
%204 = call float @llvm.AMDGPU.lrp(float %202, float %194, float %192)
; %209 = 1 - sqrt(input3.x^2 + input3.y^2) (TGSI instructions 28-31).
%205 = fmul float %148, %148
%206 = fmul float %149, %149
%207 = fadd float %206, %205
%208 = call float @llvm.sqrt.f32(float %207)
%209 = fsub float 1.000000e+00, %208
; (%210,%211,%212) = negated constants %59/%60/%61. (%216,%218,%220) is built
; from them with -0.0 multiplies (TGSI instructions 39-40) and normalized with
; rsq.clamped into (%227,%228,%229).
%210 = fsub float -0.000000e+00, %59
%211 = fsub float -0.000000e+00, %60
%212 = fsub float -0.000000e+00, %61
%213 = fmul float %60, -0.000000e+00
%214 = fmul float %61, -0.000000e+00
%215 = fsub float -0.000000e+00, %213
%216 = fsub float %215, %61
%217 = fmul float %59, -0.000000e+00
%218 = fsub float %217, %214
%219 = fmul float %60, -0.000000e+00
%220 = fadd float %219, %59
%221 = fmul float %216, %216
%222 = fmul float %218, %218
%223 = fadd float %222, %221
%224 = fmul float %220, %220
%225 = fadd float %223, %224
%226 = call float @llvm.AMDGPU.rsq.clamped.f32(float %225)
%227 = fmul float %216, %226
%228 = fmul float %218, %226
%229 = fmul float %220, %226
; Cross product of (%210,%211,%212) with (%227,%228,%229) -> (%234,%236,%238);
; only the x and z components are scaled by the reciprocal length (%245, %246).
%230 = fmul float %228, %212
%231 = fmul float %229, %210
%232 = fmul float %227, %211
%233 = fmul float %229, %211
%234 = fsub float %233, %230
%235 = fmul float %227, %212
%236 = fsub float %235, %231
%237 = fmul float %228, %210
%238 = fsub float %237, %232
%239 = fmul float %234, %234
%240 = fmul float %236, %236
%241 = fadd float %240, %239
%242 = fmul float %238, %238
%243 = fadd float %241, %242
%244 = call float @llvm.AMDGPU.rsq.clamped.f32(float %243)
%245 = fmul float %234, %244
%246 = fmul float %238, %244
; %254 = 0.5 * |dot((%210,%211,%212), (input3.x, %209, input3.y))| + 0.5 is the
; lrp factor between (%245, 1.0, %246) and (input3.x, %209, input3.y); the
; blended vector is normalized into (%264,%265,%266).
%247 = fmul float %148, %210
%248 = fmul float %209, %211
%249 = fadd float %248, %247
%250 = fmul float %149, %212
%251 = fadd float %249, %250
%252 = call float @llvm.fabs.f32(float %251)
%253 = fmul float %252, 5.000000e-01
%254 = fadd float %253, 5.000000e-01
%255 = call float @llvm.AMDGPU.lrp(float %254, float %245, float %148)
%256 = call float @llvm.AMDGPU.lrp(float %254, float 1.000000e+00, float %209)
%257 = call float @llvm.AMDGPU.lrp(float %254, float %246, float %149)
%258 = fmul float %255, %255
%259 = fmul float %256, %256
%260 = fadd float %259, %258
%261 = fmul float %257, %257
%262 = fadd float %260, %261
%263 = call float @llvm.AMDGPU.rsq.clamped.f32(float %262)
%264 = fmul float %255, %263
%265 = fmul float %256, %263
%266 = fmul float %257, %263
; First four-term cosine sum (TGSI instructions 58-83): per-term weights
; %271..%274 / %279..%282, phases %303..%306 built from input1.xz and %14.
; The two sums %317 and %324 are added to input1.x and input1.z (%325, %326).
; Duplicate multiplies (e.g. %267/%268) are kept exactly as emitted.
%267 = fmul float %72, %64
%268 = fmul float %72, %64
%269 = fmul float %73, %65
%270 = fmul float %73, %65
%271 = fmul float %267, %80
%272 = fmul float %268, %81
%273 = fmul float %269, %82
%274 = fmul float %270, %83
%275 = fmul float %74, %66
%276 = fmul float %74, %66
%277 = fmul float %75, %67
%278 = fmul float %75, %67
%279 = fmul float %275, %84
%280 = fmul float %276, %85
%281 = fmul float %277, %86
%282 = fmul float %278, %87
%283 = fmul float %80, %134
%284 = fmul float %81, %136
%285 = fadd float %283, %284
%286 = fmul float %82, %134
%287 = fmul float %83, %136
%288 = fadd float %286, %287
%289 = fmul float %84, %134
%290 = fmul float %85, %136
%291 = fadd float %289, %290
%292 = fmul float %86, %134
%293 = fmul float %87, %136
%294 = fadd float %292, %293
%295 = fmul float %68, %285
%296 = fmul float %69, %288
%297 = fmul float %70, %291
%298 = fmul float %71, %294
%299 = fmul float %14, %76
%300 = fmul float %14, %77
%301 = fmul float %14, %78
%302 = fmul float %14, %79
%303 = fadd float %295, %299
%304 = fadd float %296, %300
%305 = fadd float %297, %301
%306 = fadd float %298, %302
%307 = call float @llvm.cos.f32(float %303)
%308 = call float @llvm.cos.f32(float %304)
%309 = call float @llvm.cos.f32(float %305)
%310 = call float @llvm.cos.f32(float %306)
%311 = fmul float %307, %271
%312 = fmul float %308, %273
%313 = fadd float %311, %312
%314 = fmul float %309, %279
%315 = fadd float %313, %314
%316 = fmul float %310, %281
%317 = fadd float %315, %316
%318 = fmul float %307, %272
%319 = fmul float %308, %274
%320 = fadd float %318, %319
%321 = fmul float %309, %280
%322 = fadd float %320, %321
%323 = fmul float %310, %282
%324 = fadd float %322, %323
%325 = fadd float %134, %317
%326 = fadd float %136, %324
; Second four-term cosine sum (TGSI instructions 85-111), evaluated at the
; displaced coordinates (%325, %326) with weights derived from %68..%71.
; The sums %373/%380 are normalized together with a constant 4.0 term.
%327 = fmul float %68, %64
%328 = fmul float %68, %64
%329 = fmul float %69, %65
%330 = fmul float %69, %65
%331 = fmul float %327, %80
%332 = fmul float %328, %81
%333 = fmul float %329, %82
%334 = fmul float %330, %83
%335 = fmul float %70, %66
%336 = fmul float %70, %66
%337 = fmul float %71, %67
%338 = fmul float %71, %67
%339 = fmul float %335, %84
%340 = fmul float %336, %85
%341 = fmul float %337, %86
%342 = fmul float %338, %87
%343 = fmul float %80, %325
%344 = fmul float %81, %326
%345 = fadd float %343, %344
%346 = fmul float %82, %325
%347 = fmul float %83, %326
%348 = fadd float %346, %347
%349 = fmul float %84, %325
%350 = fmul float %85, %326
%351 = fadd float %349, %350
%352 = fmul float %86, %325
%353 = fmul float %87, %326
%354 = fadd float %352, %353
%355 = fmul float %68, %345
%356 = fadd float %355, %299
%357 = fmul float %69, %348
%358 = fadd float %357, %300
%359 = fmul float %70, %351
%360 = fadd float %359, %301
%361 = fmul float %71, %354
%362 = fadd float %361, %302
%363 = call float @llvm.cos.f32(float %356)
%364 = call float @llvm.cos.f32(float %358)
%365 = call float @llvm.cos.f32(float %360)
%366 = call float @llvm.cos.f32(float %362)
%367 = fmul float %363, %331
%368 = fmul float %364, %333
%369 = fadd float %367, %368
%370 = fmul float %365, %339
%371 = fadd float %369, %370
%372 = fmul float %366, %341
%373 = fadd float %371, %372
%374 = fmul float %363, %332
%375 = fmul float %364, %334
%376 = fadd float %374, %375
%377 = fmul float %365, %340
%378 = fadd float %376, %377
%379 = fmul float %366, %342
%380 = fadd float %378, %379
%381 = fmul float %373, %373
%382 = fadd float %381, 4.000000e+00
%383 = fmul float %380, %380
%384 = fadd float %382, %383
%385 = call float @llvm.AMDGPU.rsq.clamped.f32(float %384)
%386 = fmul float %373, %385
%387 = fmul float %380, %385
; %388 = 0.5 * x of the second slot-0 lookup; %389/%390 = -2 * normalized sums.
; (%399, %400) = ((input1.xz - (%88,%89)) + 0.5 * (%90,%91)) / (%90,%91),
; used below as the coordinates for the slot-2 lookup (TGSI 116-120).
%388 = fmul float %189, 5.000000e-01
%389 = fmul float %386, -2.000000e+00
%390 = fmul float %387, -2.000000e+00
%391 = fsub float %134, %88
%392 = fsub float %136, %89
%393 = fmul float %90, 5.000000e-01
%394 = fadd float %393, %391
%395 = fmul float %91, 5.000000e-01
%396 = fadd float %395, %392
%397 = fdiv float 1.000000e+00, %90
%398 = fdiv float 1.000000e+00, %91
%399 = fmul float %394, %397
%400 = fmul float %396, %398
; Offset vector (%408,%410,%412) = %402 * (%264,%265,%266)
; + %403 * (%227,%228,%229), where %402 = (input0.y + const) * %204 and
; %403 = input0.x * %203 (TGSI instructions 124-128).
%401 = fadd float %129, 0x3FE99999A0000000
%402 = fmul float %401, %204
%403 = fmul float %128, %203
%404 = fmul float %403, %227
%405 = fmul float %403, %228
%406 = fmul float %403, %229
%407 = fmul float %402, %264
%408 = fadd float %407, %404
%409 = fmul float %402, %265
%410 = fadd float %409, %405
%411 = fmul float %402, %266
%412 = fadd float %411, %406
; Lookup through the slot-2 pair at (%399, %400, 0); x and y are biased by
; -0.5, added to %389/%390, then scaled by %388, %204 and %401 to give
; (%431, %432, %433). The middle component is multiplied by 0.0 as emitted.
%413 = bitcast float %399 to i32
%414 = bitcast float %400 to i32
%415 = insertelement <4 x i32> undef, i32 %413, i32 0
%416 = insertelement <4 x i32> %415, i32 %414, i32 1
%417 = insertelement <4 x i32> %416, i32 0, i32 2
%418 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %417, <32 x i8> %120, <16 x i8> %123, i32 2)
%419 = extractelement <4 x float> %418, i32 0
%420 = extractelement <4 x float> %418, i32 1
%421 = fadd float %419, -5.000000e-01
%422 = fadd float %420, -5.000000e-01
%423 = fadd float %389, %421
%424 = fadd float %390, %422
%425 = fmul float %423, %388
%426 = fmul float %388, 0.000000e+00
%427 = fmul float %424, %388
%428 = fmul float %425, %204
%429 = fmul float %426, %204
%430 = fmul float %427, %204
%431 = fmul float %428, %401
%432 = fmul float %429, %401
%433 = fmul float %430, %401
; Normalize (1.5 * input3.x, %209, 1.5 * input3.y + %437) into
; (%445,%446,%447) (TGSI instructions 140-149).
%434 = fmul float %148, 1.500000e+00
%435 = fmul float %149, 1.500000e+00
%436 = fmul float %389, %388
%437 = fmul float %436, 0x3FC99999A0000000
%438 = fadd float %435, %437
%439 = fmul float %434, %434
%440 = fmul float %209, %209
%441 = fadd float %440, %439
%442 = fmul float %438, %438
%443 = fadd float %441, %442
%444 = call float @llvm.AMDGPU.rsq.clamped.f32(float %443)
%445 = fmul float %434, %444
%446 = fmul float %209, %444
%447 = fmul float %438, %444
; %449 = 8 * y of the second slot-0 lookup - 4 (w of the target-32 export).
%448 = fmul float %190, 8.000000e+00
%449 = fadd float %448, -4.000000e+00
; input1.xyz plus the offset vector computed above.
%450 = fadd float %134, %408
%451 = fadd float %135, %410
%452 = fadd float %136, %412
; Per-component sign of (%431,%432,%433) expanded into compare/select pairs
; (TGSI SSG, instruction 154): >0 -> 1.0, <0 -> -1.0, otherwise the value.
%453 = fcmp ogt float %431, 0.000000e+00
%454 = select i1 %453, float 1.000000e+00, float %431
%455 = fcmp oge float %454, 0.000000e+00
%456 = select i1 %455, float %454, float -1.000000e+00
%457 = fcmp ogt float %432, 0.000000e+00
%458 = select i1 %457, float 1.000000e+00, float %432
%459 = fcmp oge float %458, 0.000000e+00
%460 = select i1 %459, float %458, float -1.000000e+00
%461 = fcmp ogt float %433, 0.000000e+00
%462 = select i1 %461, float 1.000000e+00, float %433
%463 = fcmp oge float %462, 0.000000e+00
%464 = select i1 %463, float %462, float -1.000000e+00
; max(|v| * c0 + c1, 0) per component, then multiplied by the sign above
; (TGSI instructions 155-158).
%465 = call float @llvm.fabs.f32(float %431)
%466 = call float @llvm.fabs.f32(float %432)
%467 = call float @llvm.fabs.f32(float %433)
%468 = fmul float %465, 0x3FB1EB8520000000
%469 = fadd float %468, 0xBF8EB851E0000000
%470 = fmul float %466, 0x3FB1EB8520000000
%471 = fadd float %470, 0xBF8EB851E0000000
%472 = fmul float %467, 0x3FB1EB8520000000
%473 = fadd float %472, 0xBF8EB851E0000000
%474 = call float @llvm.maxnum.f32(float %469, float 0.000000e+00)
%475 = call float @llvm.maxnum.f32(float %471, float 0.000000e+00)
%476 = call float @llvm.maxnum.f32(float %473, float 0.000000e+00)
%477 = fmul float %456, %474
%478 = fmul float %460, %475
%479 = fmul float %464, %476
; %489 = const * sin((sqrt(input3.x^2 + input3.y^2) + 1) * (%13 + 100) * 180)
; (TGSI instructions 159-166).
%480 = fmul float %148, %148
%481 = fmul float %149, %149
%482 = fadd float %480, %481
%483 = call float @llvm.sqrt.f32(float %482)
%484 = fadd float %483, 1.000000e+00
%485 = fadd float %13, 1.000000e+02
%486 = fmul float %484, %485
%487 = fmul float %486, 1.800000e+02
%488 = call float @llvm.sin.f32(float %487)
%489 = fmul float %488, 0x3FE6666660000000
; Add the sine-scaled term to (%431,%432,%433) and then to the displaced
; input; the fourth component works out to %489 * 0 + 0 + 1 (%501).
%490 = fmul float %477, %489
%491 = fadd float %490, %431
%492 = fmul float %478, %489
%493 = fadd float %492, %432
%494 = fmul float %479, %489
%495 = fadd float %494, %433
%496 = fmul float %489, 0.000000e+00
%497 = fadd float %496, 0.000000e+00
%498 = fadd float %450, %491
%499 = fadd float %451, %493
%500 = fadd float %452, %495
%501 = fadd float %497, 1.000000e+00
; Multiply (%498,%499,%500,%501) by the 16 constants %43..%58
; (TGSI instructions 169-172, CONST[9..12]) -> (%523,%525,%527,%529).
%502 = fmul float %43, %498
%503 = fmul float %44, %498
%504 = fmul float %45, %498
%505 = fmul float %46, %498
%506 = fmul float %47, %499
%507 = fadd float %506, %502
%508 = fmul float %48, %499
%509 = fadd float %508, %503
%510 = fmul float %49, %499
%511 = fadd float %510, %504
%512 = fmul float %50, %499
%513 = fadd float %512, %505
%514 = fmul float %51, %500
%515 = fadd float %514, %507
%516 = fmul float %52, %500
%517 = fadd float %516, %509
%518 = fmul float %53, %500
%519 = fadd float %518, %511
%520 = fmul float %54, %500
%521 = fadd float %520, %513
%522 = fmul float %55, %501
%523 = fadd float %522, %515
%524 = fmul float %56, %501
%525 = fadd float %524, %517
%526 = fmul float %57, %501
%527 = fadd float %526, %519
%528 = fmul float %58, %501
%529 = fadd float %528, %521
; Multiply that result by the 16 constants %92..%107
; (TGSI instructions 173-176, CONST[28..31]) -> (%551,%553,%555,%557),
; which is the value exported through target 12 at the end.
%530 = fmul float %92, %523
%531 = fmul float %93, %523
%532 = fmul float %94, %523
%533 = fmul float %95, %523
%534 = fmul float %96, %525
%535 = fadd float %534, %530
%536 = fmul float %97, %525
%537 = fadd float %536, %531
%538 = fmul float %98, %525
%539 = fadd float %538, %532
%540 = fmul float %99, %525
%541 = fadd float %540, %533
%542 = fmul float %100, %527
%543 = fadd float %542, %535
%544 = fmul float %101, %527
%545 = fadd float %544, %537
%546 = fmul float %102, %527
%547 = fadd float %546, %539
%548 = fmul float %103, %527
%549 = fadd float %548, %541
%550 = fmul float %104, %529
%551 = fadd float %550, %543
%552 = fmul float %105, %529
%553 = fadd float %552, %545
%554 = fmul float %106, %529
%555 = fadd float %554, %547
%556 = fmul float %107, %529
%557 = fadd float %556, %549
; %562 = 0.5*x + 0.5*w and %563 = 0.5*y*%15 + 0.5*w of the transformed value
; (TGSI instructions 177-181).
%558 = fmul float %551, 5.000000e-01
%559 = fmul float %553, 5.000000e-01
%560 = fmul float %557, 5.000000e-01
%561 = fmul float %559, %15
%562 = fadd float %558, %560
%563 = fadd float %561, %560
; Combine (%445,%446,%447) with nine of the constants %43..%53
; (TGSI instructions 183-197) and normalize -> (%585,%586,%587).
%564 = fmul float %43, %445
%565 = fmul float %47, %445
%566 = fmul float %51, %445
%567 = fmul float %44, %446
%568 = fadd float %567, %564
%569 = fmul float %48, %446
%570 = fadd float %569, %565
%571 = fmul float %52, %446
%572 = fadd float %571, %566
%573 = fmul float %45, %447
%574 = fadd float %573, %568
%575 = fmul float %49, %447
%576 = fadd float %575, %570
%577 = fmul float %53, %447
%578 = fadd float %577, %572
%579 = fmul float %574, %574
%580 = fmul float %576, %576
%581 = fadd float %580, %579
%582 = fmul float %578, %578
%583 = fadd float %581, %582
%584 = call float @llvm.AMDGPU.rsq.clamped.f32(float %583)
%585 = fmul float %574, %584
%586 = fmul float %576, %584
%587 = fmul float %578, %584
; Three 4-component dot products of (%585,%586,%587, 1.0) with constants
; %16..%27 (TGSI instructions 198-204); the w term appears as a plain add.
%588 = fmul float %16, %585
%589 = fmul float %17, %586
%590 = fadd float %588, %589
%591 = fmul float %18, %587
%592 = fadd float %590, %591
%593 = fadd float %592, %19
%594 = fmul float %20, %585
%595 = fmul float %21, %586
%596 = fadd float %594, %595
%597 = fmul float %22, %587
%598 = fadd float %596, %597
%599 = fadd float %598, %23
%600 = fmul float %24, %585
%601 = fmul float %25, %586
%602 = fadd float %600, %601
%603 = fmul float %26, %587
%604 = fadd float %602, %603
%605 = fadd float %604, %27
; Pairwise products (x*y, y*z, z*z, z*x) dotted with constants %28..%39
; (TGSI instructions 205-210).
%606 = fmul float %585, %586
%607 = fmul float %586, %587
%608 = fmul float %587, %587
%609 = fmul float %587, %585
%610 = fmul float %28, %606
%611 = fmul float %29, %607
%612 = fadd float %610, %611
%613 = fmul float %30, %608
%614 = fadd float %612, %613
%615 = fmul float %31, %609
%616 = fadd float %614, %615
%617 = fmul float %32, %606
%618 = fmul float %33, %607
%619 = fadd float %617, %618
%620 = fmul float %34, %608
%621 = fadd float %619, %620
%622 = fmul float %35, %609
%623 = fadd float %621, %622
%624 = fmul float %36, %606
%625 = fmul float %37, %607
%626 = fadd float %624, %625
%627 = fmul float %38, %608
%628 = fadd float %626, %627
%629 = fmul float %39, %609
%630 = fadd float %628, %629
; (x*x - y*y) scaled by constants %40..%42, then all three partial results
; are summed into (%640,%641,%642) (TGSI instructions 212-215).
%631 = fmul float %586, %586
%632 = fmul float %585, %585
%633 = fsub float %632, %631
%634 = fmul float %40, %633
%635 = fadd float %634, %616
%636 = fmul float %41, %633
%637 = fadd float %636, %623
%638 = fmul float %42, %633
%639 = fadd float %638, %630
%640 = fadd float %635, %593
%641 = fadd float %637, %599
%642 = fadd float %639, %605
; Five exports, one per TGSI output. The 4th operand is the target (32..35,
; then 12 for the value written by "MOV OUT[0]"); the 3rd operand is 1 only on
; the final export (presumably a "last export" marker -- TODO confirm).
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %159, float %160, float %156, float %449)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %562, float %563, float %555, float %557)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %555, float %141, float %142, float %143)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %640, float %641, float %642, float %177)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %551, float %553, float %555, float %557)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.cos.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.sin.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v22, 0x3eff7cee ; 7E2C02FF 3EFF7CEE
v_mov_b32_e32 v10, 0x3e22f983 ; 7E1402FF 3E22F983
v_mov_b32_e32 v23, 0x80000000 ; 7E2E02FF 80000000
v_mov_b32_e32 v11, 0x3fc00000 ; 7E1602FF 3FC00000
v_mov_b32_e32 v20, 0x42c80000 ; 7E2802FF 42C80000
v_mov_b32_e32 v8, 0xbc75c28f ; 7E1002FF BC75C28F
v_mov_b32_e32 v9, 0x3d8f5c29 ; 7E1202FF 3D8F5C29
v_mov_b32_e32 v36, 0x41000000 ; 7E4802FF 41000000
v_add_i32_e32 v21, s10, v0 ; 4A2A000A
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904
s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908
s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_load_dwordx4 s[72:75], s[4:5], 0x0 ; C0A40500
s_load_dwordx4 s[68:71], s[4:5], 0x4 ; C0A20504
s_load_dwordx4 s[56:59], s[4:5], 0x8 ; C09C0508
s_load_dwordx8 s[76:83], s[6:7], 0x0 ; C0E60700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s47, s[12:15], 0x67 ; C2178D67
buffer_load_format_xyzw v[16:19], v21, s[0:3], 0 idxen ; E00C2000 80001015
buffer_load_format_xyzw v[4:7], v21, s[16:19], 0 idxen ; E00C2000 80040415
buffer_load_format_xyzw v[0:3], v21, s[20:23], 0 idxen ; E00C2000 80050015
s_buffer_load_dword s46, s[12:15], 0x68 ; C2170D68
s_buffer_load_dword s48, s[12:15], 0x69 ; C2180D69
s_buffer_load_dword s20, s[12:15], 0x6a ; C20A0D6A
s_buffer_load_dword s49, s[12:15], 0x6b ; C2188D6B
s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00
s_buffer_load_dword s4, s[12:15], 0x1 ; C2020D01
s_buffer_load_dword s2, s[12:15], 0x4 ; C2010D04
s_buffer_load_dword s0, s[12:15], 0x8 ; C2000D08
s_buffer_load_dword s1, s[12:15], 0x9 ; C2008D09
buffer_load_format_xyzw v[12:15], v21, s[24:27], 0 idxen ; E00C2000 80060C15
s_buffer_load_dword s5, s[12:15], 0x5d ; C2028D5D
s_buffer_load_dword s16, s[12:15], 0x5e ; C2080D5E
s_buffer_load_dword s17, s[12:15], 0x5f ; C2088D5F
s_buffer_load_dword s18, s[12:15], 0x60 ; C2090D60
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_add_f32_e32 v3, s3, v20 ; 06062803
s_buffer_load_dword s3, s[12:15], 0x61 ; C2018D61
buffer_load_format_xyzw v[37:40], v21, s[8:11], 0 idxen ; E00C2000 80022515
s_buffer_load_dword s8, s[12:15], 0x62 ; C2040D62
s_buffer_load_dword s9, s[12:15], 0x63 ; C2048D63
s_buffer_load_dword s51, s[12:15], 0x64 ; C2198D64
s_buffer_load_dword s55, s[12:15], 0x65 ; C21B8D65
s_buffer_load_dword s53, s[12:15], 0x66 ; C21A8D66
s_buffer_load_dword s50, s[12:15], 0x58 ; C2190D58
s_waitcnt vmcnt(1) ; BF8C0771
v_mov_b32_e32 v7, s18 ; 7E0E0212
v_mul_f32_e32 v20, s4, v7 ; 10280E04
s_buffer_load_dword s52, s[12:15], 0x59 ; C21A0D59
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v7, s3 ; 7E0E0203
v_mul_f32_e32 v21, s4, v7 ; 102A0E04
s_buffer_load_dword s54, s[12:15], 0x5a ; C21B0D5A
s_waitcnt vmcnt(0) ; BF8C0770
v_mov_b32_e32 v7, s8 ; 7E0E0208
v_mul_f32_e32 v19, s4, v7 ; 10260E04
v_mov_b32_e32 v7, s9 ; 7E0E0209
v_mul_f32_e32 v14, s4, v7 ; 101C0E04
s_buffer_load_dword s30, s[12:15], 0x5b ; C20F0D5B
s_buffer_load_dword s4, s[12:15], 0x5c ; C2020D5C
s_buffer_load_dword s8, s[12:15], 0x51 ; C2040D51
s_buffer_load_dword s9, s[12:15], 0x54 ; C2048D54
s_buffer_load_dword s10, s[12:15], 0x55 ; C2050D55
s_buffer_load_dword s11, s[12:15], 0x56 ; C2058D56
s_buffer_load_dword s18, s[12:15], 0x57 ; C2090D57
s_buffer_load_dword s3, s[12:15], 0x33 ; C2018D33
s_buffer_load_dword s29, s[12:15], 0x36 ; C20E8D36
s_buffer_load_dword s28, s[12:15], 0x3a ; C20E0D3A
s_buffer_load_dword s27, s[12:15], 0x3e ; C20D8D3E
s_buffer_load_dword s92, s[12:15], 0x50 ; C22E0D50
s_waitcnt lgkmcnt(0) ; BF8C007F
v_rcp_f32_e32 v38, s8 ; 7E4C5408
v_mov_b32_e32 v7, s9 ; 7E0E0209
v_mov_b32_e32 v15, s9 ; 7E1E0209
v_mul_f32_e32 v15, s4, v15 ; 101E1E04
v_mul_f32_e32 v7, s50, v7 ; 100E0E32
v_mov_b32_e32 v18, s10 ; 7E24020A
v_mov_b32_e32 v24, s10 ; 7E30020A
v_mul_f32_e32 v24, s5, v24 ; 10303005
v_mul_f32_e32 v18, s52, v18 ; 10242434
v_mul_f32_e32 v39, s47, v24 ; 104E302F
v_mul_f32_e32 v40, s53, v24 ; 10503035
v_mul_f32_e32 v41, s55, v15 ; 10521E37
v_mul_f32_e32 v42, s51, v15 ; 10541E33
v_mov_b32_e32 v15, s11 ; 7E1E020B
v_mov_b32_e32 v24, s11 ; 7E30020B
v_mul_f32_e32 v24, s16, v24 ; 10303010
v_mul_f32_e32 v25, s54, v15 ; 10321E36
v_mul_f32_e32 v43, s48, v24 ; 10563030
v_mul_f32_e32 v44, s46, v24 ; 1058302E
v_mov_b32_e32 v15, s18 ; 7E1E0212
v_mov_b32_e32 v24, s18 ; 7E300212
v_mul_f32_e32 v24, s17, v24 ; 10303011
v_mul_f32_e32 v26, s30, v15 ; 10341E1E
v_mul_f32_e32 v45, s49, v24 ; 105A3031
v_mul_f32_e32 v46, s20, v24 ; 105C3014
v_mov_b32_e32 v15, s27 ; 7E1E021B
v_mad_f32 v15, -s28, v23, -v15 ; D282000F A43E2E1C
v_mul_f32_e64 v24, 0, s27 ; D2100018 00003680
v_mac_f32_e32 v24, s29, v23 ; 3E302E1D
v_mul_f32_e32 v27, v15, v15 ; 10361F0F
v_mac_f32_e32 v27, v24, v24 ; 3E363118
v_mov_b32_e32 v28, s29 ; 7E38021D
v_mad_f32 v23, s28, v23, v28 ; D2820017 04722E1C
v_mac_f32_e32 v27, v23, v23 ; 3E362F17
v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B
v_mul_f32_e32 v35, s53, v18 ; 10462435
v_mul_f32_e32 v34, s47, v18 ; 1044242F
v_mul_f32_e32 v33, s51, v7 ; 10420E33
v_mul_f32_e32 v32, s55, v7 ; 10400E37
v_mul_f32_e32 v7, v27, v23 ; 100E2F1B
v_mul_f32_e32 v15, v27, v15 ; 101E1F1B
v_mul_f32_e32 v18, v27, v24 ; 1024311B
v_mul_f32_e32 v30, s46, v25 ; 103C322E
v_mul_f32_e32 v31, s48, v25 ; 103E3230
v_rcp_f32_e32 v23, s92 ; 7E2E545C
v_mul_f32_e32 v29, s20, v26 ; 103A3414
v_mul_f32_e32 v28, s49, v26 ; 10383431
v_mul_f32_e32 v24, s28, v7 ; 10300E1C
v_mad_f32 v24, v18, s27, -v24 ; D2820018 84603712
v_mul_f32_e32 v25, s27, v15 ; 10321E1B
v_mad_f32 v25, v7, s29, -v25 ; D2820019 84643B07
v_mul_f32_e32 v26, s29, v18 ; 1034241D
v_mad_f32 v27, v15, s28, -v26 ; D282001B 8468390F
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v25, v26 ; 7E32591A
s_buffer_load_dword s93, s[12:15], 0x6c ; C22E8D6C
s_buffer_load_dword s94, s[12:15], 0x6d ; C22F0D6D
s_buffer_load_dword s95, s[12:15], 0x6e ; C22F8D6E
v_mul_f32_e32 v26, v25, v24 ; 10343119
v_mul_f32_e32 v27, v25, v27 ; 10363719
s_buffer_load_dword s96, s[12:15], 0x6f ; C2300D6F
s_buffer_load_dword s16, s[12:15], 0x70 ; C2080D70
s_buffer_load_dword s9, s[12:15], 0x24 ; C2048D24
s_buffer_load_dword s5, s[12:15], 0x25 ; C2028D25
s_buffer_load_dword s4, s[12:15], 0x26 ; C2020D26
s_buffer_load_dword s24, s[12:15], 0x27 ; C20C0D27
s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28
s_waitcnt lgkmcnt(0) ; BF8C007F
v_rcp_f32_e32 v47, s95 ; 7E5E545F
s_buffer_load_dword s10, s[12:15], 0x29 ; C2050D29
s_buffer_load_dword s8, s[12:15], 0x2a ; C2040D2A
v_rcp_f32_e32 v48, s96 ; 7E605460
s_buffer_load_dword s26, s[12:15], 0x2b ; C20D0D2B
s_buffer_load_dword s19, s[12:15], 0x2c ; C2098D2C
s_buffer_load_dword s18, s[12:15], 0x2d ; C2090D2D
s_buffer_load_dword s11, s[12:15], 0x2e ; C2058D2E
s_buffer_load_dword s25, s[12:15], 0x2f ; C20C8D2F
s_buffer_load_dword s21, s[12:15], 0x30 ; C20A8D30
s_buffer_load_dword s22, s[12:15], 0x31 ; C20B0D31
s_buffer_load_dword s23, s[12:15], 0x32 ; C20B8D32
s_buffer_load_dword s31, s[12:15], 0x71 ; C20F8D71
s_buffer_load_dword s32, s[12:15], 0x72 ; C2100D72
s_buffer_load_dword s33, s[12:15], 0x73 ; C2108D73
s_buffer_load_dword s34, s[12:15], 0x74 ; C2110D74
s_buffer_load_dword s35, s[12:15], 0x75 ; C2118D75
s_buffer_load_dword s36, s[12:15], 0x76 ; C2120D76
s_buffer_load_dword s37, s[12:15], 0x77 ; C2128D77
s_buffer_load_dword s38, s[12:15], 0x78 ; C2130D78
s_buffer_load_dword s39, s[12:15], 0x79 ; C2138D79
s_buffer_load_dword s40, s[12:15], 0x7a ; C2140D7A
s_buffer_load_dword s41, s[12:15], 0x7b ; C2148D7B
s_buffer_load_dword s42, s[12:15], 0x7c ; C2150D7C
s_buffer_load_dword s43, s[12:15], 0x7d ; C2158D7D
s_buffer_load_dword s44, s[12:15], 0x7e ; C2160D7E
s_buffer_load_dword s45, s[12:15], 0x7f ; C2168D7F
s_load_dwordx8 s[84:91], s[6:7], 0x8 ; C0EA0708
s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710
v_add_f32_e32 v49, 1.0, v16 ; 066220F2
v_subrev_f32_e32 v50, s93, v4 ; 0A64085D
v_mac_f32_e64 v50, 0.5, s95 ; D23E0032 0000BEF0
v_subrev_f32_e32 v51, s94, v6 ; 0A660C5E
v_mac_f32_e64 v51, 0.5, s96 ; D23E0033 0000C0F0
v_mul_f32_e32 v52, s55, v6 ; 10680C37
v_mac_f32_e32 v52, s51, v4 ; 3E680833
v_mul_f32_e32 v24, s92, v37 ; 10304A5C
v_floor_f32_e32 v37, v24 ; 7E4A4918
v_add_f32_e32 v24, v22, v37 ; 06304B16
v_mul_f32_e32 v53, v23, v24 ; 106A3117
v_mul_f32_e32 v54, v22, v38 ; 106C4D16
v_mov_b32_e32 v55, 0 ; 7E6E0280
image_sample_l v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[76:83], s[72:75] ; F0900F00 02531635
v_mul_f32_e32 v54, 0x3fbfdf3b, v38 ; 106C4CFF 3FBFDF3B
image_sample_l v[56:57], 3, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[76:83], s[72:75] ; F0900300 02533835
v_mov_b32_e32 v53, v4 ; 7E6A0304
v_mov_b32_e32 v54, v6 ; 7E6C0306
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
image_sample_l v[58:59], 3, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[84:91], s[68:71] ; F0900300 02353A35
v_mul_f32_e32 v53, v47, v50 ; 106A652F
v_mul_f32_e32 v54, v48, v51 ; 106C6730
image_sample_l v[47:48], 3, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[60:67], s[56:59] ; F0900300 01CF2F35
v_mul_f32_e32 v38, s47, v6 ; 104C0C2F
v_mac_f32_e32 v38, s53, v4 ; 3E4C0835
v_mad_f32 v50, s50, v52, v20 ; D2820032 04526832
v_mad_f32 v38, s52, v38, v21 ; D2820026 04564C34
v_mul_f32_e32 v50, v10, v50 ; 1064650A
v_mul_f32_e32 v38, v10, v38 ; 104C4D0A
v_fract_f32_e32 v50, v50 ; 7E644132
v_fract_f32_e32 v38, v38 ; 7E4C4126
v_cos_f32_e32 v50, v50 ; 7E646D32
v_cos_f32_e32 v38, v38 ; 7E4C6D26
v_mul_f32_e32 v39, v39, v38 ; 104E4D27
v_mac_f32_e32 v39, v41, v50 ; 3E4E6529
v_mul_f32_e32 v41, s48, v6 ; 10520C30
v_mac_f32_e32 v41, s46, v4 ; 3E52082E
v_mad_f32 v41, s54, v41, v19 ; D2820029 044E5236
v_mul_f32_e32 v41, v10, v41 ; 1052530A
v_fract_f32_e32 v41, v41 ; 7E524129
v_cos_f32_e32 v41, v41 ; 7E526D29
v_mac_f32_e32 v39, v43, v41 ; 3E4E532B
v_mul_f32_e32 v43, s49, v6 ; 10560C31
v_mac_f32_e32 v43, s20, v4 ; 3E560814
v_mad_f32 v43, s30, v43, v14 ; D282002B 043A561E
v_mul_f32_e32 v43, v10, v43 ; 1056570A
v_fract_f32_e32 v43, v43 ; 7E56412B
v_cos_f32_e32 v43, v43 ; 7E566D2B
v_mac_f32_e32 v39, v45, v43 ; 3E4E572D
v_add_f32_e32 v39, v39, v6 ; 064E0D27
v_mul_f32_e32 v45, s55, v39 ; 105A4E37
v_mul_f32_e32 v51, s47, v39 ; 10664E2F
v_mul_f32_e32 v52, s48, v39 ; 10684E30
v_mul_f32_e32 v39, s49, v39 ; 104E4E31
v_mul_f32_e32 v38, v40, v38 ; 104C4D28
v_mac_f32_e32 v38, v42, v50 ; 3E4C652A
v_mac_f32_e32 v38, v44, v41 ; 3E4C532C
v_mac_f32_e32 v38, v46, v43 ; 3E4C572E
v_add_f32_e32 v38, v38, v4 ; 064C0926
v_mac_f32_e32 v45, s51, v38 ; 3E5A4C33
v_mac_f32_e32 v51, s53, v38 ; 3E664C35
v_mac_f32_e32 v52, s46, v38 ; 3E684C2E
v_mac_f32_e32 v20, s50, v45 ; 3E285A32
v_mac_f32_e32 v21, s52, v51 ; 3E2A6634
v_mac_f32_e32 v19, s54, v52 ; 3E266836
v_mul_f32_e32 v40, 0.5, v49 ; 105062F0
v_add_f32_e32 v41, 1.0, v17 ; 065222F2
v_mul_f32_e32 v41, 0.5, v41 ; 105252F0
v_mad_f32 v36, v36, v57, -4.0 ; D2820024 03DE7324
exp 15, 32, 0, 0, 0, v40, v41, v37, v36 ; F800020F 24252928
v_add_f32_e32 v22, v22, v22 ; 062C2D16
s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700
v_sub_f32_e32 v36, 1.0, v58 ; 084874F2
v_mul_f32_e32 v22, v22, v36 ; 102C4916
v_add_f32_e32 v24, v24, v24 ; 06303118
v_mac_f32_e32 v22, v24, v58 ; 3E2C7518
v_mul_f32_e32 v16, v22, v16 ; 10202116
v_add_f32_e32 v17, 0x3f4ccccd, v17 ; 062222FF 3F4CCCCD
v_mul_f32_e32 v21, v10, v21 ; 102A2B0A
v_fract_f32_e32 v21, v21 ; 7E2A4115
v_cos_f32_e32 v21, v21 ; 7E2A6D15
v_mul_f32_e32 v22, v35, v21 ; 102C2B23
v_mul_f32_e32 v21, v34, v21 ; 102A2B22
v_mul_f32_e32 v20, v10, v20 ; 1028290A
v_fract_f32_e32 v20, v20 ; 7E284114
v_cos_f32_e32 v20, v20 ; 7E286D14
v_mac_f32_e32 v22, v33, v20 ; 3E2C2921
v_mac_f32_e32 v21, v32, v20 ; 3E2A2920
v_mul_f32_e32 v19, v10, v19 ; 1026270A
v_fract_f32_e32 v19, v19 ; 7E264113
v_cos_f32_e32 v19, v19 ; 7E266D13
v_mac_f32_e32 v22, v30, v19 ; 3E2C271E
v_mac_f32_e32 v21, v31, v19 ; 3E2A271F
v_mac_f32_e32 v39, s20, v38 ; 3E4E4C14
v_mac_f32_e32 v14, s30, v39 ; 3E1C4E1E
v_mul_f32_e32 v14, v10, v14 ; 101C1D0A
v_fract_f32_e32 v14, v14 ; 7E1C410E
v_cos_f32_e32 v14, v14 ; 7E1C6D0E
v_mac_f32_e32 v22, v29, v14 ; 3E2C1D1D
v_mac_f32_e32 v21, v28, v14 ; 3E2A1D1C
v_mul_f32_e32 v14, 0.5, v56 ; 101C70F0
v_mul_f32_e32 v19, v12, v12 ; 1026190C
v_mac_f32_e32 v19, v13, v13 ; 3E261B0D
v_sqrt_f32_e32 v19, v19 ; 7E266713
v_mad_f32 v20, v22, v22, 4.0 ; D2820014 03DA2D16
v_mac_f32_e32 v20, v21, v21 ; 3E282B15
v_rsq_clamp_f32_e32 v20, v20 ; 7E285914
v_mul_f32_e32 v24, s29, v12 ; 1030181D
v_sub_f32_e32 v28, 1.0, v19 ; 083826F2
v_mad_f32 v24, v28, -s28, -v24 ; D2820018 C460391C
v_mad_f32 v24, -v13, s27, v24 ; D2820018 2460370D
v_mul_f32_e32 v22, v20, v22 ; 102C2D14
v_mul_f32_e32 v29, -2.0, v22 ; 103A2CF5
v_mul_f32_e32 v29, v14, v29 ; 103A3B0E
v_mul_f32_e32 v29, 0x3e4ccccd, v29 ; 103A3AFF 3E4CCCCD
v_mac_f32_e32 v29, v11, v13 ; 3E3A1B0B
v_mul_f32_e32 v11, v11, v12 ; 1016190B
v_mad_f32 v24, 0.5, |v24|, 0.5 ; D2820218 03C230F0
v_sub_f32_e32 v30, 1.0, v24 ; 083C30F2
v_mul_f32_e32 v12, v12, v30 ; 10183D0C
v_mul_f32_e32 v13, v13, v30 ; 101A3D0D
v_add_f32_e32 v23, v23, v23 ; 062E2F17
v_sub_f32_e32 v31, 1.0, v59 ; 083E76F2
v_mul_f32_e32 v23, v23, v31 ; 102E3F17
v_add_f32_e32 v31, v25, v25 ; 063E3319
v_mac_f32_e32 v23, v31, v59 ; 3E2E771F
v_mac_f32_e32 v12, v26, v24 ; 3E18311A
v_mac_f32_e32 v13, v27, v24 ; 3E1A311B
v_mul_f32_e32 v26, v28, v30 ; 10343D1C
v_mac_f32_e32 v26, 1.0, v24 ; 3E3430F2
v_mul_f32_e32 v24, v12, v12 ; 1030190C
v_mac_f32_e32 v24, v26, v26 ; 3E30351A
v_mac_f32_e32 v24, v13, v13 ; 3E301B0D
v_rsq_clamp_f32_e32 v24, v24 ; 7E305918
v_mad_f32 v4, v15, v16, v4 ; D2820004 0412210F
v_mad_f32 v5, v18, v16, v5 ; D2820005 04162112
v_mac_f32_e32 v6, v7, v16 ; 3E0C2107
v_mul_f32_e32 v7, v24, v12 ; 100E1918
v_mul_f32_e32 v12, v24, v26 ; 10183518
v_mul_f32_e32 v13, v24, v13 ; 101A1B18
v_mul_f32_e32 v15, v23, v17 ; 101E2317
v_mac_f32_e32 v4, v7, v15 ; 3E081F07
v_mac_f32_e32 v5, v12, v15 ; 3E0A1F0C
v_mac_f32_e32 v6, v13, v15 ; 3E0C1F0D
v_add_f32_e32 v7, 1.0, v19 ; 060E26F2
v_mul_f32_e32 v3, v3, v7 ; 10060F03
v_mul_f32_e32 v3, 0x43340000, v3 ; 100606FF 43340000
v_mul_f32_e32 v3, v10, v3 ; 1006070A
v_add_f32_e32 v7, -0.5, v47 ; 060E5EF1
v_add_f32_e32 v10, -0.5, v48 ; 061460F1
v_mul_f32_e32 v12, 0, v14 ; 10181C80
v_mul_f32_e32 v12, v23, v12 ; 10181917
v_mul_f32_e32 v13, v17, v12 ; 101A1911
v_cmp_lt_f32_e32 vcc, 0, v13 ; 7C021A80
v_cndmask_b32_e64 v15, v13, 1.0, vcc ; D200000F 01A9E50D
v_cmp_le_f32_e32 vcc, 0, v15 ; 7C061E80
v_cndmask_b32_e32 v15, -1.0, v15 ; 001E1EF3
v_mad_f32 v13, |v13|, v9, v8 ; D282010D 0422130D
v_max_f32_e32 v13, 0, v13 ; 201A1A80
v_mul_f32_e32 v13, v13, v15 ; 101A1F0D
v_mac_f32_e32 v5, v17, v12 ; 3E0A1911
v_fract_f32_e32 v3, v3 ; 7E064103
v_sin_f32_e32 v3, v3 ; 7E066B03
v_mul_f32_e32 v3, 0x3f333333, v3 ; 100606FF 3F333333
v_mac_f32_e32 v5, v3, v13 ; 3E0A1B03
v_mul_f32_e32 v12, v20, v21 ; 10182B14
v_mac_f32_e32 v7, -2.0, v22 ; 3E0E2CF5
v_mac_f32_e32 v10, -2.0, v12 ; 3E1418F5
v_mul_f32_e32 v7, v14, v7 ; 100E0F0E
v_mul_f32_e32 v10, v14, v10 ; 1014150E
v_mul_f32_e32 v7, v23, v7 ; 100E0F17
v_mul_f32_e32 v10, v23, v10 ; 10141517
v_mac_f32_e32 v4, v17, v7 ; 3E080F11
v_mul_f32_e32 v7, v17, v7 ; 100E0F11
v_mac_f32_e32 v6, v17, v10 ; 3E0C1511
v_mul_f32_e32 v10, v17, v10 ; 10141511
v_mad_f32 v12, |v7|, v9, v8 ; D282010C 04221307
v_mad_f32 v8, |v10|, v9, v8 ; D2820108 0422130A
v_cmp_lt_f32_e32 vcc, 0, v7 ; 7C020E80
v_cndmask_b32_e64 v7, v7, 1.0, vcc ; D2000007 01A9E507
v_cmp_lt_f32_e32 vcc, 0, v10 ; 7C021480
v_cndmask_b32_e64 v9, v10, 1.0, vcc ; D2000009 01A9E50A
v_cmp_le_f32_e32 vcc, 0, v7 ; 7C060E80
v_cndmask_b32_e32 v7, -1.0, v7 ; 000E0EF3
v_cmp_le_f32_e32 vcc, 0, v9 ; 7C061280
v_cndmask_b32_e32 v9, -1.0, v9 ; 001212F3
v_max_f32_e32 v10, 0, v12 ; 20141880
v_mul_f32_e32 v7, v10, v7 ; 100E0F0A
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v8, v8, v9 ; 10101308
v_mac_f32_e32 v4, v3, v7 ; 3E080F03
v_mac_f32_e32 v6, v3, v8 ; 3E0C1103
v_mul_f32_e32 v7, s24, v4 ; 100E0818
v_mac_f32_e32 v7, s26, v5 ; 3E0E0A1A
v_mul_f32_e32 v8, s9, v4 ; 10100809
v_mul_f32_e32 v9, s5, v4 ; 10120805
v_mul_f32_e32 v4, s4, v4 ; 10080804
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s10, v5 ; 3E120A0A
v_mac_f32_e32 v4, s8, v5 ; 3E080A08
v_mac_f32_e32 v7, s25, v6 ; 3E0E0C19
v_mac_f32_e32 v8, s19, v6 ; 3E100C13
v_mac_f32_e32 v9, s18, v6 ; 3E120C12
v_mac_f32_e32 v4, s11, v6 ; 3E080C0B
v_mad_f32 v3, 0, v3, 0 ; D2820003 02020680
v_add_f32_e32 v3, 1.0, v3 ; 060606F2
v_mac_f32_e32 v8, s21, v3 ; 3E100615
v_mac_f32_e32 v9, s22, v3 ; 3E120616
v_mac_f32_e32 v4, s23, v3 ; 3E080617
v_mac_f32_e32 v7, s3, v3 ; 3E0E0603
v_mul_f32_e32 v3, s16, v8 ; 10061010
v_mul_f32_e32 v5, s31, v8 ; 100A101F
v_mul_f32_e32 v6, s32, v8 ; 100C1020
v_mul_f32_e32 v8, s33, v8 ; 10101021
v_mac_f32_e32 v3, s34, v9 ; 3E061222
v_mac_f32_e32 v5, s35, v9 ; 3E0A1223
v_mac_f32_e32 v6, s36, v9 ; 3E0C1224
v_mac_f32_e32 v8, s37, v9 ; 3E101225
v_mac_f32_e32 v3, s38, v4 ; 3E060826
v_mac_f32_e32 v5, s39, v4 ; 3E0A0827
v_mac_f32_e32 v6, s40, v4 ; 3E0C0828
v_mac_f32_e32 v8, s41, v4 ; 3E100829
v_mac_f32_e32 v3, s42, v7 ; 3E060E2A
v_mac_f32_e32 v5, s43, v7 ; 3E0A0E2B
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v8, s45, v7 ; 3E100E2D
v_mul_f32_e32 v4, v11, v11 ; 1008170B
v_mac_f32_e32 v4, v28, v28 ; 3E08391C
v_mac_f32_e32 v4, v29, v29 ; 3E083B1D
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
v_mul_f32_e32 v7, 0.5, v5 ; 100E0AF0
v_mul_f32_e32 v9, 0.5, v8 ; 101210F0
v_mad_f32 v10, 0.5, v3, v9 ; D282000A 042606F0
v_mac_f32_e32 v9, s2, v7 ; 3E120E02
exp 15, 33, 0, 0, 0, v10, v9, v6, v8 ; F800021F 0806090A
exp 15, 34, 0, 0, 0, v6, v0, v1, v2 ; F800022F 02010006
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v0, v4, v11 ; 10001704
v_mul_f32_e32 v1, s9, v0 ; 10020009
v_mul_f32_e32 v2, s17, v0 ; 10040011
v_mul_f32_e32 v0, s19, v0 ; 10000013
v_mul_f32_e32 v7, v4, v28 ; 100E3904
v_mac_f32_e32 v1, s5, v7 ; 3E020E05
v_mac_f32_e32 v2, s10, v7 ; 3E040E0A
v_mac_f32_e32 v0, s18, v7 ; 3E000E12
v_mul_f32_e32 v4, v4, v29 ; 10083B04
v_mac_f32_e32 v1, s4, v4 ; 3E020804
v_mac_f32_e32 v2, s8, v4 ; 3E040808
v_mac_f32_e32 v0, s11, v4 ; 3E00080B
v_mul_f32_e32 v4, v1, v1 ; 10080301
v_mac_f32_e32 v4, v2, v2 ; 3E080502
v_mac_f32_e32 v4, v0, v0 ; 3E080100
v_rsq_clamp_f32_e32 v4, v4 ; 7E085904
s_buffer_load_dword s2, s[12:15], 0xa ; C2010D0A
s_buffer_load_dword s3, s[12:15], 0xb ; C2018D0B
s_buffer_load_dword s4, s[12:15], 0xc ; C2020D0C
s_buffer_load_dword s5, s[12:15], 0xd ; C2028D0D
s_buffer_load_dword s6, s[12:15], 0xe ; C2030D0E
s_buffer_load_dword s7, s[12:15], 0xf ; C2038D0F
s_buffer_load_dword s8, s[12:15], 0x10 ; C2040D10
s_buffer_load_dword s9, s[12:15], 0x11 ; C2048D11
s_buffer_load_dword s10, s[12:15], 0x12 ; C2050D12
s_buffer_load_dword s11, s[12:15], 0x13 ; C2058D13
s_buffer_load_dword s16, s[12:15], 0x14 ; C2080D14
s_buffer_load_dword s17, s[12:15], 0x15 ; C2088D15
s_buffer_load_dword s18, s[12:15], 0x16 ; C2090D16
s_buffer_load_dword s19, s[12:15], 0x17 ; C2098D17
s_buffer_load_dword s20, s[12:15], 0x18 ; C20A0D18
s_buffer_load_dword s21, s[12:15], 0x19 ; C20A8D19
s_buffer_load_dword s22, s[12:15], 0x1a ; C20B0D1A
s_buffer_load_dword s23, s[12:15], 0x1b ; C20B8D1B
s_buffer_load_dword s24, s[12:15], 0x1c ; C20C0D1C
s_buffer_load_dword s25, s[12:15], 0x1d ; C20C8D1D
s_buffer_load_dword s26, s[12:15], 0x1e ; C20D0D1E
s_buffer_load_dword s27, s[12:15], 0x1f ; C20D8D1F
s_buffer_load_dword s28, s[12:15], 0x20 ; C20E0D20
s_buffer_load_dword s29, s[12:15], 0x21 ; C20E8D21
s_buffer_load_dword s12, s[12:15], 0x22 ; C2060D22
v_mul_f32_e32 v1, v4, v1 ; 10020304
v_mul_f32_e32 v2, v4, v2 ; 10040504
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v4, v0, v2 ; 10080500
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v7, s17, v4 ; 100E0811
v_mul_f32_e32 v9, s21, v4 ; 10120815
v_mul_f32_e32 v4, s25, v4 ; 10080819
v_mul_f32_e32 v10, v2, v1 ; 10140302
v_mac_f32_e32 v7, s16, v10 ; 3E0E1410
v_mac_f32_e32 v9, s20, v10 ; 3E121414
v_mac_f32_e32 v4, s24, v10 ; 3E081418
v_mul_f32_e32 v10, v0, v0 ; 10140100
v_mac_f32_e32 v7, s18, v10 ; 3E0E1412
v_mac_f32_e32 v9, s22, v10 ; 3E121416
v_mac_f32_e32 v4, s26, v10 ; 3E08141A
v_mul_f32_e32 v10, s1, v2 ; 10140401
v_mac_f32_e32 v10, s0, v1 ; 3E140200
v_mul_f32_e32 v11, s5, v2 ; 10160405
v_mac_f32_e32 v11, s4, v1 ; 3E160204
v_mul_f32_e32 v12, s9, v2 ; 10180409
v_mac_f32_e32 v12, s8, v1 ; 3E180208
v_mac_f32_e32 v10, s2, v0 ; 3E140002
v_mac_f32_e32 v11, s6, v0 ; 3E160006
v_mac_f32_e32 v12, s10, v0 ; 3E18000A
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_mac_f32_e32 v7, s19, v0 ; 3E0E0013
v_mac_f32_e32 v9, s23, v0 ; 3E120017
v_mac_f32_e32 v4, s27, v0 ; 3E08001B
v_mul_f32_e32 v0, v2, v2 ; 10000502
v_mad_f32 v0, v1, v1, -v0 ; D2820000 84020301
v_mac_f32_e32 v7, s28, v0 ; 3E0E001C
v_mac_f32_e32 v9, s29, v0 ; 3E12001D
v_mac_f32_e32 v4, s12, v0 ; 3E08000C
v_add_f32_e32 v0, s3, v10 ; 06001403
v_add_f32_e32 v0, v0, v7 ; 06000F00
v_add_f32_e32 v1, s7, v11 ; 06021607
v_add_f32_e32 v1, v1, v9 ; 06021301
v_add_f32_e32 v2, s11, v12 ; 0604180B
v_add_f32_e32 v2, v2, v4 ; 06040902
exp 15, 35, 0, 0, 0, v0, v1, v2, v25 ; F800023F 19020100
exp 15, 12, 0, 1, 0, v3, v5, v6, v8 ; F80008CF 08060503
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 60
Code Size: 2196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..1]
DCL CONST[3..4]
DCL CONST[6..7]
DCL TEMP[0..5], LOCAL
IMM[0] FLT32 { 0.5000, -1.0000, 0.0000, 5.0000}
IMM[1] FLT32 { 1.0000, 2.0000, 0.8800, 1.3000}
0: ADD TEMP[0].x, IN[0].zzzz, IMM[0].xxxx
1: FLR TEMP[0].x, TEMP[0].xxxx
2: MUL TEMP[0].x, TEMP[0].xxxx, CONST[3].xxxx
3: FLR TEMP[1].x, TEMP[0].xxxx
4: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].yyyy
5: FRC TEMP[0].x, TEMP[0].xxxx
6: FRC TEMP[1].x, TEMP[1].xxxx
7: MOV TEMP[0].y, TEMP[1].xxxx
8: DDX TEMP[1].xy, IN[0].xyyy
9: MUL TEMP[2], CONST[7].xxxx, IN[0].xyyy
10: DDY TEMP[2].xy, TEMP[2]
11: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy
12: SQRT TEMP[1].x, TEMP[1].xxxx
13: DP2 TEMP[2].x, TEMP[2].xyyy, TEMP[2].xyyy
14: SQRT TEMP[2].x, TEMP[2].xxxx
15: MOV TEMP[1].y, TEMP[2].xxxx
16: DP2 TEMP[1].x, TEMP[1].xyyy, TEMP[1].xyyy
17: SQRT TEMP[1].x, TEMP[1].xxxx
18: MUL TEMP[1].x, TEMP[1].xxxx, CONST[3].wwww
19: LG2 TEMP[1].x, TEMP[1].xxxx
20: ADD TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy
21: ADD TEMP[1].x, TEMP[1].xxxx, IN[0].wwww
22: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz
23: MIN TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww
24: CEIL TEMP[2].x, TEMP[1].xxxx
25: EX2 TEMP[2].x, TEMP[2].xxxx
26: MUL TEMP[2].x, TEMP[2].xxxx, CONST[3].zzzz
27: RCP TEMP[3].x, CONST[3].wwww
28: MUL TEMP[3].x, IMM[0].xxxx, TEMP[3].xxxx
29: FRC TEMP[4].xy, IN[0].xyyy
30: MUL TEMP[5].x, TEMP[2].xxxx, IMM[1].yyyy
31: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx
32: ADD TEMP[5].x, IMM[1].xxxx, -TEMP[5].xxxx
33: MUL TEMP[2].x, TEMP[3].xxxx, TEMP[2].xxxx
34: MAD TEMP[2].xy, TEMP[4].xyyy, TEMP[5].xxxx, TEMP[2].xxxx
35: MAD TEMP[0].xy, TEMP[2].xyyy, CONST[3].xyyy, TEMP[0].xyyy
36: MOV TEMP[0].xy, TEMP[0].xyyy
37: MOV TEMP[0].w, TEMP[1].xxxx
38: TXL TEMP[0], TEMP[0], SAMP[0], 2D
39: FSLT TEMP[1].x, TEMP[0].wwww, CONST[6].xxxx
40: AND TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx
41: KILL_IF -TEMP[1].xxxx
42: MOV TEMP[1].xy, IN[1].xyyy
43: MOV TEMP[1].w, IN[1].wwww
44: TXP TEMP[1], TEMP[1], SAMP[1], 2D
45: LG2 TEMP[2].x, TEMP[1].xxxx
46: LG2 TEMP[2].y, TEMP[1].yyyy
47: LG2 TEMP[2].z, TEMP[1].zzzz
48: LG2 TEMP[2].w, TEMP[1].wwww
49: MOV TEMP[1].xyz, -TEMP[2]
50: ADD TEMP[1].xyz, TEMP[1].xyzz, IN[3].xyzz
51: MUL TEMP[2].xyz, TEMP[0].xyzz, IN[2].yzww
52: MUL TEMP[1].xyz, TEMP[1].xyzz, IMM[1].zzzz
53: POW TEMP[3].x, TEMP[1].xxxx, IMM[1].wwww
54: POW TEMP[3].y, TEMP[1].yyyy, IMM[1].wwww
55: POW TEMP[3].z, TEMP[1].zzzz, IMM[1].wwww
56: MAD TEMP[1].xyz, TEMP[3].xyzz, CONST[4].yyyy, CONST[4].xxxx
57: MUL TEMP[1].xyz, TEMP[2].xyzz, TEMP[1].xyzz
58: MOV TEMP[0].w, TEMP[0].wwww
59: MAD TEMP[2].x, IN[2].xxxx, CONST[1].zzzz, CONST[1].wwww
60: MOV_SAT TEMP[2].x, TEMP[2].xxxx
61: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[1].xyzz, CONST[0].xyzz
62: MOV OUT[0], TEMP[0]
63: END
; ModuleID = 'tgsi'
@ddxy_lds = external addrspace(3) global [64 x i32]
; Fragment-stage entry point: the LLVM translation of the TGSI program whose
; listing ends just above this module (it uses llvm.SI.fs.interp and
; llvm.AMDGPU.kill, and attribute group #0 carries "ShaderType"="0").
;
; Arguments as they are used in this body:
;   %1      - array from which the descriptor for llvm.SI.load.const is loaded.
;   %2, %3  - arrays supplying the <16 x i8> and <32 x i8> operands of the two
;             sample intrinsics (presumably sampler states and image resources;
;             confirm against the driver).
;   %5, %7  - forwarded unchanged to every llvm.SI.fs.interp call.
; The remaining arguments are not referenced in this function.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Load the descriptor in slot 0 of %1 and read scalar constants through it.
; The i32 operand is a byte offset; compared with the TGSI listing, offset/16
; lines up with the CONST[] index and (offset%16)/4 with the component
; (e.g. 60 -> CONST[3].w, 96 -> CONST[6].x).
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
; Slots 0 and 1 of %3 (as <32 x i8>) and of %2 (as <16 x i8>): %38/%40 feed the
; samplel call and %43/%46 feed the sample call further down.
%37 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%38 = load <32 x i8>, <32 x i8> addrspace(2)* %37, align 32, !tbaa !0
%39 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%40 = load <16 x i8>, <16 x i8> addrspace(2)* %39, align 16, !tbaa !0
%41 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%42 = bitcast <8 x i32> addrspace(2)* %41 to <32 x i8> addrspace(2)*
%43 = load <32 x i8>, <32 x i8> addrspace(2)* %42, align 32, !tbaa !0
%44 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%45 = bitcast <4 x i32> addrspace(2)* %44 to <16 x i8> addrspace(2)*
%46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !tbaa !0
; Interpolated inputs. The first two operands line up with (component, IN[]
; index) in the TGSI listing: %47..%50 = IN[0].xyzw, %51/%52/%53 = IN[1].x/.y/.w,
; %54..%57 = IN[2].xyzw, %58..%60 = IN[3].xyz.
%47 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%48 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%49 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%50 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%51 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%52 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%53 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%54 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%55 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%56 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%57 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%58 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
; Base offset pair (%67, %69), later added to the texture coordinate:
;   %63 = floor(IN[0].z + 0.5) * const@48,   %67 = fract(%63)
;   %65 = floor(%63) * const@52,             %69 = fract(%65)
; (Looks like a tile index being split into a 2D offset -- TODO confirm
; against the TGSI instructions that precede the visible part of the listing.)
%61 = fadd float %49, 5.000000e-01
%62 = call float @llvm.floor.f32(float %61)
%63 = fmul float %62, %29
%64 = call float @llvm.floor.f32(float %63)
%65 = fmul float %64, %30
%66 = call float @llvm.floor.f32(float %63)
%67 = fsub float %63, %66
%68 = call float @llvm.floor.f32(float %65)
%69 = fsub float %65, %68
; Cross-lane differences through the @ddxy_lds array. Each thread writes its
; value at index tid, then reads indices (tid & -4) and (tid & -4) | 1 and
; subtracts them, i.e. element 1 minus element 0 of its aligned group of four
; (presumably the screen-space X derivative of the quad).
; This first store of %48 is overwritten by the store of %47 below (same index
; expression) before any load is issued.
%70 = call i32 @llvm.SI.tid()
%71 = sext i32 %70 to i64
%72 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %71
%73 = bitcast i32 addrspace(3)* %72 to float addrspace(3)*
store float %48, float addrspace(3)* %73, align 4
%74 = call i32 @llvm.SI.tid()
%75 = sext i32 %74 to i64
%76 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %75
%77 = and i32 %74, -4
%78 = sext i32 %77 to i64
%79 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %78
%80 = or i32 %77, 1
%81 = sext i32 %80 to i64
%82 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %81
; %88 = difference of IN[0].x (%47) between group elements 1 and 0.
%83 = bitcast i32 addrspace(3)* %76 to float addrspace(3)*
store float %47, float addrspace(3)* %83, align 4
%84 = bitcast i32 addrspace(3)* %79 to float addrspace(3)*
%85 = load float, float addrspace(3)* %84, align 4
%86 = bitcast i32 addrspace(3)* %82 to float addrspace(3)*
%87 = load float, float addrspace(3)* %86, align 4
%88 = fsub float %87, %85
; %94 = the same difference for IN[0].y (%48).
%89 = bitcast i32 addrspace(3)* %76 to float addrspace(3)*
store float %48, float addrspace(3)* %89, align 4
%90 = bitcast i32 addrspace(3)* %79 to float addrspace(3)*
%91 = load float, float addrspace(3)* %90, align 4
%92 = bitcast i32 addrspace(3)* %82 to float addrspace(3)*
%93 = load float, float addrspace(3)* %92, align 4
%94 = fsub float %93, %91
; Second pass on IN[0].xy scaled by const@112. This time the partner index is
; (tid & -4) | 2, i.e. element 2 minus element 0 of the group (presumably the
; Y derivative). %96 and %97 are the same product.
%95 = fmul float %36, %47
%96 = fmul float %36, %48
%97 = fmul float %36, %48
%98 = call i32 @llvm.SI.tid()
%99 = sext i32 %98 to i64
%100 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %99
%101 = bitcast i32 addrspace(3)* %100 to float addrspace(3)*
store float %97, float addrspace(3)* %101, align 4
%102 = call i32 @llvm.SI.tid()
%103 = sext i32 %102 to i64
%104 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %103
%105 = and i32 %102, -4
%106 = sext i32 %105 to i64
%107 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %106
%108 = or i32 %105, 2
%109 = sext i32 %108 to i64
%110 = getelementptr [64 x i32], [64 x i32] addrspace(3)* @ddxy_lds, i64 0, i64 %109
; %116 = difference of the scaled x (%95) between group elements 2 and 0.
%111 = bitcast i32 addrspace(3)* %104 to float addrspace(3)*
store float %95, float addrspace(3)* %111, align 4
%112 = bitcast i32 addrspace(3)* %107 to float addrspace(3)*
%113 = load float, float addrspace(3)* %112, align 4
%114 = bitcast i32 addrspace(3)* %110 to float addrspace(3)*
%115 = load float, float addrspace(3)* %114, align 4
%116 = fsub float %115, %113
; %122 = the same difference for the scaled y (%96).
%117 = bitcast i32 addrspace(3)* %104 to float addrspace(3)*
store float %96, float addrspace(3)* %117, align 4
%118 = bitcast i32 addrspace(3)* %107 to float addrspace(3)*
%119 = load float, float addrspace(3)* %118, align 4
%120 = bitcast i32 addrspace(3)* %110 to float addrspace(3)*
%121 = load float, float addrspace(3)* %120, align 4
%122 = fsub float %121, %119
; Trailing store; no load from @ddxy_lds follows it in this function.
%123 = bitcast i32 addrspace(3)* %104 to float addrspace(3)*
store float %97, float addrspace(3)* %123, align 4
; %127 and %131 are the lengths of the two difference vectors; %135 is the
; length of (%127, %131) -- TGSI instructions 16-17 (DP2, SQRT).
%124 = fmul float %88, %88
%125 = fmul float %94, %94
%126 = fadd float %124, %125
%127 = call float @llvm.sqrt.f32(float %126)
%128 = fmul float %116, %116
%129 = fmul float %122, %122
%130 = fadd float %128, %129
%131 = call float @llvm.sqrt.f32(float %130)
%132 = fmul float %127, %127
%133 = fmul float %131, %131
%134 = fadd float %132, %133
%135 = call float @llvm.sqrt.f32(float %134)
; Explicit level of detail, TGSI instructions 18-23:
;   %141 = min(max(log2(%135 * CONST[3].w) - 1.0 + IN[0].w, 0.0), 5.0)
%136 = fmul float %135, %32
%137 = call float @llvm.log2.f32(float %136)
%138 = fadd float %137, -1.000000e+00
%139 = fadd float %138, %50
%140 = call float @llvm.maxnum.f32(float %139, float 0.000000e+00)
%141 = call float @llvm.minnum.f32(float %140, float 5.000000e+00)
; TGSI 24-26: %144 = exp2(ceil(lod)) * CONST[3].z.
%142 = call float @llvm.ceil.f32(float %141)
%143 = call float @llvm.AMDIL.exp.(float %142)
%144 = fmul float %143, %31
; TGSI 27-28: %146 = 0.5 / CONST[3].w.
%145 = fdiv float 1.000000e+00, %32
%146 = fmul float %145, 5.000000e-01
; TGSI 29: fractional parts of IN[0].x and IN[0].y.
%147 = call float @llvm.floor.f32(float %47)
%148 = fsub float %47, %147
%149 = call float @llvm.floor.f32(float %48)
%150 = fsub float %48, %149
; TGSI 30-34: scale = 1 - 2 * %144 * %146, bias = %146 * %144, then
; fract(IN[0].xy) * scale + bias. (Presumably this insets the repeated
; coordinate by a mip-dependent border -- confirm with the shader author.)
%151 = fmul float %144, 2.000000e+00
%152 = fmul float %151, %146
%153 = fsub float 1.000000e+00, %152
%154 = fmul float %146, %144
%155 = fmul float %148, %153
%156 = fadd float %155, %154
%157 = fmul float %150, %153
%158 = fadd float %157, %154
; TGSI 35: multiply by CONST[3].xy and add the base offsets %67 / %69.
%159 = fmul float %156, %29
%160 = fadd float %159, %67
%161 = fmul float %158, %30
%162 = fadd float %161, %69
; TGSI 38 (TXL): address vector is (x, y, lod); element 3 is left undef.
%163 = bitcast float %160 to i32
%164 = bitcast float %162 to i32
%165 = bitcast float %141 to i32
%166 = insertelement <4 x i32> undef, i32 %163, i32 0
%167 = insertelement <4 x i32> %166, i32 %164, i32 1
%168 = insertelement <4 x i32> %167, i32 %165, i32 2
%169 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %168, <32 x i8> %38, <16 x i8> %40, i32 2)
%170 = extractelement <4 x float> %169, i32 0
%171 = extractelement <4 x float> %169, i32 1
%172 = extractelement <4 x float> %169, i32 2
%173 = extractelement <4 x float> %169, i32 3
; TGSI 39-41: kill operand is -1.0 when texel.w < CONST[6].x, otherwise 0.0.
%174 = fcmp olt float %173, %35
%175 = select i1 %174, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %175)
; TGSI 44 (TXP): divide IN[1].xy by IN[1].w, then sample with the slot-1
; descriptors.
%176 = fdiv float %51, %53
%177 = fdiv float %52, %53
%178 = bitcast float %176 to i32
%179 = bitcast float %177 to i32
%180 = insertelement <2 x i32> undef, i32 %178, i32 0
%181 = insertelement <2 x i32> %180, i32 %179, i32 1
%182 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %181, <32 x i8> %43, <16 x i8> %46, i32 2)
%183 = extractelement <4 x float> %182, i32 0
%184 = extractelement <4 x float> %182, i32 1
%185 = extractelement <4 x float> %182, i32 2
; TGSI 45-50: IN[3].xyz - log2(second texel.xyz). The TGSI LG2 of .w has no
; counterpart here because its result is never read.
%186 = call float @llvm.log2.f32(float %183)
%187 = call float @llvm.log2.f32(float %184)
%188 = call float @llvm.log2.f32(float %185)
%189 = fsub float %58, %186
%190 = fsub float %59, %187
%191 = fsub float %60, %188
; TGSI 51: first texel.xyz * IN[2].yzw.
%192 = fmul float %170, %55
%193 = fmul float %171, %56
%194 = fmul float %172, %57
; TGSI 52-55: scale by 0x3FEC28F5C0000000 (about 0.88) and raise to the power
; 0x3FF4CCCCC0000000 (about 1.3).
%195 = fmul float %189, 0x3FEC28F5C0000000
%196 = fmul float %190, 0x3FEC28F5C0000000
%197 = fmul float %191, 0x3FEC28F5C0000000
%198 = call float @llvm.pow.f32(float %195, float 0x3FF4CCCCC0000000)
%199 = call float @llvm.pow.f32(float %196, float 0x3FF4CCCCC0000000)
%200 = call float @llvm.pow.f32(float %197, float 0x3FF4CCCCC0000000)
; TGSI 56: pow result * CONST[4].y + CONST[4].x.
%201 = fmul float %198, %34
%202 = fadd float %201, %33
%203 = fmul float %199, %34
%204 = fadd float %203, %33
%205 = fmul float %200, %34
%206 = fadd float %205, %33
; TGSI 57: combine with the tinted first texel.
%207 = fmul float %192, %202
%208 = fmul float %193, %204
%209 = fmul float %194, %206
; TGSI 59-61: blend factor = clamp(IN[2].x * CONST[1].z + CONST[1].w, 0, 1),
; then lrp between the shaded colour and CONST[0].xyz.
%210 = fmul float %54, %27
%211 = fadd float %210, %28
%212 = call float @llvm.AMDIL.clamp.(float %211, float 0.000000e+00, float 1.000000e+00)
%213 = call float @llvm.AMDGPU.lrp(float %212, float %207, float %24)
%214 = call float @llvm.AMDGPU.lrp(float %212, float %208, float %25)
%215 = call float @llvm.AMDGPU.lrp(float %212, float %209, float %26)
; TGSI 62: pack (r, g) and (b, first texel.w) into two dwords via packf16 and
; export them; each packed dword is passed twice, as (rg, ba, rg, ba).
%216 = call i32 @llvm.SI.packf16(float %213, float %214)
%217 = bitcast i32 %216 to float
%218 = call i32 @llvm.SI.packf16(float %215, float %173)
%219 = bitcast i32 %218 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %217, float %219, float %217, float %219)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: readnone
declare i32 @llvm.SI.tid() #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.ceil.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.exp.(float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code generated for the fragment @main in the LLVM module above.
; The trailing "; XXXXXXXX" on each line is the instruction encoding printed
; by the disassembler. The scheduler has interleaved independent work, so the
; section notes below describe the dominant purpose of each stretch.
;
; s_wqm_b64 rewrites exec in whole-quad form (presumably so that all four
; lanes of a quad take part in the LDS differences below).
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
; Attribute interpolation: each v_interp_p1/p2 pair produces one component.
; The two immediates match the (component, input) operands of the
; llvm.SI.fs.interp calls in the IR:
;   v2..v5 = IN[0].xyzw, v6/v7/v8 = IN[1].x/.y/.w, v9..v12 = IN[2].xyzw,
;   v13/v14/v0 = IN[3].x/.y/.z.
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 3, 1, [m0] ; C8200700
v_interp_p2_f32 v8, [v8], v1, 3, 1, [m0] ; C8210701
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900
v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v12, v0, 3, 2, [m0] ; C8300B00
v_interp_p2_f32 v12, [v12], v1, 3, 2, [m0] ; C8310B01
v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00
v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01
v_interp_p1_f32 v14, v0, 1, 3, [m0] ; C8380D00
; s[0:3] receives the descriptor used by every s_buffer_load_dword below.
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p2_f32 v14, [v14], v1, 1, 3, [m0] ; C8390D01
v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00
v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01
; Lane index (the IR's llvm.SI.tid) in v1; v15 = lane * 4 is this lane's byte
; address in LDS. v17 / v16 address elements 0 and 1 of the lane's aligned
; group of four (tid & -4 and (tid & -4) | 1).
v_mbcnt_lo_u32_b32_e64 v1, -1, 0 ; D2460001 000100C1
v_mbcnt_hi_u32_b32_e32 v1, -1, v1 ; 480202C1
v_lshlrev_b32_e32 v15, 2, v1 ; 341E0282
s_mov_b32 m0, -1 ; BEFC03C1
ds_write_b32 v15, v3 ; D8340000 0000030F
ds_write_b32 v15, v2 ; D8340000 0000020F
s_waitcnt lgkmcnt(0) ; BF8C007F
; Dword offset 0x1c = byte offset 112, the constant the IR loads as %36.
s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C
v_and_b32_e32 v1, -4, v1 ; 360202C4
v_or_b32_e32 v16, 1, v1 ; 38200281
v_lshlrev_b32_e32 v16, 2, v16 ; 34202082
v_lshlrev_b32_e32 v17, 2, v1 ; 34220282
; Read back elements 0 and 1 for IN[0].x, rewrite the slot with IN[0].y and
; read again.
ds_read_b32 v18, v17 ; D8D80000 12000011
ds_read_b32 v19, v16 ; D8D80000 13000010
ds_write_b32 v15, v3 ; D8340000 0000030F
ds_read_b32 v16, v16 ; D8D80000 10000010
ds_read_b32 v20, v17 ; D8D80000 14000011
s_waitcnt lgkmcnt(0) ; BF8C007F
; Second exchange on IN[0].xy scaled by s8, now against element 2 of the
; group (v1 = ((tid & -4) | 2) * 4).
v_mul_f32_e32 v21, s8, v3 ; 102A0608
ds_write_b32 v15, v21 ; D8340000 0000150F
v_mul_f32_e32 v22, s8, v2 ; 102C0408
ds_write_b32 v15, v22 ; D8340000 0000160F
v_or_b32_e32 v1, 2, v1 ; 38020282
v_lshlrev_b32_e32 v1, 2, v1 ; 34020282
s_waitcnt lgkmcnt(0) ; BF8C007F
ds_read_b32 v22, v17 ; D8D80000 16000011
ds_read_b32 v23, v1 ; D8D80000 17000001
ds_write_b32 v15, v21 ; D8340000 0000150F
ds_read_b32 v17, v17 ; D8D80000 11000011
ds_read_b32 v1, v1 ; D8D80000 01000001
ds_write_b32 v15, v21 ; D8340000 0000150F
; v_subrev computes src1 - src0, giving the four differences (%88, %94, %116,
; %122 in the IR). The mul/mac/sqrt chain then forms the two vector lengths
; and the length of that pair (%135).
v_subrev_f32_e32 v15, v18, v19 ; 0A1E2712
v_subrev_f32_e32 v16, v20, v16 ; 0A202114
s_waitcnt lgkmcnt(0) ; BF8C007F
v_subrev_f32_e32 v18, v22, v23 ; 0A242F16
v_subrev_f32_e32 v1, v17, v1 ; 0A020311
v_mul_f32_e32 v16, v16, v16 ; 10202110
v_mac_f32_e32 v16, v15, v15 ; 3E201F0F
v_mul_f32_e32 v1, v1, v1 ; 10020301
v_mac_f32_e32 v1, v18, v18 ; 3E022512
v_sqrt_f32_e32 v15, v16 ; 7E1E6710
; Constant fetches: dword offsets 0xc..0x10 are byte offsets 48..64
; (s8 = %29, s10 = %30, s11 = %31, s9 = %32, s12 = %33 in the IR).
s_buffer_load_dword s8, s[0:3], 0xc ; C204010C
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, v1, v1 ; 10020301
s_buffer_load_dword s9, s[0:3], 0xf ; C204810F
v_mac_f32_e32 v1, v15, v15 ; 3E021F0F
s_buffer_load_dword s10, s[0:3], 0xd ; C205010D
; Base offsets: v4 = floor(IN[0].z + 0.5); v17 = fract(v4 * s8);
; v18 = fract(floor(v4 * s8) * s10). These are %67 and %69 in the IR.
v_add_f32_e32 v4, 0.5, v4 ; 060808F0
v_floor_f32_e32 v4, v4 ; 7E084904
s_buffer_load_dword s11, s[0:3], 0xe ; C205810E
s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v15, s8, v4 ; 101E0808
v_floor_f32_e32 v15, v15 ; 7E1E490F
v_sqrt_f32_e32 v1, v1 ; 7E026701
v_mul_f32_e32 v1, s9, v1 ; 10020209
v_log_f32_e32 v1, v1 ; 7E024F01
v_mul_f32_e32 v16, s10, v15 ; 10201E0A
v_floor_f32_e32 v16, v16 ; 7E204910
v_mad_f32 v17, v4, s8, -v15 ; D2820011 843C1104
v_mad_f32 v18, v15, s10, -v16 ; D2820012 8440150F
; Level of detail: v19 = min(max(log2(..) - 1.0 + IN[0].w, 0), 5.0), where
; 0x40a00000 is 5.0f. Then v1 = exp2(ceil(v19)) * s11.
v_add_f32_e32 v1, -1.0, v1 ; 060202F3
v_add_f32_e32 v1, v5, v1 ; 06020305
v_max_f32_e32 v1, 0, v1 ; 20020280
v_min_f32_e32 v19, 0x40a00000, v1 ; 1E2602FF 40A00000
v_ceil_f32_e32 v1, v19 ; 7E024513
v_exp_f32_e32 v1, v1 ; 7E024B01
v_mul_f32_e32 v1, s11, v1 ; 1002020B
; v4 = 0.5 / s9; v2, v3 = fract(IN[0].x), fract(IN[0].y).
v_rcp_f32_e32 v4, s9 ; 7E085409
v_floor_f32_e32 v5, v2 ; 7E0A4902
v_subrev_f32_e32 v2, v5, v2 ; 0A040505
v_floor_f32_e32 v5, v3 ; 7E0A4903
v_subrev_f32_e32 v3, v5, v3 ; 0A060705
v_mul_f32_e32 v4, 0.5, v4 ; 100808F0
; Dword offset 0x18 = byte offset 96 (%35, the kill threshold). The s_load
; instructions fetch the <16 x i8> / <32 x i8> operands of the first sample.
s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500
s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700
; scale v5 = 1 - 2 * v1 * v4, bias v1 = v1 * v4; coordinate = fract * scale +
; bias, multiplied by s8 / s10 and accumulated onto the base offsets v17/v18.
v_mul_f32_e32 v5, -2.0, v1 ; 100A02F5
v_mad_f32 v5, v5, v4, 1.0 ; D2820005 03CA0905
v_mul_f32_e32 v1, v1, v4 ; 10020901
v_mad_f32 v2, v5, v2, v1 ; D2820002 04060505
v_mac_f32_e32 v1, v5, v3 ; 3E020705
v_mac_f32_e32 v17, s8, v2 ; 3E220408
v_mac_f32_e32 v18, s10, v1 ; 3E24020A
s_load_dwordx4 s[28:31], s[4:5], 0x4 ; C08E0504
s_load_dwordx8 s[32:39], s[6:7], 0x8 ; C0D00708
s_waitcnt lgkmcnt(0) ; BF8C007F
; Explicit-LOD sample: address registers v17 (x), v18 (y), v19 (lod); the
; result lands in v[1:4].
image_sample_l v[1:4], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[20:27], s[16:19] ; F0900F00 00850111
s_waitcnt vmcnt(0) ; BF8C0770
; Kill: v5 = -1.0 where s9 > texel.w, else 0. v_cmpx_le keeps only lanes
; with 0 <= v5 in exec.
v_cmp_gt_f32_e32 vcc, s9, v4 ; 7C080809
v_cndmask_b32_e64 v5, 0, -1.0, vcc ; D2000005 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v5 ; 7C260A80
; Expansion of the IR's fdiv by IN[1].w (v8): when |v8| > 0x6f800000 the
; divisor is pre-scaled by 0x2f800000 before v_rcp_f32, and the quotient is
; multiplied by the same factor afterwards. The results v15, v16 are the
; projected coordinates.
v_mov_b32_e32 v5, 0x6f800000 ; 7E0A02FF 6F800000
v_cmp_gt_f32_e64 vcc, |v8|, v5 ; D008016A 00020B08
v_mov_b32_e32 v5, 0x2f800000 ; 7E0A02FF 2F800000
v_cndmask_b32_e32 v5, 1.0, v5 ; 000A0AF2
v_mul_f32_e32 v8, v5, v8 ; 10101105
v_rcp_f32_e32 v8, v8 ; 7E105508
v_mul_f32_e32 v6, v8, v6 ; 100C0D08
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mul_f32_e32 v15, v6, v5 ; 101E0B06
v_mul_f32_e32 v16, v7, v5 ; 10200B07
s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107
; Second sample; the mask 7 returns three components in v[5:7].
image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[32:39], s[28:31] ; F0800700 00E8050F
s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Colour math. log2 of the second texel, subtracted from IN[3], scaled by
; 0x3f6147ae (about 0.88). The IR's pow(x, ~1.3) appears here as
; v_log_f32 / v_mul_legacy_f32 by 0x3fa66666 / v_exp_f32.
; The blue channel is finished first, interleaved with the fog factor.
v_log_f32_e32 v7, v7 ; 7E0E4F07
v_log_f32_e32 v5, v5 ; 7E0A4F05
v_log_f32_e32 v6, v6 ; 7E0C4F06
s_buffer_load_dword s6, s[0:3], 0x11 ; C2030111
v_subrev_f32_e32 v0, v7, v0 ; 0A000107
v_mov_b32_e32 v7, 0x3f6147ae ; 7E0E02FF 3F6147AE
v_mul_f32_e32 v0, v7, v0 ; 10000107
v_log_f32_e32 v0, v0 ; 7E004F00
; v8 = s4 + s5 * IN[2].x, i.e. the IR's %211 before clamping.
v_mov_b32_e32 v8, s4 ; 7E100204
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
v_mac_f32_e32 v8, s5, v9 ; 3E101205
v_mov_b32_e32 v9, 0x3fa66666 ; 7E1202FF 3FA66666
v_mul_legacy_f32_e32 v0, v9, v0 ; 0E000109
v_exp_f32_e32 v0, v0 ; 7E004B00
v_mov_b32_e32 v15, s12 ; 7E1E020C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v15, s6, v0 ; 3E1E0006
v_mul_f32_e32 v0, v12, v3 ; 1000070C
v_mul_f32_e32 v0, v15, v0 ; 1000010F
; v3 = clamp(v8) is the blend factor t and v8 = 1 - t. The IR's lrp becomes
; const * (1 - t) + colour * t.
v_add_f32_e64 v3, 0, v8 clamp ; D2060803 00021080
v_sub_f32_e32 v8, 1.0, v3 ; 081006F2
v_mul_f32_e32 v12, s4, v8 ; 10181004
v_mac_f32_e32 v12, v0, v3 ; 3E180700
v_mul_f32_e32 v0, v10, v1 ; 1000030A
v_mul_f32_e32 v1, v11, v2 ; 1002050B
; v2 = packed (blue, first texel.w), the IR's %218.
v_cvt_pkrtz_f16_f32_e32 v2, v12, v4 ; 5E04090C
; Red and green channels follow the same log / scale / pow / mad / lrp path.
v_subrev_f32_e32 v4, v5, v13 ; 0A081B05
v_subrev_f32_e32 v5, v6, v14 ; 0A0A1D06
v_mul_f32_e32 v4, v7, v4 ; 10080907
v_mul_f32_e32 v5, v7, v5 ; 100A0B07
v_log_f32_e32 v4, v4 ; 7E084F04
v_log_f32_e32 v5, v5 ; 7E0A4F05
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101
v_mul_legacy_f32_e32 v4, v9, v4 ; 0E080909
v_exp_f32_e32 v4, v4 ; 7E084B04
v_mul_legacy_f32_e32 v5, v9, v5 ; 0E0A0B09
v_mov_b32_e32 v6, s12 ; 7E0C020C
v_mad_f32 v4, s6, v4, v6 ; D2820004 041A0806
v_mov_b32_e32 v6, s12 ; 7E0C020C
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_mad_f32 v5, s6, v5, v6 ; D2820005 041A0A06
v_mul_f32_e32 v0, v4, v0 ; 10000104
v_mul_f32_e32 v1, v5, v1 ; 10020305
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v4, s4, v8 ; 10081004
v_mac_f32_e32 v4, v0, v3 ; 3E080700
v_mul_f32_e32 v0, s0, v8 ; 10001000
v_mac_f32_e32 v0, v1, v3 ; 3E000701
; v0 = packed (red, green), the IR's %216. Export (v0, v2, v0, v2) and end the
; program.
v_cvt_pkrtz_f16_f32_e32 v0, v4, v0 ; 5E000104
exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 48
VGPRS: 24
Code Size: 888 bytes
LDS: 1 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL OUT[7], GENERIC[6]
DCL OUT[8], GENERIC[7]
DCL CONST[0..20]
DCL TEMP[0..10], LOCAL
IMM[0] FLT32 { 0.0000, 0.5000, 0.0000, 0.0000}
0: MUL TEMP[0], CONST[6], IN[0].xxxx
1: MAD TEMP[0], CONST[7], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[8], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0].xyz, CONST[9], IN[0].wwww, TEMP[0]
4: MUL TEMP[1], CONST[17], IN[0].xxxx
5: MAD TEMP[1], CONST[18], IN[0].yyyy, TEMP[1]
6: MAD TEMP[1], CONST[19], IN[0].zzzz, TEMP[1]
7: MAD TEMP[1], CONST[20], IN[0].wwww, TEMP[1]
8: MAD TEMP[2].xy, IN[2].xyyy, CONST[14].xyyy, CONST[14].zwww
9: FSEQ TEMP[3].x, CONST[16].xxxx, IMM[0].xxxx
10: UIF TEMP[3].xxxx :0
11: MOV TEMP[3].xy, IN[2].xyxx
12: ELSE :0
13: MOV TEMP[3].xy, IN[3].xyxx
14: ENDIF
15: MAD TEMP[3].xy, TEMP[3].xyyy, CONST[15].xyyy, CONST[15].zwww
16: MOV TEMP[2].zw, TEMP[3].yyxy
17: MOV TEMP[3].x, CONST[10].xxxx
18: MOV TEMP[3].y, CONST[11].xxxx
19: MOV TEMP[3].z, CONST[12].xxxx
20: MOV TEMP[4].x, CONST[10].yyyy
21: MOV TEMP[4].y, CONST[11].yyyy
22: MOV TEMP[4].z, CONST[12].yyyy
23: MOV TEMP[5].x, CONST[10].zzzz
24: MOV TEMP[5].y, CONST[11].zzzz
25: MOV TEMP[5].z, CONST[12].zzzz
26: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[1].xxxx
27: MAD TEMP[3].xyz, TEMP[4].xyzz, IN[1].yyyy, TEMP[3].xyzz
28: MAD TEMP[3].xyz, TEMP[5].xyzz, IN[1].zzzz, TEMP[3].xyzz
29: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
30: RSQ TEMP[4].x, TEMP[4].xxxx
31: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
32: MUL TEMP[4].xyz, CONST[6].xyzz, IN[4].xxxx
33: MAD TEMP[4].xyz, CONST[7].xyzz, IN[4].yyyy, TEMP[4].xyzz
34: MAD TEMP[4].xyz, CONST[8].xyzz, IN[4].zzzz, TEMP[4].xyzz
35: DP3 TEMP[5].x, TEMP[4].xyzz, TEMP[4].xyzz
36: RSQ TEMP[5].x, TEMP[5].xxxx
37: MUL TEMP[4].xyz, TEMP[4].xyzz, TEMP[5].xxxx
38: MUL TEMP[5].xyz, TEMP[3].zxyy, TEMP[4].yzxx
39: MAD TEMP[5].xyz, TEMP[3].yzxx, TEMP[4].zxyy, -TEMP[5].xyzz
40: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[4].wwww
41: MOV TEMP[4].xyz, TEMP[4].xyzx
42: MOV TEMP[5].xyz, TEMP[5].xyzx
43: MOV TEMP[6].xyz, TEMP[3].xyzx
44: MUL TEMP[7].xyw, TEMP[1], IMM[0].yyyy
45: MOV TEMP[8].x, TEMP[7].xxxx
46: MUL TEMP[9].x, TEMP[7].yyyy, CONST[1].xxxx
47: MOV TEMP[8].y, TEMP[9].xxxx
48: ADD TEMP[7].xy, TEMP[8].xyyy, TEMP[7].wwww
49: MOV TEMP[7].zw, TEMP[1].wwzw
50: MUL TEMP[8], TEMP[3].xyzz, TEMP[3].yzzx
51: DP4 TEMP[9].x, CONST[2], TEMP[8]
52: DP4 TEMP[10].x, CONST[3], TEMP[8]
53: MOV TEMP[9].y, TEMP[10].xxxx
54: DP4 TEMP[8].x, CONST[4], TEMP[8]
55: MOV TEMP[9].z, TEMP[8].xxxx
56: MUL TEMP[8].x, TEMP[3].yyyy, TEMP[3].yyyy
57: MAD TEMP[3].x, TEMP[3].xxxx, TEMP[3].xxxx, -TEMP[8].xxxx
58: MAD TEMP[3].xyz, CONST[5].xyzz, TEMP[3].xxxx, TEMP[9].xyzz
59: ADD TEMP[8].xyz, TEMP[0].xyzz, -CONST[0].xyzz
60: MOV TEMP[8].yzw, TEMP[8].yxyz
61: MOV TEMP[8].x, TEMP[1].zzzz
62: MOV TEMP[0].xyz, TEMP[0].xyzx
63: MOV OUT[8], TEMP[0]
64: MOV OUT[1], TEMP[2]
65: MOV OUT[3], TEMP[5]
66: MOV OUT[2], TEMP[4]
67: MOV OUT[4], TEMP[6]
68: MOV OUT[5], TEMP[3]
69: MOV OUT[6], TEMP[7]
70: MOV OUT[0], TEMP[1]
71: MOV OUT[7], TEMP[8]
72: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
%81 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%82 = load <16 x i8>, <16 x i8> addrspace(2)* %81, align 16, !tbaa !0
%83 = add i32 %5, %7
%84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83)
%85 = extractelement <4 x float> %84, i32 0
%86 = extractelement <4 x float> %84, i32 1
%87 = extractelement <4 x float> %84, i32 2
%88 = extractelement <4 x float> %84, i32 3
%89 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%90 = load <16 x i8>, <16 x i8> addrspace(2)* %89, align 16, !tbaa !0
%91 = add i32 %5, %7
%92 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %90, i32 0, i32 %91)
%93 = extractelement <4 x float> %92, i32 0
%94 = extractelement <4 x float> %92, i32 1
%95 = extractelement <4 x float> %92, i32 2
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
%102 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%103 = load <16 x i8>, <16 x i8> addrspace(2)* %102, align 16, !tbaa !0
%104 = add i32 %5, %7
%105 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %103, i32 0, i32 %104)
%106 = extractelement <4 x float> %105, i32 0
%107 = extractelement <4 x float> %105, i32 1
%108 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 4
%109 = load <16 x i8>, <16 x i8> addrspace(2)* %108, align 16, !tbaa !0
%110 = add i32 %5, %7
%111 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %109, i32 0, i32 %110)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
%114 = extractelement <4 x float> %111, i32 2
%115 = extractelement <4 x float> %111, i32 3
%116 = fmul float %32, %85
%117 = fmul float %33, %85
%118 = fmul float %34, %85
%119 = fmul float %35, %85
%120 = fmul float %36, %86
%121 = fadd float %120, %116
%122 = fmul float %37, %86
%123 = fadd float %122, %117
%124 = fmul float %38, %86
%125 = fadd float %124, %118
%126 = fmul float %39, %86
%127 = fadd float %126, %119
%128 = fmul float %40, %87
%129 = fadd float %128, %121
%130 = fmul float %41, %87
%131 = fadd float %130, %123
%132 = fmul float %42, %87
%133 = fadd float %132, %125
%134 = fmul float %43, %87
%135 = fadd float %134, %127
%136 = fmul float %44, %88
%137 = fadd float %136, %129
%138 = fmul float %45, %88
%139 = fadd float %138, %131
%140 = fmul float %46, %88
%141 = fadd float %140, %133
%142 = fmul float %65, %85
%143 = fmul float %66, %85
%144 = fmul float %67, %85
%145 = fmul float %68, %85
%146 = fmul float %69, %86
%147 = fadd float %146, %142
%148 = fmul float %70, %86
%149 = fadd float %148, %143
%150 = fmul float %71, %86
%151 = fadd float %150, %144
%152 = fmul float %72, %86
%153 = fadd float %152, %145
%154 = fmul float %73, %87
%155 = fadd float %154, %147
%156 = fmul float %74, %87
%157 = fadd float %156, %149
%158 = fmul float %75, %87
%159 = fadd float %158, %151
%160 = fmul float %76, %87
%161 = fadd float %160, %153
%162 = fmul float %77, %88
%163 = fadd float %162, %155
%164 = fmul float %78, %88
%165 = fadd float %164, %157
%166 = fmul float %79, %88
%167 = fadd float %166, %159
%168 = fmul float %80, %88
%169 = fadd float %168, %161
%170 = fmul float %100, %56
%171 = fadd float %170, %58
%172 = fmul float %101, %57
%173 = fadd float %172, %59
%174 = fcmp oeq float %64, 0.000000e+00
%. = select i1 %174, float %100, float %106
%.44 = select i1 %174, float %101, float %107
%175 = fmul float %., %60
%176 = fadd float %175, %62
%177 = fmul float %.44, %61
%178 = fadd float %177, %63
%179 = fmul float %47, %93
%180 = fmul float %50, %93
%181 = fmul float %53, %93
%182 = fmul float %48, %94
%183 = fadd float %182, %179
%184 = fmul float %51, %94
%185 = fadd float %184, %180
%186 = fmul float %54, %94
%187 = fadd float %186, %181
%188 = fmul float %49, %95
%189 = fadd float %188, %183
%190 = fmul float %52, %95
%191 = fadd float %190, %185
%192 = fmul float %55, %95
%193 = fadd float %192, %187
%194 = fmul float %189, %189
%195 = fmul float %191, %191
%196 = fadd float %195, %194
%197 = fmul float %193, %193
%198 = fadd float %196, %197
%199 = call float @llvm.AMDGPU.rsq.clamped.f32(float %198)
%200 = fmul float %189, %199
%201 = fmul float %191, %199
%202 = fmul float %193, %199
%203 = fmul float %32, %112
%204 = fmul float %33, %112
%205 = fmul float %34, %112
%206 = fmul float %36, %113
%207 = fadd float %206, %203
%208 = fmul float %37, %113
%209 = fadd float %208, %204
%210 = fmul float %38, %113
%211 = fadd float %210, %205
%212 = fmul float %40, %114
%213 = fadd float %212, %207
%214 = fmul float %41, %114
%215 = fadd float %214, %209
%216 = fmul float %42, %114
%217 = fadd float %216, %211
%218 = fmul float %213, %213
%219 = fmul float %215, %215
%220 = fadd float %219, %218
%221 = fmul float %217, %217
%222 = fadd float %220, %221
%223 = call float @llvm.AMDGPU.rsq.clamped.f32(float %222)
%224 = fmul float %213, %223
%225 = fmul float %215, %223
%226 = fmul float %217, %223
%227 = fmul float %202, %225
%228 = fmul float %200, %226
%229 = fmul float %201, %224
%230 = fmul float %201, %226
%231 = fsub float %230, %227
%232 = fmul float %202, %224
%233 = fsub float %232, %228
%234 = fmul float %200, %225
%235 = fsub float %234, %229
%236 = fmul float %231, %115
%237 = fmul float %233, %115
%238 = fmul float %235, %115
%239 = fmul float %163, 5.000000e-01
%240 = fmul float %165, 5.000000e-01
%241 = fmul float %169, 5.000000e-01
%242 = fmul float %240, %16
%243 = fadd float %239, %241
%244 = fadd float %242, %241
%245 = fmul float %200, %201
%246 = fmul float %201, %202
%247 = fmul float %202, %202
%248 = fmul float %202, %200
%249 = fmul float %17, %245
%250 = fmul float %18, %246
%251 = fadd float %249, %250
%252 = fmul float %19, %247
%253 = fadd float %251, %252
%254 = fmul float %20, %248
%255 = fadd float %253, %254
%256 = fmul float %21, %245
%257 = fmul float %22, %246
%258 = fadd float %256, %257
%259 = fmul float %23, %247
%260 = fadd float %258, %259
%261 = fmul float %24, %248
%262 = fadd float %260, %261
%263 = fmul float %25, %245
%264 = fmul float %26, %246
%265 = fadd float %263, %264
%266 = fmul float %27, %247
%267 = fadd float %265, %266
%268 = fmul float %28, %248
%269 = fadd float %267, %268
%270 = fmul float %201, %201
%271 = fmul float %200, %200
%272 = fsub float %271, %270
%273 = fmul float %29, %272
%274 = fadd float %273, %255
%275 = fmul float %30, %272
%276 = fadd float %275, %262
%277 = fmul float %31, %272
%278 = fadd float %277, %269
%279 = fsub float %137, %13
%280 = fsub float %139, %14
%281 = fsub float %141, %15
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %171, float %173, float %176, float %178)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %224, float %225, float %226, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %236, float %237, float %238, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %200, float %201, float %202, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %274, float %276, float %278, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %243, float %244, float %167, float %169)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %167, float %279, float %280, float %281)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %137, float %139, float %141, float %135)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %163, float %165, float %167, float %169)
ret void
}
; --- Module-level declarations for the shader function that ends just above ---
; The three intrinsics tagged #1 are declared nounwind+readnone (see the
; attribute group below); llvm.SI.export carries no attribute group.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Export intrinsic: five i32 operands followed by four float channel values.
; In the calls above, the fourth i32 operand is the value that appears as the
; target number in the disassembly's `exp` instructions (32..39 and 12).
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute group #0: "ShaderType"="1" plus no-NaNs FP math.
; NOTE(review): presumably attached to @main, as in the other modules of this
; dump; the define line of this module is outside the visible range.
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
; Attribute group #1: shared by the nounwind/readnone intrinsic declarations above.
attributes #1 = { nounwind readnone }
; Metadata node !0: the node referenced as `!tbaa !0` on descriptor loads
; elsewhere in this dump.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Disassembly of the vertex shader whose LLVM IR ends just above.
; The trailing hex after each ';' is the instruction encoding emitted by the dump.
;
; v1 = 0: used as the fourth (w) source of exports 33, 34, 35 and 36 below.
; v0 = s10 + v0: used as the idxen index for every vertex-buffer fetch.
v_mov_b32_e32 v1, 0 ; 7E020280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Descriptor loads: s[28:31] (from s[2:3]) is the resource used by every
; s_buffer_load_dword below; the five 4-dword descriptors read from s[8:9]
; are the resources of the buffer_load_format_xyzw fetches.
; The last load overwrites s[8:9] itself with the fifth descriptor.
s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904
s_load_dwordx4 s[12:15], s[8:9], 0x8 ; C0860908
s_load_dwordx4 s[16:19], s[8:9], 0xc ; C088090C
s_load_dwordx4 s[8:11], s[8:9], 0x10 ; C0840910
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s32, s[28:31], 0x23 ; C2101D23
; Five vertex-attribute fetches. The destination ranges overlap
; (v[6:9]/v[9:12] share v9, v[9:12]/v[11:14] share v11-v12,
; v[11:14]/v[13:16] share v13-v14), so each later fetch overwrites the upper
; channels of the previous one, and a vmcnt(0) wait sits between them.
; Channels read later: v2-v5, v6-v8, v9-v10, v11-v12, v13-v16.
buffer_load_format_xyzw v[2:5], v0, s[0:3], 0 idxen ; E00C2000 80000200
buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[9:12], v0, s[12:15], 0 idxen ; E00C2000 80030900
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[11:14], v0, s[16:19], 0 idxen ; E00C2000 80040B00
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[13:16], v0, s[8:11], 0 idxen ; E00C2000 80020D00
; First batch of scalar constant loads from the s[28:31] buffer.
s_buffer_load_dword s33, s[28:31], 0x24 ; C2109D24
s_buffer_load_dword s34, s[28:31], 0x25 ; C2111D25
s_buffer_load_dword s35, s[28:31], 0x26 ; C2119D26
s_buffer_load_dword s36, s[28:31], 0x28 ; C2121D28
s_buffer_load_dword s3, s[28:31], 0x13 ; C2019D13
s_buffer_load_dword s2, s[28:31], 0x14 ; C2011D14
s_buffer_load_dword s0, s[28:31], 0x15 ; C2001D15
s_buffer_load_dword s1, s[28:31], 0x16 ; C2009D16
s_buffer_load_dword s18, s[28:31], 0x18 ; C2091D18
s_buffer_load_dword s37, s[28:31], 0x29 ; C2129D29
s_buffer_load_dword s38, s[28:31], 0x2a ; C2131D2A
s_buffer_load_dword s39, s[28:31], 0x2c ; C2139D2C
s_buffer_load_dword s40, s[28:31], 0x2d ; C2141D2D
s_buffer_load_dword s41, s[28:31], 0x2e ; C2149D2E
s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19
s_buffer_load_dword s19, s[28:31], 0x1a ; C2099D1A
s_buffer_load_dword s42, s[28:31], 0x1b ; C2151D1B
s_buffer_load_dword s23, s[28:31], 0x1c ; C20B9D1C
s_buffer_load_dword s22, s[28:31], 0x1d ; C20B1D1D
s_buffer_load_dword s43, s[28:31], 0x30 ; C2159D30
s_buffer_load_dword s44, s[28:31], 0x31 ; C2161D31
s_buffer_load_dword s45, s[28:31], 0x32 ; C2169D32
s_buffer_load_dword s46, s[28:31], 0x38 ; C2171D38
s_buffer_load_dword s47, s[28:31], 0x39 ; C2179D39
s_buffer_load_dword s24, s[28:31], 0x1e ; C20C1D1E
s_buffer_load_dword s48, s[28:31], 0x1f ; C2181D1F
s_buffer_load_dword s26, s[28:31], 0x20 ; C20D1D20
s_buffer_load_dword s27, s[28:31], 0x21 ; C20D9D21
s_buffer_load_dword s25, s[28:31], 0x22 ; C20C9D22
s_buffer_load_dword s4, s[28:31], 0x3f ; C2021D3F
s_buffer_load_dword s5, s[28:31], 0x40 ; C2029D40
s_buffer_load_dword s49, s[28:31], 0x44 ; C2189D44
s_buffer_load_dword s50, s[28:31], 0x45 ; C2191D45
s_buffer_load_dword s51, s[28:31], 0x46 ; C2199D46
s_buffer_load_dword s6, s[28:31], 0x3a ; C2031D3A
s_buffer_load_dword s8, s[28:31], 0x3b ; C2041D3B
s_buffer_load_dword s52, s[28:31], 0x3c ; C21A1D3C
s_buffer_load_dword s53, s[28:31], 0x3d ; C21A9D3D
s_buffer_load_dword s14, s[28:31], 0x3e ; C2071D3E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Second batch of constant loads, interleaved with VALU work.
; s4, s6, s8 and s14 are copied into v0, v17, v18 and v19 (the additive terms
; of the export-32 channels) and s5 is consumed by the compare before those
; SGPRs are reloaded with other constants further down.
; vcc = (s5 == 0): matches `fcmp oeq float %64, 0.0` in the IR.
v_mov_b32_e32 v0, s4 ; 7E000204
s_buffer_load_dword s13, s[28:31], 0x9 ; C2069D09
v_cmp_eq_f32_e64 vcc, 0, s5 ; D004006A 00000A80
s_buffer_load_dword s9, s[28:31], 0xa ; C2049D0A
s_buffer_load_dword s7, s[28:31], 0xb ; C2039D0B
s_buffer_load_dword s12, s[28:31], 0xc ; C2061D0C
v_mov_b32_e32 v17, s6 ; 7E220206
s_buffer_load_dword s16, s[28:31], 0xd ; C2081D0D
v_mov_b32_e32 v18, s8 ; 7E240208
s_buffer_load_dword s11, s[28:31], 0xe ; C2059D0E
s_buffer_load_dword s10, s[28:31], 0xf ; C2051D0F
s_buffer_load_dword s15, s[28:31], 0x10 ; C2079D10
v_mov_b32_e32 v19, s14 ; 7E26020E
s_buffer_load_dword s17, s[28:31], 0x11 ; C2089D11
s_buffer_load_dword s14, s[28:31], 0x12 ; C2071D12
s_buffer_load_dword s54, s[28:31], 0x47 ; C21B1D47
s_buffer_load_dword s55, s[28:31], 0x48 ; C21B9D48
s_buffer_load_dword s56, s[28:31], 0x49 ; C21C1D49
s_buffer_load_dword s57, s[28:31], 0x4a ; C21C9D4A
s_buffer_load_dword s58, s[28:31], 0x4b ; C21D1D4B
s_buffer_load_dword s4, s[28:31], 0x0 ; C2021D00
s_buffer_load_dword s5, s[28:31], 0x1 ; C2029D01
s_buffer_load_dword s6, s[28:31], 0x2 ; C2031D02
s_buffer_load_dword s8, s[28:31], 0x4 ; C2041D04
s_buffer_load_dword s21, s[28:31], 0x8 ; C20A9D08
s_buffer_load_dword s59, s[28:31], 0x4c ; C21D9D4C
s_buffer_load_dword s60, s[28:31], 0x4d ; C21E1D4D
s_buffer_load_dword s61, s[28:31], 0x4e ; C21E9D4E
s_buffer_load_dword s62, s[28:31], 0x4f ; C21F1D4F
s_buffer_load_dword s63, s[28:31], 0x50 ; C21F9D50
s_buffer_load_dword s64, s[28:31], 0x51 ; C2201D51
s_buffer_load_dword s65, s[28:31], 0x52 ; C2209D52
s_buffer_load_dword s28, s[28:31], 0x53 ; C20E1D53
; v20 = three-term dot product of v2..v4 with constants; exported as the
; w channel of target 39.
v_mul_f32_e32 v20, s42, v2 ; 1028042A
v_mac_f32_e32 v20, s48, v3 ; 3E280630
v_mac_f32_e32 v20, s32, v4 ; 3E280820
; v17/v18 = constant * attribute + constant: x/y channels of export 32
; (IR %171, %173).
v_mac_f32_e32 v17, s46, v9 ; 3E22122E
v_mac_f32_e32 v18, s47, v10 ; 3E24142F
; v21..v24 = four-term dot products of v2..v5 with constants
; (IR %163, %165, %167, %169); exported to target 12 at the end.
v_mul_f32_e32 v21, s49, v2 ; 102A0431
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v21, s55, v3 ; 3E2A0637
v_mac_f32_e32 v21, s59, v4 ; 3E2A083B
v_mac_f32_e32 v21, s63, v5 ; 3E2A0A3F
v_mul_f32_e32 v22, s50, v2 ; 102C0432
v_mac_f32_e32 v22, s56, v3 ; 3E2C0638
v_mac_f32_e32 v22, s60, v4 ; 3E2C083C
v_mac_f32_e32 v22, s64, v5 ; 3E2C0A40
v_mul_f32_e32 v23, s51, v2 ; 102E0433
v_mac_f32_e32 v23, s57, v3 ; 3E2E0639
v_mac_f32_e32 v23, s61, v4 ; 3E2E083D
v_mac_f32_e32 v23, s65, v5 ; 3E2E0A41
v_mul_f32_e32 v24, s54, v2 ; 10300436
v_mac_f32_e32 v24, s58, v3 ; 3E30063A
v_mac_f32_e32 v24, s62, v4 ; 3E30083E
v_mac_f32_e32 v24, s28, v5 ; 3E300A1C
; Per-channel select on vcc between the v9/v10 and v11/v12 attribute pairs
; (the two IR `select i1 %174` instructions).
v_cndmask_b32_e32 v9, v11, v9 ; 0012130B
v_cndmask_b32_e32 v10, v12, v10 ; 0014150C
; v11, v12, v6 = three 3-term dot products of v6..v8 with constants
; (IR %189, %191, %193); normalized further down.
v_mul_f32_e32 v11, s36, v6 ; 10160C24
v_mac_f32_e32 v11, s37, v7 ; 3E160E25
v_mul_f32_e32 v12, s39, v6 ; 10180C27
v_mac_f32_e32 v12, s40, v7 ; 3E180E28
v_mul_f32_e32 v6, s43, v6 ; 100C0C2B
v_mac_f32_e32 v6, s44, v7 ; 3E0C0E2C
v_mac_f32_e32 v11, s38, v8 ; 3E161026
v_mac_f32_e32 v12, s41, v8 ; 3E181029
v_mac_f32_e32 v6, s45, v8 ; 3E0C102D
; v7, v8, v2 = four-term dot products of v2..v5 with constants
; (IR %137, %139, %141); exported as xyz of target 39 and, after the
; subtractions near the end, as yzw of target 38.
v_mul_f32_e32 v7, s18, v2 ; 100E0412
v_mac_f32_e32 v7, s23, v3 ; 3E0E0617
v_mac_f32_e32 v7, s26, v4 ; 3E0E081A
v_mac_f32_e32 v7, s33, v5 ; 3E0E0A21
v_mul_f32_e32 v8, s20, v2 ; 10100414
v_mac_f32_e32 v8, s22, v3 ; 3E100616
v_mac_f32_e32 v8, s27, v4 ; 3E10081B
v_mac_f32_e32 v8, s34, v5 ; 3E100A22
v_mul_f32_e32 v2, s19, v2 ; 10040413
v_mac_f32_e32 v2, s24, v3 ; 3E040618
v_mac_f32_e32 v2, s25, v4 ; 3E040819
v_mac_f32_e32 v2, s35, v5 ; 3E040A23
; z/w channels of export 32 use the selected v9/v10 (IR %176, %178).
v_mac_f32_e32 v19, s52, v9 ; 3E261234
v_mac_f32_e32 v0, s53, v10 ; 3E001435
exp 15, 32, 0, 0, 0, v17, v18, v19, v0 ; F800020F 00131211
; Wait for the export before v0 is overwritten.
s_waitcnt expcnt(0) ; BF8C070F
; v0, v3, v4 = three 3-term dot products of v13..v15 with the same constants
; used for v7/v8/v2 above (IR %213, %215, %217).
v_mul_f32_e32 v0, s18, v13 ; 10001A12
v_mac_f32_e32 v0, s23, v14 ; 3E001C17
v_mul_f32_e32 v3, s20, v13 ; 10061A14
v_mac_f32_e32 v3, s22, v14 ; 3E061C16
v_mul_f32_e32 v4, s19, v13 ; 10081A13
v_mac_f32_e32 v4, s24, v14 ; 3E081C18
v_mac_f32_e32 v0, s26, v15 ; 3E001E1A
v_mac_f32_e32 v3, s27, v15 ; 3E061E1B
v_mac_f32_e32 v4, s25, v15 ; 3E081E19
; Normalize both 3-vectors: scale by rsq_clamp of the squared length
; (the two llvm.AMDGPU.rsq.clamped.f32 calls in the IR).
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mac_f32_e32 v5, v12, v12 ; 3E0A190C
v_mac_f32_e32 v5, v6, v6 ; 3E0A0D06
v_rsq_clamp_f32_e32 v5, v5 ; 7E0A5905
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v3, v3 ; 3E120703
v_mac_f32_e32 v9, v4, v4 ; 3E120904
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
; (v10, v11, v5) = first normalized vector (IR %200..%202);
; (v0, v3, v4) = second normalized vector (IR %224..%226).
v_mul_f32_e32 v10, v5, v11 ; 10141705
v_mul_f32_e32 v11, v5, v12 ; 10161905
v_mul_f32_e32 v5, v5, v6 ; 100A0D05
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v3, v9, v3 ; 10060709
v_mul_f32_e32 v4, v9, v4 ; 10080909
; Cross product of the two normalized vectors (IR %231, %233, %235),
; then scaled by v16, the w channel of the fifth fetch (IR %236..%238).
v_mul_f32_e32 v6, v3, v5 ; 100C0B03
v_mad_f32 v6, v11, v4, -v6 ; D2820006 841A090B
v_mul_f32_e32 v9, v4, v10 ; 10121504
v_mad_f32 v9, v5, v0, -v9 ; D2820009 84260105
v_mul_f32_e32 v12, v0, v11 ; 10181700
v_mad_f32 v12, v10, v3, -v12 ; D282000C 8432070A
v_mul_f32_e32 v6, v16, v6 ; 100C0D10
v_mul_f32_e32 v9, v16, v9 ; 10121310
v_mul_f32_e32 v12, v16, v12 ; 10181910
; Export 33 = second normalized vector, export 34 = scaled cross product;
; w = v1 (0.0) for both.
exp 15, 33, 0, 0, 0, v0, v3, v4, v1 ; F800021F 01040300
exp 15, 34, 0, 0, 0, v6, v9, v12, v1 ; F800022F 010C0906
s_waitcnt expcnt(0) ; BF8C070F
; Accumulate v3, v4, v0: constant-weighted sums of the pairwise products of
; the first normalized vector's components (IR %245..%248), finished below
; with the (x*x - y*y) term (IR %272) to give IR %274, %276, %278.
v_mul_f32_e32 v0, v5, v11 ; 10001705
v_mul_f32_e32 v3, s13, v0 ; 1006000D
v_mul_f32_e32 v4, s16, v0 ; 10080010
v_mul_f32_e32 v0, s17, v0 ; 10000011
v_mul_f32_e32 v6, v11, v10 ; 100C150B
v_mac_f32_e32 v3, s21, v6 ; 3E060C15
v_mac_f32_e32 v4, s12, v6 ; 3E080C0C
v_mac_f32_e32 v0, s15, v6 ; 3E000C0F
v_mul_f32_e32 v6, v5, v5 ; 100C0B05
v_mac_f32_e32 v3, s9, v6 ; 3E060C09
v_mac_f32_e32 v4, s11, v6 ; 3E080C0B
v_mac_f32_e32 v0, s14, v6 ; 3E000C0E
v_mul_f32_e32 v6, v10, v5 ; 100C0B0A
v_mac_f32_e32 v3, s7, v6 ; 3E060C07
v_mac_f32_e32 v4, s10, v6 ; 3E080C0A
v_mac_f32_e32 v0, s3, v6 ; 3E000C03
; Export 35 = first normalized vector; issued before v5 is reused below.
exp 15, 35, 0, 0, 0, v10, v11, v5, v1 ; F800023F 01050B0A
s_waitcnt expcnt(0) ; BF8C070F
; v5 = v10*v10 - v11*v11 (IR %272), folded into the three accumulators.
v_mul_f32_e32 v5, v11, v11 ; 100A170B
v_mad_f32 v5, v10, v10, -v5 ; D2820005 8416150A
v_mac_f32_e32 v3, s2, v5 ; 3E060A02
v_mac_f32_e32 v4, s0, v5 ; 3E080A00
v_mac_f32_e32 v0, s1, v5 ; 3E000A01
; Half-scaled v22/v24 for export 37 (IR %240, %241).
v_mul_f32_e32 v5, 0.5, v22 ; 100A2CF0
v_mul_f32_e32 v6, 0.5, v24 ; 100C30F0
exp 15, 36, 0, 0, 0, v3, v4, v0, v1 ; F800024F 01000403
s_waitcnt expcnt(0) ; BF8C070F
; v0 = 0.5*v21 + 0.5*v24 (IR %243); v6 = (0.5*v22)*s8 + 0.5*v24 (IR %244).
v_mad_f32 v0, 0.5, v21, v6 ; D2820000 041A2AF0
v_mac_f32_e32 v6, s8, v5 ; 3E0C0A08
exp 15, 37, 0, 0, 0, v0, v6, v23, v24 ; F800025F 18170600
s_waitcnt expcnt(0) ; BF8C070F
; v0, v1, v3 = (v7, v8, v2) minus constants s4..s6 (IR %279..%281).
; v1 stops being the 0.0 constant here; its last use as zero was export 36.
v_subrev_f32_e32 v0, s4, v7 ; 0A000E04
v_subrev_f32_e32 v1, s5, v8 ; 0A021005
v_subrev_f32_e32 v3, s6, v2 ; 0A060406
exp 15, 38, 0, 0, 0, v23, v0, v1, v3 ; F800026F 03010017
exp 15, 39, 0, 0, 0, v7, v8, v2, v20 ; F800027F 14020807
; Final export to target 12 with the fourth field set to 1, matching the
; third i32 operand of the last llvm.SI.export call in the IR
; (NOTE(review): presumably the "done" flag on the position export - confirm).
exp 15, 12, 0, 1, 0, v21, v22, v23, v24 ; F80008CF 18171615
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 72
VGPRS: 28
Code Size: 932 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL IN[6], GENERIC[6], PERSPECTIVE
DCL IN[7], GENERIC[7], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SVIEW[0], CUBE, FLOAT
DCL SVIEW[1], CUBE, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..19]
DCL CONST[22..24]
DCL CONST[26]
DCL TEMP[0..18], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
IMM[1] FLT32 { 0.5000, 0.7500, 7.0000, 1.0000}
IMM[2] FLT32 { 10.0000, 0.9680, 0.0300, 0.0001}
0: MOV TEMP[0].x, IN[1].xxxx
1: MOV TEMP[0].y, IN[2].xxxx
2: MOV TEMP[0].z, IN[3].xxxx
3: MOV TEMP[1].x, IN[1].yyyy
4: MOV TEMP[1].y, IN[2].yyyy
5: MOV TEMP[1].z, IN[3].yyyy
6: MOV TEMP[2].x, IN[1].zzzz
7: MOV TEMP[2].y, IN[2].zzzz
8: MOV TEMP[2].z, IN[3].zzzz
9: MOV TEMP[3].xy, IN[0].xyyy
10: TEX TEMP[3].yw, TEMP[3], SAMP[3], 2D
11: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
12: MUL TEMP[3].xy, TEMP[3].xyyy, CONST[22].xxxx
13: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
14: MOV_SAT TEMP[4].x, TEMP[4].xxxx
15: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
16: SQRT TEMP[4].x, TEMP[4].xxxx
17: MOV TEMP[3].z, TEMP[4].xxxx
18: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
19: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
20: MOV TEMP[0].y, TEMP[1].xxxx
21: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[2].xyzz
22: MOV TEMP[0].z, TEMP[1].xxxx
23: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[0].xyzz
24: RSQ TEMP[1].x, TEMP[1].xxxx
25: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
26: DP3 TEMP[1].x, IN[6].yzww, IN[6].yzww
27: RSQ TEMP[1].x, TEMP[1].xxxx
28: MUL TEMP[1].xyz, IN[6].yzww, TEMP[1].xxxx
29: MOV TEMP[2].xy, IN[0].xyyy
30: TEX TEMP[2].xyz, TEMP[2], SAMP[2], 2D
31: MUL TEMP[2].xyz, CONST[19].xyzz, TEMP[2].xyzz
32: LRP TEMP[3].xyz, CONST[23].xxxx, TEMP[2].xyzz, CONST[16].xyzz
33: MUL TEMP[4].x, CONST[23].xxxx, CONST[16].wwww
34: ADD TEMP[4].x, CONST[16].wwww, -TEMP[4].xxxx
35: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[4].xxxx
36: MOV TEMP[5].xy, IN[0].xyyy
37: TEX TEMP[5].y, TEMP[5], SAMP[4], 2D
38: ADD TEMP[6].x, IMM[0].zzzz, -CONST[26].xxxx
39: MAD TEMP[5].x, TEMP[5].yyyy, CONST[26].xxxx, TEMP[6].xxxx
40: DP3 TEMP[6].x, TEMP[0].xyzz, CONST[0].xyzz
41: MAX TEMP[6].x, IMM[0].wwww, TEMP[6].xxxx
42: MOV TEMP[7].xyz, IMM[0].wwww
43: MOV TEMP[8].w, IMM[0].zzzz
44: MOV TEMP[8].xyz, TEMP[0].xyzx
45: DP4 TEMP[9].x, CONST[1], TEMP[8]
46: DP4 TEMP[10].x, CONST[2], TEMP[8]
47: MOV TEMP[9].y, TEMP[10].xxxx
48: DP4 TEMP[8].x, CONST[3], TEMP[8]
49: MOV TEMP[9].z, TEMP[8].xxxx
50: ADD TEMP[8].xyz, IN[4].xyzz, TEMP[9].xyzz
51: MOV TEMP[9].xy, IN[5].xyyy
52: MOV TEMP[9].w, IN[5].wwww
53: TXP TEMP[9].x, TEMP[9], SAMP[5], 2D
54: MUL TEMP[9].xyz, CONST[17].xyzz, TEMP[9].xxxx
55: MUL TEMP[8].xyz, TEMP[8].xyzz, TEMP[5].xxxx
56: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[1].xyzz
57: MUL TEMP[10].xyz, TEMP[10].xxxx, TEMP[0].xyzz
58: MUL TEMP[10].xyz, IMM[0].xxxx, TEMP[10].xyzz
59: ADD TEMP[10].xyz, TEMP[1].xyzz, -TEMP[10].xyzz
60: MOV TEMP[11].xyz, TEMP[10].xyzx
61: FSLT TEMP[12].x, IMM[0].wwww, CONST[10].wwww
62: UIF TEMP[12].xxxx :0
63: DP3 TEMP[12].x, TEMP[10].xyzz, TEMP[10].xyzz
64: RSQ TEMP[12].x, TEMP[12].xxxx
65: MUL TEMP[12].xyz, TEMP[10].xyzz, TEMP[12].xxxx
66: MOV TEMP[13].xyz, -IN[7].xyzx
67: ADD TEMP[14].xyz, CONST[8].xyzz, TEMP[13].xyzz
68: RCP TEMP[15].x, TEMP[12].xxxx
69: RCP TEMP[15].y, TEMP[12].yyyy
70: RCP TEMP[15].z, TEMP[12].zzzz
71: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
72: ADD TEMP[13].xyz, CONST[9].xyzz, TEMP[13].xyzz
73: RCP TEMP[15].x, TEMP[12].xxxx
74: RCP TEMP[15].y, TEMP[12].yyyy
75: RCP TEMP[15].z, TEMP[12].zzzz
76: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
77: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[12].xyzz
78: UIF TEMP[15].xxxx :0
79: MOV TEMP[16].x, TEMP[14].xxxx
80: ELSE :0
81: MOV TEMP[16].x, TEMP[13].xxxx
82: ENDIF
83: UIF TEMP[15].yyyy :0
84: MOV TEMP[17].x, TEMP[14].yyyy
85: ELSE :0
86: MOV TEMP[17].x, TEMP[13].yyyy
87: ENDIF
88: UIF TEMP[15].zzzz :0
89: MOV TEMP[14].x, TEMP[14].zzzz
90: ELSE :0
91: MOV TEMP[14].x, TEMP[13].zzzz
92: ENDIF
93: ADD TEMP[13].xyz, CONST[8].xyzz, CONST[9].xyzz
94: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
95: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
96: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
97: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[10].xyzz
98: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz
99: MAD TEMP[12].xyz, TEMP[12].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
100: ADD TEMP[11].xyz, TEMP[12].xyzz, -TEMP[13].xyzz
101: ENDIF
102: ADD TEMP[12].x, IMM[0].zzzz, -CONST[24].xxxx
103: POW TEMP[12].x, TEMP[12].xxxx, IMM[1].yyyy
104: MUL TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
105: MOV TEMP[11].xyz, TEMP[11].xyzz
106: MOV TEMP[11].w, TEMP[12].xxxx
107: TXL TEMP[11], TEMP[11], SAMP[0], CUBE
108: POW TEMP[12].x, TEMP[11].wwww, CONST[11].yyyy
109: MUL TEMP[12].x, CONST[11].xxxx, TEMP[12].xxxx
110: MUL TEMP[11].xyz, TEMP[12].xxxx, TEMP[11].xyzz
111: FSLT TEMP[12].x, CONST[9].wwww, IMM[1].wwww
112: UIF TEMP[12].xxxx :0
113: MOV TEMP[12].xyz, TEMP[10].xyzx
114: FSLT TEMP[13].x, IMM[0].wwww, CONST[14].wwww
115: UIF TEMP[13].xxxx :0
116: DP3 TEMP[13].x, TEMP[10].xyzz, TEMP[10].xyzz
117: RSQ TEMP[13].x, TEMP[13].xxxx
118: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[13].xxxx
119: MOV TEMP[13].xyz, -IN[7].xyzx
120: ADD TEMP[14].xyz, CONST[12].xyzz, TEMP[13].xyzz
121: RCP TEMP[15].x, TEMP[10].xxxx
122: RCP TEMP[15].y, TEMP[10].yyyy
123: RCP TEMP[15].z, TEMP[10].zzzz
124: MUL TEMP[14].xyz, TEMP[14].xyzz, TEMP[15].xyzz
125: ADD TEMP[13].xyz, CONST[13].xyzz, TEMP[13].xyzz
126: RCP TEMP[15].x, TEMP[10].xxxx
127: RCP TEMP[15].y, TEMP[10].yyyy
128: RCP TEMP[15].z, TEMP[10].zzzz
129: MUL TEMP[13].xyz, TEMP[13].xyzz, TEMP[15].xyzz
130: FSLT TEMP[15].xyz, IMM[0].wwww, TEMP[10].xyzz
131: UIF TEMP[15].xxxx :0
132: MOV TEMP[16].x, TEMP[14].xxxx
133: ELSE :0
134: MOV TEMP[16].x, TEMP[13].xxxx
135: ENDIF
136: UIF TEMP[15].yyyy :0
137: MOV TEMP[17].x, TEMP[14].yyyy
138: ELSE :0
139: MOV TEMP[17].x, TEMP[13].yyyy
140: ENDIF
141: UIF TEMP[15].zzzz :0
142: MOV TEMP[14].x, TEMP[14].zzzz
143: ELSE :0
144: MOV TEMP[14].x, TEMP[13].zzzz
145: ENDIF
146: ADD TEMP[13].xyz, CONST[12].xyzz, CONST[13].xyzz
147: MUL TEMP[13].xyz, TEMP[13].xyzz, IMM[1].xxxx
148: MIN TEMP[15].x, TEMP[16].xxxx, TEMP[17].xxxx
149: MIN TEMP[14].x, TEMP[15].xxxx, TEMP[14].xxxx
150: ADD TEMP[15].xyz, TEMP[13].xyzz, -CONST[14].xyzz
151: ADD TEMP[15].xyz, TEMP[15].xyzz, IN[7].xyzz
152: MAD TEMP[10].xyz, TEMP[10].xyzz, TEMP[14].xxxx, TEMP[15].xyzz
153: ADD TEMP[12].xyz, TEMP[10].xyzz, -TEMP[13].xyzz
154: ENDIF
155: ADD TEMP[10].x, IMM[0].zzzz, -CONST[24].xxxx
156: POW TEMP[10].x, TEMP[10].xxxx, IMM[1].yyyy
157: MUL TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
158: MOV TEMP[12].xyz, TEMP[12].xyzz
159: MOV TEMP[12].w, TEMP[10].xxxx
160: TXL TEMP[10], TEMP[12], SAMP[1], CUBE
161: POW TEMP[12].x, TEMP[10].wwww, CONST[15].yyyy
162: MUL TEMP[12].x, CONST[15].xxxx, TEMP[12].xxxx
163: MUL TEMP[10].xyz, TEMP[12].xxxx, TEMP[10].xyzz
164: LRP TEMP[7].xyz, CONST[9].wwww, TEMP[11].xyzz, TEMP[10].xyzz
165: ELSE :0
166: MOV TEMP[7].xyz, TEMP[11].xyzx
167: ENDIF
168: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[5].xxxx
169: MOV TEMP[1].xyz, -TEMP[1].xyzx
170: ADD TEMP[5].x, IMM[0].zzzz, -CONST[24].xxxx
171: ADD TEMP[10].xyz, CONST[0].xyzz, TEMP[1].xyzz
172: DP3 TEMP[11].x, TEMP[10].xyzz, TEMP[10].xyzz
173: RSQ TEMP[11].x, TEMP[11].xxxx
174: MUL TEMP[10].xyz, TEMP[10].xyzz, TEMP[11].xxxx
175: DP3 TEMP[1].x, TEMP[0].xyzz, TEMP[1].xyzz
176: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
177: DP3 TEMP[11].x, CONST[0].xyzz, TEMP[10].xyzz
178: MAX TEMP[11].x, IMM[0].wwww, TEMP[11].xxxx
179: MUL TEMP[12].x, TEMP[5].xxxx, TEMP[5].xxxx
180: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].wwww
181: ADD TEMP[13].x, IMM[0].zzzz, -TEMP[5].xxxx
182: MAD TEMP[13].x, TEMP[13].xxxx, IMM[2].yyyy, IMM[2].zzzz
183: LG2 TEMP[13].x, TEMP[13].xxxx
184: RCP TEMP[13].x, TEMP[13].xxxx
185: MUL TEMP[13].x, IMM[2].xxxx, TEMP[13].xxxx
186: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[13].xxxx
187: ADD TEMP[14].x, IMM[0].zzzz, -TEMP[6].xxxx
188: ADD TEMP[15].x, IMM[0].zzzz, -TEMP[1].xxxx
189: MUL TEMP[16].x, IMM[0].xxxx, TEMP[11].xxxx
190: MUL TEMP[5].x, TEMP[11].xxxx, TEMP[5].xxxx
191: MAD TEMP[5].x, TEMP[16].xxxx, TEMP[5].xxxx, IMM[1].xxxx
192: ADD TEMP[11].x, IMM[0].zzzz, -TEMP[11].xxxx
193: ADD TEMP[16].x, IMM[0].zzzz, -TEMP[1].xxxx
194: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
195: ADD TEMP[4].x, CONST[24].xxxx, TEMP[4].xxxx
196: MOV_SAT TEMP[4].x, TEMP[4].xxxx
197: MUL TEMP[17].x, TEMP[16].xxxx, TEMP[16].xxxx
198: MUL TEMP[18].x, TEMP[16].xxxx, TEMP[16].xxxx
199: MUL TEMP[16].x, TEMP[18].xxxx, TEMP[16].xxxx
200: MUL TEMP[16].x, TEMP[17].xxxx, TEMP[16].xxxx
201: LRP TEMP[4].xyz, TEMP[16].xxxx, TEMP[4].xxxx, TEMP[3].xyzz
202: LRP TEMP[16].x, TEMP[6].xxxx, IMM[0].zzzz, TEMP[12].xxxx
203: LRP TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, TEMP[12].xxxx
204: MAD TEMP[1].x, TEMP[16].xxxx, TEMP[1].xxxx, IMM[2].wwww
205: RCP TEMP[1].x, TEMP[1].xxxx
206: DP3 TEMP[10].x, TEMP[0].xyzz, TEMP[10].xyzz
207: MAX TEMP[10].x, IMM[0].wwww, TEMP[10].xxxx
208: POW TEMP[10].x, TEMP[10].xxxx, TEMP[13].xxxx
209: ADD TEMP[12].x, TEMP[13].xxxx, IMM[0].zzzz
210: MUL TEMP[12].x, TEMP[12].xxxx, CONST[18].yyyy
211: MUL TEMP[10].x, TEMP[10].xxxx, TEMP[12].xxxx
212: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[10].xxxx
213: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[6].xxxx
214: MUL TEMP[1].x, TEMP[1].xxxx, CONST[18].xxxx
215: MAX TEMP[1].x, IMM[0].wwww, TEMP[1].xxxx
216: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[9].xyzz
217: ADD TEMP[10].xyz, IMM[0].zzzz, -TEMP[3].xyzz
218: MUL TEMP[12].x, TEMP[11].xxxx, TEMP[11].xxxx
219: MUL TEMP[13].x, TEMP[11].xxxx, TEMP[11].xxxx
220: MUL TEMP[11].x, TEMP[13].xxxx, TEMP[11].xxxx
221: MUL TEMP[11].x, TEMP[12].xxxx, TEMP[11].xxxx
222: MAD TEMP[3].xyz, TEMP[10].xyzz, TEMP[11].xxxx, TEMP[3].xyzz
223: ADD TEMP[10].x, TEMP[5].xxxx, IMM[0].yyyy
224: MUL TEMP[11].x, TEMP[14].xxxx, TEMP[14].xxxx
225: MUL TEMP[12].x, TEMP[14].xxxx, TEMP[14].xxxx
226: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[14].xxxx
227: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
228: MAD TEMP[10].x, TEMP[10].xxxx, TEMP[11].xxxx, IMM[0].zzzz
229: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy
230: MUL TEMP[11].x, TEMP[15].xxxx, TEMP[15].xxxx
231: MUL TEMP[12].x, TEMP[15].xxxx, TEMP[15].xxxx
232: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[15].xxxx
233: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[12].xxxx
234: MAD TEMP[5].x, TEMP[5].xxxx, TEMP[11].xxxx, IMM[0].zzzz
235: MUL TEMP[5].x, TEMP[10].xxxx, TEMP[5].xxxx
236: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[6].xxxx
237: MAD TEMP[5].xyz, TEMP[9].xyzz, TEMP[5].xxxx, TEMP[8].xyzz
238: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[5].xyzz
239: MAD TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].xyzz, TEMP[2].xyzz
240: MAD TEMP[0].xyz, TEMP[7].xyzz, TEMP[4].xyzz, TEMP[1].xyzz
241: MOV TEMP[0].xyz, TEMP[0].xyzx
242: MAD TEMP[1].x, IN[6].xxxx, CONST[5].zzzz, CONST[5].wwww
243: MOV_SAT TEMP[1].x, TEMP[1].xxxx
244: LRP TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[4].xyzz
245: MOV TEMP[0].xyz, TEMP[0].xyzx
246: MOV TEMP[0].w, IMM[0].zzzz
247: MOV OUT[0], TEMP[0]
248: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 152)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216)
%60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224)
%61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228)
%62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 232)
%63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236)
%64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240)
%65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244)
%66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260)
%68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 264)
%69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268)
%70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276)
%72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 280)
%73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292)
%75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 300)
%76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308)
%78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 312)
%79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
%83 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%84 = load <32 x i8>, <32 x i8> addrspace(2)* %83, align 32, !tbaa !0
%85 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%86 = load <16 x i8>, <16 x i8> addrspace(2)* %85, align 16, !tbaa !0
%87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%88 = bitcast <8 x i32> addrspace(2)* %87 to <32 x i8> addrspace(2)*
%89 = load <32 x i8>, <32 x i8> addrspace(2)* %88, align 32, !tbaa !0
%90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%91 = bitcast <4 x i32> addrspace(2)* %90 to <16 x i8> addrspace(2)*
%92 = load <16 x i8>, <16 x i8> addrspace(2)* %91, align 16, !tbaa !0
%93 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%94 = bitcast <8 x i32> addrspace(2)* %93 to <32 x i8> addrspace(2)*
%95 = load <32 x i8>, <32 x i8> addrspace(2)* %94, align 32, !tbaa !0
%96 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%97 = bitcast <4 x i32> addrspace(2)* %96 to <16 x i8> addrspace(2)*
%98 = load <16 x i8>, <16 x i8> addrspace(2)* %97, align 16, !tbaa !0
%99 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%100 = bitcast <8 x i32> addrspace(2)* %99 to <32 x i8> addrspace(2)*
%101 = load <32 x i8>, <32 x i8> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%103 = bitcast <4 x i32> addrspace(2)* %102 to <16 x i8> addrspace(2)*
%104 = load <16 x i8>, <16 x i8> addrspace(2)* %103, align 16, !tbaa !0
%105 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%106 = bitcast <8 x i32> addrspace(2)* %105 to <32 x i8> addrspace(2)*
%107 = load <32 x i8>, <32 x i8> addrspace(2)* %106, align 32, !tbaa !0
%108 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%109 = bitcast <4 x i32> addrspace(2)* %108 to <16 x i8> addrspace(2)*
%110 = load <16 x i8>, <16 x i8> addrspace(2)* %109, align 16, !tbaa !0
%111 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%112 = bitcast <8 x i32> addrspace(2)* %111 to <32 x i8> addrspace(2)*
%113 = load <32 x i8>, <32 x i8> addrspace(2)* %112, align 32, !tbaa !0
%114 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%115 = bitcast <4 x i32> addrspace(2)* %114 to <16 x i8> addrspace(2)*
%116 = load <16 x i8>, <16 x i8> addrspace(2)* %115, align 16, !tbaa !0
%117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%121 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%122 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%123 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%124 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%125 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%126 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%127 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%128 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%129 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%130 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%131 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%132 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%133 = call float @llvm.SI.fs.interp(i32 3, i32 5, i32 %5, <2 x i32> %7)
%134 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7)
%135 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7)
%136 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7)
%137 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7)
%138 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7)
%139 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7)
%140 = call float @llvm.SI.fs.interp(i32 2, i32 7, i32 %5, <2 x i32> %7)
%141 = bitcast float %117 to i32
%142 = bitcast float %118 to i32
%143 = insertelement <2 x i32> undef, i32 %141, i32 0
%144 = insertelement <2 x i32> %143, i32 %142, i32 1
%145 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %144, <32 x i8> %101, <16 x i8> %104, i32 2)
%146 = extractelement <4 x float> %145, i32 1
%147 = extractelement <4 x float> %145, i32 3
%148 = fmul float %147, 2.000000e+00
%149 = fadd float %148, -1.000000e+00
%150 = fmul float %146, 2.000000e+00
%151 = fadd float %150, -1.000000e+00
%152 = fmul float %149, %79
%153 = fmul float %151, %79
%154 = fmul float %152, %152
%155 = fmul float %153, %153
%156 = fadd float %154, %155
%157 = call float @llvm.AMDIL.clamp.(float %156, float 0.000000e+00, float 1.000000e+00)
%158 = fsub float 1.000000e+00, %157
%159 = call float @llvm.sqrt.f32(float %158)
%160 = fmul float %152, %119
%161 = fmul float %153, %122
%162 = fadd float %161, %160
%163 = fmul float %159, %125
%164 = fadd float %162, %163
%165 = fmul float %152, %120
%166 = fmul float %153, %123
%167 = fadd float %166, %165
%168 = fmul float %159, %126
%169 = fadd float %167, %168
%170 = fmul float %152, %121
%171 = fmul float %153, %124
%172 = fadd float %171, %170
%173 = fmul float %159, %127
%174 = fadd float %172, %173
%175 = fmul float %164, %164
%176 = fmul float %169, %169
%177 = fadd float %176, %175
%178 = fmul float %174, %174
%179 = fadd float %177, %178
%180 = call float @llvm.AMDGPU.rsq.clamped.f32(float %179)
%181 = fmul float %164, %180
%182 = fmul float %169, %180
%183 = fmul float %174, %180
%184 = fmul float %135, %135
%185 = fmul float %136, %136
%186 = fadd float %185, %184
%187 = fmul float %137, %137
%188 = fadd float %186, %187
%189 = call float @llvm.AMDGPU.rsq.clamped.f32(float %188)
%190 = fmul float %135, %189
%191 = fmul float %136, %189
%192 = fmul float %137, %189
%193 = bitcast float %117 to i32
%194 = bitcast float %118 to i32
%195 = insertelement <2 x i32> undef, i32 %193, i32 0
%196 = insertelement <2 x i32> %195, i32 %194, i32 1
%197 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %196, <32 x i8> %95, <16 x i8> %98, i32 2)
%198 = extractelement <4 x float> %197, i32 0
%199 = extractelement <4 x float> %197, i32 1
%200 = extractelement <4 x float> %197, i32 2
%201 = fmul float %76, %198
%202 = fmul float %77, %199
%203 = fmul float %78, %200
%204 = call float @llvm.AMDGPU.lrp(float %80, float %201, float %66)
%205 = call float @llvm.AMDGPU.lrp(float %80, float %202, float %67)
%206 = call float @llvm.AMDGPU.lrp(float %80, float %203, float %68)
%207 = fmul float %80, %69
%208 = fsub float %69, %207
%209 = fmul float %201, %208
%210 = fmul float %202, %208
%211 = fmul float %203, %208
%212 = bitcast float %117 to i32
%213 = bitcast float %118 to i32
%214 = insertelement <2 x i32> undef, i32 %212, i32 0
%215 = insertelement <2 x i32> %214, i32 %213, i32 1
%216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %215, <32 x i8> %107, <16 x i8> %110, i32 2)
%217 = extractelement <4 x float> %216, i32 1
%218 = fsub float 1.000000e+00, %82
%219 = fmul float %217, %82
%220 = fadd float %219, %218
%221 = fmul float %181, %24
%222 = fmul float %182, %25
%223 = fadd float %222, %221
%224 = fmul float %183, %26
%225 = fadd float %223, %224
%226 = call float @llvm.maxnum.f32(float %225, float 0.000000e+00)
%227 = fmul float %27, %181
%228 = fmul float %28, %182
%229 = fadd float %227, %228
%230 = fmul float %29, %183
%231 = fadd float %229, %230
%232 = fadd float %231, %30
%233 = fmul float %31, %181
%234 = fmul float %32, %182
%235 = fadd float %233, %234
%236 = fmul float %33, %183
%237 = fadd float %235, %236
%238 = fadd float %237, %34
%239 = fmul float %35, %181
%240 = fmul float %36, %182
%241 = fadd float %239, %240
%242 = fmul float %37, %183
%243 = fadd float %241, %242
%244 = fadd float %243, %38
%245 = fadd float %128, %232
%246 = fadd float %129, %238
%247 = fadd float %130, %244
%248 = fdiv float %131, %133
%249 = fdiv float %132, %133
%250 = bitcast float %248 to i32
%251 = bitcast float %249 to i32
%252 = insertelement <2 x i32> undef, i32 %250, i32 0
%253 = insertelement <2 x i32> %252, i32 %251, i32 1
%254 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %253, <32 x i8> %113, <16 x i8> %116, i32 2)
%255 = extractelement <4 x float> %254, i32 0
%256 = fmul float %70, %255
%257 = fmul float %71, %255
%258 = fmul float %72, %255
%259 = fmul float %245, %220
%260 = fmul float %246, %220
%261 = fmul float %247, %220
%262 = fmul float %181, %190
%263 = fmul float %182, %191
%264 = fadd float %263, %262
%265 = fmul float %183, %192
%266 = fadd float %264, %265
%267 = fmul float %266, %181
%268 = fmul float %266, %182
%269 = fmul float %266, %183
%270 = fmul float %267, 2.000000e+00
%271 = fmul float %268, 2.000000e+00
%272 = fmul float %269, 2.000000e+00
%273 = fsub float %190, %270
%274 = fsub float %191, %271
%275 = fsub float %192, %272
%276 = fcmp ogt float %51, 0.000000e+00
br i1 %276, label %IF, label %ENDIF
IF: ; preds = %main_body
%277 = call float @llvm.SI.load.const(<16 x i8> %23, i32 168)
%278 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164)
%279 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160)
%280 = fmul float %273, %273
%281 = fmul float %274, %274
%282 = fadd float %281, %280
%283 = fmul float %275, %275
%284 = fadd float %282, %283
%285 = call float @llvm.AMDGPU.rsq.clamped.f32(float %284)
%286 = fmul float %273, %285
%287 = fmul float %274, %285
%288 = fmul float %275, %285
%289 = fsub float %44, %138
%290 = fsub float %45, %139
%291 = fsub float %46, %140
%292 = fdiv float 1.000000e+00, %286
%293 = fdiv float 1.000000e+00, %287
%294 = fdiv float 1.000000e+00, %288
%295 = fmul float %289, %292
%296 = fmul float %290, %293
%297 = fmul float %291, %294
%298 = fsub float %47, %138
%299 = fsub float %48, %139
%300 = fsub float %49, %140
%301 = fdiv float 1.000000e+00, %286
%302 = fdiv float 1.000000e+00, %287
%303 = fdiv float 1.000000e+00, %288
%304 = fmul float %298, %301
%305 = fmul float %299, %302
%306 = fmul float %300, %303
%307 = fcmp ogt float %286, 0.000000e+00
%308 = fcmp ogt float %287, 0.000000e+00
%309 = fcmp ogt float %288, 0.000000e+00
%. = select i1 %307, float %295, float %304
%temp68.0 = select i1 %308, float %296, float %305
%.100 = select i1 %309, float %297, float %306
%310 = fadd float %44, %47
%311 = fadd float %45, %48
%312 = fadd float %46, %49
%313 = fmul float %310, 5.000000e-01
%314 = fmul float %311, 5.000000e-01
%315 = fmul float %312, 5.000000e-01
%316 = call float @llvm.minnum.f32(float %., float %temp68.0)
%317 = call float @llvm.minnum.f32(float %316, float %.100)
%318 = fsub float %313, %279
%319 = fsub float %314, %278
%320 = fsub float %315, %277
%321 = fadd float %318, %138
%322 = fadd float %319, %139
%323 = fadd float %320, %140
%324 = fmul float %286, %317
%325 = fadd float %324, %321
%326 = fmul float %287, %317
%327 = fadd float %326, %322
%328 = fmul float %288, %317
%329 = fadd float %328, %323
%330 = fsub float %325, %313
%331 = fsub float %327, %314
%332 = fsub float %329, %315
br label %ENDIF
ENDIF: ; preds = %main_body, %IF
%temp44.0 = phi float [ %330, %IF ], [ %273, %main_body ]
%temp45.0 = phi float [ %331, %IF ], [ %274, %main_body ]
%temp46.0 = phi float [ %332, %IF ], [ %275, %main_body ]
%333 = fsub float 1.000000e+00, %81
%334 = call float @llvm.pow.f32(float %333, float 7.500000e-01)
%335 = fmul float %334, 7.000000e+00
%336 = insertelement <4 x float> undef, float %temp44.0, i32 0
%337 = insertelement <4 x float> %336, float %temp45.0, i32 1
%338 = insertelement <4 x float> %337, float %temp46.0, i32 2
%339 = insertelement <4 x float> %338, float %335, i32 3
%340 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %339)
%341 = extractelement <4 x float> %340, i32 0
%342 = extractelement <4 x float> %340, i32 1
%343 = extractelement <4 x float> %340, i32 2
%344 = extractelement <4 x float> %340, i32 3
%345 = call float @llvm.fabs.f32(float %343)
%346 = fdiv float 1.000000e+00, %345
%347 = fmul float %341, %346
%348 = fadd float %347, 1.500000e+00
%349 = fmul float %342, %346
%350 = fadd float %349, 1.500000e+00
%351 = bitcast float %350 to i32
%352 = bitcast float %348 to i32
%353 = bitcast float %344 to i32
%354 = bitcast float %335 to i32
%355 = insertelement <4 x i32> undef, i32 %351, i32 0
%356 = insertelement <4 x i32> %355, i32 %352, i32 1
%357 = insertelement <4 x i32> %356, i32 %353, i32 2
%358 = insertelement <4 x i32> %357, i32 %354, i32 3
%359 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %358, <32 x i8> %84, <16 x i8> %86, i32 4)
%360 = extractelement <4 x float> %359, i32 0
%361 = extractelement <4 x float> %359, i32 1
%362 = extractelement <4 x float> %359, i32 2
%363 = extractelement <4 x float> %359, i32 3
%364 = call float @llvm.pow.f32(float %363, float %53)
%365 = fmul float %52, %364
%366 = fmul float %365, %360
%367 = fmul float %365, %361
%368 = fmul float %365, %362
%369 = fcmp olt float %50, 0x3FEFFFEB00000000
br i1 %369, label %IF86, label %ENDIF85
IF86: ; preds = %ENDIF
%370 = fcmp ogt float %63, 0.000000e+00
br i1 %370, label %IF89, label %ENDIF88
ENDIF85: ; preds = %ENDIF, %ENDIF88
%temp28.0 = phi float [ %598, %ENDIF88 ], [ %366, %ENDIF ]
%temp29.0 = phi float [ %599, %ENDIF88 ], [ %367, %ENDIF ]
%temp30.0 = phi float [ %600, %ENDIF88 ], [ %368, %ENDIF ]
%371 = fmul float %temp28.0, %220
%372 = fmul float %temp29.0, %220
%373 = fmul float %temp30.0, %220
%374 = fsub float 1.000000e+00, %81
%375 = fsub float %24, %190
%376 = fsub float %25, %191
%377 = fsub float %26, %192
%378 = fmul float %375, %375
%379 = fmul float %376, %376
%380 = fadd float %379, %378
%381 = fmul float %377, %377
%382 = fadd float %380, %381
%383 = call float @llvm.AMDGPU.rsq.clamped.f32(float %382)
%384 = fmul float %375, %383
%385 = fmul float %376, %383
%386 = fmul float %377, %383
%387 = fmul float %190, %181
%388 = fsub float -0.000000e+00, %387
%389 = fmul float %191, %182
%390 = fsub float %388, %389
%391 = fmul float %192, %183
%392 = fsub float %390, %391
%393 = call float @llvm.maxnum.f32(float %392, float 0.000000e+00)
%394 = fmul float %24, %384
%395 = fmul float %25, %385
%396 = fadd float %395, %394
%397 = fmul float %26, %386
%398 = fadd float %396, %397
%399 = call float @llvm.maxnum.f32(float %398, float 0.000000e+00)
%400 = fmul float %374, %374
%401 = fmul float %400, %75
%402 = fsub float 1.000000e+00, %374
%403 = fmul float %402, 0x3FEEF9DB20000000
%404 = fadd float %403, 0x3F9EB851E0000000
%405 = call float @llvm.log2.f32(float %404)
%406 = fdiv float 1.000000e+00, %405
%407 = fmul float %406, 1.000000e+01
%408 = fmul float %407, %407
%409 = fsub float 1.000000e+00, %226
%410 = fsub float 1.000000e+00, %393
%411 = fmul float %399, 2.000000e+00
%412 = fmul float %399, %374
%413 = fmul float %411, %412
%414 = fadd float %413, 5.000000e-01
%415 = fsub float 1.000000e+00, %399
%416 = fsub float 1.000000e+00, %393
%417 = fsub float 1.000000e+00, %208
%418 = fadd float %81, %417
%419 = call float @llvm.AMDIL.clamp.(float %418, float 0.000000e+00, float 1.000000e+00)
%420 = fmul float %416, %416
%421 = fmul float %416, %416
%422 = fmul float %421, %416
%423 = fmul float %420, %422
%424 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %204)
%425 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %205)
%426 = call float @llvm.AMDGPU.lrp(float %423, float %419, float %206)
%427 = call float @llvm.AMDGPU.lrp(float %226, float 1.000000e+00, float %401)
%428 = call float @llvm.AMDGPU.lrp(float %393, float 1.000000e+00, float %401)
%429 = fmul float %427, %428
%430 = fadd float %429, 0x3F1A36E2E0000000
%431 = fdiv float 1.000000e+00, %430
%432 = fmul float %181, %384
%433 = fmul float %182, %385
%434 = fadd float %433, %432
%435 = fmul float %183, %386
%436 = fadd float %434, %435
%437 = call float @llvm.maxnum.f32(float %436, float 0.000000e+00)
%438 = call float @llvm.pow.f32(float %437, float %408)
%439 = fadd float %408, 1.000000e+00
%440 = fmul float %439, %74
%441 = fmul float %438, %440
%442 = fmul float %431, %441
%443 = fmul float %442, %226
%444 = fmul float %443, %73
%445 = call float @llvm.maxnum.f32(float %444, float 0.000000e+00)
%446 = fmul float %445, %256
%447 = fmul float %445, %257
%448 = fmul float %445, %258
%449 = fsub float 1.000000e+00, %204
%450 = fsub float 1.000000e+00, %205
%451 = fsub float 1.000000e+00, %206
%452 = fmul float %415, %415
%453 = fmul float %415, %415
%454 = fmul float %453, %415
%455 = fmul float %452, %454
%456 = fmul float %449, %455
%457 = fadd float %456, %204
%458 = fmul float %450, %455
%459 = fadd float %458, %205
%460 = fmul float %451, %455
%461 = fadd float %460, %206
%462 = fadd float %414, -1.000000e+00
%463 = fmul float %409, %409
%464 = fmul float %409, %409
%465 = fmul float %464, %409
%466 = fmul float %463, %465
%467 = fmul float %462, %466
%468 = fadd float %467, 1.000000e+00
%469 = fadd float %414, -1.000000e+00
%470 = fmul float %410, %410
%471 = fmul float %410, %410
%472 = fmul float %471, %410
%473 = fmul float %470, %472
%474 = fmul float %469, %473
%475 = fadd float %474, 1.000000e+00
%476 = fmul float %468, %475
%477 = fmul float %476, %226
%478 = fmul float %256, %477
%479 = fadd float %478, %259
%480 = fmul float %257, %477
%481 = fadd float %480, %260
%482 = fmul float %258, %477
%483 = fadd float %482, %261
%484 = fmul float %209, %479
%485 = fmul float %210, %481
%486 = fmul float %211, %483
%487 = fmul float %446, %457
%488 = fadd float %487, %484
%489 = fmul float %447, %459
%490 = fadd float %489, %485
%491 = fmul float %448, %461
%492 = fadd float %491, %486
%493 = fmul float %371, %424
%494 = fadd float %493, %488
%495 = fmul float %372, %425
%496 = fadd float %495, %490
%497 = fmul float %373, %426
%498 = fadd float %497, %492
%499 = fmul float %134, %42
%500 = fadd float %499, %43
%501 = call float @llvm.AMDIL.clamp.(float %500, float 0.000000e+00, float 1.000000e+00)
%502 = call float @llvm.AMDGPU.lrp(float %501, float %494, float %39)
%503 = call float @llvm.AMDGPU.lrp(float %501, float %496, float %40)
%504 = call float @llvm.AMDGPU.lrp(float %501, float %498, float %41)
%505 = call i32 @llvm.SI.packf16(float %502, float %503)
%506 = bitcast i32 %505 to float
%507 = call i32 @llvm.SI.packf16(float %504, float 1.000000e+00)
%508 = bitcast i32 %507 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %506, float %508, float %506, float %508)
ret void
IF89: ; preds = %IF86
%509 = fmul float %273, %273
%510 = fmul float %274, %274
%511 = fadd float %510, %509
%512 = fmul float %275, %275
%513 = fadd float %511, %512
%514 = call float @llvm.AMDGPU.rsq.clamped.f32(float %513)
%515 = fmul float %273, %514
%516 = fmul float %274, %514
%517 = fmul float %275, %514
%518 = fsub float %54, %138
%519 = fsub float %55, %139
%520 = fsub float %56, %140
%521 = fdiv float 1.000000e+00, %515
%522 = fdiv float 1.000000e+00, %516
%523 = fdiv float 1.000000e+00, %517
%524 = fmul float %518, %521
%525 = fmul float %519, %522
%526 = fmul float %520, %523
%527 = fsub float %57, %138
%528 = fsub float %58, %139
%529 = fsub float %59, %140
%530 = fdiv float 1.000000e+00, %515
%531 = fdiv float 1.000000e+00, %516
%532 = fdiv float 1.000000e+00, %517
%533 = fmul float %527, %530
%534 = fmul float %528, %531
%535 = fmul float %529, %532
%536 = fcmp ogt float %515, 0.000000e+00
%537 = fcmp ogt float %516, 0.000000e+00
%538 = fcmp ogt float %517, 0.000000e+00
%.101 = select i1 %536, float %524, float %533
%temp68.1 = select i1 %537, float %525, float %534
%.102 = select i1 %538, float %526, float %535
%539 = fadd float %54, %57
%540 = fadd float %55, %58
%541 = fadd float %56, %59
%542 = fmul float %539, 5.000000e-01
%543 = fmul float %540, 5.000000e-01
%544 = fmul float %541, 5.000000e-01
%545 = call float @llvm.minnum.f32(float %.101, float %temp68.1)
%546 = call float @llvm.minnum.f32(float %545, float %.102)
%547 = fsub float %542, %60
%548 = fsub float %543, %61
%549 = fsub float %544, %62
%550 = fadd float %547, %138
%551 = fadd float %548, %139
%552 = fadd float %549, %140
%553 = fmul float %515, %546
%554 = fadd float %553, %550
%555 = fmul float %516, %546
%556 = fadd float %555, %551
%557 = fmul float %517, %546
%558 = fadd float %557, %552
%559 = fsub float %554, %542
%560 = fsub float %556, %543
%561 = fsub float %558, %544
br label %ENDIF88
ENDIF88: ; preds = %IF86, %IF89
%temp48.0 = phi float [ %559, %IF89 ], [ %273, %IF86 ]
%temp49.0 = phi float [ %560, %IF89 ], [ %274, %IF86 ]
%temp50.0 = phi float [ %561, %IF89 ], [ %275, %IF86 ]
%562 = fsub float 1.000000e+00, %81
%563 = call float @llvm.pow.f32(float %562, float 7.500000e-01)
%564 = fmul float %563, 7.000000e+00
%565 = insertelement <4 x float> undef, float %temp48.0, i32 0
%566 = insertelement <4 x float> %565, float %temp49.0, i32 1
%567 = insertelement <4 x float> %566, float %temp50.0, i32 2
%568 = insertelement <4 x float> %567, float %564, i32 3
%569 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %568)
%570 = extractelement <4 x float> %569, i32 0
%571 = extractelement <4 x float> %569, i32 1
%572 = extractelement <4 x float> %569, i32 2
%573 = extractelement <4 x float> %569, i32 3
%574 = call float @llvm.fabs.f32(float %572)
%575 = fdiv float 1.000000e+00, %574
%576 = fmul float %570, %575
%577 = fadd float %576, 1.500000e+00
%578 = fmul float %571, %575
%579 = fadd float %578, 1.500000e+00
%580 = bitcast float %579 to i32
%581 = bitcast float %577 to i32
%582 = bitcast float %573 to i32
%583 = bitcast float %564 to i32
%584 = insertelement <4 x i32> undef, i32 %580, i32 0
%585 = insertelement <4 x i32> %584, i32 %581, i32 1
%586 = insertelement <4 x i32> %585, i32 %582, i32 2
%587 = insertelement <4 x i32> %586, i32 %583, i32 3
%588 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %587, <32 x i8> %89, <16 x i8> %92, i32 4)
%589 = extractelement <4 x float> %588, i32 0
%590 = extractelement <4 x float> %588, i32 1
%591 = extractelement <4 x float> %588, i32 2
%592 = extractelement <4 x float> %588, i32 3
%593 = call float @llvm.pow.f32(float %592, float %65)
%594 = fmul float %64, %593
%595 = fmul float %594, %589
%596 = fmul float %594, %590
%597 = fmul float %594, %591
%598 = call float @llvm.AMDGPU.lrp(float %50, float %366, float %595)
%599 = call float @llvm.AMDGPU.lrp(float %50, float %367, float %596)
%600 = call float @llvm.AMDGPU.lrp(float %50, float %368, float %597)
br label %ENDIF85
}
; ---------------------------------------------------------------------------
; External intrinsics referenced by the shader body above, followed by the
; attribute groups and metadata that the body and these declarations use.
; The notes below describe only how each intrinsic is used in this dump.
; ---------------------------------------------------------------------------
; load.const: called with a <16 x i8> value and a constant i32 (160/164/168
; in the first IF block). Those constants look like byte offsets: the matching
; s_buffer_load_dword instructions use 0x28/0x29/0x2a -- TODO confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; fs.interp: the first two operands are always constants in this shader; each
; call shows up in the disassembly as a v_interp_p1_f32/v_interp_p2_f32 pair
; carrying the same two constants in the same order.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; sample.v2i32: operands are a 2-component coordinate (floats bitcast to i32),
; a <32 x i8> value loaded from the %3 array, a <16 x i8> value loaded from the
; %2 array, and a constant i32 (always 2 here). Returns four floats.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; AMDIL.clamp.: only ever called as clamp(x, 0.0, 1.0) in the visible body.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; rsq.clamped: applied to a sum of squares, and the result scales each vector
; component; it appears as v_rsq_clamp_f32 in the disassembly.
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; lrp(a, x, y): the disassembly for %204..%206 computes (1.0 - a) * y and then
; accumulates a * x, i.e. a*x + (1-a)*y -- inferred from that lowering.
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; cube: takes a 4-float vector and returns four floats. The body divides
; elements 0 and 1 by |element 2|, adds 1.5 to each, and passes element 3
; through unchanged as a coordinate. The disassembly shows
; v_cubeid/v_cubema/v_cubesc/v_cubetc for this call.
; Function Attrs: readnone
declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; samplel.v4i32: same operand layout as sample.v2i32 but with a 4-component
; coordinate vector; the constant i32 is 4 at both call sites. The one call
; visible in the disassembly appears as image_sample_l.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; packf16: combines two floats into one i32; the body bitcasts the result back
; to float before handing it to llvm.SI.export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; export: no attribute group attached. Called once at the end of the body with
; five constant i32 operands (15, 1, 1, 0, 1) and the packed colour values.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; #0 is not referenced in this section; presumably it is attached to the @main
; definition, whose header lies above this excerpt -- verify.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
; #1 is used by every declaration above except clamp, lrp, cube and export.
attributes #1 = { nounwind readnone }
; #2 (readnone without nounwind) is used by clamp, lrp and cube.
attributes #2 = { readnone }
; !0 is the !tbaa tag placed on the loads from the %2 and %3 arrays in the body.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000
v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001
v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100
v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s2, s[12:15], 0x58 ; C2010D58
s_buffer_load_dword s1, s[12:15], 0x5c ; C2008D5C
s_buffer_load_dword s0, s[12:15], 0x60 ; C2000D60
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 0, 2, [m0] ; C8240800
v_interp_p2_f32 v9, [v9], v1, 0, 2, [m0] ; C8250801
v_interp_p1_f32 v10, v0, 1, 2, [m0] ; C8280900
v_interp_p2_f32 v10, [v10], v1, 1, 2, [m0] ; C8290901
v_interp_p1_f32 v11, v0, 2, 2, [m0] ; C82C0A00
v_interp_p2_f32 v11, [v11], v1, 2, 2, [m0] ; C82D0A01
v_interp_p1_f32 v13, v0, 0, 3, [m0] ; C8340C00
v_interp_p2_f32 v13, [v13], v1, 0, 3, [m0] ; C8350C01
v_interp_p1_f32 v18, v0, 1, 3, [m0] ; C8480D00
v_interp_p2_f32 v18, [v18], v1, 1, 3, [m0] ; C8490D01
v_interp_p1_f32 v19, v0, 2, 3, [m0] ; C84C0E00
v_interp_p2_f32 v19, [v19], v1, 2, 3, [m0] ; C84D0E01
v_interp_p1_f32 v2, v0, 0, 4, [m0] ; C8081000
v_interp_p2_f32 v2, [v2], v1, 0, 4, [m0] ; C8091001
v_interp_p1_f32 v3, v0, 1, 4, [m0] ; C80C1100
v_interp_p2_f32 v3, [v3], v1, 1, 4, [m0] ; C80D1101
v_interp_p1_f32 v5, v0, 2, 4, [m0] ; C8141200
v_interp_p2_f32 v5, [v5], v1, 2, 4, [m0] ; C8151201
v_interp_p1_f32 v20, v0, 0, 5, [m0] ; C8501400
v_interp_p2_f32 v20, [v20], v1, 0, 5, [m0] ; C8511401
v_interp_p1_f32 v21, v0, 1, 5, [m0] ; C8541500
v_interp_p2_f32 v21, [v21], v1, 1, 5, [m0] ; C8551501
v_interp_p1_f32 v22, v0, 3, 5, [m0] ; C8581700
v_interp_p2_f32 v22, [v22], v1, 3, 5, [m0] ; C8591701
v_interp_p1_f32 v4, v0, 0, 6, [m0] ; C8101800
v_interp_p2_f32 v4, [v4], v1, 0, 6, [m0] ; C8111801
v_interp_p1_f32 v23, v0, 1, 6, [m0] ; C85C1900
v_interp_p2_f32 v23, [v23], v1, 1, 6, [m0] ; C85D1901
v_interp_p1_f32 v24, v0, 2, 6, [m0] ; C8601A00
v_interp_p2_f32 v24, [v24], v1, 2, 6, [m0] ; C8611A01
s_load_dwordx4 s[8:11], s[4:5], 0xc ; C084050C
s_load_dwordx4 s[16:19], s[4:5], 0x10 ; C0880510
s_load_dwordx4 s[20:23], s[4:5], 0x14 ; C08A0514
s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710
v_interp_p1_f32 v25, v0, 3, 6, [m0] ; C8641B00
v_interp_p2_f32 v25, [v25], v1, 3, 6, [m0] ; C8651B01
v_interp_p1_f32 v14, v0, 0, 7, [m0] ; C8381C00
v_interp_p2_f32 v14, [v14], v1, 0, 7, [m0] ; C8391C01
v_interp_p1_f32 v12, v0, 1, 7, [m0] ; C8301D00
v_interp_p2_f32 v12, [v12], v1, 1, 7, [m0] ; C8311D01
v_interp_p1_f32 v15, v0, 2, 7, [m0] ; C83C1E00
v_interp_p2_f32 v15, [v15], v1, 2, 7, [m0] ; C83D1E01
s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718
s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720
s_load_dwordx8 s[32:39], s[6:7], 0x28 ; C0D00728
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[0:1], 10, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[52:59], s[8:11] ; F0800A00 004D0010
image_sample v[30:32], 7, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[44:51], s[40:43] ; F0800700 014B1E10
s_waitcnt vmcnt(1) ; BF8C0771
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4
v_mul_f32_e32 v1, s2, v1 ; 10020202
v_mul_f32_e32 v0, s2, v0 ; 10000002
v_mul_f32_e32 v6, v6, v1 ; 100C0306
v_mac_f32_e32 v6, v9, v0 ; 3E0C0109
v_mul_f32_e32 v7, v7, v1 ; 100E0307
v_mac_f32_e32 v7, v10, v0 ; 3E0E010A
v_mul_f32_e32 v10, v8, v1 ; 10140308
v_mac_f32_e32 v10, v11, v0 ; 3E14010B
v_mul_f32_e32 v0, v0, v0 ; 10000100
v_mac_f32_e32 v0, v1, v1 ; 3E000301
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_sub_f32_e32 v0, 1.0, v0 ; 080000F2
v_sqrt_f32_e32 v0, v0 ; 7E006700
v_mac_f32_e32 v6, v13, v0 ; 3E0C010D
v_mac_f32_e32 v7, v18, v0 ; 3E0E0112
v_mac_f32_e32 v10, v19, v0 ; 3E140113
v_mul_f32_e32 v0, v6, v6 ; 10000D06
v_mac_f32_e32 v0, v7, v7 ; 3E000F07
v_mac_f32_e32 v0, v10, v10 ; 3E00150A
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v1, v23, v23 ; 10022F17
v_mac_f32_e32 v1, v24, v24 ; 3E023118
v_mac_f32_e32 v1, v25, v25 ; 3E023319
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
v_mul_f32_e32 v9, v0, v6 ; 10120D00
v_mul_f32_e32 v8, v0, v7 ; 10100F00
v_mul_f32_e32 v7, v0, v10 ; 100E1500
v_mul_f32_e32 v11, v1, v23 ; 10162F01
v_mul_f32_e32 v10, v1, v24 ; 10143101
v_mul_f32_e32 v0, v11, v9 ; 1000130B
v_mac_f32_e32 v0, v10, v8 ; 3E00110A
v_mul_f32_e32 v13, v1, v25 ; 101A3301
v_mac_f32_e32 v0, v13, v7 ; 3E000F0D
v_mul_f32_e32 v6, v9, v0 ; 100C0109
v_mac_f32_e32 v6, v9, v0 ; 3E0C0109
v_mul_f32_e32 v18, v8, v0 ; 10240108
v_mac_f32_e32 v18, v8, v0 ; 3E240108
v_mad_f32 v27, v23, v1, -v6 ; D282001B 841A0317
v_mad_f32 v28, v24, v1, -v18 ; D282001C 844A0318
v_mul_f32_e32 v6, v7, v0 ; 100C0107
v_mac_f32_e32 v6, v7, v0 ; 3E0C0107
s_buffer_load_dword s2, s[12:15], 0x4c ; C2010D4C
s_buffer_load_dword s3, s[12:15], 0x4d ; C2018D4D
s_buffer_load_dword s8, s[12:15], 0x4e ; C2040D4E
v_mad_f32 v29, v25, v1, -v6 ; D282001D 841A0319
v_mov_b32_e32 v0, 0x6f800000 ; 7E0002FF 6F800000
v_cmp_gt_f32_e64 vcc, |v22|, v0 ; D008016A 00020116
v_mov_b32_e32 v0, 0x2f800000 ; 7E0002FF 2F800000
v_cndmask_b32_e32 v0, 1.0, v0 ; 000000F2
v_mul_f32_e32 v1, v0, v22 ; 10022D00
v_rcp_f32_e32 v1, v1 ; 7E025501
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v24, s2, v30 ; 10303C02
v_mul_f32_e32 v25, s3, v31 ; 10323E03
v_mul_f32_e32 v26, s8, v32 ; 10344008
v_mul_f32_e32 v6, v1, v20 ; 100C2901
v_mul_f32_e32 v1, v1, v21 ; 10022B01
s_buffer_load_dword s2, s[12:15], 0x40 ; C2010D40
s_buffer_load_dword s3, s[12:15], 0x41 ; C2018D41
s_buffer_load_dword s8, s[12:15], 0x42 ; C2040D42
v_mul_f32_e32 v18, v6, v0 ; 10240106
v_mul_f32_e32 v19, v1, v0 ; 10260101
s_buffer_load_dword s9, s[12:15], 0x27 ; C2048D27
s_buffer_load_dword s10, s[12:15], 0x2b ; C2050D2B
s_buffer_load_dword s40, s[12:15], 0x2c ; C2140D2C
s_buffer_load_dword s41, s[12:15], 0x2d ; C2148D2D
v_sub_f32_e64 v0, 1.0, s1 ; D2080000 000002F2
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v6, s2, v0 ; 100C0002
v_mul_f32_e32 v1, s3, v0 ; 10020003
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_mac_f32_e32 v6, s1, v24 ; 3E0C3001
v_mov_b32_e32 v30, v27 ; 7E3C031B
v_mac_f32_e32 v1, s1, v25 ; 3E023201
v_mov_b32_e32 v31, v28 ; 7E3E031C
v_mac_f32_e32 v0, s1, v26 ; 3E003401
v_mov_b32_e32 v32, v29 ; 7E40031D
v_cmp_lt_f32_e64 s[2:3], 0, s10 ; D0020002 00001480
image_sample v[20:23], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[24:31], s[16:19] ; F0800F00 00861410
image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[32:39], s[20:23] ; F0800F00 00A81012
s_waitcnt vmcnt(0) ; BF8C0770
s_and_saveexec_b64 s[18:19], s[2:3] ; BE922402
s_xor_b64 s[18:19], exec, s[18:19] ; 8992127E
s_cbranch_execz BB0_2 ; BF880000
s_buffer_load_dword s2, s[12:15], 0x20 ; C2010D20
s_buffer_load_dword s3, s[12:15], 0x21 ; C2018D21
s_buffer_load_dword s8, s[12:15], 0x22 ; C2040D22
s_buffer_load_dword s10, s[12:15], 0x24 ; C2050D24
s_buffer_load_dword s11, s[12:15], 0x25 ; C2058D25
v_mul_f32_e32 v17, v27, v27 ; 1022371B
v_mac_f32_e32 v17, v28, v28 ; 3E22391C
v_mac_f32_e32 v17, v29, v29 ; 3E223B1D
v_rsq_clamp_f32_e32 v17, v17 ; 7E225911
s_buffer_load_dword s16, s[12:15], 0x26 ; C2080D26
s_buffer_load_dword s17, s[12:15], 0x28 ; C2088D28
s_buffer_load_dword s20, s[12:15], 0x29 ; C20A0D29
s_buffer_load_dword s21, s[12:15], 0x2a ; C20A8D2A
v_mul_f32_e32 v18, v17, v27 ; 10243711
v_mul_f32_e32 v19, v17, v28 ; 10263911
v_mul_f32_e32 v17, v17, v29 ; 10223B11
v_rcp_f32_e32 v20, v18 ; 7E285512
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v22, s2, v14 ; 082C1C02
v_sub_f32_e32 v23, s3, v12 ; 082E1803
v_rcp_f32_e32 v30, v19 ; 7E3C5513
v_mul_f32_e32 v22, v20, v22 ; 102C2D14
v_sub_f32_e32 v31, s10, v14 ; 083E1C0A
v_mul_f32_e32 v20, v20, v31 ; 10283F14
v_cmp_lt_f32_e32 vcc, 0, v18 ; 7C022480
v_cndmask_b32_e32 v20, v20, v22 ; 00282D14
v_rcp_f32_e32 v22, v17 ; 7E2C5511
v_mul_f32_e32 v23, v30, v23 ; 102E2F1E
v_sub_f32_e32 v31, s11, v12 ; 083E180B
v_mul_f32_e32 v30, v30, v31 ; 103C3F1E
v_cmp_lt_f32_e32 vcc, 0, v19 ; 7C022680
v_cndmask_b32_e32 v23, v30, v23 ; 002E2F1E
v_sub_f32_e32 v30, s8, v15 ; 083C1E08
v_mul_f32_e32 v30, v22, v30 ; 103C3D16
v_sub_f32_e32 v31, s16, v15 ; 083E1E10
v_mul_f32_e32 v22, v22, v31 ; 102C3F16
v_cmp_lt_f32_e32 vcc, 0, v17 ; 7C022280
v_cndmask_b32_e32 v22, v22, v30 ; 002C3D16
v_min3_f32 v20, v20, v23, v22 ; D2A20014 045A2F14
v_mov_b32_e32 v22, s10 ; 7E2C020A
v_add_f32_e32 v22, s2, v22 ; 062C2C02
v_mov_b32_e32 v23, s11 ; 7E2E020B
v_add_f32_e32 v23, s3, v23 ; 062E2E03
v_mov_b32_e32 v30, s16 ; 7E3C0210
v_add_f32_e32 v32, s8, v30 ; 06403C08
v_mad_f32 v30, 0.5, v22, -s17 ; D282001E 80462CF0
v_add_f32_e32 v30, v14, v30 ; 063C3D0E
v_mac_f32_e32 v30, v20, v18 ; 3E3C2514
v_mad_f32 v18, 0.5, v23, -s20 ; D2820012 80522EF0
v_add_f32_e32 v18, v12, v18 ; 0624250C
v_mac_f32_e32 v18, v20, v19 ; 3E242714
v_mad_f32 v19, 0.5, v32, -s21 ; D2820013 805640F0
v_add_f32_e32 v19, v15, v19 ; 0626270F
v_mac_f32_e32 v19, v20, v17 ; 3E262314
v_mad_f32 v30, 0.5, -v22, v30 ; D282001E 447A2CF0
v_mad_f32 v31, 0.5, -v23, v18 ; D282001F 444A2EF0
v_mad_f32 v32, 0.5, -v32, v19 ; D2820020 444E40F0
s_or_b64 exec, exec, s[18:19] ; 88FE127E
s_buffer_load_dword s28, s[12:15], 0x17 ; C20E0D17
s_buffer_load_dword s29, s[12:15], 0x43 ; C20E8D43
s_buffer_load_dword s27, s[12:15], 0x44 ; C20D8D44
s_buffer_load_dword s20, s[12:15], 0x45 ; C20A0D45
s_buffer_load_dword s17, s[12:15], 0x46 ; C2088D46
s_buffer_load_dword s3, s[12:15], 0x0 ; C2018D00
s_buffer_load_dword s8, s[12:15], 0x1 ; C2040D01
s_buffer_load_dword s2, s[12:15], 0x2 ; C2010D02
s_buffer_load_dword s10, s[12:15], 0x4 ; C2050D04
s_buffer_load_dword s11, s[12:15], 0x5 ; C2058D05
s_buffer_load_dword s16, s[12:15], 0x6 ; C2080D06
s_buffer_load_dword s18, s[12:15], 0x7 ; C2090D07
s_buffer_load_dword s19, s[12:15], 0x8 ; C2098D08
s_buffer_load_dword s21, s[12:15], 0x9 ; C20A8D09
s_buffer_load_dword s22, s[12:15], 0xa ; C20B0D0A
s_buffer_load_dword s23, s[12:15], 0xb ; C20B8D0B
s_buffer_load_dword s24, s[12:15], 0xc ; C20C0D0C
s_buffer_load_dword s25, s[12:15], 0xd ; C20C8D0D
s_buffer_load_dword s26, s[12:15], 0xe ; C20D0D0E
v_sub_f32_e64 v17, 1.0, s0 ; D2080011 000000F2
v_log_f32_e32 v17, v17 ; 7E224F11
v_mul_legacy_f32_e32 v17, 0x3f400000, v17 ; 0E2222FF 3F400000
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v33, 0x40e00000, v17 ; 104222FF 40E00000
v_cubeid_f32 v20, v30, v31, v32 ; D2880014 04823F1E
v_cubema_f32 v19, v30, v31, v32 ; D28E0013 04823F1E
s_load_dwordx4 s[32:35], s[4:5], 0x0 ; C0900500
s_load_dwordx8 s[44:51], s[6:7], 0x0 ; C0D60700
v_cubesc_f32 v18, v30, v31, v32 ; D28A0012 04823F1E
v_cubetc_f32 v17, v30, v31, v32 ; D28C0011 04823F1E
v_rcp_f32_e64 v19, |v19| ; D3540113 00000113
v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000
v_mad_f32 v31, v19, v17, v30 ; D282001F 047A2313
v_mac_f32_e32 v30, v19, v18 ; 3E3C2513
v_mov_b32_e32 v32, v20 ; 7E400314
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[30:33], 15, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[44:51], s[32:35] ; F0900F00 010B1E1E
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v17, v33 ; 7E224F21
s_buffer_load_dword s30, s[12:15], 0xf ; C20F0D0F
s_buffer_load_dword s31, s[12:15], 0x68 ; C20F8D68
v_mul_legacy_f32_e32 v17, s41, v17 ; 0E222229
v_exp_f32_e32 v17, v17 ; 7E224B11
v_mul_f32_e32 v17, s40, v17 ; 10222228
v_mul_f32_e32 v19, v30, v17 ; 1026231E
v_mul_f32_e32 v18, v31, v17 ; 1024231F
v_mul_f32_e32 v17, v32, v17 ; 10222320
v_mov_b32_e32 v20, s1 ; 7E280201
v_mov_b32_e32 v22, 0x3f7fff58 ; 7E2C02FF 3F7FFF58
v_cmp_lt_f32_e32 vcc, s9, v22 ; 7C022C09
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[32:33], vcc ; BEA0246A
s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E
s_cbranch_execz BB0_6 ; BF880000
s_buffer_load_dword s35, s[12:15], 0x3b ; C2118D3B
s_buffer_load_dword s1, s[12:15], 0x3c ; C2008D3C
s_buffer_load_dword s34, s[12:15], 0x3d ; C2110D3D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_lt_f32_e64 s[36:37], 0, s35 ; D0020024 00004680
s_and_saveexec_b64 s[36:37], s[36:37] ; BEA42424
s_xor_b64 s[36:37], exec, s[36:37] ; 89A4247E
s_cbranch_execz BB0_7 ; BF880000
s_buffer_load_dword s35, s[12:15], 0x36 ; C2118D36
s_buffer_load_dword s38, s[12:15], 0x38 ; C2130D38
s_buffer_load_dword s39, s[12:15], 0x39 ; C2138D39
s_buffer_load_dword s40, s[12:15], 0x3a ; C2140D3A
s_buffer_load_dword s41, s[12:15], 0x30 ; C2148D30
s_buffer_load_dword s42, s[12:15], 0x31 ; C2150D31
s_buffer_load_dword s43, s[12:15], 0x32 ; C2158D32
s_buffer_load_dword s44, s[12:15], 0x34 ; C2160D34
s_buffer_load_dword s45, s[12:15], 0x35 ; C2168D35
v_mul_f32_e32 v22, v27, v27 ; 102C371B
v_mac_f32_e32 v22, v28, v28 ; 3E2C391C
v_mac_f32_e32 v22, v29, v29 ; 3E2C3B1D
v_rsq_clamp_f32_e32 v22, v22 ; 7E2C5916
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v23, s35, v15 ; 082E1E23
v_mov_b32_e32 v30, s35 ; 7E3C0223
v_sub_f32_e32 v31, s41, v14 ; 083E1C29
v_sub_f32_e32 v32, s42, v12 ; 0840182A
v_add_f32_e32 v30, s43, v30 ; 063C3C2B
v_sub_f32_e32 v33, s43, v15 ; 08421E2B
v_mad_f32 v34, 0.5, v30, -s40 ; D2820022 80A23CF0
v_add_f32_e32 v15, v15, v34 ; 061E450F
v_mul_f32_e32 v27, v22, v27 ; 10363716
v_mul_f32_e32 v28, v22, v28 ; 10383916
v_mul_f32_e32 v22, v22, v29 ; 102C3B16
v_rcp_f32_e32 v29, v27 ; 7E3A551B
v_rcp_f32_e32 v34, v28 ; 7E44551C
v_rcp_f32_e32 v35, v22 ; 7E465516
v_sub_f32_e32 v36, s44, v14 ; 08481C2C
v_mov_b32_e32 v37, s44 ; 7E4A022C
v_add_f32_e32 v37, s41, v37 ; 064A4A29
v_mul_f32_e32 v31, v29, v31 ; 103E3F1D
v_mul_f32_e32 v29, v29, v36 ; 103A491D
v_mul_f32_e32 v32, v34, v32 ; 10404122
v_mul_f32_e32 v33, v35, v33 ; 10424323
v_mul_f32_e32 v23, v35, v23 ; 102E2F23
v_mad_f32 v35, 0.5, v37, -s38 ; D2820023 809A4AF0
v_add_f32_e32 v14, v14, v35 ; 061C470E
v_sub_f32_e32 v35, s45, v12 ; 0846182D
v_mov_b32_e32 v36, s45 ; 7E48022D
v_mul_f32_e32 v34, v34, v35 ; 10444722
v_add_f32_e32 v35, s42, v36 ; 0646482A
v_cmp_lt_f32_e32 vcc, 0, v27 ; 7C023680
v_cndmask_b32_e32 v29, v29, v31 ; 003A3F1D
v_cmp_lt_f32_e32 vcc, 0, v28 ; 7C023880
v_cndmask_b32_e32 v31, v34, v32 ; 003E4122
v_cmp_lt_f32_e32 vcc, 0, v22 ; 7C022C80
v_cndmask_b32_e32 v23, v23, v33 ; 002E4317
v_min3_f32 v23, v29, v31, v23 ; D2A20017 045E3F1D
v_mad_f32 v29, 0.5, v35, -s39 ; D282001D 809E46F0
v_add_f32_e32 v12, v12, v29 ; 06183B0C
v_mac_f32_e32 v14, v23, v27 ; 3E1C3717
v_mac_f32_e32 v12, v23, v28 ; 3E183917
v_mac_f32_e32 v15, v23, v22 ; 3E1E2D17
v_mad_f32 v27, 0.5, -v37, v14 ; D282001B 443A4AF0
v_mad_f32 v28, 0.5, -v35, v12 ; D282001C 443246F0
v_mad_f32 v29, 0.5, -v30, v15 ; D282001D 443E3CF0
s_or_b64 exec, exec, s[36:37] ; 88FE247E
v_sub_f32_e64 v12, 1.0, s0 ; D208000C 000000F2
v_log_f32_e32 v12, v12 ; 7E184F0C
s_load_dwordx4 s[36:39], s[4:5], 0x4 ; C0920504
v_mul_legacy_f32_e32 v12, 0x3f400000, v12 ; 0E1818FF 3F400000
v_exp_f32_e32 v12, v12 ; 7E184B0C
v_mul_f32_e32 v30, 0x40e00000, v12 ; 103C18FF 40E00000
v_cubeid_f32 v34, v27, v28, v29 ; D2880022 0476391B
v_cubema_f32 v33, v27, v28, v29 ; D28E0021 0476391B
s_load_dwordx8 s[40:47], s[6:7], 0x8 ; C0D40708
v_cubesc_f32 v32, v27, v28, v29 ; D28A0020 0476391B
v_cubetc_f32 v31, v27, v28, v29 ; D28C001F 0476391B
v_rcp_f32_e64 v12, |v33| ; D354010C 00000121
v_mov_b32_e32 v27, 0x3fc00000 ; 7E3602FF 3FC00000
v_mad_f32 v28, v12, v31, v27 ; D282001C 046E3F0C
v_mac_f32_e32 v27, v12, v32 ; 3E36410C
v_mov_b32_e32 v29, v34 ; 7E3A0322
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[40:47], s[36:39] ; F0900F00 012A1B1B
s_waitcnt vmcnt(0) ; BF8C0770
v_log_f32_e32 v12, v30 ; 7E184F1E
v_sub_f32_e64 v14, 1.0, s9 ; D208000E 000012F2
v_mul_legacy_f32_e32 v12, s34, v12 ; 0E181822
v_exp_f32_e32 v12, v12 ; 7E184B0C
v_mul_f32_e32 v12, s1, v12 ; 10181801
v_mul_f32_e32 v15, v27, v12 ; 101E191B
v_mul_f32_e32 v22, v28, v12 ; 102C191C
v_mul_f32_e32 v12, v29, v12 ; 1018191D
v_mul_f32_e32 v15, v15, v14 ; 101E1D0F
v_mul_f32_e32 v22, v22, v14 ; 102C1D16
v_mul_f32_e32 v12, v12, v14 ; 10181D0C
v_mac_f32_e32 v15, s9, v19 ; 3E1E2609
v_mac_f32_e32 v22, s9, v18 ; 3E2C2409
v_mac_f32_e32 v12, s9, v17 ; 3E182209
v_mov_b32_e32 v17, v12 ; 7E22030C
v_mov_b32_e32 v18, v22 ; 7E240316
v_mov_b32_e32 v19, v15 ; 7E26030F
s_or_b64 exec, exec, s[32:33] ; 88FE207E
v_mad_f32 v27, -v20, s29, s29 ; D282001B 20743B14
v_mov_b32_e32 v12, s28 ; 7E18021C
v_mul_f32_e32 v20, v27, v24 ; 1028311B
v_mul_f32_e32 v15, v27, v25 ; 101E331B
v_mul_f32_e32 v14, v27, v26 ; 101C351B
v_mul_f32_e32 v22, s27, v16 ; 102C201B
v_sub_f32_e64 v24, 1.0, s31 ; D2080018 00003EF2
v_mac_f32_e32 v24, s31, v21 ; 3E302A1F
v_mul_f32_e32 v21, s20, v16 ; 102A2014
v_mul_f32_e32 v16, s17, v16 ; 10202011
s_buffer_load_dword s5, s[12:15], 0x10 ; C2028D10
s_buffer_load_dword s4, s[12:15], 0x11 ; C2020D11
s_buffer_load_dword s1, s[12:15], 0x12 ; C2008D12
s_buffer_load_dword s17, s[12:15], 0x16 ; C2088D16
s_buffer_load_dword s6, s[12:15], 0x48 ; C2030D48
s_buffer_load_dword s7, s[12:15], 0x49 ; C2038D49
s_buffer_load_dword s9, s[12:15], 0x4b ; C2048D4B
v_mul_f32_e32 v23, s11, v8 ; 102E100B
v_mac_f32_e32 v23, s10, v9 ; 3E2E120A
v_mac_f32_e32 v23, s16, v7 ; 3E2E0E10
v_add_f32_e32 v23, s18, v23 ; 062E2E12
v_mul_f32_e32 v25, s21, v8 ; 10321015
v_mac_f32_e32 v25, s19, v9 ; 3E321213
v_mac_f32_e32 v25, s22, v7 ; 3E320E16
v_add_f32_e32 v25, s23, v25 ; 06323217
v_mul_f32_e32 v26, s25, v8 ; 10341019
v_mac_f32_e32 v26, s24, v9 ; 3E341218
v_mac_f32_e32 v26, s26, v7 ; 3E340E1A
v_add_f32_e32 v26, s30, v26 ; 0634341E
v_add_f32_e32 v2, v23, v2 ; 06040517
v_add_f32_e32 v3, v25, v3 ; 06060719
v_add_f32_e32 v25, v26, v5 ; 06320B1A
v_mul_f32_e32 v5, s3, v9 ; 100A1203
v_mac_f32_e32 v5, s8, v8 ; 3E0A1008
v_mac_f32_e32 v5, s2, v7 ; 3E0A0E02
v_max_f32_e32 v23, 0, v5 ; 202E0A80
v_mul_f32_e32 v5, v24, v2 ; 100A0518
v_mul_f32_e32 v2, v24, v3 ; 10040718
v_mul_f32_e32 v3, v24, v25 ; 10063318
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v12, s17, v4 ; 3E180811
v_mul_f32_e32 v4, v24, v19 ; 10082718
v_mul_f32_e32 v18, v24, v18 ; 10242518
v_mul_f32_e32 v17, v24, v17 ; 10222318
v_sub_f32_e32 v19, 1.0, v27 ; 082636F2
v_add_f32_e32 v19, s0, v19 ; 06262600
v_add_f32_e64 v19, 0, v19 clamp ; D2060813 00022680
v_sub_f32_e32 v24, s3, v11 ; 08301603
v_sub_f32_e32 v25, s8, v10 ; 08321408
v_mul_f32_e32 v26, v24, v24 ; 10343118
v_mac_f32_e32 v26, v25, v25 ; 3E343319
v_sub_f32_e32 v27, s2, v13 ; 08361A02
v_mac_f32_e32 v26, v27, v27 ; 3E34371B
v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A
v_mul_f32_e32 v24, v26, v24 ; 1030311A
v_mul_f32_e32 v25, v26, v25 ; 1032331A
v_mul_f32_e32 v26, v26, v27 ; 1034371A
v_mul_f32_e32 v11, v11, v9 ; 1016130B
v_mad_f32 v10, -v10, v8, -v11 ; D282000A A42E110A
v_mad_f32 v10, -v13, v7, v10 ; D282000A 242A0F0D
v_mul_f32_e32 v9, v24, v9 ; 10121318
v_mac_f32_e32 v9, v25, v8 ; 3E121119
v_mul_f32_e32 v8, s3, v24 ; 10103003
v_mac_f32_e32 v8, s8, v25 ; 3E103208
v_mac_f32_e32 v9, v26, v7 ; 3E120F1A
v_mac_f32_e32 v8, s2, v26 ; 3E103402
v_max_f32_e32 v7, 0, v8 ; 200E1080
v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2
v_mul_f32_e32 v11, v8, v8 ; 10161108
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_mul_f32_e32 v8, v8, v11 ; 10101708
v_max_f32_e32 v10, 0, v10 ; 20141480
v_sub_f32_e32 v11, 1.0, v10 ; 081614F2
v_mul_f32_e32 v13, v11, v11 ; 101A170B
v_mul_f32_e32 v24, v11, v13 ; 10301B0B
v_mad_f32 v25, -v13, v24, 1.0 ; D2820019 23CA310D
v_mul_f32_e32 v26, v6, v25 ; 10343306
v_sub_f32_e32 v27, 1.0, v6 ; 08360CF2
v_mac_f32_e32 v6, v8, v27 ; 3E0C3708
v_mul_f32_e32 v27, v1, v25 ; 10363301
v_sub_f32_e32 v28, 1.0, v1 ; 083802F2
v_mac_f32_e32 v1, v8, v28 ; 3E023908
v_mul_f32_e32 v25, v0, v25 ; 10323300
v_sub_f32_e32 v28, 1.0, v0 ; 083800F2
v_mac_f32_e32 v0, v8, v28 ; 3E003908
v_sub_f32_e64 v8, 1.0, s0 ; D2080008 000000F2
v_sub_f32_e32 v28, 1.0, v8 ; 083810F2
v_mov_b32_e32 v29, 0x3cf5c28f ; 7E3A02FF 3CF5C28F
v_madmk_f32_e32 v28, v28, v29, 0x3f77ced9 ; 40383B1C 3F77CED9
v_add_f32_e32 v29, v7, v7 ; 063A0F07
v_mul_f32_e32 v7, v8, v7 ; 100E0F08
v_mad_f32 v7, v29, v7, 0.5 ; D2820007 03C20F1D
v_mul_f32_e32 v13, v24, v13 ; 101A1B18
v_mac_f32_e32 v26, v19, v13 ; 3E341B13
v_mac_f32_e32 v27, v19, v13 ; 3E361B13
v_mac_f32_e32 v25, v19, v13 ; 3E321B13
v_mul_f32_e32 v8, v8, v8 ; 10101108
v_log_f32_e32 v19, v28 ; 7E264F1C
v_mul_f32_e32 v8, s9, v8 ; 10101009
v_mul_f32_e32 v11, v8, v11 ; 10161708
v_mac_f32_e32 v11, 1.0, v10 ; 3E1614F2
v_rcp_f32_e32 v10, v19 ; 7E145513
v_sub_f32_e32 v19, 1.0, v23 ; 08262EF2
v_mul_f32_e32 v8, v8, v19 ; 10102708
v_mac_f32_e32 v8, 1.0, v23 ; 3E102EF2
v_max_f32_e32 v9, 0, v9 ; 20121280
v_log_f32_e32 v9, v9 ; 7E124F09
v_madak_f32_e32 v8, v8, v11, 0x38d1b717 ; 42101708 38D1B717
v_mul_f32_e32 v10, 0x41200000, v10 ; 101414FF 41200000
v_mul_f32_e32 v11, v10, v10 ; 1016150A
v_mul_legacy_f32_e32 v9, v11, v9 ; 0E12130B
v_rcp_f32_e32 v8, v8 ; 7E105508
v_mad_f32 v10, v10, v10, 1.0 ; D282000A 03CA150A
v_mul_f32_e32 v10, s7, v10 ; 10141407
v_exp_f32_e32 v9, v9 ; 7E124B09
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_mul_f32_e32 v8, v9, v8 ; 10101109
v_mul_f32_e32 v9, v19, v19 ; 10122713
v_mul_f32_e32 v10, v19, v9 ; 10141313
v_mul_f32_e32 v9, v10, v9 ; 1012130A
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_mad_f32 v9, v7, v9, 1.0 ; D2820009 03CA1307
v_mad_f32 v7, v7, v13, 1.0 ; D2820007 03CA1B07
v_mul_f32_e32 v7, v7, v9 ; 100E1307
v_mul_f32_e32 v8, v23, v8 ; 10101117
v_mul_f32_e32 v8, s6, v8 ; 10101006
v_mul_f32_e32 v7, v23, v7 ; 100E0F17
v_mac_f32_e32 v5, v7, v22 ; 3E0A2D07
v_mul_f32_e32 v5, v5, v20 ; 100A2905
v_max_f32_e32 v8, 0, v8 ; 20101080
v_mul_f32_e32 v9, v22, v8 ; 10121116
v_mac_f32_e32 v5, v6, v9 ; 3E0A1306
v_mac_f32_e32 v2, v7, v21 ; 3E042B07
v_mac_f32_e32 v3, v7, v16 ; 3E062107
v_mul_f32_e32 v6, v21, v8 ; 100C1115
v_mul_f32_e32 v7, v16, v8 ; 100E1110
v_mul_f32_e32 v2, v2, v15 ; 10041F02
v_mul_f32_e32 v3, v3, v14 ; 10061D03
v_mac_f32_e32 v2, v1, v6 ; 3E040D01
v_mac_f32_e32 v3, v0, v7 ; 3E060F00
v_mac_f32_e32 v5, v26, v4 ; 3E0A091A
v_mac_f32_e32 v2, v27, v18 ; 3E04251B
v_mac_f32_e32 v3, v25, v17 ; 3E062319
v_add_f32_e64 v0, 0, v12 clamp ; D2060800 00021880
v_sub_f32_e32 v1, 1.0, v0 ; 080200F2
v_mul_f32_e32 v4, s5, v1 ; 10080205
v_mac_f32_e32 v4, v5, v0 ; 3E080105
v_mul_f32_e32 v5, s4, v1 ; 100A0204
v_mac_f32_e32 v5, v2, v0 ; 3E0A0102
v_mul_f32_e32 v1, s1, v1 ; 10020201
v_mac_f32_e32 v1, v3, v0 ; 3E020103
v_cvt_pkrtz_f16_f32_e32 v0, v4, v5 ; 5E000B04
v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 40
Code Size: 2380 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..7]
DCL CONST[9..12]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MUL TEMP[3], CONST[9], IN[0].xxxx
43: MAD TEMP[3], CONST[10], IN[0].yyyy, TEMP[3]
44: MAD TEMP[3], CONST[11], IN[0].zzzz, TEMP[3]
45: MAD TEMP[3], CONST[12], IN[0].wwww, TEMP[3]
46: MOV TEMP[4].x, CONST[4].xxxx
47: MOV TEMP[4].y, CONST[5].xxxx
48: MOV TEMP[4].z, CONST[6].xxxx
49: MOV TEMP[5].x, CONST[4].yyyy
50: MOV TEMP[5].y, CONST[5].yyyy
51: MOV TEMP[5].z, CONST[6].yyyy
52: MOV TEMP[6].x, CONST[4].zzzz
53: MOV TEMP[6].y, CONST[5].zzzz
54: MOV TEMP[6].z, CONST[6].zzzz
55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz
58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
59: RSQ TEMP[4].x, TEMP[4].xxxx
60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
61: MOV TEMP[0].yzw, TEMP[0].yxyz
62: MUL TEMP[4], CONST[0], IN[0].xxxx
63: MAD TEMP[4], CONST[1], IN[0].yyyy, TEMP[4]
64: MAD TEMP[4], CONST[2], IN[0].zzzz, TEMP[4]
65: MAD TEMP[4].xyz, CONST[3], IN[0].wwww, TEMP[4]
66: MOV TEMP[4].xyz, TEMP[4].xyzx
67: MOV TEMP[0].x, TEMP[3].zzzz
68: MOV OUT[1], TEMP[2]
69: MOV OUT[4], TEMP[4]
70: MOV OUT[2], TEMP[1]
71: MOV OUT[3], TEMP[0]
72: MOV OUT[0], TEMP[3]
73: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204)
%53 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%54 = load <32 x i8>, <32 x i8> addrspace(2)* %53, align 32, !tbaa !0
%55 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%56 = load <16 x i8>, <16 x i8> addrspace(2)* %55, align 16, !tbaa !0
%57 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%58 = load <16 x i8>, <16 x i8> addrspace(2)* %57, align 16, !tbaa !0
%59 = add i32 %5, %7
%60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59)
%61 = extractelement <4 x float> %60, i32 0
%62 = extractelement <4 x float> %60, i32 1
%63 = extractelement <4 x float> %60, i32 2
%64 = extractelement <4 x float> %60, i32 3
%65 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%66 = load <16 x i8>, <16 x i8> addrspace(2)* %65, align 16, !tbaa !0
%67 = add i32 %5, %7
%68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67)
%69 = extractelement <4 x float> %68, i32 0
%70 = extractelement <4 x float> %68, i32 1
%71 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%72 = load <16 x i8>, <16 x i8> addrspace(2)* %71, align 16, !tbaa !0
%73 = add i32 %5, %7
%74 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %72, i32 0, i32 %73)
%75 = extractelement <4 x float> %74, i32 0
%76 = extractelement <4 x float> %74, i32 1
%77 = fmul float %75, 2.500000e-01
%78 = fptosi float %77 to i32
%79 = fptosi float %76 to i32
%80 = sdiv i32 %79, 256
%81 = sitofp i32 %78 to float
%82 = sitofp i32 %80 to float
%83 = shl nsw i32 %80, 8
%84 = sub i32 %79, %83
%85 = sitofp i32 %84 to float
%86 = fadd float %81, -1.000000e+00
%87 = fadd float %82, -1.000000e+00
%88 = fadd float %85, -1.000000e+00
%89 = sitofp i32 %79 to float
%90 = fsub float %76, %89
%91 = fmul float %90, 1.000000e+01
%92 = fadd float %91, 0x3FDFFE5CA0000000
%93 = fptosi float %92 to i32
%94 = icmp eq i32 %93, 0
%95 = select i1 %94, float 1.000000e+00, float 0.000000e+00
%96 = icmp eq i32 %93, 1
%97 = select i1 %96, float 1.000000e+00, float 0.000000e+00
%98 = icmp eq i32 %93, 2
%99 = select i1 %98, float 1.000000e+00, float 0.000000e+00
%100 = fmul float %95, %81
%101 = fmul float %97, %82
%102 = fadd float %100, %101
%103 = fmul float %99, %85
%104 = fadd float %102, %103
%105 = fadd float %104, 0.000000e+00
%106 = fmul float %105, 0x3F70101020000000
%107 = bitcast float %106 to i32
%108 = insertelement <4 x i32> undef, i32 %107, i32 0
%109 = insertelement <4 x i32> %108, i32 1036831949, i32 1
%110 = insertelement <4 x i32> %109, i32 0, i32 2
%111 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %110, <32 x i8> %54, <16 x i8> %56, i32 2)
%112 = extractelement <4 x float> %111, i32 0
%113 = extractelement <4 x float> %111, i32 1
%114 = fmul float %112, 1.600000e+01
%115 = fadd float %114, -8.000000e+00
%116 = fmul float %113, 4.000000e+00
%117 = shl i32 %78, 2
%118 = sitofp i32 %117 to float
%119 = fsub float %75, %118
%120 = fadd float %119, -2.000000e+00
%121 = fmul float %37, %61
%122 = fmul float %38, %61
%123 = fmul float %39, %61
%124 = fmul float %40, %61
%125 = fmul float %41, %62
%126 = fadd float %125, %121
%127 = fmul float %42, %62
%128 = fadd float %127, %122
%129 = fmul float %43, %62
%130 = fadd float %129, %123
%131 = fmul float %44, %62
%132 = fadd float %131, %124
%133 = fmul float %45, %63
%134 = fadd float %133, %126
%135 = fmul float %46, %63
%136 = fadd float %135, %128
%137 = fmul float %47, %63
%138 = fadd float %137, %130
%139 = fmul float %48, %63
%140 = fadd float %139, %132
%141 = fmul float %49, %64
%142 = fadd float %141, %134
%143 = fmul float %50, %64
%144 = fadd float %143, %136
%145 = fmul float %51, %64
%146 = fadd float %145, %138
%147 = fmul float %52, %64
%148 = fadd float %147, %140
%149 = fmul float %28, %69
%150 = fmul float %31, %69
%151 = fmul float %34, %69
%152 = fmul float %29, %70
%153 = fadd float %152, %149
%154 = fmul float %32, %70
%155 = fadd float %154, %150
%156 = fmul float %35, %70
%157 = fadd float %156, %151
%158 = fmul float %30, %120
%159 = fadd float %158, %153
%160 = fmul float %33, %120
%161 = fadd float %160, %155
%162 = fmul float %36, %120
%163 = fadd float %162, %157
%164 = fmul float %159, %159
%165 = fmul float %161, %161
%166 = fadd float %165, %164
%167 = fmul float %163, %163
%168 = fadd float %166, %167
%169 = call float @llvm.AMDGPU.rsq.clamped.f32(float %168)
%170 = fmul float %159, %169
%171 = fmul float %161, %169
%172 = fmul float %163, %169
%173 = fmul float %13, %61
%174 = fmul float %14, %61
%175 = fmul float %15, %61
%176 = fmul float %16, %61
%177 = fmul float %17, %62
%178 = fadd float %177, %173
%179 = fmul float %18, %62
%180 = fadd float %179, %174
%181 = fmul float %19, %62
%182 = fadd float %181, %175
%183 = fmul float %20, %62
%184 = fadd float %183, %176
%185 = fmul float %21, %63
%186 = fadd float %185, %178
%187 = fmul float %22, %63
%188 = fadd float %187, %180
%189 = fmul float %23, %63
%190 = fadd float %189, %182
%191 = fmul float %24, %63
%192 = fadd float %191, %184
%193 = fmul float %25, %64
%194 = fadd float %193, %186
%195 = fmul float %26, %64
%196 = fadd float %195, %188
%197 = fmul float %27, %64
%198 = fadd float %197, %190
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %86, float %87, float %88, float %115)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %95, float %97, float %99, float %116)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %146, float %170, float %171, float %172)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %194, float %196, float %198, float %192)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %142, float %144, float %146, float %148)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000
v_mov_b32_e32 v5, 0 ; 7E0A0280
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500
s_load_dwordx4 s[16:19], s[8:9], 0x0 ; C0880900
s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904
s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s28, s[0:3], 0x10 ; C20E0110
s_buffer_load_dword s29, s[0:3], 0x11 ; C20E8111
buffer_load_format_xyzw v[6:9], v0, s[16:19], 0 idxen ; E00C2000 80040600
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00
s_buffer_load_dword s16, s[0:3], 0x12 ; C2080112
s_buffer_load_dword s17, s[0:3], 0x14 ; C2088114
s_buffer_load_dword s18, s[0:3], 0x15 ; C2090115
s_buffer_load_dword s19, s[0:3], 0x16 ; C2098116
s_buffer_load_dword s20, s[0:3], 0x18 ; C20A0118
s_buffer_load_dword s21, s[0:3], 0x19 ; C20A8119
s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A
s_buffer_load_dword s23, s[0:3], 0x24 ; C20B8124
s_buffer_load_dword s24, s[0:3], 0x25 ; C20C0125
s_buffer_load_dword s25, s[0:3], 0x26 ; C20C8126
s_buffer_load_dword s26, s[0:3], 0x27 ; C20D0127
s_buffer_load_dword s27, s[0:3], 0x28 ; C20D8128
s_buffer_load_dword s30, s[0:3], 0x29 ; C20F0129
s_buffer_load_dword s31, s[0:3], 0x0 ; C20F8100
s_buffer_load_dword s32, s[0:3], 0x1 ; C2100101
s_buffer_load_dword s33, s[0:3], 0x2 ; C2108102
s_buffer_load_dword s34, s[0:3], 0x3 ; C2110103
s_buffer_load_dword s35, s[0:3], 0x4 ; C2118104
s_buffer_load_dword s36, s[0:3], 0x2a ; C212012A
s_buffer_load_dword s37, s[0:3], 0x2b ; C212812B
s_buffer_load_dword s38, s[0:3], 0x2c ; C213012C
s_buffer_load_dword s39, s[0:3], 0x2d ; C213812D
s_buffer_load_dword s40, s[0:3], 0x2e ; C214012E
s_buffer_load_dword s41, s[0:3], 0x5 ; C2148105
s_buffer_load_dword s42, s[0:3], 0x6 ; C2150106
s_buffer_load_dword s43, s[0:3], 0x7 ; C2158107
s_buffer_load_dword s44, s[0:3], 0x8 ; C2160108
s_buffer_load_dword s45, s[0:3], 0x9 ; C2168109
s_buffer_load_dword s46, s[0:3], 0xa ; C217010A
s_buffer_load_dword s47, s[0:3], 0xb ; C217810B
s_buffer_load_dword s48, s[0:3], 0xc ; C218010C
s_buffer_load_dword s49, s[0:3], 0xd ; C218810D
s_buffer_load_dword s50, s[0:3], 0xe ; C219010E
s_buffer_load_dword s51, s[0:3], 0x2f ; C219812F
s_buffer_load_dword s52, s[0:3], 0x30 ; C21A0130
s_buffer_load_dword s53, s[0:3], 0x31 ; C21A8131
s_buffer_load_dword s54, s[0:3], 0x32 ; C21B0132
s_buffer_load_dword s0, s[0:3], 0x33 ; C2000133
v_mul_f32_e32 v0, s28, v10 ; 1000141C
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v14, s17, v10 ; 101C1411
v_mul_f32_e32 v10, s20, v10 ; 10141414
v_mac_f32_e32 v0, s29, v11 ; 3E00161D
v_mac_f32_e32 v14, s18, v11 ; 3E1C1612
v_mac_f32_e32 v10, s21, v11 ; 3E141615
v_mul_f32_e32 v11, s23, v6 ; 10160C17
v_mac_f32_e32 v11, s27, v7 ; 3E160E1B
v_mul_f32_e32 v15, s24, v6 ; 101E0C18
v_mac_f32_e32 v15, s30, v7 ; 3E1E0E1E
v_mul_f32_e32 v16, s25, v6 ; 10200C19
v_mac_f32_e32 v16, s36, v7 ; 3E200E24
v_mul_f32_e32 v17, s26, v6 ; 10220C1A
v_mac_f32_e32 v17, s37, v7 ; 3E220E25
v_mul_f32_e32 v18, s31, v6 ; 10240C1F
v_mac_f32_e32 v18, s35, v7 ; 3E240E23
v_mul_f32_e32 v19, s32, v6 ; 10260C20
v_mac_f32_e32 v19, s41, v7 ; 3E260E29
v_mul_f32_e32 v20, s33, v6 ; 10280C21
v_mac_f32_e32 v20, s42, v7 ; 3E280E2A
v_mul_f32_e32 v6, s34, v6 ; 100C0C22
v_mac_f32_e32 v6, s43, v7 ; 3E0C0E2B
v_mac_f32_e32 v11, s38, v8 ; 3E161026
v_mac_f32_e32 v15, s39, v8 ; 3E1E1027
v_mac_f32_e32 v16, s40, v8 ; 3E201028
v_mac_f32_e32 v17, s51, v8 ; 3E221033
v_mac_f32_e32 v18, s44, v8 ; 3E24102C
v_mac_f32_e32 v19, s45, v8 ; 3E26102D
v_mac_f32_e32 v20, s46, v8 ; 3E28102E
v_mac_f32_e32 v6, s47, v8 ; 3E0C102F
v_mac_f32_e32 v11, s52, v9 ; 3E161234
v_mac_f32_e32 v15, s53, v9 ; 3E1E1235
v_mac_f32_e32 v16, s54, v9 ; 3E201236
v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000
v_cvt_i32_f32_e32 v4, v13 ; 7E08110D
v_cvt_i32_f32_e32 v3, v3 ; 7E061103
v_mac_f32_e32 v17, s0, v9 ; 3E221200
v_mac_f32_e32 v18, s48, v9 ; 3E241230
v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04
v_lshlrev_b32_e32 v8, 2, v3 ; 34100682
v_cvt_f32_i32_e32 v8, v8 ; 7E100B08
v_mac_f32_e32 v19, s49, v9 ; 3E261231
v_mac_f32_e32 v20, s50, v9 ; 3E281232
v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07
v_subrev_f32_e32 v8, v8, v12 ; 0A101908
v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000
v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F
v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_add_i32_e32 v7, v4, v7 ; 4A0E0F04
v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00
v_sub_i32_e32 v4, v4, v9 ; 4C081304
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480
v_cvt_f32_i32_e32 v12, v3 ; 7E180B03
v_ashrrev_i32_e32 v3, 8, v7 ; 30060E88
v_cvt_f32_i32_e32 v7, v3 ; 7E0E0B03
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cmp_eq_i32_e64 s[0:1], 2, v1 ; D1040000 00020282
v_cndmask_b32_e64 v1, 0, 1.0, vcc ; D2000001 01A9E480
v_cvt_f32_i32_e32 v13, v4 ; 7E1A0B04
v_mul_f32_e32 v3, v7, v1 ; 10060307
v_mac_f32_e32 v3, v12, v9 ; 3E06130C
v_cndmask_b32_e64 v21, 0, 1.0, s[0:1] ; D2000015 0001E480
v_mac_f32_e32 v3, v13, v21 ; 3E062B0D
v_add_f32_e32 v3, 0, v3 ; 06060680
v_mul_f32_e32 v3, 0x3b808081, v3 ; 100606FF 3B808081
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[12:15] ; F0900300 00610303
v_add_f32_e32 v5, -2.0, v8 ; 060A10F5
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v14, s19, v5 ; 3E1C0A13
v_mac_f32_e32 v10, s22, v5 ; 3E140A16
v_add_f32_e32 v5, -1.0, v12 ; 060A18F3
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_mul_f32_e32 v8, v0, v0 ; 10100100
v_mac_f32_e32 v8, v14, v14 ; 3E101D0E
v_mac_f32_e32 v8, v10, v10 ; 3E10150A
v_rsq_clamp_f32_e32 v8, v8 ; 7E105908
v_add_f32_e32 v12, -1.0, v13 ; 06181AF3
s_waitcnt vmcnt(0) ; BF8C0770
v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000
exp 15, 32, 0, 0, 0, v5, v7, v12, v2 ; F800020F 020C0705
v_mul_f32_e32 v0, v8, v0 ; 10000108
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v2, v8, v14 ; 10041D08
v_mul_f32_e32 v3, 4.0, v4 ; 100608F6
v_mul_f32_e32 v4, v8, v10 ; 10081508
exp 15, 33, 0, 0, 0, v9, v1, v21, v3 ; F800021F 03150109
exp 15, 34, 0, 0, 0, v16, v0, v2, v4 ; F800022F 04020010
exp 15, 35, 0, 0, 0, v18, v19, v20, v6 ; F800023F 06141312
exp 15, 12, 0, 1, 0, v11, v15, v16, v17 ; F80008CF 11100F0B
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 64
VGPRS: 24
Code Size: 680 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SAMP[6]
DCL SAMP[7]
DCL SAMP[8]
DCL SAMP[9]
DCL SAMP[10]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL SVIEW[6], 2D, FLOAT
DCL SVIEW[7], 2D, FLOAT
DCL SVIEW[8], 2D, FLOAT
DCL SVIEW[9], 2D, FLOAT
DCL SVIEW[10], 2D, FLOAT
DCL CONST[0..3]
DCL CONST[5..8]
DCL CONST[19..27]
DCL TEMP[0..36], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001}
IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].xyz, -IN[3].xyzx
1: ADD TEMP[1].xyz, CONST[1].xyzz, TEMP[0].xyzz
2: DP3 TEMP[2].x, TEMP[1].xyzz, TEMP[1].xyzz
3: RSQ TEMP[2].x, TEMP[2].xxxx
4: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
5: ADD TEMP[0].xyz, CONST[0].xyzz, TEMP[0].xyzz
6: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz
7: RSQ TEMP[2].x, TEMP[2].xxxx
8: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx
9: ABS TEMP[2].xyz, IN[2].yzww
10: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
11: RSQ TEMP[3].x, TEMP[3].xxxx
12: MAD TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx, IMM[0].xxxx
13: MUL TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy
14: MAX TEMP[2].xyz, TEMP[2].xyzz, IMM[0].zzzz
15: ADD TEMP[3].x, TEMP[2].xxxx, TEMP[2].yyyy
16: ADD TEMP[3].x, TEMP[3].xxxx, TEMP[2].zzzz
17: RCP TEMP[3].xyz, TEMP[3].xxxx
18: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xyzz
19: ADD TEMP[3], IN[0], IMM[0].wwww
20: FLR TEMP[3].xyz, TEMP[3]
21: MOV TEMP[4].x, CONST[19].xxxx
22: MUL TEMP[5].x, TEMP[3].xxxx, CONST[19].xxxx
23: MOV TEMP[6].x, TEMP[5].xxxx
24: FLR TEMP[5].x, TEMP[5].xxxx
25: MUL TEMP[5].x, TEMP[5].xxxx, CONST[19].xxxx
26: FSGE TEMP[7].x, TEMP[3].xxxx, IMM[1].xxxx
27: UIF TEMP[7].xxxx :0
28: MOV TEMP[4].x, CONST[20].xxxx
29: ADD TEMP[7].x, TEMP[3].xxxx, IMM[1].yyyy
30: MUL TEMP[7].x, TEMP[7].xxxx, CONST[20].xxxx
31: MOV TEMP[6].x, TEMP[7].xxxx
32: FLR TEMP[8].x, TEMP[7].xxxx
33: MUL TEMP[8].x, TEMP[8].xxxx, CONST[20].xxxx
34: MOV TEMP[5].x, TEMP[8].xxxx
35: FRC TEMP[7].x, TEMP[7].xxxx
36: FRC TEMP[9].x, TEMP[8].xxxx
37: MOV TEMP[7].y, TEMP[9].xxxx
38: FLR TEMP[8].x, TEMP[8].xxxx
39: ADD TEMP[8].x, TEMP[8].xxxx, IMM[1].zzzz
40: MOV TEMP[7].z, TEMP[8].xxxx
41: MOV TEMP[7].xyz, TEMP[7].xyzx
42: ELSE :0
43: FRC TEMP[6].x, TEMP[6].xxxx
44: FRC TEMP[8].x, TEMP[5].xxxx
45: MOV TEMP[6].y, TEMP[8].xxxx
46: FLR TEMP[5].x, TEMP[5].xxxx
47: MOV TEMP[6].z, TEMP[5].xxxx
48: MOV TEMP[7].xyz, TEMP[6].xyzx
49: ENDIF
50: MOV TEMP[5].x, CONST[19].xxxx
51: MUL TEMP[6].x, TEMP[3].yyyy, CONST[19].xxxx
52: MOV TEMP[8].x, TEMP[6].xxxx
53: FLR TEMP[6].x, TEMP[6].xxxx
54: MUL TEMP[6].x, TEMP[6].xxxx, CONST[19].xxxx
55: FSGE TEMP[9].x, TEMP[3].yyyy, IMM[1].xxxx
56: UIF TEMP[9].xxxx :0
57: MOV TEMP[5].x, CONST[20].xxxx
58: ADD TEMP[9].x, TEMP[3].yyyy, IMM[1].yyyy
59: MUL TEMP[9].x, TEMP[9].xxxx, CONST[20].xxxx
60: MOV TEMP[8].x, TEMP[9].xxxx
61: FLR TEMP[10].x, TEMP[9].xxxx
62: MUL TEMP[10].x, TEMP[10].xxxx, CONST[20].xxxx
63: MOV TEMP[6].x, TEMP[10].xxxx
64: FRC TEMP[9].x, TEMP[9].xxxx
65: FRC TEMP[11].x, TEMP[10].xxxx
66: MOV TEMP[9].y, TEMP[11].xxxx
67: FLR TEMP[10].x, TEMP[10].xxxx
68: ADD TEMP[10].x, TEMP[10].xxxx, IMM[1].zzzz
69: MOV TEMP[9].z, TEMP[10].xxxx
70: MOV TEMP[9].xyz, TEMP[9].xyzx
71: ELSE :0
72: FRC TEMP[8].x, TEMP[8].xxxx
73: FRC TEMP[10].x, TEMP[6].xxxx
74: MOV TEMP[8].y, TEMP[10].xxxx
75: FLR TEMP[6].x, TEMP[6].xxxx
76: MOV TEMP[8].z, TEMP[6].xxxx
77: MOV TEMP[9].xyz, TEMP[8].xyzx
78: ENDIF
79: MOV TEMP[6].x, CONST[19].xxxx
80: MUL TEMP[8].x, TEMP[3].zzzz, CONST[19].xxxx
81: MOV TEMP[10].x, TEMP[8].xxxx
82: FLR TEMP[8].x, TEMP[8].xxxx
83: MUL TEMP[8].x, TEMP[8].xxxx, CONST[19].xxxx
84: FSGE TEMP[11].x, TEMP[3].zzzz, IMM[1].xxxx
85: UIF TEMP[11].xxxx :0
86: MOV TEMP[6].x, CONST[20].xxxx
87: ADD TEMP[3].x, TEMP[3].zzzz, IMM[1].yyyy
88: MUL TEMP[3].x, TEMP[3].xxxx, CONST[20].xxxx
89: MOV TEMP[10].x, TEMP[3].xxxx
90: FLR TEMP[11].x, TEMP[3].xxxx
91: MUL TEMP[11].x, TEMP[11].xxxx, CONST[20].xxxx
92: MOV TEMP[8].x, TEMP[11].xxxx
93: FRC TEMP[3].x, TEMP[3].xxxx
94: FRC TEMP[12].x, TEMP[11].xxxx
95: MOV TEMP[3].y, TEMP[12].xxxx
96: FLR TEMP[11].x, TEMP[11].xxxx
97: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
98: MOV TEMP[3].z, TEMP[11].xxxx
99: MOV TEMP[3].xyz, TEMP[3].xyzx
100: ELSE :0
101: FRC TEMP[10].x, TEMP[10].xxxx
102: FRC TEMP[11].x, TEMP[8].xxxx
103: MOV TEMP[10].y, TEMP[11].xxxx
104: FLR TEMP[8].x, TEMP[8].xxxx
105: MOV TEMP[10].z, TEMP[8].xxxx
106: MOV TEMP[3].xyz, TEMP[10].xyzx
107: ENDIF
108: ADD TEMP[8].xyz, IN[3].xyzz, -CONST[0].xyzz
109: DP3 TEMP[8].x, TEMP[8].xyzz, TEMP[8].xyzz
110: MUL TEMP[8].x, CONST[25].xxxx, TEMP[8].xxxx
111: LG2 TEMP[8].x, TEMP[8].xxxx
112: MUL TEMP[8].x, TEMP[8].xxxx, IMM[1].wwww
113: MUL TEMP[8].x, TEMP[8].xxxx, CONST[24].xxxx
114: MOV TEMP[10].xy, IN[3].xyxx
115: MOV TEMP[11].x, IMM[2].xxxx
116: FSNE TEMP[12].x, CONST[19].xxxx, TEMP[4].xxxx
117: UIF TEMP[12].xxxx :0
118: MOV TEMP[11].x, IMM[2].yyyy
119: RCP TEMP[12].x, CONST[22].xxxx
120: MUL TEMP[10].xy, IN[3].xyyy, TEMP[12].xxxx
121: ELSE :0
122: RCP TEMP[12].x, CONST[21].xxxx
123: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx
124: ENDIF
125: FRC TEMP[10].xy, TEMP[10].xyyy
126: MUL TEMP[12].x, CONST[23].xxxx, IMM[2].wwww
127: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
128: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
129: MUL TEMP[11].x, TEMP[11].xxxx, CONST[23].xxxx
130: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
131: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
132: MOV TEMP[11].xy, TEMP[10].xyyy
133: MOV TEMP[11].w, TEMP[8].xxxx
134: TXL TEMP[11], TEMP[11], SAMP[9], 2D
135: FSEQ TEMP[12].x, TEMP[7].zzzz, IMM[1].zzzz
136: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
137: MOV TEMP[13].xy, TEMP[10].xyyy
138: MOV TEMP[13].w, TEMP[8].xxxx
139: TXL TEMP[13], TEMP[13], SAMP[7], 2D
140: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[3].xxxx
141: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
142: MOV TEMP[15].xy, TEMP[10].xyyy
143: MOV TEMP[15].w, TEMP[8].xxxx
144: TXL TEMP[15], TEMP[15], SAMP[5], 2D
145: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[2].wwww
146: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
147: MOV TEMP[17].xy, TEMP[10].xyyy
148: MOV TEMP[17].w, TEMP[8].xxxx
149: TXL TEMP[17], TEMP[17], SAMP[3], 2D
150: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].zzzz
151: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
152: MOV TEMP[10].xy, TEMP[10].xyyy
153: MOV TEMP[10].w, TEMP[8].xxxx
154: TXL TEMP[10], TEMP[10], SAMP[1], 2D
155: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[3].yyyy
156: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
157: MUL TEMP[10], TEMP[10], TEMP[19].xxxx
158: MAD TEMP[10], TEMP[17], TEMP[18].xxxx, TEMP[10]
159: MAD TEMP[10], TEMP[15], TEMP[16].xxxx, TEMP[10]
160: MAD TEMP[10], TEMP[13], TEMP[14].xxxx, TEMP[10]
161: MAD TEMP[10], TEMP[11], TEMP[12].xxxx, TEMP[10]
162: MOV TEMP[11].xy, IN[3].zyzz
163: MOV TEMP[12].x, IMM[2].xxxx
164: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[4].xxxx
165: UIF TEMP[13].xxxx :0
166: MOV TEMP[12].x, IMM[2].yyyy
167: RCP TEMP[13].x, CONST[22].xxxx
168: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx
169: ELSE :0
170: RCP TEMP[13].x, CONST[21].xxxx
171: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
172: ENDIF
173: FRC TEMP[11].xy, TEMP[11].xyyy
174: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww
175: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
176: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
177: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx
178: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
179: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
180: MOV TEMP[12].xy, TEMP[11].xyyy
181: MOV TEMP[12].w, TEMP[8].xxxx
182: TXL TEMP[12], TEMP[12], SAMP[9], 2D
183: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz
184: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
185: MOV TEMP[14].xy, TEMP[11].xyyy
186: MOV TEMP[14].w, TEMP[8].xxxx
187: TXL TEMP[14], TEMP[14], SAMP[7], 2D
188: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx
189: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
190: MOV TEMP[16].xy, TEMP[11].xyyy
191: MOV TEMP[16].w, TEMP[8].xxxx
192: TXL TEMP[16], TEMP[16], SAMP[5], 2D
193: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww
194: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
195: MOV TEMP[18].xy, TEMP[11].xyyy
196: MOV TEMP[18].w, TEMP[8].xxxx
197: TXL TEMP[18], TEMP[18], SAMP[3], 2D
198: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz
199: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
200: MOV TEMP[11].xy, TEMP[11].xyyy
201: MOV TEMP[11].w, TEMP[8].xxxx
202: TXL TEMP[11], TEMP[11], SAMP[1], 2D
203: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy
204: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
205: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
206: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
207: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
208: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
209: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11]
210: MOV TEMP[12].xy, IN[3].zxzz
211: MOV TEMP[13].x, IMM[2].xxxx
212: FSNE TEMP[14].x, CONST[19].xxxx, TEMP[4].xxxx
213: UIF TEMP[14].xxxx :0
214: MOV TEMP[13].x, IMM[2].yyyy
215: RCP TEMP[14].x, CONST[22].xxxx
216: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx
217: ELSE :0
218: RCP TEMP[14].x, CONST[21].xxxx
219: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
220: ENDIF
221: FRC TEMP[12].xy, TEMP[12].xyyy
222: MUL TEMP[14].x, CONST[23].xxxx, IMM[2].wwww
223: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
224: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
225: MUL TEMP[13].x, TEMP[13].xxxx, CONST[23].xxxx
226: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
227: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
228: MOV TEMP[13].xy, TEMP[12].xyyy
229: MOV TEMP[13].w, TEMP[8].xxxx
230: TXL TEMP[13], TEMP[13], SAMP[9], 2D
231: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz
232: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
233: MOV TEMP[15].xy, TEMP[12].xyyy
234: MOV TEMP[15].w, TEMP[8].xxxx
235: TXL TEMP[15], TEMP[15], SAMP[7], 2D
236: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx
237: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
238: MOV TEMP[17].xy, TEMP[12].xyyy
239: MOV TEMP[17].w, TEMP[8].xxxx
240: TXL TEMP[17], TEMP[17], SAMP[5], 2D
241: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww
242: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
243: MOV TEMP[19].xy, TEMP[12].xyyy
244: MOV TEMP[19].w, TEMP[8].xxxx
245: TXL TEMP[19], TEMP[19], SAMP[3], 2D
246: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz
247: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
248: MOV TEMP[12].xy, TEMP[12].xyyy
249: MOV TEMP[12].w, TEMP[8].xxxx
250: TXL TEMP[12], TEMP[12], SAMP[1], 2D
251: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy
252: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
253: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
254: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
255: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
256: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
257: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12]
258: MOV TEMP[13].xy, IN[3].xyxx
259: MOV TEMP[14].x, IMM[2].xxxx
260: FSNE TEMP[15].x, CONST[19].xxxx, TEMP[5].xxxx
261: UIF TEMP[15].xxxx :0
262: MOV TEMP[14].x, IMM[2].yyyy
263: RCP TEMP[15].x, CONST[22].xxxx
264: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx
265: ELSE :0
266: RCP TEMP[15].x, CONST[21].xxxx
267: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
268: ENDIF
269: FRC TEMP[13].xy, TEMP[13].xyyy
270: MUL TEMP[15].x, CONST[23].xxxx, IMM[2].wwww
271: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
272: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
273: MUL TEMP[14].x, TEMP[14].xxxx, CONST[23].xxxx
274: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
275: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
276: MOV TEMP[14].xy, TEMP[13].xyyy
277: MOV TEMP[14].w, TEMP[8].xxxx
278: TXL TEMP[14], TEMP[14], SAMP[9], 2D
279: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[1].zzzz
280: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
281: MOV TEMP[16].xy, TEMP[13].xyyy
282: MOV TEMP[16].w, TEMP[8].xxxx
283: TXL TEMP[16], TEMP[16], SAMP[7], 2D
284: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[3].xxxx
285: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
286: MOV TEMP[18].xy, TEMP[13].xyyy
287: MOV TEMP[18].w, TEMP[8].xxxx
288: TXL TEMP[18], TEMP[18], SAMP[5], 2D
289: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].wwww
290: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
291: MOV TEMP[20].xy, TEMP[13].xyyy
292: MOV TEMP[20].w, TEMP[8].xxxx
293: TXL TEMP[20], TEMP[20], SAMP[3], 2D
294: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].zzzz
295: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
296: MOV TEMP[13].xy, TEMP[13].xyyy
297: MOV TEMP[13].w, TEMP[8].xxxx
298: TXL TEMP[13], TEMP[13], SAMP[1], 2D
299: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[3].yyyy
300: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
301: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
302: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
303: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
304: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
305: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13]
306: MOV TEMP[14].xy, IN[3].zyzz
307: MOV TEMP[15].x, IMM[2].xxxx
308: FSNE TEMP[16].x, CONST[19].xxxx, TEMP[5].xxxx
309: UIF TEMP[16].xxxx :0
310: MOV TEMP[15].x, IMM[2].yyyy
311: RCP TEMP[16].x, CONST[22].xxxx
312: MUL TEMP[14].xy, IN[3].zyyy, TEMP[16].xxxx
313: ELSE :0
314: RCP TEMP[16].x, CONST[21].xxxx
315: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
316: ENDIF
317: FRC TEMP[14].xy, TEMP[14].xyyy
318: MUL TEMP[16].x, CONST[23].xxxx, IMM[2].wwww
319: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
320: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
321: MUL TEMP[15].x, TEMP[15].xxxx, CONST[23].xxxx
322: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
323: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
324: MOV TEMP[15].xy, TEMP[14].xyyy
325: MOV TEMP[15].w, TEMP[8].xxxx
326: TXL TEMP[15], TEMP[15], SAMP[9], 2D
327: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[1].zzzz
328: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
329: MOV TEMP[17].xy, TEMP[14].xyyy
330: MOV TEMP[17].w, TEMP[8].xxxx
331: TXL TEMP[17], TEMP[17], SAMP[7], 2D
332: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[3].xxxx
333: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
334: MOV TEMP[19].xy, TEMP[14].xyyy
335: MOV TEMP[19].w, TEMP[8].xxxx
336: TXL TEMP[19], TEMP[19], SAMP[5], 2D
337: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[2].wwww
338: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
339: MOV TEMP[21].xy, TEMP[14].xyyy
340: MOV TEMP[21].w, TEMP[8].xxxx
341: TXL TEMP[21], TEMP[21], SAMP[3], 2D
342: FSEQ TEMP[22].x, TEMP[9].zzzz, IMM[2].zzzz
343: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
344: MOV TEMP[14].xy, TEMP[14].xyyy
345: MOV TEMP[14].w, TEMP[8].xxxx
346: TXL TEMP[14], TEMP[14], SAMP[1], 2D
347: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[3].yyyy
348: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
349: MUL TEMP[14], TEMP[14], TEMP[23].xxxx
350: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14]
351: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14]
352: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14]
353: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14]
354: MOV TEMP[15].xy, IN[3].zxzz
355: MOV TEMP[16].x, IMM[2].xxxx
356: FSNE TEMP[17].x, CONST[19].xxxx, TEMP[5].xxxx
357: UIF TEMP[17].xxxx :0
358: MOV TEMP[16].x, IMM[2].yyyy
359: RCP TEMP[17].x, CONST[22].xxxx
360: MUL TEMP[15].xy, IN[3].zxxx, TEMP[17].xxxx
361: ELSE :0
362: RCP TEMP[17].x, CONST[21].xxxx
363: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx
364: ENDIF
365: FRC TEMP[15].xy, TEMP[15].xyyy
366: MUL TEMP[17].x, CONST[23].xxxx, IMM[2].wwww
367: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx
368: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx
369: MUL TEMP[16].x, TEMP[16].xxxx, CONST[23].xxxx
370: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx
371: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
372: MOV TEMP[16].xy, TEMP[15].xyyy
373: MOV TEMP[16].w, TEMP[8].xxxx
374: TXL TEMP[16], TEMP[16], SAMP[9], 2D
375: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[1].zzzz
376: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
377: MOV TEMP[18].xy, TEMP[15].xyyy
378: MOV TEMP[18].w, TEMP[8].xxxx
379: TXL TEMP[18], TEMP[18], SAMP[7], 2D
380: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].xxxx
381: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
382: MOV TEMP[20].xy, TEMP[15].xyyy
383: MOV TEMP[20].w, TEMP[8].xxxx
384: TXL TEMP[20], TEMP[20], SAMP[5], 2D
385: FSEQ TEMP[21].x, TEMP[9].zzzz, IMM[2].wwww
386: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
387: MOV TEMP[22].xy, TEMP[15].xyyy
388: MOV TEMP[22].w, TEMP[8].xxxx
389: TXL TEMP[22], TEMP[22], SAMP[3], 2D
390: FSEQ TEMP[23].x, TEMP[9].zzzz, IMM[2].zzzz
391: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
392: MOV TEMP[15].xy, TEMP[15].xyyy
393: MOV TEMP[15].w, TEMP[8].xxxx
394: TXL TEMP[15], TEMP[15], SAMP[1], 2D
395: FSEQ TEMP[24].x, TEMP[9].zzzz, IMM[3].yyyy
396: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
397: MUL TEMP[15], TEMP[15], TEMP[24].xxxx
398: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15]
399: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15]
400: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15]
401: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15]
402: MOV TEMP[16].xy, IN[3].xyxx
403: MOV TEMP[17].x, IMM[2].xxxx
404: FSNE TEMP[18].x, CONST[19].xxxx, TEMP[6].xxxx
405: UIF TEMP[18].xxxx :0
406: MOV TEMP[17].x, IMM[2].yyyy
407: RCP TEMP[18].x, CONST[22].xxxx
408: MUL TEMP[16].xy, IN[3].xyyy, TEMP[18].xxxx
409: ELSE :0
410: RCP TEMP[18].x, CONST[21].xxxx
411: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx
412: ENDIF
413: FRC TEMP[16].xy, TEMP[16].xyyy
414: MUL TEMP[18].x, CONST[23].xxxx, IMM[2].wwww
415: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx
416: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx
417: MUL TEMP[17].x, TEMP[17].xxxx, CONST[23].xxxx
418: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx
419: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
420: MOV TEMP[17].xy, TEMP[16].xyyy
421: MOV TEMP[17].w, TEMP[8].xxxx
422: TXL TEMP[17], TEMP[17], SAMP[9], 2D
423: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[1].zzzz
424: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
425: MOV TEMP[19].xy, TEMP[16].xyyy
426: MOV TEMP[19].w, TEMP[8].xxxx
427: TXL TEMP[19], TEMP[19], SAMP[7], 2D
428: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[3].xxxx
429: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
430: MOV TEMP[21].xy, TEMP[16].xyyy
431: MOV TEMP[21].w, TEMP[8].xxxx
432: TXL TEMP[21], TEMP[21], SAMP[5], 2D
433: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[2].wwww
434: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
435: MOV TEMP[23].xy, TEMP[16].xyyy
436: MOV TEMP[23].w, TEMP[8].xxxx
437: TXL TEMP[23], TEMP[23], SAMP[3], 2D
438: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].zzzz
439: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
440: MOV TEMP[16].xy, TEMP[16].xyyy
441: MOV TEMP[16].w, TEMP[8].xxxx
442: TXL TEMP[16], TEMP[16], SAMP[1], 2D
443: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[3].yyyy
444: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
445: MUL TEMP[16], TEMP[16], TEMP[25].xxxx
446: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16]
447: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16]
448: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16]
449: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16]
450: MOV TEMP[17].xy, IN[3].zyzz
451: MOV TEMP[18].x, IMM[2].xxxx
452: FSNE TEMP[19].x, CONST[19].xxxx, TEMP[6].xxxx
453: UIF TEMP[19].xxxx :0
454: MOV TEMP[18].x, IMM[2].yyyy
455: RCP TEMP[19].x, CONST[22].xxxx
456: MUL TEMP[17].xy, IN[3].zyyy, TEMP[19].xxxx
457: ELSE :0
458: RCP TEMP[19].x, CONST[21].xxxx
459: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx
460: ENDIF
461: FRC TEMP[17].xy, TEMP[17].xyyy
462: MUL TEMP[19].x, CONST[23].xxxx, IMM[2].wwww
463: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx
464: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx
465: MUL TEMP[18].x, TEMP[18].xxxx, CONST[23].xxxx
466: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx
467: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
468: MOV TEMP[18].xy, TEMP[17].xyyy
469: MOV TEMP[18].w, TEMP[8].xxxx
470: TXL TEMP[18], TEMP[18], SAMP[9], 2D
471: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[1].zzzz
472: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
473: MOV TEMP[20].xy, TEMP[17].xyyy
474: MOV TEMP[20].w, TEMP[8].xxxx
475: TXL TEMP[20], TEMP[20], SAMP[7], 2D
476: FSEQ TEMP[21].x, TEMP[3].zzzz, IMM[3].xxxx
477: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
478: MOV TEMP[22].xy, TEMP[17].xyyy
479: MOV TEMP[22].w, TEMP[8].xxxx
480: TXL TEMP[22], TEMP[22], SAMP[5], 2D
481: FSEQ TEMP[23].x, TEMP[3].zzzz, IMM[2].wwww
482: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
483: MOV TEMP[24].xy, TEMP[17].xyyy
484: MOV TEMP[24].w, TEMP[8].xxxx
485: TXL TEMP[24], TEMP[24], SAMP[3], 2D
486: FSEQ TEMP[25].x, TEMP[3].zzzz, IMM[2].zzzz
487: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
488: MOV TEMP[17].xy, TEMP[17].xyyy
489: MOV TEMP[17].w, TEMP[8].xxxx
490: TXL TEMP[17], TEMP[17], SAMP[1], 2D
491: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[3].yyyy
492: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
493: MUL TEMP[17], TEMP[17], TEMP[26].xxxx
494: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17]
495: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17]
496: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17]
497: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17]
498: MOV TEMP[18].xy, IN[3].zxzz
499: MOV TEMP[19].x, IMM[2].xxxx
500: FSNE TEMP[20].x, CONST[19].xxxx, TEMP[6].xxxx
501: UIF TEMP[20].xxxx :0
502: MOV TEMP[19].x, IMM[2].yyyy
503: RCP TEMP[20].x, CONST[22].xxxx
504: MUL TEMP[18].xy, IN[3].zxxx, TEMP[20].xxxx
505: ELSE :0
506: RCP TEMP[20].x, CONST[21].xxxx
507: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx
508: ENDIF
509: FRC TEMP[18].xy, TEMP[18].xyyy
510: MUL TEMP[20].x, CONST[23].xxxx, IMM[2].wwww
511: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx
512: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx
513: MUL TEMP[19].x, TEMP[19].xxxx, CONST[23].xxxx
514: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx
515: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
516: MOV TEMP[19].xy, TEMP[18].xyyy
517: MOV TEMP[19].w, TEMP[8].xxxx
518: TXL TEMP[19], TEMP[19], SAMP[9], 2D
519: FSEQ TEMP[20].x, TEMP[3].zzzz, IMM[1].zzzz
520: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
521: MOV TEMP[21].xy, TEMP[18].xyyy
522: MOV TEMP[21].w, TEMP[8].xxxx
523: TXL TEMP[21], TEMP[21], SAMP[7], 2D
524: FSEQ TEMP[22].x, TEMP[3].zzzz, IMM[3].xxxx
525: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
526: MOV TEMP[23].xy, TEMP[18].xyyy
527: MOV TEMP[23].w, TEMP[8].xxxx
528: TXL TEMP[23], TEMP[23], SAMP[5], 2D
529: FSEQ TEMP[24].x, TEMP[3].zzzz, IMM[2].wwww
530: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
531: MOV TEMP[25].xy, TEMP[18].xyyy
532: MOV TEMP[25].w, TEMP[8].xxxx
533: TXL TEMP[25], TEMP[25], SAMP[3], 2D
534: FSEQ TEMP[26].x, TEMP[3].zzzz, IMM[2].zzzz
535: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
536: MOV TEMP[18].xy, TEMP[18].xyyy
537: MOV TEMP[18].w, TEMP[8].xxxx
538: TXL TEMP[18], TEMP[18], SAMP[1], 2D
539: FSEQ TEMP[27].x, TEMP[3].zzzz, IMM[3].yyyy
540: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
541: MUL TEMP[18], TEMP[18], TEMP[27].xxxx
542: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18]
543: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18]
544: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18]
545: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18]
546: MUL TEMP[16], TEMP[16], TEMP[2].zzzz
547: MAD TEMP[16], TEMP[17], TEMP[2].xxxx, TEMP[16]
548: MAD TEMP[16], TEMP[18], TEMP[2].yyyy, TEMP[16]
549: MUL TEMP[13], TEMP[13], TEMP[2].zzzz
550: MAD TEMP[13], TEMP[14], TEMP[2].xxxx, TEMP[13]
551: MAD TEMP[13], TEMP[15], TEMP[2].yyyy, TEMP[13]
552: MUL TEMP[10], TEMP[10], TEMP[2].zzzz
553: MAD TEMP[10], TEMP[11], TEMP[2].xxxx, TEMP[10]
554: MAD TEMP[10], TEMP[12], TEMP[2].yyyy, TEMP[10]
555: MUL TEMP[10], IN[1].xxxx, TEMP[10]
556: MAD TEMP[10], IN[1].yyyy, TEMP[13], TEMP[10]
557: MAD TEMP[10].xyz, IN[1].zzzz, TEMP[16], TEMP[10]
558: MOV TEMP[11].xy, IN[3].zyzz
559: MOV TEMP[12].x, IMM[2].xxxx
560: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[4].xxxx
561: UIF TEMP[13].xxxx :0
562: MOV TEMP[12].x, IMM[2].yyyy
563: RCP TEMP[13].x, CONST[22].xxxx
564: MUL TEMP[11].xy, IN[3].zyyy, TEMP[13].xxxx
565: ELSE :0
566: RCP TEMP[13].x, CONST[21].xxxx
567: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
568: ENDIF
569: FRC TEMP[11].xy, TEMP[11].xyyy
570: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww
571: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
572: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
573: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx
574: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
575: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
576: MOV TEMP[12].xy, TEMP[11].xyyy
577: MOV TEMP[12].w, TEMP[8].xxxx
578: TXL TEMP[12], TEMP[12], SAMP[10], 2D
579: FSEQ TEMP[13].x, TEMP[7].zzzz, IMM[1].zzzz
580: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
581: MOV TEMP[14].xy, TEMP[11].xyyy
582: MOV TEMP[14].w, TEMP[8].xxxx
583: TXL TEMP[14], TEMP[14], SAMP[8], 2D
584: FSEQ TEMP[15].x, TEMP[7].zzzz, IMM[3].xxxx
585: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
586: MOV TEMP[16].xy, TEMP[11].xyyy
587: MOV TEMP[16].w, TEMP[8].xxxx
588: TXL TEMP[16], TEMP[16], SAMP[6], 2D
589: FSEQ TEMP[17].x, TEMP[7].zzzz, IMM[2].wwww
590: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
591: MOV TEMP[18].xy, TEMP[11].xyyy
592: MOV TEMP[18].w, TEMP[8].xxxx
593: TXL TEMP[18], TEMP[18], SAMP[4], 2D
594: FSEQ TEMP[19].x, TEMP[7].zzzz, IMM[2].zzzz
595: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
596: MOV TEMP[11].xy, TEMP[11].xyyy
597: MOV TEMP[11].w, TEMP[8].xxxx
598: TXL TEMP[11], TEMP[11], SAMP[2], 2D
599: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[3].yyyy
600: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
601: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
602: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
603: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
604: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
605: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11]
606: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz
607: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy
608: MOV_SAT TEMP[28].x, TEMP[12].xxxx
609: MOV TEMP[12].xy, IN[3].zxzz
610: MOV TEMP[13].x, IMM[2].xxxx
611: FSNE TEMP[14].x, CONST[19].xxxx, TEMP[4].xxxx
612: UIF TEMP[14].xxxx :0
613: MOV TEMP[13].x, IMM[2].yyyy
614: RCP TEMP[14].x, CONST[22].xxxx
615: MUL TEMP[12].xy, IN[3].zxxx, TEMP[14].xxxx
616: ELSE :0
617: RCP TEMP[14].x, CONST[21].xxxx
618: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
619: ENDIF
620: FRC TEMP[12].xy, TEMP[12].xyyy
621: MUL TEMP[14].x, CONST[23].xxxx, IMM[2].wwww
622: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
623: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
624: MUL TEMP[13].x, TEMP[13].xxxx, CONST[23].xxxx
625: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
626: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
627: MOV TEMP[13].xy, TEMP[12].xyyy
628: MOV TEMP[13].w, TEMP[8].xxxx
629: TXL TEMP[13], TEMP[13], SAMP[10], 2D
630: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz
631: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
632: MOV TEMP[15].xy, TEMP[12].xyyy
633: MOV TEMP[15].w, TEMP[8].xxxx
634: TXL TEMP[15], TEMP[15], SAMP[8], 2D
635: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx
636: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
637: MOV TEMP[17].xy, TEMP[12].xyyy
638: MOV TEMP[17].w, TEMP[8].xxxx
639: TXL TEMP[17], TEMP[17], SAMP[6], 2D
640: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww
641: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
642: MOV TEMP[19].xy, TEMP[12].xyyy
643: MOV TEMP[19].w, TEMP[8].xxxx
644: TXL TEMP[19], TEMP[19], SAMP[4], 2D
645: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz
646: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
647: MOV TEMP[12].xy, TEMP[12].xyyy
648: MOV TEMP[12].w, TEMP[8].xxxx
649: TXL TEMP[12], TEMP[12], SAMP[2], 2D
650: FSEQ TEMP[21].x, TEMP[7].zzzz, IMM[3].yyyy
651: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
652: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
653: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
654: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
655: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
656: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12]
657: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz
658: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy
659: MOV_SAT TEMP[29].x, TEMP[13].xxxx
660: MOV TEMP[13].xy, IN[3].xyxx
661: MOV TEMP[14].x, IMM[2].xxxx
662: FSNE TEMP[15].x, CONST[19].xxxx, TEMP[4].xxxx
663: UIF TEMP[15].xxxx :0
664: MOV TEMP[14].x, IMM[2].yyyy
665: RCP TEMP[15].x, CONST[22].xxxx
666: MUL TEMP[13].xy, IN[3].xyyy, TEMP[15].xxxx
667: ELSE :0
668: RCP TEMP[15].x, CONST[21].xxxx
669: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
670: ENDIF
671: FRC TEMP[13].xy, TEMP[13].xyyy
672: MUL TEMP[15].x, CONST[23].xxxx, IMM[2].wwww
673: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
674: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
675: MUL TEMP[14].x, TEMP[14].xxxx, CONST[23].xxxx
676: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
677: MAD TEMP[4].xy, TEMP[13].xyyy, TEMP[4].xxxx, TEMP[7].xyyy
678: MOV TEMP[13].xy, TEMP[4].xyyy
679: MOV TEMP[13].w, TEMP[8].xxxx
680: TXL TEMP[13], TEMP[13], SAMP[10], 2D
681: FSEQ TEMP[14].x, TEMP[7].zzzz, IMM[1].zzzz
682: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
683: MOV TEMP[15].xy, TEMP[4].xyyy
684: MOV TEMP[15].w, TEMP[8].xxxx
685: TXL TEMP[15], TEMP[15], SAMP[8], 2D
686: FSEQ TEMP[16].x, TEMP[7].zzzz, IMM[3].xxxx
687: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
688: MOV TEMP[17].xy, TEMP[4].xyyy
689: MOV TEMP[17].w, TEMP[8].xxxx
690: TXL TEMP[17], TEMP[17], SAMP[6], 2D
691: FSEQ TEMP[18].x, TEMP[7].zzzz, IMM[2].wwww
692: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
693: MOV TEMP[19].xy, TEMP[4].xyyy
694: MOV TEMP[19].w, TEMP[8].xxxx
695: TXL TEMP[19], TEMP[19], SAMP[4], 2D
696: FSEQ TEMP[20].x, TEMP[7].zzzz, IMM[2].zzzz
697: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
698: MOV TEMP[4].xy, TEMP[4].xyyy
699: MOV TEMP[4].w, TEMP[8].xxxx
700: TXL TEMP[4], TEMP[4], SAMP[2], 2D
701: FSEQ TEMP[7].x, TEMP[7].zzzz, IMM[3].yyyy
702: AND TEMP[7].x, TEMP[7].xxxx, IMM[2].zzzz
703: MUL TEMP[4], TEMP[4], TEMP[7].xxxx
704: MAD TEMP[4], TEMP[19], TEMP[20].xxxx, TEMP[4]
705: MAD TEMP[4], TEMP[17], TEMP[18].xxxx, TEMP[4]
706: MAD TEMP[4], TEMP[15], TEMP[16].xxxx, TEMP[4]
707: MAD TEMP[4].yw, TEMP[13], TEMP[14].xxxx, TEMP[4]
708: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz
709: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy
710: MOV_SAT TEMP[30].x, TEMP[7].xxxx
711: MOV TEMP[7].x, IMM[3].yyyy
712: MOV TEMP[7].y, TEMP[11].xxxx
713: MOV TEMP[7].z, TEMP[11].yyyy
714: MOV TEMP[11].y, IMM[3].yyyy
715: MOV TEMP[11].x, TEMP[12].yyyy
716: MOV TEMP[11].z, TEMP[12].xxxx
717: MOV TEMP[12].z, IMM[3].yyyy
718: MOV TEMP[12].xy, TEMP[4].xyxx
719: MUL TEMP[4].xyz, TEMP[7].xyzz, TEMP[2].xxxx
720: MAD TEMP[4].xyz, TEMP[11].xyzz, TEMP[2].yyyy, TEMP[4].xyzz
721: MAD TEMP[4].xyz, TEMP[12].xyzz, TEMP[2].zzzz, TEMP[4].xyzz
722: MOV TEMP[7].xy, IN[3].zyzz
723: MOV TEMP[11].x, IMM[2].xxxx
724: FSNE TEMP[12].x, CONST[19].xxxx, TEMP[5].xxxx
725: UIF TEMP[12].xxxx :0
726: MOV TEMP[11].x, IMM[2].yyyy
727: RCP TEMP[12].x, CONST[22].xxxx
728: MUL TEMP[7].xy, IN[3].zyyy, TEMP[12].xxxx
729: ELSE :0
730: RCP TEMP[12].x, CONST[21].xxxx
731: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx
732: ENDIF
733: FRC TEMP[7].xy, TEMP[7].xyyy
734: MUL TEMP[12].x, CONST[23].xxxx, IMM[2].wwww
735: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
736: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
737: MUL TEMP[11].x, TEMP[11].xxxx, CONST[23].xxxx
738: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
739: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
740: MOV TEMP[11].xy, TEMP[7].xyyy
741: MOV TEMP[11].w, TEMP[8].xxxx
742: TXL TEMP[11], TEMP[11], SAMP[10], 2D
743: FSEQ TEMP[12].x, TEMP[9].zzzz, IMM[1].zzzz
744: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
745: MOV TEMP[13].xy, TEMP[7].xyyy
746: MOV TEMP[13].w, TEMP[8].xxxx
747: TXL TEMP[13], TEMP[13], SAMP[8], 2D
748: FSEQ TEMP[14].x, TEMP[9].zzzz, IMM[3].xxxx
749: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
750: MOV TEMP[15].xy, TEMP[7].xyyy
751: MOV TEMP[15].w, TEMP[8].xxxx
752: TXL TEMP[15], TEMP[15], SAMP[6], 2D
753: FSEQ TEMP[16].x, TEMP[9].zzzz, IMM[2].wwww
754: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
755: MOV TEMP[17].xy, TEMP[7].xyyy
756: MOV TEMP[17].w, TEMP[8].xxxx
757: TXL TEMP[17], TEMP[17], SAMP[4], 2D
758: FSEQ TEMP[18].x, TEMP[9].zzzz, IMM[2].zzzz
759: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
760: MOV TEMP[7].xy, TEMP[7].xyyy
761: MOV TEMP[7].w, TEMP[8].xxxx
762: TXL TEMP[7], TEMP[7], SAMP[2], 2D
763: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[3].yyyy
764: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
765: MUL TEMP[7], TEMP[7], TEMP[19].xxxx
766: MAD TEMP[7], TEMP[17], TEMP[18].xxxx, TEMP[7]
767: MAD TEMP[7], TEMP[15], TEMP[16].xxxx, TEMP[7]
768: MAD TEMP[7], TEMP[13], TEMP[14].xxxx, TEMP[7]
769: MAD TEMP[7].yw, TEMP[11], TEMP[12].xxxx, TEMP[7]
770: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz
771: DP2 TEMP[11].x, TEMP[7].xyyy, TEMP[7].xyyy
772: MOV_SAT TEMP[31].x, TEMP[11].xxxx
773: MOV TEMP[11].xy, IN[3].zxzz
774: MOV TEMP[12].x, IMM[2].xxxx
775: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[5].xxxx
776: UIF TEMP[13].xxxx :0
777: MOV TEMP[12].x, IMM[2].yyyy
778: RCP TEMP[13].x, CONST[22].xxxx
779: MUL TEMP[11].xy, IN[3].zxxx, TEMP[13].xxxx
780: ELSE :0
781: RCP TEMP[13].x, CONST[21].xxxx
782: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
783: ENDIF
784: FRC TEMP[11].xy, TEMP[11].xyyy
785: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww
786: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
787: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
788: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx
789: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
790: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
791: MOV TEMP[12].xy, TEMP[11].xyyy
792: MOV TEMP[12].w, TEMP[8].xxxx
793: TXL TEMP[12], TEMP[12], SAMP[10], 2D
794: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz
795: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
796: MOV TEMP[14].xy, TEMP[11].xyyy
797: MOV TEMP[14].w, TEMP[8].xxxx
798: TXL TEMP[14], TEMP[14], SAMP[8], 2D
799: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx
800: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
801: MOV TEMP[16].xy, TEMP[11].xyyy
802: MOV TEMP[16].w, TEMP[8].xxxx
803: TXL TEMP[16], TEMP[16], SAMP[6], 2D
804: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww
805: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
806: MOV TEMP[18].xy, TEMP[11].xyyy
807: MOV TEMP[18].w, TEMP[8].xxxx
808: TXL TEMP[18], TEMP[18], SAMP[4], 2D
809: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz
810: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
811: MOV TEMP[11].xy, TEMP[11].xyyy
812: MOV TEMP[11].w, TEMP[8].xxxx
813: TXL TEMP[11], TEMP[11], SAMP[2], 2D
814: FSEQ TEMP[20].x, TEMP[9].zzzz, IMM[3].yyyy
815: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
816: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
817: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
818: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
819: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
820: MAD TEMP[11].yw, TEMP[12], TEMP[13].xxxx, TEMP[11]
821: MAD TEMP[11].xy, TEMP[11].wyyy, IMM[2].wwww, IMM[3].zzzz
822: DP2 TEMP[12].x, TEMP[11].xyyy, TEMP[11].xyyy
823: MOV_SAT TEMP[32].x, TEMP[12].xxxx
824: MOV TEMP[12].xy, IN[3].xyxx
825: MOV TEMP[13].x, IMM[2].xxxx
826: FSNE TEMP[14].x, CONST[19].xxxx, TEMP[5].xxxx
827: UIF TEMP[14].xxxx :0
828: MOV TEMP[13].x, IMM[2].yyyy
829: RCP TEMP[14].x, CONST[22].xxxx
830: MUL TEMP[12].xy, IN[3].xyyy, TEMP[14].xxxx
831: ELSE :0
832: RCP TEMP[14].x, CONST[21].xxxx
833: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
834: ENDIF
835: FRC TEMP[12].xy, TEMP[12].xyyy
836: MUL TEMP[14].x, CONST[23].xxxx, IMM[2].wwww
837: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
838: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
839: MUL TEMP[13].x, TEMP[13].xxxx, CONST[23].xxxx
840: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
841: MAD TEMP[5].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[9].xyyy
842: MOV TEMP[12].xy, TEMP[5].xyyy
843: MOV TEMP[12].w, TEMP[8].xxxx
844: TXL TEMP[12], TEMP[12], SAMP[10], 2D
845: FSEQ TEMP[13].x, TEMP[9].zzzz, IMM[1].zzzz
846: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
847: MOV TEMP[14].xy, TEMP[5].xyyy
848: MOV TEMP[14].w, TEMP[8].xxxx
849: TXL TEMP[14], TEMP[14], SAMP[8], 2D
850: FSEQ TEMP[15].x, TEMP[9].zzzz, IMM[3].xxxx
851: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
852: MOV TEMP[16].xy, TEMP[5].xyyy
853: MOV TEMP[16].w, TEMP[8].xxxx
854: TXL TEMP[16], TEMP[16], SAMP[6], 2D
855: FSEQ TEMP[17].x, TEMP[9].zzzz, IMM[2].wwww
856: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
857: MOV TEMP[18].xy, TEMP[5].xyyy
858: MOV TEMP[18].w, TEMP[8].xxxx
859: TXL TEMP[18], TEMP[18], SAMP[4], 2D
860: FSEQ TEMP[19].x, TEMP[9].zzzz, IMM[2].zzzz
861: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
862: MOV TEMP[5].xy, TEMP[5].xyyy
863: MOV TEMP[5].w, TEMP[8].xxxx
864: TXL TEMP[5], TEMP[5], SAMP[2], 2D
865: FSEQ TEMP[9].x, TEMP[9].zzzz, IMM[3].yyyy
866: AND TEMP[9].x, TEMP[9].xxxx, IMM[2].zzzz
867: MUL TEMP[5], TEMP[5], TEMP[9].xxxx
868: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5]
869: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5]
870: MAD TEMP[5], TEMP[14], TEMP[15].xxxx, TEMP[5]
871: MAD TEMP[5].yw, TEMP[12], TEMP[13].xxxx, TEMP[5]
872: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz
873: DP2 TEMP[9].x, TEMP[5].xyyy, TEMP[5].xyyy
874: MOV_SAT TEMP[33].x, TEMP[9].xxxx
875: MOV TEMP[9].x, IMM[3].yyyy
876: MOV TEMP[9].y, TEMP[7].xxxx
877: MOV TEMP[9].z, TEMP[7].yyyy
878: MOV TEMP[7].y, IMM[3].yyyy
879: MOV TEMP[7].x, TEMP[11].yyyy
880: MOV TEMP[7].z, TEMP[11].xxxx
881: MOV TEMP[11].z, IMM[3].yyyy
882: MOV TEMP[11].xy, TEMP[5].xyxx
883: MUL TEMP[5].xyz, TEMP[9].xyzz, TEMP[2].xxxx
884: MAD TEMP[5].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[5].xyzz
885: MAD TEMP[5].xyz, TEMP[11].xyzz, TEMP[2].zzzz, TEMP[5].xyzz
886: MOV TEMP[7].xy, IN[3].zyzz
887: MOV TEMP[9].x, IMM[2].xxxx
888: FSNE TEMP[11].x, CONST[19].xxxx, TEMP[6].xxxx
889: UIF TEMP[11].xxxx :0
890: MOV TEMP[9].x, IMM[2].yyyy
891: RCP TEMP[11].x, CONST[22].xxxx
892: MUL TEMP[7].xy, IN[3].zyyy, TEMP[11].xxxx
893: ELSE :0
894: RCP TEMP[11].x, CONST[21].xxxx
895: MUL TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx
896: ENDIF
897: FRC TEMP[7].xy, TEMP[7].xyyy
898: MUL TEMP[11].x, CONST[23].xxxx, IMM[2].wwww
899: MUL TEMP[11].x, TEMP[11].xxxx, TEMP[9].xxxx
900: ADD TEMP[11].x, IMM[2].zzzz, -TEMP[11].xxxx
901: MUL TEMP[9].x, TEMP[9].xxxx, CONST[23].xxxx
902: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[11].xxxx, TEMP[9].xxxx
903: MAD TEMP[7].xy, TEMP[7].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
904: MOV TEMP[9].xy, TEMP[7].xyyy
905: MOV TEMP[9].w, TEMP[8].xxxx
906: TXL TEMP[9], TEMP[9], SAMP[10], 2D
907: FSEQ TEMP[11].x, TEMP[3].zzzz, IMM[1].zzzz
908: AND TEMP[11].x, TEMP[11].xxxx, IMM[2].zzzz
909: MOV TEMP[12].xy, TEMP[7].xyyy
910: MOV TEMP[12].w, TEMP[8].xxxx
911: TXL TEMP[12], TEMP[12], SAMP[8], 2D
912: FSEQ TEMP[13].x, TEMP[3].zzzz, IMM[3].xxxx
913: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
914: MOV TEMP[14].xy, TEMP[7].xyyy
915: MOV TEMP[14].w, TEMP[8].xxxx
916: TXL TEMP[14], TEMP[14], SAMP[6], 2D
917: FSEQ TEMP[15].x, TEMP[3].zzzz, IMM[2].wwww
918: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
919: MOV TEMP[16].xy, TEMP[7].xyyy
920: MOV TEMP[16].w, TEMP[8].xxxx
921: TXL TEMP[16], TEMP[16], SAMP[4], 2D
922: FSEQ TEMP[17].x, TEMP[3].zzzz, IMM[2].zzzz
923: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
924: MOV TEMP[7].xy, TEMP[7].xyyy
925: MOV TEMP[7].w, TEMP[8].xxxx
926: TXL TEMP[7], TEMP[7], SAMP[2], 2D
927: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[3].yyyy
928: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
929: MUL TEMP[7], TEMP[7], TEMP[18].xxxx
930: MAD TEMP[7], TEMP[16], TEMP[17].xxxx, TEMP[7]
931: MAD TEMP[7], TEMP[14], TEMP[15].xxxx, TEMP[7]
932: MAD TEMP[7], TEMP[12], TEMP[13].xxxx, TEMP[7]
933: MAD TEMP[7].yw, TEMP[9], TEMP[11].xxxx, TEMP[7]
934: MAD TEMP[7].xy, TEMP[7].wyyy, IMM[2].wwww, IMM[3].zzzz
935: DP2 TEMP[9].x, TEMP[7].xyyy, TEMP[7].xyyy
936: MOV_SAT TEMP[34].x, TEMP[9].xxxx
937: MOV TEMP[9].xy, IN[3].zxzz
938: MOV TEMP[11].x, IMM[2].xxxx
939: FSNE TEMP[12].x, CONST[19].xxxx, TEMP[6].xxxx
940: UIF TEMP[12].xxxx :0
941: MOV TEMP[11].x, IMM[2].yyyy
942: RCP TEMP[12].x, CONST[22].xxxx
943: MUL TEMP[9].xy, IN[3].zxxx, TEMP[12].xxxx
944: ELSE :0
945: RCP TEMP[12].x, CONST[21].xxxx
946: MUL TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx
947: ENDIF
948: FRC TEMP[9].xy, TEMP[9].xyyy
949: MUL TEMP[12].x, CONST[23].xxxx, IMM[2].wwww
950: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[11].xxxx
951: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
952: MUL TEMP[11].x, TEMP[11].xxxx, CONST[23].xxxx
953: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[12].xxxx, TEMP[11].xxxx
954: MAD TEMP[9].xy, TEMP[9].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
955: MOV TEMP[11].xy, TEMP[9].xyyy
956: MOV TEMP[11].w, TEMP[8].xxxx
957: TXL TEMP[11], TEMP[11], SAMP[10], 2D
958: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz
959: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
960: MOV TEMP[13].xy, TEMP[9].xyyy
961: MOV TEMP[13].w, TEMP[8].xxxx
962: TXL TEMP[13], TEMP[13], SAMP[8], 2D
963: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx
964: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
965: MOV TEMP[15].xy, TEMP[9].xyyy
966: MOV TEMP[15].w, TEMP[8].xxxx
967: TXL TEMP[15], TEMP[15], SAMP[6], 2D
968: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww
969: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
970: MOV TEMP[17].xy, TEMP[9].xyyy
971: MOV TEMP[17].w, TEMP[8].xxxx
972: TXL TEMP[17], TEMP[17], SAMP[4], 2D
973: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz
974: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
975: MOV TEMP[9].xy, TEMP[9].xyyy
976: MOV TEMP[9].w, TEMP[8].xxxx
977: TXL TEMP[9], TEMP[9], SAMP[2], 2D
978: FSEQ TEMP[19].x, TEMP[3].zzzz, IMM[3].yyyy
979: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
980: MUL TEMP[9], TEMP[9], TEMP[19].xxxx
981: MAD TEMP[9], TEMP[17], TEMP[18].xxxx, TEMP[9]
982: MAD TEMP[9], TEMP[15], TEMP[16].xxxx, TEMP[9]
983: MAD TEMP[9], TEMP[13], TEMP[14].xxxx, TEMP[9]
984: MAD TEMP[9].yw, TEMP[11], TEMP[12].xxxx, TEMP[9]
985: MAD TEMP[9].xy, TEMP[9].wyyy, IMM[2].wwww, IMM[3].zzzz
986: DP2 TEMP[11].x, TEMP[9].xyyy, TEMP[9].xyyy
987: MOV_SAT TEMP[35].x, TEMP[11].xxxx
988: MOV TEMP[11].xy, IN[3].xyxx
989: MOV TEMP[12].x, IMM[2].xxxx
990: FSNE TEMP[13].x, CONST[19].xxxx, TEMP[6].xxxx
991: UIF TEMP[13].xxxx :0
992: MOV TEMP[12].x, IMM[2].yyyy
993: RCP TEMP[13].x, CONST[22].xxxx
994: MUL TEMP[11].xy, IN[3].xyyy, TEMP[13].xxxx
995: ELSE :0
996: RCP TEMP[13].x, CONST[21].xxxx
997: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
998: ENDIF
999: FRC TEMP[11].xy, TEMP[11].xyyy
1000: MUL TEMP[13].x, CONST[23].xxxx, IMM[2].wwww
1001: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
1002: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
1003: MUL TEMP[12].x, TEMP[12].xxxx, CONST[23].xxxx
1004: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
1005: MAD TEMP[6].xy, TEMP[11].xyyy, TEMP[6].xxxx, TEMP[3].xyyy
1006: MOV TEMP[11].xy, TEMP[6].xyyy
1007: MOV TEMP[11].w, TEMP[8].xxxx
1008: TXL TEMP[11], TEMP[11], SAMP[10], 2D
1009: FSEQ TEMP[12].x, TEMP[3].zzzz, IMM[1].zzzz
1010: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
1011: MOV TEMP[13].xy, TEMP[6].xyyy
1012: MOV TEMP[13].w, TEMP[8].xxxx
1013: TXL TEMP[13], TEMP[13], SAMP[8], 2D
1014: FSEQ TEMP[14].x, TEMP[3].zzzz, IMM[3].xxxx
1015: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
1016: MOV TEMP[15].xy, TEMP[6].xyyy
1017: MOV TEMP[15].w, TEMP[8].xxxx
1018: TXL TEMP[15], TEMP[15], SAMP[6], 2D
1019: FSEQ TEMP[16].x, TEMP[3].zzzz, IMM[2].wwww
1020: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
1021: MOV TEMP[17].xy, TEMP[6].xyyy
1022: MOV TEMP[17].w, TEMP[8].xxxx
1023: TXL TEMP[17], TEMP[17], SAMP[4], 2D
1024: FSEQ TEMP[18].x, TEMP[3].zzzz, IMM[2].zzzz
1025: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
1026: MOV TEMP[6].xy, TEMP[6].xyyy
1027: MOV TEMP[6].w, TEMP[8].xxxx
1028: TXL TEMP[6], TEMP[6], SAMP[2], 2D
1029: FSEQ TEMP[3].x, TEMP[3].zzzz, IMM[3].yyyy
1030: AND TEMP[3].x, TEMP[3].xxxx, IMM[2].zzzz
1031: MUL TEMP[3], TEMP[6], TEMP[3].xxxx
1032: MAD TEMP[3], TEMP[17], TEMP[18].xxxx, TEMP[3]
1033: MAD TEMP[3], TEMP[15], TEMP[16].xxxx, TEMP[3]
1034: MAD TEMP[3], TEMP[13], TEMP[14].xxxx, TEMP[3]
1035: MAD TEMP[3].yw, TEMP[11], TEMP[12].xxxx, TEMP[3]
1036: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[2].wwww, IMM[3].zzzz
1037: DP2 TEMP[6].x, TEMP[3].xyyy, TEMP[3].xyyy
1038: MOV_SAT TEMP[36].x, TEMP[6].xxxx
1039: MOV TEMP[6].x, IMM[3].yyyy
1040: MOV TEMP[6].y, TEMP[7].xxxx
1041: MOV TEMP[6].z, TEMP[7].yyyy
1042: MOV TEMP[7].y, IMM[3].yyyy
1043: MOV TEMP[7].x, TEMP[9].yyyy
1044: MOV TEMP[7].z, TEMP[9].xxxx
1045: MOV TEMP[8].z, IMM[3].yyyy
1046: MOV TEMP[8].xy, TEMP[3].xyxx
1047: MOV TEMP[3].w, IMM[2].zzzz
1048: MUL TEMP[6].xyz, TEMP[6].xyzz, TEMP[2].xxxx
1049: MAD TEMP[6].xyz, TEMP[7].xyzz, TEMP[2].yyyy, TEMP[6].xyzz
1050: MAD TEMP[2].xyz, TEMP[8].xyzz, TEMP[2].zzzz, TEMP[6].xyzz
1051: MUL TEMP[4].xyz, IN[1].xxxx, TEMP[4].xyzz
1052: MAD TEMP[4].xyz, IN[1].yyyy, TEMP[5].xyzz, TEMP[4].xyzz
1053: MAD TEMP[3].xyz, IN[1].zzzz, TEMP[2].xyzz, TEMP[4].xyzz
1054: DP4 TEMP[2].x, TEMP[3], TEMP[3]
1055: RSQ TEMP[2].x, TEMP[2].xxxx
1056: MUL TEMP[2].xyz, TEMP[3], TEMP[2].xxxx
1057: MUL TEMP[2].xyz, TEMP[2].xyzz, IN[0].wwww
1058: ADD TEMP[2].xyz, IN[2].yzww, -TEMP[2].xyzz
1059: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
1060: RSQ TEMP[3].x, TEMP[3].xxxx
1061: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
1062: MUL TEMP[3], CONST[5], IN[3].xxxx
1063: MAD TEMP[3], CONST[6], IN[3].yyyy, TEMP[3]
1064: MAD TEMP[3], CONST[7], IN[3].zzzz, TEMP[3]
1065: ADD TEMP[3].xyz, TEMP[3], CONST[8]
1066: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[3].xyzz
1067: MOV TEMP[3].xy, TEMP[3].xxxx
1068: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D
1069: ADD TEMP[0].xyz, TEMP[1].xyzz, TEMP[0].xyzz
1070: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
1071: RSQ TEMP[4].x, TEMP[4].xxxx
1072: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
1073: DP3 TEMP[0].x, TEMP[2].xyzz, TEMP[0].xyzz
1074: MAX TEMP[0].x, IMM[3].wwww, TEMP[0].xxxx
1075: MUL TEMP[4].x, IMM[4].xxxx, IN[1].wwww
1076: POW TEMP[0].x, TEMP[0].xxxx, TEMP[4].xxxx
1077: MOV_SAT TEMP[0].x, TEMP[0].xxxx
1078: MOV TEMP[4].w, IMM[3].yyyy
1079: MOV TEMP[4].xyz, CONST[26].xyzx
1080: MUL TEMP[5].x, IMM[2].wwww, TEMP[0].xxxx
1081: ADD TEMP[5].x, IMM[3].xxxx, -TEMP[5].xxxx
1082: MUL TEMP[5].x, TEMP[0].xxxx, TEMP[5].xxxx
1083: MUL TEMP[5].x, TEMP[0].xxxx, TEMP[5].xxxx
1084: MUL TEMP[5].x, TEMP[5].xxxx, IN[1].wwww
1085: MUL TEMP[6].xyz, TEMP[10].xyzz, CONST[3].xyzz
1086: DP3 TEMP[1].x, TEMP[2].xyzz, TEMP[1].xyzz
1087: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1088: MUL TEMP[2], CONST[27], IMM[2].wwww
1089: MUL TEMP[2], TEMP[2], TEMP[3].wwww
1090: MAX TEMP[2], TEMP[2], TEMP[4]
1091: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz
1092: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[10].xyzz
1093: MAD TEMP[1].xyz, TEMP[6].xyzz, TEMP[1].xxxx, TEMP[2].xyzz
1094: MAD TEMP[1].xyz, CONST[3].xyzz, TEMP[5].xxxx, TEMP[1].xyzz
1095: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[3].wwww
1096: MUL TEMP[0].xyz, TEMP[1].xyzz, IMM[0].wwww
1097: MAD TEMP[1].x, IN[2].xxxx, CONST[2].zzzz, CONST[2].wwww
1098: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1099: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xxxx
1100: MOV TEMP[0].w, IMM[2].zzzz
1101: MOV OUT[0], TEMP[0]
1102: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 44)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 400)
%54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 416)
%55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 420)
%56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 424)
%57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 432)
%58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 436)
%59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 440)
%60 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%61 = load <32 x i8>, <32 x i8> addrspace(2)* %60, align 32, !tbaa !0
%62 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%63 = load <16 x i8>, <16 x i8> addrspace(2)* %62, align 16, !tbaa !0
%64 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%65 = load <8 x i32>, <8 x i32> addrspace(2)* %64, align 32, !tbaa !0
%66 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%67 = load <4 x i32>, <4 x i32> addrspace(2)* %66, align 16, !tbaa !0
%68 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%69 = load <8 x i32>, <8 x i32> addrspace(2)* %68, align 32, !tbaa !0
%70 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%71 = load <4 x i32>, <4 x i32> addrspace(2)* %70, align 16, !tbaa !0
%72 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%73 = load <8 x i32>, <8 x i32> addrspace(2)* %72, align 32, !tbaa !0
%74 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%75 = load <4 x i32>, <4 x i32> addrspace(2)* %74, align 16, !tbaa !0
%76 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%77 = load <8 x i32>, <8 x i32> addrspace(2)* %76, align 32, !tbaa !0
%78 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%79 = load <4 x i32>, <4 x i32> addrspace(2)* %78, align 16, !tbaa !0
%80 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%81 = load <8 x i32>, <8 x i32> addrspace(2)* %80, align 32, !tbaa !0
%82 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%83 = load <4 x i32>, <4 x i32> addrspace(2)* %82, align 16, !tbaa !0
%84 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6
%85 = load <8 x i32>, <8 x i32> addrspace(2)* %84, align 32, !tbaa !0
%86 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6
%87 = load <4 x i32>, <4 x i32> addrspace(2)* %86, align 16, !tbaa !0
%88 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7
%89 = load <8 x i32>, <8 x i32> addrspace(2)* %88, align 32, !tbaa !0
%90 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7
%91 = load <4 x i32>, <4 x i32> addrspace(2)* %90, align 16, !tbaa !0
%92 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8
%93 = load <8 x i32>, <8 x i32> addrspace(2)* %92, align 32, !tbaa !0
%94 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8
%95 = load <4 x i32>, <4 x i32> addrspace(2)* %94, align 16, !tbaa !0
%96 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9
%97 = load <8 x i32>, <8 x i32> addrspace(2)* %96, align 32, !tbaa !0
%98 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9
%99 = load <4 x i32>, <4 x i32> addrspace(2)* %98, align 16, !tbaa !0
%100 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10
%101 = load <8 x i32>, <8 x i32> addrspace(2)* %100, align 32, !tbaa !0
%102 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10
%103 = load <4 x i32>, <4 x i32> addrspace(2)* %102, align 16, !tbaa !0
%104 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%105 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%106 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%107 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%108 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%109 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%110 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%111 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%116 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%117 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%118 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%119 = fsub float %27, %116
%120 = fsub float %28, %117
%121 = fsub float %29, %118
%122 = fmul float %119, %119
%123 = fmul float %120, %120
%124 = fadd float %123, %122
%125 = fmul float %121, %121
%126 = fadd float %124, %125
%127 = call float @llvm.AMDGPU.rsq.clamped.f32(float %126)
%128 = fmul float %119, %127
%129 = fmul float %120, %127
%130 = fmul float %121, %127
%131 = fsub float %24, %116
%132 = fsub float %25, %117
%133 = fsub float %26, %118
%134 = fmul float %131, %131
%135 = fmul float %132, %132
%136 = fadd float %135, %134
%137 = fmul float %133, %133
%138 = fadd float %136, %137
%139 = call float @llvm.AMDGPU.rsq.clamped.f32(float %138)
%140 = fmul float %131, %139
%141 = fmul float %132, %139
%142 = fmul float %133, %139
%143 = call float @llvm.fabs.f32(float %113)
%144 = call float @llvm.fabs.f32(float %114)
%145 = call float @llvm.fabs.f32(float %115)
%146 = fmul float %143, %143
%147 = fmul float %144, %144
%148 = fadd float %147, %146
%149 = fmul float %145, %145
%150 = fadd float %148, %149
%151 = call float @llvm.AMDGPU.rsq.clamped.f32(float %150)
%152 = fmul float %143, %151
%153 = fadd float %152, 0xBFC99999A0000000
%154 = fmul float %144, %151
%155 = fadd float %154, 0xBFC99999A0000000
%156 = fmul float %145, %151
%157 = fadd float %156, 0xBFC99999A0000000
%158 = fmul float %153, 7.000000e+00
%159 = fmul float %155, 7.000000e+00
%160 = fmul float %157, 7.000000e+00
%161 = call float @llvm.maxnum.f32(float %158, float 0x3F847AE140000000)
%162 = call float @llvm.maxnum.f32(float %159, float 0x3F847AE140000000)
%163 = call float @llvm.maxnum.f32(float %160, float 0x3F847AE140000000)
%164 = fadd float %161, %162
%165 = fadd float %164, %163
%166 = fdiv float 1.000000e+00, %165
%167 = fmul float %161, %166
%168 = fmul float %162, %166
%169 = fmul float %163, %166
%170 = fadd float %104, 5.000000e-01
%171 = fadd float %105, 5.000000e-01
%172 = fadd float %106, 5.000000e-01
%173 = call float @llvm.floor.f32(float %170)
%174 = call float @llvm.floor.f32(float %171)
%175 = call float @llvm.floor.f32(float %172)
%176 = fmul float %173, %47
%177 = call float @llvm.floor.f32(float %176)
%178 = fmul float %177, %47
%179 = fcmp ult float %173, 6.400000e+01
br i1 %179, label %ELSE, label %IF
IF: ; preds = %main_body
%180 = fadd float %173, -6.400000e+01
%181 = fmul float %180, %48
%182 = call float @llvm.floor.f32(float %181)
%183 = fmul float %182, %48
%184 = call float @llvm.floor.f32(float %181)
%185 = fsub float %181, %184
%186 = call float @llvm.floor.f32(float %183)
%187 = fsub float %183, %186
%188 = call float @llvm.floor.f32(float %183)
%189 = fadd float %188, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%190 = call float @llvm.floor.f32(float %176)
%191 = fsub float %176, %190
%192 = call float @llvm.floor.f32(float %178)
%193 = fsub float %178, %192
%194 = call float @llvm.floor.f32(float %178)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp30.0 = phi float [ %189, %IF ], [ %194, %ELSE ]
%temp29.0 = phi float [ %187, %IF ], [ %193, %ELSE ]
%temp28.0 = phi float [ %185, %IF ], [ %191, %ELSE ]
%temp16.0 = phi float [ %48, %IF ], [ %47, %ELSE ]
%195 = fmul float %174, %47
%196 = call float @llvm.floor.f32(float %195)
%197 = fmul float %196, %47
%198 = fcmp ult float %174, 6.400000e+01
br i1 %198, label %ELSE150, label %IF149
IF149: ; preds = %ENDIF
%199 = fadd float %174, -6.400000e+01
%200 = fmul float %199, %48
%201 = call float @llvm.floor.f32(float %200)
%202 = fmul float %201, %48
%203 = call float @llvm.floor.f32(float %200)
%204 = fsub float %200, %203
%205 = call float @llvm.floor.f32(float %202)
%206 = fsub float %202, %205
%207 = call float @llvm.floor.f32(float %202)
%208 = fadd float %207, 4.000000e+00
br label %ENDIF148
ELSE150: ; preds = %ENDIF
%209 = call float @llvm.floor.f32(float %195)
%210 = fsub float %195, %209
%211 = call float @llvm.floor.f32(float %197)
%212 = fsub float %197, %211
%213 = call float @llvm.floor.f32(float %197)
br label %ENDIF148
ENDIF148: ; preds = %ELSE150, %IF149
%temp36.0 = phi float [ %204, %IF149 ], [ %210, %ELSE150 ]
%temp37.0 = phi float [ %206, %IF149 ], [ %212, %ELSE150 ]
%temp38.0 = phi float [ %208, %IF149 ], [ %213, %ELSE150 ]
%temp20.0 = phi float [ %48, %IF149 ], [ %47, %ELSE150 ]
%214 = fmul float %175, %47
%215 = call float @llvm.floor.f32(float %214)
%216 = fmul float %215, %47
%217 = fcmp ult float %175, 6.400000e+01
br i1 %217, label %ELSE153, label %IF152
IF152: ; preds = %ENDIF148
%218 = fadd float %175, -6.400000e+01
%219 = fmul float %218, %48
%220 = call float @llvm.floor.f32(float %219)
%221 = fmul float %220, %48
%222 = call float @llvm.floor.f32(float %219)
%223 = fsub float %219, %222
%224 = call float @llvm.floor.f32(float %221)
%225 = fsub float %221, %224
%226 = call float @llvm.floor.f32(float %221)
%227 = fadd float %226, 4.000000e+00
br label %ENDIF151
ELSE153: ; preds = %ENDIF148
%228 = call float @llvm.floor.f32(float %214)
%229 = fsub float %214, %228
%230 = call float @llvm.floor.f32(float %216)
%231 = fsub float %216, %230
%232 = call float @llvm.floor.f32(float %216)
br label %ENDIF151
ENDIF151: ; preds = %ELSE153, %IF152
%temp24.0 = phi float [ %48, %IF152 ], [ %47, %ELSE153 ]
%temp14.0 = phi float [ %227, %IF152 ], [ %232, %ELSE153 ]
%temp13.0 = phi float [ %225, %IF152 ], [ %231, %ELSE153 ]
%temp12.0 = phi float [ %223, %IF152 ], [ %229, %ELSE153 ]
%233 = fsub float %116, %24
%234 = fsub float %117, %25
%235 = fsub float %118, %26
%236 = fmul float %233, %233
%237 = fmul float %234, %234
%238 = fadd float %237, %236
%239 = fmul float %235, %235
%240 = fadd float %238, %239
%241 = fmul float %53, %240
%242 = call float @llvm.log2.f32(float %241)
%243 = fmul float %242, 0x3FE62E4300000000
%244 = fmul float %243, %52
%245 = fcmp une float %47, %temp16.0
%.sink208 = select i1 %245, float %50, float %49
%temp44.0 = select i1 %245, float 1.953125e-03, float 3.906250e-03
%246 = fdiv float 1.000000e+00, %.sink208
%247 = fmul float %116, %246
%248 = fmul float %117, %246
%249 = call float @llvm.floor.f32(float %247)
%250 = fsub float %247, %249
%251 = call float @llvm.floor.f32(float %248)
%252 = fsub float %248, %251
%253 = fmul float %51, 2.000000e+00
%254 = fmul float %253, %temp44.0
%255 = fsub float 1.000000e+00, %254
%256 = fmul float %temp44.0, %51
%257 = fmul float %250, %255
%258 = fadd float %257, %256
%259 = fmul float %252, %255
%260 = fadd float %259, %256
%261 = fmul float %258, %temp16.0
%262 = fadd float %261, %temp28.0
%263 = fmul float %260, %temp16.0
%264 = fadd float %263, %temp29.0
%265 = bitcast float %262 to i32
%266 = bitcast float %264 to i32
%267 = bitcast float %244 to i32
%268 = insertelement <4 x i32> undef, i32 %265, i32 0
%269 = insertelement <4 x i32> %268, i32 %266, i32 1
%270 = insertelement <4 x i32> %269, i32 %267, i32 2
%271 = bitcast <8 x i32> %97 to <32 x i8>
%272 = bitcast <4 x i32> %99 to <16 x i8>
%273 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %270, <32 x i8> %271, <16 x i8> %272, i32 2)
%274 = extractelement <4 x float> %273, i32 0
%275 = extractelement <4 x float> %273, i32 1
%276 = extractelement <4 x float> %273, i32 2
%277 = fcmp oeq float %temp30.0, 4.000000e+00
%278 = select i1 %277, float 1.000000e+00, float 0.000000e+00
%279 = bitcast float %262 to i32
%280 = bitcast float %264 to i32
%281 = bitcast float %244 to i32
%282 = insertelement <4 x i32> undef, i32 %279, i32 0
%283 = insertelement <4 x i32> %282, i32 %280, i32 1
%284 = insertelement <4 x i32> %283, i32 %281, i32 2
%285 = bitcast <8 x i32> %89 to <32 x i8>
%286 = bitcast <4 x i32> %91 to <16 x i8>
%287 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %284, <32 x i8> %285, <16 x i8> %286, i32 2)
%288 = extractelement <4 x float> %287, i32 0
%289 = extractelement <4 x float> %287, i32 1
%290 = extractelement <4 x float> %287, i32 2
%291 = fcmp oeq float %temp30.0, 3.000000e+00
%292 = select i1 %291, float 1.000000e+00, float 0.000000e+00
%293 = bitcast float %262 to i32
%294 = bitcast float %264 to i32
%295 = bitcast float %244 to i32
%296 = insertelement <4 x i32> undef, i32 %293, i32 0
%297 = insertelement <4 x i32> %296, i32 %294, i32 1
%298 = insertelement <4 x i32> %297, i32 %295, i32 2
%299 = bitcast <8 x i32> %81 to <32 x i8>
%300 = bitcast <4 x i32> %83 to <16 x i8>
%301 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %298, <32 x i8> %299, <16 x i8> %300, i32 2)
%302 = extractelement <4 x float> %301, i32 0
%303 = extractelement <4 x float> %301, i32 1
%304 = extractelement <4 x float> %301, i32 2
%305 = fcmp oeq float %temp30.0, 2.000000e+00
%306 = select i1 %305, float 1.000000e+00, float 0.000000e+00
%307 = bitcast float %262 to i32
%308 = bitcast float %264 to i32
%309 = bitcast float %244 to i32
%310 = insertelement <4 x i32> undef, i32 %307, i32 0
%311 = insertelement <4 x i32> %310, i32 %308, i32 1
%312 = insertelement <4 x i32> %311, i32 %309, i32 2
%313 = bitcast <8 x i32> %73 to <32 x i8>
%314 = bitcast <4 x i32> %75 to <16 x i8>
%315 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %312, <32 x i8> %313, <16 x i8> %314, i32 2)
%316 = extractelement <4 x float> %315, i32 0
%317 = extractelement <4 x float> %315, i32 1
%318 = extractelement <4 x float> %315, i32 2
%319 = fcmp oeq float %temp30.0, 1.000000e+00
%320 = select i1 %319, float 1.000000e+00, float 0.000000e+00
%321 = bitcast float %262 to i32
%322 = bitcast float %264 to i32
%323 = bitcast float %244 to i32
%324 = insertelement <4 x i32> undef, i32 %321, i32 0
%325 = insertelement <4 x i32> %324, i32 %322, i32 1
%326 = insertelement <4 x i32> %325, i32 %323, i32 2
%327 = bitcast <8 x i32> %65 to <32 x i8>
%328 = bitcast <4 x i32> %67 to <16 x i8>
%329 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %326, <32 x i8> %327, <16 x i8> %328, i32 2)
%330 = extractelement <4 x float> %329, i32 0
%331 = extractelement <4 x float> %329, i32 1
%332 = extractelement <4 x float> %329, i32 2
%333 = fcmp oeq float %temp30.0, 0.000000e+00
%334 = select i1 %333, float 1.000000e+00, float 0.000000e+00
%335 = fmul float %330, %334
%336 = fmul float %331, %334
%337 = fmul float %332, %334
%338 = fmul float %316, %320
%339 = fadd float %338, %335
%340 = fmul float %317, %320
%341 = fadd float %340, %336
%342 = fmul float %318, %320
%343 = fadd float %342, %337
%344 = fmul float %302, %306
%345 = fadd float %344, %339
%346 = fmul float %303, %306
%347 = fadd float %346, %341
%348 = fmul float %304, %306
%349 = fadd float %348, %343
%350 = fmul float %288, %292
%351 = fadd float %350, %345
%352 = fmul float %289, %292
%353 = fadd float %352, %347
%354 = fmul float %290, %292
%355 = fadd float %354, %349
%356 = fmul float %274, %278
%357 = fadd float %356, %351
%358 = fmul float %275, %278
%359 = fadd float %358, %353
%360 = fmul float %276, %278
%361 = fadd float %360, %355
%362 = fcmp une float %47, %temp16.0
%.sink209 = select i1 %362, float %50, float %49
%temp48.0 = select i1 %362, float 1.953125e-03, float 3.906250e-03
%363 = fdiv float 1.000000e+00, %.sink209
%364 = fmul float %118, %363
%365 = fmul float %117, %363
%366 = call float @llvm.floor.f32(float %364)
%367 = fsub float %364, %366
%368 = call float @llvm.floor.f32(float %365)
%369 = fsub float %365, %368
%370 = fmul float %51, 2.000000e+00
%371 = fmul float %370, %temp48.0
%372 = fsub float 1.000000e+00, %371
%373 = fmul float %temp48.0, %51
%374 = fmul float %367, %372
%375 = fadd float %374, %373
%376 = fmul float %369, %372
%377 = fadd float %376, %373
%378 = fmul float %375, %temp16.0
%379 = fadd float %378, %temp28.0
%380 = fmul float %377, %temp16.0
%381 = fadd float %380, %temp29.0
%382 = bitcast float %379 to i32
%383 = bitcast float %381 to i32
%384 = bitcast float %244 to i32
%385 = insertelement <4 x i32> undef, i32 %382, i32 0
%386 = insertelement <4 x i32> %385, i32 %383, i32 1
%387 = insertelement <4 x i32> %386, i32 %384, i32 2
%388 = bitcast <8 x i32> %97 to <32 x i8>
%389 = bitcast <4 x i32> %99 to <16 x i8>
%390 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %387, <32 x i8> %388, <16 x i8> %389, i32 2)
%391 = extractelement <4 x float> %390, i32 0
%392 = extractelement <4 x float> %390, i32 1
%393 = extractelement <4 x float> %390, i32 2
%394 = fcmp oeq float %temp30.0, 4.000000e+00
%395 = select i1 %394, float 1.000000e+00, float 0.000000e+00
%396 = bitcast float %379 to i32
%397 = bitcast float %381 to i32
%398 = bitcast float %244 to i32
%399 = insertelement <4 x i32> undef, i32 %396, i32 0
%400 = insertelement <4 x i32> %399, i32 %397, i32 1
%401 = insertelement <4 x i32> %400, i32 %398, i32 2
%402 = bitcast <8 x i32> %89 to <32 x i8>
%403 = bitcast <4 x i32> %91 to <16 x i8>
%404 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %401, <32 x i8> %402, <16 x i8> %403, i32 2)
%405 = extractelement <4 x float> %404, i32 0
%406 = extractelement <4 x float> %404, i32 1
%407 = extractelement <4 x float> %404, i32 2
%408 = fcmp oeq float %temp30.0, 3.000000e+00
%409 = select i1 %408, float 1.000000e+00, float 0.000000e+00
%410 = bitcast float %379 to i32
%411 = bitcast float %381 to i32
%412 = bitcast float %244 to i32
%413 = insertelement <4 x i32> undef, i32 %410, i32 0
%414 = insertelement <4 x i32> %413, i32 %411, i32 1
%415 = insertelement <4 x i32> %414, i32 %412, i32 2
%416 = bitcast <8 x i32> %81 to <32 x i8>
%417 = bitcast <4 x i32> %83 to <16 x i8>
%418 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %415, <32 x i8> %416, <16 x i8> %417, i32 2)
%419 = extractelement <4 x float> %418, i32 0
%420 = extractelement <4 x float> %418, i32 1
%421 = extractelement <4 x float> %418, i32 2
%422 = fcmp oeq float %temp30.0, 2.000000e+00
%423 = select i1 %422, float 1.000000e+00, float 0.000000e+00
%424 = bitcast float %379 to i32
%425 = bitcast float %381 to i32
%426 = bitcast float %244 to i32
%427 = insertelement <4 x i32> undef, i32 %424, i32 0
%428 = insertelement <4 x i32> %427, i32 %425, i32 1
%429 = insertelement <4 x i32> %428, i32 %426, i32 2
%430 = bitcast <8 x i32> %73 to <32 x i8>
%431 = bitcast <4 x i32> %75 to <16 x i8>
%432 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %429, <32 x i8> %430, <16 x i8> %431, i32 2)
%433 = extractelement <4 x float> %432, i32 0
%434 = extractelement <4 x float> %432, i32 1
%435 = extractelement <4 x float> %432, i32 2
%436 = fcmp oeq float %temp30.0, 1.000000e+00
%437 = select i1 %436, float 1.000000e+00, float 0.000000e+00
%438 = bitcast float %379 to i32
%439 = bitcast float %381 to i32
%440 = bitcast float %244 to i32
%441 = insertelement <4 x i32> undef, i32 %438, i32 0
%442 = insertelement <4 x i32> %441, i32 %439, i32 1
%443 = insertelement <4 x i32> %442, i32 %440, i32 2
%444 = bitcast <8 x i32> %65 to <32 x i8>
%445 = bitcast <4 x i32> %67 to <16 x i8>
%446 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %443, <32 x i8> %444, <16 x i8> %445, i32 2)
%447 = extractelement <4 x float> %446, i32 0
%448 = extractelement <4 x float> %446, i32 1
%449 = extractelement <4 x float> %446, i32 2
%450 = fcmp oeq float %temp30.0, 0.000000e+00
%451 = select i1 %450, float 1.000000e+00, float 0.000000e+00
%452 = fmul float %447, %451
%453 = fmul float %448, %451
%454 = fmul float %449, %451
%455 = fmul float %433, %437
%456 = fadd float %455, %452
%457 = fmul float %434, %437
%458 = fadd float %457, %453
%459 = fmul float %435, %437
%460 = fadd float %459, %454
%461 = fmul float %419, %423
%462 = fadd float %461, %456
%463 = fmul float %420, %423
%464 = fadd float %463, %458
%465 = fmul float %421, %423
%466 = fadd float %465, %460
%467 = fmul float %405, %409
%468 = fadd float %467, %462
%469 = fmul float %406, %409
%470 = fadd float %469, %464
%471 = fmul float %407, %409
%472 = fadd float %471, %466
%473 = fmul float %391, %395
%474 = fadd float %473, %468
%475 = fmul float %392, %395
%476 = fadd float %475, %470
%477 = fmul float %393, %395
%478 = fadd float %477, %472
%479 = fcmp une float %47, %temp16.0
%.sink210 = select i1 %479, float %50, float %49
%temp52.0 = select i1 %479, float 1.953125e-03, float 3.906250e-03
%480 = fdiv float 1.000000e+00, %.sink210
%481 = fmul float %118, %480
%482 = fmul float %116, %480
%483 = call float @llvm.floor.f32(float %481)
%484 = fsub float %481, %483
%485 = call float @llvm.floor.f32(float %482)
%486 = fsub float %482, %485
%487 = fmul float %51, 2.000000e+00
%488 = fmul float %487, %temp52.0
%489 = fsub float 1.000000e+00, %488
%490 = fmul float %temp52.0, %51
%491 = fmul float %484, %489
%492 = fadd float %491, %490
%493 = fmul float %486, %489
%494 = fadd float %493, %490
%495 = fmul float %492, %temp16.0
%496 = fadd float %495, %temp28.0
%497 = fmul float %494, %temp16.0
%498 = fadd float %497, %temp29.0
%499 = bitcast float %496 to i32
%500 = bitcast float %498 to i32
%501 = bitcast float %244 to i32
%502 = insertelement <4 x i32> undef, i32 %499, i32 0
%503 = insertelement <4 x i32> %502, i32 %500, i32 1
%504 = insertelement <4 x i32> %503, i32 %501, i32 2
%505 = bitcast <8 x i32> %97 to <32 x i8>
%506 = bitcast <4 x i32> %99 to <16 x i8>
%507 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %504, <32 x i8> %505, <16 x i8> %506, i32 2)
%508 = extractelement <4 x float> %507, i32 0
%509 = extractelement <4 x float> %507, i32 1
%510 = extractelement <4 x float> %507, i32 2
%511 = fcmp oeq float %temp30.0, 4.000000e+00
%512 = select i1 %511, float 1.000000e+00, float 0.000000e+00
%513 = bitcast float %496 to i32
%514 = bitcast float %498 to i32
%515 = bitcast float %244 to i32
%516 = insertelement <4 x i32> undef, i32 %513, i32 0
%517 = insertelement <4 x i32> %516, i32 %514, i32 1
%518 = insertelement <4 x i32> %517, i32 %515, i32 2
%519 = bitcast <8 x i32> %89 to <32 x i8>
%520 = bitcast <4 x i32> %91 to <16 x i8>
%521 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %518, <32 x i8> %519, <16 x i8> %520, i32 2)
%522 = extractelement <4 x float> %521, i32 0
%523 = extractelement <4 x float> %521, i32 1
%524 = extractelement <4 x float> %521, i32 2
%525 = fcmp oeq float %temp30.0, 3.000000e+00
%526 = select i1 %525, float 1.000000e+00, float 0.000000e+00
%527 = bitcast float %496 to i32
%528 = bitcast float %498 to i32
%529 = bitcast float %244 to i32
%530 = insertelement <4 x i32> undef, i32 %527, i32 0
%531 = insertelement <4 x i32> %530, i32 %528, i32 1
%532 = insertelement <4 x i32> %531, i32 %529, i32 2
%533 = bitcast <8 x i32> %81 to <32 x i8>
%534 = bitcast <4 x i32> %83 to <16 x i8>
%535 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %532, <32 x i8> %533, <16 x i8> %534, i32 2)
%536 = extractelement <4 x float> %535, i32 0
%537 = extractelement <4 x float> %535, i32 1
%538 = extractelement <4 x float> %535, i32 2
%539 = fcmp oeq float %temp30.0, 2.000000e+00
%540 = select i1 %539, float 1.000000e+00, float 0.000000e+00
%541 = bitcast float %496 to i32
%542 = bitcast float %498 to i32
%543 = bitcast float %244 to i32
%544 = insertelement <4 x i32> undef, i32 %541, i32 0
%545 = insertelement <4 x i32> %544, i32 %542, i32 1
%546 = insertelement <4 x i32> %545, i32 %543, i32 2
%547 = bitcast <8 x i32> %73 to <32 x i8>
%548 = bitcast <4 x i32> %75 to <16 x i8>
%549 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %546, <32 x i8> %547, <16 x i8> %548, i32 2)
%550 = extractelement <4 x float> %549, i32 0
%551 = extractelement <4 x float> %549, i32 1
%552 = extractelement <4 x float> %549, i32 2
%553 = fcmp oeq float %temp30.0, 1.000000e+00
%554 = select i1 %553, float 1.000000e+00, float 0.000000e+00
%555 = bitcast float %496 to i32
%556 = bitcast float %498 to i32
%557 = bitcast float %244 to i32
%558 = insertelement <4 x i32> undef, i32 %555, i32 0
%559 = insertelement <4 x i32> %558, i32 %556, i32 1
%560 = insertelement <4 x i32> %559, i32 %557, i32 2
%561 = bitcast <8 x i32> %65 to <32 x i8>
%562 = bitcast <4 x i32> %67 to <16 x i8>
%563 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %560, <32 x i8> %561, <16 x i8> %562, i32 2)
%564 = extractelement <4 x float> %563, i32 0
%565 = extractelement <4 x float> %563, i32 1
%566 = extractelement <4 x float> %563, i32 2
%567 = fcmp oeq float %temp30.0, 0.000000e+00
%568 = select i1 %567, float 1.000000e+00, float 0.000000e+00
%569 = fmul float %564, %568
%570 = fmul float %565, %568
%571 = fmul float %566, %568
%572 = fmul float %550, %554
%573 = fadd float %572, %569
%574 = fmul float %551, %554
%575 = fadd float %574, %570
%576 = fmul float %552, %554
%577 = fadd float %576, %571
%578 = fmul float %536, %540
%579 = fadd float %578, %573
%580 = fmul float %537, %540
%581 = fadd float %580, %575
%582 = fmul float %538, %540
%583 = fadd float %582, %577
%584 = fmul float %522, %526
%585 = fadd float %584, %579
%586 = fmul float %523, %526
%587 = fadd float %586, %581
%588 = fmul float %524, %526
%589 = fadd float %588, %583
%590 = fmul float %508, %512
%591 = fadd float %590, %585
%592 = fmul float %509, %512
%593 = fadd float %592, %587
%594 = fmul float %510, %512
%595 = fadd float %594, %589
%596 = fcmp une float %47, %temp20.0
%.sink211 = select i1 %596, float %50, float %49
%temp56.0 = select i1 %596, float 1.953125e-03, float 3.906250e-03
%597 = fdiv float 1.000000e+00, %.sink211
%598 = fmul float %116, %597
%599 = fmul float %117, %597
%600 = call float @llvm.floor.f32(float %598)
%601 = fsub float %598, %600
%602 = call float @llvm.floor.f32(float %599)
%603 = fsub float %599, %602
%604 = fmul float %51, 2.000000e+00
%605 = fmul float %604, %temp56.0
%606 = fsub float 1.000000e+00, %605
%607 = fmul float %temp56.0, %51
%608 = fmul float %601, %606
%609 = fadd float %608, %607
%610 = fmul float %603, %606
%611 = fadd float %610, %607
%612 = fmul float %609, %temp20.0
%613 = fadd float %612, %temp36.0
%614 = fmul float %611, %temp20.0
%615 = fadd float %614, %temp37.0
%616 = bitcast float %613 to i32
%617 = bitcast float %615 to i32
%618 = bitcast float %244 to i32
%619 = insertelement <4 x i32> undef, i32 %616, i32 0
%620 = insertelement <4 x i32> %619, i32 %617, i32 1
%621 = insertelement <4 x i32> %620, i32 %618, i32 2
%622 = bitcast <8 x i32> %97 to <32 x i8>
%623 = bitcast <4 x i32> %99 to <16 x i8>
%624 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %621, <32 x i8> %622, <16 x i8> %623, i32 2)
%625 = extractelement <4 x float> %624, i32 0
%626 = extractelement <4 x float> %624, i32 1
%627 = extractelement <4 x float> %624, i32 2
%628 = fcmp oeq float %temp38.0, 4.000000e+00
%629 = select i1 %628, float 1.000000e+00, float 0.000000e+00
%630 = bitcast float %613 to i32
%631 = bitcast float %615 to i32
%632 = bitcast float %244 to i32
%633 = insertelement <4 x i32> undef, i32 %630, i32 0
%634 = insertelement <4 x i32> %633, i32 %631, i32 1
%635 = insertelement <4 x i32> %634, i32 %632, i32 2
%636 = bitcast <8 x i32> %89 to <32 x i8>
%637 = bitcast <4 x i32> %91 to <16 x i8>
%638 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %635, <32 x i8> %636, <16 x i8> %637, i32 2)
%639 = extractelement <4 x float> %638, i32 0
%640 = extractelement <4 x float> %638, i32 1
%641 = extractelement <4 x float> %638, i32 2
%642 = fcmp oeq float %temp38.0, 3.000000e+00
%643 = select i1 %642, float 1.000000e+00, float 0.000000e+00
%644 = bitcast float %613 to i32
%645 = bitcast float %615 to i32
%646 = bitcast float %244 to i32
%647 = insertelement <4 x i32> undef, i32 %644, i32 0
%648 = insertelement <4 x i32> %647, i32 %645, i32 1
%649 = insertelement <4 x i32> %648, i32 %646, i32 2
%650 = bitcast <8 x i32> %81 to <32 x i8>
%651 = bitcast <4 x i32> %83 to <16 x i8>
%652 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %649, <32 x i8> %650, <16 x i8> %651, i32 2)
%653 = extractelement <4 x float> %652, i32 0
%654 = extractelement <4 x float> %652, i32 1
%655 = extractelement <4 x float> %652, i32 2
%656 = fcmp oeq float %temp38.0, 2.000000e+00
%657 = select i1 %656, float 1.000000e+00, float 0.000000e+00
%658 = bitcast float %613 to i32
%659 = bitcast float %615 to i32
%660 = bitcast float %244 to i32
%661 = insertelement <4 x i32> undef, i32 %658, i32 0
%662 = insertelement <4 x i32> %661, i32 %659, i32 1
%663 = insertelement <4 x i32> %662, i32 %660, i32 2
%664 = bitcast <8 x i32> %73 to <32 x i8>
%665 = bitcast <4 x i32> %75 to <16 x i8>
%666 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %663, <32 x i8> %664, <16 x i8> %665, i32 2)
%667 = extractelement <4 x float> %666, i32 0
%668 = extractelement <4 x float> %666, i32 1
%669 = extractelement <4 x float> %666, i32 2
%670 = fcmp oeq float %temp38.0, 1.000000e+00
%671 = select i1 %670, float 1.000000e+00, float 0.000000e+00
%672 = bitcast float %613 to i32
%673 = bitcast float %615 to i32
%674 = bitcast float %244 to i32
%675 = insertelement <4 x i32> undef, i32 %672, i32 0
%676 = insertelement <4 x i32> %675, i32 %673, i32 1
%677 = insertelement <4 x i32> %676, i32 %674, i32 2
%678 = bitcast <8 x i32> %65 to <32 x i8>
%679 = bitcast <4 x i32> %67 to <16 x i8>
%680 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %677, <32 x i8> %678, <16 x i8> %679, i32 2)
%681 = extractelement <4 x float> %680, i32 0
%682 = extractelement <4 x float> %680, i32 1
%683 = extractelement <4 x float> %680, i32 2
%684 = fcmp oeq float %temp38.0, 0.000000e+00
%685 = select i1 %684, float 1.000000e+00, float 0.000000e+00
%686 = fmul float %681, %685
%687 = fmul float %682, %685
%688 = fmul float %683, %685
%689 = fmul float %667, %671
%690 = fadd float %689, %686
%691 = fmul float %668, %671
%692 = fadd float %691, %687
%693 = fmul float %669, %671
%694 = fadd float %693, %688
%695 = fmul float %653, %657
%696 = fadd float %695, %690
%697 = fmul float %654, %657
%698 = fadd float %697, %692
%699 = fmul float %655, %657
%700 = fadd float %699, %694
%701 = fmul float %639, %643
%702 = fadd float %701, %696
%703 = fmul float %640, %643
%704 = fadd float %703, %698
%705 = fmul float %641, %643
%706 = fadd float %705, %700
%707 = fmul float %625, %629
%708 = fadd float %707, %702
%709 = fmul float %626, %629
%710 = fadd float %709, %704
%711 = fmul float %627, %629
%712 = fadd float %711, %706
%713 = fcmp une float %47, %temp20.0
%.sink212 = select i1 %713, float %50, float %49
%temp60.0 = select i1 %713, float 1.953125e-03, float 3.906250e-03
%714 = fdiv float 1.000000e+00, %.sink212
%715 = fmul float %118, %714
%716 = fmul float %117, %714
%717 = call float @llvm.floor.f32(float %715)
%718 = fsub float %715, %717
%719 = call float @llvm.floor.f32(float %716)
%720 = fsub float %716, %719
%721 = fmul float %51, 2.000000e+00
%722 = fmul float %721, %temp60.0
%723 = fsub float 1.000000e+00, %722
%724 = fmul float %temp60.0, %51
%725 = fmul float %718, %723
%726 = fadd float %725, %724
%727 = fmul float %720, %723
%728 = fadd float %727, %724
%729 = fmul float %726, %temp20.0
%730 = fadd float %729, %temp36.0
%731 = fmul float %728, %temp20.0
%732 = fadd float %731, %temp37.0
%733 = bitcast float %730 to i32
%734 = bitcast float %732 to i32
%735 = bitcast float %244 to i32
%736 = insertelement <4 x i32> undef, i32 %733, i32 0
%737 = insertelement <4 x i32> %736, i32 %734, i32 1
%738 = insertelement <4 x i32> %737, i32 %735, i32 2
%739 = bitcast <8 x i32> %97 to <32 x i8>
%740 = bitcast <4 x i32> %99 to <16 x i8>
%741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %738, <32 x i8> %739, <16 x i8> %740, i32 2)
%742 = extractelement <4 x float> %741, i32 0
%743 = extractelement <4 x float> %741, i32 1
%744 = extractelement <4 x float> %741, i32 2
%745 = fcmp oeq float %temp38.0, 4.000000e+00
%746 = select i1 %745, float 1.000000e+00, float 0.000000e+00
%747 = bitcast float %730 to i32
%748 = bitcast float %732 to i32
%749 = bitcast float %244 to i32
%750 = insertelement <4 x i32> undef, i32 %747, i32 0
%751 = insertelement <4 x i32> %750, i32 %748, i32 1
%752 = insertelement <4 x i32> %751, i32 %749, i32 2
%753 = bitcast <8 x i32> %89 to <32 x i8>
%754 = bitcast <4 x i32> %91 to <16 x i8>
%755 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %752, <32 x i8> %753, <16 x i8> %754, i32 2)
%756 = extractelement <4 x float> %755, i32 0
%757 = extractelement <4 x float> %755, i32 1
%758 = extractelement <4 x float> %755, i32 2
%759 = fcmp oeq float %temp38.0, 3.000000e+00
%760 = select i1 %759, float 1.000000e+00, float 0.000000e+00
%761 = bitcast float %730 to i32
%762 = bitcast float %732 to i32
%763 = bitcast float %244 to i32
%764 = insertelement <4 x i32> undef, i32 %761, i32 0
%765 = insertelement <4 x i32> %764, i32 %762, i32 1
%766 = insertelement <4 x i32> %765, i32 %763, i32 2
%767 = bitcast <8 x i32> %81 to <32 x i8>
%768 = bitcast <4 x i32> %83 to <16 x i8>
%769 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %766, <32 x i8> %767, <16 x i8> %768, i32 2)
%770 = extractelement <4 x float> %769, i32 0
%771 = extractelement <4 x float> %769, i32 1
%772 = extractelement <4 x float> %769, i32 2
%773 = fcmp oeq float %temp38.0, 2.000000e+00
%774 = select i1 %773, float 1.000000e+00, float 0.000000e+00
%775 = bitcast float %730 to i32
%776 = bitcast float %732 to i32
%777 = bitcast float %244 to i32
%778 = insertelement <4 x i32> undef, i32 %775, i32 0
%779 = insertelement <4 x i32> %778, i32 %776, i32 1
%780 = insertelement <4 x i32> %779, i32 %777, i32 2
%781 = bitcast <8 x i32> %73 to <32 x i8>
%782 = bitcast <4 x i32> %75 to <16 x i8>
%783 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %780, <32 x i8> %781, <16 x i8> %782, i32 2)
%784 = extractelement <4 x float> %783, i32 0
%785 = extractelement <4 x float> %783, i32 1
%786 = extractelement <4 x float> %783, i32 2
%787 = fcmp oeq float %temp38.0, 1.000000e+00
%788 = select i1 %787, float 1.000000e+00, float 0.000000e+00
%789 = bitcast float %730 to i32
%790 = bitcast float %732 to i32
%791 = bitcast float %244 to i32
%792 = insertelement <4 x i32> undef, i32 %789, i32 0
%793 = insertelement <4 x i32> %792, i32 %790, i32 1
%794 = insertelement <4 x i32> %793, i32 %791, i32 2
%795 = bitcast <8 x i32> %65 to <32 x i8>
%796 = bitcast <4 x i32> %67 to <16 x i8>
%797 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %794, <32 x i8> %795, <16 x i8> %796, i32 2)
%798 = extractelement <4 x float> %797, i32 0
%799 = extractelement <4 x float> %797, i32 1
%800 = extractelement <4 x float> %797, i32 2
%801 = fcmp oeq float %temp38.0, 0.000000e+00
%802 = select i1 %801, float 1.000000e+00, float 0.000000e+00
%803 = fmul float %798, %802
%804 = fmul float %799, %802
%805 = fmul float %800, %802
%806 = fmul float %784, %788
%807 = fadd float %806, %803
%808 = fmul float %785, %788
%809 = fadd float %808, %804
%810 = fmul float %786, %788
%811 = fadd float %810, %805
%812 = fmul float %770, %774
%813 = fadd float %812, %807
%814 = fmul float %771, %774
%815 = fadd float %814, %809
%816 = fmul float %772, %774
%817 = fadd float %816, %811
%818 = fmul float %756, %760
%819 = fadd float %818, %813
%820 = fmul float %757, %760
%821 = fadd float %820, %815
%822 = fmul float %758, %760
%823 = fadd float %822, %817
%824 = fmul float %742, %746
%825 = fadd float %824, %819
%826 = fmul float %743, %746
%827 = fadd float %826, %821
%828 = fmul float %744, %746
%829 = fadd float %828, %823
%830 = fcmp une float %47, %temp20.0
%.sink213 = select i1 %830, float %50, float %49
%temp64.0 = select i1 %830, float 1.953125e-03, float 3.906250e-03
%831 = fdiv float 1.000000e+00, %.sink213
%832 = fmul float %118, %831
%833 = fmul float %116, %831
%834 = call float @llvm.floor.f32(float %832)
%835 = fsub float %832, %834
%836 = call float @llvm.floor.f32(float %833)
%837 = fsub float %833, %836
%838 = fmul float %51, 2.000000e+00
%839 = fmul float %838, %temp64.0
%840 = fsub float 1.000000e+00, %839
%841 = fmul float %temp64.0, %51
%842 = fmul float %835, %840
%843 = fadd float %842, %841
%844 = fmul float %837, %840
%845 = fadd float %844, %841
%846 = fmul float %843, %temp20.0
%847 = fadd float %846, %temp36.0
%848 = fmul float %845, %temp20.0
%849 = fadd float %848, %temp37.0
%850 = bitcast float %847 to i32
%851 = bitcast float %849 to i32
%852 = bitcast float %244 to i32
%853 = insertelement <4 x i32> undef, i32 %850, i32 0
%854 = insertelement <4 x i32> %853, i32 %851, i32 1
%855 = insertelement <4 x i32> %854, i32 %852, i32 2
%856 = bitcast <8 x i32> %97 to <32 x i8>
%857 = bitcast <4 x i32> %99 to <16 x i8>
%858 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %855, <32 x i8> %856, <16 x i8> %857, i32 2)
%859 = extractelement <4 x float> %858, i32 0
%860 = extractelement <4 x float> %858, i32 1
%861 = extractelement <4 x float> %858, i32 2
%862 = fcmp oeq float %temp38.0, 4.000000e+00
%863 = select i1 %862, float 1.000000e+00, float 0.000000e+00
%864 = bitcast float %847 to i32
%865 = bitcast float %849 to i32
%866 = bitcast float %244 to i32
%867 = insertelement <4 x i32> undef, i32 %864, i32 0
%868 = insertelement <4 x i32> %867, i32 %865, i32 1
%869 = insertelement <4 x i32> %868, i32 %866, i32 2
%870 = bitcast <8 x i32> %89 to <32 x i8>
%871 = bitcast <4 x i32> %91 to <16 x i8>
%872 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %869, <32 x i8> %870, <16 x i8> %871, i32 2)
%873 = extractelement <4 x float> %872, i32 0
%874 = extractelement <4 x float> %872, i32 1
%875 = extractelement <4 x float> %872, i32 2
%876 = fcmp oeq float %temp38.0, 3.000000e+00
%877 = select i1 %876, float 1.000000e+00, float 0.000000e+00
%878 = bitcast float %847 to i32
%879 = bitcast float %849 to i32
%880 = bitcast float %244 to i32
%881 = insertelement <4 x i32> undef, i32 %878, i32 0
%882 = insertelement <4 x i32> %881, i32 %879, i32 1
%883 = insertelement <4 x i32> %882, i32 %880, i32 2
%884 = bitcast <8 x i32> %81 to <32 x i8>
%885 = bitcast <4 x i32> %83 to <16 x i8>
%886 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %883, <32 x i8> %884, <16 x i8> %885, i32 2)
%887 = extractelement <4 x float> %886, i32 0
%888 = extractelement <4 x float> %886, i32 1
%889 = extractelement <4 x float> %886, i32 2
%890 = fcmp oeq float %temp38.0, 2.000000e+00
%891 = select i1 %890, float 1.000000e+00, float 0.000000e+00
%892 = bitcast float %847 to i32
%893 = bitcast float %849 to i32
%894 = bitcast float %244 to i32
%895 = insertelement <4 x i32> undef, i32 %892, i32 0
%896 = insertelement <4 x i32> %895, i32 %893, i32 1
%897 = insertelement <4 x i32> %896, i32 %894, i32 2
%898 = bitcast <8 x i32> %73 to <32 x i8>
%899 = bitcast <4 x i32> %75 to <16 x i8>
%900 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %897, <32 x i8> %898, <16 x i8> %899, i32 2)
%901 = extractelement <4 x float> %900, i32 0
%902 = extractelement <4 x float> %900, i32 1
%903 = extractelement <4 x float> %900, i32 2
%904 = fcmp oeq float %temp38.0, 1.000000e+00
%905 = select i1 %904, float 1.000000e+00, float 0.000000e+00
%906 = bitcast float %847 to i32
%907 = bitcast float %849 to i32
%908 = bitcast float %244 to i32
%909 = insertelement <4 x i32> undef, i32 %906, i32 0
%910 = insertelement <4 x i32> %909, i32 %907, i32 1
%911 = insertelement <4 x i32> %910, i32 %908, i32 2
%912 = bitcast <8 x i32> %65 to <32 x i8>
%913 = bitcast <4 x i32> %67 to <16 x i8>
%914 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %911, <32 x i8> %912, <16 x i8> %913, i32 2)
%915 = extractelement <4 x float> %914, i32 0
%916 = extractelement <4 x float> %914, i32 1
%917 = extractelement <4 x float> %914, i32 2
%918 = fcmp oeq float %temp38.0, 0.000000e+00
%919 = select i1 %918, float 1.000000e+00, float 0.000000e+00
%920 = fmul float %915, %919
%921 = fmul float %916, %919
%922 = fmul float %917, %919
%923 = fmul float %901, %905
%924 = fadd float %923, %920
%925 = fmul float %902, %905
%926 = fadd float %925, %921
%927 = fmul float %903, %905
%928 = fadd float %927, %922
%929 = fmul float %887, %891
%930 = fadd float %929, %924
%931 = fmul float %888, %891
%932 = fadd float %931, %926
%933 = fmul float %889, %891
%934 = fadd float %933, %928
%935 = fmul float %873, %877
%936 = fadd float %935, %930
%937 = fmul float %874, %877
%938 = fadd float %937, %932
%939 = fmul float %875, %877
%940 = fadd float %939, %934
%941 = fmul float %859, %863
%942 = fadd float %941, %936
%943 = fmul float %860, %863
%944 = fadd float %943, %938
%945 = fmul float %861, %863
%946 = fadd float %945, %940
%947 = fcmp une float %47, %temp24.0
%.sink214 = select i1 %947, float %50, float %49
%temp68.0 = select i1 %947, float 1.953125e-03, float 3.906250e-03
%948 = fdiv float 1.000000e+00, %.sink214
%949 = fmul float %116, %948
%950 = fmul float %117, %948
%951 = call float @llvm.floor.f32(float %949)
%952 = fsub float %949, %951
%953 = call float @llvm.floor.f32(float %950)
%954 = fsub float %950, %953
%955 = fmul float %51, 2.000000e+00
%956 = fmul float %955, %temp68.0
%957 = fsub float 1.000000e+00, %956
%958 = fmul float %temp68.0, %51
%959 = fmul float %952, %957
%960 = fadd float %959, %958
%961 = fmul float %954, %957
%962 = fadd float %961, %958
%963 = fmul float %960, %temp24.0
%964 = fadd float %963, %temp12.0
%965 = fmul float %962, %temp24.0
%966 = fadd float %965, %temp13.0
%967 = bitcast float %964 to i32
%968 = bitcast float %966 to i32
%969 = bitcast float %244 to i32
%970 = insertelement <4 x i32> undef, i32 %967, i32 0
%971 = insertelement <4 x i32> %970, i32 %968, i32 1
%972 = insertelement <4 x i32> %971, i32 %969, i32 2
%973 = bitcast <8 x i32> %97 to <32 x i8>
%974 = bitcast <4 x i32> %99 to <16 x i8>
%975 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %972, <32 x i8> %973, <16 x i8> %974, i32 2)
%976 = extractelement <4 x float> %975, i32 0
%977 = extractelement <4 x float> %975, i32 1
%978 = extractelement <4 x float> %975, i32 2
%979 = fcmp oeq float %temp14.0, 4.000000e+00
%980 = select i1 %979, float 1.000000e+00, float 0.000000e+00
%981 = bitcast float %964 to i32
%982 = bitcast float %966 to i32
%983 = bitcast float %244 to i32
%984 = insertelement <4 x i32> undef, i32 %981, i32 0
%985 = insertelement <4 x i32> %984, i32 %982, i32 1
%986 = insertelement <4 x i32> %985, i32 %983, i32 2
%987 = bitcast <8 x i32> %89 to <32 x i8>
%988 = bitcast <4 x i32> %91 to <16 x i8>
%989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %986, <32 x i8> %987, <16 x i8> %988, i32 2)
%990 = extractelement <4 x float> %989, i32 0
%991 = extractelement <4 x float> %989, i32 1
%992 = extractelement <4 x float> %989, i32 2
%993 = fcmp oeq float %temp14.0, 3.000000e+00
%994 = select i1 %993, float 1.000000e+00, float 0.000000e+00
%995 = bitcast float %964 to i32
%996 = bitcast float %966 to i32
%997 = bitcast float %244 to i32
%998 = insertelement <4 x i32> undef, i32 %995, i32 0
%999 = insertelement <4 x i32> %998, i32 %996, i32 1
%1000 = insertelement <4 x i32> %999, i32 %997, i32 2
%1001 = bitcast <8 x i32> %81 to <32 x i8>
%1002 = bitcast <4 x i32> %83 to <16 x i8>
%1003 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1000, <32 x i8> %1001, <16 x i8> %1002, i32 2)
%1004 = extractelement <4 x float> %1003, i32 0
%1005 = extractelement <4 x float> %1003, i32 1
%1006 = extractelement <4 x float> %1003, i32 2
%1007 = fcmp oeq float %temp14.0, 2.000000e+00
%1008 = select i1 %1007, float 1.000000e+00, float 0.000000e+00
%1009 = bitcast float %964 to i32
%1010 = bitcast float %966 to i32
%1011 = bitcast float %244 to i32
%1012 = insertelement <4 x i32> undef, i32 %1009, i32 0
%1013 = insertelement <4 x i32> %1012, i32 %1010, i32 1
%1014 = insertelement <4 x i32> %1013, i32 %1011, i32 2
%1015 = bitcast <8 x i32> %73 to <32 x i8>
%1016 = bitcast <4 x i32> %75 to <16 x i8>
%1017 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1014, <32 x i8> %1015, <16 x i8> %1016, i32 2)
%1018 = extractelement <4 x float> %1017, i32 0
%1019 = extractelement <4 x float> %1017, i32 1
%1020 = extractelement <4 x float> %1017, i32 2
%1021 = fcmp oeq float %temp14.0, 1.000000e+00
%1022 = select i1 %1021, float 1.000000e+00, float 0.000000e+00
%1023 = bitcast float %964 to i32
%1024 = bitcast float %966 to i32
%1025 = bitcast float %244 to i32
%1026 = insertelement <4 x i32> undef, i32 %1023, i32 0
%1027 = insertelement <4 x i32> %1026, i32 %1024, i32 1
%1028 = insertelement <4 x i32> %1027, i32 %1025, i32 2
%1029 = bitcast <8 x i32> %65 to <32 x i8>
%1030 = bitcast <4 x i32> %67 to <16 x i8>
%1031 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1028, <32 x i8> %1029, <16 x i8> %1030, i32 2)
%1032 = extractelement <4 x float> %1031, i32 0
%1033 = extractelement <4 x float> %1031, i32 1
%1034 = extractelement <4 x float> %1031, i32 2
%1035 = fcmp oeq float %temp14.0, 0.000000e+00
%1036 = select i1 %1035, float 1.000000e+00, float 0.000000e+00
%1037 = fmul float %1032, %1036
%1038 = fmul float %1033, %1036
%1039 = fmul float %1034, %1036
%1040 = fmul float %1018, %1022
%1041 = fadd float %1040, %1037
%1042 = fmul float %1019, %1022
%1043 = fadd float %1042, %1038
%1044 = fmul float %1020, %1022
%1045 = fadd float %1044, %1039
%1046 = fmul float %1004, %1008
%1047 = fadd float %1046, %1041
%1048 = fmul float %1005, %1008
%1049 = fadd float %1048, %1043
%1050 = fmul float %1006, %1008
%1051 = fadd float %1050, %1045
%1052 = fmul float %990, %994
%1053 = fadd float %1052, %1047
%1054 = fmul float %991, %994
%1055 = fadd float %1054, %1049
%1056 = fmul float %992, %994
%1057 = fadd float %1056, %1051
%1058 = fmul float %976, %980
%1059 = fadd float %1058, %1053
%1060 = fmul float %977, %980
%1061 = fadd float %1060, %1055
%1062 = fmul float %978, %980
%1063 = fadd float %1062, %1057
%1064 = fcmp une float %47, %temp24.0
%.sink215 = select i1 %1064, float %50, float %49
%temp72.0 = select i1 %1064, float 1.953125e-03, float 3.906250e-03
%1065 = fdiv float 1.000000e+00, %.sink215
%1066 = fmul float %118, %1065
%1067 = fmul float %117, %1065
%1068 = call float @llvm.floor.f32(float %1066)
%1069 = fsub float %1066, %1068
%1070 = call float @llvm.floor.f32(float %1067)
%1071 = fsub float %1067, %1070
%1072 = fmul float %51, 2.000000e+00
%1073 = fmul float %1072, %temp72.0
%1074 = fsub float 1.000000e+00, %1073
%1075 = fmul float %temp72.0, %51
%1076 = fmul float %1069, %1074
%1077 = fadd float %1076, %1075
%1078 = fmul float %1071, %1074
%1079 = fadd float %1078, %1075
%1080 = fmul float %1077, %temp24.0
%1081 = fadd float %1080, %temp12.0
%1082 = fmul float %1079, %temp24.0
%1083 = fadd float %1082, %temp13.0
%1084 = bitcast float %1081 to i32
%1085 = bitcast float %1083 to i32
%1086 = bitcast float %244 to i32
%1087 = insertelement <4 x i32> undef, i32 %1084, i32 0
%1088 = insertelement <4 x i32> %1087, i32 %1085, i32 1
%1089 = insertelement <4 x i32> %1088, i32 %1086, i32 2
%1090 = bitcast <8 x i32> %97 to <32 x i8>
%1091 = bitcast <4 x i32> %99 to <16 x i8>
%1092 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1089, <32 x i8> %1090, <16 x i8> %1091, i32 2)
%1093 = extractelement <4 x float> %1092, i32 0
%1094 = extractelement <4 x float> %1092, i32 1
%1095 = extractelement <4 x float> %1092, i32 2
%1096 = fcmp oeq float %temp14.0, 4.000000e+00
%1097 = select i1 %1096, float 1.000000e+00, float 0.000000e+00
%1098 = bitcast float %1081 to i32
%1099 = bitcast float %1083 to i32
%1100 = bitcast float %244 to i32
%1101 = insertelement <4 x i32> undef, i32 %1098, i32 0
%1102 = insertelement <4 x i32> %1101, i32 %1099, i32 1
%1103 = insertelement <4 x i32> %1102, i32 %1100, i32 2
%1104 = bitcast <8 x i32> %89 to <32 x i8>
%1105 = bitcast <4 x i32> %91 to <16 x i8>
%1106 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1103, <32 x i8> %1104, <16 x i8> %1105, i32 2)
%1107 = extractelement <4 x float> %1106, i32 0
%1108 = extractelement <4 x float> %1106, i32 1
%1109 = extractelement <4 x float> %1106, i32 2
%1110 = fcmp oeq float %temp14.0, 3.000000e+00
%1111 = select i1 %1110, float 1.000000e+00, float 0.000000e+00
%1112 = bitcast float %1081 to i32
%1113 = bitcast float %1083 to i32
%1114 = bitcast float %244 to i32
%1115 = insertelement <4 x i32> undef, i32 %1112, i32 0
%1116 = insertelement <4 x i32> %1115, i32 %1113, i32 1
%1117 = insertelement <4 x i32> %1116, i32 %1114, i32 2
%1118 = bitcast <8 x i32> %81 to <32 x i8>
%1119 = bitcast <4 x i32> %83 to <16 x i8>
%1120 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1117, <32 x i8> %1118, <16 x i8> %1119, i32 2)
%1121 = extractelement <4 x float> %1120, i32 0
%1122 = extractelement <4 x float> %1120, i32 1
%1123 = extractelement <4 x float> %1120, i32 2
%1124 = fcmp oeq float %temp14.0, 2.000000e+00
%1125 = select i1 %1124, float 1.000000e+00, float 0.000000e+00
%1126 = bitcast float %1081 to i32
%1127 = bitcast float %1083 to i32
%1128 = bitcast float %244 to i32
%1129 = insertelement <4 x i32> undef, i32 %1126, i32 0
%1130 = insertelement <4 x i32> %1129, i32 %1127, i32 1
%1131 = insertelement <4 x i32> %1130, i32 %1128, i32 2
%1132 = bitcast <8 x i32> %73 to <32 x i8>
%1133 = bitcast <4 x i32> %75 to <16 x i8>
%1134 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1131, <32 x i8> %1132, <16 x i8> %1133, i32 2)
%1135 = extractelement <4 x float> %1134, i32 0
%1136 = extractelement <4 x float> %1134, i32 1
%1137 = extractelement <4 x float> %1134, i32 2
%1138 = fcmp oeq float %temp14.0, 1.000000e+00
%1139 = select i1 %1138, float 1.000000e+00, float 0.000000e+00
%1140 = bitcast float %1081 to i32
%1141 = bitcast float %1083 to i32
%1142 = bitcast float %244 to i32
%1143 = insertelement <4 x i32> undef, i32 %1140, i32 0
%1144 = insertelement <4 x i32> %1143, i32 %1141, i32 1
%1145 = insertelement <4 x i32> %1144, i32 %1142, i32 2
%1146 = bitcast <8 x i32> %65 to <32 x i8>
%1147 = bitcast <4 x i32> %67 to <16 x i8>
%1148 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1145, <32 x i8> %1146, <16 x i8> %1147, i32 2)
%1149 = extractelement <4 x float> %1148, i32 0
%1150 = extractelement <4 x float> %1148, i32 1
%1151 = extractelement <4 x float> %1148, i32 2
%1152 = fcmp oeq float %temp14.0, 0.000000e+00
%1153 = select i1 %1152, float 1.000000e+00, float 0.000000e+00
%1154 = fmul float %1149, %1153
%1155 = fmul float %1150, %1153
%1156 = fmul float %1151, %1153
%1157 = fmul float %1135, %1139
%1158 = fadd float %1157, %1154
%1159 = fmul float %1136, %1139
%1160 = fadd float %1159, %1155
%1161 = fmul float %1137, %1139
%1162 = fadd float %1161, %1156
%1163 = fmul float %1121, %1125
%1164 = fadd float %1163, %1158
%1165 = fmul float %1122, %1125
%1166 = fadd float %1165, %1160
%1167 = fmul float %1123, %1125
%1168 = fadd float %1167, %1162
%1169 = fmul float %1107, %1111
%1170 = fadd float %1169, %1164
%1171 = fmul float %1108, %1111
%1172 = fadd float %1171, %1166
%1173 = fmul float %1109, %1111
%1174 = fadd float %1173, %1168
%1175 = fmul float %1093, %1097
%1176 = fadd float %1175, %1170
%1177 = fmul float %1094, %1097
%1178 = fadd float %1177, %1172
%1179 = fmul float %1095, %1097
%1180 = fadd float %1179, %1174
%1181 = fcmp une float %47, %temp24.0
%.sink216 = select i1 %1181, float %50, float %49
%temp76.0 = select i1 %1181, float 1.953125e-03, float 3.906250e-03
%1182 = fdiv float 1.000000e+00, %.sink216
%1183 = fmul float %118, %1182
%1184 = fmul float %116, %1182
%1185 = call float @llvm.floor.f32(float %1183)
%1186 = fsub float %1183, %1185
%1187 = call float @llvm.floor.f32(float %1184)
%1188 = fsub float %1184, %1187
%1189 = fmul float %51, 2.000000e+00
%1190 = fmul float %1189, %temp76.0
%1191 = fsub float 1.000000e+00, %1190
%1192 = fmul float %temp76.0, %51
%1193 = fmul float %1186, %1191
%1194 = fadd float %1193, %1192
%1195 = fmul float %1188, %1191
%1196 = fadd float %1195, %1192
%1197 = fmul float %1194, %temp24.0
%1198 = fadd float %1197, %temp12.0
%1199 = fmul float %1196, %temp24.0
%1200 = fadd float %1199, %temp13.0
%1201 = bitcast float %1198 to i32
%1202 = bitcast float %1200 to i32
%1203 = bitcast float %244 to i32
%1204 = insertelement <4 x i32> undef, i32 %1201, i32 0
%1205 = insertelement <4 x i32> %1204, i32 %1202, i32 1
%1206 = insertelement <4 x i32> %1205, i32 %1203, i32 2
%1207 = bitcast <8 x i32> %97 to <32 x i8>
%1208 = bitcast <4 x i32> %99 to <16 x i8>
%1209 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1206, <32 x i8> %1207, <16 x i8> %1208, i32 2)
%1210 = extractelement <4 x float> %1209, i32 0
%1211 = extractelement <4 x float> %1209, i32 1
%1212 = extractelement <4 x float> %1209, i32 2
%1213 = fcmp oeq float %temp14.0, 4.000000e+00
%1214 = select i1 %1213, float 1.000000e+00, float 0.000000e+00
%1215 = bitcast float %1198 to i32
%1216 = bitcast float %1200 to i32
%1217 = bitcast float %244 to i32
%1218 = insertelement <4 x i32> undef, i32 %1215, i32 0
%1219 = insertelement <4 x i32> %1218, i32 %1216, i32 1
%1220 = insertelement <4 x i32> %1219, i32 %1217, i32 2
%1221 = bitcast <8 x i32> %89 to <32 x i8>
%1222 = bitcast <4 x i32> %91 to <16 x i8>
%1223 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1220, <32 x i8> %1221, <16 x i8> %1222, i32 2)
%1224 = extractelement <4 x float> %1223, i32 0
%1225 = extractelement <4 x float> %1223, i32 1
%1226 = extractelement <4 x float> %1223, i32 2
%1227 = fcmp oeq float %temp14.0, 3.000000e+00
%1228 = select i1 %1227, float 1.000000e+00, float 0.000000e+00
%1229 = bitcast float %1198 to i32
%1230 = bitcast float %1200 to i32
%1231 = bitcast float %244 to i32
%1232 = insertelement <4 x i32> undef, i32 %1229, i32 0
%1233 = insertelement <4 x i32> %1232, i32 %1230, i32 1
%1234 = insertelement <4 x i32> %1233, i32 %1231, i32 2
%1235 = bitcast <8 x i32> %81 to <32 x i8>
%1236 = bitcast <4 x i32> %83 to <16 x i8>
%1237 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1234, <32 x i8> %1235, <16 x i8> %1236, i32 2)
%1238 = extractelement <4 x float> %1237, i32 0
%1239 = extractelement <4 x float> %1237, i32 1
%1240 = extractelement <4 x float> %1237, i32 2
%1241 = fcmp oeq float %temp14.0, 2.000000e+00
%1242 = select i1 %1241, float 1.000000e+00, float 0.000000e+00
%1243 = bitcast float %1198 to i32
%1244 = bitcast float %1200 to i32
%1245 = bitcast float %244 to i32
%1246 = insertelement <4 x i32> undef, i32 %1243, i32 0
%1247 = insertelement <4 x i32> %1246, i32 %1244, i32 1
%1248 = insertelement <4 x i32> %1247, i32 %1245, i32 2
%1249 = bitcast <8 x i32> %73 to <32 x i8>
%1250 = bitcast <4 x i32> %75 to <16 x i8>
%1251 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1248, <32 x i8> %1249, <16 x i8> %1250, i32 2)
%1252 = extractelement <4 x float> %1251, i32 0
%1253 = extractelement <4 x float> %1251, i32 1
%1254 = extractelement <4 x float> %1251, i32 2
%1255 = fcmp oeq float %temp14.0, 1.000000e+00
%1256 = select i1 %1255, float 1.000000e+00, float 0.000000e+00
%1257 = bitcast float %1198 to i32
%1258 = bitcast float %1200 to i32
%1259 = bitcast float %244 to i32
%1260 = insertelement <4 x i32> undef, i32 %1257, i32 0
%1261 = insertelement <4 x i32> %1260, i32 %1258, i32 1
%1262 = insertelement <4 x i32> %1261, i32 %1259, i32 2
%1263 = bitcast <8 x i32> %65 to <32 x i8>
%1264 = bitcast <4 x i32> %67 to <16 x i8>
%1265 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1262, <32 x i8> %1263, <16 x i8> %1264, i32 2)
%1266 = extractelement <4 x float> %1265, i32 0
%1267 = extractelement <4 x float> %1265, i32 1
%1268 = extractelement <4 x float> %1265, i32 2
%1269 = fcmp oeq float %temp14.0, 0.000000e+00
%1270 = select i1 %1269, float 1.000000e+00, float 0.000000e+00
%1271 = fmul float %1266, %1270
%1272 = fmul float %1267, %1270
%1273 = fmul float %1268, %1270
%1274 = fmul float %1252, %1256
%1275 = fadd float %1274, %1271
%1276 = fmul float %1253, %1256
%1277 = fadd float %1276, %1272
%1278 = fmul float %1254, %1256
%1279 = fadd float %1278, %1273
%1280 = fmul float %1238, %1242
%1281 = fadd float %1280, %1275
%1282 = fmul float %1239, %1242
%1283 = fadd float %1282, %1277
%1284 = fmul float %1240, %1242
%1285 = fadd float %1284, %1279
%1286 = fmul float %1224, %1228
%1287 = fadd float %1286, %1281
%1288 = fmul float %1225, %1228
%1289 = fadd float %1288, %1283
%1290 = fmul float %1226, %1228
%1291 = fadd float %1290, %1285
%1292 = fmul float %1210, %1214
%1293 = fadd float %1292, %1287
%1294 = fmul float %1211, %1214
%1295 = fadd float %1294, %1289
%1296 = fmul float %1212, %1214
%1297 = fadd float %1296, %1291
%1298 = fmul float %1059, %169
%1299 = fmul float %1061, %169
%1300 = fmul float %1063, %169
%1301 = fmul float %1176, %167
%1302 = fadd float %1301, %1298
%1303 = fmul float %1178, %167
%1304 = fadd float %1303, %1299
%1305 = fmul float %1180, %167
%1306 = fadd float %1305, %1300
%1307 = fmul float %1293, %168
%1308 = fadd float %1307, %1302
%1309 = fmul float %1295, %168
%1310 = fadd float %1309, %1304
%1311 = fmul float %1297, %168
%1312 = fadd float %1311, %1306
%1313 = fmul float %708, %169
%1314 = fmul float %710, %169
%1315 = fmul float %712, %169
%1316 = fmul float %825, %167
%1317 = fadd float %1316, %1313
%1318 = fmul float %827, %167
%1319 = fadd float %1318, %1314
%1320 = fmul float %829, %167
%1321 = fadd float %1320, %1315
%1322 = fmul float %942, %168
%1323 = fadd float %1322, %1317
%1324 = fmul float %944, %168
%1325 = fadd float %1324, %1319
%1326 = fmul float %946, %168
%1327 = fadd float %1326, %1321
%1328 = fmul float %357, %169
%1329 = fmul float %359, %169
%1330 = fmul float %361, %169
%1331 = fmul float %474, %167
%1332 = fadd float %1331, %1328
%1333 = fmul float %476, %167
%1334 = fadd float %1333, %1329
%1335 = fmul float %478, %167
%1336 = fadd float %1335, %1330
%1337 = fmul float %591, %168
%1338 = fadd float %1337, %1332
%1339 = fmul float %593, %168
%1340 = fadd float %1339, %1334
%1341 = fmul float %595, %168
%1342 = fadd float %1341, %1336
%1343 = fmul float %108, %1338
%1344 = fmul float %108, %1340
%1345 = fmul float %108, %1342
%1346 = fmul float %109, %1323
%1347 = fadd float %1346, %1343
%1348 = fmul float %109, %1325
%1349 = fadd float %1348, %1344
%1350 = fmul float %109, %1327
%1351 = fadd float %1350, %1345
%1352 = fmul float %110, %1308
%1353 = fadd float %1352, %1347
%1354 = fmul float %110, %1310
%1355 = fadd float %1354, %1349
%1356 = fmul float %110, %1312
%1357 = fadd float %1356, %1351
%1358 = fcmp une float %47, %temp16.0
%.sink217 = select i1 %1358, float %50, float %49
%temp48.2 = select i1 %1358, float 1.953125e-03, float 3.906250e-03
%1359 = fdiv float 1.000000e+00, %.sink217
%1360 = fmul float %118, %1359
%1361 = fmul float %117, %1359
%1362 = call float @llvm.floor.f32(float %1360)
%1363 = fsub float %1360, %1362
%1364 = call float @llvm.floor.f32(float %1361)
%1365 = fsub float %1361, %1364
%1366 = fmul float %51, 2.000000e+00
%1367 = fmul float %1366, %temp48.2
%1368 = fsub float 1.000000e+00, %1367
%1369 = fmul float %temp48.2, %51
%1370 = fmul float %1363, %1368
%1371 = fadd float %1370, %1369
%1372 = fmul float %1365, %1368
%1373 = fadd float %1372, %1369
%1374 = fmul float %1371, %temp16.0
%1375 = fadd float %1374, %temp28.0
%1376 = fmul float %1373, %temp16.0
%1377 = fadd float %1376, %temp29.0
%1378 = bitcast float %1375 to i32
%1379 = bitcast float %1377 to i32
%1380 = bitcast float %244 to i32
%1381 = insertelement <4 x i32> undef, i32 %1378, i32 0
%1382 = insertelement <4 x i32> %1381, i32 %1379, i32 1
%1383 = insertelement <4 x i32> %1382, i32 %1380, i32 2
%1384 = bitcast <8 x i32> %101 to <32 x i8>
%1385 = bitcast <4 x i32> %103 to <16 x i8>
%1386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1383, <32 x i8> %1384, <16 x i8> %1385, i32 2)
%1387 = extractelement <4 x float> %1386, i32 1
%1388 = extractelement <4 x float> %1386, i32 3
%1389 = fcmp oeq float %temp30.0, 4.000000e+00
%1390 = select i1 %1389, float 1.000000e+00, float 0.000000e+00
%1391 = bitcast float %1375 to i32
%1392 = bitcast float %1377 to i32
%1393 = bitcast float %244 to i32
%1394 = insertelement <4 x i32> undef, i32 %1391, i32 0
%1395 = insertelement <4 x i32> %1394, i32 %1392, i32 1
%1396 = insertelement <4 x i32> %1395, i32 %1393, i32 2
%1397 = bitcast <8 x i32> %93 to <32 x i8>
%1398 = bitcast <4 x i32> %95 to <16 x i8>
%1399 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1396, <32 x i8> %1397, <16 x i8> %1398, i32 2)
%1400 = extractelement <4 x float> %1399, i32 1
%1401 = extractelement <4 x float> %1399, i32 3
%1402 = fcmp oeq float %temp30.0, 3.000000e+00
%1403 = select i1 %1402, float 1.000000e+00, float 0.000000e+00
%1404 = bitcast float %1375 to i32
%1405 = bitcast float %1377 to i32
%1406 = bitcast float %244 to i32
%1407 = insertelement <4 x i32> undef, i32 %1404, i32 0
%1408 = insertelement <4 x i32> %1407, i32 %1405, i32 1
%1409 = insertelement <4 x i32> %1408, i32 %1406, i32 2
%1410 = bitcast <8 x i32> %85 to <32 x i8>
%1411 = bitcast <4 x i32> %87 to <16 x i8>
%1412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1409, <32 x i8> %1410, <16 x i8> %1411, i32 2)
%1413 = extractelement <4 x float> %1412, i32 1
%1414 = extractelement <4 x float> %1412, i32 3
%1415 = fcmp oeq float %temp30.0, 2.000000e+00
%1416 = select i1 %1415, float 1.000000e+00, float 0.000000e+00
%1417 = bitcast float %1375 to i32
%1418 = bitcast float %1377 to i32
%1419 = bitcast float %244 to i32
%1420 = insertelement <4 x i32> undef, i32 %1417, i32 0
%1421 = insertelement <4 x i32> %1420, i32 %1418, i32 1
%1422 = insertelement <4 x i32> %1421, i32 %1419, i32 2
%1423 = bitcast <8 x i32> %77 to <32 x i8>
%1424 = bitcast <4 x i32> %79 to <16 x i8>
%1425 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1422, <32 x i8> %1423, <16 x i8> %1424, i32 2)
%1426 = extractelement <4 x float> %1425, i32 1
%1427 = extractelement <4 x float> %1425, i32 3
%1428 = fcmp oeq float %temp30.0, 1.000000e+00
%1429 = select i1 %1428, float 1.000000e+00, float 0.000000e+00
%1430 = bitcast float %1375 to i32
%1431 = bitcast float %1377 to i32
%1432 = bitcast float %244 to i32
%1433 = insertelement <4 x i32> undef, i32 %1430, i32 0
%1434 = insertelement <4 x i32> %1433, i32 %1431, i32 1
%1435 = insertelement <4 x i32> %1434, i32 %1432, i32 2
%1436 = bitcast <8 x i32> %69 to <32 x i8>
%1437 = bitcast <4 x i32> %71 to <16 x i8>
%1438 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1435, <32 x i8> %1436, <16 x i8> %1437, i32 2)
%1439 = extractelement <4 x float> %1438, i32 1
%1440 = extractelement <4 x float> %1438, i32 3
%1441 = fcmp oeq float %temp30.0, 0.000000e+00
%1442 = select i1 %1441, float 1.000000e+00, float 0.000000e+00
%1443 = fmul float %1439, %1442
%1444 = fmul float %1440, %1442
%1445 = fmul float %1426, %1429
%1446 = fadd float %1445, %1443
%1447 = fmul float %1427, %1429
%1448 = fadd float %1447, %1444
%1449 = fmul float %1413, %1416
%1450 = fadd float %1449, %1446
%1451 = fmul float %1414, %1416
%1452 = fadd float %1451, %1448
%1453 = fmul float %1400, %1403
%1454 = fadd float %1453, %1450
%1455 = fmul float %1401, %1403
%1456 = fadd float %1455, %1452
%1457 = fmul float %1387, %1390
%1458 = fadd float %1457, %1454
%1459 = fmul float %1388, %1390
%1460 = fadd float %1459, %1456
%1461 = fmul float %1460, 2.000000e+00
%1462 = fadd float %1461, -1.000000e+00
%1463 = fmul float %1458, 2.000000e+00
%1464 = fadd float %1463, -1.000000e+00
%1465 = fmul float %1462, %1462
%1466 = fmul float %1464, %1464
%1467 = fadd float %1465, %1466
%1468 = call float @llvm.AMDIL.clamp.(float %1467, float 0.000000e+00, float 1.000000e+00)
%1469 = fcmp une float %47, %temp16.0
%.sink218 = select i1 %1469, float %50, float %49
%temp52.2 = select i1 %1469, float 1.953125e-03, float 3.906250e-03
%1470 = fdiv float 1.000000e+00, %.sink218
%1471 = fmul float %118, %1470
%1472 = fmul float %116, %1470
%1473 = call float @llvm.floor.f32(float %1471)
%1474 = fsub float %1471, %1473
%1475 = call float @llvm.floor.f32(float %1472)
%1476 = fsub float %1472, %1475
%1477 = fmul float %51, 2.000000e+00
%1478 = fmul float %1477, %temp52.2
%1479 = fsub float 1.000000e+00, %1478
%1480 = fmul float %temp52.2, %51
%1481 = fmul float %1474, %1479
%1482 = fadd float %1481, %1480
%1483 = fmul float %1476, %1479
%1484 = fadd float %1483, %1480
%1485 = fmul float %1482, %temp16.0
%1486 = fadd float %1485, %temp28.0
%1487 = fmul float %1484, %temp16.0
%1488 = fadd float %1487, %temp29.0
%1489 = bitcast float %1486 to i32
%1490 = bitcast float %1488 to i32
%1491 = bitcast float %244 to i32
%1492 = insertelement <4 x i32> undef, i32 %1489, i32 0
%1493 = insertelement <4 x i32> %1492, i32 %1490, i32 1
%1494 = insertelement <4 x i32> %1493, i32 %1491, i32 2
%1495 = bitcast <8 x i32> %101 to <32 x i8>
%1496 = bitcast <4 x i32> %103 to <16 x i8>
%1497 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1494, <32 x i8> %1495, <16 x i8> %1496, i32 2)
%1498 = extractelement <4 x float> %1497, i32 1
%1499 = extractelement <4 x float> %1497, i32 3
%1500 = fcmp oeq float %temp30.0, 4.000000e+00
%1501 = select i1 %1500, float 1.000000e+00, float 0.000000e+00
%1502 = bitcast float %1486 to i32
%1503 = bitcast float %1488 to i32
%1504 = bitcast float %244 to i32
%1505 = insertelement <4 x i32> undef, i32 %1502, i32 0
%1506 = insertelement <4 x i32> %1505, i32 %1503, i32 1
%1507 = insertelement <4 x i32> %1506, i32 %1504, i32 2
%1508 = bitcast <8 x i32> %93 to <32 x i8>
%1509 = bitcast <4 x i32> %95 to <16 x i8>
%1510 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1507, <32 x i8> %1508, <16 x i8> %1509, i32 2)
%1511 = extractelement <4 x float> %1510, i32 1
%1512 = extractelement <4 x float> %1510, i32 3
%1513 = fcmp oeq float %temp30.0, 3.000000e+00
%1514 = select i1 %1513, float 1.000000e+00, float 0.000000e+00
%1515 = bitcast float %1486 to i32
%1516 = bitcast float %1488 to i32
%1517 = bitcast float %244 to i32
%1518 = insertelement <4 x i32> undef, i32 %1515, i32 0
%1519 = insertelement <4 x i32> %1518, i32 %1516, i32 1
%1520 = insertelement <4 x i32> %1519, i32 %1517, i32 2
%1521 = bitcast <8 x i32> %85 to <32 x i8>
%1522 = bitcast <4 x i32> %87 to <16 x i8>
%1523 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1520, <32 x i8> %1521, <16 x i8> %1522, i32 2)
%1524 = extractelement <4 x float> %1523, i32 1
%1525 = extractelement <4 x float> %1523, i32 3
%1526 = fcmp oeq float %temp30.0, 2.000000e+00
%1527 = select i1 %1526, float 1.000000e+00, float 0.000000e+00
%1528 = bitcast float %1486 to i32
%1529 = bitcast float %1488 to i32
%1530 = bitcast float %244 to i32
%1531 = insertelement <4 x i32> undef, i32 %1528, i32 0
%1532 = insertelement <4 x i32> %1531, i32 %1529, i32 1
%1533 = insertelement <4 x i32> %1532, i32 %1530, i32 2
%1534 = bitcast <8 x i32> %77 to <32 x i8>
%1535 = bitcast <4 x i32> %79 to <16 x i8>
%1536 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1533, <32 x i8> %1534, <16 x i8> %1535, i32 2)
%1537 = extractelement <4 x float> %1536, i32 1
%1538 = extractelement <4 x float> %1536, i32 3
%1539 = fcmp oeq float %temp30.0, 1.000000e+00
%1540 = select i1 %1539, float 1.000000e+00, float 0.000000e+00
%1541 = bitcast float %1486 to i32
%1542 = bitcast float %1488 to i32
%1543 = bitcast float %244 to i32
%1544 = insertelement <4 x i32> undef, i32 %1541, i32 0
%1545 = insertelement <4 x i32> %1544, i32 %1542, i32 1
%1546 = insertelement <4 x i32> %1545, i32 %1543, i32 2
%1547 = bitcast <8 x i32> %69 to <32 x i8>
%1548 = bitcast <4 x i32> %71 to <16 x i8>
%1549 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1546, <32 x i8> %1547, <16 x i8> %1548, i32 2)
%1550 = extractelement <4 x float> %1549, i32 1
%1551 = extractelement <4 x float> %1549, i32 3
%1552 = fcmp oeq float %temp30.0, 0.000000e+00
%1553 = select i1 %1552, float 1.000000e+00, float 0.000000e+00
%1554 = fmul float %1550, %1553
%1555 = fmul float %1551, %1553
%1556 = fmul float %1537, %1540
%1557 = fadd float %1556, %1554
%1558 = fmul float %1538, %1540
%1559 = fadd float %1558, %1555
%1560 = fmul float %1524, %1527
%1561 = fadd float %1560, %1557
%1562 = fmul float %1525, %1527
%1563 = fadd float %1562, %1559
%1564 = fmul float %1511, %1514
%1565 = fadd float %1564, %1561
%1566 = fmul float %1512, %1514
%1567 = fadd float %1566, %1563
%1568 = fmul float %1498, %1501
%1569 = fadd float %1568, %1565
%1570 = fmul float %1499, %1501
%1571 = fadd float %1570, %1567
%1572 = fmul float %1571, 2.000000e+00
%1573 = fadd float %1572, -1.000000e+00
%1574 = fmul float %1569, 2.000000e+00
%1575 = fadd float %1574, -1.000000e+00
%1576 = fmul float %1573, %1573
%1577 = fmul float %1575, %1575
%1578 = fadd float %1576, %1577
%1579 = call float @llvm.AMDIL.clamp.(float %1578, float 0.000000e+00, float 1.000000e+00)
%1580 = fcmp une float %47, %temp16.0
%.sink219 = select i1 %1580, float %50, float %49
%temp56.2 = select i1 %1580, float 1.953125e-03, float 3.906250e-03
%1581 = fdiv float 1.000000e+00, %.sink219
%1582 = fmul float %116, %1581
%1583 = fmul float %117, %1581
%1584 = call float @llvm.floor.f32(float %1582)
%1585 = fsub float %1582, %1584
%1586 = call float @llvm.floor.f32(float %1583)
%1587 = fsub float %1583, %1586
%1588 = fmul float %51, 2.000000e+00
%1589 = fmul float %1588, %temp56.2
%1590 = fsub float 1.000000e+00, %1589
%1591 = fmul float %temp56.2, %51
%1592 = fmul float %1585, %1590
%1593 = fadd float %1592, %1591
%1594 = fmul float %1587, %1590
%1595 = fadd float %1594, %1591
%1596 = fmul float %1593, %temp16.0
%1597 = fadd float %1596, %temp28.0
%1598 = fmul float %1595, %temp16.0
%1599 = fadd float %1598, %temp29.0
%1600 = bitcast float %1597 to i32
%1601 = bitcast float %1599 to i32
%1602 = bitcast float %244 to i32
%1603 = insertelement <4 x i32> undef, i32 %1600, i32 0
%1604 = insertelement <4 x i32> %1603, i32 %1601, i32 1
%1605 = insertelement <4 x i32> %1604, i32 %1602, i32 2
%1606 = bitcast <8 x i32> %101 to <32 x i8>
%1607 = bitcast <4 x i32> %103 to <16 x i8>
%1608 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1605, <32 x i8> %1606, <16 x i8> %1607, i32 2)
%1609 = extractelement <4 x float> %1608, i32 1
%1610 = extractelement <4 x float> %1608, i32 3
%1611 = fcmp oeq float %temp30.0, 4.000000e+00
%1612 = select i1 %1611, float 1.000000e+00, float 0.000000e+00
%1613 = bitcast float %1597 to i32
%1614 = bitcast float %1599 to i32
%1615 = bitcast float %244 to i32
%1616 = insertelement <4 x i32> undef, i32 %1613, i32 0
%1617 = insertelement <4 x i32> %1616, i32 %1614, i32 1
%1618 = insertelement <4 x i32> %1617, i32 %1615, i32 2
%1619 = bitcast <8 x i32> %93 to <32 x i8>
%1620 = bitcast <4 x i32> %95 to <16 x i8>
%1621 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1618, <32 x i8> %1619, <16 x i8> %1620, i32 2)
%1622 = extractelement <4 x float> %1621, i32 1
%1623 = extractelement <4 x float> %1621, i32 3
%1624 = fcmp oeq float %temp30.0, 3.000000e+00
%1625 = select i1 %1624, float 1.000000e+00, float 0.000000e+00
%1626 = bitcast float %1597 to i32
%1627 = bitcast float %1599 to i32
%1628 = bitcast float %244 to i32
%1629 = insertelement <4 x i32> undef, i32 %1626, i32 0
%1630 = insertelement <4 x i32> %1629, i32 %1627, i32 1
%1631 = insertelement <4 x i32> %1630, i32 %1628, i32 2
%1632 = bitcast <8 x i32> %85 to <32 x i8>
%1633 = bitcast <4 x i32> %87 to <16 x i8>
%1634 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1631, <32 x i8> %1632, <16 x i8> %1633, i32 2)
%1635 = extractelement <4 x float> %1634, i32 1
%1636 = extractelement <4 x float> %1634, i32 3
%1637 = fcmp oeq float %temp30.0, 2.000000e+00
%1638 = select i1 %1637, float 1.000000e+00, float 0.000000e+00
%1639 = bitcast float %1597 to i32
%1640 = bitcast float %1599 to i32
%1641 = bitcast float %244 to i32
%1642 = insertelement <4 x i32> undef, i32 %1639, i32 0
%1643 = insertelement <4 x i32> %1642, i32 %1640, i32 1
%1644 = insertelement <4 x i32> %1643, i32 %1641, i32 2
%1645 = bitcast <8 x i32> %77 to <32 x i8>
%1646 = bitcast <4 x i32> %79 to <16 x i8>
%1647 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1644, <32 x i8> %1645, <16 x i8> %1646, i32 2)
%1648 = extractelement <4 x float> %1647, i32 1
%1649 = extractelement <4 x float> %1647, i32 3
%1650 = fcmp oeq float %temp30.0, 1.000000e+00
%1651 = select i1 %1650, float 1.000000e+00, float 0.000000e+00
%1652 = bitcast float %1597 to i32
%1653 = bitcast float %1599 to i32
%1654 = bitcast float %244 to i32
%1655 = insertelement <4 x i32> undef, i32 %1652, i32 0
%1656 = insertelement <4 x i32> %1655, i32 %1653, i32 1
%1657 = insertelement <4 x i32> %1656, i32 %1654, i32 2
%1658 = bitcast <8 x i32> %69 to <32 x i8>
%1659 = bitcast <4 x i32> %71 to <16 x i8>
%1660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1657, <32 x i8> %1658, <16 x i8> %1659, i32 2)
%1661 = extractelement <4 x float> %1660, i32 1
%1662 = extractelement <4 x float> %1660, i32 3
%1663 = fcmp oeq float %temp30.0, 0.000000e+00
%1664 = select i1 %1663, float 1.000000e+00, float 0.000000e+00
%1665 = fmul float %1661, %1664
%1666 = fmul float %1662, %1664
%1667 = fmul float %1648, %1651
%1668 = fadd float %1667, %1665
%1669 = fmul float %1649, %1651
%1670 = fadd float %1669, %1666
%1671 = fmul float %1635, %1638
%1672 = fadd float %1671, %1668
%1673 = fmul float %1636, %1638
%1674 = fadd float %1673, %1670
%1675 = fmul float %1622, %1625
%1676 = fadd float %1675, %1672
%1677 = fmul float %1623, %1625
%1678 = fadd float %1677, %1674
%1679 = fmul float %1609, %1612
%1680 = fadd float %1679, %1676
%1681 = fmul float %1610, %1612
%1682 = fadd float %1681, %1678
%1683 = fmul float %1682, 2.000000e+00
%1684 = fadd float %1683, -1.000000e+00
%1685 = fmul float %1680, 2.000000e+00
%1686 = fadd float %1685, -1.000000e+00
%1687 = fmul float %1684, %1684
%1688 = fmul float %1686, %1686
%1689 = fadd float %1687, %1688
%1690 = call float @llvm.AMDIL.clamp.(float %1689, float 0.000000e+00, float 1.000000e+00)
%1691 = fmul float %167, 0.000000e+00
%1692 = fmul float %1462, %167
%1693 = fmul float %1464, %167
%1694 = fmul float %1575, %168
%1695 = fadd float %1694, %1691
%1696 = fmul float %168, 0.000000e+00
%1697 = fadd float %1696, %1692
%1698 = fmul float %1573, %168
%1699 = fadd float %1698, %1693
%1700 = fmul float %1684, %169
%1701 = fadd float %1700, %1695
%1702 = fmul float %1686, %169
%1703 = fadd float %1702, %1697
%1704 = fmul float %169, 0.000000e+00
%1705 = fadd float %1704, %1699
%1706 = fcmp une float %47, %temp20.0
%.sink220 = select i1 %1706, float %50, float %49
%temp44.3 = select i1 %1706, float 1.953125e-03, float 3.906250e-03
%1707 = fdiv float 1.000000e+00, %.sink220
%1708 = fmul float %118, %1707
%1709 = fmul float %117, %1707
%1710 = call float @llvm.floor.f32(float %1708)
%1711 = fsub float %1708, %1710
%1712 = call float @llvm.floor.f32(float %1709)
%1713 = fsub float %1709, %1712
%1714 = fmul float %51, 2.000000e+00
%1715 = fmul float %1714, %temp44.3
%1716 = fsub float 1.000000e+00, %1715
%1717 = fmul float %temp44.3, %51
%1718 = fmul float %1711, %1716
%1719 = fadd float %1718, %1717
%1720 = fmul float %1713, %1716
%1721 = fadd float %1720, %1717
%1722 = fmul float %1719, %temp20.0
%1723 = fadd float %1722, %temp36.0
%1724 = fmul float %1721, %temp20.0
%1725 = fadd float %1724, %temp37.0
%1726 = bitcast float %1723 to i32
%1727 = bitcast float %1725 to i32
%1728 = bitcast float %244 to i32
%1729 = insertelement <4 x i32> undef, i32 %1726, i32 0
%1730 = insertelement <4 x i32> %1729, i32 %1727, i32 1
%1731 = insertelement <4 x i32> %1730, i32 %1728, i32 2
%1732 = bitcast <8 x i32> %101 to <32 x i8>
%1733 = bitcast <4 x i32> %103 to <16 x i8>
%1734 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1731, <32 x i8> %1732, <16 x i8> %1733, i32 2)
%1735 = extractelement <4 x float> %1734, i32 1
%1736 = extractelement <4 x float> %1734, i32 3
%1737 = fcmp oeq float %temp38.0, 4.000000e+00
%1738 = select i1 %1737, float 1.000000e+00, float 0.000000e+00
%1739 = bitcast float %1723 to i32
%1740 = bitcast float %1725 to i32
%1741 = bitcast float %244 to i32
%1742 = insertelement <4 x i32> undef, i32 %1739, i32 0
%1743 = insertelement <4 x i32> %1742, i32 %1740, i32 1
%1744 = insertelement <4 x i32> %1743, i32 %1741, i32 2
%1745 = bitcast <8 x i32> %93 to <32 x i8>
%1746 = bitcast <4 x i32> %95 to <16 x i8>
%1747 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1744, <32 x i8> %1745, <16 x i8> %1746, i32 2)
%1748 = extractelement <4 x float> %1747, i32 1
%1749 = extractelement <4 x float> %1747, i32 3
%1750 = fcmp oeq float %temp38.0, 3.000000e+00
%1751 = select i1 %1750, float 1.000000e+00, float 0.000000e+00
%1752 = bitcast float %1723 to i32
%1753 = bitcast float %1725 to i32
%1754 = bitcast float %244 to i32
%1755 = insertelement <4 x i32> undef, i32 %1752, i32 0
%1756 = insertelement <4 x i32> %1755, i32 %1753, i32 1
%1757 = insertelement <4 x i32> %1756, i32 %1754, i32 2
%1758 = bitcast <8 x i32> %85 to <32 x i8>
%1759 = bitcast <4 x i32> %87 to <16 x i8>
%1760 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1757, <32 x i8> %1758, <16 x i8> %1759, i32 2)
%1761 = extractelement <4 x float> %1760, i32 1
%1762 = extractelement <4 x float> %1760, i32 3
%1763 = fcmp oeq float %temp38.0, 2.000000e+00
%1764 = select i1 %1763, float 1.000000e+00, float 0.000000e+00
%1765 = bitcast float %1723 to i32
%1766 = bitcast float %1725 to i32
%1767 = bitcast float %244 to i32
%1768 = insertelement <4 x i32> undef, i32 %1765, i32 0
%1769 = insertelement <4 x i32> %1768, i32 %1766, i32 1
%1770 = insertelement <4 x i32> %1769, i32 %1767, i32 2
%1771 = bitcast <8 x i32> %77 to <32 x i8>
%1772 = bitcast <4 x i32> %79 to <16 x i8>
%1773 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1770, <32 x i8> %1771, <16 x i8> %1772, i32 2)
%1774 = extractelement <4 x float> %1773, i32 1
%1775 = extractelement <4 x float> %1773, i32 3
%1776 = fcmp oeq float %temp38.0, 1.000000e+00
%1777 = select i1 %1776, float 1.000000e+00, float 0.000000e+00
%1778 = bitcast float %1723 to i32
%1779 = bitcast float %1725 to i32
%1780 = bitcast float %244 to i32
%1781 = insertelement <4 x i32> undef, i32 %1778, i32 0
%1782 = insertelement <4 x i32> %1781, i32 %1779, i32 1
%1783 = insertelement <4 x i32> %1782, i32 %1780, i32 2
%1784 = bitcast <8 x i32> %69 to <32 x i8>
%1785 = bitcast <4 x i32> %71 to <16 x i8>
%1786 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1783, <32 x i8> %1784, <16 x i8> %1785, i32 2)
%1787 = extractelement <4 x float> %1786, i32 1
%1788 = extractelement <4 x float> %1786, i32 3
%1789 = fcmp oeq float %temp38.0, 0.000000e+00
%1790 = select i1 %1789, float 1.000000e+00, float 0.000000e+00
%1791 = fmul float %1787, %1790
%1792 = fmul float %1788, %1790
%1793 = fmul float %1774, %1777
%1794 = fadd float %1793, %1791
%1795 = fmul float %1775, %1777
%1796 = fadd float %1795, %1792
%1797 = fmul float %1761, %1764
%1798 = fadd float %1797, %1794
%1799 = fmul float %1762, %1764
%1800 = fadd float %1799, %1796
%1801 = fmul float %1748, %1751
%1802 = fadd float %1801, %1798
%1803 = fmul float %1749, %1751
%1804 = fadd float %1803, %1800
%1805 = fmul float %1735, %1738
%1806 = fadd float %1805, %1802
%1807 = fmul float %1736, %1738
%1808 = fadd float %1807, %1804
%1809 = fmul float %1808, 2.000000e+00
%1810 = fadd float %1809, -1.000000e+00
%1811 = fmul float %1806, 2.000000e+00
%1812 = fadd float %1811, -1.000000e+00
%1813 = fmul float %1810, %1810
%1814 = fmul float %1812, %1812
%1815 = fadd float %1813, %1814
%1816 = call float @llvm.AMDIL.clamp.(float %1815, float 0.000000e+00, float 1.000000e+00)
%1817 = fcmp une float %47, %temp20.0
%.sink221 = select i1 %1817, float %50, float %49
%temp48.4 = select i1 %1817, float 1.953125e-03, float 3.906250e-03
%1818 = fdiv float 1.000000e+00, %.sink221
%1819 = fmul float %118, %1818
%1820 = fmul float %116, %1818
%1821 = call float @llvm.floor.f32(float %1819)
%1822 = fsub float %1819, %1821
%1823 = call float @llvm.floor.f32(float %1820)
%1824 = fsub float %1820, %1823
%1825 = fmul float %51, 2.000000e+00
%1826 = fmul float %1825, %temp48.4
%1827 = fsub float 1.000000e+00, %1826
%1828 = fmul float %temp48.4, %51
%1829 = fmul float %1822, %1827
%1830 = fadd float %1829, %1828
%1831 = fmul float %1824, %1827
%1832 = fadd float %1831, %1828
%1833 = fmul float %1830, %temp20.0
%1834 = fadd float %1833, %temp36.0
%1835 = fmul float %1832, %temp20.0
%1836 = fadd float %1835, %temp37.0
%1837 = bitcast float %1834 to i32
%1838 = bitcast float %1836 to i32
%1839 = bitcast float %244 to i32
%1840 = insertelement <4 x i32> undef, i32 %1837, i32 0
%1841 = insertelement <4 x i32> %1840, i32 %1838, i32 1
%1842 = insertelement <4 x i32> %1841, i32 %1839, i32 2
%1843 = bitcast <8 x i32> %101 to <32 x i8>
%1844 = bitcast <4 x i32> %103 to <16 x i8>
%1845 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1842, <32 x i8> %1843, <16 x i8> %1844, i32 2)
%1846 = extractelement <4 x float> %1845, i32 1
%1847 = extractelement <4 x float> %1845, i32 3
%1848 = fcmp oeq float %temp38.0, 4.000000e+00
%1849 = select i1 %1848, float 1.000000e+00, float 0.000000e+00
%1850 = bitcast float %1834 to i32
%1851 = bitcast float %1836 to i32
%1852 = bitcast float %244 to i32
%1853 = insertelement <4 x i32> undef, i32 %1850, i32 0
%1854 = insertelement <4 x i32> %1853, i32 %1851, i32 1
%1855 = insertelement <4 x i32> %1854, i32 %1852, i32 2
%1856 = bitcast <8 x i32> %93 to <32 x i8>
%1857 = bitcast <4 x i32> %95 to <16 x i8>
%1858 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1855, <32 x i8> %1856, <16 x i8> %1857, i32 2)
%1859 = extractelement <4 x float> %1858, i32 1
%1860 = extractelement <4 x float> %1858, i32 3
%1861 = fcmp oeq float %temp38.0, 3.000000e+00
%1862 = select i1 %1861, float 1.000000e+00, float 0.000000e+00
%1863 = bitcast float %1834 to i32
%1864 = bitcast float %1836 to i32
%1865 = bitcast float %244 to i32
%1866 = insertelement <4 x i32> undef, i32 %1863, i32 0
%1867 = insertelement <4 x i32> %1866, i32 %1864, i32 1
%1868 = insertelement <4 x i32> %1867, i32 %1865, i32 2
%1869 = bitcast <8 x i32> %85 to <32 x i8>
%1870 = bitcast <4 x i32> %87 to <16 x i8>
%1871 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1868, <32 x i8> %1869, <16 x i8> %1870, i32 2)
%1872 = extractelement <4 x float> %1871, i32 1
%1873 = extractelement <4 x float> %1871, i32 3
%1874 = fcmp oeq float %temp38.0, 2.000000e+00
%1875 = select i1 %1874, float 1.000000e+00, float 0.000000e+00
%1876 = bitcast float %1834 to i32
%1877 = bitcast float %1836 to i32
%1878 = bitcast float %244 to i32
%1879 = insertelement <4 x i32> undef, i32 %1876, i32 0
%1880 = insertelement <4 x i32> %1879, i32 %1877, i32 1
%1881 = insertelement <4 x i32> %1880, i32 %1878, i32 2
%1882 = bitcast <8 x i32> %77 to <32 x i8>
%1883 = bitcast <4 x i32> %79 to <16 x i8>
%1884 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1881, <32 x i8> %1882, <16 x i8> %1883, i32 2)
%1885 = extractelement <4 x float> %1884, i32 1
%1886 = extractelement <4 x float> %1884, i32 3
%1887 = fcmp oeq float %temp38.0, 1.000000e+00
%1888 = select i1 %1887, float 1.000000e+00, float 0.000000e+00
%1889 = bitcast float %1834 to i32
%1890 = bitcast float %1836 to i32
%1891 = bitcast float %244 to i32
%1892 = insertelement <4 x i32> undef, i32 %1889, i32 0
%1893 = insertelement <4 x i32> %1892, i32 %1890, i32 1
%1894 = insertelement <4 x i32> %1893, i32 %1891, i32 2
%1895 = bitcast <8 x i32> %69 to <32 x i8>
%1896 = bitcast <4 x i32> %71 to <16 x i8>
%1897 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1894, <32 x i8> %1895, <16 x i8> %1896, i32 2)
%1898 = extractelement <4 x float> %1897, i32 1
%1899 = extractelement <4 x float> %1897, i32 3
%1900 = fcmp oeq float %temp38.0, 0.000000e+00
%1901 = select i1 %1900, float 1.000000e+00, float 0.000000e+00
%1902 = fmul float %1898, %1901
%1903 = fmul float %1899, %1901
%1904 = fmul float %1885, %1888
%1905 = fadd float %1904, %1902
%1906 = fmul float %1886, %1888
%1907 = fadd float %1906, %1903
%1908 = fmul float %1872, %1875
%1909 = fadd float %1908, %1905
%1910 = fmul float %1873, %1875
%1911 = fadd float %1910, %1907
%1912 = fmul float %1859, %1862
%1913 = fadd float %1912, %1909
%1914 = fmul float %1860, %1862
%1915 = fadd float %1914, %1911
%1916 = fmul float %1846, %1849
%1917 = fadd float %1916, %1913
%1918 = fmul float %1847, %1849
%1919 = fadd float %1918, %1915
%1920 = fmul float %1919, 2.000000e+00
%1921 = fadd float %1920, -1.000000e+00
%1922 = fmul float %1917, 2.000000e+00
%1923 = fadd float %1922, -1.000000e+00
%1924 = fmul float %1921, %1921
%1925 = fmul float %1923, %1923
%1926 = fadd float %1924, %1925
%1927 = call float @llvm.AMDIL.clamp.(float %1926, float 0.000000e+00, float 1.000000e+00)
%1928 = fcmp une float %47, %temp20.0
%.sink222 = select i1 %1928, float %50, float %49
%temp52.4 = select i1 %1928, float 1.953125e-03, float 3.906250e-03
%1929 = fdiv float 1.000000e+00, %.sink222
%1930 = fmul float %116, %1929
%1931 = fmul float %117, %1929
%1932 = call float @llvm.floor.f32(float %1930)
%1933 = fsub float %1930, %1932
%1934 = call float @llvm.floor.f32(float %1931)
%1935 = fsub float %1931, %1934
%1936 = fmul float %51, 2.000000e+00
%1937 = fmul float %1936, %temp52.4
%1938 = fsub float 1.000000e+00, %1937
%1939 = fmul float %temp52.4, %51
%1940 = fmul float %1933, %1938
%1941 = fadd float %1940, %1939
%1942 = fmul float %1935, %1938
%1943 = fadd float %1942, %1939
%1944 = fmul float %1941, %temp20.0
%1945 = fadd float %1944, %temp36.0
%1946 = fmul float %1943, %temp20.0
%1947 = fadd float %1946, %temp37.0
%1948 = bitcast float %1945 to i32
%1949 = bitcast float %1947 to i32
%1950 = bitcast float %244 to i32
%1951 = insertelement <4 x i32> undef, i32 %1948, i32 0
%1952 = insertelement <4 x i32> %1951, i32 %1949, i32 1
%1953 = insertelement <4 x i32> %1952, i32 %1950, i32 2
%1954 = bitcast <8 x i32> %101 to <32 x i8>
%1955 = bitcast <4 x i32> %103 to <16 x i8>
%1956 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1953, <32 x i8> %1954, <16 x i8> %1955, i32 2)
%1957 = extractelement <4 x float> %1956, i32 1
%1958 = extractelement <4 x float> %1956, i32 3
%1959 = fcmp oeq float %temp38.0, 4.000000e+00
%1960 = select i1 %1959, float 1.000000e+00, float 0.000000e+00
%1961 = bitcast float %1945 to i32
%1962 = bitcast float %1947 to i32
%1963 = bitcast float %244 to i32
%1964 = insertelement <4 x i32> undef, i32 %1961, i32 0
%1965 = insertelement <4 x i32> %1964, i32 %1962, i32 1
%1966 = insertelement <4 x i32> %1965, i32 %1963, i32 2
%1967 = bitcast <8 x i32> %93 to <32 x i8>
%1968 = bitcast <4 x i32> %95 to <16 x i8>
%1969 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1966, <32 x i8> %1967, <16 x i8> %1968, i32 2)
%1970 = extractelement <4 x float> %1969, i32 1
%1971 = extractelement <4 x float> %1969, i32 3
%1972 = fcmp oeq float %temp38.0, 3.000000e+00
%1973 = select i1 %1972, float 1.000000e+00, float 0.000000e+00
%1974 = bitcast float %1945 to i32
%1975 = bitcast float %1947 to i32
%1976 = bitcast float %244 to i32
%1977 = insertelement <4 x i32> undef, i32 %1974, i32 0
%1978 = insertelement <4 x i32> %1977, i32 %1975, i32 1
%1979 = insertelement <4 x i32> %1978, i32 %1976, i32 2
%1980 = bitcast <8 x i32> %85 to <32 x i8>
%1981 = bitcast <4 x i32> %87 to <16 x i8>
%1982 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1979, <32 x i8> %1980, <16 x i8> %1981, i32 2)
%1983 = extractelement <4 x float> %1982, i32 1
%1984 = extractelement <4 x float> %1982, i32 3
%1985 = fcmp oeq float %temp38.0, 2.000000e+00
%1986 = select i1 %1985, float 1.000000e+00, float 0.000000e+00
%1987 = bitcast float %1945 to i32
%1988 = bitcast float %1947 to i32
%1989 = bitcast float %244 to i32
%1990 = insertelement <4 x i32> undef, i32 %1987, i32 0
%1991 = insertelement <4 x i32> %1990, i32 %1988, i32 1
%1992 = insertelement <4 x i32> %1991, i32 %1989, i32 2
%1993 = bitcast <8 x i32> %77 to <32 x i8>
%1994 = bitcast <4 x i32> %79 to <16 x i8>
%1995 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1992, <32 x i8> %1993, <16 x i8> %1994, i32 2)
%1996 = extractelement <4 x float> %1995, i32 1
%1997 = extractelement <4 x float> %1995, i32 3
%1998 = fcmp oeq float %temp38.0, 1.000000e+00
%1999 = select i1 %1998, float 1.000000e+00, float 0.000000e+00
%2000 = bitcast float %1945 to i32
%2001 = bitcast float %1947 to i32
%2002 = bitcast float %244 to i32
%2003 = insertelement <4 x i32> undef, i32 %2000, i32 0
%2004 = insertelement <4 x i32> %2003, i32 %2001, i32 1
%2005 = insertelement <4 x i32> %2004, i32 %2002, i32 2
%2006 = bitcast <8 x i32> %69 to <32 x i8>
%2007 = bitcast <4 x i32> %71 to <16 x i8>
%2008 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2005, <32 x i8> %2006, <16 x i8> %2007, i32 2)
%2009 = extractelement <4 x float> %2008, i32 1
%2010 = extractelement <4 x float> %2008, i32 3
%2011 = fcmp oeq float %temp38.0, 0.000000e+00
%2012 = select i1 %2011, float 1.000000e+00, float 0.000000e+00
%2013 = fmul float %2009, %2012
%2014 = fmul float %2010, %2012
%2015 = fmul float %1996, %1999
%2016 = fadd float %2015, %2013
%2017 = fmul float %1997, %1999
%2018 = fadd float %2017, %2014
%2019 = fmul float %1983, %1986
%2020 = fadd float %2019, %2016
%2021 = fmul float %1984, %1986
%2022 = fadd float %2021, %2018
%2023 = fmul float %1970, %1973
%2024 = fadd float %2023, %2020
%2025 = fmul float %1971, %1973
%2026 = fadd float %2025, %2022
%2027 = fmul float %1957, %1960
%2028 = fadd float %2027, %2024
%2029 = fmul float %1958, %1960
%2030 = fadd float %2029, %2026
%2031 = fmul float %2030, 2.000000e+00
%2032 = fadd float %2031, -1.000000e+00
%2033 = fmul float %2028, 2.000000e+00
%2034 = fadd float %2033, -1.000000e+00
%2035 = fmul float %2032, %2032
%2036 = fmul float %2034, %2034
%2037 = fadd float %2035, %2036
%2038 = call float @llvm.AMDIL.clamp.(float %2037, float 0.000000e+00, float 1.000000e+00)
%2039 = fmul float %167, 0.000000e+00
%2040 = fmul float %1810, %167
%2041 = fmul float %1812, %167
%2042 = fmul float %1923, %168
%2043 = fadd float %2042, %2039
%2044 = fmul float %168, 0.000000e+00
%2045 = fadd float %2044, %2040
%2046 = fmul float %1921, %168
%2047 = fadd float %2046, %2041
%2048 = fmul float %2032, %169
%2049 = fadd float %2048, %2043
%2050 = fmul float %2034, %169
%2051 = fadd float %2050, %2045
%2052 = fmul float %169, 0.000000e+00
%2053 = fadd float %2052, %2047
%2054 = fcmp une float %47, %temp24.0
%.sink223 = select i1 %2054, float %50, float %49
%temp36.1 = select i1 %2054, float 1.953125e-03, float 3.906250e-03
%2055 = fdiv float 1.000000e+00, %.sink223
%2056 = fmul float %118, %2055
%2057 = fmul float %117, %2055
%2058 = call float @llvm.floor.f32(float %2056)
%2059 = fsub float %2056, %2058
%2060 = call float @llvm.floor.f32(float %2057)
%2061 = fsub float %2057, %2060
%2062 = fmul float %51, 2.000000e+00
%2063 = fmul float %2062, %temp36.1
%2064 = fsub float 1.000000e+00, %2063
%2065 = fmul float %temp36.1, %51
%2066 = fmul float %2059, %2064
%2067 = fadd float %2066, %2065
%2068 = fmul float %2061, %2064
%2069 = fadd float %2068, %2065
%2070 = fmul float %2067, %temp24.0
%2071 = fadd float %2070, %temp12.0
%2072 = fmul float %2069, %temp24.0
%2073 = fadd float %2072, %temp13.0
%2074 = bitcast float %2071 to i32
%2075 = bitcast float %2073 to i32
%2076 = bitcast float %244 to i32
%2077 = insertelement <4 x i32> undef, i32 %2074, i32 0
%2078 = insertelement <4 x i32> %2077, i32 %2075, i32 1
%2079 = insertelement <4 x i32> %2078, i32 %2076, i32 2
%2080 = bitcast <8 x i32> %101 to <32 x i8>
%2081 = bitcast <4 x i32> %103 to <16 x i8>
%2082 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2079, <32 x i8> %2080, <16 x i8> %2081, i32 2)
%2083 = extractelement <4 x float> %2082, i32 1
%2084 = extractelement <4 x float> %2082, i32 3
%2085 = fcmp oeq float %temp14.0, 4.000000e+00
%2086 = select i1 %2085, float 1.000000e+00, float 0.000000e+00
%2087 = bitcast float %2071 to i32
%2088 = bitcast float %2073 to i32
%2089 = bitcast float %244 to i32
%2090 = insertelement <4 x i32> undef, i32 %2087, i32 0
%2091 = insertelement <4 x i32> %2090, i32 %2088, i32 1
%2092 = insertelement <4 x i32> %2091, i32 %2089, i32 2
%2093 = bitcast <8 x i32> %93 to <32 x i8>
%2094 = bitcast <4 x i32> %95 to <16 x i8>
%2095 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2092, <32 x i8> %2093, <16 x i8> %2094, i32 2)
%2096 = extractelement <4 x float> %2095, i32 1
%2097 = extractelement <4 x float> %2095, i32 3
%2098 = fcmp oeq float %temp14.0, 3.000000e+00
%2099 = select i1 %2098, float 1.000000e+00, float 0.000000e+00
%2100 = bitcast float %2071 to i32
%2101 = bitcast float %2073 to i32
%2102 = bitcast float %244 to i32
%2103 = insertelement <4 x i32> undef, i32 %2100, i32 0
%2104 = insertelement <4 x i32> %2103, i32 %2101, i32 1
%2105 = insertelement <4 x i32> %2104, i32 %2102, i32 2
%2106 = bitcast <8 x i32> %85 to <32 x i8>
%2107 = bitcast <4 x i32> %87 to <16 x i8>
%2108 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2105, <32 x i8> %2106, <16 x i8> %2107, i32 2)
%2109 = extractelement <4 x float> %2108, i32 1
%2110 = extractelement <4 x float> %2108, i32 3
%2111 = fcmp oeq float %temp14.0, 2.000000e+00
%2112 = select i1 %2111, float 1.000000e+00, float 0.000000e+00
%2113 = bitcast float %2071 to i32
%2114 = bitcast float %2073 to i32
%2115 = bitcast float %244 to i32
%2116 = insertelement <4 x i32> undef, i32 %2113, i32 0
%2117 = insertelement <4 x i32> %2116, i32 %2114, i32 1
%2118 = insertelement <4 x i32> %2117, i32 %2115, i32 2
%2119 = bitcast <8 x i32> %77 to <32 x i8>
%2120 = bitcast <4 x i32> %79 to <16 x i8>
%2121 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2118, <32 x i8> %2119, <16 x i8> %2120, i32 2)
%2122 = extractelement <4 x float> %2121, i32 1
%2123 = extractelement <4 x float> %2121, i32 3
%2124 = fcmp oeq float %temp14.0, 1.000000e+00
%2125 = select i1 %2124, float 1.000000e+00, float 0.000000e+00
%2126 = bitcast float %2071 to i32
%2127 = bitcast float %2073 to i32
%2128 = bitcast float %244 to i32
%2129 = insertelement <4 x i32> undef, i32 %2126, i32 0
%2130 = insertelement <4 x i32> %2129, i32 %2127, i32 1
%2131 = insertelement <4 x i32> %2130, i32 %2128, i32 2
%2132 = bitcast <8 x i32> %69 to <32 x i8>
%2133 = bitcast <4 x i32> %71 to <16 x i8>
%2134 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2131, <32 x i8> %2132, <16 x i8> %2133, i32 2)
%2135 = extractelement <4 x float> %2134, i32 1
%2136 = extractelement <4 x float> %2134, i32 3
%2137 = fcmp oeq float %temp14.0, 0.000000e+00
%2138 = select i1 %2137, float 1.000000e+00, float 0.000000e+00
%2139 = fmul float %2135, %2138
%2140 = fmul float %2136, %2138
%2141 = fmul float %2122, %2125
%2142 = fadd float %2141, %2139
%2143 = fmul float %2123, %2125
%2144 = fadd float %2143, %2140
%2145 = fmul float %2109, %2112
%2146 = fadd float %2145, %2142
%2147 = fmul float %2110, %2112
%2148 = fadd float %2147, %2144
%2149 = fmul float %2096, %2099
%2150 = fadd float %2149, %2146
%2151 = fmul float %2097, %2099
%2152 = fadd float %2151, %2148
%2153 = fmul float %2083, %2086
%2154 = fadd float %2153, %2150
%2155 = fmul float %2084, %2086
%2156 = fadd float %2155, %2152
%2157 = fmul float %2156, 2.000000e+00
%2158 = fadd float %2157, -1.000000e+00
%2159 = fmul float %2154, 2.000000e+00
%2160 = fadd float %2159, -1.000000e+00
%2161 = fmul float %2158, %2158
%2162 = fmul float %2160, %2160
%2163 = fadd float %2161, %2162
%2164 = call float @llvm.AMDIL.clamp.(float %2163, float 0.000000e+00, float 1.000000e+00)
%2165 = fcmp une float %47, %temp24.0
%.sink224 = select i1 %2165, float %50, float %49
%temp44.5 = select i1 %2165, float 1.953125e-03, float 3.906250e-03
%2166 = fdiv float 1.000000e+00, %.sink224
%2167 = fmul float %118, %2166
%2168 = fmul float %116, %2166
%2169 = call float @llvm.floor.f32(float %2167)
%2170 = fsub float %2167, %2169
%2171 = call float @llvm.floor.f32(float %2168)
%2172 = fsub float %2168, %2171
%2173 = fmul float %51, 2.000000e+00
%2174 = fmul float %2173, %temp44.5
%2175 = fsub float 1.000000e+00, %2174
%2176 = fmul float %temp44.5, %51
%2177 = fmul float %2170, %2175
%2178 = fadd float %2177, %2176
%2179 = fmul float %2172, %2175
%2180 = fadd float %2179, %2176
%2181 = fmul float %2178, %temp24.0
%2182 = fadd float %2181, %temp12.0
%2183 = fmul float %2180, %temp24.0
%2184 = fadd float %2183, %temp13.0
%2185 = bitcast float %2182 to i32
%2186 = bitcast float %2184 to i32
%2187 = bitcast float %244 to i32
%2188 = insertelement <4 x i32> undef, i32 %2185, i32 0
%2189 = insertelement <4 x i32> %2188, i32 %2186, i32 1
%2190 = insertelement <4 x i32> %2189, i32 %2187, i32 2
%2191 = bitcast <8 x i32> %101 to <32 x i8>
%2192 = bitcast <4 x i32> %103 to <16 x i8>
%2193 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2190, <32 x i8> %2191, <16 x i8> %2192, i32 2)
%2194 = extractelement <4 x float> %2193, i32 1
%2195 = extractelement <4 x float> %2193, i32 3
%2196 = fcmp oeq float %temp14.0, 4.000000e+00
%2197 = select i1 %2196, float 1.000000e+00, float 0.000000e+00
%2198 = bitcast float %2182 to i32
%2199 = bitcast float %2184 to i32
%2200 = bitcast float %244 to i32
%2201 = insertelement <4 x i32> undef, i32 %2198, i32 0
%2202 = insertelement <4 x i32> %2201, i32 %2199, i32 1
%2203 = insertelement <4 x i32> %2202, i32 %2200, i32 2
%2204 = bitcast <8 x i32> %93 to <32 x i8>
%2205 = bitcast <4 x i32> %95 to <16 x i8>
%2206 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2203, <32 x i8> %2204, <16 x i8> %2205, i32 2)
%2207 = extractelement <4 x float> %2206, i32 1
%2208 = extractelement <4 x float> %2206, i32 3
%2209 = fcmp oeq float %temp14.0, 3.000000e+00
%2210 = select i1 %2209, float 1.000000e+00, float 0.000000e+00
%2211 = bitcast float %2182 to i32
%2212 = bitcast float %2184 to i32
%2213 = bitcast float %244 to i32
%2214 = insertelement <4 x i32> undef, i32 %2211, i32 0
%2215 = insertelement <4 x i32> %2214, i32 %2212, i32 1
%2216 = insertelement <4 x i32> %2215, i32 %2213, i32 2
%2217 = bitcast <8 x i32> %85 to <32 x i8>
%2218 = bitcast <4 x i32> %87 to <16 x i8>
%2219 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2216, <32 x i8> %2217, <16 x i8> %2218, i32 2)
%2220 = extractelement <4 x float> %2219, i32 1
%2221 = extractelement <4 x float> %2219, i32 3
%2222 = fcmp oeq float %temp14.0, 2.000000e+00
%2223 = select i1 %2222, float 1.000000e+00, float 0.000000e+00
%2224 = bitcast float %2182 to i32
%2225 = bitcast float %2184 to i32
%2226 = bitcast float %244 to i32
%2227 = insertelement <4 x i32> undef, i32 %2224, i32 0
%2228 = insertelement <4 x i32> %2227, i32 %2225, i32 1
%2229 = insertelement <4 x i32> %2228, i32 %2226, i32 2
%2230 = bitcast <8 x i32> %77 to <32 x i8>
%2231 = bitcast <4 x i32> %79 to <16 x i8>
%2232 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2229, <32 x i8> %2230, <16 x i8> %2231, i32 2)
%2233 = extractelement <4 x float> %2232, i32 1
%2234 = extractelement <4 x float> %2232, i32 3
%2235 = fcmp oeq float %temp14.0, 1.000000e+00
%2236 = select i1 %2235, float 1.000000e+00, float 0.000000e+00
%2237 = bitcast float %2182 to i32
%2238 = bitcast float %2184 to i32
%2239 = bitcast float %244 to i32
%2240 = insertelement <4 x i32> undef, i32 %2237, i32 0
%2241 = insertelement <4 x i32> %2240, i32 %2238, i32 1
%2242 = insertelement <4 x i32> %2241, i32 %2239, i32 2
%2243 = bitcast <8 x i32> %69 to <32 x i8>
%2244 = bitcast <4 x i32> %71 to <16 x i8>
%2245 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2242, <32 x i8> %2243, <16 x i8> %2244, i32 2)
%2246 = extractelement <4 x float> %2245, i32 1
%2247 = extractelement <4 x float> %2245, i32 3
%2248 = fcmp oeq float %temp14.0, 0.000000e+00
%2249 = select i1 %2248, float 1.000000e+00, float 0.000000e+00
%2250 = fmul float %2246, %2249
%2251 = fmul float %2247, %2249
%2252 = fmul float %2233, %2236
%2253 = fadd float %2252, %2250
%2254 = fmul float %2234, %2236
%2255 = fadd float %2254, %2251
%2256 = fmul float %2220, %2223
%2257 = fadd float %2256, %2253
%2258 = fmul float %2221, %2223
%2259 = fadd float %2258, %2255
%2260 = fmul float %2207, %2210
%2261 = fadd float %2260, %2257
%2262 = fmul float %2208, %2210
%2263 = fadd float %2262, %2259
%2264 = fmul float %2194, %2197
%2265 = fadd float %2264, %2261
%2266 = fmul float %2195, %2197
%2267 = fadd float %2266, %2263
%2268 = fmul float %2267, 2.000000e+00
%2269 = fadd float %2268, -1.000000e+00
%2270 = fmul float %2265, 2.000000e+00
%2271 = fadd float %2270, -1.000000e+00
%2272 = fmul float %2269, %2269
%2273 = fmul float %2271, %2271
%2274 = fadd float %2272, %2273
%2275 = call float @llvm.AMDIL.clamp.(float %2274, float 0.000000e+00, float 1.000000e+00)
%2276 = fcmp une float %47, %temp24.0
%.sink225 = select i1 %2276, float %50, float %49
%temp48.6 = select i1 %2276, float 1.953125e-03, float 3.906250e-03
%2277 = fdiv float 1.000000e+00, %.sink225
%2278 = fmul float %116, %2277
%2279 = fmul float %117, %2277
%2280 = call float @llvm.floor.f32(float %2278)
%2281 = fsub float %2278, %2280
%2282 = call float @llvm.floor.f32(float %2279)
%2283 = fsub float %2279, %2282
%2284 = fmul float %51, 2.000000e+00
%2285 = fmul float %2284, %temp48.6
%2286 = fsub float 1.000000e+00, %2285
%2287 = fmul float %temp48.6, %51
%2288 = fmul float %2281, %2286
%2289 = fadd float %2288, %2287
%2290 = fmul float %2283, %2286
%2291 = fadd float %2290, %2287
%2292 = fmul float %2289, %temp24.0
%2293 = fadd float %2292, %temp12.0
%2294 = fmul float %2291, %temp24.0
%2295 = fadd float %2294, %temp13.0
%2296 = bitcast float %2293 to i32
%2297 = bitcast float %2295 to i32
%2298 = bitcast float %244 to i32
%2299 = insertelement <4 x i32> undef, i32 %2296, i32 0
%2300 = insertelement <4 x i32> %2299, i32 %2297, i32 1
%2301 = insertelement <4 x i32> %2300, i32 %2298, i32 2
%2302 = bitcast <8 x i32> %101 to <32 x i8>
%2303 = bitcast <4 x i32> %103 to <16 x i8>
%2304 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2301, <32 x i8> %2302, <16 x i8> %2303, i32 2)
%2305 = extractelement <4 x float> %2304, i32 1
%2306 = extractelement <4 x float> %2304, i32 3
%2307 = fcmp oeq float %temp14.0, 4.000000e+00
%2308 = select i1 %2307, float 1.000000e+00, float 0.000000e+00
%2309 = bitcast float %2293 to i32
%2310 = bitcast float %2295 to i32
%2311 = bitcast float %244 to i32
%2312 = insertelement <4 x i32> undef, i32 %2309, i32 0
%2313 = insertelement <4 x i32> %2312, i32 %2310, i32 1
%2314 = insertelement <4 x i32> %2313, i32 %2311, i32 2
%2315 = bitcast <8 x i32> %93 to <32 x i8>
%2316 = bitcast <4 x i32> %95 to <16 x i8>
%2317 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2314, <32 x i8> %2315, <16 x i8> %2316, i32 2)
%2318 = extractelement <4 x float> %2317, i32 1
%2319 = extractelement <4 x float> %2317, i32 3
%2320 = fcmp oeq float %temp14.0, 3.000000e+00
%2321 = select i1 %2320, float 1.000000e+00, float 0.000000e+00
%2322 = bitcast float %2293 to i32
%2323 = bitcast float %2295 to i32
%2324 = bitcast float %244 to i32
%2325 = insertelement <4 x i32> undef, i32 %2322, i32 0
%2326 = insertelement <4 x i32> %2325, i32 %2323, i32 1
%2327 = insertelement <4 x i32> %2326, i32 %2324, i32 2
%2328 = bitcast <8 x i32> %85 to <32 x i8>
%2329 = bitcast <4 x i32> %87 to <16 x i8>
%2330 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2327, <32 x i8> %2328, <16 x i8> %2329, i32 2)
%2331 = extractelement <4 x float> %2330, i32 1
%2332 = extractelement <4 x float> %2330, i32 3
%2333 = fcmp oeq float %temp14.0, 2.000000e+00
%2334 = select i1 %2333, float 1.000000e+00, float 0.000000e+00
%2335 = bitcast float %2293 to i32
%2336 = bitcast float %2295 to i32
%2337 = bitcast float %244 to i32
%2338 = insertelement <4 x i32> undef, i32 %2335, i32 0
%2339 = insertelement <4 x i32> %2338, i32 %2336, i32 1
%2340 = insertelement <4 x i32> %2339, i32 %2337, i32 2
%2341 = bitcast <8 x i32> %77 to <32 x i8>
%2342 = bitcast <4 x i32> %79 to <16 x i8>
%2343 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2340, <32 x i8> %2341, <16 x i8> %2342, i32 2)
%2344 = extractelement <4 x float> %2343, i32 1
%2345 = extractelement <4 x float> %2343, i32 3
%2346 = fcmp oeq float %temp14.0, 1.000000e+00
%2347 = select i1 %2346, float 1.000000e+00, float 0.000000e+00
%2348 = bitcast float %2293 to i32
%2349 = bitcast float %2295 to i32
%2350 = bitcast float %244 to i32
%2351 = insertelement <4 x i32> undef, i32 %2348, i32 0
%2352 = insertelement <4 x i32> %2351, i32 %2349, i32 1
%2353 = insertelement <4 x i32> %2352, i32 %2350, i32 2
%2354 = bitcast <8 x i32> %69 to <32 x i8>
%2355 = bitcast <4 x i32> %71 to <16 x i8>
%2356 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2353, <32 x i8> %2354, <16 x i8> %2355, i32 2)
%2357 = extractelement <4 x float> %2356, i32 1
%2358 = extractelement <4 x float> %2356, i32 3
%2359 = fcmp oeq float %temp14.0, 0.000000e+00
%2360 = select i1 %2359, float 1.000000e+00, float 0.000000e+00
%2361 = fmul float %2357, %2360
%2362 = fmul float %2358, %2360
%2363 = fmul float %2344, %2347
%2364 = fadd float %2363, %2361
%2365 = fmul float %2345, %2347
%2366 = fadd float %2365, %2362
%2367 = fmul float %2331, %2334
%2368 = fadd float %2367, %2364
%2369 = fmul float %2332, %2334
%2370 = fadd float %2369, %2366
%2371 = fmul float %2318, %2321
%2372 = fadd float %2371, %2368
%2373 = fmul float %2319, %2321
%2374 = fadd float %2373, %2370
%2375 = fmul float %2305, %2308
%2376 = fadd float %2375, %2372
%2377 = fmul float %2306, %2308
%2378 = fadd float %2377, %2374
%2379 = fmul float %2378, 2.000000e+00
%2380 = fadd float %2379, -1.000000e+00
%2381 = fmul float %2376, 2.000000e+00
%2382 = fadd float %2381, -1.000000e+00
%2383 = fmul float %2380, %2380
%2384 = fmul float %2382, %2382
%2385 = fadd float %2383, %2384
%2386 = call float @llvm.AMDIL.clamp.(float %2385, float 0.000000e+00, float 1.000000e+00)
%2387 = fmul float %167, 0.000000e+00
%2388 = fmul float %2158, %167
%2389 = fmul float %2160, %167
%2390 = fmul float %2271, %168
%2391 = fadd float %2390, %2387
%2392 = fmul float %168, 0.000000e+00
%2393 = fadd float %2392, %2388
%2394 = fmul float %2269, %168
%2395 = fadd float %2394, %2389
%2396 = fmul float %2380, %169
%2397 = fadd float %2396, %2391
%2398 = fmul float %2382, %169
%2399 = fadd float %2398, %2393
%2400 = fmul float %169, 0.000000e+00
%2401 = fadd float %2400, %2395
%2402 = fmul float %108, %1701
%2403 = fmul float %108, %1703
%2404 = fmul float %108, %1705
%2405 = fmul float %109, %2049
%2406 = fadd float %2405, %2402
%2407 = fmul float %109, %2051
%2408 = fadd float %2407, %2403
%2409 = fmul float %109, %2053
%2410 = fadd float %2409, %2404
%2411 = fmul float %110, %2397
%2412 = fadd float %2411, %2406
%2413 = fmul float %110, %2399
%2414 = fadd float %2413, %2408
%2415 = fmul float %110, %2401
%2416 = fadd float %2415, %2410
%2417 = fmul float %2412, %2412
%2418 = fmul float %2414, %2414
%2419 = fadd float %2417, %2418
%2420 = fmul float %2416, %2416
%2421 = fadd float %2419, %2420
%2422 = fadd float %2421, 1.000000e+00
%2423 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2422)
%2424 = fmul float %2412, %2423
%2425 = fmul float %2414, %2423
%2426 = fmul float %2416, %2423
%2427 = fmul float %2424, %107
%2428 = fmul float %2425, %107
%2429 = fmul float %2426, %107
%2430 = fsub float %113, %2427
%2431 = fsub float %114, %2428
%2432 = fsub float %115, %2429
%2433 = fmul float %2430, %2430
%2434 = fmul float %2431, %2431
%2435 = fadd float %2434, %2433
%2436 = fmul float %2432, %2432
%2437 = fadd float %2435, %2436
%2438 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2437)
%2439 = fmul float %2430, %2438
%2440 = fmul float %2431, %2438
%2441 = fmul float %2432, %2438
%2442 = fmul float %35, %116
%2443 = fmul float %36, %116
%2444 = fmul float %37, %116
%2445 = fmul float %38, %117
%2446 = fadd float %2445, %2442
%2447 = fmul float %39, %117
%2448 = fadd float %2447, %2443
%2449 = fmul float %40, %117
%2450 = fadd float %2449, %2444
%2451 = fmul float %41, %118
%2452 = fadd float %2451, %2446
%2453 = fmul float %42, %118
%2454 = fadd float %2453, %2448
%2455 = fmul float %43, %118
%2456 = fadd float %2455, %2450
%2457 = fadd float %2452, %44
%2458 = fadd float %2454, %45
%2459 = fadd float %2456, %46
%2460 = fmul float %2457, %2457
%2461 = fmul float %2458, %2458
%2462 = fadd float %2461, %2460
%2463 = fmul float %2459, %2459
%2464 = fadd float %2462, %2463
%2465 = bitcast float %2464 to i32
%2466 = bitcast float %2464 to i32
%2467 = insertelement <2 x i32> undef, i32 %2465, i32 0
%2468 = insertelement <2 x i32> %2467, i32 %2466, i32 1
%2469 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %2468, <32 x i8> %61, <16 x i8> %63, i32 2)
%2470 = extractelement <4 x float> %2469, i32 3
%2471 = fadd float %128, %140
%2472 = fadd float %129, %141
%2473 = fadd float %130, %142
%2474 = fmul float %2471, %2471
%2475 = fmul float %2472, %2472
%2476 = fadd float %2475, %2474
%2477 = fmul float %2473, %2473
%2478 = fadd float %2476, %2477
%2479 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2478)
%2480 = fmul float %2471, %2479
%2481 = fmul float %2472, %2479
%2482 = fmul float %2473, %2479
%2483 = fmul float %2439, %2480
%2484 = fmul float %2440, %2481
%2485 = fadd float %2484, %2483
%2486 = fmul float %2441, %2482
%2487 = fadd float %2485, %2486
%2488 = call float @llvm.maxnum.f32(float %2487, float 0x3F1A36E2E0000000)
%2489 = fmul float %111, 3.200000e+01
%2490 = call float @llvm.pow.f32(float %2488, float %2489)
%2491 = call float @llvm.AMDIL.clamp.(float %2490, float 0.000000e+00, float 1.000000e+00)
%2492 = fmul float %2491, 2.000000e+00
%2493 = fsub float 3.000000e+00, %2492
%2494 = fmul float %2491, %2493
%2495 = fmul float %2491, %2494
%2496 = fmul float %2495, %111
%2497 = fmul float %1353, %32
%2498 = fmul float %1355, %33
%2499 = fmul float %1357, %34
%2500 = fmul float %2439, %128
%2501 = fmul float %2440, %129
%2502 = fadd float %2501, %2500
%2503 = fmul float %2441, %130
%2504 = fadd float %2502, %2503
%2505 = call float @llvm.AMDIL.clamp.(float %2504, float 0.000000e+00, float 1.000000e+00)
%2506 = fmul float %57, 2.000000e+00
%2507 = fmul float %58, 2.000000e+00
%2508 = fmul float %59, 2.000000e+00
%2509 = fmul float %2506, %2470
%2510 = fmul float %2507, %2470
%2511 = fmul float %2508, %2470
%2512 = call float @llvm.maxnum.f32(float %2509, float %54)
%2513 = call float @llvm.maxnum.f32(float %2510, float %55)
%2514 = call float @llvm.maxnum.f32(float %2511, float %56)
%2515 = call float @llvm.minnum.f32(float %2512, float 1.000000e+00)
%2516 = call float @llvm.minnum.f32(float %2513, float 1.000000e+00)
%2517 = call float @llvm.minnum.f32(float %2514, float 1.000000e+00)
%2518 = fmul float %2515, %1353
%2519 = fmul float %2516, %1355
%2520 = fmul float %2517, %1357
%2521 = fmul float %2497, %2505
%2522 = fadd float %2521, %2518
%2523 = fmul float %2498, %2505
%2524 = fadd float %2523, %2519
%2525 = fmul float %2499, %2505
%2526 = fadd float %2525, %2520
%2527 = fmul float %32, %2496
%2528 = fadd float %2527, %2522
%2529 = fmul float %33, %2496
%2530 = fadd float %2529, %2524
%2531 = fmul float %34, %2496
%2532 = fadd float %2531, %2526
%2533 = fmul float %2528, %2470
%2534 = fmul float %2530, %2470
%2535 = fmul float %2532, %2470
%2536 = fmul float %2533, 5.000000e-01
%2537 = fmul float %2534, 5.000000e-01
%2538 = fmul float %2535, 5.000000e-01
%2539 = fmul float %112, %30
%2540 = fadd float %2539, %31
%2541 = call float @llvm.AMDIL.clamp.(float %2540, float 0.000000e+00, float 1.000000e+00)
%2542 = fmul float %2536, %2541
%2543 = fmul float %2537, %2541
%2544 = fmul float %2538, %2541
%2545 = call i32 @llvm.SI.packf16(float %2542, float %2543)
%2546 = bitcast i32 %2545 to float
%2547 = call i32 @llvm.SI.packf16(float %2544, float 1.000000e+00)
%2548 = bitcast i32 %2547 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2546, float %2548, float %2546, float %2548)
ret void
}
; ---------------------------------------------------------------------------
; External intrinsic declarations, attribute groups and metadata for the
; module whose @main body ends directly above. Nothing here has a body; each
; `declare` only fixes the signature that the calls in @main must match.
; Notes marked NOTE(review) are assumptions that could not be confirmed from
; the part of @main that precedes this section in the dump.
; ---------------------------------------------------------------------------
; NOTE(review): no call to llvm.SI.load.const appears in the tail of @main
; shown just above; presumably it is used earlier in the function - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; NOTE(review): likewise not called in the visible tail of @main; presumably
; used near the top of the function - confirm.
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; @main calls this on a sum of three squared components and multiplies each
; component by the result (vector normalisation pattern).
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; NOTE(review): not called in the visible tail of @main - confirm it is used.
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Used in @main to apply a lower bound to a value (second operand is either a
; constant or another SSA value).
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Used in @main as `x - floor(x)`, i.e. to take the fractional part of a
; scaled coordinate.
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; NOTE(review): not called in the visible tail of @main - confirm it is used.
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Called in @main with: a <4 x i32> whose lanes 0..2 are floats bitcast to i32
; (lane 3 is left undef), a <32 x i8> bitcast from an <8 x i32>, a <16 x i8>
; bitcast from a <4 x i32>, and the constant i32 2. Only elements 1 and 3 of
; the returned <4 x float> are extracted by the visible callers.
; NOTE(review): the two byte vectors are presumably the texture resource and
; sampler descriptors, and lane 2 an explicit LOD - confirm against the
; intrinsic's definition.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Every visible call passes 0.0 and 1.0 as the second and third operands.
; Unlike the other intrinsics this one uses group #2 (readnone without
; nounwind). The trailing '.' in the name is part of the identifier and must
; be kept as-is.
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Called once in the visible tail of @main, with both lanes of the <2 x i32>
; holding the same bitcast float; only element 3 of the result is used.
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Used in @main on a value that has already been lower-bounded via maxnum.
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Used in @main to cap three values at 1.0.
; Function Attrs: nounwind readnone
declare float @llvm.SI.packf16(float, float) #1
; Takes two floats and returns one i32; @main bitcasts each result back to
; float and passes it to llvm.SI.export.
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; No attribute group is attached, so this call is not declared readnone or
; nounwind. It is the last call in @main before `ret void`.
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
; Attribute groups referenced above by their #N suffix.
; NOTE(review): #0 is presumably attached to this module's @main definition,
; whose header lies before this section of the dump. The vertex shader at the
; top of this dump uses "ShaderType"="1", so "0" presumably denotes the
; pixel/fragment stage - confirm.
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
; Metadata node named "const", referenced as `!tbaa !0` on loads elsewhere in
; this dump. NOTE(review): no such load is visible in the tail of @main above.
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b64 s[100:101], s[6:7] ; BEE40406
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v14, v0, 0, 0, [m0] ; C8380000
v_interp_p2_f32 v14, [v14], v1, 0, 0, [m0] ; C8390001
v_interp_p1_f32 v17, v0, 1, 0, [m0] ; C8440100
v_interp_p2_f32 v17, [v17], v1, 1, 0, [m0] ; C8450101
v_interp_p1_f32 v18, v0, 2, 0, [m0] ; C8480200
v_interp_p2_f32 v18, [v18], v1, 2, 0, [m0] ; C8490201
v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300
v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301
v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400
v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401
v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500
v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501
v_interp_p1_f32 v4, v0, 2, 1, [m0] ; C8100600
v_interp_p2_f32 v4, [v4], v1, 2, 1, [m0] ; C8110601
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v2, v0, 3, 1, [m0] ; C8080700
v_interp_p2_f32 v2, [v2], v1, 3, 1, [m0] ; C8090701
v_interp_p1_f32 v12, v0, 0, 2, [m0] ; C8300800
v_interp_p2_f32 v12, [v12], v1, 0, 2, [m0] ; C8310801
v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900
v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901
v_interp_p1_f32 v6, v0, 2, 2, [m0] ; C8180A00
v_interp_p2_f32 v6, [v6], v1, 2, 2, [m0] ; C8190A01
v_interp_p1_f32 v7, v0, 3, 2, [m0] ; C81C0B00
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
v_interp_p2_f32 v7, [v7], v1, 3, 2, [m0] ; C81D0B01
v_interp_p1_f32 v10, v0, 0, 3, [m0] ; C8280C00
v_interp_p2_f32 v10, [v10], v1, 0, 3, [m0] ; C8290C01
v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01
v_interp_p1_f32 v0, v0, 2, 3, [m0] ; C8000E00
v_interp_p2_f32 v0, [v0], v1, 2, 3, [m0] ; C8010E01
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v1, s8, v10 ; 08021408
v_sub_f32_e32 v13, s9, v11 ; 081A1609
s_buffer_load_dword s12, s[0:3], 0x4c ; C206014C
s_buffer_load_dword s8, s[0:3], 0x50 ; C2040150
s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100
s_buffer_load_dword s41, s[0:3], 0x1 ; C2148101
v_sub_f32_e32 v21, s10, v0 ; 082A000A
v_mul_f32_e32 v15, v1, v1 ; 101E0301
v_mac_f32_e32 v15, v13, v13 ; 3E1E1B0D
v_mac_f32_e32 v15, v21, v21 ; 3E1E2B15
s_buffer_load_dword s40, s[0:3], 0x2 ; C2140102
v_rsq_clamp_f32_e32 v22, v15 ; 7E2C590F
v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0
v_floor_f32_e32 v16, v14 ; 7E20490E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v15, s13, v10 ; 081E140D
v_sub_f32_e32 v14, s41, v11 ; 081C1629
v_mov_b32_e32 v19, 0x42800000 ; 7E2602FF 42800000
v_cmp_le_f32_e32 vcc, v19, v16 ; 7C062113
v_mul_f32_e32 v19, v15, v15 ; 10261F0F
v_mac_f32_e32 v19, v14, v14 ; 3E261D0E
v_sub_f32_e32 v36, s40, v0 ; 08480028
v_mac_f32_e32 v19, v36, v36 ; 3E264924
v_rsq_clamp_f32_e32 v39, v19 ; 7E4E5913
v_mov_b32_e32 v19, 0x7fffffff ; 7E2602FF 7FFFFFFF
v_and_b32_e32 v23, v5, v19 ; 362E2705
v_and_b32_e32 v24, v6, v19 ; 36302706
v_and_b32_e32 v34, v7, v19 ; 36442707
v_mul_f32_e64 v19, |v5|, |v5| ; D2100313 00020B05
v_mad_f32 v19, |v6|, |v6|, v19 ; D2820313 044E0D06
v_mad_f32 v19, |v7|, |v7|, v19 ; D2820313 044E0F07
v_rsq_clamp_f32_e32 v35, v19 ; 7E465913
v_add_f32_e32 v17, 0.5, v17 ; 062222F0
v_add_f32_e32 v18, 0.5, v18 ; 062424F0
v_floor_f32_e32 v20, v17 ; 7E284911
v_floor_f32_e32 v17, v18 ; 7E224912
v_mov_b32_e32 v27, s8 ; 7E360208
v_mul_f32_e32 v25, s12, v16 ; 1032200C
v_floor_f32_e32 v19, v25 ; 7E264919
s_and_saveexec_b64 s[10:11], vcc ; BE8A246A
s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E
v_mov_b32_e32 v18, 0xc2800000 ; 7E2402FF C2800000
v_add_f32_e32 v16, v16, v18 ; 06202510
v_mul_f32_e32 v18, s8, v16 ; 10242008
v_floor_f32_e32 v18, v18 ; 7E244912
v_mul_f32_e32 v26, s8, v18 ; 10342408
v_mad_f32 v31, v16, s8, -v18 ; D282001F 84481110
v_floor_f32_e32 v16, v26 ; 7E20491A
v_mad_f32 v32, v18, s8, -v16 ; D2820020 84401112
v_add_f32_e32 v18, 4.0, v16 ; 062420F6
s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A
v_mov_b32_e32 v16, s12 ; 7E20020C
v_mov_b32_e32 v38, v27 ; 7E4C031B
s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E
v_mul_f32_e32 v18, s12, v19 ; 1024260C
v_floor_f32_e32 v26, v25 ; 7E344919
v_subrev_f32_e32 v31, v26, v25 ; 0A3E331A
v_floor_f32_e32 v18, v18 ; 7E244912
v_mad_f32 v32, v19, s12, -v18 ; D2820020 84481913
v_mov_b32_e32 v38, v16 ; 7E4C0310
s_or_b64 exec, exec, s[10:11] ; 88FE0A7E
v_mul_f32_e32 v26, s12, v20 ; 1034280C
v_floor_f32_e32 v25, v26 ; 7E32491A
v_mov_b32_e32 v19, 0x42800000 ; 7E2602FF 42800000
v_cmp_le_f32_e32 vcc, v19, v20 ; 7C062913
s_and_saveexec_b64 s[10:11], vcc ; BE8A246A
s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E
v_mov_b32_e32 v19, 0xc2800000 ; 7E2602FF C2800000
v_add_f32_e32 v19, v20, v19 ; 06262714
v_mul_f32_e32 v20, s8, v19 ; 10282608
v_floor_f32_e32 v20, v20 ; 7E284914
v_mul_f32_e32 v29, s8, v20 ; 103A2808
v_mad_f32 v28, v19, s8, -v20 ; D282001C 84501113
v_floor_f32_e32 v19, v29 ; 7E26491D
v_mad_f32 v29, v20, s8, -v19 ; D282001D 844C1114
v_add_f32_e32 v19, 4.0, v19 ; 062626F6
s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A
v_mov_b32_e32 v30, v27 ; 7E3C031B
s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E
v_mul_f32_e32 v19, s12, v25 ; 1026320C
v_floor_f32_e32 v20, v26 ; 7E28491A
v_subrev_f32_e32 v28, v20, v26 ; 0A383514
v_floor_f32_e32 v19, v19 ; 7E264913
v_mad_f32 v29, v25, s12, -v19 ; D282001D 844C1919
v_mov_b32_e32 v30, v16 ; 7E3C0310
s_or_b64 exec, exec, s[10:11] ; 88FE0A7E
s_buffer_load_dword s14, s[0:3], 0xb ; C207010B
s_buffer_load_dword s15, s[0:3], 0x54 ; C2078154
s_buffer_load_dword s9, s[0:3], 0x58 ; C2048158
v_mul_f32_e32 v41, s12, v17 ; 1052220C
v_floor_f32_e32 v40, v41 ; 7E504929
v_mov_b32_e32 v20, 0x42800000 ; 7E2802FF 42800000
v_cmp_le_f32_e32 vcc, v20, v17 ; 7C062314
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_saveexec_b64 s[10:11], vcc ; BE8A246A
s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E
v_mov_b32_e32 v20, 0xc2800000 ; 7E2802FF C2800000
v_add_f32_e32 v17, v17, v20 ; 06222911
v_mul_f32_e32 v20, s8, v17 ; 10282208
v_floor_f32_e32 v20, v20 ; 7E284914
v_mul_f32_e32 v26, s8, v20 ; 10342808
v_mad_f32 v25, v17, s8, -v20 ; D2820019 84501111
v_floor_f32_e32 v17, v26 ; 7E22491A
v_mad_f32 v26, v20, s8, -v17 ; D282001A 84441114
v_add_f32_e32 v20, 4.0, v17 ; 062822F6
s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A
v_mov_b32_e32 v42, s14 ; 7E54020E
v_mov_b32_e32 v33, s15 ; 7E42020F
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xc ; C207810C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 0 ; 05FB000F
s_buffer_load_dword s15, s[0:3], 0xd ; C207810D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 1 ; 05FB020F
s_buffer_load_dword s15, s[0:3], 0xe ; C207810E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 2 ; 05FB040F
s_buffer_load_dword s15, s[0:3], 0x14 ; C2078114
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 10 ; 05FB140F
s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 11 ; 05FB160F
s_buffer_load_dword s15, s[0:3], 0x16 ; C2078116
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 12 ; 05FB180F
s_buffer_load_dword s15, s[0:3], 0x18 ; C2078118
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 13 ; 05FB1A0F
s_buffer_load_dword s15, s[0:3], 0x19 ; C2078119
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 14 ; 05FB1C0F
s_buffer_load_dword s15, s[0:3], 0x1a ; C207811A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 15 ; 05FB1E0F
s_buffer_load_dword s15, s[0:3], 0x1c ; C207811C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 16 ; 05FB200F
s_buffer_load_dword s15, s[0:3], 0x1d ; C207811D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 17 ; 05FB220F
s_buffer_load_dword s15, s[0:3], 0x1e ; C207811E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 18 ; 05FB240F
s_buffer_load_dword s15, s[0:3], 0x20 ; C2078120
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 19 ; 05FB260F
s_buffer_load_dword s15, s[0:3], 0x21 ; C2078121
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 20 ; 05FB280F
s_buffer_load_dword s15, s[0:3], 0x22 ; C2078122
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 9 ; 05FB120F
s_buffer_load_dword s39, s[0:3], 0x5c ; C213815C
s_buffer_load_dword s42, s[0:3], 0x60 ; C2150160
s_buffer_load_dword s24, s[0:3], 0x64 ; C20C0164
s_buffer_load_dword s15, s[0:3], 0x68 ; C2078168
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 5 ; 05FB0A0F
s_buffer_load_dword s15, s[0:3], 0x69 ; C2078169
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 3 ; 05FB060F
s_buffer_load_dword s15, s[0:3], 0x6a ; C207816A
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 4 ; 05FB080F
s_buffer_load_dword s15, s[0:3], 0x6c ; C207816C
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 8 ; 05FB100F
s_buffer_load_dword s15, s[0:3], 0x6d ; C207816D
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s15, 6 ; 05FB0C0F
s_buffer_load_dword s0, s[0:3], 0x6e ; C200016E
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s0, 7 ; 05FB0E00
v_mov_b32_e32 v37, s9 ; 7E4A0209
s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E
v_mul_f32_e32 v17, s12, v40 ; 1022500C
v_floor_f32_e32 v20, v41 ; 7E284929
v_subrev_f32_e32 v25, v20, v41 ; 0A325314
v_floor_f32_e32 v20, v17 ; 7E284911
v_mad_f32 v26, v40, s12, -v20 ; D282001A 84501928
v_mov_b32_e32 v27, v16 ; 7E360310
s_or_b64 exec, exec, s[10:11] ; 88FE0A7E
v_mac_f32_e32 v42, s14, v12 ; 3E54180E
v_mul_f32_e32 v16, v39, v15 ; 10201F27
v_mul_f32_e32 v17, v39, v14 ; 10221D27
v_mul_f32_e32 v15, v39, v36 ; 101E4927
v_mac_f32_e32 v16, v22, v1 ; 3E200316
v_mul_f32_e32 v14, v22, v1 ; 101C0316
v_mac_f32_e32 v17, v22, v13 ; 3E221B16
v_mul_f32_e32 v13, v22, v13 ; 101A1B16
v_mac_f32_e32 v15, v22, v21 ; 3E1E2B16
v_mul_f32_e32 v12, v22, v21 ; 10182B16
v_add_f32_e64 v1, 0, v42 clamp ; D2060801 00025480
v_cmp_neq_f32_e64 s[16:17], s12, v38 ; D01A0010 00024C0C
v_cmp_eq_f32_e64 s[0:1], 4.0, v18 ; D0040000 000224F6
v_writelane_b32 v253, s0, 23 ; 05FB2E00
v_writelane_b32 v253, s1, 24 ; 05FB3001
v_cmp_eq_f32_e64 s[0:1], 2.0, v18 ; D0040000 000224F4
v_writelane_b32 v253, s0, 25 ; 05FB3200
v_writelane_b32 v253, s1, 26 ; 05FB3401
v_cmp_eq_f32_e64 s[0:1], 1.0, v18 ; D0040000 000224F2
v_writelane_b32 v253, s0, 27 ; 05FB3600
v_writelane_b32 v253, s1, 28 ; 05FB3801
v_cmp_eq_f32_e32 vcc, 0, v18 ; 7C042480
v_cmp_neq_f32_e64 s[14:15], s12, v30 ; D01A000E 00023C0C
v_subrev_f32_e32 v21, s13, v10 ; 0A2A140D
v_mul_f32_e32 v36, v21, v21 ; 10482B15
v_cmp_neq_f32_e64 s[12:13], s12, v27 ; D01A000C 0002360C
v_subrev_f32_e32 v21, s41, v11 ; 0A2A1629
v_mac_f32_e32 v36, v21, v21 ; 3E482B15
v_mov_b32_e32 v21, 0xbe4ccccd ; 7E2A02FF BE4CCCCD
v_mad_f32 v22, v35, v23, v21 ; D2820016 04562F23
v_mad_f32 v23, v35, v24, v21 ; D2820017 04563123
v_mac_f32_e32 v21, v35, v34 ; 3E2A4523
v_mov_b32_e32 v24, 0x40e00000 ; 7E3002FF 40E00000
v_mul_f32_e32 v22, v24, v22 ; 102C2D18
v_mul_f32_e32 v23, v24, v23 ; 102E2F18
v_mul_f32_e32 v21, v24, v21 ; 102A2B18
v_mov_b32_e32 v24, 0x3c23d70a ; 7E3002FF 3C23D70A
v_max_f32_e32 v22, v24, v22 ; 202C2D18
v_max_f32_e32 v23, v24, v23 ; 202E2F18
v_max_f32_e32 v21, v24, v21 ; 202A2B18
v_add_f32_e32 v24, v23, v22 ; 06302D17
v_add_f32_e32 v24, v21, v24 ; 06303115
v_rcp_f32_e32 v24, v24 ; 7E305518
v_subrev_f32_e32 v34, s40, v0 ; 0A440028
v_mac_f32_e32 v36, v34, v34 ; 3E484522
v_mul_f32_e32 v34, s24, v36 ; 10444818
v_log_f32_e32 v34, v34 ; 7E444F22
v_cmp_eq_f32_e64 s[0:1], 4.0, v19 ; D0040000 000226F6
v_writelane_b32 v253, s0, 21 ; 05FB2A00
v_writelane_b32 v253, s1, 22 ; 05FB2C01
v_cndmask_b32_e64 v35, v33, v37, s[16:17] ; D2000023 00424B21
v_rcp_f32_e32 v35, v35 ; 7E465523
v_mul_f32_e32 v34, 0x3f317218, v34 ; 104444FF 3F317218
v_mov_b32_e32 v39, 0x3b000000 ; 7E4E02FF 3B000000
v_mov_b32_e32 v40, 0x3b800000 ; 7E5002FF 3B800000
v_cndmask_b32_e64 v36, v40, v39, s[16:17] ; D2000024 00424F28
v_mul_f32_e32 v41, v35, v10 ; 10521523
v_floor_f32_e32 v41, v41 ; 7E524929
v_mad_f32 v41, v10, v35, -v41 ; D2820029 84A6470A
v_mul_f32_e32 v42, v35, v11 ; 10541723
v_floor_f32_e32 v42, v42 ; 7E54492A
v_mad_f32 v42, v11, v35, -v42 ; D282002A 84AA470B
v_mul_f32_e32 v43, v35, v0 ; 10560123
v_floor_f32_e32 v43, v43 ; 7E56492B
v_mad_f32 v35, v0, v35, -v43 ; D2820023 84AE4700
v_add_f32_e64 v43, s39, s39 ; D206002B 00004E27
v_mad_f32 v44, -v43, v36, 1.0 ; D282002C 23CA492B
v_mul_f32_e32 v45, s39, v36 ; 105A4827
v_mad_f32 v41, v44, v41, v45 ; D2820029 04B6532C
v_mad_f32 v42, v44, v42, v45 ; D282002A 04B6552C
v_mac_f32_e32 v45, v44, v35 ; 3E5A472C
v_mul_f32_e32 v36, s42, v34 ; 1048442A
v_mad_f32 v34, v38, v41, v31 ; D2820022 047E5326
v_mad_f32 v35, v38, v42, v32 ; D2820023 04825526
v_cndmask_b32_e64 v42, v33, v37, s[14:15] ; D200002A 003A4B21
v_rcp_f32_e32 v42, v42 ; 7E54552A
v_mac_f32_e32 v31, v38, v45 ; 3E3E5B26
v_mov_b32_e32 v44, v31 ; 7E58031F
v_mov_b32_e32 v45, v32 ; 7E5A0320
v_mov_b32_e32 v46, v33 ; 7E5C0321
v_mov_b32_e32 v47, v34 ; 7E5E0322
v_mac_f32_e32 v32, v38, v41 ; 3E405326
v_mul_f32_e32 v38, v42, v10 ; 104C152A
v_floor_f32_e32 v38, v38 ; 7E4C4926
v_mad_f32 v38, v10, v42, -v38 ; D2820026 849A550A
v_mul_f32_e32 v41, v42, v11 ; 1052172A
v_floor_f32_e32 v41, v41 ; 7E524929
v_mad_f32 v41, v11, v42, -v41 ; D2820029 84A6550B
v_mul_f32_e32 v45, v42, v0 ; 105A012A
v_floor_f32_e32 v45, v45 ; 7E5A492D
v_mad_f32 v42, v0, v42, -v45 ; D282002A 84B65500
v_cndmask_b32_e64 v33, v33, v37, s[12:13] ; D2000021 00324B21
v_cndmask_b32_e64 v37, v40, v39, s[14:15] ; D2000025 003A4F28
v_mad_f32 v45, -v43, v37, 1.0 ; D282002D 23CA4B2B
v_mul_f32_e32 v37, s39, v37 ; 104A4A27
v_mad_f32 v46, v45, v38, v37 ; D282002E 04964D2D
v_rcp_f32_e32 v33, v33 ; 7E425521
v_mad_f32 v38, v45, v41, v37 ; D2820026 0496532D
v_mac_f32_e32 v37, v45, v42 ; 3E4A552D
v_cndmask_b32_e64 v39, v40, v39, s[12:13] ; D2000027 00324F28
v_mul_f32_e32 v40, v33, v10 ; 10501521
v_floor_f32_e32 v40, v40 ; 7E504928
v_mad_f32 v40, v10, v33, -v40 ; D2820028 84A2430A
v_mul_f32_e32 v41, v33, v11 ; 10521721
v_floor_f32_e32 v41, v41 ; 7E524929
v_mad_f32 v41, v11, v33, -v41 ; D2820029 84A6430B
v_mul_f32_e32 v42, v33, v0 ; 10540121
v_floor_f32_e32 v42, v42 ; 7E54492A
v_mad_f32 v33, v0, v33, -v42 ; D2820021 84AA4300
v_mad_f32 v42, -v43, v39, 1.0 ; D282002A 23CA4F2B
v_mul_f32_e32 v39, s39, v39 ; 104E4E27
v_mad_f32 v40, v42, v40, v39 ; D2820028 049E512A
v_mad_f32 v41, v42, v41, v39 ; D2820029 049E532A
v_mac_f32_e32 v39, v42, v33 ; 3E4E432A
v_mad_f32 v48, v30, v46, v28 ; D2820030 04725D1E
v_mad_f32 v49, v30, v38, v29 ; D2820031 04764D1E
s_load_dwordx8 s[48:55], s[100:101], 0x48 ; C0D86548
s_load_dwordx4 s[96:99], s[4:5], 0x24 ; C0B00524
v_mac_f32_e32 v28, v30, v37 ; 3E384B1E
v_mad_f32 v37, v27, v40, v25 ; D2820025 0466511B
v_mad_f32 v38, v27, v41, v26 ; D2820026 046A531B
v_mac_f32_e32 v25, v27, v39 ; 3E324F1B
v_mov_b32_e32 v51, v28 ; 7E66031C
v_mov_b32_e32 v52, v29 ; 7E68031D
v_mov_b32_e32 v53, v30 ; 7E6A031E
v_mov_b32_e32 v54, v31 ; 7E6C031F
v_mac_f32_e32 v29, v30, v46 ; 3E3A5D1E
v_mov_b32_e32 v55, v25 ; 7E6E0319
v_mov_b32_e32 v56, v26 ; 7E70031A
v_mov_b32_e32 v57, v27 ; 7E72031B
v_mov_b32_e32 v58, v28 ; 7E74031C
v_mac_f32_e32 v26, v27, v40 ; 3E34511B
v_mov_b32_e32 v45, v35 ; 7E5A0323
v_mov_b32_e32 v50, v36 ; 7E640324
v_mov_b32_e32 v52, v49 ; 7E680331
v_mov_b32_e32 v39, v36 ; 7E4E0324
v_mov_b32_e32 v56, v38 ; 7E700326
v_mov_b32_e32 v46, v36 ; 7E5C0324
v_mov_b32_e32 v33, v36 ; 7E420324
v_mov_b32_e32 v53, v36 ; 7E6A0324
v_mov_b32_e32 v30, v36 ; 7E3C0324
v_mov_b32_e32 v57, v36 ; 7E720324
v_mov_b32_e32 v27, v36 ; 7E360324
s_load_dwordx4 s[32:35], s[4:5], 0x1c ; C090051C
s_load_dwordx8 s[16:23], s[100:101], 0x38 ; C0C86538
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample_l v[40:42], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[48:55], s[96:99] ; F0900700 030C2822
s_load_dwordx4 s[8:11], s[4:5], 0x14 ; C0840514
s_mov_b64 s[84:85], s[4:5] ; BED40404
v_writelane_b32 v253, s84, 29 ; 05FB3A54
v_writelane_b32 v253, s85, 30 ; 05FB3C55
s_load_dwordx8 s[0:7], s[100:101], 0x28 ; C0C06528
s_load_dwordx4 s[60:63], s[84:85], 0x20 ; C09E5520
s_load_dwordx4 s[36:39], s[84:85], 0xc ; C092550C
s_load_dwordx8 s[24:31], s[100:101], 0x18 ; C0CC6518
s_load_dwordx4 s[12:15], s[84:85], 0x4 ; C0865504
s_load_dwordx8 s[76:83], s[100:101], 0x40 ; C0E66540
s_load_dwordx8 s[40:47], s[100:101], 0x8 ; C0D46508
image_sample_l v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[16:23], s[32:35] ; F0900700 01043B22
s_load_dwordx4 s[64:67], s[84:85], 0x18 ; C0A05518
s_load_dwordx8 s[68:75], s[100:101], 0x30 ; C0E26530
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
image_sample_l v[62:64], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[0:7], s[8:11] ; F0900700 00403E22
s_load_dwordx4 s[56:59], s[84:85], 0x10 ; C09C5510
s_load_dwordx8 s[88:95], s[100:101], 0x20 ; C0EC6520
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v253, s88, 43 ; 05FB5658
v_writelane_b32 v253, s89, 44 ; 05FB5859
v_writelane_b32 v253, s90, 45 ; 05FB5A5A
v_writelane_b32 v253, s91, 46 ; 05FB5C5B
v_writelane_b32 v253, s92, 47 ; 05FB5E5C
v_writelane_b32 v253, s93, 48 ; 05FB605D
v_writelane_b32 v253, s94, 49 ; 05FB625E
v_writelane_b32 v253, s95, 50 ; 05FB645F
image_sample_l v[65:67], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[24:31], s[36:39] ; F0900700 01264122
s_load_dwordx4 s[88:91], s[84:85], 0x8 ; C0AC5508
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_writelane_b32 v253, s88, 31 ; 05FB3E58
v_writelane_b32 v253, s89, 32 ; 05FB4059
v_writelane_b32 v253, s90, 33 ; 05FB425A
v_writelane_b32 v253, s91, 34 ; 05FB445B
s_load_dwordx8 s[88:95], s[100:101], 0x10 ; C0EC6510
s_waitcnt lgkmcnt(0) ; BF8C007F
v_writelane_b32 v253, s88, 35 ; 05FB4658
v_writelane_b32 v253, s89, 36 ; 05FB4859
v_writelane_b32 v253, s90, 37 ; 05FB4A5A
v_writelane_b32 v253, s91, 38 ; 05FB4C5B
v_writelane_b32 v253, s92, 39 ; 05FB4E5C
v_writelane_b32 v253, s93, 40 ; 05FB505D
v_writelane_b32 v253, s94, 41 ; 05FB525E
v_writelane_b32 v253, s95, 42 ; 05FB545F
image_sample_l v[68:70], 7, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[40:47], s[12:15] ; F0900700 006A4422
image_sample_l v[71:73], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[48:55], s[96:99] ; F0900700 030C472C
image_sample_l v[74:76], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[16:23], s[32:35] ; F0900700 01044A2C
image_sample_l v[77:79], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[0:7], s[8:11] ; F0900700 00404D2C
image_sample_l v[80:82], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[24:31], s[36:39] ; F0900700 0126502C
image_sample_l v[83:85], 7, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[40:47], s[12:15] ; F0900700 006A532C
image_sample_l v[86:88], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[48:55], s[96:99] ; F0900700 030C561F
image_sample_l v[89:91], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[16:23], s[32:35] ; F0900700 0104591F
image_sample_l v[92:94], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[0:7], s[8:11] ; F0900700 00405C1F
image_sample_l v[95:97], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[24:31], s[36:39] ; F0900700 01265F1F
image_sample_l v[98:100], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[40:47], s[12:15] ; F0900700 006A621F
image_sample_l v[101:103], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[48:55], s[96:99] ; F0900700 030C6530
image_sample_l v[104:106], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[32:35] ; F0900700 01046830
image_sample_l v[107:109], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[0:7], s[8:11] ; F0900700 00406B30
image_sample_l v[110:112], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[24:31], s[36:39] ; F0900700 01266E30
image_sample_l v[113:115], 7, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[40:47], s[12:15] ; F0900700 006A7130
image_sample_l v[116:118], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[48:55], s[96:99] ; F0900700 030C7433
image_sample_l v[119:121], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[32:35] ; F0900700 01047733
image_sample_l v[122:124], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[0:7], s[8:11] ; F0900700 00407A33
image_sample_l v[125:127], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[24:31], s[36:39] ; F0900700 01267D33
image_sample_l v[128:130], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[40:47], s[12:15] ; F0900700 006A8033
image_sample_l v[131:133], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[48:55], s[96:99] ; F0900700 030C831C
image_sample_l v[134:136], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[32:35] ; F0900700 0104861C
image_sample_l v[137:139], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[0:7], s[8:11] ; F0900700 0040891C
image_sample_l v[140:142], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[24:31], s[36:39] ; F0900700 01268C1C
image_sample_l v[143:145], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[40:47], s[12:15] ; F0900700 006A8F1C
image_sample_l v[146:148], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[48:55], s[96:99] ; F0900700 030C9225
image_sample_l v[149:151], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[16:23], s[32:35] ; F0900700 01049525
image_sample_l v[152:154], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[0:7], s[8:11] ; F0900700 00409825
image_sample_l v[155:157], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[24:31], s[36:39] ; F0900700 01269B25
image_sample_l v[158:160], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[40:47], s[12:15] ; F0900700 006A9E25
image_sample_l v[161:163], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[48:55], s[96:99] ; F0900700 030CA137
s_load_dwordx4 s[84:87], s[84:85], 0x28 ; C0AA5528
s_load_dwordx8 s[88:95], s[100:101], 0x50 ; C0EC6550
image_sample_l v[164:166], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[32:35] ; F0900700 0104A437
image_sample_l v[167:169], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[0:7], s[8:11] ; F0900700 0040A737
image_sample_l v[170:172], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[24:31], s[36:39] ; F0900700 0126AA37
image_sample_l v[173:175], 7, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[40:47], s[12:15] ; F0900700 006AAD37
image_sample_l v[176:178], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[48:55], s[96:99] ; F0900700 030CB019
image_sample_l v[179:181], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[16:23], s[32:35] ; F0900700 0104B319
image_sample_l v[182:184], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[0:7], s[8:11] ; F0900700 0040B619
image_sample_l v[185:187], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[24:31], s[36:39] ; F0900700 0126B919
image_sample_l v[188:190], 7, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[40:47], s[12:15] ; F0900700 006ABC19
s_waitcnt vmcnt(9) lgkmcnt(0) ; BF8C0079
image_sample_l v[191:192], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[88:95], s[84:87] ; F0900A00 02B6BF2C
image_sample_l v[193:194], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[76:83], s[60:63] ; F0900A00 01F3C12C
image_sample_l v[195:196], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[68:75], s[64:67] ; F0900A00 0211C32C
v_readlane_b32 s12, v253, 43 ; 021957FD
v_readlane_b32 s13, v253, 44 ; 021B59FD
v_readlane_b32 s14, v253, 45 ; 021D5BFD
v_readlane_b32 s15, v253, 46 ; 021F5DFD
v_readlane_b32 s16, v253, 47 ; 02215FFD
v_readlane_b32 s17, v253, 48 ; 022361FD
v_readlane_b32 s18, v253, 49 ; 022563FD
v_readlane_b32 s19, v253, 50 ; 022765FD
s_nop 2 ; BF800002
image_sample_l v[197:198], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[12:19], s[56:59] ; F0900A00 01C3C52C
v_readlane_b32 s0, v253, 31 ; 02013FFD
v_readlane_b32 s1, v253, 32 ; 020341FD
v_readlane_b32 s2, v253, 33 ; 020543FD
v_readlane_b32 s3, v253, 34 ; 020745FD
s_nop 2 ; BF800002
v_readlane_b32 s4, v253, 35 ; 020947FD
v_readlane_b32 s5, v253, 36 ; 020B49FD
v_readlane_b32 s6, v253, 37 ; 020D4BFD
v_readlane_b32 s7, v253, 38 ; 020F4DFD
v_readlane_b32 s8, v253, 39 ; 02114FFD
v_readlane_b32 s9, v253, 40 ; 021351FD
v_readlane_b32 s10, v253, 41 ; 021553FD
v_readlane_b32 s11, v253, 42 ; 021755FD
s_nop 2 ; BF800002
image_sample_l v[43:44], 10, 0, 0, 0, 0, 0, 0, 0, v[44:47], s[4:11], s[0:3] ; F0900A00 00012B2C
image_sample_l v[45:46], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[88:95], s[84:87] ; F0900A00 02B62D1F
image_sample_l v[199:200], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[76:83], s[60:63] ; F0900A00 01F3C71F
image_sample_l v[201:202], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[68:75], s[64:67] ; F0900A00 0211C91F
image_sample_l v[203:204], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[12:19], s[56:59] ; F0900A00 01C3CB1F
s_mov_b32 s16, s12 ; BE90030C
s_mov_b32 s17, s13 ; BE91030D
s_mov_b32 s18, s14 ; BE92030E
s_mov_b32 s19, s15 ; BE93030F
s_mov_b32 s20, s16 ; BE940310
s_mov_b32 s21, s17 ; BE950311
s_mov_b32 s22, s18 ; BE960312
s_mov_b32 s23, s19 ; BE970313
image_sample_l v[31:32], 10, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[4:11], s[0:3] ; F0900A00 00011F1F
s_mov_b32 s8, s4 ; BE880304
s_mov_b32 s9, s5 ; BE890305
s_mov_b32 s10, s6 ; BE8A0306
s_mov_b32 s11, s7 ; BE8B0307
s_mov_b32 s12, s8 ; BE8C0308
s_mov_b32 s13, s9 ; BE8D0309
s_mov_b32 s14, s10 ; BE8E030A
s_mov_b32 s15, s11 ; BE8F030B
s_mov_b32 s4, s0 ; BE840300
s_mov_b32 s5, s1 ; BE850301
s_mov_b32 s6, s2 ; BE860302
s_mov_b32 s7, s3 ; BE870303
image_sample_l v[205:206], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[88:95], s[84:87] ; F0900A00 02B6CD22
image_sample_l v[207:208], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[76:83], s[60:63] ; F0900A00 01F3CF22
image_sample_l v[209:210], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[68:75], s[64:67] ; F0900A00 0211D122
image_sample_l v[211:212], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[16:23], s[56:59] ; F0900A00 01C4D322
image_sample_l v[33:34], 10, 0, 0, 0, 0, 0, 0, 0, v[34:37], s[8:15], s[4:7] ; F0900A00 00222122
image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[88:95], s[84:87] ; F0900A00 02B62333
image_sample_l v[213:214], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[76:83], s[60:63] ; F0900A00 01F3D533
image_sample_l v[215:216], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[68:75], s[64:67] ; F0900A00 0211D733
image_sample_l v[217:218], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[16:23], s[56:59] ; F0900A00 01C4D933
image_sample_l v[51:52], 10, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[8:15], s[4:7] ; F0900A00 00223333
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[53:54], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[88:95], s[84:87] ; F0900A00 02B6351C
image_sample_l v[219:220], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[76:83], s[60:63] ; F0900A00 01F3DB1C
image_sample_l v[221:222], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[68:75], s[64:67] ; F0900A00 0211DD1C
image_sample_l v[223:224], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[16:23], s[56:59] ; F0900A00 01C4DF1C
image_sample_l v[28:29], 10, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[8:15], s[4:7] ; F0900A00 00221C1C
s_waitcnt vmcnt(5) ; BF8C0775
image_sample_l v[225:226], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[88:95], s[84:87] ; F0900A00 02B6E130
image_sample_l v[227:228], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[76:83], s[60:63] ; F0900A00 01F3E330
image_sample_l v[229:230], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[68:75], s[64:67] ; F0900A00 0211E530
image_sample_l v[231:232], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[16:23], s[56:59] ; F0900A00 01C4E730
image_sample_l v[47:48], 10, 0, 0, 0, 0, 0, 0, 0, v[48:51], s[8:15], s[4:7] ; F0900A00 00222F30
image_sample_l v[49:50], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[88:95], s[84:87] ; F0900A00 02B63137
image_sample_l v[233:234], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[76:83], s[60:63] ; F0900A00 01F3E937
image_sample_l v[235:236], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[68:75], s[64:67] ; F0900A00 0211EB37
image_sample_l v[237:238], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[16:23], s[56:59] ; F0900A00 01C4ED37
image_sample_l v[55:56], 10, 0, 0, 0, 0, 0, 0, 0, v[55:58], s[8:15], s[4:7] ; F0900A00 00223737
s_waitcnt vmcnt(10) ; BF8C077A
image_sample_l v[57:58], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[88:95], s[84:87] ; F0900A00 02B63919
image_sample_l v[239:240], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[76:83], s[60:63] ; F0900A00 01F3EF19
image_sample_l v[241:242], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[68:75], s[64:67] ; F0900A00 0211F119
image_sample_l v[243:244], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[16:23], s[56:59] ; F0900A00 01C4F319
image_sample_l v[25:26], 10, 0, 0, 0, 0, 0, 0, 0, v[25:28], s[8:15], s[4:7] ; F0900A00 00221919
image_sample_l v[245:246], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[88:95], s[84:87] ; F0900A00 02B6F525
image_sample_l v[247:248], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[76:83], s[60:63] ; F0900A00 01F3F725
image_sample_l v[249:250], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[68:75], s[64:67] ; F0900A00 0211F925
image_sample_l v[251:252], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[16:23], s[56:59] ; F0900A00 01C4FB25
v_readlane_b32 s0, v253, 27 ; 020137FD
v_readlane_b32 s1, v253, 28 ; 020339FD
s_nop 2 ; BF800002
v_cndmask_b32_e64 v27, 0, 1.0, s[0:1] ; D200001B 0001E480
v_cndmask_b32_e64 v30, 0, 1.0, vcc ; D200001E 01A9E480
v_mul_f32_e32 v68, v30, v68 ; 1088891E
v_mul_f32_e32 v69, v30, v69 ; 108A8B1E
v_mul_f32_e32 v70, v30, v70 ; 108C8D1E
v_mac_f32_e32 v68, v27, v65 ; 3E88831B
v_mac_f32_e32 v69, v27, v66 ; 3E8A851B
v_mac_f32_e32 v70, v27, v67 ; 3E8C871B
v_readlane_b32 s0, v253, 25 ; 020133FD
v_readlane_b32 s1, v253, 26 ; 020335FD
s_nop 2 ; BF800002
v_cndmask_b32_e64 v65, 0, 1.0, s[0:1] ; D2000041 0001E480
v_mac_f32_e32 v68, v65, v62 ; 3E887D41
v_mac_f32_e32 v69, v65, v63 ; 3E8A7F41
v_mac_f32_e32 v70, v65, v64 ; 3E8C8141
v_mov_b32_e32 v62, 0x40400000 ; 7E7C02FF 40400000
v_cmp_eq_f32_e64 s[2:3], v18, v62 ; D0040002 00027D12
v_cndmask_b32_e64 v18, 0, 1.0, s[2:3] ; D2000012 0009E480
v_mac_f32_e32 v68, v18, v59 ; 3E887712
v_mac_f32_e32 v69, v18, v60 ; 3E8A7912
v_mac_f32_e32 v70, v18, v61 ; 3E8C7B12
v_readlane_b32 s0, v253, 23 ; 02012FFD
v_readlane_b32 s1, v253, 24 ; 020331FD
s_nop 2 ; BF800002
v_cndmask_b32_e64 v59, 0, 1.0, s[0:1] ; D200003B 0001E480
v_mac_f32_e32 v68, v59, v40 ; 3E88513B
v_mac_f32_e32 v69, v59, v41 ; 3E8A533B
v_mac_f32_e32 v70, v59, v42 ; 3E8C553B
v_mul_f32_e32 v40, v30, v83 ; 1050A71E
v_mul_f32_e32 v41, v30, v84 ; 1052A91E
v_mul_f32_e32 v42, v30, v85 ; 1054AB1E
v_mac_f32_e32 v40, v27, v80 ; 3E50A11B
v_mac_f32_e32 v41, v27, v81 ; 3E52A31B
v_mac_f32_e32 v42, v27, v82 ; 3E54A51B
v_mac_f32_e32 v40, v65, v77 ; 3E509B41
v_mac_f32_e32 v41, v65, v78 ; 3E529D41
v_mac_f32_e32 v42, v65, v79 ; 3E549F41
v_mac_f32_e32 v40, v18, v74 ; 3E509512
v_mac_f32_e32 v41, v18, v75 ; 3E529712
v_mac_f32_e32 v42, v18, v76 ; 3E549912
v_mac_f32_e32 v40, v59, v71 ; 3E508F3B
v_mac_f32_e32 v41, v59, v72 ; 3E52913B
v_mac_f32_e32 v42, v59, v73 ; 3E54933B
v_mul_f32_e32 v60, v30, v98 ; 1078C51E
v_mul_f32_e32 v61, v30, v99 ; 107AC71E
v_mul_f32_e32 v63, v30, v100 ; 107EC91E
v_mac_f32_e32 v60, v27, v95 ; 3E78BF1B
v_mac_f32_e32 v61, v27, v96 ; 3E7AC11B
v_mac_f32_e32 v63, v27, v97 ; 3E7EC31B
v_mac_f32_e32 v60, v65, v92 ; 3E78B941
v_mac_f32_e32 v61, v65, v93 ; 3E7ABB41
v_mac_f32_e32 v63, v65, v94 ; 3E7EBD41
v_mac_f32_e32 v60, v18, v89 ; 3E78B312
v_mac_f32_e32 v61, v18, v90 ; 3E7AB512
v_mac_f32_e32 v63, v18, v91 ; 3E7EB712
v_mac_f32_e32 v60, v59, v86 ; 3E78AD3B
v_mac_f32_e32 v61, v59, v87 ; 3E7AAF3B
v_mac_f32_e32 v63, v59, v88 ; 3E7EB13B
v_cmp_eq_f32_e64 s[0:1], 0, v19 ; D0040000 00022680
v_cndmask_b32_e64 v64, 0, 1.0, s[0:1] ; D2000040 0001E480
v_mul_f32_e32 v66, v64, v113 ; 1084E340
v_mul_f32_e32 v67, v64, v114 ; 1086E540
v_mul_f32_e32 v71, v64, v115 ; 108EE740
v_cmp_eq_f32_e64 s[0:1], 1.0, v19 ; D0040000 000226F2
v_cndmask_b32_e64 v72, 0, 1.0, s[0:1] ; D2000048 0001E480
v_mac_f32_e32 v66, v72, v110 ; 3E84DD48
v_mac_f32_e32 v67, v72, v111 ; 3E86DF48
v_mac_f32_e32 v71, v72, v112 ; 3E8EE148
v_cmp_eq_f32_e64 s[0:1], 2.0, v19 ; D0040000 000226F4
v_cndmask_b32_e64 v73, 0, 1.0, s[0:1] ; D2000049 0001E480
v_mac_f32_e32 v66, v73, v107 ; 3E84D749
v_mac_f32_e32 v67, v73, v108 ; 3E86D949
v_mac_f32_e32 v71, v73, v109 ; 3E8EDB49
v_cmp_eq_f32_e64 s[0:1], v19, v62 ; D0040000 00027D13
v_cndmask_b32_e64 v19, 0, 1.0, s[0:1] ; D2000013 0001E480
v_mac_f32_e32 v66, v19, v104 ; 3E84D113
v_mac_f32_e32 v67, v19, v105 ; 3E86D313
v_mac_f32_e32 v71, v19, v106 ; 3E8ED513
v_readlane_b32 s0, v253, 21 ; 02012BFD
v_readlane_b32 s1, v253, 22 ; 02032DFD
s_nop 2 ; BF800002
v_cndmask_b32_e64 v74, 0, 1.0, s[0:1] ; D200004A 0001E480
v_mac_f32_e32 v66, v74, v101 ; 3E84CB4A
v_mac_f32_e32 v67, v74, v102 ; 3E86CD4A
v_mac_f32_e32 v71, v74, v103 ; 3E8ECF4A
v_mul_f32_e32 v75, v64, v128 ; 10970140
v_mul_f32_e32 v76, v64, v129 ; 10990340
v_mul_f32_e32 v77, v64, v130 ; 109B0540
v_mac_f32_e32 v75, v72, v125 ; 3E96FB48
v_mac_f32_e32 v76, v72, v126 ; 3E98FD48
v_mac_f32_e32 v77, v72, v127 ; 3E9AFF48
v_mac_f32_e32 v75, v73, v122 ; 3E96F549
v_mac_f32_e32 v76, v73, v123 ; 3E98F749
v_mac_f32_e32 v77, v73, v124 ; 3E9AF949
v_mac_f32_e32 v75, v19, v119 ; 3E96EF13
v_mac_f32_e32 v76, v19, v120 ; 3E98F113
v_mac_f32_e32 v77, v19, v121 ; 3E9AF313
v_mac_f32_e32 v75, v74, v116 ; 3E96E94A
v_mac_f32_e32 v76, v74, v117 ; 3E98EB4A
v_mac_f32_e32 v77, v74, v118 ; 3E9AED4A
v_mul_f32_e32 v78, v64, v143 ; 109D1F40
v_mul_f32_e32 v79, v64, v144 ; 109F2140
v_mul_f32_e32 v80, v64, v145 ; 10A12340
v_mac_f32_e32 v78, v72, v140 ; 3E9D1948
v_mac_f32_e32 v79, v72, v141 ; 3E9F1B48
v_mac_f32_e32 v80, v72, v142 ; 3EA11D48
v_mac_f32_e32 v78, v73, v137 ; 3E9D1349
v_mac_f32_e32 v79, v73, v138 ; 3E9F1549
v_mac_f32_e32 v80, v73, v139 ; 3EA11749
v_mac_f32_e32 v78, v19, v134 ; 3E9D0D13
v_mac_f32_e32 v79, v19, v135 ; 3E9F0F13
v_mac_f32_e32 v80, v19, v136 ; 3EA11113
v_mac_f32_e32 v78, v74, v131 ; 3E9D074A
v_mac_f32_e32 v79, v74, v132 ; 3E9F094A
v_mac_f32_e32 v80, v74, v133 ; 3EA10B4A
v_cmp_eq_f32_e32 vcc, 0, v20 ; 7C042880
v_cndmask_b32_e64 v81, 0, 1.0, vcc ; D2000051 01A9E480
v_mul_f32_e32 v82, v81, v158 ; 10A53D51
v_mul_f32_e32 v83, v81, v159 ; 10A73F51
v_mul_f32_e32 v84, v81, v160 ; 10A94151
v_cmp_eq_f32_e32 vcc, 1.0, v20 ; 7C0428F2
v_cndmask_b32_e64 v85, 0, 1.0, vcc ; D2000055 01A9E480
v_mac_f32_e32 v82, v85, v155 ; 3EA53755
v_mac_f32_e32 v83, v85, v156 ; 3EA73955
v_mac_f32_e32 v84, v85, v157 ; 3EA93B55
v_cmp_eq_f32_e32 vcc, 2.0, v20 ; 7C0428F4
v_cndmask_b32_e64 v86, 0, 1.0, vcc ; D2000056 01A9E480
v_mac_f32_e32 v82, v86, v152 ; 3EA53156
v_mac_f32_e32 v83, v86, v153 ; 3EA73356
v_mac_f32_e32 v84, v86, v154 ; 3EA93556
v_cmp_eq_f32_e32 vcc, v20, v62 ; 7C047D14
v_cndmask_b32_e64 v87, 0, 1.0, vcc ; D2000057 01A9E480
v_mac_f32_e32 v82, v87, v149 ; 3EA52B57
v_mac_f32_e32 v83, v87, v150 ; 3EA72D57
v_mac_f32_e32 v84, v87, v151 ; 3EA92F57
v_cmp_eq_f32_e32 vcc, 4.0, v20 ; 7C0428F6
v_cndmask_b32_e64 v20, 0, 1.0, vcc ; D2000014 01A9E480
v_mac_f32_e32 v82, v20, v146 ; 3EA52514
v_mac_f32_e32 v83, v20, v147 ; 3EA72714
v_mac_f32_e32 v84, v20, v148 ; 3EA92914
v_mul_f32_e32 v88, v81, v173 ; 10B15B51
v_mul_f32_e32 v89, v81, v174 ; 10B35D51
v_mul_f32_e32 v90, v81, v175 ; 10B55F51
v_mac_f32_e32 v88, v85, v170 ; 3EB15555
v_mac_f32_e32 v89, v85, v171 ; 3EB35755
v_mac_f32_e32 v90, v85, v172 ; 3EB55955
v_mac_f32_e32 v88, v86, v167 ; 3EB14F56
v_mac_f32_e32 v89, v86, v168 ; 3EB35156
v_mac_f32_e32 v90, v86, v169 ; 3EB55356
v_mac_f32_e32 v88, v87, v164 ; 3EB14957
v_mac_f32_e32 v89, v87, v165 ; 3EB34B57
v_mac_f32_e32 v90, v87, v166 ; 3EB54D57
v_mac_f32_e32 v88, v20, v161 ; 3EB14314
v_mac_f32_e32 v89, v20, v162 ; 3EB34514
v_mac_f32_e32 v90, v20, v163 ; 3EB54714
v_mul_f32_e32 v91, v81, v188 ; 10B77951
v_mul_f32_e32 v92, v81, v189 ; 10B97B51
v_mul_f32_e32 v93, v81, v190 ; 10BB7D51
v_mac_f32_e32 v91, v85, v185 ; 3EB77355
v_mac_f32_e32 v92, v85, v186 ; 3EB97555
v_mac_f32_e32 v93, v85, v187 ; 3EBB7755
v_mac_f32_e32 v91, v86, v182 ; 3EB76D56
v_mac_f32_e32 v92, v86, v183 ; 3EB96F56
v_mac_f32_e32 v93, v86, v184 ; 3EBB7156
v_mac_f32_e32 v91, v87, v179 ; 3EB76757
v_mac_f32_e32 v92, v87, v180 ; 3EB96957
v_mac_f32_e32 v93, v87, v181 ; 3EBB6B57
v_mac_f32_e32 v91, v20, v176 ; 3EB76114
v_mac_f32_e32 v92, v20, v177 ; 3EB96314
v_mac_f32_e32 v93, v20, v178 ; 3EBB6514
v_mul_f32_e32 v43, v30, v43 ; 1056571E
v_mul_f32_e32 v44, v30, v44 ; 1058591E
v_mac_f32_e32 v43, v27, v197 ; 3E578B1B
v_mac_f32_e32 v44, v27, v198 ; 3E598D1B
v_mul_f32_e32 v31, v30, v31 ; 103E3F1E
v_mul_f32_e32 v32, v30, v32 ; 1040411E
v_mac_f32_e32 v31, v27, v203 ; 3E3F971B
v_mac_f32_e32 v32, v27, v204 ; 3E41991B
v_mul_f32_e32 v33, v30, v33 ; 1042431E
v_mul_f32_e32 v30, v30, v34 ; 103C451E
v_mac_f32_e32 v33, v27, v211 ; 3E43A71B
v_mac_f32_e32 v30, v27, v212 ; 3E3DA91B
v_mac_f32_e32 v43, v65, v195 ; 3E578741
v_mac_f32_e32 v44, v65, v196 ; 3E598941
v_mac_f32_e32 v31, v65, v201 ; 3E3F9341
v_mac_f32_e32 v32, v65, v202 ; 3E419541
v_mac_f32_e32 v33, v65, v209 ; 3E43A341
v_mac_f32_e32 v30, v65, v210 ; 3E3DA541
v_mac_f32_e32 v43, v18, v193 ; 3E578312
v_mac_f32_e32 v44, v18, v194 ; 3E598512
v_mac_f32_e32 v31, v18, v199 ; 3E3F8F12
v_mac_f32_e32 v32, v18, v200 ; 3E419112
v_mac_f32_e32 v33, v18, v207 ; 3E439F12
v_mac_f32_e32 v30, v18, v208 ; 3E3DA112
v_mac_f32_e32 v43, v59, v191 ; 3E577F3B
v_mac_f32_e32 v44, v59, v192 ; 3E59813B
v_mac_f32_e32 v31, v59, v45 ; 3E3E5B3B
v_mac_f32_e32 v32, v59, v46 ; 3E405D3B
v_mac_f32_e32 v33, v59, v205 ; 3E439B3B
v_mac_f32_e32 v30, v59, v206 ; 3E3D9D3B
v_mul_f32_e32 v18, v64, v51 ; 10246740
v_mul_f32_e32 v27, v64, v52 ; 10366940
v_mac_f32_e32 v18, v72, v217 ; 3E25B348
v_mac_f32_e32 v27, v72, v218 ; 3E37B548
v_mul_f32_e32 v28, v64, v28 ; 10383940
v_mul_f32_e32 v29, v64, v29 ; 103A3B40
v_mac_f32_e32 v28, v72, v223 ; 3E39BF48
v_mac_f32_e32 v29, v72, v224 ; 3E3BC148
s_waitcnt vmcnt(14) ; BF8C077E
v_mul_f32_e32 v34, v64, v47 ; 10445F40
v_mul_f32_e32 v45, v64, v48 ; 105A6140
v_mac_f32_e32 v34, v72, v231 ; 3E45CF48
v_mac_f32_e32 v45, v72, v232 ; 3E5BD148
v_mac_f32_e32 v18, v73, v215 ; 3E25AF49
v_mac_f32_e32 v27, v73, v216 ; 3E37B149
v_mac_f32_e32 v28, v73, v221 ; 3E39BB49
v_mac_f32_e32 v29, v73, v222 ; 3E3BBD49
v_mac_f32_e32 v34, v73, v229 ; 3E45CB49
v_mac_f32_e32 v45, v73, v230 ; 3E5BCD49
v_mac_f32_e32 v18, v19, v213 ; 3E25AB13
v_mac_f32_e32 v27, v19, v214 ; 3E37AD13
v_mac_f32_e32 v28, v19, v219 ; 3E39B713
v_mac_f32_e32 v29, v19, v220 ; 3E3BB913
v_mac_f32_e32 v34, v19, v227 ; 3E45C713
v_mac_f32_e32 v45, v19, v228 ; 3E5BC913
v_mac_f32_e32 v18, v74, v35 ; 3E24474A
v_mac_f32_e32 v27, v74, v36 ; 3E36494A
v_mac_f32_e32 v28, v74, v53 ; 3E386B4A
v_mac_f32_e32 v29, v74, v54 ; 3E3A6D4A
v_mac_f32_e32 v34, v74, v225 ; 3E45C34A
v_mac_f32_e32 v45, v74, v226 ; 3E5BC54A
image_sample_l v[35:36], 10, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[8:15], s[4:7] ; F0900A00 00222325
s_waitcnt vmcnt(10) ; BF8C077A
v_mul_f32_e32 v19, v81, v55 ; 10266F51
v_mul_f32_e32 v37, v81, v56 ; 104A7151
v_mac_f32_e32 v19, v85, v237 ; 3E27DB55
v_mac_f32_e32 v37, v85, v238 ; 3E4BDD55
s_waitcnt vmcnt(5) ; BF8C0775
v_mul_f32_e32 v25, v81, v25 ; 10323351
v_mul_f32_e32 v26, v81, v26 ; 10343551
v_mac_f32_e32 v25, v85, v243 ; 3E33E755
v_mac_f32_e32 v26, v85, v244 ; 3E35E955
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v35, v81, v35 ; 10464751
v_mul_f32_e32 v36, v81, v36 ; 10484951
v_mac_f32_e32 v35, v85, v251 ; 3E47F755
v_mac_f32_e32 v36, v85, v252 ; 3E49F955
v_mac_f32_e32 v19, v86, v235 ; 3E27D756
v_mac_f32_e32 v37, v86, v236 ; 3E4BD956
v_mac_f32_e32 v25, v86, v241 ; 3E33E356
v_mac_f32_e32 v26, v86, v242 ; 3E35E556
v_mac_f32_e32 v35, v86, v249 ; 3E47F356
v_mac_f32_e32 v36, v86, v250 ; 3E49F556
v_mac_f32_e32 v19, v87, v233 ; 3E27D357
v_mac_f32_e32 v37, v87, v234 ; 3E4BD557
v_mac_f32_e32 v25, v87, v239 ; 3E33DF57
v_mac_f32_e32 v26, v87, v240 ; 3E35E157
v_mac_f32_e32 v35, v87, v247 ; 3E47EF57
v_mac_f32_e32 v36, v87, v248 ; 3E49F157
v_mac_f32_e32 v19, v20, v49 ; 3E266314
v_mac_f32_e32 v37, v20, v50 ; 3E4A6514
v_mac_f32_e32 v25, v20, v57 ; 3E327314
v_mac_f32_e32 v26, v20, v58 ; 3E347514
v_mac_f32_e32 v35, v20, v245 ; 3E47EB14
v_mac_f32_e32 v36, v20, v246 ; 3E49ED14
v_mul_f32_e32 v20, v24, v22 ; 10282D18
v_mul_f32_e32 v22, v24, v23 ; 102C2F18
v_mul_f32_e32 v21, v24, v21 ; 102A2B18
v_mul_f32_e32 v23, v21, v66 ; 102E8515
v_mac_f32_e32 v23, v20, v75 ; 3E2E9714
v_mul_f32_e32 v24, v21, v67 ; 10308715
v_mac_f32_e32 v24, v20, v76 ; 3E309914
v_mul_f32_e32 v38, v21, v71 ; 104C8F15
v_mac_f32_e32 v38, v20, v77 ; 3E4C9B14
v_mac_f32_e32 v23, v22, v78 ; 3E2E9D16
v_mac_f32_e32 v24, v22, v79 ; 3E309F16
v_mac_f32_e32 v38, v22, v80 ; 3E4CA116
v_mul_f32_e32 v39, v21, v68 ; 104E8915
v_mac_f32_e32 v39, v20, v40 ; 3E4E5114
v_mul_f32_e32 v40, v21, v69 ; 10508B15
v_mac_f32_e32 v40, v20, v41 ; 3E505314
v_mul_f32_e32 v41, v21, v70 ; 10528D15
v_mac_f32_e32 v41, v20, v42 ; 3E525514
v_mac_f32_e32 v39, v22, v60 ; 3E4E7916
v_mac_f32_e32 v40, v22, v61 ; 3E507B16
v_mac_f32_e32 v41, v22, v63 ; 3E527F16
v_mul_f32_e32 v39, v39, v8 ; 104E1127
v_mac_f32_e32 v39, v23, v9 ; 3E4E1317
v_mul_f32_e32 v23, v40, v8 ; 102E1128
v_mac_f32_e32 v23, v24, v9 ; 3E2E1318
v_mul_f32_e32 v24, v41, v8 ; 10301129
v_mac_f32_e32 v24, v38, v9 ; 3E301326
v_mad_f32 v38, 2.0, v43, -1.0 ; D2820026 03CE56F4
v_mad_f32 v32, 2.0, v32, -1.0 ; D2820020 03CE40F4
v_mul_f32_e32 v38, v20, v38 ; 104C4D14
v_mac_f32_e32 v38, v22, v32 ; 3E4C4116
v_mad_f32 v31, 2.0, v31, -1.0 ; D282001F 03CE3EF4
v_mad_f32 v30, 2.0, v30, -1.0 ; D282001E 03CE3CF4
v_mul_f32_e32 v32, 0, v20 ; 10402880
v_mad_f32 v31, v22, v31, v32 ; D282001F 04823F16
v_mac_f32_e32 v31, v21, v30 ; 3E3E3D15
v_mad_f32 v30, 2.0, v44, -1.0 ; D282001E 03CE58F4
v_mad_f32 v33, 2.0, v33, -1.0 ; D2820021 03CE42F4
v_mul_f32_e32 v30, v20, v30 ; 103C3D14
v_mac_f32_e32 v30, 0, v22 ; 3E3C2C80
v_mac_f32_e32 v30, v21, v33 ; 3E3C4315
v_mad_f32 v18, 2.0, v18, -1.0 ; D2820012 03CE24F4
v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4
v_mul_f32_e32 v18, v20, v18 ; 10242514
v_mac_f32_e32 v18, v22, v29 ; 3E243B16
v_mad_f32 v28, 2.0, v28, -1.0 ; D282001C 03CE38F4
v_mad_f32 v29, 2.0, v45, -1.0 ; D282001D 03CE5AF4
v_mad_f32 v28, v22, v28, v32 ; D282001C 04823916
v_mac_f32_e32 v28, v21, v29 ; 3E383B15
v_mad_f32 v27, 2.0, v27, -1.0 ; D282001B 03CE36F4
v_mad_f32 v29, 2.0, v34, -1.0 ; D282001D 03CE44F4
v_mul_f32_e32 v27, v20, v27 ; 10363714
v_mac_f32_e32 v27, 0, v22 ; 3E362C80
v_mac_f32_e32 v27, v21, v29 ; 3E363B15
v_mac_f32_e32 v38, 0, v21 ; 3E4C2A80
v_mul_f32_e32 v29, v31, v8 ; 103A111F
v_mul_f32_e32 v30, v30, v8 ; 103C111E
v_mul_f32_e32 v8, v38, v8 ; 10101126
v_mac_f32_e32 v29, v28, v9 ; 3E3A131C
v_mac_f32_e32 v30, v27, v9 ; 3E3C131B
v_mac_f32_e32 v18, 0, v21 ; 3E242A80
v_mac_f32_e32 v8, v18, v9 ; 3E101312
v_mul_f32_e32 v9, v21, v82 ; 1012A515
v_mac_f32_e32 v9, v20, v88 ; 3E12B114
v_mul_f32_e32 v18, v21, v83 ; 1024A715
v_mac_f32_e32 v18, v20, v89 ; 3E24B314
v_mul_f32_e32 v27, v21, v84 ; 1036A915
v_mac_f32_e32 v27, v20, v90 ; 3E36B514
v_mac_f32_e32 v9, v22, v91 ; 3E12B716
v_mac_f32_e32 v18, v22, v92 ; 3E24B916
v_mac_f32_e32 v27, v22, v93 ; 3E36BB16
v_mac_f32_e32 v39, v9, v4 ; 3E4E0909
v_mac_f32_e32 v23, v18, v4 ; 3E2E0912
v_mad_f32 v9, 2.0, v37, -1.0 ; D2820009 03CE4AF4
v_mad_f32 v18, 2.0, v19, -1.0 ; D2820012 03CE26F4
v_mul_f32_e32 v9, v20, v9 ; 10121314
v_mul_f32_e32 v18, v20, v18 ; 10242514
v_mad_f32 v19, 2.0, v25, -1.0 ; D2820013 03CE32F4
v_mac_f32_e32 v32, v22, v19 ; 3E402716
v_mad_f32 v19, 2.0, v26, -1.0 ; D2820013 03CE34F4
v_mac_f32_e32 v18, v22, v19 ; 3E242716
v_mac_f32_e32 v9, 0, v22 ; 3E122C80
v_mad_f32 v19, 2.0, v36, -1.0 ; D2820013 03CE48F4
v_mac_f32_e32 v32, v21, v19 ; 3E402715
v_mad_f32 v19, 2.0, v35, -1.0 ; D2820013 03CE46F4
v_mac_f32_e32 v9, v21, v19 ; 3E122715
v_mac_f32_e32 v18, 0, v21 ; 3E242A80
v_mac_f32_e32 v29, v32, v4 ; 3E3A0920
v_mac_f32_e32 v30, v9, v4 ; 3E3C0909
v_mac_f32_e32 v8, v18, v4 ; 3E100912
v_readlane_b32 s0, v253, 10 ; 020115FD
s_nop 2 ; BF800002
v_mul_f32_e32 v9, s0, v10 ; 10121400
v_readlane_b32 s0, v253, 11 ; 020117FD
s_nop 2 ; BF800002
v_mul_f32_e32 v18, s0, v10 ; 10241400
v_readlane_b32 s0, v253, 12 ; 020119FD
s_nop 2 ; BF800002
v_mul_f32_e32 v10, s0, v10 ; 10141400
v_readlane_b32 s0, v253, 13 ; 02011BFD
s_nop 2 ; BF800002
v_mac_f32_e32 v9, s0, v11 ; 3E121600
v_readlane_b32 s0, v253, 14 ; 02011DFD
s_nop 2 ; BF800002
v_mac_f32_e32 v18, s0, v11 ; 3E241600
v_readlane_b32 s0, v253, 15 ; 02011FFD
s_nop 2 ; BF800002
v_mac_f32_e32 v10, s0, v11 ; 3E141600
v_readlane_b32 s0, v253, 16 ; 020121FD
s_nop 2 ; BF800002
v_mac_f32_e32 v9, s0, v0 ; 3E120000
v_readlane_b32 s0, v253, 17 ; 020123FD
s_nop 2 ; BF800002
v_mac_f32_e32 v18, s0, v0 ; 3E240000
v_readlane_b32 s0, v253, 18 ; 020125FD
s_nop 2 ; BF800002
v_mac_f32_e32 v10, s0, v0 ; 3E140000
v_readlane_b32 s0, v253, 19 ; 020127FD
s_nop 2 ; BF800002
v_add_f32_e32 v0, s0, v9 ; 06001200
v_readlane_b32 s0, v253, 20 ; 020129FD
s_nop 2 ; BF800002
v_add_f32_e32 v9, s0, v18 ; 06122400
v_readlane_b32 s0, v253, 29 ; 02013BFD
v_readlane_b32 s1, v253, 30 ; 02033DFD
s_nop 2 ; BF800002
s_load_dwordx4 s[0:3], s[0:1], 0x0 ; C0800100
s_load_dwordx8 s[4:11], s[100:101], 0x0 ; C0C26500
v_mul_f32_e32 v18, v0, v0 ; 10240100
v_mac_f32_e32 v18, v9, v9 ; 3E241309
v_readlane_b32 s12, v253, 9 ; 021913FD
s_nop 2 ; BF800002
v_add_f32_e32 v0, s12, v10 ; 0600140C
v_mul_f32_e32 v9, v30, v30 ; 10123D1E
v_mac_f32_e32 v9, v29, v29 ; 3E123B1D
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_add_f32_e32 v9, 1.0, v9 ; 061212F2
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mac_f32_e32 v18, v0, v0 ; 3E240100
v_mac_f32_e32 v24, v27, v4 ; 3E30091B
v_mov_b32_e32 v19, v18 ; 7E260312
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[4:11], s[0:3] ; F0800800 00010012
v_mul_f32_e32 v4, v9, v29 ; 10083B09
v_mul_f32_e32 v10, v9, v30 ; 10143D09
v_mul_f32_e32 v8, v9, v8 ; 10101109
v_mad_f32 v4, -v4, v3, v5 ; D2820004 24160704
v_mad_f32 v5, -v10, v3, v6 ; D2820005 241A070A
v_mad_f32 v3, -v8, v3, v7 ; D2820003 241E0708
v_mul_f32_e32 v6, v4, v4 ; 100C0904
v_mac_f32_e32 v6, v5, v5 ; 3E0C0B05
v_mac_f32_e32 v6, v3, v3 ; 3E0C0703
v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906
v_mul_f32_e32 v7, v16, v16 ; 100E2110
v_mac_f32_e32 v7, v17, v17 ; 3E0E2311
v_mac_f32_e32 v7, v15, v15 ; 3E0E1F0F
v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907
v_mul_f32_e32 v4, v6, v4 ; 10080906
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v3, v6, v3 ; 10060706
v_mul_f32_e32 v6, v7, v16 ; 100C2107
v_mul_f32_e32 v8, v7, v17 ; 10102307
v_mul_f32_e32 v7, v7, v15 ; 100E1F07
v_mul_f32_e32 v6, v6, v4 ; 100C0906
v_mac_f32_e32 v6, v8, v5 ; 3E0C0B08
v_mac_f32_e32 v6, v7, v3 ; 3E0C0707
v_max_f32_e32 v6, 0x38d1b717, v6 ; 200C0CFF 38D1B717
v_log_f32_e32 v6, v6 ; 7E0C4F06
v_mul_f32_e32 v4, v14, v4 ; 1008090E
v_mac_f32_e32 v4, v13, v5 ; 3E080B0D
v_mul_f32_e32 v5, 0x42000000, v2 ; 100A04FF 42000000
v_mul_legacy_f32_e32 v5, v5, v6 ; 0E0A0D05
v_exp_f32_e32 v5, v5 ; 7E0A4B05
v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80
v_mac_f32_e32 v62, -2.0, v5 ; 3E7C0AF5
v_mul_f32_e32 v6, v62, v5 ; 100C0B3E
v_mul_f32_e32 v5, v6, v5 ; 100A0B06
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mac_f32_e32 v4, v12, v3 ; 3E08070C
v_readlane_b32 s0, v253, 0 ; 020101FD
s_nop 2 ; BF800002
v_mul_f32_e32 v3, s0, v39 ; 10064E00
v_readlane_b32 s1, v253, 1 ; 020303FD
s_nop 2 ; BF800002
v_mul_f32_e32 v5, s1, v23 ; 100A2E01
v_readlane_b32 s2, v253, 8 ; 020511FD
s_nop 2 ; BF800002
v_add_f32_e64 v6, s2, s2 ; D2060006 00000402
s_waitcnt vmcnt(0) ; BF8C0770
v_mul_f32_e32 v6, v0, v6 ; 100C0D00
v_readlane_b32 s2, v253, 5 ; 02050BFD
s_nop 2 ; BF800002
v_max_f32_e32 v6, s2, v6 ; 200C0C02
v_readlane_b32 s2, v253, 2 ; 020505FD
s_nop 2 ; BF800002
v_mul_f32_e32 v7, s2, v24 ; 100E3002
v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880
v_readlane_b32 s3, v253, 6 ; 02070DFD
s_nop 2 ; BF800002
v_add_f32_e64 v8, s3, s3 ; D2060008 00000603
v_readlane_b32 s3, v253, 7 ; 02070FFD
s_nop 2 ; BF800002
v_add_f32_e64 v9, s3, s3 ; D2060009 00000603
v_mul_f32_e32 v8, v0, v8 ; 10101100
v_mul_f32_e32 v9, v0, v9 ; 10121300
v_readlane_b32 s3, v253, 3 ; 020707FD
s_nop 2 ; BF800002
v_max_f32_e32 v8, s3, v8 ; 20101003
v_readlane_b32 s3, v253, 4 ; 020709FD
s_nop 2 ; BF800002
v_max_f32_e32 v9, s3, v9 ; 20121203
v_min_f32_e32 v6, 1.0, v6 ; 1E0C0CF2
v_min_f32_e32 v8, 1.0, v8 ; 1E1010F2
v_min_f32_e32 v9, 1.0, v9 ; 1E1212F2
v_mul_f32_e32 v6, v39, v6 ; 100C0D27
v_mul_f32_e32 v8, v23, v8 ; 10101117
v_mul_f32_e32 v9, v24, v9 ; 10121318
v_mac_f32_e32 v6, v4, v3 ; 3E0C0704
v_mac_f32_e32 v8, v4, v5 ; 3E100B04
v_mac_f32_e32 v9, v4, v7 ; 3E120F04
v_mac_f32_e32 v6, s0, v2 ; 3E0C0400
v_mac_f32_e32 v8, s1, v2 ; 3E100401
v_mac_f32_e32 v9, s2, v2 ; 3E120402
v_mul_f32_e32 v2, v0, v6 ; 10040D00
v_mul_f32_e32 v3, v0, v8 ; 10061100
v_mul_f32_e32 v0, v0, v9 ; 10001300
v_mul_f32_e32 v2, 0.5, v2 ; 100404F0
v_mul_f32_e32 v3, 0.5, v3 ; 100606F0
v_mul_f32_e32 v0, 0.5, v0 ; 100000F0
v_mul_f32_e32 v2, v1, v2 ; 10040501
v_mul_f32_e32 v3, v1, v3 ; 10060701
v_mul_f32_e32 v0, v1, v0 ; 10000101
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 256
Code Size: 5048 bytes
LDS: 0 blocks
Scratch: 14336 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL IN[3]
DCL IN[4]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL CONST[0..29]
DCL TEMP[0..9], LOCAL
IMM[0] FLT32 { 0.0000, 1.0000, -1.0000, 0.0000}
0: MOV TEMP[0].w, IN[4].wwww
1: MUL TEMP[1].xyz, IN[0].xyzz, CONST[16].xyzz
2: MUL TEMP[2].x, IN[2].wwww, IN[2].wwww
3: MOV TEMP[3].y, IMM[0].xxxx
4: MOV TEMP[3].x, TEMP[2].xxxx
5: MOV TEMP[3].z, TEMP[2].xxxx
6: MUL TEMP[2], CONST[17], TEMP[1].xxxx
7: MAD TEMP[2], CONST[18], TEMP[1].yyyy, TEMP[2]
8: MAD TEMP[2].xyz, CONST[19], TEMP[1].zzzz, TEMP[2]
9: LRP TEMP[1].xyz, TEMP[3].xyzz, TEMP[2].xyzz, TEMP[1].xyzz
10: DP3 TEMP[2].x, CONST[21].xyzz, TEMP[1].xyzz
11: ADD TEMP[2].x, TEMP[2].xxxx, CONST[21].wwww
12: MUL TEMP[2].xyz, TEMP[2].xxxx, CONST[21].xyzz
13: ADD TEMP[2].xyz, TEMP[1].xyzz, -TEMP[2].xyzz
14: LRP TEMP[1].xyz, CONST[22].xxxx, TEMP[1].xyzz, TEMP[2].xyzz
15: MOV TEMP[2].xz, IMM[0].xxxx
16: MOV TEMP[2].y, CONST[23].xxxx
17: MUL TEMP[3].xyz, TEMP[2].zxyy, IN[1].yzxx
18: MAD TEMP[2].xyz, TEMP[2].yzxx, IN[1].zxyy, -TEMP[3].xyzz
19: MUL TEMP[3].xyz, IN[1].zxyy, TEMP[2].yzxx
20: MAD TEMP[3].xyz, IN[1].yzxx, TEMP[2].zxyy, -TEMP[3].xyzz
21: MUL TEMP[4].xyz, IN[1].zxyy, TEMP[3].yzxx
22: MAD TEMP[4].xyz, IN[1].yzxx, TEMP[3].zxyy, -TEMP[4].xyzz
23: DP3 TEMP[2].x, TEMP[4].xyzz, TEMP[2].xyzz
24: FSLT TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx
25: UIF TEMP[2].xxxx :0
26: MOV TEMP[0].w, IMM[0].zzzz
27: ELSE :0
28: MOV TEMP[0].w, IMM[0].yyyy
29: ENDIF
30: MUL TEMP[2], CONST[26], TEMP[1].xxxx
31: MAD TEMP[2], CONST[27], TEMP[1].yyyy, TEMP[2]
32: MAD TEMP[2], CONST[28], TEMP[1].zzzz, TEMP[2]
33: ADD TEMP[2], TEMP[2], CONST[29]
34: MAD TEMP[4].xy, IN[3].xyyy, CONST[24].xyyy, CONST[24].zwww
35: MAD TEMP[5].xy, IN[3].xyyy, CONST[25].xyyy, CONST[25].zwww
36: MOV TEMP[4].zw, TEMP[5].yyxy
37: MUL TEMP[5], CONST[7], TEMP[1].xxxx
38: MAD TEMP[5], CONST[8], TEMP[1].yyyy, TEMP[5]
39: MAD TEMP[1], CONST[9], TEMP[1].zzzz, TEMP[5]
40: ADD TEMP[1].xyz, TEMP[1], CONST[10]
41: MOV TEMP[5].x, CONST[11].xxxx
42: MOV TEMP[5].y, CONST[12].xxxx
43: MOV TEMP[5].z, CONST[13].xxxx
44: MOV TEMP[6].x, CONST[11].yyyy
45: MOV TEMP[6].y, CONST[12].yyyy
46: MOV TEMP[6].z, CONST[13].yyyy
47: MOV TEMP[7].x, CONST[11].zzzz
48: MOV TEMP[7].y, CONST[12].zzzz
49: MOV TEMP[7].z, CONST[13].zzzz
50: MUL TEMP[5].xyz, TEMP[5].xyzz, IN[1].xxxx
51: MAD TEMP[5].xyz, TEMP[6].xyzz, IN[1].yyyy, TEMP[5].xyzz
52: MAD TEMP[5].xyz, TEMP[7].xyzz, IN[1].zzzz, TEMP[5].xyzz
53: DP3 TEMP[6].x, TEMP[5].xyzz, TEMP[5].xyzz
54: RSQ TEMP[6].x, TEMP[6].xxxx
55: MUL TEMP[5].xyz, TEMP[5].xyzz, TEMP[6].xxxx
56: MUL TEMP[6].xyz, CONST[7].xyzz, TEMP[3].xxxx
57: MAD TEMP[6].xyz, CONST[8].xyzz, TEMP[3].yyyy, TEMP[6].xyzz
58: MAD TEMP[3].xyz, CONST[9].xyzz, TEMP[3].zzzz, TEMP[6].xyzz
59: DP3 TEMP[6].x, TEMP[3].xyzz, TEMP[3].xyzz
60: RSQ TEMP[6].x, TEMP[6].xxxx
61: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[6].xxxx
62: MUL TEMP[6].xyz, TEMP[5].zxyy, TEMP[3].yzxx
63: MAD TEMP[6].xyz, TEMP[5].yzxx, TEMP[3].zxyy, -TEMP[6].xyzz
64: MUL TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].wwww
65: MOV TEMP[6].x, TEMP[3].xxxx
66: MOV TEMP[6].y, TEMP[0].xxxx
67: MOV TEMP[6].z, TEMP[5].xxxx
68: MOV TEMP[6].w, TEMP[1].xxxx
69: MOV TEMP[7].x, TEMP[3].yyyy
70: MOV TEMP[7].y, TEMP[0].yyyy
71: MOV TEMP[7].z, TEMP[5].yyyy
72: MOV TEMP[7].w, TEMP[1].yyyy
73: MOV TEMP[3].x, TEMP[3].zzzz
74: MOV TEMP[3].y, TEMP[0].zzzz
75: MOV TEMP[3].z, TEMP[5].zzzz
76: MOV TEMP[3].w, TEMP[1].zzzz
77: MOV TEMP[0].w, IMM[0].yyyy
78: MOV TEMP[0].xyz, TEMP[5].xyzx
79: DP4 TEMP[1].x, CONST[0], TEMP[0]
80: DP4 TEMP[8].x, CONST[1], TEMP[0]
81: MOV TEMP[1].y, TEMP[8].xxxx
82: DP4 TEMP[0].x, CONST[2], TEMP[0]
83: MOV TEMP[1].z, TEMP[0].xxxx
84: MUL TEMP[0], TEMP[5].xyzz, TEMP[5].yzzx
85: DP4 TEMP[8].x, CONST[3], TEMP[0]
86: DP4 TEMP[9].x, CONST[4], TEMP[0]
87: MOV TEMP[8].y, TEMP[9].xxxx
88: DP4 TEMP[0].x, CONST[5], TEMP[0]
89: MOV TEMP[8].z, TEMP[0].xxxx
90: MUL TEMP[0].x, TEMP[5].yyyy, TEMP[5].yyyy
91: MAD TEMP[0].x, TEMP[5].xxxx, TEMP[5].xxxx, -TEMP[0].xxxx
92: MAD TEMP[0].xyz, CONST[6].xyzz, TEMP[0].xxxx, TEMP[8].xyzz
93: ADD TEMP[0].xyz, TEMP[0].xyzz, TEMP[1].xyzz
94: MOV TEMP[0].yzw, TEMP[0].yxyz
95: MAD TEMP[0].x, TEMP[2].zzzz, CONST[15].zzzz, CONST[15].wwww
96: MOV OUT[1], TEMP[4]
97: MOV OUT[2], TEMP[6]
98: MOV OUT[3], TEMP[7]
99: MOV OUT[4], TEMP[3]
100: MOV OUT[0], TEMP[2]
101: MOV OUT[5], TEMP[0]
102: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248)
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 256)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 260)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 264)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 336)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 340)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 344)
%78 = call float @llvm.SI.load.const(<16 x i8> %12, i32 348)
%79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352)
%80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 368)
%81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384)
%82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388)
%83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 392)
%84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 396)
%85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 400)
%86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 404)
%87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 408)
%88 = call float @llvm.SI.load.const(<16 x i8> %12, i32 412)
%89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 416)
%90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 420)
%91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 424)
%92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 428)
%93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 432)
%94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 436)
%95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 440)
%96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 444)
%97 = call float @llvm.SI.load.const(<16 x i8> %12, i32 448)
%98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 452)
%99 = call float @llvm.SI.load.const(<16 x i8> %12, i32 456)
%100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 460)
%101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 464)
%102 = call float @llvm.SI.load.const(<16 x i8> %12, i32 468)
%103 = call float @llvm.SI.load.const(<16 x i8> %12, i32 472)
%104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 476)
%105 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%106 = load <16 x i8>, <16 x i8> addrspace(2)* %105, align 16, !tbaa !0
%107 = add i32 %5, %7
%108 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %106, i32 0, i32 %107)
%109 = extractelement <4 x float> %108, i32 0
%110 = extractelement <4 x float> %108, i32 1
%111 = extractelement <4 x float> %108, i32 2
%112 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%113 = load <16 x i8>, <16 x i8> addrspace(2)* %112, align 16, !tbaa !0
%114 = add i32 %5, %7
%115 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %113, i32 0, i32 %114)
%116 = extractelement <4 x float> %115, i32 0
%117 = extractelement <4 x float> %115, i32 1
%118 = extractelement <4 x float> %115, i32 2
%119 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%120 = load <16 x i8>, <16 x i8> addrspace(2)* %119, align 16, !tbaa !0
%121 = add i32 %5, %7
%122 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %120, i32 0, i32 %121)
%123 = extractelement <4 x float> %122, i32 3
%124 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 3
%125 = load <16 x i8>, <16 x i8> addrspace(2)* %124, align 16, !tbaa !0
%126 = add i32 %5, %7
%127 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %125, i32 0, i32 %126)
%128 = extractelement <4 x float> %127, i32 0
%129 = extractelement <4 x float> %127, i32 1
%130 = fmul float %109, %63
%131 = fmul float %110, %64
%132 = fmul float %111, %65
%133 = fmul float %123, %123
%134 = fmul float %66, %130
%135 = fmul float %67, %130
%136 = fmul float %68, %130
%137 = fmul float %69, %131
%138 = fadd float %137, %134
%139 = fmul float %70, %131
%140 = fadd float %139, %135
%141 = fmul float %71, %131
%142 = fadd float %141, %136
%143 = fmul float %72, %132
%144 = fadd float %143, %138
%145 = fmul float %73, %132
%146 = fadd float %145, %140
%147 = fmul float %74, %132
%148 = fadd float %147, %142
%149 = call float @llvm.AMDGPU.lrp(float %133, float %144, float %130)
%150 = call float @llvm.AMDGPU.lrp(float 0.000000e+00, float %146, float %131)
%151 = call float @llvm.AMDGPU.lrp(float %133, float %148, float %132)
%152 = fmul float %75, %149
%153 = fmul float %76, %150
%154 = fadd float %153, %152
%155 = fmul float %77, %151
%156 = fadd float %154, %155
%157 = fadd float %156, %78
%158 = fmul float %157, %75
%159 = fmul float %157, %76
%160 = fmul float %157, %77
%161 = fsub float %149, %158
%162 = fsub float %150, %159
%163 = fsub float %151, %160
%164 = call float @llvm.AMDGPU.lrp(float %79, float %149, float %161)
%165 = call float @llvm.AMDGPU.lrp(float %79, float %150, float %162)
%166 = call float @llvm.AMDGPU.lrp(float %79, float %151, float %163)
%167 = fmul float %117, 0.000000e+00
%168 = fmul float %118, 0.000000e+00
%169 = fmul float %80, %116
%170 = fmul float %80, %118
%171 = fsub float %170, %167
%172 = fmul float %116, 0.000000e+00
%173 = fsub float %172, %168
%174 = fmul float %117, 0.000000e+00
%175 = fsub float %174, %169
%176 = fmul float %118, %173
%177 = fmul float %116, %175
%178 = fmul float %117, %171
%179 = fmul float %117, %175
%180 = fsub float %179, %176
%181 = fmul float %118, %171
%182 = fsub float %181, %177
%183 = fmul float %116, %173
%184 = fsub float %183, %178
%185 = fmul float %118, %182
%186 = fmul float %116, %184
%187 = fmul float %117, %180
%188 = fmul float %117, %184
%189 = fsub float %188, %185
%190 = fmul float %118, %180
%191 = fsub float %190, %186
%192 = fmul float %116, %182
%193 = fsub float %192, %187
%194 = fmul float %189, %171
%195 = fmul float %191, %173
%196 = fadd float %195, %194
%197 = fmul float %193, %175
%198 = fadd float %196, %197
%199 = fcmp olt float %198, 0.000000e+00
%. = select i1 %199, float -1.000000e+00, float 1.000000e+00
%200 = fmul float %89, %164
%201 = fmul float %90, %164
%202 = fmul float %91, %164
%203 = fmul float %92, %164
%204 = fmul float %93, %165
%205 = fadd float %204, %200
%206 = fmul float %94, %165
%207 = fadd float %206, %201
%208 = fmul float %95, %165
%209 = fadd float %208, %202
%210 = fmul float %96, %165
%211 = fadd float %210, %203
%212 = fmul float %97, %166
%213 = fadd float %212, %205
%214 = fmul float %98, %166
%215 = fadd float %214, %207
%216 = fmul float %99, %166
%217 = fadd float %216, %209
%218 = fmul float %100, %166
%219 = fadd float %218, %211
%220 = fadd float %213, %101
%221 = fadd float %215, %102
%222 = fadd float %217, %103
%223 = fadd float %219, %104
%224 = fmul float %128, %81
%225 = fadd float %224, %83
%226 = fmul float %129, %82
%227 = fadd float %226, %84
%228 = fmul float %128, %85
%229 = fadd float %228, %87
%230 = fmul float %129, %86
%231 = fadd float %230, %88
%232 = fmul float %40, %164
%233 = fmul float %41, %164
%234 = fmul float %42, %164
%235 = fmul float %43, %165
%236 = fadd float %235, %232
%237 = fmul float %44, %165
%238 = fadd float %237, %233
%239 = fmul float %45, %165
%240 = fadd float %239, %234
%241 = fmul float %46, %166
%242 = fadd float %241, %236
%243 = fmul float %47, %166
%244 = fadd float %243, %238
%245 = fmul float %48, %166
%246 = fadd float %245, %240
%247 = fadd float %242, %49
%248 = fadd float %244, %50
%249 = fadd float %246, %51
%250 = fmul float %52, %116
%251 = fmul float %55, %116
%252 = fmul float %58, %116
%253 = fmul float %53, %117
%254 = fadd float %253, %250
%255 = fmul float %56, %117
%256 = fadd float %255, %251
%257 = fmul float %59, %117
%258 = fadd float %257, %252
%259 = fmul float %54, %118
%260 = fadd float %259, %254
%261 = fmul float %57, %118
%262 = fadd float %261, %256
%263 = fmul float %60, %118
%264 = fadd float %263, %258
%265 = fmul float %260, %260
%266 = fmul float %262, %262
%267 = fadd float %266, %265
%268 = fmul float %264, %264
%269 = fadd float %267, %268
%270 = call float @llvm.AMDGPU.rsq.clamped.f32(float %269)
%271 = fmul float %260, %270
%272 = fmul float %262, %270
%273 = fmul float %264, %270
%274 = fmul float %40, %180
%275 = fmul float %41, %180
%276 = fmul float %42, %180
%277 = fmul float %43, %182
%278 = fadd float %277, %274
%279 = fmul float %44, %182
%280 = fadd float %279, %275
%281 = fmul float %45, %182
%282 = fadd float %281, %276
%283 = fmul float %46, %184
%284 = fadd float %283, %278
%285 = fmul float %47, %184
%286 = fadd float %285, %280
%287 = fmul float %48, %184
%288 = fadd float %287, %282
%289 = fmul float %284, %284
%290 = fmul float %286, %286
%291 = fadd float %290, %289
%292 = fmul float %288, %288
%293 = fadd float %291, %292
%294 = call float @llvm.AMDGPU.rsq.clamped.f32(float %293)
%295 = fmul float %284, %294
%296 = fmul float %286, %294
%297 = fmul float %288, %294
%298 = fmul float %273, %296
%299 = fmul float %271, %297
%300 = fmul float %272, %295
%301 = fmul float %272, %297
%302 = fsub float %301, %298
%303 = fmul float %273, %295
%304 = fsub float %303, %299
%305 = fmul float %271, %296
%306 = fsub float %305, %300
%307 = fmul float %302, %.
%308 = fmul float %304, %.
%309 = fmul float %306, %.
%310 = fmul float %13, %271
%311 = fmul float %14, %272
%312 = fadd float %310, %311
%313 = fmul float %15, %273
%314 = fadd float %312, %313
%315 = fadd float %314, %16
%316 = fmul float %17, %271
%317 = fmul float %18, %272
%318 = fadd float %316, %317
%319 = fmul float %19, %273
%320 = fadd float %318, %319
%321 = fadd float %320, %20
%322 = fmul float %21, %271
%323 = fmul float %22, %272
%324 = fadd float %322, %323
%325 = fmul float %23, %273
%326 = fadd float %324, %325
%327 = fadd float %326, %24
%328 = fmul float %271, %272
%329 = fmul float %272, %273
%330 = fmul float %273, %273
%331 = fmul float %273, %271
%332 = fmul float %25, %328
%333 = fmul float %26, %329
%334 = fadd float %332, %333
%335 = fmul float %27, %330
%336 = fadd float %334, %335
%337 = fmul float %28, %331
%338 = fadd float %336, %337
%339 = fmul float %29, %328
%340 = fmul float %30, %329
%341 = fadd float %339, %340
%342 = fmul float %31, %330
%343 = fadd float %341, %342
%344 = fmul float %32, %331
%345 = fadd float %343, %344
%346 = fmul float %33, %328
%347 = fmul float %34, %329
%348 = fadd float %346, %347
%349 = fmul float %35, %330
%350 = fadd float %348, %349
%351 = fmul float %36, %331
%352 = fadd float %350, %351
%353 = fmul float %272, %272
%354 = fmul float %271, %271
%355 = fsub float %354, %353
%356 = fmul float %37, %355
%357 = fadd float %356, %338
%358 = fmul float %38, %355
%359 = fadd float %358, %345
%360 = fmul float %39, %355
%361 = fadd float %360, %352
%362 = fadd float %357, %315
%363 = fadd float %359, %321
%364 = fadd float %361, %327
%365 = fmul float %222, %61
%366 = fadd float %365, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %225, float %227, float %229, float %231)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %295, float %307, float %271, float %247)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %296, float %308, float %272, float %248)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %297, float %309, float %273, float %249)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %366, float %362, float %363, float %364)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %220, float %221, float %222, float %223)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Disassembly of the vertex shader whose LLVM IR ends just above this listing.
; Overall shape: load descriptors, fetch four vertex inputs, load scalar
; constants, do the ALU work, then export targets 32..36 and finally target 12.
;
; Load four 4-dword descriptors from the table addressed by s[8:9]
; (dword offsets 0x0, 0x4, 0x8, 0xc); they feed the buffer loads below.
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904
s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908
s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C
; v0 = s10 + v0; v0 is the index operand (idxen) of every buffer load below.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_waitcnt lgkmcnt(0) ; BF8C007F
; Four formatted xyzw fetches, one per descriptor. The destination ranges of
; the first three overlap (v[1:4], v[4:7], v[7:10]), so each later load
; overwrites the last component of the previous one.
buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[4:7], v0, s[12:15], 0 idxen ; E00C2000 80030400
s_waitcnt vmcnt(0) ; BF8C0770
buffer_load_format_xyzw v[7:10], v0, s[16:19], 0 idxen ; E00C2000 80040700
buffer_load_format_xyzw v[11:14], v0, s[20:23], 0 idxen ; E00C2000 80050B00
; Load the 4-dword descriptor s[44:47] from s[2:3]; all s_buffer_load_dword
; instructions below read through it.
s_load_dwordx4 s[44:47], s[2:3], 0x0 ; C0960300
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Scalar constant loads: one dword per instruction, offsets 0x0..0x77
; (some offsets in that range are not loaded).
s_buffer_load_dword s4, s[44:47], 0x0 ; C2022D00
s_buffer_load_dword s6, s[44:47], 0x1 ; C2032D01
s_buffer_load_dword s3, s[44:47], 0x2 ; C201AD02
s_buffer_load_dword s0, s[44:47], 0x3 ; C2002D03
s_buffer_load_dword s7, s[44:47], 0x4 ; C203AD04
s_buffer_load_dword s9, s[44:47], 0x5 ; C204AD05
s_buffer_load_dword s5, s[44:47], 0x6 ; C202AD06
s_buffer_load_dword s1, s[44:47], 0x7 ; C200AD07
s_buffer_load_dword s10, s[44:47], 0x8 ; C2052D08
s_buffer_load_dword s11, s[44:47], 0x9 ; C205AD09
s_buffer_load_dword s8, s[44:47], 0xa ; C2042D0A
s_buffer_load_dword s2, s[44:47], 0xb ; C2012D0B
s_buffer_load_dword s19, s[44:47], 0xc ; C209AD0C
s_buffer_load_dword s24, s[44:47], 0xd ; C20C2D0D
s_buffer_load_dword s18, s[44:47], 0xe ; C2092D0E
s_buffer_load_dword s12, s[44:47], 0xf ; C2062D0F
s_buffer_load_dword s21, s[44:47], 0x10 ; C20AAD10
s_buffer_load_dword s27, s[44:47], 0x11 ; C20DAD11
s_buffer_load_dword s20, s[44:47], 0x12 ; C20A2D12
s_buffer_load_dword s13, s[44:47], 0x13 ; C206AD13
s_buffer_load_dword s25, s[44:47], 0x14 ; C20CAD14
s_buffer_load_dword s28, s[44:47], 0x15 ; C20E2D15
s_buffer_load_dword s22, s[44:47], 0x16 ; C20B2D16
s_buffer_load_dword s17, s[44:47], 0x17 ; C208AD17
s_buffer_load_dword s14, s[44:47], 0x18 ; C2072D18
s_buffer_load_dword s15, s[44:47], 0x19 ; C207AD19
s_buffer_load_dword s16, s[44:47], 0x1a ; C2082D1A
s_buffer_load_dword s48, s[44:47], 0x1c ; C2182D1C
s_buffer_load_dword s49, s[44:47], 0x1d ; C218AD1D
s_buffer_load_dword s50, s[44:47], 0x1e ; C2192D1E
s_buffer_load_dword s51, s[44:47], 0x20 ; C219AD20
s_buffer_load_dword s52, s[44:47], 0x21 ; C21A2D21
s_buffer_load_dword s53, s[44:47], 0x22 ; C21AAD22
s_buffer_load_dword s54, s[44:47], 0x24 ; C21B2D24
s_buffer_load_dword s55, s[44:47], 0x25 ; C21BAD25
s_buffer_load_dword s56, s[44:47], 0x26 ; C21C2D26
s_buffer_load_dword s57, s[44:47], 0x28 ; C21CAD28
s_buffer_load_dword s58, s[44:47], 0x29 ; C21D2D29
s_buffer_load_dword s59, s[44:47], 0x2a ; C21DAD2A
s_buffer_load_dword s60, s[44:47], 0x2c ; C21E2D2C
s_buffer_load_dword s61, s[44:47], 0x2d ; C21EAD2D
s_buffer_load_dword s62, s[44:47], 0x2e ; C21F2D2E
s_buffer_load_dword s63, s[44:47], 0x30 ; C21FAD30
s_buffer_load_dword s64, s[44:47], 0x31 ; C2202D31
s_buffer_load_dword s65, s[44:47], 0x32 ; C220AD32
s_buffer_load_dword s66, s[44:47], 0x34 ; C2212D34
s_buffer_load_dword s67, s[44:47], 0x35 ; C221AD35
s_buffer_load_dword s68, s[44:47], 0x36 ; C2222D36
s_buffer_load_dword s23, s[44:47], 0x3e ; C20BAD3E
s_buffer_load_dword s26, s[44:47], 0x3f ; C20D2D3F
s_buffer_load_dword s69, s[44:47], 0x40 ; C222AD40
s_buffer_load_dword s70, s[44:47], 0x41 ; C2232D41
s_buffer_load_dword s71, s[44:47], 0x42 ; C223AD42
s_buffer_load_dword s72, s[44:47], 0x44 ; C2242D44
s_buffer_load_dword s73, s[44:47], 0x45 ; C224AD45
s_buffer_load_dword s74, s[44:47], 0x46 ; C2252D46
s_buffer_load_dword s75, s[44:47], 0x48 ; C225AD48
s_buffer_load_dword s76, s[44:47], 0x49 ; C2262D49
s_buffer_load_dword s77, s[44:47], 0x4a ; C226AD4A
s_buffer_load_dword s78, s[44:47], 0x4c ; C2272D4C
s_buffer_load_dword s79, s[44:47], 0x4d ; C227AD4D
s_buffer_load_dword s80, s[44:47], 0x4e ; C2282D4E
s_buffer_load_dword s81, s[44:47], 0x54 ; C228AD54
s_buffer_load_dword s82, s[44:47], 0x55 ; C2292D55
s_buffer_load_dword s83, s[44:47], 0x56 ; C229AD56
s_buffer_load_dword s84, s[44:47], 0x57 ; C22A2D57
s_buffer_load_dword s85, s[44:47], 0x58 ; C22AAD58
s_buffer_load_dword s86, s[44:47], 0x5c ; C22B2D5C
s_buffer_load_dword s87, s[44:47], 0x60 ; C22BAD60
s_buffer_load_dword s88, s[44:47], 0x61 ; C22C2D61
s_buffer_load_dword s89, s[44:47], 0x62 ; C22CAD62
s_buffer_load_dword s90, s[44:47], 0x63 ; C22D2D63
s_buffer_load_dword s91, s[44:47], 0x64 ; C22DAD64
s_buffer_load_dword s92, s[44:47], 0x65 ; C22E2D65
s_buffer_load_dword s93, s[44:47], 0x66 ; C22EAD66
s_buffer_load_dword s94, s[44:47], 0x67 ; C22F2D67
s_buffer_load_dword s95, s[44:47], 0x68 ; C22FAD68
s_buffer_load_dword s37, s[44:47], 0x69 ; C212AD69
s_buffer_load_dword s38, s[44:47], 0x6a ; C2132D6A
s_buffer_load_dword s39, s[44:47], 0x6b ; C213AD6B
s_buffer_load_dword s40, s[44:47], 0x6c ; C2142D6C
s_buffer_load_dword s41, s[44:47], 0x6d ; C214AD6D
s_buffer_load_dword s42, s[44:47], 0x6e ; C2152D6E
s_buffer_load_dword s43, s[44:47], 0x6f ; C215AD6F
s_buffer_load_dword s34, s[44:47], 0x70 ; C2112D70
s_buffer_load_dword s29, s[44:47], 0x71 ; C20EAD71
s_buffer_load_dword s35, s[44:47], 0x72 ; C211AD72
s_buffer_load_dword s30, s[44:47], 0x73 ; C20F2D73
s_buffer_load_dword s31, s[44:47], 0x74 ; C20FAD74
s_buffer_load_dword s32, s[44:47], 0x75 ; C2102D75
s_buffer_load_dword s36, s[44:47], 0x76 ; C2122D76
s_buffer_load_dword s33, s[44:47], 0x77 ; C210AD77
; All scalar constants must have landed before the vector ALU code reads them.
s_waitcnt lgkmcnt(0) ; BF8C007F
; v0/v7/v8/v9 are seeded with constants here and completed by the v_mac
; instructions a few lines down (constant * v11 or v12 + constant); they are
; the four values exported to target 32.
v_mov_b32_e32 v0, s89 ; 7E000259
v_mov_b32_e32 v7, s90 ; 7E0E025A
v_mov_b32_e32 v8, s93 ; 7E10025D
v_mov_b32_e32 v9, s94 ; 7E12025E
; Scale the first fetched input (v1..v3) component-wise by s69..s71.
v_mul_f32_e32 v1, s69, v1 ; 10020245
v_mul_f32_e32 v2, s70, v2 ; 10040446
v_mul_f32_e32 v3, s71, v3 ; 10060647
v_mac_f32_e32 v0, s87, v11 ; 3E001657
v_mac_f32_e32 v7, s88, v12 ; 3E0E1858
v_mac_f32_e32 v8, s91, v11 ; 3E10165B
v_mac_f32_e32 v9, s92, v12 ; 3E12185C
; Start of two 3-component linear combinations: v11..v13 accumulate
; constants * (v4, v5, v6), v14..v16 accumulate constants * (v1, v2, v3).
v_mul_f32_e32 v11, s60, v4 ; 1016083C
v_mul_f32_e32 v12, s63, v4 ; 1018083F
v_mul_f32_e32 v13, s66, v4 ; 101A0842
v_mul_f32_e32 v14, s72, v1 ; 101C0248
v_mul_f32_e32 v15, s73, v1 ; 101E0249
v_mul_f32_e32 v16, s74, v1 ; 1020024A
; 0x80000000 is the IEEE-754 single-precision bit pattern of -0.0.
v_mov_b32_e32 v17, 0x80000000 ; 7E2202FF 80000000
v_mul_f32_e32 v18, s86, v4 ; 10240856
v_mul_f32_e32 v19, v17, v5 ; 10260B11
v_mac_f32_e32 v19, s86, v6 ; 3E260C56
v_mac_f32_e32 v11, s61, v5 ; 3E160A3D
v_mac_f32_e32 v12, s64, v5 ; 3E180A40
v_mac_f32_e32 v13, s67, v5 ; 3E1A0A43
v_mac_f32_e32 v14, s75, v2 ; 3E1C044B
v_mac_f32_e32 v15, s76, v2 ; 3E1E044C
v_mac_f32_e32 v16, s77, v2 ; 3E20044D
v_mac_f32_e32 v11, s62, v6 ; 3E160C3E
v_mac_f32_e32 v12, s65, v6 ; 3E180C41
v_mac_f32_e32 v13, s68, v6 ; 3E1A0C44
v_mac_f32_e32 v14, s78, v3 ; 3E1C064E
v_mac_f32_e32 v15, s79, v3 ; 3E1E064F
v_mac_f32_e32 v16, s80, v3 ; 3E200650
; v20 = v10*v10 and v10 = 1 - v10*v10 are used as a pair of blend weights:
; v1 = v1*(1 - w) + v14*w and likewise for v3, with w = v10*v10.
v_mul_f32_e32 v20, v10, v10 ; 1028150A
v_mad_f32 v10, -v10, v10, 1.0 ; D282000A 23CA150A
v_mul_f32_e32 v1, v1, v10 ; 10021501
v_mul_f32_e32 v3, v3, v10 ; 10061503
v_mac_f32_e32 v1, v14, v20 ; 3E02290E
v_mac_f32_e32 v3, v16, v20 ; 3E062910
v_mac_f32_e32 v2, 0, v15 ; 3E041E80
; v10 = dot((s81, s82, s83), (v1, v2, v3)) + s84, then subtract v10 times the
; same three constants from (v1, v2, v3) into v14, v15, v10.
v_mul_f32_e32 v10, s81, v1 ; 10140251
v_mac_f32_e32 v10, s82, v2 ; 3E140452
v_mac_f32_e32 v10, s83, v3 ; 3E140653
v_add_f32_e32 v10, s84, v10 ; 06141454
v_mad_f32 v14, -v10, s81, v1 ; D282000E 2404A30A
v_mad_f32 v15, -v10, s82, v2 ; D282000F 2408A50A
v_mad_f32 v10, -v10, s83, v3 ; D282000A 240CA70A
; Blend with weight s85: result = (1 - s85) * v14/v15/v10 + s85 * v1/v2/v3.
; This matches the three llvm.AMDGPU.lrp calls in the IR above.
v_sub_f32_e64 v16, 1.0, s85 ; D2080010 0000AAF2
v_mul_f32_e32 v14, v14, v16 ; 101C210E
v_mul_f32_e32 v15, v15, v16 ; 101E210F
v_mul_f32_e32 v10, v10, v16 ; 1014210A
v_mac_f32_e32 v14, s85, v1 ; 3E1C0255
v_mac_f32_e32 v15, s85, v2 ; 3E1E0455
v_mac_f32_e32 v10, s85, v3 ; 3E140655
v_mul_f32_e32 v1, s95, v14 ; 10021C5F
; Products and differences over (v4, v5, v6) interleaved with 3-component
; constant multiply-accumulates into v17/v20/v22 and v18/v21/v23.
v_mul_f32_e32 v2, v17, v6 ; 10040D11
v_mac_f32_e32 v2, 0, v4 ; 3E040880
v_mad_f32 v3, 0, v5, -v18 ; D2820003 844A0A80
v_mul_f32_e32 v16, v2, v6 ; 10200D02
v_mad_f32 v16, v5, v3, -v16 ; D2820010 84420705
v_mul_f32_e32 v17, s48, v16 ; 10222030
v_mul_f32_e32 v18, s48, v14 ; 10241C30
v_mul_f32_e32 v20, s49, v16 ; 10282031
v_mul_f32_e32 v21, s49, v14 ; 102A1C31
v_mul_f32_e32 v22, s50, v16 ; 102C2032
v_mul_f32_e32 v23, s50, v14 ; 102E1C32
v_mul_f32_e32 v24, v3, v4 ; 10300903
v_mad_f32 v24, v6, v19, -v24 ; D2820018 84622706
v_mac_f32_e32 v17, s51, v24 ; 3E223033
v_mac_f32_e32 v18, s51, v15 ; 3E241E33
v_mac_f32_e32 v20, s52, v24 ; 3E283034
v_mac_f32_e32 v21, s52, v15 ; 3E2A1E34
v_mac_f32_e32 v22, s53, v24 ; 3E2C3035
v_mac_f32_e32 v23, s53, v15 ; 3E2E1E35
v_mul_f32_e32 v25, v19, v5 ; 10320B13
v_mad_f32 v25, v4, v2, -v25 ; D2820019 84660504
v_mac_f32_e32 v17, s54, v25 ; 3E223236
v_mac_f32_e32 v18, s54, v10 ; 3E241436
v_mac_f32_e32 v20, s55, v25 ; 3E283237
v_mac_f32_e32 v21, s55, v10 ; 3E2A1437
v_mul_f32_e32 v26, v25, v4 ; 10340919
v_mad_f32 v26, v6, v16, -v26 ; D282001A 846A2106
v_mul_f32_e32 v16, v16, v5 ; 10200B10
v_mad_f32 v4, v4, v24, -v16 ; D2820004 84423104
v_mul_f32_e32 v6, v24, v6 ; 100C0D18
v_mad_f32 v5, v5, v25, -v6 ; D2820005 841A3305
v_mul_f32_e32 v5, v19, v5 ; 100A0B13
v_mac_f32_e32 v5, v2, v26 ; 3E0A3502
v_mac_f32_e32 v5, v3, v4 ; 3E0A0903
; v2 = (0 > v5) ? -1.0 : 1.0. This is the sign factor from the IR's
; `select i1 %199, float -1.0, float 1.0`, multiplied into exports 33..35.
v_cmp_gt_f32_e32 vcc, 0, v5 ; 7C080A80
v_cndmask_b32_e64 v2, 1.0, -1.0, vcc ; D2000002 01A9E6F2
v_mac_f32_e32 v22, s56, v25 ; 3E2C3238
v_mac_f32_e32 v23, s56, v10 ; 3E2E1438
; Add s57..s59 to v18/v21/v23; the sums become the .w of exports 33, 34, 35.
v_add_f32_e32 v3, s57, v18 ; 06062439
v_add_f32_e32 v4, s58, v21 ; 06082A3A
v_add_f32_e32 v5, s59, v23 ; 060A2E3B
; Export target 32. The expcnt wait lets v0 be overwritten right after.
exp 15, 32, 0, 0, 0, v0, v7, v8, v9 ; F800020F 09080700
s_waitcnt expcnt(0) ; BF8C070F
; v0 = rsq_clamp(v11^2 + v12^2 + v13^2) and v6 = rsq_clamp(v17^2 + v20^2 +
; v22^2); each 3-vector is then scaled by its reciprocal square root.
v_mul_f32_e32 v0, v11, v11 ; 1000170B
v_mac_f32_e32 v0, v12, v12 ; 3E00190C
v_mac_f32_e32 v0, v13, v13 ; 3E001B0D
v_rsq_clamp_f32_e32 v0, v0 ; 7E005900
v_mul_f32_e32 v6, v17, v17 ; 100C2311
v_mac_f32_e32 v6, v20, v20 ; 3E0C2914
v_mac_f32_e32 v6, v22, v22 ; 3E0C2D16
v_rsq_clamp_f32_e32 v6, v6 ; 7E0C5906
v_mul_f32_e32 v7, v0, v11 ; 100E1700
v_mul_f32_e32 v8, v0, v12 ; 10101900
v_mul_f32_e32 v0, v0, v13 ; 10001B00
v_mul_f32_e32 v9, v6, v17 ; 10122306
v_mul_f32_e32 v11, v6, v20 ; 10162906
v_mul_f32_e32 v6, v6, v22 ; 100C2D06
; For each of exports 33..35, the second component is a product difference
; of the two scaled vectors (v7, v8, v0) and (v9, v11, v6), times the sign v2.
v_mul_f32_e32 v12, v11, v0 ; 1018010B
v_mad_f32 v12, v8, v6, -v12 ; D282000C 84320D08
v_mul_f32_e32 v12, v2, v12 ; 10181902
exp 15, 33, 0, 0, 0, v9, v12, v7, v3 ; F800021F 03070C09
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, v6, v7 ; 10060F06
v_mad_f32 v3, v0, v9, -v3 ; D2820003 840E1300
v_mul_f32_e32 v3, v2, v3 ; 10060702
exp 15, 34, 0, 0, 0, v11, v3, v8, v4 ; F800022F 0408030B
s_waitcnt expcnt(0) ; BF8C070F
v_mul_f32_e32 v3, v9, v8 ; 10061109
v_mad_f32 v3, v7, v11, -v3 ; D2820003 840E1707
v_mul_f32_e32 v2, v2, v3 ; 10040702
exp 15, 35, 0, 0, 0, v6, v2, v0, v5 ; F800023F 05000206
s_waitcnt expcnt(0) ; BF8C070F
; v1..v4 accumulate constants * (v14, v15, v10) plus s31, s32, s36, s33;
; these four sums are the values exported to target 12 at the end.
v_mul_f32_e32 v2, s37, v14 ; 10041C25
v_mul_f32_e32 v3, s38, v14 ; 10061C26
v_mul_f32_e32 v4, s39, v14 ; 10081C27
v_mac_f32_e32 v1, s40, v15 ; 3E021E28
v_mac_f32_e32 v2, s41, v15 ; 3E041E29
v_mac_f32_e32 v3, s42, v15 ; 3E061E2A
v_mac_f32_e32 v4, s43, v15 ; 3E081E2B
; v6/v9/v5 accumulate constants times second-order products of (v7, v8, v0):
; v0*v8, v8*v7, v0*v0, v7*v0 and v7*v7 - v8*v8.
; v11/v12/v13 accumulate the first-order terms: constants * (v7, v8, v0).
v_mul_f32_e32 v5, v0, v8 ; 100A1100
v_mul_f32_e32 v6, s24, v5 ; 100C0A18
v_mul_f32_e32 v9, s27, v5 ; 10120A1B
v_mul_f32_e32 v5, s28, v5 ; 100A0A1C
v_mul_f32_e32 v11, v8, v7 ; 10160F08
v_mac_f32_e32 v6, s19, v11 ; 3E0C1613
v_mac_f32_e32 v9, s21, v11 ; 3E121615
v_mac_f32_e32 v5, s25, v11 ; 3E0A1619
v_mul_f32_e32 v11, v0, v0 ; 10160100
v_mac_f32_e32 v6, s18, v11 ; 3E0C1612
v_mac_f32_e32 v9, s20, v11 ; 3E121614
v_mac_f32_e32 v5, s22, v11 ; 3E0A1616
v_mul_f32_e32 v11, s6, v8 ; 10161006
v_mac_f32_e32 v11, s4, v7 ; 3E160E04
v_mul_f32_e32 v12, s9, v8 ; 10181009
v_mac_f32_e32 v12, s7, v7 ; 3E180E07
v_mul_f32_e32 v13, s11, v8 ; 101A100B
v_mac_f32_e32 v13, s10, v7 ; 3E1A0E0A
v_mac_f32_e32 v11, s3, v0 ; 3E160003
v_mac_f32_e32 v12, s5, v0 ; 3E180005
v_mac_f32_e32 v13, s8, v0 ; 3E1A0008
v_mul_f32_e32 v0, v7, v0 ; 10000107
v_mac_f32_e32 v6, s12, v0 ; 3E0C000C
v_mac_f32_e32 v9, s13, v0 ; 3E12000D
v_mac_f32_e32 v5, s17, v0 ; 3E0A0011
v_mul_f32_e32 v0, v8, v8 ; 10001108
v_mad_f32 v0, v7, v7, -v0 ; D2820000 84020F07
v_mac_f32_e32 v6, s14, v0 ; 3E0C000E
v_mac_f32_e32 v9, s15, v0 ; 3E12000F
v_mac_f32_e32 v5, s16, v0 ; 3E0A0010
v_mac_f32_e32 v1, s34, v10 ; 3E021422
v_mac_f32_e32 v3, s35, v10 ; 3E061423
v_add_f32_e32 v0, s0, v11 ; 06001600
v_add_f32_e32 v7, s1, v12 ; 060E1801
v_add_f32_e32 v8, s2, v13 ; 06101A02
v_add_f32_e32 v3, s36, v3 ; 06060624
; v11 = s26 + s23 * v3: the first component of export 36, derived from the
; same v3 that is later exported as the third component of target 12.
v_mov_b32_e32 v11, s26 ; 7E16021A
v_mac_f32_e32 v11, s23, v3 ; 3E160617
v_add_f32_e32 v0, v0, v6 ; 06000D00
v_add_f32_e32 v6, v7, v9 ; 060C1307
v_add_f32_e32 v5, v8, v5 ; 060A0B08
exp 15, 36, 0, 0, 0, v11, v0, v6, v5 ; F800024F 0506000B
v_mac_f32_e32 v2, s29, v10 ; 3E04141D
v_mac_f32_e32 v4, s30, v10 ; 3E08141E
; Export 36 must have read v0 before it is rewritten for the final export.
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v0, s31, v1 ; 0600021F
v_add_f32_e32 v1, s32, v2 ; 06020420
v_add_f32_e32 v2, s33, v4 ; 06040821
; Final export: target 12 with the fourth operand set to 1, matching the last
; llvm.SI.export call in the IR (presumably the position export with the
; "done" flag -- confirm against the ISA manual).
exp 15, 12, 0, 1, 0, v0, v1, v3, v2 ; F80008CF 02030100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 104
VGPRS: 28
Code Size: 1276 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0..5]
DCL CONST[8..9]
DCL TEMP[0..6], LOCAL
IMM[0] FLT32 { 2.0000, -1.0000, 1.0000, 0.0000}
0: MOV TEMP[0].x, IN[1].wwww
1: MOV TEMP[0].y, IN[2].wwww
2: MOV TEMP[0].z, IN[3].wwww
3: MOV TEMP[1].xy, IN[0].xyyy
4: TEX TEMP[1], TEMP[1], SAMP[0], 2D
5: MUL TEMP[2].xyz, TEMP[1].xyzz, CONST[8].xyzz
6: MOV TEMP[3].xy, IN[0].zwww
7: TEX TEMP[3].yw, TEMP[3], SAMP[1], 2D
8: MAD TEMP[3].xy, TEMP[3].wyyy, IMM[0].xxxx, IMM[0].yyyy
9: DP2 TEMP[4].x, TEMP[3].xyyy, TEMP[3].xyyy
10: MOV_SAT TEMP[4].x, TEMP[4].xxxx
11: ADD TEMP[4].x, IMM[0].zzzz, -TEMP[4].xxxx
12: SQRT TEMP[4].x, TEMP[4].xxxx
13: MOV TEMP[3].z, TEMP[4].xxxx
14: FSLT TEMP[4].x, TEMP[1].wwww, CONST[9].xxxx
15: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].zzzz
16: KILL_IF -TEMP[4].xxxx
17: MOV TEMP[4].w, IMM[0].wwww
18: DP3 TEMP[5].x, IN[1].xyzz, TEMP[3].xyzz
19: DP3 TEMP[6].x, IN[2].xyzz, TEMP[3].xyzz
20: MOV TEMP[5].y, TEMP[6].xxxx
21: DP3 TEMP[3].x, IN[3].xyzz, TEMP[3].xyzz
22: MOV TEMP[5].z, TEMP[3].xxxx
23: MUL TEMP[4].xyz, TEMP[2].xyzz, IN[4].yzww
24: ADD TEMP[0].xyz, CONST[0].xyzz, -TEMP[0].xyzz
25: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz
26: RSQ TEMP[3].x, TEMP[3].xxxx
27: MAD TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx, CONST[1].xyzz
28: DP3 TEMP[3].x, TEMP[0].xyzz, TEMP[0].xyzz
29: RSQ TEMP[3].x, TEMP[3].xxxx
30: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[3].xxxx
31: DP3 TEMP[0].x, TEMP[5].xyzz, TEMP[0].xyzz
32: MAX TEMP[0].x, IMM[0].wwww, TEMP[0].xxxx
33: POW TEMP[0].x, TEMP[0].xxxx, CONST[4].xxxx
34: MUL TEMP[0].xyz, TEMP[0].xxxx, CONST[5]
35: MUL TEMP[2].xyz, TEMP[2].xyzz, CONST[3].xyzz
36: DP3 TEMP[3].x, TEMP[5].xyzz, CONST[1].xyzz
37: MAX TEMP[3].x, IMM[0].wwww, TEMP[3].xxxx
38: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
39: MAD TEMP[0].xyz, CONST[3].xyzz, TEMP[0].xyzz, TEMP[2].xyzz
40: MUL TEMP[0].xyz, TEMP[0].xyzz, IMM[0].xxxx
41: MOV TEMP[0].w, TEMP[1].wwww
42: ADD TEMP[0], TEMP[4], TEMP[0]
43: MOV TEMP[4].w, TEMP[0].wwww
44: MOV_SAT TEMP[1].x, IN[4].xxxx
45: LRP TEMP[4].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz
46: MOV OUT[0], TEMP[4]
47: END
; ModuleID = 'tgsi'
; Fragment shader entry point: the LLVM IR translation of the TGSI FRAG
; program listed directly above. The "TGSI n" notes below give the TGSI
; instruction index that each group of IR instructions implements.
;
; Arguments actually used in this body:
;   %1  - table whose element 0 is the <16 x i8> descriptor passed to
;         llvm.SI.load.const (source of CONST[] values)
;   %2  - table of <4 x i32> entries, passed as the <16 x i8> operand of
;         llvm.SI.sample (presumably sampler state -- confirm)
;   %3  - table of <8 x i32> entries, passed as the <32 x i8> operand of
;         llvm.SI.sample (presumably texture resource -- confirm)
;   %5  - i32 passed as the third operand of every llvm.SI.fs.interp call
;   %7  - <2 x i32> passed as the last operand of every llvm.SI.fs.interp call
; No value is returned; the only outputs are the llvm.AMDGPU.kill call and the
; single llvm.SI.export at the end.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
; Constant loads. Offsets are in bytes with 16 bytes per CONST[] slot:
;   %24..%26 = CONST[0].xyz   %27..%29 = CONST[1].xyz   %30..%32 = CONST[2].xyz
;   %33..%35 = CONST[3].xyz   %36      = CONST[4].x     %37..%39 = CONST[5].xyz
;   %40..%42 = CONST[8].xyz   %43      = CONST[9].x
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144)
; Descriptors for the two texture fetches: element 0 of %3/%2 is used for
; SAMP[0] (%45, %47) and element 1 for SAMP[1] (%50, %53).
%44 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%45 = load <32 x i8>, <32 x i8> addrspace(2)* %44, align 32, !tbaa !0
%46 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%49 = bitcast <8 x i32> addrspace(2)* %48 to <32 x i8> addrspace(2)*
%50 = load <32 x i8>, <32 x i8> addrspace(2)* %49, align 32, !tbaa !0
%51 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%52 = bitcast <4 x i32> addrspace(2)* %51 to <16 x i8> addrspace(2)*
%53 = load <16 x i8>, <16 x i8> addrspace(2)* %52, align 16, !tbaa !0
; Interpolated inputs; the first two operands are (channel, attribute index):
;   %54..%57 = IN[0].xyzw   %58..%61 = IN[1].xyzw   %62..%65 = IN[2].xyzw
;   %66..%69 = IN[3].xyzw   %70..%73 = IN[4].xyzw
%54 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%55 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%56 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%57 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%58 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%59 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%60 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%61 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%62 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%63 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%64 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7)
%65 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%66 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%67 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%68 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%69 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%70 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%71 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%72 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%73 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
; TGSI 3-4: sample SAMP[0] at IN[0].xy; %79..%82 are the x, y, z, w results.
%74 = bitcast float %54 to i32
%75 = bitcast float %55 to i32
%76 = insertelement <2 x i32> undef, i32 %74, i32 0
%77 = insertelement <2 x i32> %76, i32 %75, i32 1
%78 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %45, <16 x i8> %47, i32 2)
%79 = extractelement <4 x float> %78, i32 0
%80 = extractelement <4 x float> %78, i32 1
%81 = extractelement <4 x float> %78, i32 2
%82 = extractelement <4 x float> %78, i32 3
; TGSI 5: TEMP[2].xyz = first sample .xyz * CONST[8].xyz.
%83 = fmul float %79, %40
%84 = fmul float %80, %41
%85 = fmul float %81, %42
; TGSI 6-7: sample SAMP[1] at IN[0].zw; only the .y (%91) and .w (%92)
; components are extracted.
%86 = bitcast float %56 to i32
%87 = bitcast float %57 to i32
%88 = insertelement <2 x i32> undef, i32 %86, i32 0
%89 = insertelement <2 x i32> %88, i32 %87, i32 1
%90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %50, <16 x i8> %53, i32 2)
%91 = extractelement <4 x float> %90, i32 1
%92 = extractelement <4 x float> %90, i32 3
; TGSI 8: TEMP[3].x = w * 2 - 1 (%94), TEMP[3].y = y * 2 - 1 (%96).
%93 = fmul float %92, 2.000000e+00
%94 = fadd float %93, -1.000000e+00
%95 = fmul float %91, 2.000000e+00
%96 = fadd float %95, -1.000000e+00
; TGSI 9-13: TEMP[3].z = sqrt(1 - saturate(x*x + y*y)) (%102).
%97 = fmul float %94, %94
%98 = fmul float %96, %96
%99 = fadd float %97, %98
%100 = call float @llvm.AMDIL.clamp.(float %99, float 0.000000e+00, float 1.000000e+00)
%101 = fsub float 1.000000e+00, %100
%102 = call float @llvm.sqrt.f32(float %101)
; TGSI 14-16: pass -1.0 to kill when the first sample's .w is below
; CONST[9].x, otherwise 0.0.
%103 = fcmp olt float %82, %43
%104 = select i1 %103, float -1.000000e+00, float 0.000000e+00
call void @llvm.AMDGPU.kill(float %104)
; TGSI 18-22: TEMP[5].xyz = three DP3s of IN[1].xyz, IN[2].xyz, IN[3].xyz with
; TEMP[3].xyz (%94, %96, %102); results are %109, %114, %119.
%105 = fmul float %58, %94
%106 = fmul float %59, %96
%107 = fadd float %106, %105
%108 = fmul float %60, %102
%109 = fadd float %107, %108
%110 = fmul float %62, %94
%111 = fmul float %63, %96
%112 = fadd float %111, %110
%113 = fmul float %64, %102
%114 = fadd float %112, %113
%115 = fmul float %66, %94
%116 = fmul float %67, %96
%117 = fadd float %116, %115
%118 = fmul float %68, %102
%119 = fadd float %117, %118
; TGSI 23: TEMP[4].xyz = TEMP[2].xyz * IN[4].yzw.
%120 = fmul float %83, %71
%121 = fmul float %84, %72
%122 = fmul float %85, %73
; TGSI 24: TEMP[0].xyz = CONST[0].xyz - (IN[1].w, IN[2].w, IN[3].w).
%123 = fsub float %24, %61
%124 = fsub float %25, %65
%125 = fsub float %26, %69
; TGSI 25-26: reciprocal square root of dot(TEMP[0].xyz, TEMP[0].xyz).
%126 = fmul float %123, %123
%127 = fmul float %124, %124
%128 = fadd float %127, %126
%129 = fmul float %125, %125
%130 = fadd float %128, %129
%131 = call float @llvm.AMDGPU.rsq.clamped.f32(float %130)
; TGSI 27: TEMP[0].xyz = TEMP[0].xyz * rsq + CONST[1].xyz.
%132 = fmul float %123, %131
%133 = fadd float %132, %27
%134 = fmul float %124, %131
%135 = fadd float %134, %28
%136 = fmul float %125, %131
%137 = fadd float %136, %29
; TGSI 28-30: rescale TEMP[0].xyz by the rsq of its own squared length.
%138 = fmul float %133, %133
%139 = fmul float %135, %135
%140 = fadd float %139, %138
%141 = fmul float %137, %137
%142 = fadd float %140, %141
%143 = call float @llvm.AMDGPU.rsq.clamped.f32(float %142)
%144 = fmul float %133, %143
%145 = fmul float %135, %143
%146 = fmul float %137, %143
; TGSI 31-33: pow(max(dot(TEMP[5].xyz, TEMP[0].xyz), 0), CONST[4].x).
%147 = fmul float %109, %144
%148 = fmul float %114, %145
%149 = fadd float %148, %147
%150 = fmul float %119, %146
%151 = fadd float %149, %150
%152 = call float @llvm.maxnum.f32(float %151, float 0.000000e+00)
%153 = call float @llvm.pow.f32(float %152, float %36)
; TGSI 34: TEMP[0].xyz = pow result * CONST[5].xyz.
%154 = fmul float %153, %37
%155 = fmul float %153, %38
%156 = fmul float %153, %39
; TGSI 35: TEMP[2].xyz = TEMP[2].xyz * CONST[3].xyz.
%157 = fmul float %83, %33
%158 = fmul float %84, %34
%159 = fmul float %85, %35
; TGSI 36-38: scale TEMP[2].xyz by max(dot(TEMP[5].xyz, CONST[1].xyz), 0).
%160 = fmul float %109, %27
%161 = fmul float %114, %28
%162 = fadd float %161, %160
%163 = fmul float %119, %29
%164 = fadd float %162, %163
%165 = call float @llvm.maxnum.f32(float %164, float 0.000000e+00)
%166 = fmul float %157, %165
%167 = fmul float %158, %165
%168 = fmul float %159, %165
; TGSI 39: TEMP[0].xyz = CONST[3].xyz * TEMP[0].xyz + TEMP[2].xyz.
%169 = fmul float %33, %154
%170 = fadd float %169, %166
%171 = fmul float %34, %155
%172 = fadd float %171, %167
%173 = fmul float %35, %156
%174 = fadd float %173, %168
; TGSI 40: TEMP[0].xyz *= 2.
%175 = fmul float %170, 2.000000e+00
%176 = fmul float %172, 2.000000e+00
%177 = fmul float %174, 2.000000e+00
; TGSI 41-43: TEMP[0] = TEMP[4] + TEMP[0]. The .w lane adds 0.0 (TEMP[4].w was
; set from IMM[0].w) to the first sample's .w, giving %181.
%178 = fadd float %120, %175
%179 = fadd float %121, %176
%180 = fadd float %122, %177
%181 = fadd float %82, 0.000000e+00
; TGSI 44-45: LRP with factor saturate(IN[4].x) between TEMP[0].xyz
; and CONST[2].xyz.
%182 = call float @llvm.AMDIL.clamp.(float %70, float 0.000000e+00, float 1.000000e+00)
%183 = call float @llvm.AMDGPU.lrp(float %182, float %178, float %30)
%184 = call float @llvm.AMDGPU.lrp(float %182, float %179, float %31)
%185 = call float @llvm.AMDGPU.lrp(float %182, float %180, float %32)
; TGSI 46: pack (x, y) and (z, w) into two 32-bit values with
; llvm.SI.packf16 and export them; each packed value appears twice in the
; export operand list.
%186 = call i32 @llvm.SI.packf16(float %183, float %184)
%187 = bitcast i32 %186 to float
%188 = call i32 @llvm.SI.packf16(float %185, float %181)
%189 = bitcast i32 %188 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %187, float %189, float %187, float %189)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
declare void @llvm.AMDGPU.kill(float)
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000
v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001
v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100
v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101
v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200
v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201
v_interp_p1_f32 v5, v0, 3, 0, [m0] ; C8140300
v_interp_p2_f32 v5, [v5], v1, 3, 0, [m0] ; C8150301
v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400
v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401
v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500
v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501
v_interp_p1_f32 v8, v0, 2, 1, [m0] ; C8200600
v_interp_p2_f32 v8, [v8], v1, 2, 1, [m0] ; C8210601
v_interp_p1_f32 v9, v0, 3, 1, [m0] ; C8240700
v_interp_p2_f32 v9, [v9], v1, 3, 1, [m0] ; C8250701
v_interp_p1_f32 v10, v0, 0, 2, [m0] ; C8280800
v_interp_p2_f32 v10, [v10], v1, 0, 2, [m0] ; C8290801
v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900
v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901
v_interp_p1_f32 v12, v0, 2, 2, [m0] ; C8300A00
v_interp_p2_f32 v12, [v12], v1, 2, 2, [m0] ; C8310A01
v_interp_p1_f32 v13, v0, 3, 2, [m0] ; C8340B00
v_interp_p2_f32 v13, [v13], v1, 3, 2, [m0] ; C8350B01
v_interp_p1_f32 v14, v0, 0, 3, [m0] ; C8380C00
v_interp_p2_f32 v14, [v14], v1, 0, 3, [m0] ; C8390C01
v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00
v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01
v_interp_p1_f32 v16, v0, 2, 3, [m0] ; C8400E00
v_interp_p2_f32 v16, [v16], v1, 2, 3, [m0] ; C8410E01
v_interp_p1_f32 v17, v0, 3, 3, [m0] ; C8440F00
v_interp_p2_f32 v17, [v17], v1, 3, 3, [m0] ; C8450F01
v_interp_p1_f32 v18, v0, 0, 4, [m0] ; C8481000
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p2_f32 v18, [v18], v1, 0, 4, [m0] ; C8491001
v_interp_p1_f32 v19, v0, 1, 4, [m0] ; C84C1100
v_interp_p2_f32 v19, [v19], v1, 1, 4, [m0] ; C84D1101
v_interp_p1_f32 v20, v0, 2, 4, [m0] ; C8501200
v_interp_p2_f32 v20, [v20], v1, 2, 4, [m0] ; C8511201
v_interp_p1_f32 v0, v0, 3, 4, [m0] ; C8001300
v_interp_p2_f32 v0, [v0], v1, 3, 4, [m0] ; C8011301
s_waitcnt lgkmcnt(0) ; BF8C007F
image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00441502
image_sample v[1:2], 10, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[24:31], s[12:15] ; F0800A00 00660104
s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124
s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120
s_buffer_load_dword s6, s[0:3], 0x21 ; C2030121
s_buffer_load_dword s7, s[0:3], 0x22 ; C2038122
s_waitcnt vmcnt(0) ; BF8C0770
v_mad_f32 v2, 2.0, v2, -1.0 ; D2820002 03CE04F4
v_mad_f32 v1, 2.0, v1, -1.0 ; D2820001 03CE02F4
v_mul_f32_e32 v3, v1, v1 ; 10060301
v_mac_f32_e32 v3, v2, v2 ; 3E060502
v_add_f32_e64 v3, 0, v3 clamp ; D2060803 00020680
v_sub_f32_e32 v3, 1.0, v3 ; 080606F2
v_sqrt_f32_e32 v3, v3 ; 7E066703
s_waitcnt lgkmcnt(0) ; BF8C007F
v_cmp_gt_f32_e32 vcc, s4, v24 ; 7C083004
v_mul_f32_e32 v4, s5, v21 ; 10082A05
v_mul_f32_e32 v5, s6, v22 ; 100A2C06
v_mul_f32_e32 v21, s7, v23 ; 102A2E07
v_cndmask_b32_e64 v22, 0, -1.0, vcc ; D2000016 01A9E680
v_cmpx_le_f32_e32 vcc, 0, v22 ; 7C262C80
v_mul_f32_e32 v6, v2, v6 ; 100C0D02
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
v_mac_f32_e32 v6, v1, v7 ; 3E0C0F01
v_mul_f32_e32 v7, v2, v10 ; 100E1502
v_mac_f32_e32 v7, v1, v11 ; 3E0E1701
v_mul_f32_e32 v2, v2, v14 ; 10041D02
v_mac_f32_e32 v2, v1, v15 ; 3E041F01
s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104
s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105
s_waitcnt lgkmcnt(0) ; BF8C007F
v_sub_f32_e32 v1, s4, v9 ; 08021204
v_sub_f32_e32 v9, s5, v13 ; 08121A05
v_sub_f32_e32 v10, s6, v17 ; 08142206
v_mul_f32_e32 v11, v1, v1 ; 10160301
v_mac_f32_e32 v11, v9, v9 ; 3E161309
v_mac_f32_e32 v11, v10, v10 ; 3E16150A
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116
s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106
s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108
s_buffer_load_dword s9, s[0:3], 0x9 ; C2048109
s_buffer_load_dword s10, s[0:3], 0xa ; C205010A
s_buffer_load_dword s11, s[0:3], 0xc ; C205810C
s_buffer_load_dword s12, s[0:3], 0xd ; C206010D
s_buffer_load_dword s13, s[0:3], 0xe ; C206810E
s_buffer_load_dword s14, s[0:3], 0x10 ; C2070110
s_buffer_load_dword s15, s[0:3], 0x14 ; C2078114
s_buffer_load_dword s0, s[0:3], 0x15 ; C2000115
v_mad_f32 v1, v1, v11, s7 ; D2820001 001E1701
v_mad_f32 v9, v9, v11, s8 ; D2820009 00221709
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mad_f32 v10, v10, v11, s5 ; D282000A 0016170A
v_mul_f32_e32 v11, v1, v1 ; 10160301
v_mac_f32_e32 v11, v9, v9 ; 3E161309
v_mac_f32_e32 v11, v10, v10 ; 3E16150A
v_rsq_clamp_f32_e32 v11, v11 ; 7E16590B
v_mac_f32_e32 v6, v3, v8 ; 3E0C1103
v_mac_f32_e32 v7, v3, v12 ; 3E0E1903
v_mac_f32_e32 v2, v3, v16 ; 3E042103
v_mul_f32_e32 v1, v11, v1 ; 1002030B
v_mul_f32_e32 v3, v11, v9 ; 1006130B
v_mul_f32_e32 v8, v11, v10 ; 1010150B
v_mul_f32_e32 v1, v1, v6 ; 10020D01
v_mac_f32_e32 v1, v3, v7 ; 3E020F03
v_mac_f32_e32 v1, v8, v2 ; 3E020508
v_max_f32_e32 v1, 0, v1 ; 20020280
v_log_f32_e32 v1, v1 ; 7E024F01
v_mul_f32_e32 v3, s7, v6 ; 10060C07
v_mac_f32_e32 v3, s8, v7 ; 3E060E08
v_mac_f32_e32 v3, s5, v2 ; 3E060405
v_mul_legacy_f32_e32 v1, s14, v1 ; 0E02020E
v_exp_f32_e32 v1, v1 ; 7E024B01
v_mul_f32_e32 v2, s15, v1 ; 1004020F
v_max_f32_e32 v3, 0, v3 ; 20060680
v_mul_f32_e32 v6, s11, v4 ; 100C080B
v_mul_f32_e32 v7, v3, v6 ; 100E0D03
v_mac_f32_e32 v7, s11, v2 ; 3E0E040B
v_mac_f32_e32 v7, v3, v6 ; 3E0E0D03
v_mac_f32_e32 v7, s11, v2 ; 3E0E040B
v_mul_f32_e32 v2, s0, v1 ; 10040200
v_mul_f32_e32 v6, s12, v5 ; 100C0A0C
v_mul_f32_e32 v8, v3, v6 ; 10100D03
v_mac_f32_e32 v8, s12, v2 ; 3E10040C
v_mac_f32_e32 v8, v3, v6 ; 3E100D03
v_mac_f32_e32 v8, s12, v2 ; 3E10040C
v_mul_f32_e32 v1, s4, v1 ; 10020204
v_mul_f32_e32 v2, s13, v21 ; 10042A0D
v_mul_f32_e32 v6, v3, v2 ; 100C0503
v_mac_f32_e32 v6, s13, v1 ; 3E0C020D
v_mac_f32_e32 v6, v3, v2 ; 3E0C0503
v_mac_f32_e32 v6, s13, v1 ; 3E0C020D
v_add_f32_e64 v1, 0, v18 clamp ; D2060801 00022480
v_sub_f32_e32 v2, 1.0, v1 ; 080402F2
v_mul_f32_e32 v3, s6, v2 ; 10060406
v_mul_f32_e32 v9, s9, v2 ; 10120409
v_mul_f32_e32 v2, s10, v2 ; 1004040A
v_add_f32_e32 v10, 0, v24 ; 06143080
v_mac_f32_e32 v7, v19, v4 ; 3E0E0913
v_mac_f32_e32 v8, v20, v5 ; 3E100B14
v_mac_f32_e32 v6, v0, v21 ; 3E0C2B00
v_mac_f32_e32 v3, v7, v1 ; 3E060307
v_mac_f32_e32 v9, v8, v1 ; 3E120308
v_mac_f32_e32 v2, v6, v1 ; 3E040306
v_cvt_pkrtz_f16_f32_e32 v0, v3, v9 ; 5E001303
v_cvt_pkrtz_f16_f32_e32 v1, v2, v10 ; 5E021502
exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 28
Code Size: 676 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL CONST[0..3]
DCL TEMP[0..1], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV TEMP[1].xy, IN[1].xyxx
5: MOV OUT[1], TEMP[1]
6: MOV OUT[0], TEMP[0]
7: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
%29 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%30 = load <16 x i8>, <16 x i8> addrspace(2)* %29, align 16, !tbaa !0
%31 = add i32 %5, %7
%32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31)
%33 = extractelement <4 x float> %32, i32 0
%34 = extractelement <4 x float> %32, i32 1
%35 = extractelement <4 x float> %32, i32 2
%36 = extractelement <4 x float> %32, i32 3
%37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !tbaa !0
%39 = add i32 %5, %7
%40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39)
%41 = extractelement <4 x float> %40, i32 0
%42 = extractelement <4 x float> %40, i32 1
%43 = fmul float %13, %33
%44 = fmul float %14, %33
%45 = fmul float %15, %33
%46 = fmul float %16, %33
%47 = fmul float %17, %34
%48 = fadd float %47, %43
%49 = fmul float %18, %34
%50 = fadd float %49, %44
%51 = fmul float %19, %34
%52 = fadd float %51, %45
%53 = fmul float %20, %34
%54 = fadd float %53, %46
%55 = fmul float %21, %35
%56 = fadd float %55, %48
%57 = fmul float %22, %35
%58 = fadd float %57, %50
%59 = fmul float %23, %35
%60 = fadd float %59, %52
%61 = fmul float %24, %35
%62 = fadd float %61, %54
%63 = fmul float %25, %36
%64 = fadd float %63, %56
%65 = fmul float %26, %36
%66 = fadd float %65, %58
%67 = fmul float %27, %36
%68 = fadd float %67, %60
%69 = fmul float %28, %36
%70 = fadd float %69, %62
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %64, float %66, float %68, float %70)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
v_add_i32_e32 v0, s10, v0 ; 4A00000A
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v1, 0 ; 7E020280
s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900
s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s12, s[0:3], 0x0 ; C2060100
s_buffer_load_dword s13, s[0:3], 0x1 ; C2068101
buffer_load_format_xyzw v[2:5], v0, s[4:7], 0 idxen ; E00C2000 80010200
buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600
s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102
s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103
s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104
s_buffer_load_dword s7, s[0:3], 0x5 ; C2038105
s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106
s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107
s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108
s_buffer_load_dword s11, s[0:3], 0x9 ; C2058109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(1) lgkmcnt(0) ; BF8C0071
v_mul_f32_e32 v0, s12, v2 ; 1000040C
s_waitcnt vmcnt(0) ; BF8C0770
v_mac_f32_e32 v0, s6, v3 ; 3E000606
v_mul_f32_e32 v8, s13, v2 ; 1010040D
v_mac_f32_e32 v8, s7, v3 ; 3E100607
v_mul_f32_e32 v9, s4, v2 ; 10120404
v_mac_f32_e32 v9, s8, v3 ; 3E120608
v_mul_f32_e32 v2, s5, v2 ; 10040405
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v0, s10, v4 ; 3E00080A
v_mac_f32_e32 v8, s11, v4 ; 3E10080B
v_mac_f32_e32 v9, s14, v4 ; 3E12080E
v_mac_f32_e32 v2, s15, v4 ; 3E04080F
v_mac_f32_e32 v0, s16, v5 ; 3E000A10
v_mac_f32_e32 v8, s17, v5 ; 3E100A11
v_mac_f32_e32 v9, s18, v5 ; 3E120A12
v_mac_f32_e32 v2, s0, v5 ; 3E040A00
exp 15, 32, 0, 0, 0, v6, v7, v1, v1 ; F800020F 01010706
exp 15, 12, 0, 1, 0, v0, v8, v9, v2 ; F80008CF 02090800
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 12
Code Size: 196 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL CONST[0]
DCL CONST[3..6]
DCL TEMP[0..4], LOCAL
IMM[0] FLT32 { 0.1000, 0.6000, 0.3000, 0.5000}
IMM[1] FLT32 { 3.0000, 2.0000, 1.0000, -0.5000}
IMM[2] FLT32 { 0.0000, 5.0000, 0.0500, 0.7500}
IMM[3] FLT32 { 1.1800, -0.2500, 0.0000, 0.0000}
0: MOV TEMP[0].xy, IN[0].xyyy
1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
2: MUL TEMP[1].x, TEMP[0].xxxx, IMM[0].zzzz
3: MAD TEMP[1].x, TEMP[0].yyyy, IMM[0].yyyy, TEMP[1].xxxx
4: MAD TEMP[1].x, TEMP[0].zzzz, IMM[0].xxxx, TEMP[1].xxxx
5: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].wwww
6: LRP TEMP[0], IMM[0].wwww, TEMP[1].xxxx, TEMP[0]
7: MUL TEMP[2].xyz, CONST[0].xyzz, IMM[1].xxxx
8: MUL TEMP[1].xyz, TEMP[0].xyzz, TEMP[2].xyzz
9: ADD TEMP[2].xy, CONST[3].xyyy, CONST[3].zwww
10: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[0].wwww
11: ADD TEMP[2].xy, IN[0].xyyy, -TEMP[2].xyyy
12: ADD TEMP[3].xy, CONST[3].zwww, -CONST[3].xyyy
13: RCP TEMP[4].x, TEMP[3].xxxx
14: RCP TEMP[4].y, TEMP[3].yyyy
15: MUL TEMP[2].xy, TEMP[2].xyyy, TEMP[4].xyyy
16: MUL TEMP[2].xy, TEMP[2].xyyy, IMM[1].yyyy
17: MOV TEMP[3].xy, TEMP[2].xyxx
18: MUL TEMP[4].x, TEMP[2].xxxx, TEMP[2].xxxx
19: ADD TEMP[4].x, IMM[1].zzzz, -TEMP[4].xxxx
20: MUL TEMP[2].x, TEMP[2].yyyy, TEMP[2].yyyy
21: ADD TEMP[2].x, TEMP[4].xxxx, -TEMP[2].xxxx
22: MOV TEMP[4].xy, IN[0].xyyy
23: TEX TEMP[4].xyz, TEMP[4], SAMP[1], 2D
24: ADD TEMP[4].xy, TEMP[4].xyzz, IMM[1].wwww
25: MOV TEMP[4].xy, TEMP[4].xyxx
26: MOV TEMP[4].z, IMM[2].xxxx
27: SQRT TEMP[2].x, TEMP[2].xxxx
28: MOV TEMP[3].z, TEMP[2].xxxx
29: ADD TEMP[2].xyz, TEMP[3].xyzz, -TEMP[4].xyzz
30: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
31: RSQ TEMP[3].x, TEMP[3].xxxx
32: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
33: MOV TEMP[3].xy, CONST[4].xyxx
34: MOV TEMP[3].z, -CONST[4].zzzz
35: DP3 TEMP[2].x, TEMP[3].xyzz, TEMP[2].xyzz
36: FSLT TEMP[3].x, IMM[2].xxxx, TEMP[2].xxxx
37: UIF TEMP[3].xxxx :0
38: MAD TEMP[3].x, TEMP[2].xxxx, IMM[2].yyyy, IMM[2].zzzz
39: ELSE :0
40: MAD TEMP[3].x, TEMP[2].xxxx, IMM[2].zzzz, IMM[2].zzzz
41: ENDIF
42: MOV_SAT TEMP[2].x, TEMP[2].xxxx
43: MAD TEMP[2].x, TEMP[2].xxxx, IMM[1].yyyy, IMM[2].wwww
44: MUL TEMP[1].xyz, TEMP[1].xyzz, TEMP[2].xxxx
45: MOV_SAT TEMP[2].x, TEMP[3].xxxx
46: MUL TEMP[3].x, CONST[5].xxxx, IMM[0].zzzz
47: MAD TEMP[3].x, CONST[5].yyyy, IMM[0].yyyy, TEMP[3].xxxx
48: MAD TEMP[3].x, CONST[5].zzzz, IMM[0].xxxx, TEMP[3].xxxx
49: ADD TEMP[3].x, IMM[3].xxxx, -TEMP[3].xxxx
50: POW TEMP[3].x, TEMP[3].xxxx, IMM[1].xxxx
51: ADD TEMP[3].x, TEMP[3].xxxx, IMM[3].yyyy
52: MOV_SAT TEMP[3].x, TEMP[3].xxxx
53: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[3].xxxx
54: MUL TEMP[0].x, TEMP[0].wwww, TEMP[2].xxxx
55: MUL TEMP[0].x, TEMP[0].xxxx, CONST[6].xxxx
56: MOV TEMP[1].w, TEMP[0].xxxx
57: MOV OUT[0], TEMP[1]
58: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96)
%38 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%39 = load <32 x i8>, <32 x i8> addrspace(2)* %38, align 32, !tbaa !0
%40 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%41 = load <16 x i8>, <16 x i8> addrspace(2)* %40, align 16, !tbaa !0
%42 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%43 = bitcast <8 x i32> addrspace(2)* %42 to <32 x i8> addrspace(2)*
%44 = load <32 x i8>, <32 x i8> addrspace(2)* %43, align 32, !tbaa !0
%45 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%46 = bitcast <4 x i32> addrspace(2)* %45 to <16 x i8> addrspace(2)*
%47 = load <16 x i8>, <16 x i8> addrspace(2)* %46, align 16, !tbaa !0
%48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%50 = bitcast float %48 to i32
%51 = bitcast float %49 to i32
%52 = insertelement <2 x i32> undef, i32 %50, i32 0
%53 = insertelement <2 x i32> %52, i32 %51, i32 1
%54 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %53, <32 x i8> %39, <16 x i8> %41, i32 2)
%55 = extractelement <4 x float> %54, i32 0
%56 = extractelement <4 x float> %54, i32 1
%57 = extractelement <4 x float> %54, i32 2
%58 = extractelement <4 x float> %54, i32 3
%59 = fmul float %55, 0x3FD3333340000000
%60 = fmul float %56, 0x3FE3333340000000
%61 = fadd float %60, %59
%62 = fmul float %57, 0x3FB99999A0000000
%63 = fadd float %62, %61
%64 = fmul float %63, %58
%65 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %55)
%66 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %56)
%67 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %57)
%68 = call float @llvm.AMDGPU.lrp(float 5.000000e-01, float %64, float %58)
%69 = fmul float %24, 3.000000e+00
%70 = fmul float %25, 3.000000e+00
%71 = fmul float %26, 3.000000e+00
%72 = fmul float %65, %69
%73 = fmul float %66, %70
%74 = fmul float %67, %71
%75 = fadd float %27, %29
%76 = fadd float %28, %30
%77 = fmul float %75, 5.000000e-01
%78 = fmul float %76, 5.000000e-01
%79 = fsub float %48, %77
%80 = fsub float %49, %78
%81 = fsub float %29, %27
%82 = fsub float %30, %28
%83 = fdiv float 1.000000e+00, %81
%84 = fdiv float 1.000000e+00, %82
%85 = fmul float %79, %83
%86 = fmul float %80, %84
%87 = fmul float %85, 2.000000e+00
%88 = fmul float %86, 2.000000e+00
%89 = fmul float %87, %87
%90 = fsub float 1.000000e+00, %89
%91 = fmul float %88, %88
%92 = fsub float %90, %91
%93 = bitcast float %48 to i32
%94 = bitcast float %49 to i32
%95 = insertelement <2 x i32> undef, i32 %93, i32 0
%96 = insertelement <2 x i32> %95, i32 %94, i32 1
%97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %44, <16 x i8> %47, i32 2)
%98 = extractelement <4 x float> %97, i32 0
%99 = extractelement <4 x float> %97, i32 1
%100 = fadd float %98, -5.000000e-01
%101 = fadd float %99, -5.000000e-01
%102 = call float @llvm.sqrt.f32(float %92)
%103 = fsub float %87, %100
%104 = fsub float %88, %101
%105 = fmul float %103, %103
%106 = fmul float %104, %104
%107 = fadd float %106, %105
%108 = fmul float %102, %102
%109 = fadd float %107, %108
%110 = call float @llvm.AMDGPU.rsq.clamped.f32(float %109)
%111 = fmul float %103, %110
%112 = fmul float %104, %110
%113 = fmul float %102, %110
%114 = fmul float %31, %111
%115 = fmul float %32, %112
%116 = fadd float %115, %114
%117 = fmul float %33, %113
%118 = fsub float %116, %117
%119 = fcmp ogt float %118, 0.000000e+00
%.sink.v = select i1 %119, float 5.000000e+00, float 0x3FA99999A0000000
%.sink = fmul float %118, %.sink.v
%120 = fadd float %.sink, 0x3FA99999A0000000
%121 = call float @llvm.AMDIL.clamp.(float %118, float 0.000000e+00, float 1.000000e+00)
%122 = fmul float %121, 2.000000e+00
%123 = fadd float %122, 7.500000e-01
%124 = fmul float %72, %123
%125 = fmul float %73, %123
%126 = fmul float %74, %123
%127 = call float @llvm.AMDIL.clamp.(float %120, float 0.000000e+00, float 1.000000e+00)
%128 = fmul float %34, 0x3FD3333340000000
%129 = fmul float %35, 0x3FE3333340000000
%130 = fadd float %129, %128
%131 = fmul float %36, 0x3FB99999A0000000
%132 = fadd float %131, %130
%133 = fsub float 0x3FF2E147A0000000, %132
%134 = call float @llvm.pow.f32(float %133, float 3.000000e+00)
%135 = fadd float %134, -2.500000e-01
%136 = call float @llvm.AMDIL.clamp.(float %135, float 0.000000e+00, float 1.000000e+00)
%137 = fmul float %127, %136
%138 = fmul float %68, %137
%139 = fmul float %138, %37
%140 = call i32 @llvm.SI.packf16(float %124, float %125)
%141 = bitcast i32 %140 to float
%142 = call i32 @llvm.SI.packf16(float %126, float %139)
%143 = bitcast i32 %142 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %141, float %143, float %141, float %143)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.sqrt.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_mov_b32 m0, s9 ; BEFC0309
v_mov_b32_e32 v2, 0x3e99999a ; 7E0402FF 3E99999A
v_mov_b32_e32 v3, 0x3f19999a ; 7E0602FF 3F19999A
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
v_mov_b32_e32 v5, 0x40400000 ; 7E0A02FF 40400000
v_mov_b32_e32 v6, 0x40a00000 ; 7E0C02FF 40A00000
v_mov_b32_e32 v7, 0x3d4ccccd ; 7E0E02FF 3D4CCCCD
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000
v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001
v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100
v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101
s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500
s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504
s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700
s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0xc ; C203810C
s_buffer_load_dword s32, s[0:3], 0xd ; C210010D
s_buffer_load_dword s33, s[0:3], 0xe ; C210810E
s_buffer_load_dword s34, s[0:3], 0xf ; C211010F
image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[8:11] ; F0800F00 00440A08
image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[24:31], s[12:15] ; F0800300 00660008
s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v14, s4, v5 ; 101C0A04
v_mul_f32_e32 v15, s5, v5 ; 101E0A05
v_mul_f32_e32 v16, s6, v5 ; 10200A06
s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111
s_buffer_load_dword s5, s[0:3], 0x12 ; C2028112
s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114
s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115
s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116
s_buffer_load_dword s0, s[0:3], 0x18 ; C2000118
s_waitcnt vmcnt(1) ; BF8C0771
v_mul_f32_e32 v17, v2, v10 ; 10221502
v_mac_f32_e32 v17, v3, v11 ; 3E221703
v_mac_f32_e32 v17, v4, v12 ; 3E221904
v_mul_f32_e32 v17, v13, v17 ; 1022230D
v_mul_f32_e32 v10, 0.5, v10 ; 101414F0
v_mac_f32_e32 v10, 0.5, v17 ; 3E1422F0
v_mul_f32_e32 v10, v14, v10 ; 1014150E
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v2, s6, v2 ; 10040406
v_mac_f32_e32 v2, s9, v3 ; 3E040609
v_mac_f32_e32 v2, s10, v4 ; 3E04080A
v_sub_f32_e32 v2, 0x3f970a3d, v2 ; 080404FF 3F970A3D
v_log_f32_e32 v2, v2 ; 7E044F02
v_mul_f32_e32 v3, 0.5, v11 ; 100616F0
v_mac_f32_e32 v3, 0.5, v17 ; 3E0622F0
v_mul_f32_e32 v3, v15, v3 ; 1006070F
v_mul_legacy_f32_e32 v2, v5, v2 ; 0E040505
v_exp_f32_e32 v2, v2 ; 7E044B02
v_mul_f32_e32 v4, 0.5, v12 ; 100818F0
v_mul_f32_e32 v5, 0.5, v13 ; 100A1AF0
v_mac_f32_e32 v4, 0.5, v17 ; 3E0822F0
v_mac_f32_e32 v5, 0.5, v17 ; 3E0A22F0
v_mul_f32_e32 v4, v16, v4 ; 10080910
v_mov_b32_e32 v11, s7 ; 7E160207
v_sub_f32_e32 v11, s33, v11 ; 08161621
v_mov_b32_e32 v12, s33 ; 7E180221
v_add_f32_e32 v12, s7, v12 ; 06181807
v_mad_f32 v8, 0.5, -v12, v8 ; D2820008 442218F0
v_rcp_f32_e32 v11, v11 ; 7E16550B
v_mov_b32_e32 v12, s32 ; 7E180220
v_sub_f32_e32 v12, s34, v12 ; 08181822
v_mov_b32_e32 v13, s34 ; 7E1A0222
v_add_f32_e32 v13, s32, v13 ; 061A1A20
v_rcp_f32_e32 v12, v12 ; 7E18550C
v_mad_f32 v9, 0.5, -v13, v9 ; D2820009 44261AF0
v_mul_f32_e32 v13, v11, v8 ; 101A110B
v_mad_f32 v8, v11, v8, v13 ; D2820008 0436110B
v_mul_f32_e32 v11, v12, v9 ; 1016130C
v_mad_f32 v9, v12, v9, v11 ; D2820009 042E130C
v_mad_f32 v8, -v8, v8, 1.0 ; D2820008 23CA1108
v_mad_f32 v8, -v9, v9, v8 ; D2820008 24221309
v_add_f32_e32 v0, -0.5, v0 ; 060000F1
v_add_f32_e32 v1, -0.5, v1 ; 060202F1
v_sqrt_f32_e32 v8, v8 ; 7E106708
v_mad_f32 v0, 2.0, v13, -v0 ; D2820000 84021AF4
v_mad_f32 v1, 2.0, v11, -v1 ; D2820001 840616F4
v_mul_f32_e32 v9, v0, v0 ; 10120100
v_mac_f32_e32 v9, v1, v1 ; 3E120301
v_mac_f32_e32 v9, v8, v8 ; 3E121108
v_rsq_clamp_f32_e32 v9, v9 ; 7E125909
v_mul_f32_e32 v0, v9, v0 ; 10000109
v_mul_f32_e32 v1, v9, v1 ; 10020309
v_mul_f32_e32 v8, v9, v8 ; 10101109
v_mul_f32_e32 v0, s8, v0 ; 10000008
v_mac_f32_e32 v0, s4, v1 ; 3E000204
v_mad_f32 v0, -s5, v8, v0 ; D2820000 24021005
v_cmp_lt_f32_e32 vcc, 0, v0 ; 7C020080
v_cndmask_b32_e32 v1, v7, v6 ; 00020D07
v_mac_f32_e32 v7, v1, v0 ; 3E0E0101
v_add_f32_e64 v0, 0, v0 clamp ; D2060800 00020080
v_madak_f32_e32 v0, 2.0, v0, 0x3f400000 ; 420000F4 3F400000
v_mul_f32_e32 v1, v0, v10 ; 10021500
v_mul_f32_e32 v3, v0, v3 ; 10060700
v_mul_f32_e32 v0, v0, v4 ; 10000900
v_add_f32_e64 v4, 0, v7 clamp ; D2060804 00020E80
v_mov_b32_e32 v6, 0xbe800000 ; 7E0C02FF BE800000
v_add_f32_e32 v2, v2, v6 ; 06040D02
v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480
v_mul_f32_e32 v2, v2, v4 ; 10040902
v_mul_f32_e32 v2, v2, v5 ; 10040B02
v_mul_f32_e32 v2, s0, v2 ; 10040400
v_cvt_pkrtz_f16_f32_e32 v1, v1, v3 ; 5E020701
v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500
exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 40
VGPRS: 20
Code Size: 552 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
VERT
DCL IN[0]
DCL IN[1]
DCL IN[2]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
DCL OUT[2], GENERIC[1]
DCL OUT[3], GENERIC[2]
DCL OUT[4], GENERIC[3]
DCL OUT[5], GENERIC[4]
DCL OUT[6], GENERIC[5]
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
DCL CONST[0..15]
DCL CONST[17..20]
DCL TEMP[0..8], LOCAL
IMM[0] FLT32 { 0.2500, -1.0000, 10.0000, 0.4999}
IMM[1] INT32 {256, 0, 1, 2}
IMM[2] FLT32 { 1.0000, 0.0000, 0.1000, 0.0039}
IMM[3] FLT32 { 16.0000, -8.0000, 4.0000, -2.0000}
IMM[4] INT32 {4, 0, 0, 0}
IMM[5] FLT32 { 0.5000, 0.0000, 0.0000, 0.0000}
0: MUL TEMP[0].x, IN[2].xxxx, IMM[0].xxxx
1: F2I TEMP[0].x, TEMP[0].xxxx
2: F2I TEMP[1].x, IN[2].yyyy
3: IDIV TEMP[2].x, TEMP[1].xxxx, IMM[1].xxxx
4: I2F TEMP[3].x, TEMP[0].xxxx
5: I2F TEMP[4].x, TEMP[2].xxxx
6: MOV TEMP[3].y, TEMP[4].xxxx
7: UMUL TEMP[2].x, TEMP[2].xxxx, IMM[1].xxxx
8: INEG TEMP[2].x, TEMP[2].xxxx
9: UADD TEMP[2].x, TEMP[1].xxxx, TEMP[2].xxxx
10: I2F TEMP[2].x, TEMP[2].xxxx
11: MOV TEMP[3].z, TEMP[2].xxxx
12: ADD TEMP[2].xyz, TEMP[3].xyzz, IMM[0].yyyy
13: I2F TEMP[1].x, TEMP[1].xxxx
14: ADD TEMP[1].x, IN[2].yyyy, -TEMP[1].xxxx
15: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww
16: F2I TEMP[1].x, TEMP[1].xxxx
17: USEQ TEMP[4].x, TEMP[1].xxxx, IMM[1].yyyy
18: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].xxxx
19: USEQ TEMP[5].x, TEMP[1].xxxx, IMM[1].zzzz
20: AND TEMP[5].x, TEMP[5].xxxx, IMM[2].xxxx
21: MOV TEMP[4].y, TEMP[5].xxxx
22: USEQ TEMP[1].x, TEMP[1].xxxx, IMM[1].wwww
23: AND TEMP[1].x, TEMP[1].xxxx, IMM[2].xxxx
24: MOV TEMP[4].z, TEMP[1].xxxx
25: MOV TEMP[1].xyz, TEMP[4].xyzx
26: MOV TEMP[4].w, IMM[2].yyyy
27: MOV TEMP[4].xyz, TEMP[3].xyzx
28: MOV TEMP[3].y, IMM[2].yzyy
29: DP4 TEMP[4].x, TEMP[1], TEMP[4]
30: MUL TEMP[3].x, TEMP[4].xxxx, IMM[2].wwww
31: MOV TEMP[3].xy, TEMP[3].xyyy
32: MOV TEMP[3].w, IMM[2].yyyy
33: TXL TEMP[3].xy, TEMP[3], SAMP[0], 2D
34: MAD TEMP[4].x, TEMP[3].xxxx, IMM[3].xxxx, IMM[3].yyyy
35: MOV TEMP[2].w, TEMP[4].xxxx
36: MUL TEMP[3].x, TEMP[3].yyyy, IMM[3].zzzz
37: MOV TEMP[1].w, TEMP[3].xxxx
38: UMUL TEMP[0].x, IMM[4].xxxx, TEMP[0].xxxx
39: I2F TEMP[0].x, TEMP[0].xxxx
40: ADD TEMP[0].x, IN[2].xxxx, -TEMP[0].xxxx
41: ADD TEMP[0].x, TEMP[0].xxxx, IMM[3].wwww
42: MUL TEMP[3], CONST[17], IN[0].xxxx
43: MAD TEMP[3], CONST[18], IN[0].yyyy, TEMP[3]
44: MAD TEMP[3], CONST[19], IN[0].zzzz, TEMP[3]
45: MAD TEMP[3], CONST[20], IN[0].wwww, TEMP[3]
46: MOV TEMP[4].x, CONST[12].xxxx
47: MOV TEMP[4].y, CONST[13].xxxx
48: MOV TEMP[4].z, CONST[14].xxxx
49: MOV TEMP[5].x, CONST[12].yyyy
50: MOV TEMP[5].y, CONST[13].yyyy
51: MOV TEMP[5].z, CONST[14].yyyy
52: MOV TEMP[6].x, CONST[12].zzzz
53: MOV TEMP[6].y, CONST[13].zzzz
54: MOV TEMP[6].z, CONST[14].zzzz
55: MUL TEMP[4].xyz, TEMP[4].xyzz, IN[1].xxxx
56: MAD TEMP[4].xyz, TEMP[5].xyzz, IN[1].yyyy, TEMP[4].xyzz
57: MAD TEMP[0].xyz, TEMP[6].xyzz, TEMP[0].xxxx, TEMP[4].xyzz
58: DP3 TEMP[4].x, TEMP[0].xyzz, TEMP[0].xyzz
59: RSQ TEMP[4].x, TEMP[4].xxxx
60: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
61: MOV TEMP[4].w, IMM[2].xxxx
62: MOV TEMP[4].xyz, TEMP[0].xyzx
63: DP4 TEMP[5].x, CONST[1], TEMP[4]
64: DP4 TEMP[6].x, CONST[2], TEMP[4]
65: MOV TEMP[5].y, TEMP[6].xxxx
66: DP4 TEMP[4].x, CONST[3], TEMP[4]
67: MOV TEMP[5].z, TEMP[4].xxxx
68: MUL TEMP[4], TEMP[0].xyzz, TEMP[0].yzzx
69: DP4 TEMP[6].x, CONST[4], TEMP[4]
70: DP4 TEMP[7].x, CONST[5], TEMP[4]
71: MOV TEMP[6].y, TEMP[7].xxxx
72: DP4 TEMP[4].x, CONST[6], TEMP[4]
73: MOV TEMP[6].z, TEMP[4].xxxx
74: MUL TEMP[4].xyw, TEMP[3], IMM[5].xxxx
75: MOV TEMP[7].x, TEMP[4].xxxx
76: MUL TEMP[8].x, TEMP[4].yyyy, CONST[0].xxxx
77: MOV TEMP[7].y, TEMP[8].xxxx
78: ADD TEMP[4].xy, TEMP[7].xyyy, TEMP[4].wwww
79: MOV TEMP[4].zw, TEMP[3].wwzw
80: MUL TEMP[7].x, TEMP[0].yyyy, TEMP[0].yyyy
81: MAD TEMP[7].x, TEMP[0].xxxx, TEMP[0].xxxx, -TEMP[7].xxxx
82: MAD TEMP[6].xyz, CONST[7].xyzz, TEMP[7].xxxx, TEMP[6].xyzz
83: ADD TEMP[5].xyz, TEMP[6].xyzz, TEMP[5].xyzz
84: MOV TEMP[0].yzw, TEMP[0].yxyz
85: MUL TEMP[6], CONST[8], IN[0].xxxx
86: MAD TEMP[6], CONST[9], IN[0].yyyy, TEMP[6]
87: MAD TEMP[6], CONST[10], IN[0].zzzz, TEMP[6]
88: MAD TEMP[6].xyz, CONST[11], IN[0].wwww, TEMP[6]
89: MOV TEMP[6].xyz, TEMP[6].xyzx
90: MOV TEMP[6].w, TEMP[5].xxxx
91: MOV TEMP[5].xy, TEMP[5].yzyy
92: MOV TEMP[0].x, TEMP[3].zzzz
93: MOV OUT[5], TEMP[6]
94: MOV OUT[1], TEMP[2]
95: MOV OUT[2], TEMP[1]
96: MOV OUT[4], TEMP[0]
97: MOV OUT[3], TEMP[4]
98: MOV OUT[0], TEMP[3]
99: MOV OUT[6], TEMP[5]
100: END
; ModuleID = 'tgsi'
; Shader entry point. Reads 65 floats from one constant buffer, fetches three
; vertex inputs, performs one texture lookup and issues seven exports.
; Arguments actually referenced in the body:
;   %1 - array of <16 x i8> descriptors; element 0 is the buffer used by every
;        llvm.SI.load.const call
;   %2 - first 16 bytes are passed as the <16 x i8> operand of the sample call
;   %3 - first 32 bytes are passed as the <32 x i8> operand of the sample call
;   %4 - array of <16 x i8> descriptors used for the three vertex-input fetches
;   %5, %7 - added together to form the index given to llvm.SI.vs.load.input
; %0, %6, %8, %9 and %10 are not referenced.
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 {
main_body:
; Descriptor 0 of %1: the source buffer for all constant loads below.
%11 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%12 = load <16 x i8>, <16 x i8> addrspace(2)* %11, align 16, !tbaa !0
; Scalar constant loads. The offset operand advances by 4 per float, so each
; group of 16 corresponds to one four-component constant slot
; (presumably TGSI CONST[offset / 16] - confirm against the TGSI listing).
; Offset 0 only: scale factor used in %244.
%13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0)
; Offsets 16..60: three rows of four floats, used for %198..%215.
%14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16)
%15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20)
%16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24)
%17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28)
%18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32)
%19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36)
%20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40)
%21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44)
%22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48)
%23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52)
%24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56)
%25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60)
; Offsets 64..108: three rows of four floats, used for %220..%240.
%26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64)
%27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68)
%28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72)
%29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76)
%30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80)
%31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84)
%32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88)
%33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92)
%34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96)
%35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100)
%36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104)
%37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108)
; Offsets 112..120: three floats, used for %250..%254.
%38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112)
%39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116)
%40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120)
; Offsets 128..184: four groups of three floats, used for %259..%279.
%41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128)
%42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132)
%43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136)
%44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144)
%45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148)
%46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152)
%47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160)
%48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164)
%49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168)
%50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176)
%51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180)
%52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184)
; Offsets 192..232: three groups of three floats, used for %174..%188.
%53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192)
%54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196)
%55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200)
%56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208)
%57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212)
%58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216)
%59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224)
%60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228)
%61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232)
; Offsets 272..332: sixteen consecutive floats, used for %146..%173.
%62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 272)
%63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 276)
%64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 280)
%65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 284)
%66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 288)
%67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 292)
%68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 296)
%69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 300)
%70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 304)
%71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 308)
%72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 312)
%73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 316)
%74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 320)
%75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 324)
%76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 328)
%77 = call float @llvm.SI.load.const(<16 x i8> %12, i32 332)
; Operands for the sample call at %136: %79 is the <32 x i8> argument and %81
; the <16 x i8> argument (presumably texture resource and sampler state - confirm).
%78 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%79 = load <32 x i8>, <32 x i8> addrspace(2)* %78, align 32, !tbaa !0
%80 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%81 = load <16 x i8>, <16 x i8> addrspace(2)* %80, align 16, !tbaa !0
; Vertex input 0 (descriptor 0 of %4): all four components, %86..%89.
%82 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 0
%83 = load <16 x i8>, <16 x i8> addrspace(2)* %82, align 16, !tbaa !0
%84 = add i32 %5, %7
%85 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %83, i32 0, i32 %84)
%86 = extractelement <4 x float> %85, i32 0
%87 = extractelement <4 x float> %85, i32 1
%88 = extractelement <4 x float> %85, i32 2
%89 = extractelement <4 x float> %85, i32 3
; Vertex input 1 (descriptor 1 of %4): only components 0 and 1 are used.
%90 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 1
%91 = load <16 x i8>, <16 x i8> addrspace(2)* %90, align 16, !tbaa !0
%92 = add i32 %5, %7
%93 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %91, i32 0, i32 %92)
%94 = extractelement <4 x float> %93, i32 0
%95 = extractelement <4 x float> %93, i32 1
; Vertex input 2 (descriptor 2 of %4): only components 0 and 1 are used.
%96 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %4, i64 0, i64 2
%97 = load <16 x i8>, <16 x i8> addrspace(2)* %96, align 16, !tbaa !0
%98 = add i32 %5, %7
%99 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %97, i32 0, i32 %98)
%100 = extractelement <4 x float> %99, i32 0
%101 = extractelement <4 x float> %99, i32 1
; Split input 2 into integer fields:
;   %103 = int(in2.x * 0.25)
;   %104 = int(in2.y), %105 = %104 / 256 (signed), %109 = %104 - %105 * 256
; %106, %107 and %110 are the float forms of %103, %105 and %109.
%102 = fmul float %100, 2.500000e-01
%103 = fptosi float %102 to i32
%104 = fptosi float %101 to i32
%105 = sdiv i32 %104, 256
%106 = sitofp i32 %103 to float
%107 = sitofp i32 %105 to float
%108 = shl nsw i32 %105, 8
%109 = sub i32 %104, %108
%110 = sitofp i32 %109 to float
; Each field minus 1.0; these become the first three values of export 32.
%111 = fadd float %106, -1.000000e+00
%112 = fadd float %107, -1.000000e+00
%113 = fadd float %110, -1.000000e+00
; %118 = int((in2.y - float(int(in2.y))) * 10 + ~0.4999): an integer selector
; taken from the part of in2.y that fptosi discarded.
%114 = sitofp i32 %104 to float
%115 = fsub float %101, %114
%116 = fmul float %115, 1.000000e+01
%117 = fadd float %116, 0x3FDFFE5CA0000000
%118 = fptosi float %117 to i32
; Weights %120, %122, %124 are 1.0 when the selector equals 0, 1, 2
; respectively, otherwise 0.0.
%119 = icmp eq i32 %118, 0
%120 = select i1 %119, float 1.000000e+00, float 0.000000e+00
%121 = icmp eq i32 %118, 1
%122 = select i1 %121, float 1.000000e+00, float 0.000000e+00
%123 = icmp eq i32 %118, 2
%124 = select i1 %123, float 1.000000e+00, float 0.000000e+00
; Weighted sum of the three fields; it picks %106, %107 or %110 when the
; selector is 0, 1 or 2, and is 0.0 for any other selector value.
%125 = fmul float %120, %106
%126 = fmul float %122, %107
%127 = fadd float %125, %126
%128 = fmul float %124, %110
%129 = fadd float %127, %128
%130 = fadd float %129, 0.000000e+00
; Scale by ~0.0039216 (about 1/255) to form the first sample coordinate.
%131 = fmul float %130, 0x3F70101020000000
%132 = bitcast float %131 to i32
; Sample address vector: element 0 = %131, element 1 = 1036831949
; (0x3DCCCCCD, the bit pattern of float ~0.1), element 2 = 0;
; element 3 is left undef.
%133 = insertelement <4 x i32> undef, i32 %132, i32 0
%134 = insertelement <4 x i32> %133, i32 1036831949, i32 1
%135 = insertelement <4 x i32> %134, i32 0, i32 2
%136 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %135, <32 x i8> %79, <16 x i8> %81, i32 2)
; Only components 0 and 1 of the sample are used:
;   %140 = sample.x * 16 - 8, %141 = sample.y * 4
%137 = extractelement <4 x float> %136, i32 0
%138 = extractelement <4 x float> %136, i32 1
%139 = fmul float %137, 1.600000e+01
%140 = fadd float %139, -8.000000e+00
%141 = fmul float %138, 4.000000e+00
; %145 = in2.x - float(%103 * 4) - 2.0
%142 = shl i32 %103, 2
%143 = sitofp i32 %142 to float
%144 = fsub float %100, %143
%145 = fadd float %144, -2.000000e+00
; Four-component result (%167, %169, %171, %173): each is the sum over the four
; components of input 0 multiplied by constants %62..%77 (offsets 272..332).
%146 = fmul float %62, %86
%147 = fmul float %63, %86
%148 = fmul float %64, %86
%149 = fmul float %65, %86
%150 = fmul float %66, %87
%151 = fadd float %150, %146
%152 = fmul float %67, %87
%153 = fadd float %152, %147
%154 = fmul float %68, %87
%155 = fadd float %154, %148
%156 = fmul float %69, %87
%157 = fadd float %156, %149
%158 = fmul float %70, %88
%159 = fadd float %158, %151
%160 = fmul float %71, %88
%161 = fadd float %160, %153
%162 = fmul float %72, %88
%163 = fadd float %162, %155
%164 = fmul float %73, %88
%165 = fadd float %164, %157
%166 = fmul float %74, %89
%167 = fadd float %166, %159
%168 = fmul float %75, %89
%169 = fadd float %168, %161
%170 = fmul float %76, %89
%171 = fadd float %170, %163
%172 = fmul float %77, %89
%173 = fadd float %172, %165
; Three-component vector (%184, %186, %188): constants %53..%61 applied to
; (in1.x, in1.y, %145).
%174 = fmul float %53, %94
%175 = fmul float %56, %94
%176 = fmul float %59, %94
%177 = fmul float %54, %95
%178 = fadd float %177, %174
%179 = fmul float %57, %95
%180 = fadd float %179, %175
%181 = fmul float %60, %95
%182 = fadd float %181, %176
%183 = fmul float %55, %145
%184 = fadd float %183, %178
%185 = fmul float %58, %145
%186 = fadd float %185, %180
%187 = fmul float %61, %145
%188 = fadd float %187, %182
; Scale that vector by the clamped reciprocal square root of its squared
; length, giving (%195, %196, %197).
%189 = fmul float %184, %184
%190 = fmul float %186, %186
%191 = fadd float %190, %189
%192 = fmul float %188, %188
%193 = fadd float %191, %192
%194 = call float @llvm.AMDGPU.rsq.clamped.f32(float %193)
%195 = fmul float %184, %194
%196 = fmul float %186, %194
%197 = fmul float %188, %194
; Three sums %203, %209, %215: dot product of (%195, %196, %197) with three
; constants, plus a fourth constant (rows at offsets 16, 32 and 48).
%198 = fmul float %14, %195
%199 = fmul float %15, %196
%200 = fadd float %198, %199
%201 = fmul float %16, %197
%202 = fadd float %200, %201
%203 = fadd float %202, %17
%204 = fmul float %18, %195
%205 = fmul float %19, %196
%206 = fadd float %204, %205
%207 = fmul float %20, %197
%208 = fadd float %206, %207
%209 = fadd float %208, %21
%210 = fmul float %22, %195
%211 = fmul float %23, %196
%212 = fadd float %210, %211
%213 = fmul float %24, %197
%214 = fadd float %212, %213
%215 = fadd float %214, %25
; Pairwise products of the scaled vector: x*y, y*z, z*z, z*x.
%216 = fmul float %195, %196
%217 = fmul float %196, %197
%218 = fmul float %197, %197
%219 = fmul float %197, %195
; Three sums %226, %233, %240: those four products dotted with the constant
; rows at offsets 64, 80 and 96.
%220 = fmul float %26, %216
%221 = fmul float %27, %217
%222 = fadd float %220, %221
%223 = fmul float %28, %218
%224 = fadd float %222, %223
%225 = fmul float %29, %219
%226 = fadd float %224, %225
%227 = fmul float %30, %216
%228 = fmul float %31, %217
%229 = fadd float %227, %228
%230 = fmul float %32, %218
%231 = fadd float %229, %230
%232 = fmul float %33, %219
%233 = fadd float %231, %232
%234 = fmul float %34, %216
%235 = fmul float %35, %217
%236 = fadd float %234, %235
%237 = fmul float %36, %218
%238 = fadd float %236, %237
%239 = fmul float %37, %219
%240 = fadd float %238, %239
; %245 = 0.5 * %167 + 0.5 * %173
; %246 = 0.5 * %169 * %13 + 0.5 * %173   (%13 is the constant at offset 0)
%241 = fmul float %167, 5.000000e-01
%242 = fmul float %169, 5.000000e-01
%243 = fmul float %173, 5.000000e-01
%244 = fmul float %242, %13
%245 = fadd float %241, %243
%246 = fadd float %244, %243
; %249 = x*x - y*y of the scaled vector.
%247 = fmul float %196, %196
%248 = fmul float %195, %195
%249 = fsub float %248, %247
; Final three sums %256, %257, %258: constants %38..%40 times %249, plus the
; product-based sums (%226, %233, %240), plus the linear sums (%203, %209, %215).
%250 = fmul float %38, %249
%251 = fadd float %250, %226
%252 = fmul float %39, %249
%253 = fadd float %252, %233
%254 = fmul float %40, %249
%255 = fadd float %254, %240
%256 = fadd float %251, %203
%257 = fadd float %253, %209
%258 = fadd float %255, %215
; Three-component result (%275, %277, %279): the four components of input 0
; multiplied by constants %41..%52 (offsets 128..184) and summed.
%259 = fmul float %41, %86
%260 = fmul float %42, %86
%261 = fmul float %43, %86
%262 = fmul float %44, %87
%263 = fadd float %262, %259
%264 = fmul float %45, %87
%265 = fadd float %264, %260
%266 = fmul float %46, %87
%267 = fadd float %266, %261
%268 = fmul float %47, %88
%269 = fadd float %268, %263
%270 = fmul float %48, %88
%271 = fadd float %270, %265
%272 = fmul float %49, %88
%273 = fadd float %272, %267
%274 = fmul float %50, %89
%275 = fadd float %274, %269
%276 = fmul float %51, %89
%277 = fadd float %276, %271
%278 = fmul float %52, %89
%279 = fadd float %278, %273
; Exports. The fourth operand is the target: 32..37 here, then 12 last.
; In the simple shader at the top of this file, target 12 carries the output
; declared POSITION and target 32 the GENERIC output, so 32..37 are presumably
; the generic outputs and 12 the position - confirm.
; The third operand is 1 only on the final export.
; Target 37 deliberately carries %258 twice, followed by 0.0.
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %111, float %112, float %113, float %140)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %120, float %122, float %124, float %141)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %245, float %246, float %171, float %173)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %171, float %195, float %196, float %197)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %275, float %277, float %279, float %256)
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %257, float %258, float %258, float 0.000000e+00)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %167, float %169, float %171, float %173)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
!0 = !{!"const", null, i32 1}
Shader Disassembly:
; Machine code for the @main function above. The trailing "; XXXXXXXX" on each
; line is the instruction encoding emitted by the disassembler.
; Literal constants kept in VGPRs for later use: v1 = 0x3efff2e5 (~0.4999),
; v2 = 0xc1000000 (-8.0), v5 = 0 (last value of export 37).
v_mov_b32_e32 v1, 0x3efff2e5 ; 7E0202FF 3EFFF2E5
v_mov_b32_e32 v2, 0xc1000000 ; 7E0402FF C1000000
v_mov_b32_e32 v5, 0 ; 7E0A0280
; v0 = s10 + v0: the index used by all three buffer_load_format_xyzw fetches.
v_add_i32_e32 v0, s10, v0 ; 4A00000A
; Descriptor loads: s[12:15] is the buffer read by every s_buffer_load_dword,
; s[0:3] / s[20:23] / s[24:27] feed the three vertex fetches, and
; s[4:11] with s[16:19] are the two descriptor operands of image_sample_l.
s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300
s_load_dwordx4 s[16:19], s[4:5], 0x0 ; C0880500
s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900
s_load_dwordx4 s[20:23], s[8:9], 0x4 ; C08A0904
s_load_dwordx4 s[24:27], s[8:9], 0x8 ; C08C0908
s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s39, s[12:15], 0x30 ; C2138D30
s_buffer_load_dword s40, s[12:15], 0x31 ; C2140D31
; Vertex fetches. The third fetch writes v[12:15] and so overwrites v12/v13 of
; the second fetch; only v10/v11 of the second fetch are read afterwards.
buffer_load_format_xyzw v[6:9], v0, s[0:3], 0 idxen ; E00C2000 80000600
buffer_load_format_xyzw v[10:13], v0, s[20:23], 0 idxen ; E00C2000 80050A00
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
buffer_load_format_xyzw v[12:15], v0, s[24:27], 0 idxen ; E00C2000 80060C00
; First batch of scalar constant loads. The offsets here are the IR
; llvm.SI.load.const offsets divided by 4.
s_buffer_load_dword s1, s[12:15], 0x32 ; C2008D32
s_buffer_load_dword s41, s[12:15], 0x34 ; C2148D34
s_buffer_load_dword s42, s[12:15], 0x35 ; C2150D35
s_buffer_load_dword s2, s[12:15], 0x36 ; C2010D36
s_buffer_load_dword s43, s[12:15], 0x38 ; C2158D38
s_buffer_load_dword s44, s[12:15], 0x39 ; C2160D39
s_buffer_load_dword s3, s[12:15], 0x3a ; C2018D3A
s_buffer_load_dword s45, s[12:15], 0x44 ; C2168D44
s_buffer_load_dword s46, s[12:15], 0x45 ; C2170D45
s_buffer_load_dword s47, s[12:15], 0x46 ; C2178D46
s_buffer_load_dword s48, s[12:15], 0x47 ; C2180D47
s_buffer_load_dword s49, s[12:15], 0x48 ; C2188D48
s_buffer_load_dword s50, s[12:15], 0x49 ; C2190D49
s_buffer_load_dword s51, s[12:15], 0x4a ; C2198D4A
s_buffer_load_dword s52, s[12:15], 0x4b ; C21A0D4B
s_buffer_load_dword s28, s[12:15], 0x4c ; C20E0D4C
s_buffer_load_dword s30, s[12:15], 0x4d ; C20F0D4D
s_buffer_load_dword s29, s[12:15], 0x4e ; C20E8D4E
s_buffer_load_dword s0, s[12:15], 0xd ; C2000D0D
s_buffer_load_dword s31, s[12:15], 0x4f ; C20F8D4F
s_buffer_load_dword s26, s[12:15], 0x50 ; C20D0D50
s_buffer_load_dword s23, s[12:15], 0x51 ; C20B8D51
s_buffer_load_dword s24, s[12:15], 0x52 ; C20C0D52
s_buffer_load_dword s22, s[12:15], 0x53 ; C20B0D53
s_buffer_load_dword s35, s[12:15], 0x22 ; C2118D22
s_buffer_load_dword s37, s[12:15], 0x24 ; C2128D24
s_buffer_load_dword s36, s[12:15], 0x25 ; C2120D25
s_buffer_load_dword s34, s[12:15], 0x26 ; C2110D26
s_buffer_load_dword s33, s[12:15], 0x28 ; C2108D28
s_buffer_load_dword s32, s[12:15], 0x29 ; C2100D29
s_buffer_load_dword s27, s[12:15], 0x2a ; C20D8D2A
s_buffer_load_dword s25, s[12:15], 0x2c ; C20C8D2C
s_buffer_load_dword s21, s[12:15], 0x2d ; C20A8D2D
s_buffer_load_dword s20, s[12:15], 0x2e ; C20A0D2E
s_buffer_load_dword s38, s[12:15], 0x20 ; C2130D20
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
; Multiply-accumulate chains over the first fetch (v6..v9):
;   v0, v14, v11, v17 <- constants at 0x44..0x53 (exported last, target 12)
;   v18, v19, v6      <- constants at 0x20..0x2e (exported to target 36)
; and the start of the v15 / v16 / v10 chains over the second fetch (v10, v11).
v_mul_f32_e32 v0, s45, v6 ; 10000C2D
v_mul_f32_e32 v14, s46, v6 ; 101C0C2E
v_mul_f32_e32 v15, s39, v10 ; 101E1427
v_mul_f32_e32 v16, s41, v10 ; 10201429
v_mul_f32_e32 v10, s43, v10 ; 1014142B
v_mac_f32_e32 v0, s49, v7 ; 3E000E31
v_mac_f32_e32 v14, s50, v7 ; 3E1C0E32
; s39 is reloaded here with the constant at 0x21 after its first value was
; consumed by the v15 multiply above.
s_buffer_load_dword s39, s[12:15], 0x21 ; C2138D21
v_mac_f32_e32 v15, s40, v11 ; 3E1E1628
v_mac_f32_e32 v16, s42, v11 ; 3E20162A
v_mac_f32_e32 v10, s44, v11 ; 3E14162C
v_mul_f32_e32 v11, s47, v6 ; 10160C2F
v_mac_f32_e32 v11, s51, v7 ; 3E160E33
v_mul_f32_e32 v17, s48, v6 ; 10220C30
v_mac_f32_e32 v17, s52, v7 ; 3E220E34
v_mul_f32_e32 v18, s38, v6 ; 10240C26
v_mac_f32_e32 v18, s37, v7 ; 3E240E25
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v19, s39, v6 ; 10260C27
v_mac_f32_e32 v19, s36, v7 ; 3E260E24
v_mul_f32_e32 v6, s35, v6 ; 100C0C23
v_mac_f32_e32 v6, s34, v7 ; 3E0C0E22
v_mac_f32_e32 v0, s28, v8 ; 3E00101C
v_mac_f32_e32 v14, s30, v8 ; 3E1C101E
v_mac_f32_e32 v11, s29, v8 ; 3E16101D
v_mac_f32_e32 v17, s31, v8 ; 3E22101F
v_mac_f32_e32 v18, s33, v8 ; 3E241021
v_mac_f32_e32 v19, s32, v8 ; 3E261020
v_mac_f32_e32 v6, s27, v8 ; 3E0C101B
v_mac_f32_e32 v0, s26, v9 ; 3E00121A
v_mac_f32_e32 v14, s23, v9 ; 3E1C1217
v_mac_f32_e32 v11, s24, v9 ; 3E161218
; Integer decode of the third fetch: v3 = int(v12 * 0.25), v4 = int(v13).
v_mul_f32_e32 v3, 0x3e800000, v12 ; 100618FF 3E800000
v_cvt_i32_f32_e32 v4, v13 ; 7E08110D
v_cvt_i32_f32_e32 v3, v3 ; 7E061103
v_mac_f32_e32 v17, s22, v9 ; 3E221216
v_mac_f32_e32 v18, s25, v9 ; 3E241219
v_cvt_f32_i32_e32 v7, v4 ; 7E0E0B04
v_lshlrev_b32_e32 v8, 2, v3 ; 34100682
v_cvt_f32_i32_e32 v8, v8 ; 7E100B08
v_mac_f32_e32 v19, s21, v9 ; 3E261215
v_mac_f32_e32 v6, s20, v9 ; 3E0C1214
; v7 = v13 - float(int(v13)); v8 = v12 - float(int(v12 * 0.25) * 4).
v_subrev_f32_e32 v7, v7, v13 ; 0A0E1B07
v_subrev_f32_e32 v8, v8, v12 ; 0A101908
; v1 = int(v7 * 10.0 + ~0.4999): the selector compared against 0, 1, 2 below.
v_madmk_f32_e32 v1, v7, v1, 0x41200000 ; 40020307 41200000
; Signed divide of v4 by 256 without a divide instruction: add 255 when v4 is
; negative, then mask (for the remainder) and arithmetic-shift (for the quotient).
v_ashrrev_i32_e32 v7, 31, v4 ; 300E089F
v_lshrrev_b32_e32 v7, 24, v7 ; 2C0E0E98
v_cvt_i32_f32_e32 v1, v1 ; 7E021101
v_add_i32_e32 v7, v4, v7 ; 4A0E0F04
v_and_b32_e32 v9, 0xffffff00, v7 ; 36120EFF FFFFFF00
v_sub_i32_e32 v4, v4, v9 ; 4C081304
; One-hot float weights: v9, v12, v20 = 1.0 when the selector is 0, 1, 2.
v_cmp_eq_i32_e32 vcc, 0, v1 ; 7D040280
v_cndmask_b32_e64 v9, 0, 1.0, vcc ; D2000009 01A9E480
v_cmp_eq_i32_e32 vcc, 1, v1 ; 7D040281
v_cndmask_b32_e64 v12, 0, 1.0, vcc ; D200000C 01A9E480
v_ashrrev_i32_e32 v7, 8, v7 ; 300E0E88
v_cvt_f32_i32_e32 v7, v7 ; 7E0E0B07
v_cvt_f32_i32_e32 v13, v3 ; 7E1A0B03
v_cvt_f32_i32_e32 v4, v4 ; 7E080B04
v_cmp_eq_i32_e32 vcc, 2, v1 ; 7D040282
; Weighted sum of the three decoded fields, scaled by 0x3b808081 (~1/255),
; becomes the first sample coordinate in v3.
v_mul_f32_e32 v1, v7, v12 ; 10021907
v_mac_f32_e32 v1, v13, v9 ; 3E02130D
v_cndmask_b32_e64 v20, 0, 1.0, vcc ; D2000014 01A9E480
v_mac_f32_e32 v1, v4, v20 ; 3E022904
v_add_f32_e32 v1, 0, v1 ; 06020280
v_mul_f32_e32 v3, 0x3b808081, v1 ; 100602FF 3B808081
; Each decoded field minus 1.0 (first three values of export 32).
v_add_f32_e32 v1, -1.0, v13 ; 06021AF3
v_add_f32_e32 v7, -1.0, v7 ; 060E0EF3
v_add_f32_e32 v13, -1.0, v4 ; 061A08F3
; Second sample coordinate is the literal 0x3dcccccd (~0.1); v5 (0) follows it
; in the v[3:6] address operand.
v_mov_b32_e32 v4, 0x3dcccccd ; 7E0802FF 3DCCCCCD
image_sample_l v[3:4], 3, 0, 0, 0, 0, 0, 0, 0, v[3:6], s[4:11], s[16:19] ; F0900300 00810303
s_waitcnt vmcnt(0) ; BF8C0770
; v2 = sample.x * 16.0 + (-8.0).
v_madmk_f32_e32 v2, v3, v2, 0x41800000 ; 40040503 41800000
exp 15, 32, 0, 0, 0, v1, v7, v13, v2 ; F800020F 020D0701
s_waitcnt expcnt(0) ; BF8C070F
; v1 = sample.y * 4.0.
v_mul_f32_e32 v1, 4.0, v4 ; 100208F6
exp 15, 33, 0, 0, 0, v9, v12, v20, v1 ; F800021F 01140C09
; Second batch of scalar constant loads. This reuses s4..s11 and s16..s33, whose
; earlier contents (image descriptors and first-batch constants) are no longer read.
s_buffer_load_dword s4, s[12:15], 0x1c ; C2020D1C
s_buffer_load_dword s5, s[12:15], 0x1d ; C2028D1D
s_buffer_load_dword s6, s[12:15], 0x1e ; C2030D1E
s_buffer_load_dword s7, s[12:15], 0x0 ; C2038D00
s_buffer_load_dword s8, s[12:15], 0x4 ; C2040D04
s_buffer_load_dword s9, s[12:15], 0x5 ; C2048D05
s_buffer_load_dword s10, s[12:15], 0x6 ; C2050D06
s_buffer_load_dword s11, s[12:15], 0x7 ; C2058D07
s_buffer_load_dword s16, s[12:15], 0x8 ; C2080D08
s_buffer_load_dword s17, s[12:15], 0x9 ; C2088D09
s_buffer_load_dword s18, s[12:15], 0xa ; C2090D0A
s_buffer_load_dword s19, s[12:15], 0xb ; C2098D0B
s_buffer_load_dword s20, s[12:15], 0xc ; C20A0D0C
s_buffer_load_dword s21, s[12:15], 0xe ; C20A8D0E
s_buffer_load_dword s22, s[12:15], 0xf ; C20B0D0F
s_buffer_load_dword s23, s[12:15], 0x10 ; C20B8D10
s_buffer_load_dword s24, s[12:15], 0x11 ; C20C0D11
s_buffer_load_dword s25, s[12:15], 0x12 ; C20C8D12
s_buffer_load_dword s26, s[12:15], 0x13 ; C20D0D13
s_buffer_load_dword s27, s[12:15], 0x14 ; C20D8D14
s_buffer_load_dword s28, s[12:15], 0x15 ; C20E0D15
s_buffer_load_dword s29, s[12:15], 0x16 ; C20E8D16
s_buffer_load_dword s30, s[12:15], 0x17 ; C20F0D17
s_buffer_load_dword s31, s[12:15], 0x18 ; C20F8D18
s_buffer_load_dword s32, s[12:15], 0x19 ; C2100D19
s_buffer_load_dword s33, s[12:15], 0x1a ; C2108D1A
; The final load overwrites s12, part of the buffer descriptor itself; no
; further s_buffer_load follows.
s_buffer_load_dword s12, s[12:15], 0x1b ; C2060D1B
s_waitcnt expcnt(0) ; BF8C070F
; Finish the three-component vector (v15, v16, v10) with third term v8 - 2.0,
; then v1 = rsq_clamp of its squared length.
v_add_f32_e32 v1, -2.0, v8 ; 060210F5
v_mac_f32_e32 v15, s1, v1 ; 3E1E0201
v_mac_f32_e32 v16, s2, v1 ; 3E200202
v_mac_f32_e32 v10, s3, v1 ; 3E140203
v_mul_f32_e32 v1, v15, v15 ; 10021F0F
v_mac_f32_e32 v1, v16, v16 ; 3E022110
v_mac_f32_e32 v1, v10, v10 ; 3E02150A
v_rsq_clamp_f32_e32 v1, v1 ; 7E025901
; v4 = 0.5 * v0 + 0.5 * v17; v3 = 0.5 * v17 + s7 * (0.5 * v14).
v_mul_f32_e32 v2, 0.5, v14 ; 10041CF0
v_mul_f32_e32 v3, 0.5, v17 ; 100622F0
v_mad_f32 v4, 0.5, v0, v3 ; D2820004 040E00F0
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mac_f32_e32 v3, s7, v2 ; 3E060407
; Scaled vector components: v2 = x, v7 = y, v1 = z.
v_mul_f32_e32 v2, v1, v15 ; 10041F01
v_mul_f32_e32 v7, v1, v16 ; 100E2101
v_mul_f32_e32 v1, v1, v10 ; 10021501
; Accumulate the products y*z, x*y, z*z and z*x against three constant rows
; into v9, v10, v8.
v_mul_f32_e32 v8, v1, v7 ; 10100F01
v_mul_f32_e32 v9, s24, v8 ; 10121018
v_mul_f32_e32 v10, s28, v8 ; 1014101C
v_mul_f32_e32 v8, s32, v8 ; 10101020
v_mul_f32_e32 v12, v7, v2 ; 10180507
v_mac_f32_e32 v9, s23, v12 ; 3E121817
v_mac_f32_e32 v10, s27, v12 ; 3E14181B
v_mac_f32_e32 v8, s31, v12 ; 3E10181F
v_mul_f32_e32 v12, v1, v1 ; 10180301
v_mac_f32_e32 v9, s25, v12 ; 3E121819
v_mac_f32_e32 v10, s29, v12 ; 3E14181D
v_mac_f32_e32 v8, s33, v12 ; 3E101821
v_mul_f32_e32 v12, v2, v1 ; 10180302
v_mac_f32_e32 v9, s26, v12 ; 3E12181A
v_mac_f32_e32 v10, s30, v12 ; 3E14181E
v_mac_f32_e32 v8, s12, v12 ; 3E10180C
exp 15, 34, 0, 0, 0, v4, v3, v11, v17 ; F800022F 110B0304
s_waitcnt expcnt(0) ; BF8C070F
; Linear terms: dot products of (x, y, z) with three constant rows into v3, v4, v12.
v_mul_f32_e32 v3, s9, v7 ; 10060E09
v_mac_f32_e32 v3, s8, v2 ; 3E060408
v_mul_f32_e32 v4, s17, v7 ; 10080E11
v_mac_f32_e32 v4, s16, v2 ; 3E080410
v_mul_f32_e32 v12, s0, v7 ; 10180E00
v_mac_f32_e32 v12, s20, v2 ; 3E180414
v_mac_f32_e32 v3, s10, v1 ; 3E06020A
v_mac_f32_e32 v4, s18, v1 ; 3E080212
v_mac_f32_e32 v12, s21, v1 ; 3E180215
exp 15, 35, 0, 0, 0, v11, v2, v7, v1 ; F800023F 0107020B
s_waitcnt expcnt(0) ; BF8C070F
; v1 = x*x - y*y, folded into v9 / v10 / v8 via constants s4..s6.
v_mul_f32_e32 v1, v7, v7 ; 10020F07
v_mad_f32 v1, v2, v2, -v1 ; D2820001 84060502
v_mac_f32_e32 v9, s4, v1 ; 3E120204
v_mac_f32_e32 v10, s5, v1 ; 3E140205
v_mac_f32_e32 v8, s6, v1 ; 3E100206
; Combine each linear term (plus its additive constant) with the matching
; accumulated term.
v_add_f32_e32 v1, s11, v3 ; 0602060B
v_add_f32_e32 v1, v1, v9 ; 06021301
exp 15, 36, 0, 0, 0, v18, v19, v6, v1 ; F800024F 01061312
s_waitcnt expcnt(0) ; BF8C070F
v_add_f32_e32 v1, s19, v4 ; 06020813
v_add_f32_e32 v1, v1, v10 ; 06021501
v_add_f32_e32 v2, s22, v12 ; 06041816
v_add_f32_e32 v2, v2, v8 ; 06041102
; Export 37 carries v2 twice, matching the duplicated %258 in the IR export.
exp 15, 37, 0, 0, 0, v1, v2, v2, v5 ; F800025F 05020201
; Final export to target 12, the only one whose fourth operand is 1.
exp 15, 12, 0, 1, 0, v0, v14, v11, v17 ; F80008CF 110B0E00
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 56
VGPRS: 24
Code Size: 972 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
********************
SHADER KEY
export_16bpc = 0x3
last_cbuf = 0
color_two_side = 0
alpha_func = 7
alpha_to_one = 0
poly_stipple = 0
FRAG
DCL IN[0], GENERIC[0], PERSPECTIVE
DCL IN[1], GENERIC[1], PERSPECTIVE
DCL IN[2], GENERIC[2], PERSPECTIVE
DCL IN[3], GENERIC[3], PERSPECTIVE
DCL IN[4], GENERIC[4], PERSPECTIVE
DCL IN[5], GENERIC[5], PERSPECTIVE
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SAMP[1]
DCL SAMP[2]
DCL SAMP[3]
DCL SAMP[4]
DCL SAMP[5]
DCL SAMP[6]
DCL SAMP[7]
DCL SAMP[8]
DCL SAMP[9]
DCL SAMP[10]
DCL SVIEW[0], 2D, FLOAT
DCL SVIEW[1], 2D, FLOAT
DCL SVIEW[2], 2D, FLOAT
DCL SVIEW[3], 2D, FLOAT
DCL SVIEW[4], 2D, FLOAT
DCL SVIEW[5], 2D, FLOAT
DCL SVIEW[6], 2D, FLOAT
DCL SVIEW[7], 2D, FLOAT
DCL SVIEW[8], 2D, FLOAT
DCL SVIEW[9], 2D, FLOAT
DCL SVIEW[10], 2D, FLOAT
DCL CONST[0..4]
DCL CONST[16..24]
DCL TEMP[0..37], LOCAL
IMM[0] FLT32 { -0.2000, 7.0000, 0.0100, 0.5000}
IMM[1] FLT32 { 64.0000, -64.0000, 4.0000, 0.6931}
IMM[2] FLT32 { 0.0039, 0.0020, 1.0000, 2.0000}
IMM[3] FLT32 { 3.0000, 0.0000, -1.0000, 0.0001}
IMM[4] FLT32 { 32.0000, 1.0000, 0.0000, 0.0000}
0: MOV TEMP[0].x, IN[4].wwww
1: MOV TEMP[0].yz, IN[5].yxyy
2: DP3 TEMP[1].x, CONST[1].xyzz, CONST[1].xyzz
3: RSQ TEMP[1].x, TEMP[1].xxxx
4: MUL TEMP[1].xyz, CONST[1].xyzz, TEMP[1].xxxx
5: ADD TEMP[2].xyz, CONST[0].xyzz, -IN[4].xyzz
6: DP3 TEMP[3].x, TEMP[2].xyzz, TEMP[2].xyzz
7: RSQ TEMP[3].x, TEMP[3].xxxx
8: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[3].xxxx
9: ABS TEMP[3].xyz, IN[3].yzww
10: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
11: RSQ TEMP[4].x, TEMP[4].xxxx
12: MAD TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx, IMM[0].xxxx
13: MUL TEMP[3].xyz, TEMP[3].xyzz, IMM[0].yyyy
14: MAX TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz
15: ADD TEMP[4].x, TEMP[3].xxxx, TEMP[3].yyyy
16: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[3].zzzz
17: RCP TEMP[4].xyz, TEMP[4].xxxx
18: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xyzz
19: ADD TEMP[4], IN[0], IMM[0].wwww
20: FLR TEMP[4].xyz, TEMP[4]
21: MOV TEMP[5].x, CONST[16].xxxx
22: MUL TEMP[6].x, TEMP[4].xxxx, CONST[16].xxxx
23: MOV TEMP[7].x, TEMP[6].xxxx
24: FLR TEMP[6].x, TEMP[6].xxxx
25: MUL TEMP[6].x, TEMP[6].xxxx, CONST[16].xxxx
26: FSGE TEMP[8].x, TEMP[4].xxxx, IMM[1].xxxx
27: UIF TEMP[8].xxxx :0
28: MOV TEMP[5].x, CONST[17].xxxx
29: ADD TEMP[8].x, TEMP[4].xxxx, IMM[1].yyyy
30: MUL TEMP[8].x, TEMP[8].xxxx, CONST[17].xxxx
31: MOV TEMP[7].x, TEMP[8].xxxx
32: FLR TEMP[9].x, TEMP[8].xxxx
33: MUL TEMP[9].x, TEMP[9].xxxx, CONST[17].xxxx
34: MOV TEMP[6].x, TEMP[9].xxxx
35: FRC TEMP[8].x, TEMP[8].xxxx
36: FRC TEMP[10].x, TEMP[9].xxxx
37: MOV TEMP[8].y, TEMP[10].xxxx
38: FLR TEMP[9].x, TEMP[9].xxxx
39: ADD TEMP[9].x, TEMP[9].xxxx, IMM[1].zzzz
40: MOV TEMP[8].z, TEMP[9].xxxx
41: MOV TEMP[8].xyz, TEMP[8].xyzx
42: ELSE :0
43: FRC TEMP[7].x, TEMP[7].xxxx
44: FRC TEMP[9].x, TEMP[6].xxxx
45: MOV TEMP[7].y, TEMP[9].xxxx
46: FLR TEMP[6].x, TEMP[6].xxxx
47: MOV TEMP[7].z, TEMP[6].xxxx
48: MOV TEMP[8].xyz, TEMP[7].xyzx
49: ENDIF
50: MOV TEMP[6].x, CONST[16].xxxx
51: MUL TEMP[7].x, TEMP[4].yyyy, CONST[16].xxxx
52: MOV TEMP[9].x, TEMP[7].xxxx
53: FLR TEMP[7].x, TEMP[7].xxxx
54: MUL TEMP[7].x, TEMP[7].xxxx, CONST[16].xxxx
55: FSGE TEMP[10].x, TEMP[4].yyyy, IMM[1].xxxx
56: UIF TEMP[10].xxxx :0
57: MOV TEMP[6].x, CONST[17].xxxx
58: ADD TEMP[10].x, TEMP[4].yyyy, IMM[1].yyyy
59: MUL TEMP[10].x, TEMP[10].xxxx, CONST[17].xxxx
60: MOV TEMP[9].x, TEMP[10].xxxx
61: FLR TEMP[11].x, TEMP[10].xxxx
62: MUL TEMP[11].x, TEMP[11].xxxx, CONST[17].xxxx
63: MOV TEMP[7].x, TEMP[11].xxxx
64: FRC TEMP[10].x, TEMP[10].xxxx
65: FRC TEMP[12].x, TEMP[11].xxxx
66: MOV TEMP[10].y, TEMP[12].xxxx
67: FLR TEMP[11].x, TEMP[11].xxxx
68: ADD TEMP[11].x, TEMP[11].xxxx, IMM[1].zzzz
69: MOV TEMP[10].z, TEMP[11].xxxx
70: MOV TEMP[10].xyz, TEMP[10].xyzx
71: ELSE :0
72: FRC TEMP[9].x, TEMP[9].xxxx
73: FRC TEMP[11].x, TEMP[7].xxxx
74: MOV TEMP[9].y, TEMP[11].xxxx
75: FLR TEMP[7].x, TEMP[7].xxxx
76: MOV TEMP[9].z, TEMP[7].xxxx
77: MOV TEMP[10].xyz, TEMP[9].xyzx
78: ENDIF
79: MOV TEMP[7].x, CONST[16].xxxx
80: MUL TEMP[9].x, TEMP[4].zzzz, CONST[16].xxxx
81: MOV TEMP[11].x, TEMP[9].xxxx
82: FLR TEMP[9].x, TEMP[9].xxxx
83: MUL TEMP[9].x, TEMP[9].xxxx, CONST[16].xxxx
84: FSGE TEMP[12].x, TEMP[4].zzzz, IMM[1].xxxx
85: UIF TEMP[12].xxxx :0
86: MOV TEMP[7].x, CONST[17].xxxx
87: ADD TEMP[4].x, TEMP[4].zzzz, IMM[1].yyyy
88: MUL TEMP[4].x, TEMP[4].xxxx, CONST[17].xxxx
89: MOV TEMP[11].x, TEMP[4].xxxx
90: FLR TEMP[12].x, TEMP[4].xxxx
91: MUL TEMP[12].x, TEMP[12].xxxx, CONST[17].xxxx
92: MOV TEMP[9].x, TEMP[12].xxxx
93: FRC TEMP[4].x, TEMP[4].xxxx
94: FRC TEMP[13].x, TEMP[12].xxxx
95: MOV TEMP[4].y, TEMP[13].xxxx
96: FLR TEMP[12].x, TEMP[12].xxxx
97: ADD TEMP[12].x, TEMP[12].xxxx, IMM[1].zzzz
98: MOV TEMP[4].z, TEMP[12].xxxx
99: MOV TEMP[4].xyz, TEMP[4].xyzx
100: ELSE :0
101: FRC TEMP[11].x, TEMP[11].xxxx
102: FRC TEMP[12].x, TEMP[9].xxxx
103: MOV TEMP[11].y, TEMP[12].xxxx
104: FLR TEMP[9].x, TEMP[9].xxxx
105: MOV TEMP[11].z, TEMP[9].xxxx
106: MOV TEMP[4].xyz, TEMP[11].xyzx
107: ENDIF
108: ADD TEMP[9].xyz, IN[4].xyzz, -CONST[0].xyzz
109: DP3 TEMP[9].x, TEMP[9].xyzz, TEMP[9].xyzz
110: MUL TEMP[9].x, CONST[22].xxxx, TEMP[9].xxxx
111: LG2 TEMP[9].x, TEMP[9].xxxx
112: MUL TEMP[9].x, TEMP[9].xxxx, IMM[1].wwww
113: MUL TEMP[9].x, TEMP[9].xxxx, CONST[21].xxxx
114: MOV TEMP[11].xy, IN[4].xyxx
115: MOV TEMP[12].x, IMM[2].xxxx
116: FSNE TEMP[13].x, CONST[16].xxxx, TEMP[5].xxxx
117: UIF TEMP[13].xxxx :0
118: MOV TEMP[12].x, IMM[2].yyyy
119: RCP TEMP[13].x, CONST[19].xxxx
120: MUL TEMP[11].xy, IN[4].xyyy, TEMP[13].xxxx
121: ELSE :0
122: RCP TEMP[13].x, CONST[18].xxxx
123: MUL TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx
124: ENDIF
125: FRC TEMP[11].xy, TEMP[11].xyyy
126: MUL TEMP[13].x, CONST[20].xxxx, IMM[2].wwww
127: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
128: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
129: MUL TEMP[12].x, TEMP[12].xxxx, CONST[20].xxxx
130: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
131: MAD TEMP[11].xy, TEMP[11].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
132: MOV TEMP[12].xy, TEMP[11].xyyy
133: MOV TEMP[12].w, TEMP[9].xxxx
134: TXL TEMP[12], TEMP[12], SAMP[9], 2D
135: FSEQ TEMP[13].x, TEMP[8].zzzz, IMM[1].zzzz
136: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
137: MOV TEMP[14].xy, TEMP[11].xyyy
138: MOV TEMP[14].w, TEMP[9].xxxx
139: TXL TEMP[14], TEMP[14], SAMP[7], 2D
140: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[3].xxxx
141: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
142: MOV TEMP[16].xy, TEMP[11].xyyy
143: MOV TEMP[16].w, TEMP[9].xxxx
144: TXL TEMP[16], TEMP[16], SAMP[5], 2D
145: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[2].wwww
146: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
147: MOV TEMP[18].xy, TEMP[11].xyyy
148: MOV TEMP[18].w, TEMP[9].xxxx
149: TXL TEMP[18], TEMP[18], SAMP[3], 2D
150: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].zzzz
151: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
152: MOV TEMP[11].xy, TEMP[11].xyyy
153: MOV TEMP[11].w, TEMP[9].xxxx
154: TXL TEMP[11], TEMP[11], SAMP[1], 2D
155: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[3].yyyy
156: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
157: MUL TEMP[11], TEMP[11], TEMP[20].xxxx
158: MAD TEMP[11], TEMP[18], TEMP[19].xxxx, TEMP[11]
159: MAD TEMP[11], TEMP[16], TEMP[17].xxxx, TEMP[11]
160: MAD TEMP[11], TEMP[14], TEMP[15].xxxx, TEMP[11]
161: MAD TEMP[11], TEMP[12], TEMP[13].xxxx, TEMP[11]
162: MOV TEMP[12].xy, IN[4].zyzz
163: MOV TEMP[13].x, IMM[2].xxxx
164: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[5].xxxx
165: UIF TEMP[14].xxxx :0
166: MOV TEMP[13].x, IMM[2].yyyy
167: RCP TEMP[14].x, CONST[19].xxxx
168: MUL TEMP[12].xy, IN[4].zyyy, TEMP[14].xxxx
169: ELSE :0
170: RCP TEMP[14].x, CONST[18].xxxx
171: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
172: ENDIF
173: FRC TEMP[12].xy, TEMP[12].xyyy
174: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww
175: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
176: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
177: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx
178: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
179: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
180: MOV TEMP[13].xy, TEMP[12].xyyy
181: MOV TEMP[13].w, TEMP[9].xxxx
182: TXL TEMP[13], TEMP[13], SAMP[9], 2D
183: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz
184: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
185: MOV TEMP[15].xy, TEMP[12].xyyy
186: MOV TEMP[15].w, TEMP[9].xxxx
187: TXL TEMP[15], TEMP[15], SAMP[7], 2D
188: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx
189: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
190: MOV TEMP[17].xy, TEMP[12].xyyy
191: MOV TEMP[17].w, TEMP[9].xxxx
192: TXL TEMP[17], TEMP[17], SAMP[5], 2D
193: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww
194: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
195: MOV TEMP[19].xy, TEMP[12].xyyy
196: MOV TEMP[19].w, TEMP[9].xxxx
197: TXL TEMP[19], TEMP[19], SAMP[3], 2D
198: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz
199: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
200: MOV TEMP[12].xy, TEMP[12].xyyy
201: MOV TEMP[12].w, TEMP[9].xxxx
202: TXL TEMP[12], TEMP[12], SAMP[1], 2D
203: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy
204: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
205: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
206: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
207: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
208: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
209: MAD TEMP[12], TEMP[13], TEMP[14].xxxx, TEMP[12]
210: MOV TEMP[13].xy, IN[4].zxzz
211: MOV TEMP[14].x, IMM[2].xxxx
212: FSNE TEMP[15].x, CONST[16].xxxx, TEMP[5].xxxx
213: UIF TEMP[15].xxxx :0
214: MOV TEMP[14].x, IMM[2].yyyy
215: RCP TEMP[15].x, CONST[19].xxxx
216: MUL TEMP[13].xy, IN[4].zxxx, TEMP[15].xxxx
217: ELSE :0
218: RCP TEMP[15].x, CONST[18].xxxx
219: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
220: ENDIF
221: FRC TEMP[13].xy, TEMP[13].xyyy
222: MUL TEMP[15].x, CONST[20].xxxx, IMM[2].wwww
223: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
224: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
225: MUL TEMP[14].x, TEMP[14].xxxx, CONST[20].xxxx
226: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
227: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
228: MOV TEMP[14].xy, TEMP[13].xyyy
229: MOV TEMP[14].w, TEMP[9].xxxx
230: TXL TEMP[14], TEMP[14], SAMP[9], 2D
231: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz
232: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
233: MOV TEMP[16].xy, TEMP[13].xyyy
234: MOV TEMP[16].w, TEMP[9].xxxx
235: TXL TEMP[16], TEMP[16], SAMP[7], 2D
236: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx
237: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
238: MOV TEMP[18].xy, TEMP[13].xyyy
239: MOV TEMP[18].w, TEMP[9].xxxx
240: TXL TEMP[18], TEMP[18], SAMP[5], 2D
241: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww
242: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
243: MOV TEMP[20].xy, TEMP[13].xyyy
244: MOV TEMP[20].w, TEMP[9].xxxx
245: TXL TEMP[20], TEMP[20], SAMP[3], 2D
246: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz
247: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
248: MOV TEMP[13].xy, TEMP[13].xyyy
249: MOV TEMP[13].w, TEMP[9].xxxx
250: TXL TEMP[13], TEMP[13], SAMP[1], 2D
251: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy
252: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
253: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
254: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
255: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
256: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
257: MAD TEMP[13], TEMP[14], TEMP[15].xxxx, TEMP[13]
258: MOV TEMP[14].xy, IN[4].xyxx
259: MOV TEMP[15].x, IMM[2].xxxx
260: FSNE TEMP[16].x, CONST[16].xxxx, TEMP[6].xxxx
261: UIF TEMP[16].xxxx :0
262: MOV TEMP[15].x, IMM[2].yyyy
263: RCP TEMP[16].x, CONST[19].xxxx
264: MUL TEMP[14].xy, IN[4].xyyy, TEMP[16].xxxx
265: ELSE :0
266: RCP TEMP[16].x, CONST[18].xxxx
267: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
268: ENDIF
269: FRC TEMP[14].xy, TEMP[14].xyyy
270: MUL TEMP[16].x, CONST[20].xxxx, IMM[2].wwww
271: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
272: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
273: MUL TEMP[15].x, TEMP[15].xxxx, CONST[20].xxxx
274: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
275: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
276: MOV TEMP[15].xy, TEMP[14].xyyy
277: MOV TEMP[15].w, TEMP[9].xxxx
278: TXL TEMP[15], TEMP[15], SAMP[9], 2D
279: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[1].zzzz
280: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
281: MOV TEMP[17].xy, TEMP[14].xyyy
282: MOV TEMP[17].w, TEMP[9].xxxx
283: TXL TEMP[17], TEMP[17], SAMP[7], 2D
284: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[3].xxxx
285: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
286: MOV TEMP[19].xy, TEMP[14].xyyy
287: MOV TEMP[19].w, TEMP[9].xxxx
288: TXL TEMP[19], TEMP[19], SAMP[5], 2D
289: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].wwww
290: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
291: MOV TEMP[21].xy, TEMP[14].xyyy
292: MOV TEMP[21].w, TEMP[9].xxxx
293: TXL TEMP[21], TEMP[21], SAMP[3], 2D
294: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].zzzz
295: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
296: MOV TEMP[14].xy, TEMP[14].xyyy
297: MOV TEMP[14].w, TEMP[9].xxxx
298: TXL TEMP[14], TEMP[14], SAMP[1], 2D
299: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[3].yyyy
300: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
301: MUL TEMP[14], TEMP[14], TEMP[23].xxxx
302: MAD TEMP[14], TEMP[21], TEMP[22].xxxx, TEMP[14]
303: MAD TEMP[14], TEMP[19], TEMP[20].xxxx, TEMP[14]
304: MAD TEMP[14], TEMP[17], TEMP[18].xxxx, TEMP[14]
305: MAD TEMP[14], TEMP[15], TEMP[16].xxxx, TEMP[14]
306: MOV TEMP[15].xy, IN[4].zyzz
307: MOV TEMP[16].x, IMM[2].xxxx
308: FSNE TEMP[17].x, CONST[16].xxxx, TEMP[6].xxxx
309: UIF TEMP[17].xxxx :0
310: MOV TEMP[16].x, IMM[2].yyyy
311: RCP TEMP[17].x, CONST[19].xxxx
312: MUL TEMP[15].xy, IN[4].zyyy, TEMP[17].xxxx
313: ELSE :0
314: RCP TEMP[17].x, CONST[18].xxxx
315: MUL TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx
316: ENDIF
317: FRC TEMP[15].xy, TEMP[15].xyyy
318: MUL TEMP[17].x, CONST[20].xxxx, IMM[2].wwww
319: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[16].xxxx
320: ADD TEMP[17].x, IMM[2].zzzz, -TEMP[17].xxxx
321: MUL TEMP[16].x, TEMP[16].xxxx, CONST[20].xxxx
322: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[17].xxxx, TEMP[16].xxxx
323: MAD TEMP[15].xy, TEMP[15].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
324: MOV TEMP[16].xy, TEMP[15].xyyy
325: MOV TEMP[16].w, TEMP[9].xxxx
326: TXL TEMP[16], TEMP[16], SAMP[9], 2D
327: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[1].zzzz
328: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
329: MOV TEMP[18].xy, TEMP[15].xyyy
330: MOV TEMP[18].w, TEMP[9].xxxx
331: TXL TEMP[18], TEMP[18], SAMP[7], 2D
332: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[3].xxxx
333: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
334: MOV TEMP[20].xy, TEMP[15].xyyy
335: MOV TEMP[20].w, TEMP[9].xxxx
336: TXL TEMP[20], TEMP[20], SAMP[5], 2D
337: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[2].wwww
338: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
339: MOV TEMP[22].xy, TEMP[15].xyyy
340: MOV TEMP[22].w, TEMP[9].xxxx
341: TXL TEMP[22], TEMP[22], SAMP[3], 2D
342: FSEQ TEMP[23].x, TEMP[10].zzzz, IMM[2].zzzz
343: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
344: MOV TEMP[15].xy, TEMP[15].xyyy
345: MOV TEMP[15].w, TEMP[9].xxxx
346: TXL TEMP[15], TEMP[15], SAMP[1], 2D
347: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[3].yyyy
348: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
349: MUL TEMP[15], TEMP[15], TEMP[24].xxxx
350: MAD TEMP[15], TEMP[22], TEMP[23].xxxx, TEMP[15]
351: MAD TEMP[15], TEMP[20], TEMP[21].xxxx, TEMP[15]
352: MAD TEMP[15], TEMP[18], TEMP[19].xxxx, TEMP[15]
353: MAD TEMP[15], TEMP[16], TEMP[17].xxxx, TEMP[15]
354: MOV TEMP[16].xy, IN[4].zxzz
355: MOV TEMP[17].x, IMM[2].xxxx
356: FSNE TEMP[18].x, CONST[16].xxxx, TEMP[6].xxxx
357: UIF TEMP[18].xxxx :0
358: MOV TEMP[17].x, IMM[2].yyyy
359: RCP TEMP[18].x, CONST[19].xxxx
360: MUL TEMP[16].xy, IN[4].zxxx, TEMP[18].xxxx
361: ELSE :0
362: RCP TEMP[18].x, CONST[18].xxxx
363: MUL TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx
364: ENDIF
365: FRC TEMP[16].xy, TEMP[16].xyyy
366: MUL TEMP[18].x, CONST[20].xxxx, IMM[2].wwww
367: MUL TEMP[18].x, TEMP[18].xxxx, TEMP[17].xxxx
368: ADD TEMP[18].x, IMM[2].zzzz, -TEMP[18].xxxx
369: MUL TEMP[17].x, TEMP[17].xxxx, CONST[20].xxxx
370: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[18].xxxx, TEMP[17].xxxx
371: MAD TEMP[16].xy, TEMP[16].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
372: MOV TEMP[17].xy, TEMP[16].xyyy
373: MOV TEMP[17].w, TEMP[9].xxxx
374: TXL TEMP[17], TEMP[17], SAMP[9], 2D
375: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[1].zzzz
376: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
377: MOV TEMP[19].xy, TEMP[16].xyyy
378: MOV TEMP[19].w, TEMP[9].xxxx
379: TXL TEMP[19], TEMP[19], SAMP[7], 2D
380: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].xxxx
381: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
382: MOV TEMP[21].xy, TEMP[16].xyyy
383: MOV TEMP[21].w, TEMP[9].xxxx
384: TXL TEMP[21], TEMP[21], SAMP[5], 2D
385: FSEQ TEMP[22].x, TEMP[10].zzzz, IMM[2].wwww
386: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
387: MOV TEMP[23].xy, TEMP[16].xyyy
388: MOV TEMP[23].w, TEMP[9].xxxx
389: TXL TEMP[23], TEMP[23], SAMP[3], 2D
390: FSEQ TEMP[24].x, TEMP[10].zzzz, IMM[2].zzzz
391: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
392: MOV TEMP[16].xy, TEMP[16].xyyy
393: MOV TEMP[16].w, TEMP[9].xxxx
394: TXL TEMP[16], TEMP[16], SAMP[1], 2D
395: FSEQ TEMP[25].x, TEMP[10].zzzz, IMM[3].yyyy
396: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
397: MUL TEMP[16], TEMP[16], TEMP[25].xxxx
398: MAD TEMP[16], TEMP[23], TEMP[24].xxxx, TEMP[16]
399: MAD TEMP[16], TEMP[21], TEMP[22].xxxx, TEMP[16]
400: MAD TEMP[16], TEMP[19], TEMP[20].xxxx, TEMP[16]
401: MAD TEMP[16], TEMP[17], TEMP[18].xxxx, TEMP[16]
402: MOV TEMP[17].xy, IN[4].xyxx
403: MOV TEMP[18].x, IMM[2].xxxx
404: FSNE TEMP[19].x, CONST[16].xxxx, TEMP[7].xxxx
405: UIF TEMP[19].xxxx :0
406: MOV TEMP[18].x, IMM[2].yyyy
407: RCP TEMP[19].x, CONST[19].xxxx
408: MUL TEMP[17].xy, IN[4].xyyy, TEMP[19].xxxx
409: ELSE :0
410: RCP TEMP[19].x, CONST[18].xxxx
411: MUL TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx
412: ENDIF
413: FRC TEMP[17].xy, TEMP[17].xyyy
414: MUL TEMP[19].x, CONST[20].xxxx, IMM[2].wwww
415: MUL TEMP[19].x, TEMP[19].xxxx, TEMP[18].xxxx
416: ADD TEMP[19].x, IMM[2].zzzz, -TEMP[19].xxxx
417: MUL TEMP[18].x, TEMP[18].xxxx, CONST[20].xxxx
418: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[19].xxxx, TEMP[18].xxxx
419: MAD TEMP[17].xy, TEMP[17].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
420: MOV TEMP[18].xy, TEMP[17].xyyy
421: MOV TEMP[18].w, TEMP[9].xxxx
422: TXL TEMP[18], TEMP[18], SAMP[9], 2D
423: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[1].zzzz
424: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
425: MOV TEMP[20].xy, TEMP[17].xyyy
426: MOV TEMP[20].w, TEMP[9].xxxx
427: TXL TEMP[20], TEMP[20], SAMP[7], 2D
428: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[3].xxxx
429: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
430: MOV TEMP[22].xy, TEMP[17].xyyy
431: MOV TEMP[22].w, TEMP[9].xxxx
432: TXL TEMP[22], TEMP[22], SAMP[5], 2D
433: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[2].wwww
434: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
435: MOV TEMP[24].xy, TEMP[17].xyyy
436: MOV TEMP[24].w, TEMP[9].xxxx
437: TXL TEMP[24], TEMP[24], SAMP[3], 2D
438: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].zzzz
439: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
440: MOV TEMP[17].xy, TEMP[17].xyyy
441: MOV TEMP[17].w, TEMP[9].xxxx
442: TXL TEMP[17], TEMP[17], SAMP[1], 2D
443: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[3].yyyy
444: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
445: MUL TEMP[17], TEMP[17], TEMP[26].xxxx
446: MAD TEMP[17], TEMP[24], TEMP[25].xxxx, TEMP[17]
447: MAD TEMP[17], TEMP[22], TEMP[23].xxxx, TEMP[17]
448: MAD TEMP[17], TEMP[20], TEMP[21].xxxx, TEMP[17]
449: MAD TEMP[17], TEMP[18], TEMP[19].xxxx, TEMP[17]
450: MOV TEMP[18].xy, IN[4].zyzz
451: MOV TEMP[19].x, IMM[2].xxxx
452: FSNE TEMP[20].x, CONST[16].xxxx, TEMP[7].xxxx
453: UIF TEMP[20].xxxx :0
454: MOV TEMP[19].x, IMM[2].yyyy
455: RCP TEMP[20].x, CONST[19].xxxx
456: MUL TEMP[18].xy, IN[4].zyyy, TEMP[20].xxxx
457: ELSE :0
458: RCP TEMP[20].x, CONST[18].xxxx
459: MUL TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx
460: ENDIF
461: FRC TEMP[18].xy, TEMP[18].xyyy
462: MUL TEMP[20].x, CONST[20].xxxx, IMM[2].wwww
463: MUL TEMP[20].x, TEMP[20].xxxx, TEMP[19].xxxx
464: ADD TEMP[20].x, IMM[2].zzzz, -TEMP[20].xxxx
465: MUL TEMP[19].x, TEMP[19].xxxx, CONST[20].xxxx
466: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[20].xxxx, TEMP[19].xxxx
467: MAD TEMP[18].xy, TEMP[18].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
468: MOV TEMP[19].xy, TEMP[18].xyyy
469: MOV TEMP[19].w, TEMP[9].xxxx
470: TXL TEMP[19], TEMP[19], SAMP[9], 2D
471: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[1].zzzz
472: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
473: MOV TEMP[21].xy, TEMP[18].xyyy
474: MOV TEMP[21].w, TEMP[9].xxxx
475: TXL TEMP[21], TEMP[21], SAMP[7], 2D
476: FSEQ TEMP[22].x, TEMP[4].zzzz, IMM[3].xxxx
477: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
478: MOV TEMP[23].xy, TEMP[18].xyyy
479: MOV TEMP[23].w, TEMP[9].xxxx
480: TXL TEMP[23], TEMP[23], SAMP[5], 2D
481: FSEQ TEMP[24].x, TEMP[4].zzzz, IMM[2].wwww
482: AND TEMP[24].x, TEMP[24].xxxx, IMM[2].zzzz
483: MOV TEMP[25].xy, TEMP[18].xyyy
484: MOV TEMP[25].w, TEMP[9].xxxx
485: TXL TEMP[25], TEMP[25], SAMP[3], 2D
486: FSEQ TEMP[26].x, TEMP[4].zzzz, IMM[2].zzzz
487: AND TEMP[26].x, TEMP[26].xxxx, IMM[2].zzzz
488: MOV TEMP[18].xy, TEMP[18].xyyy
489: MOV TEMP[18].w, TEMP[9].xxxx
490: TXL TEMP[18], TEMP[18], SAMP[1], 2D
491: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[3].yyyy
492: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
493: MUL TEMP[18], TEMP[18], TEMP[27].xxxx
494: MAD TEMP[18], TEMP[25], TEMP[26].xxxx, TEMP[18]
495: MAD TEMP[18], TEMP[23], TEMP[24].xxxx, TEMP[18]
496: MAD TEMP[18], TEMP[21], TEMP[22].xxxx, TEMP[18]
497: MAD TEMP[18], TEMP[19], TEMP[20].xxxx, TEMP[18]
498: MOV TEMP[19].xy, IN[4].zxzz
499: MOV TEMP[20].x, IMM[2].xxxx
500: FSNE TEMP[21].x, CONST[16].xxxx, TEMP[7].xxxx
501: UIF TEMP[21].xxxx :0
502: MOV TEMP[20].x, IMM[2].yyyy
503: RCP TEMP[21].x, CONST[19].xxxx
504: MUL TEMP[19].xy, IN[4].zxxx, TEMP[21].xxxx
505: ELSE :0
506: RCP TEMP[21].x, CONST[18].xxxx
507: MUL TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx
508: ENDIF
509: FRC TEMP[19].xy, TEMP[19].xyyy
510: MUL TEMP[21].x, CONST[20].xxxx, IMM[2].wwww
511: MUL TEMP[21].x, TEMP[21].xxxx, TEMP[20].xxxx
512: ADD TEMP[21].x, IMM[2].zzzz, -TEMP[21].xxxx
513: MUL TEMP[20].x, TEMP[20].xxxx, CONST[20].xxxx
514: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[21].xxxx, TEMP[20].xxxx
515: MAD TEMP[19].xy, TEMP[19].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
516: MOV TEMP[20].xy, TEMP[19].xyyy
517: MOV TEMP[20].w, TEMP[9].xxxx
518: TXL TEMP[20], TEMP[20], SAMP[9], 2D
519: FSEQ TEMP[21].x, TEMP[4].zzzz, IMM[1].zzzz
520: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
521: MOV TEMP[22].xy, TEMP[19].xyyy
522: MOV TEMP[22].w, TEMP[9].xxxx
523: TXL TEMP[22], TEMP[22], SAMP[7], 2D
524: FSEQ TEMP[23].x, TEMP[4].zzzz, IMM[3].xxxx
525: AND TEMP[23].x, TEMP[23].xxxx, IMM[2].zzzz
526: MOV TEMP[24].xy, TEMP[19].xyyy
527: MOV TEMP[24].w, TEMP[9].xxxx
528: TXL TEMP[24], TEMP[24], SAMP[5], 2D
529: FSEQ TEMP[25].x, TEMP[4].zzzz, IMM[2].wwww
530: AND TEMP[25].x, TEMP[25].xxxx, IMM[2].zzzz
531: MOV TEMP[26].xy, TEMP[19].xyyy
532: MOV TEMP[26].w, TEMP[9].xxxx
533: TXL TEMP[26], TEMP[26], SAMP[3], 2D
534: FSEQ TEMP[27].x, TEMP[4].zzzz, IMM[2].zzzz
535: AND TEMP[27].x, TEMP[27].xxxx, IMM[2].zzzz
536: MOV TEMP[19].xy, TEMP[19].xyyy
537: MOV TEMP[19].w, TEMP[9].xxxx
538: TXL TEMP[19], TEMP[19], SAMP[1], 2D
539: FSEQ TEMP[28].x, TEMP[4].zzzz, IMM[3].yyyy
540: AND TEMP[28].x, TEMP[28].xxxx, IMM[2].zzzz
541: MUL TEMP[19], TEMP[19], TEMP[28].xxxx
542: MAD TEMP[19], TEMP[26], TEMP[27].xxxx, TEMP[19]
543: MAD TEMP[19], TEMP[24], TEMP[25].xxxx, TEMP[19]
544: MAD TEMP[19], TEMP[22], TEMP[23].xxxx, TEMP[19]
545: MAD TEMP[19], TEMP[20], TEMP[21].xxxx, TEMP[19]
546: MUL TEMP[17], TEMP[17], TEMP[3].zzzz
547: MAD TEMP[17], TEMP[18], TEMP[3].xxxx, TEMP[17]
548: MAD TEMP[17], TEMP[19], TEMP[3].yyyy, TEMP[17]
549: MUL TEMP[14], TEMP[14], TEMP[3].zzzz
550: MAD TEMP[14], TEMP[15], TEMP[3].xxxx, TEMP[14]
551: MAD TEMP[14], TEMP[16], TEMP[3].yyyy, TEMP[14]
552: MUL TEMP[11], TEMP[11], TEMP[3].zzzz
553: MAD TEMP[11], TEMP[12], TEMP[3].xxxx, TEMP[11]
554: MAD TEMP[11], TEMP[13], TEMP[3].yyyy, TEMP[11]
555: MUL TEMP[11], IN[1].xxxx, TEMP[11]
556: MAD TEMP[11], IN[1].yyyy, TEMP[14], TEMP[11]
557: MAD TEMP[11].xyz, IN[1].zzzz, TEMP[17], TEMP[11]
558: MOV TEMP[12].xy, IN[4].zyzz
559: MOV TEMP[13].x, IMM[2].xxxx
560: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[5].xxxx
561: UIF TEMP[14].xxxx :0
562: MOV TEMP[13].x, IMM[2].yyyy
563: RCP TEMP[14].x, CONST[19].xxxx
564: MUL TEMP[12].xy, IN[4].zyyy, TEMP[14].xxxx
565: ELSE :0
566: RCP TEMP[14].x, CONST[18].xxxx
567: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
568: ENDIF
569: FRC TEMP[12].xy, TEMP[12].xyyy
570: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww
571: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
572: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
573: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx
574: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
575: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
576: MOV TEMP[13].xy, TEMP[12].xyyy
577: MOV TEMP[13].w, TEMP[9].xxxx
578: TXL TEMP[13], TEMP[13], SAMP[10], 2D
579: FSEQ TEMP[14].x, TEMP[8].zzzz, IMM[1].zzzz
580: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
581: MOV TEMP[15].xy, TEMP[12].xyyy
582: MOV TEMP[15].w, TEMP[9].xxxx
583: TXL TEMP[15], TEMP[15], SAMP[8], 2D
584: FSEQ TEMP[16].x, TEMP[8].zzzz, IMM[3].xxxx
585: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
586: MOV TEMP[17].xy, TEMP[12].xyyy
587: MOV TEMP[17].w, TEMP[9].xxxx
588: TXL TEMP[17], TEMP[17], SAMP[6], 2D
589: FSEQ TEMP[18].x, TEMP[8].zzzz, IMM[2].wwww
590: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
591: MOV TEMP[19].xy, TEMP[12].xyyy
592: MOV TEMP[19].w, TEMP[9].xxxx
593: TXL TEMP[19], TEMP[19], SAMP[4], 2D
594: FSEQ TEMP[20].x, TEMP[8].zzzz, IMM[2].zzzz
595: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
596: MOV TEMP[12].xy, TEMP[12].xyyy
597: MOV TEMP[12].w, TEMP[9].xxxx
598: TXL TEMP[12], TEMP[12], SAMP[2], 2D
599: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[3].yyyy
600: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
601: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
602: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
603: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
604: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
605: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12]
606: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz
607: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy
608: MOV_SAT TEMP[29].x, TEMP[13].xxxx
609: MOV TEMP[13].xy, IN[4].zxzz
610: MOV TEMP[14].x, IMM[2].xxxx
611: FSNE TEMP[15].x, CONST[16].xxxx, TEMP[5].xxxx
612: UIF TEMP[15].xxxx :0
613: MOV TEMP[14].x, IMM[2].yyyy
614: RCP TEMP[15].x, CONST[19].xxxx
615: MUL TEMP[13].xy, IN[4].zxxx, TEMP[15].xxxx
616: ELSE :0
617: RCP TEMP[15].x, CONST[18].xxxx
618: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
619: ENDIF
620: FRC TEMP[13].xy, TEMP[13].xyyy
621: MUL TEMP[15].x, CONST[20].xxxx, IMM[2].wwww
622: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
623: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
624: MUL TEMP[14].x, TEMP[14].xxxx, CONST[20].xxxx
625: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
626: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
627: MOV TEMP[14].xy, TEMP[13].xyyy
628: MOV TEMP[14].w, TEMP[9].xxxx
629: TXL TEMP[14], TEMP[14], SAMP[10], 2D
630: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz
631: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
632: MOV TEMP[16].xy, TEMP[13].xyyy
633: MOV TEMP[16].w, TEMP[9].xxxx
634: TXL TEMP[16], TEMP[16], SAMP[8], 2D
635: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx
636: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
637: MOV TEMP[18].xy, TEMP[13].xyyy
638: MOV TEMP[18].w, TEMP[9].xxxx
639: TXL TEMP[18], TEMP[18], SAMP[6], 2D
640: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww
641: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
642: MOV TEMP[20].xy, TEMP[13].xyyy
643: MOV TEMP[20].w, TEMP[9].xxxx
644: TXL TEMP[20], TEMP[20], SAMP[4], 2D
645: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz
646: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
647: MOV TEMP[13].xy, TEMP[13].xyyy
648: MOV TEMP[13].w, TEMP[9].xxxx
649: TXL TEMP[13], TEMP[13], SAMP[2], 2D
650: FSEQ TEMP[22].x, TEMP[8].zzzz, IMM[3].yyyy
651: AND TEMP[22].x, TEMP[22].xxxx, IMM[2].zzzz
652: MUL TEMP[13], TEMP[13], TEMP[22].xxxx
653: MAD TEMP[13], TEMP[20], TEMP[21].xxxx, TEMP[13]
654: MAD TEMP[13], TEMP[18], TEMP[19].xxxx, TEMP[13]
655: MAD TEMP[13], TEMP[16], TEMP[17].xxxx, TEMP[13]
656: MAD TEMP[13].yw, TEMP[14], TEMP[15].xxxx, TEMP[13]
657: MAD TEMP[13].xy, TEMP[13].wyyy, IMM[2].wwww, IMM[3].zzzz
658: DP2 TEMP[14].x, TEMP[13].xyyy, TEMP[13].xyyy
659: MOV_SAT TEMP[30].x, TEMP[14].xxxx
660: MOV TEMP[14].xy, IN[4].xyxx
661: MOV TEMP[15].x, IMM[2].xxxx
662: FSNE TEMP[16].x, CONST[16].xxxx, TEMP[5].xxxx
663: UIF TEMP[16].xxxx :0
664: MOV TEMP[15].x, IMM[2].yyyy
665: RCP TEMP[16].x, CONST[19].xxxx
666: MUL TEMP[14].xy, IN[4].xyyy, TEMP[16].xxxx
667: ELSE :0
668: RCP TEMP[16].x, CONST[18].xxxx
669: MUL TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx
670: ENDIF
671: FRC TEMP[14].xy, TEMP[14].xyyy
672: MUL TEMP[16].x, CONST[20].xxxx, IMM[2].wwww
673: MUL TEMP[16].x, TEMP[16].xxxx, TEMP[15].xxxx
674: ADD TEMP[16].x, IMM[2].zzzz, -TEMP[16].xxxx
675: MUL TEMP[15].x, TEMP[15].xxxx, CONST[20].xxxx
676: MAD TEMP[14].xy, TEMP[14].xyyy, TEMP[16].xxxx, TEMP[15].xxxx
677: MAD TEMP[5].xy, TEMP[14].xyyy, TEMP[5].xxxx, TEMP[8].xyyy
678: MOV TEMP[14].xy, TEMP[5].xyyy
679: MOV TEMP[14].w, TEMP[9].xxxx
680: TXL TEMP[14], TEMP[14], SAMP[10], 2D
681: FSEQ TEMP[15].x, TEMP[8].zzzz, IMM[1].zzzz
682: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
683: MOV TEMP[16].xy, TEMP[5].xyyy
684: MOV TEMP[16].w, TEMP[9].xxxx
685: TXL TEMP[16], TEMP[16], SAMP[8], 2D
686: FSEQ TEMP[17].x, TEMP[8].zzzz, IMM[3].xxxx
687: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
688: MOV TEMP[18].xy, TEMP[5].xyyy
689: MOV TEMP[18].w, TEMP[9].xxxx
690: TXL TEMP[18], TEMP[18], SAMP[6], 2D
691: FSEQ TEMP[19].x, TEMP[8].zzzz, IMM[2].wwww
692: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
693: MOV TEMP[20].xy, TEMP[5].xyyy
694: MOV TEMP[20].w, TEMP[9].xxxx
695: TXL TEMP[20], TEMP[20], SAMP[4], 2D
696: FSEQ TEMP[21].x, TEMP[8].zzzz, IMM[2].zzzz
697: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
698: MOV TEMP[5].xy, TEMP[5].xyyy
699: MOV TEMP[5].w, TEMP[9].xxxx
700: TXL TEMP[5], TEMP[5], SAMP[2], 2D
701: FSEQ TEMP[8].x, TEMP[8].zzzz, IMM[3].yyyy
702: AND TEMP[8].x, TEMP[8].xxxx, IMM[2].zzzz
703: MUL TEMP[5], TEMP[5], TEMP[8].xxxx
704: MAD TEMP[5], TEMP[20], TEMP[21].xxxx, TEMP[5]
705: MAD TEMP[5], TEMP[18], TEMP[19].xxxx, TEMP[5]
706: MAD TEMP[5], TEMP[16], TEMP[17].xxxx, TEMP[5]
707: MAD TEMP[5].yw, TEMP[14], TEMP[15].xxxx, TEMP[5]
708: MAD TEMP[5].xy, TEMP[5].wyyy, IMM[2].wwww, IMM[3].zzzz
709: DP2 TEMP[8].x, TEMP[5].xyyy, TEMP[5].xyyy
710: MOV_SAT TEMP[31].x, TEMP[8].xxxx
711: MOV TEMP[8].x, IMM[3].yyyy
712: MOV TEMP[8].y, TEMP[12].xxxx
713: MOV TEMP[8].z, TEMP[12].yyyy
714: MOV TEMP[12].y, IMM[3].yyyy
715: MOV TEMP[12].x, TEMP[13].yyyy
716: MOV TEMP[12].z, TEMP[13].xxxx
717: MOV TEMP[13].z, IMM[3].yyyy
718: MOV TEMP[13].xy, TEMP[5].xyxx
719: MUL TEMP[5].xyz, TEMP[8].xyzz, TEMP[3].xxxx
720: MAD TEMP[5].xyz, TEMP[12].xyzz, TEMP[3].yyyy, TEMP[5].xyzz
721: MAD TEMP[5].xyz, TEMP[13].xyzz, TEMP[3].zzzz, TEMP[5].xyzz
722: MOV TEMP[8].xy, IN[4].zyzz
723: MOV TEMP[12].x, IMM[2].xxxx
724: FSNE TEMP[13].x, CONST[16].xxxx, TEMP[6].xxxx
725: UIF TEMP[13].xxxx :0
726: MOV TEMP[12].x, IMM[2].yyyy
727: RCP TEMP[13].x, CONST[19].xxxx
728: MUL TEMP[8].xy, IN[4].zyyy, TEMP[13].xxxx
729: ELSE :0
730: RCP TEMP[13].x, CONST[18].xxxx
731: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx
732: ENDIF
733: FRC TEMP[8].xy, TEMP[8].xyyy
734: MUL TEMP[13].x, CONST[20].xxxx, IMM[2].wwww
735: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
736: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
737: MUL TEMP[12].x, TEMP[12].xxxx, CONST[20].xxxx
738: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
739: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
740: MOV TEMP[12].xy, TEMP[8].xyyy
741: MOV TEMP[12].w, TEMP[9].xxxx
742: TXL TEMP[12], TEMP[12], SAMP[10], 2D
743: FSEQ TEMP[13].x, TEMP[10].zzzz, IMM[1].zzzz
744: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
745: MOV TEMP[14].xy, TEMP[8].xyyy
746: MOV TEMP[14].w, TEMP[9].xxxx
747: TXL TEMP[14], TEMP[14], SAMP[8], 2D
748: FSEQ TEMP[15].x, TEMP[10].zzzz, IMM[3].xxxx
749: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
750: MOV TEMP[16].xy, TEMP[8].xyyy
751: MOV TEMP[16].w, TEMP[9].xxxx
752: TXL TEMP[16], TEMP[16], SAMP[6], 2D
753: FSEQ TEMP[17].x, TEMP[10].zzzz, IMM[2].wwww
754: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
755: MOV TEMP[18].xy, TEMP[8].xyyy
756: MOV TEMP[18].w, TEMP[9].xxxx
757: TXL TEMP[18], TEMP[18], SAMP[4], 2D
758: FSEQ TEMP[19].x, TEMP[10].zzzz, IMM[2].zzzz
759: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
760: MOV TEMP[8].xy, TEMP[8].xyyy
761: MOV TEMP[8].w, TEMP[9].xxxx
762: TXL TEMP[8], TEMP[8], SAMP[2], 2D
763: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[3].yyyy
764: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
765: MUL TEMP[8], TEMP[8], TEMP[20].xxxx
766: MAD TEMP[8], TEMP[18], TEMP[19].xxxx, TEMP[8]
767: MAD TEMP[8], TEMP[16], TEMP[17].xxxx, TEMP[8]
768: MAD TEMP[8], TEMP[14], TEMP[15].xxxx, TEMP[8]
769: MAD TEMP[8].yw, TEMP[12], TEMP[13].xxxx, TEMP[8]
770: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz
771: DP2 TEMP[12].x, TEMP[8].xyyy, TEMP[8].xyyy
772: MOV_SAT TEMP[32].x, TEMP[12].xxxx
773: MOV TEMP[12].xy, IN[4].zxzz
774: MOV TEMP[13].x, IMM[2].xxxx
775: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[6].xxxx
776: UIF TEMP[14].xxxx :0
777: MOV TEMP[13].x, IMM[2].yyyy
778: RCP TEMP[14].x, CONST[19].xxxx
779: MUL TEMP[12].xy, IN[4].zxxx, TEMP[14].xxxx
780: ELSE :0
781: RCP TEMP[14].x, CONST[18].xxxx
782: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
783: ENDIF
784: FRC TEMP[12].xy, TEMP[12].xyyy
785: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww
786: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
787: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
788: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx
789: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
790: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
791: MOV TEMP[13].xy, TEMP[12].xyyy
792: MOV TEMP[13].w, TEMP[9].xxxx
793: TXL TEMP[13], TEMP[13], SAMP[10], 2D
794: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz
795: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
796: MOV TEMP[15].xy, TEMP[12].xyyy
797: MOV TEMP[15].w, TEMP[9].xxxx
798: TXL TEMP[15], TEMP[15], SAMP[8], 2D
799: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx
800: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
801: MOV TEMP[17].xy, TEMP[12].xyyy
802: MOV TEMP[17].w, TEMP[9].xxxx
803: TXL TEMP[17], TEMP[17], SAMP[6], 2D
804: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww
805: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
806: MOV TEMP[19].xy, TEMP[12].xyyy
807: MOV TEMP[19].w, TEMP[9].xxxx
808: TXL TEMP[19], TEMP[19], SAMP[4], 2D
809: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz
810: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
811: MOV TEMP[12].xy, TEMP[12].xyyy
812: MOV TEMP[12].w, TEMP[9].xxxx
813: TXL TEMP[12], TEMP[12], SAMP[2], 2D
814: FSEQ TEMP[21].x, TEMP[10].zzzz, IMM[3].yyyy
815: AND TEMP[21].x, TEMP[21].xxxx, IMM[2].zzzz
816: MUL TEMP[12], TEMP[12], TEMP[21].xxxx
817: MAD TEMP[12], TEMP[19], TEMP[20].xxxx, TEMP[12]
818: MAD TEMP[12], TEMP[17], TEMP[18].xxxx, TEMP[12]
819: MAD TEMP[12], TEMP[15], TEMP[16].xxxx, TEMP[12]
820: MAD TEMP[12].yw, TEMP[13], TEMP[14].xxxx, TEMP[12]
821: MAD TEMP[12].xy, TEMP[12].wyyy, IMM[2].wwww, IMM[3].zzzz
822: DP2 TEMP[13].x, TEMP[12].xyyy, TEMP[12].xyyy
823: MOV_SAT TEMP[33].x, TEMP[13].xxxx
824: MOV TEMP[13].xy, IN[4].xyxx
825: MOV TEMP[14].x, IMM[2].xxxx
826: FSNE TEMP[15].x, CONST[16].xxxx, TEMP[6].xxxx
827: UIF TEMP[15].xxxx :0
828: MOV TEMP[14].x, IMM[2].yyyy
829: RCP TEMP[15].x, CONST[19].xxxx
830: MUL TEMP[13].xy, IN[4].xyyy, TEMP[15].xxxx
831: ELSE :0
832: RCP TEMP[15].x, CONST[18].xxxx
833: MUL TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx
834: ENDIF
835: FRC TEMP[13].xy, TEMP[13].xyyy
836: MUL TEMP[15].x, CONST[20].xxxx, IMM[2].wwww
837: MUL TEMP[15].x, TEMP[15].xxxx, TEMP[14].xxxx
838: ADD TEMP[15].x, IMM[2].zzzz, -TEMP[15].xxxx
839: MUL TEMP[14].x, TEMP[14].xxxx, CONST[20].xxxx
840: MAD TEMP[13].xy, TEMP[13].xyyy, TEMP[15].xxxx, TEMP[14].xxxx
841: MAD TEMP[6].xy, TEMP[13].xyyy, TEMP[6].xxxx, TEMP[10].xyyy
842: MOV TEMP[13].xy, TEMP[6].xyyy
843: MOV TEMP[13].w, TEMP[9].xxxx
844: TXL TEMP[13], TEMP[13], SAMP[10], 2D
845: FSEQ TEMP[14].x, TEMP[10].zzzz, IMM[1].zzzz
846: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
847: MOV TEMP[15].xy, TEMP[6].xyyy
848: MOV TEMP[15].w, TEMP[9].xxxx
849: TXL TEMP[15], TEMP[15], SAMP[8], 2D
850: FSEQ TEMP[16].x, TEMP[10].zzzz, IMM[3].xxxx
851: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
852: MOV TEMP[17].xy, TEMP[6].xyyy
853: MOV TEMP[17].w, TEMP[9].xxxx
854: TXL TEMP[17], TEMP[17], SAMP[6], 2D
855: FSEQ TEMP[18].x, TEMP[10].zzzz, IMM[2].wwww
856: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
857: MOV TEMP[19].xy, TEMP[6].xyyy
858: MOV TEMP[19].w, TEMP[9].xxxx
859: TXL TEMP[19], TEMP[19], SAMP[4], 2D
860: FSEQ TEMP[20].x, TEMP[10].zzzz, IMM[2].zzzz
861: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
862: MOV TEMP[6].xy, TEMP[6].xyyy
863: MOV TEMP[6].w, TEMP[9].xxxx
864: TXL TEMP[6], TEMP[6], SAMP[2], 2D
865: FSEQ TEMP[10].x, TEMP[10].zzzz, IMM[3].yyyy
866: AND TEMP[10].x, TEMP[10].xxxx, IMM[2].zzzz
867: MUL TEMP[6], TEMP[6], TEMP[10].xxxx
868: MAD TEMP[6], TEMP[19], TEMP[20].xxxx, TEMP[6]
869: MAD TEMP[6], TEMP[17], TEMP[18].xxxx, TEMP[6]
870: MAD TEMP[6], TEMP[15], TEMP[16].xxxx, TEMP[6]
871: MAD TEMP[6].yw, TEMP[13], TEMP[14].xxxx, TEMP[6]
872: MAD TEMP[6].xy, TEMP[6].wyyy, IMM[2].wwww, IMM[3].zzzz
873: DP2 TEMP[10].x, TEMP[6].xyyy, TEMP[6].xyyy
874: MOV_SAT TEMP[34].x, TEMP[10].xxxx
875: MOV TEMP[10].x, IMM[3].yyyy
876: MOV TEMP[10].y, TEMP[8].xxxx
877: MOV TEMP[10].z, TEMP[8].yyyy
878: MOV TEMP[8].y, IMM[3].yyyy
879: MOV TEMP[8].x, TEMP[12].yyyy
880: MOV TEMP[8].z, TEMP[12].xxxx
881: MOV TEMP[12].z, IMM[3].yyyy
882: MOV TEMP[12].xy, TEMP[6].xyxx
883: MUL TEMP[6].xyz, TEMP[10].xyzz, TEMP[3].xxxx
884: MAD TEMP[6].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[6].xyzz
885: MAD TEMP[6].xyz, TEMP[12].xyzz, TEMP[3].zzzz, TEMP[6].xyzz
886: MOV TEMP[8].xy, IN[4].zyzz
887: MOV TEMP[10].x, IMM[2].xxxx
888: FSNE TEMP[12].x, CONST[16].xxxx, TEMP[7].xxxx
889: UIF TEMP[12].xxxx :0
890: MOV TEMP[10].x, IMM[2].yyyy
891: RCP TEMP[12].x, CONST[19].xxxx
892: MUL TEMP[8].xy, IN[4].zyyy, TEMP[12].xxxx
893: ELSE :0
894: RCP TEMP[12].x, CONST[18].xxxx
895: MUL TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx
896: ENDIF
897: FRC TEMP[8].xy, TEMP[8].xyyy
898: MUL TEMP[12].x, CONST[20].xxxx, IMM[2].wwww
899: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[10].xxxx
900: ADD TEMP[12].x, IMM[2].zzzz, -TEMP[12].xxxx
901: MUL TEMP[10].x, TEMP[10].xxxx, CONST[20].xxxx
902: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[12].xxxx, TEMP[10].xxxx
903: MAD TEMP[8].xy, TEMP[8].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
904: MOV TEMP[10].xy, TEMP[8].xyyy
905: MOV TEMP[10].w, TEMP[9].xxxx
906: TXL TEMP[10], TEMP[10], SAMP[10], 2D
907: FSEQ TEMP[12].x, TEMP[4].zzzz, IMM[1].zzzz
908: AND TEMP[12].x, TEMP[12].xxxx, IMM[2].zzzz
909: MOV TEMP[13].xy, TEMP[8].xyyy
910: MOV TEMP[13].w, TEMP[9].xxxx
911: TXL TEMP[13], TEMP[13], SAMP[8], 2D
912: FSEQ TEMP[14].x, TEMP[4].zzzz, IMM[3].xxxx
913: AND TEMP[14].x, TEMP[14].xxxx, IMM[2].zzzz
914: MOV TEMP[15].xy, TEMP[8].xyyy
915: MOV TEMP[15].w, TEMP[9].xxxx
916: TXL TEMP[15], TEMP[15], SAMP[6], 2D
917: FSEQ TEMP[16].x, TEMP[4].zzzz, IMM[2].wwww
918: AND TEMP[16].x, TEMP[16].xxxx, IMM[2].zzzz
919: MOV TEMP[17].xy, TEMP[8].xyyy
920: MOV TEMP[17].w, TEMP[9].xxxx
921: TXL TEMP[17], TEMP[17], SAMP[4], 2D
922: FSEQ TEMP[18].x, TEMP[4].zzzz, IMM[2].zzzz
923: AND TEMP[18].x, TEMP[18].xxxx, IMM[2].zzzz
924: MOV TEMP[8].xy, TEMP[8].xyyy
925: MOV TEMP[8].w, TEMP[9].xxxx
926: TXL TEMP[8], TEMP[8], SAMP[2], 2D
927: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[3].yyyy
928: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
929: MUL TEMP[8], TEMP[8], TEMP[19].xxxx
930: MAD TEMP[8], TEMP[17], TEMP[18].xxxx, TEMP[8]
931: MAD TEMP[8], TEMP[15], TEMP[16].xxxx, TEMP[8]
932: MAD TEMP[8], TEMP[13], TEMP[14].xxxx, TEMP[8]
933: MAD TEMP[8].yw, TEMP[10], TEMP[12].xxxx, TEMP[8]
934: MAD TEMP[8].xy, TEMP[8].wyyy, IMM[2].wwww, IMM[3].zzzz
935: DP2 TEMP[10].x, TEMP[8].xyyy, TEMP[8].xyyy
936: MOV_SAT TEMP[35].x, TEMP[10].xxxx
937: MOV TEMP[10].xy, IN[4].zxzz
938: MOV TEMP[12].x, IMM[2].xxxx
939: FSNE TEMP[13].x, CONST[16].xxxx, TEMP[7].xxxx
940: UIF TEMP[13].xxxx :0
941: MOV TEMP[12].x, IMM[2].yyyy
942: RCP TEMP[13].x, CONST[19].xxxx
943: MUL TEMP[10].xy, IN[4].zxxx, TEMP[13].xxxx
944: ELSE :0
945: RCP TEMP[13].x, CONST[18].xxxx
946: MUL TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx
947: ENDIF
948: FRC TEMP[10].xy, TEMP[10].xyyy
949: MUL TEMP[13].x, CONST[20].xxxx, IMM[2].wwww
950: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[12].xxxx
951: ADD TEMP[13].x, IMM[2].zzzz, -TEMP[13].xxxx
952: MUL TEMP[12].x, TEMP[12].xxxx, CONST[20].xxxx
953: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[13].xxxx, TEMP[12].xxxx
954: MAD TEMP[10].xy, TEMP[10].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
955: MOV TEMP[12].xy, TEMP[10].xyyy
956: MOV TEMP[12].w, TEMP[9].xxxx
957: TXL TEMP[12], TEMP[12], SAMP[10], 2D
958: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz
959: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
960: MOV TEMP[14].xy, TEMP[10].xyyy
961: MOV TEMP[14].w, TEMP[9].xxxx
962: TXL TEMP[14], TEMP[14], SAMP[8], 2D
963: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx
964: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
965: MOV TEMP[16].xy, TEMP[10].xyyy
966: MOV TEMP[16].w, TEMP[9].xxxx
967: TXL TEMP[16], TEMP[16], SAMP[6], 2D
968: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww
969: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
970: MOV TEMP[18].xy, TEMP[10].xyyy
971: MOV TEMP[18].w, TEMP[9].xxxx
972: TXL TEMP[18], TEMP[18], SAMP[4], 2D
973: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz
974: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
975: MOV TEMP[10].xy, TEMP[10].xyyy
976: MOV TEMP[10].w, TEMP[9].xxxx
977: TXL TEMP[10], TEMP[10], SAMP[2], 2D
978: FSEQ TEMP[20].x, TEMP[4].zzzz, IMM[3].yyyy
979: AND TEMP[20].x, TEMP[20].xxxx, IMM[2].zzzz
980: MUL TEMP[10], TEMP[10], TEMP[20].xxxx
981: MAD TEMP[10], TEMP[18], TEMP[19].xxxx, TEMP[10]
982: MAD TEMP[10], TEMP[16], TEMP[17].xxxx, TEMP[10]
983: MAD TEMP[10], TEMP[14], TEMP[15].xxxx, TEMP[10]
984: MAD TEMP[10].yw, TEMP[12], TEMP[13].xxxx, TEMP[10]
985: MAD TEMP[10].xy, TEMP[10].wyyy, IMM[2].wwww, IMM[3].zzzz
986: DP2 TEMP[12].x, TEMP[10].xyyy, TEMP[10].xyyy
987: MOV_SAT TEMP[36].x, TEMP[12].xxxx
988: MOV TEMP[12].xy, IN[4].xyxx
989: MOV TEMP[13].x, IMM[2].xxxx
990: FSNE TEMP[14].x, CONST[16].xxxx, TEMP[7].xxxx
991: UIF TEMP[14].xxxx :0
992: MOV TEMP[13].x, IMM[2].yyyy
993: RCP TEMP[14].x, CONST[19].xxxx
994: MUL TEMP[12].xy, IN[4].xyyy, TEMP[14].xxxx
995: ELSE :0
996: RCP TEMP[14].x, CONST[18].xxxx
997: MUL TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx
998: ENDIF
999: FRC TEMP[12].xy, TEMP[12].xyyy
1000: MUL TEMP[14].x, CONST[20].xxxx, IMM[2].wwww
1001: MUL TEMP[14].x, TEMP[14].xxxx, TEMP[13].xxxx
1002: ADD TEMP[14].x, IMM[2].zzzz, -TEMP[14].xxxx
1003: MUL TEMP[13].x, TEMP[13].xxxx, CONST[20].xxxx
1004: MAD TEMP[12].xy, TEMP[12].xyyy, TEMP[14].xxxx, TEMP[13].xxxx
1005: MAD TEMP[7].xy, TEMP[12].xyyy, TEMP[7].xxxx, TEMP[4].xyyy
1006: MOV TEMP[12].xy, TEMP[7].xyyy
1007: MOV TEMP[12].w, TEMP[9].xxxx
1008: TXL TEMP[12], TEMP[12], SAMP[10], 2D
1009: FSEQ TEMP[13].x, TEMP[4].zzzz, IMM[1].zzzz
1010: AND TEMP[13].x, TEMP[13].xxxx, IMM[2].zzzz
1011: MOV TEMP[14].xy, TEMP[7].xyyy
1012: MOV TEMP[14].w, TEMP[9].xxxx
1013: TXL TEMP[14], TEMP[14], SAMP[8], 2D
1014: FSEQ TEMP[15].x, TEMP[4].zzzz, IMM[3].xxxx
1015: AND TEMP[15].x, TEMP[15].xxxx, IMM[2].zzzz
1016: MOV TEMP[16].xy, TEMP[7].xyyy
1017: MOV TEMP[16].w, TEMP[9].xxxx
1018: TXL TEMP[16], TEMP[16], SAMP[6], 2D
1019: FSEQ TEMP[17].x, TEMP[4].zzzz, IMM[2].wwww
1020: AND TEMP[17].x, TEMP[17].xxxx, IMM[2].zzzz
1021: MOV TEMP[18].xy, TEMP[7].xyyy
1022: MOV TEMP[18].w, TEMP[9].xxxx
1023: TXL TEMP[18], TEMP[18], SAMP[4], 2D
1024: FSEQ TEMP[19].x, TEMP[4].zzzz, IMM[2].zzzz
1025: AND TEMP[19].x, TEMP[19].xxxx, IMM[2].zzzz
1026: MOV TEMP[7].xy, TEMP[7].xyyy
1027: MOV TEMP[7].w, TEMP[9].xxxx
1028: TXL TEMP[7], TEMP[7], SAMP[2], 2D
1029: FSEQ TEMP[4].x, TEMP[4].zzzz, IMM[3].yyyy
1030: AND TEMP[4].x, TEMP[4].xxxx, IMM[2].zzzz
1031: MUL TEMP[4], TEMP[7], TEMP[4].xxxx
1032: MAD TEMP[4], TEMP[18], TEMP[19].xxxx, TEMP[4]
1033: MAD TEMP[4], TEMP[16], TEMP[17].xxxx, TEMP[4]
1034: MAD TEMP[4], TEMP[14], TEMP[15].xxxx, TEMP[4]
1035: MAD TEMP[4].yw, TEMP[12], TEMP[13].xxxx, TEMP[4]
1036: MAD TEMP[4].xy, TEMP[4].wyyy, IMM[2].wwww, IMM[3].zzzz
1037: DP2 TEMP[7].x, TEMP[4].xyyy, TEMP[4].xyyy
1038: MOV_SAT TEMP[37].x, TEMP[7].xxxx
1039: MOV TEMP[7].x, IMM[3].yyyy
1040: MOV TEMP[7].y, TEMP[8].xxxx
1041: MOV TEMP[7].z, TEMP[8].yyyy
1042: MOV TEMP[8].y, IMM[3].yyyy
1043: MOV TEMP[8].x, TEMP[10].yyyy
1044: MOV TEMP[8].z, TEMP[10].xxxx
1045: MOV TEMP[9].z, IMM[3].yyyy
1046: MOV TEMP[9].xy, TEMP[4].xyxx
1047: MOV TEMP[4].w, IMM[2].zzzz
1048: MUL TEMP[7].xyz, TEMP[7].xyzz, TEMP[3].xxxx
1049: MAD TEMP[7].xyz, TEMP[8].xyzz, TEMP[3].yyyy, TEMP[7].xyzz
1050: MAD TEMP[3].xyz, TEMP[9].xyzz, TEMP[3].zzzz, TEMP[7].xyzz
1051: MUL TEMP[5].xyz, IN[1].xxxx, TEMP[5].xyzz
1052: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[6].xyzz, TEMP[5].xyzz
1053: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[3].xyzz, TEMP[5].xyzz
1054: DP4 TEMP[3].x, TEMP[4], TEMP[4]
1055: RSQ TEMP[3].x, TEMP[3].xxxx
1056: MUL TEMP[3].xyz, TEMP[4], TEMP[3].xxxx
1057: MUL TEMP[3].xyz, TEMP[3].xyzz, IN[0].wwww
1058: ADD TEMP[3].xyz, IN[3].yzww, -TEMP[3].xyzz
1059: DP3 TEMP[4].x, TEMP[3].xyzz, TEMP[3].xyzz
1060: RSQ TEMP[4].x, TEMP[4].xxxx
1061: MUL TEMP[3].xyz, TEMP[3].xyzz, TEMP[4].xxxx
1062: MOV TEMP[4].xy, IN[2].xyyy
1063: MOV TEMP[4].w, IN[2].wwww
1064: TXP TEMP[4].x, TEMP[4], SAMP[0], 2D
1065: MOV TEMP[5].w, IMM[3].yyyy
1066: MUL TEMP[5].xyz, TEMP[11].xyzz, TEMP[0].xyzz
1067: ADD TEMP[0].xyz, TEMP[1].xyzz, TEMP[2].xyzz
1068: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz
1069: RSQ TEMP[2].x, TEMP[2].xxxx
1070: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[2].xxxx
1071: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[0].xyzz
1072: MAX TEMP[0].x, IMM[3].wwww, TEMP[0].xxxx
1073: MUL TEMP[2].x, IMM[4].xxxx, IN[1].wwww
1074: POW TEMP[0].x, TEMP[0].xxxx, TEMP[2].xxxx
1075: MOV_SAT TEMP[0].x, TEMP[0].xxxx
1076: MOV TEMP[2].w, IMM[3].yyyy
1077: MOV TEMP[2].xyz, CONST[23].xyzx
1078: MOV TEMP[6].w, IMM[2].zzzz
1079: MUL TEMP[7].x, IMM[2].wwww, TEMP[0].xxxx
1080: ADD TEMP[7].x, IMM[3].xxxx, -TEMP[7].xxxx
1081: MUL TEMP[7].x, TEMP[0].xxxx, TEMP[7].xxxx
1082: MUL TEMP[0].x, TEMP[0].xxxx, TEMP[7].xxxx
1083: MUL TEMP[0].x, TEMP[0].xxxx, IN[1].wwww
1084: MUL TEMP[7].xyz, TEMP[11].xyzz, CONST[4].xyzz
1085: DP3 TEMP[1].x, TEMP[3].xyzz, TEMP[1].xyzz
1086: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1087: MUL TEMP[3], CONST[24], IMM[2].wwww
1088: MUL TEMP[3], TEMP[3], TEMP[4].xxxx
1089: MAX TEMP[2], TEMP[3], TEMP[2]
1090: MIN TEMP[2].xyz, TEMP[2], IMM[4].yyyz
1091: MUL TEMP[2].xyz, TEMP[2].xyzz, TEMP[11].xyzz
1092: MAD TEMP[1].xyz, TEMP[7].xyzz, TEMP[1].xxxx, TEMP[2].xyzz
1093: MAD TEMP[0].xyz, CONST[4].xyzz, TEMP[0].xxxx, TEMP[1].xyzz
1094: MUL TEMP[0].xyz, TEMP[0].xyzz, TEMP[4].xxxx
1095: MUL TEMP[6].xyz, TEMP[0].xyzz, IMM[0].wwww
1096: ADD TEMP[0].xyz, TEMP[5], TEMP[6]
1097: MAD TEMP[1].x, IN[3].xxxx, CONST[3].zzzz, CONST[3].wwww
1098: MOV_SAT TEMP[1].x, TEMP[1].xxxx
1099: LRP TEMP[5].xyz, TEMP[1].xxxx, TEMP[0].xyzz, CONST[2].xyzz
1100: MOV TEMP[5].w, IMM[2].zzzz
1101: MOV OUT[0], TEMP[5]
1102: END
; ModuleID = 'tgsi'
define void @main([9 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
main_body:
%22 = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !tbaa !0
%24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0)
%25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4)
%26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8)
%27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16)
%28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20)
%29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24)
%30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32)
%31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36)
%32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40)
%33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56)
%34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60)
%35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64)
%36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68)
%37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72)
%38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256)
%39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272)
%40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288)
%41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304)
%42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320)
%43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336)
%44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352)
%45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368)
%46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 372)
%47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 376)
%48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 384)
%49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 388)
%50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 392)
%51 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
%52 = load <32 x i8>, <32 x i8> addrspace(2)* %51, align 32, !tbaa !0
%53 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
%54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !tbaa !0
%55 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 1
%56 = load <8 x i32>, <8 x i32> addrspace(2)* %55, align 32, !tbaa !0
%57 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 1
%58 = load <4 x i32>, <4 x i32> addrspace(2)* %57, align 16, !tbaa !0
%59 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 2
%60 = load <8 x i32>, <8 x i32> addrspace(2)* %59, align 32, !tbaa !0
%61 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 2
%62 = load <4 x i32>, <4 x i32> addrspace(2)* %61, align 16, !tbaa !0
%63 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 3
%64 = load <8 x i32>, <8 x i32> addrspace(2)* %63, align 32, !tbaa !0
%65 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 3
%66 = load <4 x i32>, <4 x i32> addrspace(2)* %65, align 16, !tbaa !0
%67 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 4
%68 = load <8 x i32>, <8 x i32> addrspace(2)* %67, align 32, !tbaa !0
%69 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 4
%70 = load <4 x i32>, <4 x i32> addrspace(2)* %69, align 16, !tbaa !0
%71 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 5
%72 = load <8 x i32>, <8 x i32> addrspace(2)* %71, align 32, !tbaa !0
%73 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 5
%74 = load <4 x i32>, <4 x i32> addrspace(2)* %73, align 16, !tbaa !0
%75 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 6
%76 = load <8 x i32>, <8 x i32> addrspace(2)* %75, align 32, !tbaa !0
%77 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 6
%78 = load <4 x i32>, <4 x i32> addrspace(2)* %77, align 16, !tbaa !0
%79 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 7
%80 = load <8 x i32>, <8 x i32> addrspace(2)* %79, align 32, !tbaa !0
%81 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 7
%82 = load <4 x i32>, <4 x i32> addrspace(2)* %81, align 16, !tbaa !0
%83 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 8
%84 = load <8 x i32>, <8 x i32> addrspace(2)* %83, align 32, !tbaa !0
%85 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 8
%86 = load <4 x i32>, <4 x i32> addrspace(2)* %85, align 16, !tbaa !0
%87 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 9
%88 = load <8 x i32>, <8 x i32> addrspace(2)* %87, align 32, !tbaa !0
%89 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 9
%90 = load <4 x i32>, <4 x i32> addrspace(2)* %89, align 16, !tbaa !0
%91 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %3, i64 0, i64 10
%92 = load <8 x i32>, <8 x i32> addrspace(2)* %91, align 32, !tbaa !0
%93 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %2, i64 0, i64 10
%94 = load <4 x i32>, <4 x i32> addrspace(2)* %93, align 16, !tbaa !0
%95 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7)
%96 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7)
%97 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7)
%98 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7)
%99 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7)
%100 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7)
%101 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7)
%102 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7)
%103 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7)
%104 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7)
%105 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7)
%106 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7)
%107 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7)
%108 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7)
%109 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7)
%110 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7)
%111 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7)
%112 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7)
%113 = call float @llvm.SI.fs.interp(i32 3, i32 4, i32 %5, <2 x i32> %7)
%114 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7)
%115 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7)
%116 = fmul float %27, %27
%117 = fmul float %28, %28
%118 = fadd float %117, %116
%119 = fmul float %29, %29
%120 = fadd float %118, %119
%121 = call float @llvm.AMDGPU.rsq.clamped.f32(float %120)
%122 = fmul float %27, %121
%123 = fmul float %28, %121
%124 = fmul float %29, %121
%125 = fsub float %24, %110
%126 = fsub float %25, %111
%127 = fsub float %26, %112
%128 = fmul float %125, %125
%129 = fmul float %126, %126
%130 = fadd float %129, %128
%131 = fmul float %127, %127
%132 = fadd float %130, %131
%133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132)
%134 = fmul float %125, %133
%135 = fmul float %126, %133
%136 = fmul float %127, %133
%137 = call float @llvm.fabs.f32(float %107)
%138 = call float @llvm.fabs.f32(float %108)
%139 = call float @llvm.fabs.f32(float %109)
%140 = fmul float %137, %137
%141 = fmul float %138, %138
%142 = fadd float %141, %140
%143 = fmul float %139, %139
%144 = fadd float %142, %143
%145 = call float @llvm.AMDGPU.rsq.clamped.f32(float %144)
%146 = fmul float %137, %145
%147 = fadd float %146, 0xBFC99999A0000000
%148 = fmul float %138, %145
%149 = fadd float %148, 0xBFC99999A0000000
%150 = fmul float %139, %145
%151 = fadd float %150, 0xBFC99999A0000000
%152 = fmul float %147, 7.000000e+00
%153 = fmul float %149, 7.000000e+00
%154 = fmul float %151, 7.000000e+00
%155 = call float @llvm.maxnum.f32(float %152, float 0x3F847AE140000000)
%156 = call float @llvm.maxnum.f32(float %153, float 0x3F847AE140000000)
%157 = call float @llvm.maxnum.f32(float %154, float 0x3F847AE140000000)
%158 = fadd float %155, %156
%159 = fadd float %158, %157
%160 = fdiv float 1.000000e+00, %159
%161 = fmul float %155, %160
%162 = fmul float %156, %160
%163 = fmul float %157, %160
%164 = fadd float %95, 5.000000e-01
%165 = fadd float %96, 5.000000e-01
%166 = fadd float %97, 5.000000e-01
%167 = call float @llvm.floor.f32(float %164)
%168 = call float @llvm.floor.f32(float %165)
%169 = call float @llvm.floor.f32(float %166)
%170 = fmul float %167, %38
%171 = call float @llvm.floor.f32(float %170)
%172 = fmul float %171, %38
%173 = fcmp ult float %167, 6.400000e+01
br i1 %173, label %ELSE, label %IF
IF: ; preds = %main_body
%174 = fadd float %167, -6.400000e+01
%175 = fmul float %174, %39
%176 = call float @llvm.floor.f32(float %175)
%177 = fmul float %176, %39
%178 = call float @llvm.floor.f32(float %175)
%179 = fsub float %175, %178
%180 = call float @llvm.floor.f32(float %177)
%181 = fsub float %177, %180
%182 = call float @llvm.floor.f32(float %177)
%183 = fadd float %182, 4.000000e+00
br label %ENDIF
ELSE: ; preds = %main_body
%184 = call float @llvm.floor.f32(float %170)
%185 = fsub float %170, %184
%186 = call float @llvm.floor.f32(float %172)
%187 = fsub float %172, %186
%188 = call float @llvm.floor.f32(float %172)
br label %ENDIF
ENDIF: ; preds = %ELSE, %IF
%temp32.0 = phi float [ %179, %IF ], [ %185, %ELSE ]
%temp33.0 = phi float [ %181, %IF ], [ %187, %ELSE ]
%temp34.0 = phi float [ %183, %IF ], [ %188, %ELSE ]
%temp20.0 = phi float [ %39, %IF ], [ %38, %ELSE ]
%189 = fmul float %168, %38
%190 = call float @llvm.floor.f32(float %189)
%191 = fmul float %190, %38
%192 = fcmp ult float %168, 6.400000e+01
br i1 %192, label %ELSE154, label %IF153
IF153: ; preds = %ENDIF
%193 = fadd float %168, -6.400000e+01
%194 = fmul float %193, %39
%195 = call float @llvm.floor.f32(float %194)
%196 = fmul float %195, %39
%197 = call float @llvm.floor.f32(float %194)
%198 = fsub float %194, %197
%199 = call float @llvm.floor.f32(float %196)
%200 = fsub float %196, %199
%201 = call float @llvm.floor.f32(float %196)
%202 = fadd float %201, 4.000000e+00
br label %ENDIF152
ELSE154: ; preds = %ENDIF
%203 = call float @llvm.floor.f32(float %189)
%204 = fsub float %189, %203
%205 = call float @llvm.floor.f32(float %191)
%206 = fsub float %191, %205
%207 = call float @llvm.floor.f32(float %191)
br label %ENDIF152
ENDIF152: ; preds = %ELSE154, %IF153
%temp40.0 = phi float [ %198, %IF153 ], [ %204, %ELSE154 ]
%temp41.0 = phi float [ %200, %IF153 ], [ %206, %ELSE154 ]
%temp42.0 = phi float [ %202, %IF153 ], [ %207, %ELSE154 ]
%temp24.0 = phi float [ %39, %IF153 ], [ %38, %ELSE154 ]
%208 = fmul float %169, %38
%209 = call float @llvm.floor.f32(float %208)
%210 = fmul float %209, %38
%211 = fcmp ult float %169, 6.400000e+01
br i1 %211, label %ELSE157, label %IF156
IF156: ; preds = %ENDIF152
%212 = fadd float %169, -6.400000e+01
%213 = fmul float %212, %39
%214 = call float @llvm.floor.f32(float %213)
%215 = fmul float %214, %39
%216 = call float @llvm.floor.f32(float %213)
%217 = fsub float %213, %216
%218 = call float @llvm.floor.f32(float %215)
%219 = fsub float %215, %218
%220 = call float @llvm.floor.f32(float %215)
%221 = fadd float %220, 4.000000e+00
br label %ENDIF155
ELSE157: ; preds = %ENDIF152
%222 = call float @llvm.floor.f32(float %208)
%223 = fsub float %208, %222
%224 = call float @llvm.floor.f32(float %210)
%225 = fsub float %210, %224
%226 = call float @llvm.floor.f32(float %210)
br label %ENDIF155
ENDIF155: ; preds = %ELSE157, %IF156
%temp28.0 = phi float [ %39, %IF156 ], [ %38, %ELSE157 ]
%temp18.0 = phi float [ %221, %IF156 ], [ %226, %ELSE157 ]
%temp17.0 = phi float [ %219, %IF156 ], [ %225, %ELSE157 ]
%temp16.0 = phi float [ %217, %IF156 ], [ %223, %ELSE157 ]
%227 = fsub float %110, %24
%228 = fsub float %111, %25
%229 = fsub float %112, %26
%230 = fmul float %227, %227
%231 = fmul float %228, %228
%232 = fadd float %231, %230
%233 = fmul float %229, %229
%234 = fadd float %232, %233
%235 = fmul float %44, %234
%236 = call float @llvm.log2.f32(float %235)
%237 = fmul float %236, 0x3FE62E4300000000
%238 = fmul float %237, %43
%239 = fcmp une float %38, %temp20.0
%.sink212 = select i1 %239, float %41, float %40
%temp48.0 = select i1 %239, float 1.953125e-03, float 3.906250e-03
%240 = fdiv float 1.000000e+00, %.sink212
%241 = fmul float %110, %240
%242 = fmul float %111, %240
%243 = call float @llvm.floor.f32(float %241)
%244 = fsub float %241, %243
%245 = call float @llvm.floor.f32(float %242)
%246 = fsub float %242, %245
%247 = fmul float %42, 2.000000e+00
%248 = fmul float %247, %temp48.0
%249 = fsub float 1.000000e+00, %248
%250 = fmul float %temp48.0, %42
%251 = fmul float %244, %249
%252 = fadd float %251, %250
%253 = fmul float %246, %249
%254 = fadd float %253, %250
%255 = fmul float %252, %temp20.0
%256 = fadd float %255, %temp32.0
%257 = fmul float %254, %temp20.0
%258 = fadd float %257, %temp33.0
%259 = bitcast float %256 to i32
%260 = bitcast float %258 to i32
%261 = bitcast float %238 to i32
%262 = insertelement <4 x i32> undef, i32 %259, i32 0
%263 = insertelement <4 x i32> %262, i32 %260, i32 1
%264 = insertelement <4 x i32> %263, i32 %261, i32 2
%265 = bitcast <8 x i32> %88 to <32 x i8>
%266 = bitcast <4 x i32> %90 to <16 x i8>
%267 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %264, <32 x i8> %265, <16 x i8> %266, i32 2)
%268 = extractelement <4 x float> %267, i32 0
%269 = extractelement <4 x float> %267, i32 1
%270 = extractelement <4 x float> %267, i32 2
%271 = fcmp oeq float %temp34.0, 4.000000e+00
%272 = select i1 %271, float 1.000000e+00, float 0.000000e+00
%273 = bitcast float %256 to i32
%274 = bitcast float %258 to i32
%275 = bitcast float %238 to i32
%276 = insertelement <4 x i32> undef, i32 %273, i32 0
%277 = insertelement <4 x i32> %276, i32 %274, i32 1
%278 = insertelement <4 x i32> %277, i32 %275, i32 2
%279 = bitcast <8 x i32> %80 to <32 x i8>
%280 = bitcast <4 x i32> %82 to <16 x i8>
%281 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %278, <32 x i8> %279, <16 x i8> %280, i32 2)
%282 = extractelement <4 x float> %281, i32 0
%283 = extractelement <4 x float> %281, i32 1
%284 = extractelement <4 x float> %281, i32 2
%285 = fcmp oeq float %temp34.0, 3.000000e+00
%286 = select i1 %285, float 1.000000e+00, float 0.000000e+00
%287 = bitcast float %256 to i32
%288 = bitcast float %258 to i32
%289 = bitcast float %238 to i32
%290 = insertelement <4 x i32> undef, i32 %287, i32 0
%291 = insertelement <4 x i32> %290, i32 %288, i32 1
%292 = insertelement <4 x i32> %291, i32 %289, i32 2
%293 = bitcast <8 x i32> %72 to <32 x i8>
%294 = bitcast <4 x i32> %74 to <16 x i8>
%295 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %292, <32 x i8> %293, <16 x i8> %294, i32 2)
%296 = extractelement <4 x float> %295, i32 0
%297 = extractelement <4 x float> %295, i32 1
%298 = extractelement <4 x float> %295, i32 2
%299 = fcmp oeq float %temp34.0, 2.000000e+00
%300 = select i1 %299, float 1.000000e+00, float 0.000000e+00
%301 = bitcast float %256 to i32
%302 = bitcast float %258 to i32
%303 = bitcast float %238 to i32
%304 = insertelement <4 x i32> undef, i32 %301, i32 0
%305 = insertelement <4 x i32> %304, i32 %302, i32 1
%306 = insertelement <4 x i32> %305, i32 %303, i32 2
%307 = bitcast <8 x i32> %64 to <32 x i8>
%308 = bitcast <4 x i32> %66 to <16 x i8>
%309 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %306, <32 x i8> %307, <16 x i8> %308, i32 2)
%310 = extractelement <4 x float> %309, i32 0
%311 = extractelement <4 x float> %309, i32 1
%312 = extractelement <4 x float> %309, i32 2
%313 = fcmp oeq float %temp34.0, 1.000000e+00
%314 = select i1 %313, float 1.000000e+00, float 0.000000e+00
%315 = bitcast float %256 to i32
%316 = bitcast float %258 to i32
%317 = bitcast float %238 to i32
%318 = insertelement <4 x i32> undef, i32 %315, i32 0
%319 = insertelement <4 x i32> %318, i32 %316, i32 1
%320 = insertelement <4 x i32> %319, i32 %317, i32 2
%321 = bitcast <8 x i32> %56 to <32 x i8>
%322 = bitcast <4 x i32> %58 to <16 x i8>
%323 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %320, <32 x i8> %321, <16 x i8> %322, i32 2)
%324 = extractelement <4 x float> %323, i32 0
%325 = extractelement <4 x float> %323, i32 1
%326 = extractelement <4 x float> %323, i32 2
%327 = fcmp oeq float %temp34.0, 0.000000e+00
%328 = select i1 %327, float 1.000000e+00, float 0.000000e+00
%329 = fmul float %324, %328
%330 = fmul float %325, %328
%331 = fmul float %326, %328
%332 = fmul float %310, %314
%333 = fadd float %332, %329
%334 = fmul float %311, %314
%335 = fadd float %334, %330
%336 = fmul float %312, %314
%337 = fadd float %336, %331
%338 = fmul float %296, %300
%339 = fadd float %338, %333
%340 = fmul float %297, %300
%341 = fadd float %340, %335
%342 = fmul float %298, %300
%343 = fadd float %342, %337
%344 = fmul float %282, %286
%345 = fadd float %344, %339
%346 = fmul float %283, %286
%347 = fadd float %346, %341
%348 = fmul float %284, %286
%349 = fadd float %348, %343
%350 = fmul float %268, %272
%351 = fadd float %350, %345
%352 = fmul float %269, %272
%353 = fadd float %352, %347
%354 = fmul float %270, %272
%355 = fadd float %354, %349
%356 = fcmp une float %38, %temp20.0
%.sink213 = select i1 %356, float %41, float %40
%temp52.0 = select i1 %356, float 1.953125e-03, float 3.906250e-03
%357 = fdiv float 1.000000e+00, %.sink213
%358 = fmul float %112, %357
%359 = fmul float %111, %357
%360 = call float @llvm.floor.f32(float %358)
%361 = fsub float %358, %360
%362 = call float @llvm.floor.f32(float %359)
%363 = fsub float %359, %362
%364 = fmul float %42, 2.000000e+00
%365 = fmul float %364, %temp52.0
%366 = fsub float 1.000000e+00, %365
%367 = fmul float %temp52.0, %42
%368 = fmul float %361, %366
%369 = fadd float %368, %367
%370 = fmul float %363, %366
%371 = fadd float %370, %367
%372 = fmul float %369, %temp20.0
%373 = fadd float %372, %temp32.0
%374 = fmul float %371, %temp20.0
%375 = fadd float %374, %temp33.0
%376 = bitcast float %373 to i32
%377 = bitcast float %375 to i32
%378 = bitcast float %238 to i32
%379 = insertelement <4 x i32> undef, i32 %376, i32 0
%380 = insertelement <4 x i32> %379, i32 %377, i32 1
%381 = insertelement <4 x i32> %380, i32 %378, i32 2
%382 = bitcast <8 x i32> %88 to <32 x i8>
%383 = bitcast <4 x i32> %90 to <16 x i8>
%384 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %381, <32 x i8> %382, <16 x i8> %383, i32 2)
%385 = extractelement <4 x float> %384, i32 0
%386 = extractelement <4 x float> %384, i32 1
%387 = extractelement <4 x float> %384, i32 2
%388 = fcmp oeq float %temp34.0, 4.000000e+00
%389 = select i1 %388, float 1.000000e+00, float 0.000000e+00
%390 = bitcast float %373 to i32
%391 = bitcast float %375 to i32
%392 = bitcast float %238 to i32
%393 = insertelement <4 x i32> undef, i32 %390, i32 0
%394 = insertelement <4 x i32> %393, i32 %391, i32 1
%395 = insertelement <4 x i32> %394, i32 %392, i32 2
%396 = bitcast <8 x i32> %80 to <32 x i8>
%397 = bitcast <4 x i32> %82 to <16 x i8>
%398 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %395, <32 x i8> %396, <16 x i8> %397, i32 2)
%399 = extractelement <4 x float> %398, i32 0
%400 = extractelement <4 x float> %398, i32 1
%401 = extractelement <4 x float> %398, i32 2
%402 = fcmp oeq float %temp34.0, 3.000000e+00
%403 = select i1 %402, float 1.000000e+00, float 0.000000e+00
%404 = bitcast float %373 to i32
%405 = bitcast float %375 to i32
%406 = bitcast float %238 to i32
%407 = insertelement <4 x i32> undef, i32 %404, i32 0
%408 = insertelement <4 x i32> %407, i32 %405, i32 1
%409 = insertelement <4 x i32> %408, i32 %406, i32 2
%410 = bitcast <8 x i32> %72 to <32 x i8>
%411 = bitcast <4 x i32> %74 to <16 x i8>
%412 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %409, <32 x i8> %410, <16 x i8> %411, i32 2)
%413 = extractelement <4 x float> %412, i32 0
%414 = extractelement <4 x float> %412, i32 1
%415 = extractelement <4 x float> %412, i32 2
%416 = fcmp oeq float %temp34.0, 2.000000e+00
%417 = select i1 %416, float 1.000000e+00, float 0.000000e+00
%418 = bitcast float %373 to i32
%419 = bitcast float %375 to i32
%420 = bitcast float %238 to i32
%421 = insertelement <4 x i32> undef, i32 %418, i32 0
%422 = insertelement <4 x i32> %421, i32 %419, i32 1
%423 = insertelement <4 x i32> %422, i32 %420, i32 2
%424 = bitcast <8 x i32> %64 to <32 x i8>
%425 = bitcast <4 x i32> %66 to <16 x i8>
%426 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %423, <32 x i8> %424, <16 x i8> %425, i32 2)
%427 = extractelement <4 x float> %426, i32 0
%428 = extractelement <4 x float> %426, i32 1
%429 = extractelement <4 x float> %426, i32 2
%430 = fcmp oeq float %temp34.0, 1.000000e+00
%431 = select i1 %430, float 1.000000e+00, float 0.000000e+00
%432 = bitcast float %373 to i32
%433 = bitcast float %375 to i32
%434 = bitcast float %238 to i32
%435 = insertelement <4 x i32> undef, i32 %432, i32 0
%436 = insertelement <4 x i32> %435, i32 %433, i32 1
%437 = insertelement <4 x i32> %436, i32 %434, i32 2
%438 = bitcast <8 x i32> %56 to <32 x i8>
%439 = bitcast <4 x i32> %58 to <16 x i8>
%440 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %437, <32 x i8> %438, <16 x i8> %439, i32 2)
%441 = extractelement <4 x float> %440, i32 0
%442 = extractelement <4 x float> %440, i32 1
%443 = extractelement <4 x float> %440, i32 2
%444 = fcmp oeq float %temp34.0, 0.000000e+00
%445 = select i1 %444, float 1.000000e+00, float 0.000000e+00
%446 = fmul float %441, %445
%447 = fmul float %442, %445
%448 = fmul float %443, %445
%449 = fmul float %427, %431
%450 = fadd float %449, %446
%451 = fmul float %428, %431
%452 = fadd float %451, %447
%453 = fmul float %429, %431
%454 = fadd float %453, %448
%455 = fmul float %413, %417
%456 = fadd float %455, %450
%457 = fmul float %414, %417
%458 = fadd float %457, %452
%459 = fmul float %415, %417
%460 = fadd float %459, %454
%461 = fmul float %399, %403
%462 = fadd float %461, %456
%463 = fmul float %400, %403
%464 = fadd float %463, %458
%465 = fmul float %401, %403
%466 = fadd float %465, %460
%467 = fmul float %385, %389
%468 = fadd float %467, %462
%469 = fmul float %386, %389
%470 = fadd float %469, %464
%471 = fmul float %387, %389
%472 = fadd float %471, %466
%473 = fcmp une float %38, %temp20.0
%.sink214 = select i1 %473, float %41, float %40
%temp56.0 = select i1 %473, float 1.953125e-03, float 3.906250e-03
%474 = fdiv float 1.000000e+00, %.sink214
%475 = fmul float %112, %474
%476 = fmul float %110, %474
%477 = call float @llvm.floor.f32(float %475)
%478 = fsub float %475, %477
%479 = call float @llvm.floor.f32(float %476)
%480 = fsub float %476, %479
%481 = fmul float %42, 2.000000e+00
%482 = fmul float %481, %temp56.0
%483 = fsub float 1.000000e+00, %482
%484 = fmul float %temp56.0, %42
%485 = fmul float %478, %483
%486 = fadd float %485, %484
%487 = fmul float %480, %483
%488 = fadd float %487, %484
%489 = fmul float %486, %temp20.0
%490 = fadd float %489, %temp32.0
%491 = fmul float %488, %temp20.0
%492 = fadd float %491, %temp33.0
%493 = bitcast float %490 to i32
%494 = bitcast float %492 to i32
%495 = bitcast float %238 to i32
%496 = insertelement <4 x i32> undef, i32 %493, i32 0
%497 = insertelement <4 x i32> %496, i32 %494, i32 1
%498 = insertelement <4 x i32> %497, i32 %495, i32 2
%499 = bitcast <8 x i32> %88 to <32 x i8>
%500 = bitcast <4 x i32> %90 to <16 x i8>
%501 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %498, <32 x i8> %499, <16 x i8> %500, i32 2)
%502 = extractelement <4 x float> %501, i32 0
%503 = extractelement <4 x float> %501, i32 1
%504 = extractelement <4 x float> %501, i32 2
%505 = fcmp oeq float %temp34.0, 4.000000e+00
%506 = select i1 %505, float 1.000000e+00, float 0.000000e+00
%507 = bitcast float %490 to i32
%508 = bitcast float %492 to i32
%509 = bitcast float %238 to i32
%510 = insertelement <4 x i32> undef, i32 %507, i32 0
%511 = insertelement <4 x i32> %510, i32 %508, i32 1
%512 = insertelement <4 x i32> %511, i32 %509, i32 2
%513 = bitcast <8 x i32> %80 to <32 x i8>
%514 = bitcast <4 x i32> %82 to <16 x i8>
%515 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %512, <32 x i8> %513, <16 x i8> %514, i32 2)
%516 = extractelement <4 x float> %515, i32 0
%517 = extractelement <4 x float> %515, i32 1
%518 = extractelement <4 x float> %515, i32 2
%519 = fcmp oeq float %temp34.0, 3.000000e+00
%520 = select i1 %519, float 1.000000e+00, float 0.000000e+00
%521 = bitcast float %490 to i32
%522 = bitcast float %492 to i32
%523 = bitcast float %238 to i32
%524 = insertelement <4 x i32> undef, i32 %521, i32 0
%525 = insertelement <4 x i32> %524, i32 %522, i32 1
%526 = insertelement <4 x i32> %525, i32 %523, i32 2
%527 = bitcast <8 x i32> %72 to <32 x i8>
%528 = bitcast <4 x i32> %74 to <16 x i8>
%529 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %526, <32 x i8> %527, <16 x i8> %528, i32 2)
%530 = extractelement <4 x float> %529, i32 0
%531 = extractelement <4 x float> %529, i32 1
%532 = extractelement <4 x float> %529, i32 2
%533 = fcmp oeq float %temp34.0, 2.000000e+00
%534 = select i1 %533, float 1.000000e+00, float 0.000000e+00
%535 = bitcast float %490 to i32
%536 = bitcast float %492 to i32
%537 = bitcast float %238 to i32
%538 = insertelement <4 x i32> undef, i32 %535, i32 0
%539 = insertelement <4 x i32> %538, i32 %536, i32 1
%540 = insertelement <4 x i32> %539, i32 %537, i32 2
%541 = bitcast <8 x i32> %64 to <32 x i8>
%542 = bitcast <4 x i32> %66 to <16 x i8>
%543 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %540, <32 x i8> %541, <16 x i8> %542, i32 2)
%544 = extractelement <4 x float> %543, i32 0
%545 = extractelement <4 x float> %543, i32 1
%546 = extractelement <4 x float> %543, i32 2
%547 = fcmp oeq float %temp34.0, 1.000000e+00
%548 = select i1 %547, float 1.000000e+00, float 0.000000e+00
%549 = bitcast float %490 to i32
%550 = bitcast float %492 to i32
%551 = bitcast float %238 to i32
%552 = insertelement <4 x i32> undef, i32 %549, i32 0
%553 = insertelement <4 x i32> %552, i32 %550, i32 1
%554 = insertelement <4 x i32> %553, i32 %551, i32 2
%555 = bitcast <8 x i32> %56 to <32 x i8>
%556 = bitcast <4 x i32> %58 to <16 x i8>
%557 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %554, <32 x i8> %555, <16 x i8> %556, i32 2)
%558 = extractelement <4 x float> %557, i32 0
%559 = extractelement <4 x float> %557, i32 1
%560 = extractelement <4 x float> %557, i32 2
%561 = fcmp oeq float %temp34.0, 0.000000e+00
%562 = select i1 %561, float 1.000000e+00, float 0.000000e+00
%563 = fmul float %558, %562
%564 = fmul float %559, %562
%565 = fmul float %560, %562
%566 = fmul float %544, %548
%567 = fadd float %566, %563
%568 = fmul float %545, %548
%569 = fadd float %568, %564
%570 = fmul float %546, %548
%571 = fadd float %570, %565
%572 = fmul float %530, %534
%573 = fadd float %572, %567
%574 = fmul float %531, %534
%575 = fadd float %574, %569
%576 = fmul float %532, %534
%577 = fadd float %576, %571
%578 = fmul float %516, %520
%579 = fadd float %578, %573
%580 = fmul float %517, %520
%581 = fadd float %580, %575
%582 = fmul float %518, %520
%583 = fadd float %582, %577
%584 = fmul float %502, %506
%585 = fadd float %584, %579
%586 = fmul float %503, %506
%587 = fadd float %586, %581
%588 = fmul float %504, %506
%589 = fadd float %588, %583
%590 = fcmp une float %38, %temp24.0
%.sink215 = select i1 %590, float %41, float %40
%temp60.0 = select i1 %590, float 1.953125e-03, float 3.906250e-03
%591 = fdiv float 1.000000e+00, %.sink215
%592 = fmul float %110, %591
%593 = fmul float %111, %591
%594 = call float @llvm.floor.f32(float %592)
%595 = fsub float %592, %594
%596 = call float @llvm.floor.f32(float %593)
%597 = fsub float %593, %596
%598 = fmul float %42, 2.000000e+00
%599 = fmul float %598, %temp60.0
%600 = fsub float 1.000000e+00, %599
%601 = fmul float %temp60.0, %42
%602 = fmul float %595, %600
%603 = fadd float %602, %601
%604 = fmul float %597, %600
%605 = fadd float %604, %601
%606 = fmul float %603, %temp24.0
%607 = fadd float %606, %temp40.0
%608 = fmul float %605, %temp24.0
%609 = fadd float %608, %temp41.0
%610 = bitcast float %607 to i32
%611 = bitcast float %609 to i32
%612 = bitcast float %238 to i32
%613 = insertelement <4 x i32> undef, i32 %610, i32 0
%614 = insertelement <4 x i32> %613, i32 %611, i32 1
%615 = insertelement <4 x i32> %614, i32 %612, i32 2
%616 = bitcast <8 x i32> %88 to <32 x i8>
%617 = bitcast <4 x i32> %90 to <16 x i8>
%618 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %615, <32 x i8> %616, <16 x i8> %617, i32 2)
%619 = extractelement <4 x float> %618, i32 0
%620 = extractelement <4 x float> %618, i32 1
%621 = extractelement <4 x float> %618, i32 2
%622 = fcmp oeq float %temp42.0, 4.000000e+00
%623 = select i1 %622, float 1.000000e+00, float 0.000000e+00
%624 = bitcast float %607 to i32
%625 = bitcast float %609 to i32
%626 = bitcast float %238 to i32
%627 = insertelement <4 x i32> undef, i32 %624, i32 0
%628 = insertelement <4 x i32> %627, i32 %625, i32 1
%629 = insertelement <4 x i32> %628, i32 %626, i32 2
%630 = bitcast <8 x i32> %80 to <32 x i8>
%631 = bitcast <4 x i32> %82 to <16 x i8>
%632 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %629, <32 x i8> %630, <16 x i8> %631, i32 2)
%633 = extractelement <4 x float> %632, i32 0
%634 = extractelement <4 x float> %632, i32 1
%635 = extractelement <4 x float> %632, i32 2
%636 = fcmp oeq float %temp42.0, 3.000000e+00
%637 = select i1 %636, float 1.000000e+00, float 0.000000e+00
%638 = bitcast float %607 to i32
%639 = bitcast float %609 to i32
%640 = bitcast float %238 to i32
%641 = insertelement <4 x i32> undef, i32 %638, i32 0
%642 = insertelement <4 x i32> %641, i32 %639, i32 1
%643 = insertelement <4 x i32> %642, i32 %640, i32 2
%644 = bitcast <8 x i32> %72 to <32 x i8>
%645 = bitcast <4 x i32> %74 to <16 x i8>
%646 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %643, <32 x i8> %644, <16 x i8> %645, i32 2)
%647 = extractelement <4 x float> %646, i32 0
%648 = extractelement <4 x float> %646, i32 1
%649 = extractelement <4 x float> %646, i32 2
%650 = fcmp oeq float %temp42.0, 2.000000e+00
%651 = select i1 %650, float 1.000000e+00, float 0.000000e+00
%652 = bitcast float %607 to i32
%653 = bitcast float %609 to i32
%654 = bitcast float %238 to i32
%655 = insertelement <4 x i32> undef, i32 %652, i32 0
%656 = insertelement <4 x i32> %655, i32 %653, i32 1
%657 = insertelement <4 x i32> %656, i32 %654, i32 2
%658 = bitcast <8 x i32> %64 to <32 x i8>
%659 = bitcast <4 x i32> %66 to <16 x i8>
%660 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %657, <32 x i8> %658, <16 x i8> %659, i32 2)
%661 = extractelement <4 x float> %660, i32 0
%662 = extractelement <4 x float> %660, i32 1
%663 = extractelement <4 x float> %660, i32 2
%664 = fcmp oeq float %temp42.0, 1.000000e+00
%665 = select i1 %664, float 1.000000e+00, float 0.000000e+00
%666 = bitcast float %607 to i32
%667 = bitcast float %609 to i32
%668 = bitcast float %238 to i32
%669 = insertelement <4 x i32> undef, i32 %666, i32 0
%670 = insertelement <4 x i32> %669, i32 %667, i32 1
%671 = insertelement <4 x i32> %670, i32 %668, i32 2
%672 = bitcast <8 x i32> %56 to <32 x i8>
%673 = bitcast <4 x i32> %58 to <16 x i8>
%674 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %671, <32 x i8> %672, <16 x i8> %673, i32 2)
%675 = extractelement <4 x float> %674, i32 0
%676 = extractelement <4 x float> %674, i32 1
%677 = extractelement <4 x float> %674, i32 2
%678 = fcmp oeq float %temp42.0, 0.000000e+00
%679 = select i1 %678, float 1.000000e+00, float 0.000000e+00
%680 = fmul float %675, %679
%681 = fmul float %676, %679
%682 = fmul float %677, %679
%683 = fmul float %661, %665
%684 = fadd float %683, %680
%685 = fmul float %662, %665
%686 = fadd float %685, %681
%687 = fmul float %663, %665
%688 = fadd float %687, %682
%689 = fmul float %647, %651
%690 = fadd float %689, %684
%691 = fmul float %648, %651
%692 = fadd float %691, %686
%693 = fmul float %649, %651
%694 = fadd float %693, %688
%695 = fmul float %633, %637
%696 = fadd float %695, %690
%697 = fmul float %634, %637
%698 = fadd float %697, %692
%699 = fmul float %635, %637
%700 = fadd float %699, %694
%701 = fmul float %619, %623
%702 = fadd float %701, %696
%703 = fmul float %620, %623
%704 = fadd float %703, %698
%705 = fmul float %621, %623
%706 = fadd float %705, %700
%707 = fcmp une float %38, %temp24.0
%.sink216 = select i1 %707, float %41, float %40
%temp64.0 = select i1 %707, float 1.953125e-03, float 3.906250e-03
%708 = fdiv float 1.000000e+00, %.sink216
%709 = fmul float %112, %708
%710 = fmul float %111, %708
%711 = call float @llvm.floor.f32(float %709)
%712 = fsub float %709, %711
%713 = call float @llvm.floor.f32(float %710)
%714 = fsub float %710, %713
%715 = fmul float %42, 2.000000e+00
%716 = fmul float %715, %temp64.0
%717 = fsub float 1.000000e+00, %716
%718 = fmul float %temp64.0, %42
%719 = fmul float %712, %717
%720 = fadd float %719, %718
%721 = fmul float %714, %717
%722 = fadd float %721, %718
%723 = fmul float %720, %temp24.0
%724 = fadd float %723, %temp40.0
%725 = fmul float %722, %temp24.0
%726 = fadd float %725, %temp41.0
%727 = bitcast float %724 to i32
%728 = bitcast float %726 to i32
%729 = bitcast float %238 to i32
%730 = insertelement <4 x i32> undef, i32 %727, i32 0
%731 = insertelement <4 x i32> %730, i32 %728, i32 1
%732 = insertelement <4 x i32> %731, i32 %729, i32 2
%733 = bitcast <8 x i32> %88 to <32 x i8>
%734 = bitcast <4 x i32> %90 to <16 x i8>
%735 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %732, <32 x i8> %733, <16 x i8> %734, i32 2)
%736 = extractelement <4 x float> %735, i32 0
%737 = extractelement <4 x float> %735, i32 1
%738 = extractelement <4 x float> %735, i32 2
%739 = fcmp oeq float %temp42.0, 4.000000e+00
%740 = select i1 %739, float 1.000000e+00, float 0.000000e+00
%741 = bitcast float %724 to i32
%742 = bitcast float %726 to i32
%743 = bitcast float %238 to i32
%744 = insertelement <4 x i32> undef, i32 %741, i32 0
%745 = insertelement <4 x i32> %744, i32 %742, i32 1
%746 = insertelement <4 x i32> %745, i32 %743, i32 2
%747 = bitcast <8 x i32> %80 to <32 x i8>
%748 = bitcast <4 x i32> %82 to <16 x i8>
%749 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %746, <32 x i8> %747, <16 x i8> %748, i32 2)
%750 = extractelement <4 x float> %749, i32 0
%751 = extractelement <4 x float> %749, i32 1
%752 = extractelement <4 x float> %749, i32 2
%753 = fcmp oeq float %temp42.0, 3.000000e+00
%754 = select i1 %753, float 1.000000e+00, float 0.000000e+00
%755 = bitcast float %724 to i32
%756 = bitcast float %726 to i32
%757 = bitcast float %238 to i32
%758 = insertelement <4 x i32> undef, i32 %755, i32 0
%759 = insertelement <4 x i32> %758, i32 %756, i32 1
%760 = insertelement <4 x i32> %759, i32 %757, i32 2
%761 = bitcast <8 x i32> %72 to <32 x i8>
%762 = bitcast <4 x i32> %74 to <16 x i8>
%763 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %760, <32 x i8> %761, <16 x i8> %762, i32 2)
%764 = extractelement <4 x float> %763, i32 0
%765 = extractelement <4 x float> %763, i32 1
%766 = extractelement <4 x float> %763, i32 2
%767 = fcmp oeq float %temp42.0, 2.000000e+00
%768 = select i1 %767, float 1.000000e+00, float 0.000000e+00
%769 = bitcast float %724 to i32
%770 = bitcast float %726 to i32
%771 = bitcast float %238 to i32
%772 = insertelement <4 x i32> undef, i32 %769, i32 0
%773 = insertelement <4 x i32> %772, i32 %770, i32 1
%774 = insertelement <4 x i32> %773, i32 %771, i32 2
%775 = bitcast <8 x i32> %64 to <32 x i8>
%776 = bitcast <4 x i32> %66 to <16 x i8>
%777 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %774, <32 x i8> %775, <16 x i8> %776, i32 2)
%778 = extractelement <4 x float> %777, i32 0
%779 = extractelement <4 x float> %777, i32 1
%780 = extractelement <4 x float> %777, i32 2
%781 = fcmp oeq float %temp42.0, 1.000000e+00
%782 = select i1 %781, float 1.000000e+00, float 0.000000e+00
%783 = bitcast float %724 to i32
%784 = bitcast float %726 to i32
%785 = bitcast float %238 to i32
%786 = insertelement <4 x i32> undef, i32 %783, i32 0
%787 = insertelement <4 x i32> %786, i32 %784, i32 1
%788 = insertelement <4 x i32> %787, i32 %785, i32 2
%789 = bitcast <8 x i32> %56 to <32 x i8>
%790 = bitcast <4 x i32> %58 to <16 x i8>
%791 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %788, <32 x i8> %789, <16 x i8> %790, i32 2)
%792 = extractelement <4 x float> %791, i32 0
%793 = extractelement <4 x float> %791, i32 1
%794 = extractelement <4 x float> %791, i32 2
%795 = fcmp oeq float %temp42.0, 0.000000e+00
%796 = select i1 %795, float 1.000000e+00, float 0.000000e+00
%797 = fmul float %792, %796
%798 = fmul float %793, %796
%799 = fmul float %794, %796
%800 = fmul float %778, %782
%801 = fadd float %800, %797
%802 = fmul float %779, %782
%803 = fadd float %802, %798
%804 = fmul float %780, %782
%805 = fadd float %804, %799
%806 = fmul float %764, %768
%807 = fadd float %806, %801
%808 = fmul float %765, %768
%809 = fadd float %808, %803
%810 = fmul float %766, %768
%811 = fadd float %810, %805
%812 = fmul float %750, %754
%813 = fadd float %812, %807
%814 = fmul float %751, %754
%815 = fadd float %814, %809
%816 = fmul float %752, %754
%817 = fadd float %816, %811
%818 = fmul float %736, %740
%819 = fadd float %818, %813
%820 = fmul float %737, %740
%821 = fadd float %820, %815
%822 = fmul float %738, %740
%823 = fadd float %822, %817
%824 = fcmp une float %38, %temp24.0
%.sink217 = select i1 %824, float %41, float %40
%temp68.0 = select i1 %824, float 1.953125e-03, float 3.906250e-03
%825 = fdiv float 1.000000e+00, %.sink217
%826 = fmul float %112, %825
%827 = fmul float %110, %825
%828 = call float @llvm.floor.f32(float %826)
%829 = fsub float %826, %828
%830 = call float @llvm.floor.f32(float %827)
%831 = fsub float %827, %830
%832 = fmul float %42, 2.000000e+00
%833 = fmul float %832, %temp68.0
%834 = fsub float 1.000000e+00, %833
%835 = fmul float %temp68.0, %42
%836 = fmul float %829, %834
%837 = fadd float %836, %835
%838 = fmul float %831, %834
%839 = fadd float %838, %835
%840 = fmul float %837, %temp24.0
%841 = fadd float %840, %temp40.0
%842 = fmul float %839, %temp24.0
%843 = fadd float %842, %temp41.0
%844 = bitcast float %841 to i32
%845 = bitcast float %843 to i32
%846 = bitcast float %238 to i32
%847 = insertelement <4 x i32> undef, i32 %844, i32 0
%848 = insertelement <4 x i32> %847, i32 %845, i32 1
%849 = insertelement <4 x i32> %848, i32 %846, i32 2
%850 = bitcast <8 x i32> %88 to <32 x i8>
%851 = bitcast <4 x i32> %90 to <16 x i8>
%852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %849, <32 x i8> %850, <16 x i8> %851, i32 2)
%853 = extractelement <4 x float> %852, i32 0
%854 = extractelement <4 x float> %852, i32 1
%855 = extractelement <4 x float> %852, i32 2
%856 = fcmp oeq float %temp42.0, 4.000000e+00
%857 = select i1 %856, float 1.000000e+00, float 0.000000e+00
%858 = bitcast float %841 to i32
%859 = bitcast float %843 to i32
%860 = bitcast float %238 to i32
%861 = insertelement <4 x i32> undef, i32 %858, i32 0
%862 = insertelement <4 x i32> %861, i32 %859, i32 1
%863 = insertelement <4 x i32> %862, i32 %860, i32 2
%864 = bitcast <8 x i32> %80 to <32 x i8>
%865 = bitcast <4 x i32> %82 to <16 x i8>
%866 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %863, <32 x i8> %864, <16 x i8> %865, i32 2)
%867 = extractelement <4 x float> %866, i32 0
%868 = extractelement <4 x float> %866, i32 1
%869 = extractelement <4 x float> %866, i32 2
%870 = fcmp oeq float %temp42.0, 3.000000e+00
%871 = select i1 %870, float 1.000000e+00, float 0.000000e+00
%872 = bitcast float %841 to i32
%873 = bitcast float %843 to i32
%874 = bitcast float %238 to i32
%875 = insertelement <4 x i32> undef, i32 %872, i32 0
%876 = insertelement <4 x i32> %875, i32 %873, i32 1
%877 = insertelement <4 x i32> %876, i32 %874, i32 2
%878 = bitcast <8 x i32> %72 to <32 x i8>
%879 = bitcast <4 x i32> %74 to <16 x i8>
%880 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %877, <32 x i8> %878, <16 x i8> %879, i32 2)
%881 = extractelement <4 x float> %880, i32 0
%882 = extractelement <4 x float> %880, i32 1
%883 = extractelement <4 x float> %880, i32 2
%884 = fcmp oeq float %temp42.0, 2.000000e+00
%885 = select i1 %884, float 1.000000e+00, float 0.000000e+00
%886 = bitcast float %841 to i32
%887 = bitcast float %843 to i32
%888 = bitcast float %238 to i32
%889 = insertelement <4 x i32> undef, i32 %886, i32 0
%890 = insertelement <4 x i32> %889, i32 %887, i32 1
%891 = insertelement <4 x i32> %890, i32 %888, i32 2
%892 = bitcast <8 x i32> %64 to <32 x i8>
%893 = bitcast <4 x i32> %66 to <16 x i8>
%894 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %891, <32 x i8> %892, <16 x i8> %893, i32 2)
%895 = extractelement <4 x float> %894, i32 0
%896 = extractelement <4 x float> %894, i32 1
%897 = extractelement <4 x float> %894, i32 2
%898 = fcmp oeq float %temp42.0, 1.000000e+00
%899 = select i1 %898, float 1.000000e+00, float 0.000000e+00
%900 = bitcast float %841 to i32
%901 = bitcast float %843 to i32
%902 = bitcast float %238 to i32
%903 = insertelement <4 x i32> undef, i32 %900, i32 0
%904 = insertelement <4 x i32> %903, i32 %901, i32 1
%905 = insertelement <4 x i32> %904, i32 %902, i32 2
%906 = bitcast <8 x i32> %56 to <32 x i8>
%907 = bitcast <4 x i32> %58 to <16 x i8>
%908 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %905, <32 x i8> %906, <16 x i8> %907, i32 2)
%909 = extractelement <4 x float> %908, i32 0
%910 = extractelement <4 x float> %908, i32 1
%911 = extractelement <4 x float> %908, i32 2
%912 = fcmp oeq float %temp42.0, 0.000000e+00
%913 = select i1 %912, float 1.000000e+00, float 0.000000e+00
%914 = fmul float %909, %913
%915 = fmul float %910, %913
%916 = fmul float %911, %913
%917 = fmul float %895, %899
%918 = fadd float %917, %914
%919 = fmul float %896, %899
%920 = fadd float %919, %915
%921 = fmul float %897, %899
%922 = fadd float %921, %916
%923 = fmul float %881, %885
%924 = fadd float %923, %918
%925 = fmul float %882, %885
%926 = fadd float %925, %920
%927 = fmul float %883, %885
%928 = fadd float %927, %922
%929 = fmul float %867, %871
%930 = fadd float %929, %924
%931 = fmul float %868, %871
%932 = fadd float %931, %926
%933 = fmul float %869, %871
%934 = fadd float %933, %928
%935 = fmul float %853, %857
%936 = fadd float %935, %930
%937 = fmul float %854, %857
%938 = fadd float %937, %932
%939 = fmul float %855, %857
%940 = fadd float %939, %934
%941 = fcmp une float %38, %temp28.0
%.sink218 = select i1 %941, float %41, float %40
%temp72.0 = select i1 %941, float 1.953125e-03, float 3.906250e-03
%942 = fdiv float 1.000000e+00, %.sink218
%943 = fmul float %110, %942
%944 = fmul float %111, %942
%945 = call float @llvm.floor.f32(float %943)
%946 = fsub float %943, %945
%947 = call float @llvm.floor.f32(float %944)
%948 = fsub float %944, %947
%949 = fmul float %42, 2.000000e+00
%950 = fmul float %949, %temp72.0
%951 = fsub float 1.000000e+00, %950
%952 = fmul float %temp72.0, %42
%953 = fmul float %946, %951
%954 = fadd float %953, %952
%955 = fmul float %948, %951
%956 = fadd float %955, %952
%957 = fmul float %954, %temp28.0
%958 = fadd float %957, %temp16.0
%959 = fmul float %956, %temp28.0
%960 = fadd float %959, %temp17.0
%961 = bitcast float %958 to i32
%962 = bitcast float %960 to i32
%963 = bitcast float %238 to i32
%964 = insertelement <4 x i32> undef, i32 %961, i32 0
%965 = insertelement <4 x i32> %964, i32 %962, i32 1
%966 = insertelement <4 x i32> %965, i32 %963, i32 2
%967 = bitcast <8 x i32> %88 to <32 x i8>
%968 = bitcast <4 x i32> %90 to <16 x i8>
%969 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %966, <32 x i8> %967, <16 x i8> %968, i32 2)
%970 = extractelement <4 x float> %969, i32 0
%971 = extractelement <4 x float> %969, i32 1
%972 = extractelement <4 x float> %969, i32 2
%973 = fcmp oeq float %temp18.0, 4.000000e+00
%974 = select i1 %973, float 1.000000e+00, float 0.000000e+00
%975 = bitcast float %958 to i32
%976 = bitcast float %960 to i32
%977 = bitcast float %238 to i32
%978 = insertelement <4 x i32> undef, i32 %975, i32 0
%979 = insertelement <4 x i32> %978, i32 %976, i32 1
%980 = insertelement <4 x i32> %979, i32 %977, i32 2
%981 = bitcast <8 x i32> %80 to <32 x i8>
%982 = bitcast <4 x i32> %82 to <16 x i8>
%983 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %980, <32 x i8> %981, <16 x i8> %982, i32 2)
%984 = extractelement <4 x float> %983, i32 0
%985 = extractelement <4 x float> %983, i32 1
%986 = extractelement <4 x float> %983, i32 2
%987 = fcmp oeq float %temp18.0, 3.000000e+00
%988 = select i1 %987, float 1.000000e+00, float 0.000000e+00
%989 = bitcast float %958 to i32
%990 = bitcast float %960 to i32
%991 = bitcast float %238 to i32
%992 = insertelement <4 x i32> undef, i32 %989, i32 0
%993 = insertelement <4 x i32> %992, i32 %990, i32 1
%994 = insertelement <4 x i32> %993, i32 %991, i32 2
%995 = bitcast <8 x i32> %72 to <32 x i8>
%996 = bitcast <4 x i32> %74 to <16 x i8>
%997 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %994, <32 x i8> %995, <16 x i8> %996, i32 2)
%998 = extractelement <4 x float> %997, i32 0
%999 = extractelement <4 x float> %997, i32 1
%1000 = extractelement <4 x float> %997, i32 2
%1001 = fcmp oeq float %temp18.0, 2.000000e+00
%1002 = select i1 %1001, float 1.000000e+00, float 0.000000e+00
%1003 = bitcast float %958 to i32
%1004 = bitcast float %960 to i32
%1005 = bitcast float %238 to i32
%1006 = insertelement <4 x i32> undef, i32 %1003, i32 0
%1007 = insertelement <4 x i32> %1006, i32 %1004, i32 1
%1008 = insertelement <4 x i32> %1007, i32 %1005, i32 2
%1009 = bitcast <8 x i32> %64 to <32 x i8>
%1010 = bitcast <4 x i32> %66 to <16 x i8>
%1011 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1008, <32 x i8> %1009, <16 x i8> %1010, i32 2)
%1012 = extractelement <4 x float> %1011, i32 0
%1013 = extractelement <4 x float> %1011, i32 1
%1014 = extractelement <4 x float> %1011, i32 2
%1015 = fcmp oeq float %temp18.0, 1.000000e+00
%1016 = select i1 %1015, float 1.000000e+00, float 0.000000e+00
%1017 = bitcast float %958 to i32
%1018 = bitcast float %960 to i32
%1019 = bitcast float %238 to i32
%1020 = insertelement <4 x i32> undef, i32 %1017, i32 0
%1021 = insertelement <4 x i32> %1020, i32 %1018, i32 1
%1022 = insertelement <4 x i32> %1021, i32 %1019, i32 2
%1023 = bitcast <8 x i32> %56 to <32 x i8>
%1024 = bitcast <4 x i32> %58 to <16 x i8>
%1025 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1022, <32 x i8> %1023, <16 x i8> %1024, i32 2)
%1026 = extractelement <4 x float> %1025, i32 0
%1027 = extractelement <4 x float> %1025, i32 1
%1028 = extractelement <4 x float> %1025, i32 2
%1029 = fcmp oeq float %temp18.0, 0.000000e+00
%1030 = select i1 %1029, float 1.000000e+00, float 0.000000e+00
%1031 = fmul float %1026, %1030
%1032 = fmul float %1027, %1030
%1033 = fmul float %1028, %1030
%1034 = fmul float %1012, %1016
%1035 = fadd float %1034, %1031
%1036 = fmul float %1013, %1016
%1037 = fadd float %1036, %1032
%1038 = fmul float %1014, %1016
%1039 = fadd float %1038, %1033
%1040 = fmul float %998, %1002
%1041 = fadd float %1040, %1035
%1042 = fmul float %999, %1002
%1043 = fadd float %1042, %1037
%1044 = fmul float %1000, %1002
%1045 = fadd float %1044, %1039
%1046 = fmul float %984, %988
%1047 = fadd float %1046, %1041
%1048 = fmul float %985, %988
%1049 = fadd float %1048, %1043
%1050 = fmul float %986, %988
%1051 = fadd float %1050, %1045
%1052 = fmul float %970, %974
%1053 = fadd float %1052, %1047
%1054 = fmul float %971, %974
%1055 = fadd float %1054, %1049
%1056 = fmul float %972, %974
%1057 = fadd float %1056, %1051
%1058 = fcmp une float %38, %temp28.0
%.sink219 = select i1 %1058, float %41, float %40
%temp76.0 = select i1 %1058, float 1.953125e-03, float 3.906250e-03
%1059 = fdiv float 1.000000e+00, %.sink219
%1060 = fmul float %112, %1059
%1061 = fmul float %111, %1059
%1062 = call float @llvm.floor.f32(float %1060)
%1063 = fsub float %1060, %1062
%1064 = call float @llvm.floor.f32(float %1061)
%1065 = fsub float %1061, %1064
%1066 = fmul float %42, 2.000000e+00
%1067 = fmul float %1066, %temp76.0
%1068 = fsub float 1.000000e+00, %1067
%1069 = fmul float %temp76.0, %42
%1070 = fmul float %1063, %1068
%1071 = fadd float %1070, %1069
%1072 = fmul float %1065, %1068
%1073 = fadd float %1072, %1069
%1074 = fmul float %1071, %temp28.0
%1075 = fadd float %1074, %temp16.0
%1076 = fmul float %1073, %temp28.0
%1077 = fadd float %1076, %temp17.0
%1078 = bitcast float %1075 to i32
%1079 = bitcast float %1077 to i32
%1080 = bitcast float %238 to i32
%1081 = insertelement <4 x i32> undef, i32 %1078, i32 0
%1082 = insertelement <4 x i32> %1081, i32 %1079, i32 1
%1083 = insertelement <4 x i32> %1082, i32 %1080, i32 2
%1084 = bitcast <8 x i32> %88 to <32 x i8>
%1085 = bitcast <4 x i32> %90 to <16 x i8>
%1086 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1083, <32 x i8> %1084, <16 x i8> %1085, i32 2)
%1087 = extractelement <4 x float> %1086, i32 0
%1088 = extractelement <4 x float> %1086, i32 1
%1089 = extractelement <4 x float> %1086, i32 2
%1090 = fcmp oeq float %temp18.0, 4.000000e+00
%1091 = select i1 %1090, float 1.000000e+00, float 0.000000e+00
%1092 = bitcast float %1075 to i32
%1093 = bitcast float %1077 to i32
%1094 = bitcast float %238 to i32
%1095 = insertelement <4 x i32> undef, i32 %1092, i32 0
%1096 = insertelement <4 x i32> %1095, i32 %1093, i32 1
%1097 = insertelement <4 x i32> %1096, i32 %1094, i32 2
%1098 = bitcast <8 x i32> %80 to <32 x i8>
%1099 = bitcast <4 x i32> %82 to <16 x i8>
%1100 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1097, <32 x i8> %1098, <16 x i8> %1099, i32 2)
%1101 = extractelement <4 x float> %1100, i32 0
%1102 = extractelement <4 x float> %1100, i32 1
%1103 = extractelement <4 x float> %1100, i32 2
%1104 = fcmp oeq float %temp18.0, 3.000000e+00
%1105 = select i1 %1104, float 1.000000e+00, float 0.000000e+00
%1106 = bitcast float %1075 to i32
%1107 = bitcast float %1077 to i32
%1108 = bitcast float %238 to i32
%1109 = insertelement <4 x i32> undef, i32 %1106, i32 0
%1110 = insertelement <4 x i32> %1109, i32 %1107, i32 1
%1111 = insertelement <4 x i32> %1110, i32 %1108, i32 2
%1112 = bitcast <8 x i32> %72 to <32 x i8>
%1113 = bitcast <4 x i32> %74 to <16 x i8>
%1114 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1111, <32 x i8> %1112, <16 x i8> %1113, i32 2)
%1115 = extractelement <4 x float> %1114, i32 0
%1116 = extractelement <4 x float> %1114, i32 1
%1117 = extractelement <4 x float> %1114, i32 2
%1118 = fcmp oeq float %temp18.0, 2.000000e+00
%1119 = select i1 %1118, float 1.000000e+00, float 0.000000e+00
%1120 = bitcast float %1075 to i32
%1121 = bitcast float %1077 to i32
%1122 = bitcast float %238 to i32
%1123 = insertelement <4 x i32> undef, i32 %1120, i32 0
%1124 = insertelement <4 x i32> %1123, i32 %1121, i32 1
%1125 = insertelement <4 x i32> %1124, i32 %1122, i32 2
%1126 = bitcast <8 x i32> %64 to <32 x i8>
%1127 = bitcast <4 x i32> %66 to <16 x i8>
%1128 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1125, <32 x i8> %1126, <16 x i8> %1127, i32 2)
%1129 = extractelement <4 x float> %1128, i32 0
%1130 = extractelement <4 x float> %1128, i32 1
%1131 = extractelement <4 x float> %1128, i32 2
%1132 = fcmp oeq float %temp18.0, 1.000000e+00
%1133 = select i1 %1132, float 1.000000e+00, float 0.000000e+00
%1134 = bitcast float %1075 to i32
%1135 = bitcast float %1077 to i32
%1136 = bitcast float %238 to i32
%1137 = insertelement <4 x i32> undef, i32 %1134, i32 0
%1138 = insertelement <4 x i32> %1137, i32 %1135, i32 1
%1139 = insertelement <4 x i32> %1138, i32 %1136, i32 2
%1140 = bitcast <8 x i32> %56 to <32 x i8>
%1141 = bitcast <4 x i32> %58 to <16 x i8>
%1142 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1139, <32 x i8> %1140, <16 x i8> %1141, i32 2)
%1143 = extractelement <4 x float> %1142, i32 0
%1144 = extractelement <4 x float> %1142, i32 1
%1145 = extractelement <4 x float> %1142, i32 2
%1146 = fcmp oeq float %temp18.0, 0.000000e+00
%1147 = select i1 %1146, float 1.000000e+00, float 0.000000e+00
%1148 = fmul float %1143, %1147
%1149 = fmul float %1144, %1147
%1150 = fmul float %1145, %1147
%1151 = fmul float %1129, %1133
%1152 = fadd float %1151, %1148
%1153 = fmul float %1130, %1133
%1154 = fadd float %1153, %1149
%1155 = fmul float %1131, %1133
%1156 = fadd float %1155, %1150
%1157 = fmul float %1115, %1119
%1158 = fadd float %1157, %1152
%1159 = fmul float %1116, %1119
%1160 = fadd float %1159, %1154
%1161 = fmul float %1117, %1119
%1162 = fadd float %1161, %1156
%1163 = fmul float %1101, %1105
%1164 = fadd float %1163, %1158
%1165 = fmul float %1102, %1105
%1166 = fadd float %1165, %1160
%1167 = fmul float %1103, %1105
%1168 = fadd float %1167, %1162
%1169 = fmul float %1087, %1091
%1170 = fadd float %1169, %1164
%1171 = fmul float %1088, %1091
%1172 = fadd float %1171, %1166
%1173 = fmul float %1089, %1091
%1174 = fadd float %1173, %1168
%1175 = fcmp une float %38, %temp28.0
%.sink220 = select i1 %1175, float %41, float %40
%temp80.0 = select i1 %1175, float 1.953125e-03, float 3.906250e-03
%1176 = fdiv float 1.000000e+00, %.sink220
%1177 = fmul float %112, %1176
%1178 = fmul float %110, %1176
%1179 = call float @llvm.floor.f32(float %1177)
%1180 = fsub float %1177, %1179
%1181 = call float @llvm.floor.f32(float %1178)
%1182 = fsub float %1178, %1181
%1183 = fmul float %42, 2.000000e+00
%1184 = fmul float %1183, %temp80.0
%1185 = fsub float 1.000000e+00, %1184
%1186 = fmul float %temp80.0, %42
%1187 = fmul float %1180, %1185
%1188 = fadd float %1187, %1186
%1189 = fmul float %1182, %1185
%1190 = fadd float %1189, %1186
%1191 = fmul float %1188, %temp28.0
%1192 = fadd float %1191, %temp16.0
%1193 = fmul float %1190, %temp28.0
%1194 = fadd float %1193, %temp17.0
%1195 = bitcast float %1192 to i32
%1196 = bitcast float %1194 to i32
%1197 = bitcast float %238 to i32
%1198 = insertelement <4 x i32> undef, i32 %1195, i32 0
%1199 = insertelement <4 x i32> %1198, i32 %1196, i32 1
%1200 = insertelement <4 x i32> %1199, i32 %1197, i32 2
%1201 = bitcast <8 x i32> %88 to <32 x i8>
%1202 = bitcast <4 x i32> %90 to <16 x i8>
%1203 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1200, <32 x i8> %1201, <16 x i8> %1202, i32 2)
%1204 = extractelement <4 x float> %1203, i32 0
%1205 = extractelement <4 x float> %1203, i32 1
%1206 = extractelement <4 x float> %1203, i32 2
%1207 = fcmp oeq float %temp18.0, 4.000000e+00
%1208 = select i1 %1207, float 1.000000e+00, float 0.000000e+00
%1209 = bitcast float %1192 to i32
%1210 = bitcast float %1194 to i32
%1211 = bitcast float %238 to i32
%1212 = insertelement <4 x i32> undef, i32 %1209, i32 0
%1213 = insertelement <4 x i32> %1212, i32 %1210, i32 1
%1214 = insertelement <4 x i32> %1213, i32 %1211, i32 2
%1215 = bitcast <8 x i32> %80 to <32 x i8>
%1216 = bitcast <4 x i32> %82 to <16 x i8>
%1217 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1214, <32 x i8> %1215, <16 x i8> %1216, i32 2)
%1218 = extractelement <4 x float> %1217, i32 0
%1219 = extractelement <4 x float> %1217, i32 1
%1220 = extractelement <4 x float> %1217, i32 2
%1221 = fcmp oeq float %temp18.0, 3.000000e+00
%1222 = select i1 %1221, float 1.000000e+00, float 0.000000e+00
%1223 = bitcast float %1192 to i32
%1224 = bitcast float %1194 to i32
%1225 = bitcast float %238 to i32
%1226 = insertelement <4 x i32> undef, i32 %1223, i32 0
%1227 = insertelement <4 x i32> %1226, i32 %1224, i32 1
%1228 = insertelement <4 x i32> %1227, i32 %1225, i32 2
%1229 = bitcast <8 x i32> %72 to <32 x i8>
%1230 = bitcast <4 x i32> %74 to <16 x i8>
%1231 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1228, <32 x i8> %1229, <16 x i8> %1230, i32 2)
%1232 = extractelement <4 x float> %1231, i32 0
%1233 = extractelement <4 x float> %1231, i32 1
%1234 = extractelement <4 x float> %1231, i32 2
%1235 = fcmp oeq float %temp18.0, 2.000000e+00
%1236 = select i1 %1235, float 1.000000e+00, float 0.000000e+00
%1237 = bitcast float %1192 to i32
%1238 = bitcast float %1194 to i32
%1239 = bitcast float %238 to i32
%1240 = insertelement <4 x i32> undef, i32 %1237, i32 0
%1241 = insertelement <4 x i32> %1240, i32 %1238, i32 1
%1242 = insertelement <4 x i32> %1241, i32 %1239, i32 2
%1243 = bitcast <8 x i32> %64 to <32 x i8>
%1244 = bitcast <4 x i32> %66 to <16 x i8>
%1245 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1242, <32 x i8> %1243, <16 x i8> %1244, i32 2)
%1246 = extractelement <4 x float> %1245, i32 0
%1247 = extractelement <4 x float> %1245, i32 1
%1248 = extractelement <4 x float> %1245, i32 2
%1249 = fcmp oeq float %temp18.0, 1.000000e+00
%1250 = select i1 %1249, float 1.000000e+00, float 0.000000e+00
%1251 = bitcast float %1192 to i32
%1252 = bitcast float %1194 to i32
%1253 = bitcast float %238 to i32
%1254 = insertelement <4 x i32> undef, i32 %1251, i32 0
%1255 = insertelement <4 x i32> %1254, i32 %1252, i32 1
%1256 = insertelement <4 x i32> %1255, i32 %1253, i32 2
%1257 = bitcast <8 x i32> %56 to <32 x i8>
%1258 = bitcast <4 x i32> %58 to <16 x i8>
%1259 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1256, <32 x i8> %1257, <16 x i8> %1258, i32 2)
%1260 = extractelement <4 x float> %1259, i32 0
%1261 = extractelement <4 x float> %1259, i32 1
%1262 = extractelement <4 x float> %1259, i32 2
%1263 = fcmp oeq float %temp18.0, 0.000000e+00
%1264 = select i1 %1263, float 1.000000e+00, float 0.000000e+00
%1265 = fmul float %1260, %1264
%1266 = fmul float %1261, %1264
%1267 = fmul float %1262, %1264
%1268 = fmul float %1246, %1250
%1269 = fadd float %1268, %1265
%1270 = fmul float %1247, %1250
%1271 = fadd float %1270, %1266
%1272 = fmul float %1248, %1250
%1273 = fadd float %1272, %1267
%1274 = fmul float %1232, %1236
%1275 = fadd float %1274, %1269
%1276 = fmul float %1233, %1236
%1277 = fadd float %1276, %1271
%1278 = fmul float %1234, %1236
%1279 = fadd float %1278, %1273
%1280 = fmul float %1218, %1222
%1281 = fadd float %1280, %1275
%1282 = fmul float %1219, %1222
%1283 = fadd float %1282, %1277
%1284 = fmul float %1220, %1222
%1285 = fadd float %1284, %1279
%1286 = fmul float %1204, %1208
%1287 = fadd float %1286, %1281
%1288 = fmul float %1205, %1208
%1289 = fadd float %1288, %1283
%1290 = fmul float %1206, %1208
%1291 = fadd float %1290, %1285
%1292 = fmul float %1053, %163
%1293 = fmul float %1055, %163
%1294 = fmul float %1057, %163
%1295 = fmul float %1170, %161
%1296 = fadd float %1295, %1292
%1297 = fmul float %1172, %161
%1298 = fadd float %1297, %1293
%1299 = fmul float %1174, %161
%1300 = fadd float %1299, %1294
%1301 = fmul float %1287, %162
%1302 = fadd float %1301, %1296
%1303 = fmul float %1289, %162
%1304 = fadd float %1303, %1298
%1305 = fmul float %1291, %162
%1306 = fadd float %1305, %1300
%1307 = fmul float %702, %163
%1308 = fmul float %704, %163
%1309 = fmul float %706, %163
%1310 = fmul float %819, %161
%1311 = fadd float %1310, %1307
%1312 = fmul float %821, %161
%1313 = fadd float %1312, %1308
%1314 = fmul float %823, %161
%1315 = fadd float %1314, %1309
%1316 = fmul float %936, %162
%1317 = fadd float %1316, %1311
%1318 = fmul float %938, %162
%1319 = fadd float %1318, %1313
%1320 = fmul float %940, %162
%1321 = fadd float %1320, %1315
%1322 = fmul float %351, %163
%1323 = fmul float %353, %163
%1324 = fmul float %355, %163
%1325 = fmul float %468, %161
%1326 = fadd float %1325, %1322
%1327 = fmul float %470, %161
%1328 = fadd float %1327, %1323
%1329 = fmul float %472, %161
%1330 = fadd float %1329, %1324
%1331 = fmul float %585, %162
%1332 = fadd float %1331, %1326
%1333 = fmul float %587, %162
%1334 = fadd float %1333, %1328
%1335 = fmul float %589, %162
%1336 = fadd float %1335, %1330
%1337 = fmul float %99, %1332
%1338 = fmul float %99, %1334
%1339 = fmul float %99, %1336
%1340 = fmul float %100, %1317
%1341 = fadd float %1340, %1337
%1342 = fmul float %100, %1319
%1343 = fadd float %1342, %1338
%1344 = fmul float %100, %1321
%1345 = fadd float %1344, %1339
%1346 = fmul float %101, %1302
%1347 = fadd float %1346, %1341
%1348 = fmul float %101, %1304
%1349 = fadd float %1348, %1343
%1350 = fmul float %101, %1306
%1351 = fadd float %1350, %1345
%1352 = fcmp une float %38, %temp20.0
%.sink221 = select i1 %1352, float %41, float %40
%temp52.2 = select i1 %1352, float 1.953125e-03, float 3.906250e-03
%1353 = fdiv float 1.000000e+00, %.sink221
%1354 = fmul float %112, %1353
%1355 = fmul float %111, %1353
%1356 = call float @llvm.floor.f32(float %1354)
%1357 = fsub float %1354, %1356
%1358 = call float @llvm.floor.f32(float %1355)
%1359 = fsub float %1355, %1358
%1360 = fmul float %42, 2.000000e+00
%1361 = fmul float %1360, %temp52.2
%1362 = fsub float 1.000000e+00, %1361
%1363 = fmul float %temp52.2, %42
%1364 = fmul float %1357, %1362
%1365 = fadd float %1364, %1363
%1366 = fmul float %1359, %1362
%1367 = fadd float %1366, %1363
%1368 = fmul float %1365, %temp20.0
%1369 = fadd float %1368, %temp32.0
%1370 = fmul float %1367, %temp20.0
%1371 = fadd float %1370, %temp33.0
%1372 = bitcast float %1369 to i32
%1373 = bitcast float %1371 to i32
%1374 = bitcast float %238 to i32
%1375 = insertelement <4 x i32> undef, i32 %1372, i32 0
%1376 = insertelement <4 x i32> %1375, i32 %1373, i32 1
%1377 = insertelement <4 x i32> %1376, i32 %1374, i32 2
%1378 = bitcast <8 x i32> %92 to <32 x i8>
%1379 = bitcast <4 x i32> %94 to <16 x i8>
%1380 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1377, <32 x i8> %1378, <16 x i8> %1379, i32 2)
%1381 = extractelement <4 x float> %1380, i32 1
%1382 = extractelement <4 x float> %1380, i32 3
%1383 = fcmp oeq float %temp34.0, 4.000000e+00
%1384 = select i1 %1383, float 1.000000e+00, float 0.000000e+00
%1385 = bitcast float %1369 to i32
%1386 = bitcast float %1371 to i32
%1387 = bitcast float %238 to i32
%1388 = insertelement <4 x i32> undef, i32 %1385, i32 0
%1389 = insertelement <4 x i32> %1388, i32 %1386, i32 1
%1390 = insertelement <4 x i32> %1389, i32 %1387, i32 2
%1391 = bitcast <8 x i32> %84 to <32 x i8>
%1392 = bitcast <4 x i32> %86 to <16 x i8>
%1393 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1390, <32 x i8> %1391, <16 x i8> %1392, i32 2)
%1394 = extractelement <4 x float> %1393, i32 1
%1395 = extractelement <4 x float> %1393, i32 3
%1396 = fcmp oeq float %temp34.0, 3.000000e+00
%1397 = select i1 %1396, float 1.000000e+00, float 0.000000e+00
%1398 = bitcast float %1369 to i32
%1399 = bitcast float %1371 to i32
%1400 = bitcast float %238 to i32
%1401 = insertelement <4 x i32> undef, i32 %1398, i32 0
%1402 = insertelement <4 x i32> %1401, i32 %1399, i32 1
%1403 = insertelement <4 x i32> %1402, i32 %1400, i32 2
%1404 = bitcast <8 x i32> %76 to <32 x i8>
%1405 = bitcast <4 x i32> %78 to <16 x i8>
%1406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1403, <32 x i8> %1404, <16 x i8> %1405, i32 2)
%1407 = extractelement <4 x float> %1406, i32 1
%1408 = extractelement <4 x float> %1406, i32 3
%1409 = fcmp oeq float %temp34.0, 2.000000e+00
%1410 = select i1 %1409, float 1.000000e+00, float 0.000000e+00
%1411 = bitcast float %1369 to i32
%1412 = bitcast float %1371 to i32
%1413 = bitcast float %238 to i32
%1414 = insertelement <4 x i32> undef, i32 %1411, i32 0
%1415 = insertelement <4 x i32> %1414, i32 %1412, i32 1
%1416 = insertelement <4 x i32> %1415, i32 %1413, i32 2
%1417 = bitcast <8 x i32> %68 to <32 x i8>
%1418 = bitcast <4 x i32> %70 to <16 x i8>
%1419 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1416, <32 x i8> %1417, <16 x i8> %1418, i32 2)
%1420 = extractelement <4 x float> %1419, i32 1
%1421 = extractelement <4 x float> %1419, i32 3
%1422 = fcmp oeq float %temp34.0, 1.000000e+00
%1423 = select i1 %1422, float 1.000000e+00, float 0.000000e+00
%1424 = bitcast float %1369 to i32
%1425 = bitcast float %1371 to i32
%1426 = bitcast float %238 to i32
%1427 = insertelement <4 x i32> undef, i32 %1424, i32 0
%1428 = insertelement <4 x i32> %1427, i32 %1425, i32 1
%1429 = insertelement <4 x i32> %1428, i32 %1426, i32 2
%1430 = bitcast <8 x i32> %60 to <32 x i8>
%1431 = bitcast <4 x i32> %62 to <16 x i8>
%1432 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1429, <32 x i8> %1430, <16 x i8> %1431, i32 2)
%1433 = extractelement <4 x float> %1432, i32 1
%1434 = extractelement <4 x float> %1432, i32 3
%1435 = fcmp oeq float %temp34.0, 0.000000e+00
%1436 = select i1 %1435, float 1.000000e+00, float 0.000000e+00
%1437 = fmul float %1433, %1436
%1438 = fmul float %1434, %1436
%1439 = fmul float %1420, %1423
%1440 = fadd float %1439, %1437
%1441 = fmul float %1421, %1423
%1442 = fadd float %1441, %1438
%1443 = fmul float %1407, %1410
%1444 = fadd float %1443, %1440
%1445 = fmul float %1408, %1410
%1446 = fadd float %1445, %1442
%1447 = fmul float %1394, %1397
%1448 = fadd float %1447, %1444
%1449 = fmul float %1395, %1397
%1450 = fadd float %1449, %1446
%1451 = fmul float %1381, %1384
%1452 = fadd float %1451, %1448
%1453 = fmul float %1382, %1384
%1454 = fadd float %1453, %1450
%1455 = fmul float %1454, 2.000000e+00
%1456 = fadd float %1455, -1.000000e+00
%1457 = fmul float %1452, 2.000000e+00
%1458 = fadd float %1457, -1.000000e+00
%1459 = fmul float %1456, %1456
%1460 = fmul float %1458, %1458
%1461 = fadd float %1459, %1460
%1462 = call float @llvm.AMDIL.clamp.(float %1461, float 0.000000e+00, float 1.000000e+00)
%1463 = fcmp une float %38, %temp20.0
%.sink222 = select i1 %1463, float %41, float %40
%temp56.2 = select i1 %1463, float 1.953125e-03, float 3.906250e-03
%1464 = fdiv float 1.000000e+00, %.sink222
%1465 = fmul float %112, %1464
%1466 = fmul float %110, %1464
%1467 = call float @llvm.floor.f32(float %1465)
%1468 = fsub float %1465, %1467
%1469 = call float @llvm.floor.f32(float %1466)
%1470 = fsub float %1466, %1469
%1471 = fmul float %42, 2.000000e+00
%1472 = fmul float %1471, %temp56.2
%1473 = fsub float 1.000000e+00, %1472
%1474 = fmul float %temp56.2, %42
%1475 = fmul float %1468, %1473
%1476 = fadd float %1475, %1474
%1477 = fmul float %1470, %1473
%1478 = fadd float %1477, %1474
%1479 = fmul float %1476, %temp20.0
%1480 = fadd float %1479, %temp32.0
%1481 = fmul float %1478, %temp20.0
%1482 = fadd float %1481, %temp33.0
%1483 = bitcast float %1480 to i32
%1484 = bitcast float %1482 to i32
%1485 = bitcast float %238 to i32
%1486 = insertelement <4 x i32> undef, i32 %1483, i32 0
%1487 = insertelement <4 x i32> %1486, i32 %1484, i32 1
%1488 = insertelement <4 x i32> %1487, i32 %1485, i32 2
%1489 = bitcast <8 x i32> %92 to <32 x i8>
%1490 = bitcast <4 x i32> %94 to <16 x i8>
%1491 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1488, <32 x i8> %1489, <16 x i8> %1490, i32 2)
%1492 = extractelement <4 x float> %1491, i32 1
%1493 = extractelement <4 x float> %1491, i32 3
%1494 = fcmp oeq float %temp34.0, 4.000000e+00
%1495 = select i1 %1494, float 1.000000e+00, float 0.000000e+00
%1496 = bitcast float %1480 to i32
%1497 = bitcast float %1482 to i32
%1498 = bitcast float %238 to i32
%1499 = insertelement <4 x i32> undef, i32 %1496, i32 0
%1500 = insertelement <4 x i32> %1499, i32 %1497, i32 1
%1501 = insertelement <4 x i32> %1500, i32 %1498, i32 2
%1502 = bitcast <8 x i32> %84 to <32 x i8>
%1503 = bitcast <4 x i32> %86 to <16 x i8>
%1504 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1501, <32 x i8> %1502, <16 x i8> %1503, i32 2)
%1505 = extractelement <4 x float> %1504, i32 1
%1506 = extractelement <4 x float> %1504, i32 3
%1507 = fcmp oeq float %temp34.0, 3.000000e+00
%1508 = select i1 %1507, float 1.000000e+00, float 0.000000e+00
%1509 = bitcast float %1480 to i32
%1510 = bitcast float %1482 to i32
%1511 = bitcast float %238 to i32
%1512 = insertelement <4 x i32> undef, i32 %1509, i32 0
%1513 = insertelement <4 x i32> %1512, i32 %1510, i32 1
%1514 = insertelement <4 x i32> %1513, i32 %1511, i32 2
%1515 = bitcast <8 x i32> %76 to <32 x i8>
%1516 = bitcast <4 x i32> %78 to <16 x i8>
%1517 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1514, <32 x i8> %1515, <16 x i8> %1516, i32 2)
%1518 = extractelement <4 x float> %1517, i32 1
%1519 = extractelement <4 x float> %1517, i32 3
%1520 = fcmp oeq float %temp34.0, 2.000000e+00
%1521 = select i1 %1520, float 1.000000e+00, float 0.000000e+00
%1522 = bitcast float %1480 to i32
%1523 = bitcast float %1482 to i32
%1524 = bitcast float %238 to i32
%1525 = insertelement <4 x i32> undef, i32 %1522, i32 0
%1526 = insertelement <4 x i32> %1525, i32 %1523, i32 1
%1527 = insertelement <4 x i32> %1526, i32 %1524, i32 2
%1528 = bitcast <8 x i32> %68 to <32 x i8>
%1529 = bitcast <4 x i32> %70 to <16 x i8>
%1530 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1527, <32 x i8> %1528, <16 x i8> %1529, i32 2)
%1531 = extractelement <4 x float> %1530, i32 1
%1532 = extractelement <4 x float> %1530, i32 3
%1533 = fcmp oeq float %temp34.0, 1.000000e+00
%1534 = select i1 %1533, float 1.000000e+00, float 0.000000e+00
%1535 = bitcast float %1480 to i32
%1536 = bitcast float %1482 to i32
%1537 = bitcast float %238 to i32
%1538 = insertelement <4 x i32> undef, i32 %1535, i32 0
%1539 = insertelement <4 x i32> %1538, i32 %1536, i32 1
%1540 = insertelement <4 x i32> %1539, i32 %1537, i32 2
%1541 = bitcast <8 x i32> %60 to <32 x i8>
%1542 = bitcast <4 x i32> %62 to <16 x i8>
%1543 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1540, <32 x i8> %1541, <16 x i8> %1542, i32 2)
%1544 = extractelement <4 x float> %1543, i32 1
%1545 = extractelement <4 x float> %1543, i32 3
%1546 = fcmp oeq float %temp34.0, 0.000000e+00
%1547 = select i1 %1546, float 1.000000e+00, float 0.000000e+00
%1548 = fmul float %1544, %1547
%1549 = fmul float %1545, %1547
%1550 = fmul float %1531, %1534
%1551 = fadd float %1550, %1548
%1552 = fmul float %1532, %1534
%1553 = fadd float %1552, %1549
%1554 = fmul float %1518, %1521
%1555 = fadd float %1554, %1551
%1556 = fmul float %1519, %1521
%1557 = fadd float %1556, %1553
%1558 = fmul float %1505, %1508
%1559 = fadd float %1558, %1555
%1560 = fmul float %1506, %1508
%1561 = fadd float %1560, %1557
%1562 = fmul float %1492, %1495
%1563 = fadd float %1562, %1559
%1564 = fmul float %1493, %1495
%1565 = fadd float %1564, %1561
%1566 = fmul float %1565, 2.000000e+00
%1567 = fadd float %1566, -1.000000e+00
%1568 = fmul float %1563, 2.000000e+00
%1569 = fadd float %1568, -1.000000e+00
%1570 = fmul float %1567, %1567
%1571 = fmul float %1569, %1569
%1572 = fadd float %1570, %1571
%1573 = call float @llvm.AMDIL.clamp.(float %1572, float 0.000000e+00, float 1.000000e+00)
%1574 = fcmp une float %38, %temp20.0
%.sink223 = select i1 %1574, float %41, float %40
%temp60.2 = select i1 %1574, float 1.953125e-03, float 3.906250e-03
%1575 = fdiv float 1.000000e+00, %.sink223
%1576 = fmul float %110, %1575
%1577 = fmul float %111, %1575
%1578 = call float @llvm.floor.f32(float %1576)
%1579 = fsub float %1576, %1578
%1580 = call float @llvm.floor.f32(float %1577)
%1581 = fsub float %1577, %1580
%1582 = fmul float %42, 2.000000e+00
%1583 = fmul float %1582, %temp60.2
%1584 = fsub float 1.000000e+00, %1583
%1585 = fmul float %temp60.2, %42
%1586 = fmul float %1579, %1584
%1587 = fadd float %1586, %1585
%1588 = fmul float %1581, %1584
%1589 = fadd float %1588, %1585
%1590 = fmul float %1587, %temp20.0
%1591 = fadd float %1590, %temp32.0
%1592 = fmul float %1589, %temp20.0
%1593 = fadd float %1592, %temp33.0
%1594 = bitcast float %1591 to i32
%1595 = bitcast float %1593 to i32
%1596 = bitcast float %238 to i32
%1597 = insertelement <4 x i32> undef, i32 %1594, i32 0
%1598 = insertelement <4 x i32> %1597, i32 %1595, i32 1
%1599 = insertelement <4 x i32> %1598, i32 %1596, i32 2
%1600 = bitcast <8 x i32> %92 to <32 x i8>
%1601 = bitcast <4 x i32> %94 to <16 x i8>
%1602 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1599, <32 x i8> %1600, <16 x i8> %1601, i32 2)
%1603 = extractelement <4 x float> %1602, i32 1
%1604 = extractelement <4 x float> %1602, i32 3
%1605 = fcmp oeq float %temp34.0, 4.000000e+00
%1606 = select i1 %1605, float 1.000000e+00, float 0.000000e+00
%1607 = bitcast float %1591 to i32
%1608 = bitcast float %1593 to i32
%1609 = bitcast float %238 to i32
%1610 = insertelement <4 x i32> undef, i32 %1607, i32 0
%1611 = insertelement <4 x i32> %1610, i32 %1608, i32 1
%1612 = insertelement <4 x i32> %1611, i32 %1609, i32 2
%1613 = bitcast <8 x i32> %84 to <32 x i8>
%1614 = bitcast <4 x i32> %86 to <16 x i8>
%1615 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1612, <32 x i8> %1613, <16 x i8> %1614, i32 2)
%1616 = extractelement <4 x float> %1615, i32 1
%1617 = extractelement <4 x float> %1615, i32 3
%1618 = fcmp oeq float %temp34.0, 3.000000e+00
%1619 = select i1 %1618, float 1.000000e+00, float 0.000000e+00
%1620 = bitcast float %1591 to i32
%1621 = bitcast float %1593 to i32
%1622 = bitcast float %238 to i32
%1623 = insertelement <4 x i32> undef, i32 %1620, i32 0
%1624 = insertelement <4 x i32> %1623, i32 %1621, i32 1
%1625 = insertelement <4 x i32> %1624, i32 %1622, i32 2
%1626 = bitcast <8 x i32> %76 to <32 x i8>
%1627 = bitcast <4 x i32> %78 to <16 x i8>
%1628 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1625, <32 x i8> %1626, <16 x i8> %1627, i32 2)
%1629 = extractelement <4 x float> %1628, i32 1
%1630 = extractelement <4 x float> %1628, i32 3
%1631 = fcmp oeq float %temp34.0, 2.000000e+00
%1632 = select i1 %1631, float 1.000000e+00, float 0.000000e+00
%1633 = bitcast float %1591 to i32
%1634 = bitcast float %1593 to i32
%1635 = bitcast float %238 to i32
%1636 = insertelement <4 x i32> undef, i32 %1633, i32 0
%1637 = insertelement <4 x i32> %1636, i32 %1634, i32 1
%1638 = insertelement <4 x i32> %1637, i32 %1635, i32 2
%1639 = bitcast <8 x i32> %68 to <32 x i8>
%1640 = bitcast <4 x i32> %70 to <16 x i8>
%1641 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1638, <32 x i8> %1639, <16 x i8> %1640, i32 2)
%1642 = extractelement <4 x float> %1641, i32 1
%1643 = extractelement <4 x float> %1641, i32 3
%1644 = fcmp oeq float %temp34.0, 1.000000e+00
%1645 = select i1 %1644, float 1.000000e+00, float 0.000000e+00
%1646 = bitcast float %1591 to i32
%1647 = bitcast float %1593 to i32
%1648 = bitcast float %238 to i32
%1649 = insertelement <4 x i32> undef, i32 %1646, i32 0
%1650 = insertelement <4 x i32> %1649, i32 %1647, i32 1
%1651 = insertelement <4 x i32> %1650, i32 %1648, i32 2
%1652 = bitcast <8 x i32> %60 to <32 x i8>
%1653 = bitcast <4 x i32> %62 to <16 x i8>
%1654 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1651, <32 x i8> %1652, <16 x i8> %1653, i32 2)
%1655 = extractelement <4 x float> %1654, i32 1
%1656 = extractelement <4 x float> %1654, i32 3
%1657 = fcmp oeq float %temp34.0, 0.000000e+00
%1658 = select i1 %1657, float 1.000000e+00, float 0.000000e+00
%1659 = fmul float %1655, %1658
%1660 = fmul float %1656, %1658
%1661 = fmul float %1642, %1645
%1662 = fadd float %1661, %1659
%1663 = fmul float %1643, %1645
%1664 = fadd float %1663, %1660
%1665 = fmul float %1629, %1632
%1666 = fadd float %1665, %1662
%1667 = fmul float %1630, %1632
%1668 = fadd float %1667, %1664
%1669 = fmul float %1616, %1619
%1670 = fadd float %1669, %1666
%1671 = fmul float %1617, %1619
%1672 = fadd float %1671, %1668
%1673 = fmul float %1603, %1606
%1674 = fadd float %1673, %1670
%1675 = fmul float %1604, %1606
%1676 = fadd float %1675, %1672
%1677 = fmul float %1676, 2.000000e+00
%1678 = fadd float %1677, -1.000000e+00
%1679 = fmul float %1674, 2.000000e+00
%1680 = fadd float %1679, -1.000000e+00
%1681 = fmul float %1678, %1678
%1682 = fmul float %1680, %1680
%1683 = fadd float %1681, %1682
%1684 = call float @llvm.AMDIL.clamp.(float %1683, float 0.000000e+00, float 1.000000e+00)
%1685 = fmul float %161, 0.000000e+00
%1686 = fmul float %1456, %161
%1687 = fmul float %1458, %161
%1688 = fmul float %1569, %162
%1689 = fadd float %1688, %1685
%1690 = fmul float %162, 0.000000e+00
%1691 = fadd float %1690, %1686
%1692 = fmul float %1567, %162
%1693 = fadd float %1692, %1687
%1694 = fmul float %1678, %163
%1695 = fadd float %1694, %1689
%1696 = fmul float %1680, %163
%1697 = fadd float %1696, %1691
%1698 = fmul float %163, 0.000000e+00
%1699 = fadd float %1698, %1693
%1700 = fcmp une float %38, %temp24.0
%.sink224 = select i1 %1700, float %41, float %40
%temp48.3 = select i1 %1700, float 1.953125e-03, float 3.906250e-03
%1701 = fdiv float 1.000000e+00, %.sink224
%1702 = fmul float %112, %1701
%1703 = fmul float %111, %1701
%1704 = call float @llvm.floor.f32(float %1702)
%1705 = fsub float %1702, %1704
%1706 = call float @llvm.floor.f32(float %1703)
%1707 = fsub float %1703, %1706
%1708 = fmul float %42, 2.000000e+00
%1709 = fmul float %1708, %temp48.3
%1710 = fsub float 1.000000e+00, %1709
%1711 = fmul float %temp48.3, %42
%1712 = fmul float %1705, %1710
%1713 = fadd float %1712, %1711
%1714 = fmul float %1707, %1710
%1715 = fadd float %1714, %1711
%1716 = fmul float %1713, %temp24.0
%1717 = fadd float %1716, %temp40.0
%1718 = fmul float %1715, %temp24.0
%1719 = fadd float %1718, %temp41.0
%1720 = bitcast float %1717 to i32
%1721 = bitcast float %1719 to i32
%1722 = bitcast float %238 to i32
%1723 = insertelement <4 x i32> undef, i32 %1720, i32 0
%1724 = insertelement <4 x i32> %1723, i32 %1721, i32 1
%1725 = insertelement <4 x i32> %1724, i32 %1722, i32 2
%1726 = bitcast <8 x i32> %92 to <32 x i8>
%1727 = bitcast <4 x i32> %94 to <16 x i8>
%1728 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1725, <32 x i8> %1726, <16 x i8> %1727, i32 2)
%1729 = extractelement <4 x float> %1728, i32 1
%1730 = extractelement <4 x float> %1728, i32 3
%1731 = fcmp oeq float %temp42.0, 4.000000e+00
%1732 = select i1 %1731, float 1.000000e+00, float 0.000000e+00
%1733 = bitcast float %1717 to i32
%1734 = bitcast float %1719 to i32
%1735 = bitcast float %238 to i32
%1736 = insertelement <4 x i32> undef, i32 %1733, i32 0
%1737 = insertelement <4 x i32> %1736, i32 %1734, i32 1
%1738 = insertelement <4 x i32> %1737, i32 %1735, i32 2
%1739 = bitcast <8 x i32> %84 to <32 x i8>
%1740 = bitcast <4 x i32> %86 to <16 x i8>
%1741 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1738, <32 x i8> %1739, <16 x i8> %1740, i32 2)
%1742 = extractelement <4 x float> %1741, i32 1
%1743 = extractelement <4 x float> %1741, i32 3
%1744 = fcmp oeq float %temp42.0, 3.000000e+00
%1745 = select i1 %1744, float 1.000000e+00, float 0.000000e+00
%1746 = bitcast float %1717 to i32
%1747 = bitcast float %1719 to i32
%1748 = bitcast float %238 to i32
%1749 = insertelement <4 x i32> undef, i32 %1746, i32 0
%1750 = insertelement <4 x i32> %1749, i32 %1747, i32 1
%1751 = insertelement <4 x i32> %1750, i32 %1748, i32 2
%1752 = bitcast <8 x i32> %76 to <32 x i8>
%1753 = bitcast <4 x i32> %78 to <16 x i8>
%1754 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1751, <32 x i8> %1752, <16 x i8> %1753, i32 2)
%1755 = extractelement <4 x float> %1754, i32 1
%1756 = extractelement <4 x float> %1754, i32 3
%1757 = fcmp oeq float %temp42.0, 2.000000e+00
%1758 = select i1 %1757, float 1.000000e+00, float 0.000000e+00
%1759 = bitcast float %1717 to i32
%1760 = bitcast float %1719 to i32
%1761 = bitcast float %238 to i32
%1762 = insertelement <4 x i32> undef, i32 %1759, i32 0
%1763 = insertelement <4 x i32> %1762, i32 %1760, i32 1
%1764 = insertelement <4 x i32> %1763, i32 %1761, i32 2
%1765 = bitcast <8 x i32> %68 to <32 x i8>
%1766 = bitcast <4 x i32> %70 to <16 x i8>
%1767 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1764, <32 x i8> %1765, <16 x i8> %1766, i32 2)
%1768 = extractelement <4 x float> %1767, i32 1
%1769 = extractelement <4 x float> %1767, i32 3
%1770 = fcmp oeq float %temp42.0, 1.000000e+00
%1771 = select i1 %1770, float 1.000000e+00, float 0.000000e+00
%1772 = bitcast float %1717 to i32
%1773 = bitcast float %1719 to i32
%1774 = bitcast float %238 to i32
%1775 = insertelement <4 x i32> undef, i32 %1772, i32 0
%1776 = insertelement <4 x i32> %1775, i32 %1773, i32 1
%1777 = insertelement <4 x i32> %1776, i32 %1774, i32 2
%1778 = bitcast <8 x i32> %60 to <32 x i8>
%1779 = bitcast <4 x i32> %62 to <16 x i8>
%1780 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1777, <32 x i8> %1778, <16 x i8> %1779, i32 2)
%1781 = extractelement <4 x float> %1780, i32 1
%1782 = extractelement <4 x float> %1780, i32 3
%1783 = fcmp oeq float %temp42.0, 0.000000e+00
%1784 = select i1 %1783, float 1.000000e+00, float 0.000000e+00
%1785 = fmul float %1781, %1784
%1786 = fmul float %1782, %1784
%1787 = fmul float %1768, %1771
%1788 = fadd float %1787, %1785
%1789 = fmul float %1769, %1771
%1790 = fadd float %1789, %1786
%1791 = fmul float %1755, %1758
%1792 = fadd float %1791, %1788
%1793 = fmul float %1756, %1758
%1794 = fadd float %1793, %1790
%1795 = fmul float %1742, %1745
%1796 = fadd float %1795, %1792
%1797 = fmul float %1743, %1745
%1798 = fadd float %1797, %1794
%1799 = fmul float %1729, %1732
%1800 = fadd float %1799, %1796
%1801 = fmul float %1730, %1732
%1802 = fadd float %1801, %1798
%1803 = fmul float %1802, 2.000000e+00
%1804 = fadd float %1803, -1.000000e+00
%1805 = fmul float %1800, 2.000000e+00
%1806 = fadd float %1805, -1.000000e+00
%1807 = fmul float %1804, %1804
%1808 = fmul float %1806, %1806
%1809 = fadd float %1807, %1808
%1810 = call float @llvm.AMDIL.clamp.(float %1809, float 0.000000e+00, float 1.000000e+00)
%1811 = fcmp une float %38, %temp24.0
%.sink225 = select i1 %1811, float %41, float %40
%temp52.4 = select i1 %1811, float 1.953125e-03, float 3.906250e-03
%1812 = fdiv float 1.000000e+00, %.sink225
%1813 = fmul float %112, %1812
%1814 = fmul float %110, %1812
%1815 = call float @llvm.floor.f32(float %1813)
%1816 = fsub float %1813, %1815
%1817 = call float @llvm.floor.f32(float %1814)
%1818 = fsub float %1814, %1817
%1819 = fmul float %42, 2.000000e+00
%1820 = fmul float %1819, %temp52.4
%1821 = fsub float 1.000000e+00, %1820
%1822 = fmul float %temp52.4, %42
%1823 = fmul float %1816, %1821
%1824 = fadd float %1823, %1822
%1825 = fmul float %1818, %1821
%1826 = fadd float %1825, %1822
%1827 = fmul float %1824, %temp24.0
%1828 = fadd float %1827, %temp40.0
%1829 = fmul float %1826, %temp24.0
%1830 = fadd float %1829, %temp41.0
%1831 = bitcast float %1828 to i32
%1832 = bitcast float %1830 to i32
%1833 = bitcast float %238 to i32
%1834 = insertelement <4 x i32> undef, i32 %1831, i32 0
%1835 = insertelement <4 x i32> %1834, i32 %1832, i32 1
%1836 = insertelement <4 x i32> %1835, i32 %1833, i32 2
%1837 = bitcast <8 x i32> %92 to <32 x i8>
%1838 = bitcast <4 x i32> %94 to <16 x i8>
%1839 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1836, <32 x i8> %1837, <16 x i8> %1838, i32 2)
%1840 = extractelement <4 x float> %1839, i32 1
%1841 = extractelement <4 x float> %1839, i32 3
%1842 = fcmp oeq float %temp42.0, 4.000000e+00
%1843 = select i1 %1842, float 1.000000e+00, float 0.000000e+00
%1844 = bitcast float %1828 to i32
%1845 = bitcast float %1830 to i32
%1846 = bitcast float %238 to i32
%1847 = insertelement <4 x i32> undef, i32 %1844, i32 0
%1848 = insertelement <4 x i32> %1847, i32 %1845, i32 1
%1849 = insertelement <4 x i32> %1848, i32 %1846, i32 2
%1850 = bitcast <8 x i32> %84 to <32 x i8>
%1851 = bitcast <4 x i32> %86 to <16 x i8>
%1852 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1849, <32 x i8> %1850, <16 x i8> %1851, i32 2)
%1853 = extractelement <4 x float> %1852, i32 1
%1854 = extractelement <4 x float> %1852, i32 3
%1855 = fcmp oeq float %temp42.0, 3.000000e+00
%1856 = select i1 %1855, float 1.000000e+00, float 0.000000e+00
%1857 = bitcast float %1828 to i32
%1858 = bitcast float %1830 to i32
%1859 = bitcast float %238 to i32
%1860 = insertelement <4 x i32> undef, i32 %1857, i32 0
%1861 = insertelement <4 x i32> %1860, i32 %1858, i32 1
%1862 = insertelement <4 x i32> %1861, i32 %1859, i32 2
%1863 = bitcast <8 x i32> %76 to <32 x i8>
%1864 = bitcast <4 x i32> %78 to <16 x i8>
%1865 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1862, <32 x i8> %1863, <16 x i8> %1864, i32 2)
%1866 = extractelement <4 x float> %1865, i32 1
%1867 = extractelement <4 x float> %1865, i32 3
%1868 = fcmp oeq float %temp42.0, 2.000000e+00
%1869 = select i1 %1868, float 1.000000e+00, float 0.000000e+00
%1870 = bitcast float %1828 to i32
%1871 = bitcast float %1830 to i32
%1872 = bitcast float %238 to i32
%1873 = insertelement <4 x i32> undef, i32 %1870, i32 0
%1874 = insertelement <4 x i32> %1873, i32 %1871, i32 1
%1875 = insertelement <4 x i32> %1874, i32 %1872, i32 2
%1876 = bitcast <8 x i32> %68 to <32 x i8>
%1877 = bitcast <4 x i32> %70 to <16 x i8>
%1878 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1875, <32 x i8> %1876, <16 x i8> %1877, i32 2)
%1879 = extractelement <4 x float> %1878, i32 1
%1880 = extractelement <4 x float> %1878, i32 3
%1881 = fcmp oeq float %temp42.0, 1.000000e+00
%1882 = select i1 %1881, float 1.000000e+00, float 0.000000e+00
%1883 = bitcast float %1828 to i32
%1884 = bitcast float %1830 to i32
%1885 = bitcast float %238 to i32
%1886 = insertelement <4 x i32> undef, i32 %1883, i32 0
%1887 = insertelement <4 x i32> %1886, i32 %1884, i32 1
%1888 = insertelement <4 x i32> %1887, i32 %1885, i32 2
%1889 = bitcast <8 x i32> %60 to <32 x i8>
%1890 = bitcast <4 x i32> %62 to <16 x i8>
%1891 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1888, <32 x i8> %1889, <16 x i8> %1890, i32 2)
%1892 = extractelement <4 x float> %1891, i32 1
%1893 = extractelement <4 x float> %1891, i32 3
%1894 = fcmp oeq float %temp42.0, 0.000000e+00
%1895 = select i1 %1894, float 1.000000e+00, float 0.000000e+00
%1896 = fmul float %1892, %1895
%1897 = fmul float %1893, %1895
%1898 = fmul float %1879, %1882
%1899 = fadd float %1898, %1896
%1900 = fmul float %1880, %1882
%1901 = fadd float %1900, %1897
%1902 = fmul float %1866, %1869
%1903 = fadd float %1902, %1899
%1904 = fmul float %1867, %1869
%1905 = fadd float %1904, %1901
%1906 = fmul float %1853, %1856
%1907 = fadd float %1906, %1903
%1908 = fmul float %1854, %1856
%1909 = fadd float %1908, %1905
%1910 = fmul float %1840, %1843
%1911 = fadd float %1910, %1907
%1912 = fmul float %1841, %1843
%1913 = fadd float %1912, %1909
%1914 = fmul float %1913, 2.000000e+00
%1915 = fadd float %1914, -1.000000e+00
%1916 = fmul float %1911, 2.000000e+00
%1917 = fadd float %1916, -1.000000e+00
%1918 = fmul float %1915, %1915
%1919 = fmul float %1917, %1917
%1920 = fadd float %1918, %1919
%1921 = call float @llvm.AMDIL.clamp.(float %1920, float 0.000000e+00, float 1.000000e+00)
%1922 = fcmp une float %38, %temp24.0
%.sink226 = select i1 %1922, float %41, float %40
%temp56.4 = select i1 %1922, float 1.953125e-03, float 3.906250e-03
%1923 = fdiv float 1.000000e+00, %.sink226
%1924 = fmul float %110, %1923
%1925 = fmul float %111, %1923
%1926 = call float @llvm.floor.f32(float %1924)
%1927 = fsub float %1924, %1926
%1928 = call float @llvm.floor.f32(float %1925)
%1929 = fsub float %1925, %1928
%1930 = fmul float %42, 2.000000e+00
%1931 = fmul float %1930, %temp56.4
%1932 = fsub float 1.000000e+00, %1931
%1933 = fmul float %temp56.4, %42
%1934 = fmul float %1927, %1932
%1935 = fadd float %1934, %1933
%1936 = fmul float %1929, %1932
%1937 = fadd float %1936, %1933
%1938 = fmul float %1935, %temp24.0
%1939 = fadd float %1938, %temp40.0
%1940 = fmul float %1937, %temp24.0
%1941 = fadd float %1940, %temp41.0
%1942 = bitcast float %1939 to i32
%1943 = bitcast float %1941 to i32
%1944 = bitcast float %238 to i32
%1945 = insertelement <4 x i32> undef, i32 %1942, i32 0
%1946 = insertelement <4 x i32> %1945, i32 %1943, i32 1
%1947 = insertelement <4 x i32> %1946, i32 %1944, i32 2
%1948 = bitcast <8 x i32> %92 to <32 x i8>
%1949 = bitcast <4 x i32> %94 to <16 x i8>
%1950 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1947, <32 x i8> %1948, <16 x i8> %1949, i32 2)
%1951 = extractelement <4 x float> %1950, i32 1
%1952 = extractelement <4 x float> %1950, i32 3
%1953 = fcmp oeq float %temp42.0, 4.000000e+00
%1954 = select i1 %1953, float 1.000000e+00, float 0.000000e+00
%1955 = bitcast float %1939 to i32
%1956 = bitcast float %1941 to i32
%1957 = bitcast float %238 to i32
%1958 = insertelement <4 x i32> undef, i32 %1955, i32 0
%1959 = insertelement <4 x i32> %1958, i32 %1956, i32 1
%1960 = insertelement <4 x i32> %1959, i32 %1957, i32 2
%1961 = bitcast <8 x i32> %84 to <32 x i8>
%1962 = bitcast <4 x i32> %86 to <16 x i8>
%1963 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1960, <32 x i8> %1961, <16 x i8> %1962, i32 2)
%1964 = extractelement <4 x float> %1963, i32 1
%1965 = extractelement <4 x float> %1963, i32 3
%1966 = fcmp oeq float %temp42.0, 3.000000e+00
%1967 = select i1 %1966, float 1.000000e+00, float 0.000000e+00
%1968 = bitcast float %1939 to i32
%1969 = bitcast float %1941 to i32
%1970 = bitcast float %238 to i32
%1971 = insertelement <4 x i32> undef, i32 %1968, i32 0
%1972 = insertelement <4 x i32> %1971, i32 %1969, i32 1
%1973 = insertelement <4 x i32> %1972, i32 %1970, i32 2
%1974 = bitcast <8 x i32> %76 to <32 x i8>
%1975 = bitcast <4 x i32> %78 to <16 x i8>
%1976 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1973, <32 x i8> %1974, <16 x i8> %1975, i32 2)
%1977 = extractelement <4 x float> %1976, i32 1
%1978 = extractelement <4 x float> %1976, i32 3
%1979 = fcmp oeq float %temp42.0, 2.000000e+00
%1980 = select i1 %1979, float 1.000000e+00, float 0.000000e+00
%1981 = bitcast float %1939 to i32
%1982 = bitcast float %1941 to i32
%1983 = bitcast float %238 to i32
%1984 = insertelement <4 x i32> undef, i32 %1981, i32 0
%1985 = insertelement <4 x i32> %1984, i32 %1982, i32 1
%1986 = insertelement <4 x i32> %1985, i32 %1983, i32 2
%1987 = bitcast <8 x i32> %68 to <32 x i8>
%1988 = bitcast <4 x i32> %70 to <16 x i8>
%1989 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1986, <32 x i8> %1987, <16 x i8> %1988, i32 2)
%1990 = extractelement <4 x float> %1989, i32 1
%1991 = extractelement <4 x float> %1989, i32 3
%1992 = fcmp oeq float %temp42.0, 1.000000e+00
%1993 = select i1 %1992, float 1.000000e+00, float 0.000000e+00
%1994 = bitcast float %1939 to i32
%1995 = bitcast float %1941 to i32
%1996 = bitcast float %238 to i32
%1997 = insertelement <4 x i32> undef, i32 %1994, i32 0
%1998 = insertelement <4 x i32> %1997, i32 %1995, i32 1
%1999 = insertelement <4 x i32> %1998, i32 %1996, i32 2
%2000 = bitcast <8 x i32> %60 to <32 x i8>
%2001 = bitcast <4 x i32> %62 to <16 x i8>
%2002 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %1999, <32 x i8> %2000, <16 x i8> %2001, i32 2)
%2003 = extractelement <4 x float> %2002, i32 1
%2004 = extractelement <4 x float> %2002, i32 3
%2005 = fcmp oeq float %temp42.0, 0.000000e+00
%2006 = select i1 %2005, float 1.000000e+00, float 0.000000e+00
%2007 = fmul float %2003, %2006
%2008 = fmul float %2004, %2006
%2009 = fmul float %1990, %1993
%2010 = fadd float %2009, %2007
%2011 = fmul float %1991, %1993
%2012 = fadd float %2011, %2008
%2013 = fmul float %1977, %1980
%2014 = fadd float %2013, %2010
%2015 = fmul float %1978, %1980
%2016 = fadd float %2015, %2012
%2017 = fmul float %1964, %1967
%2018 = fadd float %2017, %2014
%2019 = fmul float %1965, %1967
%2020 = fadd float %2019, %2016
%2021 = fmul float %1951, %1954
%2022 = fadd float %2021, %2018
%2023 = fmul float %1952, %1954
%2024 = fadd float %2023, %2020
%2025 = fmul float %2024, 2.000000e+00
%2026 = fadd float %2025, -1.000000e+00
%2027 = fmul float %2022, 2.000000e+00
%2028 = fadd float %2027, -1.000000e+00
%2029 = fmul float %2026, %2026
%2030 = fmul float %2028, %2028
%2031 = fadd float %2029, %2030
%2032 = call float @llvm.AMDIL.clamp.(float %2031, float 0.000000e+00, float 1.000000e+00)
%2033 = fmul float %161, 0.000000e+00
%2034 = fmul float %1804, %161
%2035 = fmul float %1806, %161
%2036 = fmul float %1917, %162
%2037 = fadd float %2036, %2033
%2038 = fmul float %162, 0.000000e+00
%2039 = fadd float %2038, %2034
%2040 = fmul float %1915, %162
%2041 = fadd float %2040, %2035
%2042 = fmul float %2026, %163
%2043 = fadd float %2042, %2037
%2044 = fmul float %2028, %163
%2045 = fadd float %2044, %2039
%2046 = fmul float %163, 0.000000e+00
%2047 = fadd float %2046, %2041
%2048 = fcmp une float %38, %temp28.0
%.sink227 = select i1 %2048, float %41, float %40
%temp40.1 = select i1 %2048, float 1.953125e-03, float 3.906250e-03
%2049 = fdiv float 1.000000e+00, %.sink227
%2050 = fmul float %112, %2049
%2051 = fmul float %111, %2049
%2052 = call float @llvm.floor.f32(float %2050)
%2053 = fsub float %2050, %2052
%2054 = call float @llvm.floor.f32(float %2051)
%2055 = fsub float %2051, %2054
%2056 = fmul float %42, 2.000000e+00
%2057 = fmul float %2056, %temp40.1
%2058 = fsub float 1.000000e+00, %2057
%2059 = fmul float %temp40.1, %42
%2060 = fmul float %2053, %2058
%2061 = fadd float %2060, %2059
%2062 = fmul float %2055, %2058
%2063 = fadd float %2062, %2059
%2064 = fmul float %2061, %temp28.0
%2065 = fadd float %2064, %temp16.0
%2066 = fmul float %2063, %temp28.0
%2067 = fadd float %2066, %temp17.0
%2068 = bitcast float %2065 to i32
%2069 = bitcast float %2067 to i32
%2070 = bitcast float %238 to i32
%2071 = insertelement <4 x i32> undef, i32 %2068, i32 0
%2072 = insertelement <4 x i32> %2071, i32 %2069, i32 1
%2073 = insertelement <4 x i32> %2072, i32 %2070, i32 2
%2074 = bitcast <8 x i32> %92 to <32 x i8>
%2075 = bitcast <4 x i32> %94 to <16 x i8>
%2076 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2073, <32 x i8> %2074, <16 x i8> %2075, i32 2)
%2077 = extractelement <4 x float> %2076, i32 1
%2078 = extractelement <4 x float> %2076, i32 3
%2079 = fcmp oeq float %temp18.0, 4.000000e+00
%2080 = select i1 %2079, float 1.000000e+00, float 0.000000e+00
%2081 = bitcast float %2065 to i32
%2082 = bitcast float %2067 to i32
%2083 = bitcast float %238 to i32
%2084 = insertelement <4 x i32> undef, i32 %2081, i32 0
%2085 = insertelement <4 x i32> %2084, i32 %2082, i32 1
%2086 = insertelement <4 x i32> %2085, i32 %2083, i32 2
%2087 = bitcast <8 x i32> %84 to <32 x i8>
%2088 = bitcast <4 x i32> %86 to <16 x i8>
%2089 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2086, <32 x i8> %2087, <16 x i8> %2088, i32 2)
%2090 = extractelement <4 x float> %2089, i32 1
%2091 = extractelement <4 x float> %2089, i32 3
%2092 = fcmp oeq float %temp18.0, 3.000000e+00
%2093 = select i1 %2092, float 1.000000e+00, float 0.000000e+00
%2094 = bitcast float %2065 to i32
%2095 = bitcast float %2067 to i32
%2096 = bitcast float %238 to i32
%2097 = insertelement <4 x i32> undef, i32 %2094, i32 0
%2098 = insertelement <4 x i32> %2097, i32 %2095, i32 1
%2099 = insertelement <4 x i32> %2098, i32 %2096, i32 2
%2100 = bitcast <8 x i32> %76 to <32 x i8>
%2101 = bitcast <4 x i32> %78 to <16 x i8>
%2102 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2099, <32 x i8> %2100, <16 x i8> %2101, i32 2)
%2103 = extractelement <4 x float> %2102, i32 1
%2104 = extractelement <4 x float> %2102, i32 3
%2105 = fcmp oeq float %temp18.0, 2.000000e+00
%2106 = select i1 %2105, float 1.000000e+00, float 0.000000e+00
%2107 = bitcast float %2065 to i32
%2108 = bitcast float %2067 to i32
%2109 = bitcast float %238 to i32
%2110 = insertelement <4 x i32> undef, i32 %2107, i32 0
%2111 = insertelement <4 x i32> %2110, i32 %2108, i32 1
%2112 = insertelement <4 x i32> %2111, i32 %2109, i32 2
%2113 = bitcast <8 x i32> %68 to <32 x i8>
%2114 = bitcast <4 x i32> %70 to <16 x i8>
%2115 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2112, <32 x i8> %2113, <16 x i8> %2114, i32 2)
%2116 = extractelement <4 x float> %2115, i32 1
%2117 = extractelement <4 x float> %2115, i32 3
%2118 = fcmp oeq float %temp18.0, 1.000000e+00
%2119 = select i1 %2118, float 1.000000e+00, float 0.000000e+00
%2120 = bitcast float %2065 to i32
%2121 = bitcast float %2067 to i32
%2122 = bitcast float %238 to i32
%2123 = insertelement <4 x i32> undef, i32 %2120, i32 0
%2124 = insertelement <4 x i32> %2123, i32 %2121, i32 1
%2125 = insertelement <4 x i32> %2124, i32 %2122, i32 2
%2126 = bitcast <8 x i32> %60 to <32 x i8>
%2127 = bitcast <4 x i32> %62 to <16 x i8>
%2128 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2125, <32 x i8> %2126, <16 x i8> %2127, i32 2)
%2129 = extractelement <4 x float> %2128, i32 1
%2130 = extractelement <4 x float> %2128, i32 3
%2131 = fcmp oeq float %temp18.0, 0.000000e+00
%2132 = select i1 %2131, float 1.000000e+00, float 0.000000e+00
%2133 = fmul float %2129, %2132
%2134 = fmul float %2130, %2132
%2135 = fmul float %2116, %2119
%2136 = fadd float %2135, %2133
%2137 = fmul float %2117, %2119
%2138 = fadd float %2137, %2134
%2139 = fmul float %2103, %2106
%2140 = fadd float %2139, %2136
%2141 = fmul float %2104, %2106
%2142 = fadd float %2141, %2138
%2143 = fmul float %2090, %2093
%2144 = fadd float %2143, %2140
%2145 = fmul float %2091, %2093
%2146 = fadd float %2145, %2142
%2147 = fmul float %2077, %2080
%2148 = fadd float %2147, %2144
%2149 = fmul float %2078, %2080
%2150 = fadd float %2149, %2146
%2151 = fmul float %2150, 2.000000e+00
%2152 = fadd float %2151, -1.000000e+00
%2153 = fmul float %2148, 2.000000e+00
%2154 = fadd float %2153, -1.000000e+00
%2155 = fmul float %2152, %2152
%2156 = fmul float %2154, %2154
%2157 = fadd float %2155, %2156
%2158 = call float @llvm.AMDIL.clamp.(float %2157, float 0.000000e+00, float 1.000000e+00)
%2159 = fcmp une float %38, %temp28.0
%.sink228 = select i1 %2159, float %41, float %40
%temp48.5 = select i1 %2159, float 1.953125e-03, float 3.906250e-03
%2160 = fdiv float 1.000000e+00, %.sink228
%2161 = fmul float %112, %2160
%2162 = fmul float %110, %2160
%2163 = call float @llvm.floor.f32(float %2161)
%2164 = fsub float %2161, %2163
%2165 = call float @llvm.floor.f32(float %2162)
%2166 = fsub float %2162, %2165
%2167 = fmul float %42, 2.000000e+00
%2168 = fmul float %2167, %temp48.5
%2169 = fsub float 1.000000e+00, %2168
%2170 = fmul float %temp48.5, %42
%2171 = fmul float %2164, %2169
%2172 = fadd float %2171, %2170
%2173 = fmul float %2166, %2169
%2174 = fadd float %2173, %2170
%2175 = fmul float %2172, %temp28.0
%2176 = fadd float %2175, %temp16.0
%2177 = fmul float %2174, %temp28.0
%2178 = fadd float %2177, %temp17.0
%2179 = bitcast float %2176 to i32
%2180 = bitcast float %2178 to i32
%2181 = bitcast float %238 to i32
%2182 = insertelement <4 x i32> undef, i32 %2179, i32 0
%2183 = insertelement <4 x i32> %2182, i32 %2180, i32 1
%2184 = insertelement <4 x i32> %2183, i32 %2181, i32 2
%2185 = bitcast <8 x i32> %92 to <32 x i8>
%2186 = bitcast <4 x i32> %94 to <16 x i8>
%2187 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2184, <32 x i8> %2185, <16 x i8> %2186, i32 2)
%2188 = extractelement <4 x float> %2187, i32 1
%2189 = extractelement <4 x float> %2187, i32 3
%2190 = fcmp oeq float %temp18.0, 4.000000e+00
%2191 = select i1 %2190, float 1.000000e+00, float 0.000000e+00
%2192 = bitcast float %2176 to i32
%2193 = bitcast float %2178 to i32
%2194 = bitcast float %238 to i32
%2195 = insertelement <4 x i32> undef, i32 %2192, i32 0
%2196 = insertelement <4 x i32> %2195, i32 %2193, i32 1
%2197 = insertelement <4 x i32> %2196, i32 %2194, i32 2
%2198 = bitcast <8 x i32> %84 to <32 x i8>
%2199 = bitcast <4 x i32> %86 to <16 x i8>
%2200 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2197, <32 x i8> %2198, <16 x i8> %2199, i32 2)
%2201 = extractelement <4 x float> %2200, i32 1
%2202 = extractelement <4 x float> %2200, i32 3
%2203 = fcmp oeq float %temp18.0, 3.000000e+00
%2204 = select i1 %2203, float 1.000000e+00, float 0.000000e+00
%2205 = bitcast float %2176 to i32
%2206 = bitcast float %2178 to i32
%2207 = bitcast float %238 to i32
%2208 = insertelement <4 x i32> undef, i32 %2205, i32 0
%2209 = insertelement <4 x i32> %2208, i32 %2206, i32 1
%2210 = insertelement <4 x i32> %2209, i32 %2207, i32 2
%2211 = bitcast <8 x i32> %76 to <32 x i8>
%2212 = bitcast <4 x i32> %78 to <16 x i8>
%2213 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2210, <32 x i8> %2211, <16 x i8> %2212, i32 2)
%2214 = extractelement <4 x float> %2213, i32 1
%2215 = extractelement <4 x float> %2213, i32 3
%2216 = fcmp oeq float %temp18.0, 2.000000e+00
%2217 = select i1 %2216, float 1.000000e+00, float 0.000000e+00
%2218 = bitcast float %2176 to i32
%2219 = bitcast float %2178 to i32
%2220 = bitcast float %238 to i32
%2221 = insertelement <4 x i32> undef, i32 %2218, i32 0
%2222 = insertelement <4 x i32> %2221, i32 %2219, i32 1
%2223 = insertelement <4 x i32> %2222, i32 %2220, i32 2
%2224 = bitcast <8 x i32> %68 to <32 x i8>
%2225 = bitcast <4 x i32> %70 to <16 x i8>
%2226 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2223, <32 x i8> %2224, <16 x i8> %2225, i32 2)
%2227 = extractelement <4 x float> %2226, i32 1
%2228 = extractelement <4 x float> %2226, i32 3
%2229 = fcmp oeq float %temp18.0, 1.000000e+00
%2230 = select i1 %2229, float 1.000000e+00, float 0.000000e+00
%2231 = bitcast float %2176 to i32
%2232 = bitcast float %2178 to i32
%2233 = bitcast float %238 to i32
%2234 = insertelement <4 x i32> undef, i32 %2231, i32 0
%2235 = insertelement <4 x i32> %2234, i32 %2232, i32 1
%2236 = insertelement <4 x i32> %2235, i32 %2233, i32 2
%2237 = bitcast <8 x i32> %60 to <32 x i8>
%2238 = bitcast <4 x i32> %62 to <16 x i8>
%2239 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2236, <32 x i8> %2237, <16 x i8> %2238, i32 2)
%2240 = extractelement <4 x float> %2239, i32 1
%2241 = extractelement <4 x float> %2239, i32 3
%2242 = fcmp oeq float %temp18.0, 0.000000e+00
%2243 = select i1 %2242, float 1.000000e+00, float 0.000000e+00
%2244 = fmul float %2240, %2243
%2245 = fmul float %2241, %2243
%2246 = fmul float %2227, %2230
%2247 = fadd float %2246, %2244
%2248 = fmul float %2228, %2230
%2249 = fadd float %2248, %2245
%2250 = fmul float %2214, %2217
%2251 = fadd float %2250, %2247
%2252 = fmul float %2215, %2217
%2253 = fadd float %2252, %2249
%2254 = fmul float %2201, %2204
%2255 = fadd float %2254, %2251
%2256 = fmul float %2202, %2204
%2257 = fadd float %2256, %2253
%2258 = fmul float %2188, %2191
%2259 = fadd float %2258, %2255
%2260 = fmul float %2189, %2191
%2261 = fadd float %2260, %2257
%2262 = fmul float %2261, 2.000000e+00
%2263 = fadd float %2262, -1.000000e+00
%2264 = fmul float %2259, 2.000000e+00
%2265 = fadd float %2264, -1.000000e+00
%2266 = fmul float %2263, %2263
%2267 = fmul float %2265, %2265
%2268 = fadd float %2266, %2267
%2269 = call float @llvm.AMDIL.clamp.(float %2268, float 0.000000e+00, float 1.000000e+00)
%2270 = fcmp une float %38, %temp28.0
%.sink229 = select i1 %2270, float %41, float %40
%temp52.6 = select i1 %2270, float 1.953125e-03, float 3.906250e-03
%2271 = fdiv float 1.000000e+00, %.sink229
%2272 = fmul float %110, %2271
%2273 = fmul float %111, %2271
%2274 = call float @llvm.floor.f32(float %2272)
%2275 = fsub float %2272, %2274
%2276 = call float @llvm.floor.f32(float %2273)
%2277 = fsub float %2273, %2276
%2278 = fmul float %42, 2.000000e+00
%2279 = fmul float %2278, %temp52.6
%2280 = fsub float 1.000000e+00, %2279
%2281 = fmul float %temp52.6, %42
%2282 = fmul float %2275, %2280
%2283 = fadd float %2282, %2281
%2284 = fmul float %2277, %2280
%2285 = fadd float %2284, %2281
%2286 = fmul float %2283, %temp28.0
%2287 = fadd float %2286, %temp16.0
%2288 = fmul float %2285, %temp28.0
%2289 = fadd float %2288, %temp17.0
%2290 = bitcast float %2287 to i32
%2291 = bitcast float %2289 to i32
%2292 = bitcast float %238 to i32
%2293 = insertelement <4 x i32> undef, i32 %2290, i32 0
%2294 = insertelement <4 x i32> %2293, i32 %2291, i32 1
%2295 = insertelement <4 x i32> %2294, i32 %2292, i32 2
%2296 = bitcast <8 x i32> %92 to <32 x i8>
%2297 = bitcast <4 x i32> %94 to <16 x i8>
%2298 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2295, <32 x i8> %2296, <16 x i8> %2297, i32 2)
%2299 = extractelement <4 x float> %2298, i32 1
%2300 = extractelement <4 x float> %2298, i32 3
%2301 = fcmp oeq float %temp18.0, 4.000000e+00
%2302 = select i1 %2301, float 1.000000e+00, float 0.000000e+00
%2303 = bitcast float %2287 to i32
%2304 = bitcast float %2289 to i32
%2305 = bitcast float %238 to i32
%2306 = insertelement <4 x i32> undef, i32 %2303, i32 0
%2307 = insertelement <4 x i32> %2306, i32 %2304, i32 1
%2308 = insertelement <4 x i32> %2307, i32 %2305, i32 2
%2309 = bitcast <8 x i32> %84 to <32 x i8>
%2310 = bitcast <4 x i32> %86 to <16 x i8>
%2311 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2308, <32 x i8> %2309, <16 x i8> %2310, i32 2)
%2312 = extractelement <4 x float> %2311, i32 1
%2313 = extractelement <4 x float> %2311, i32 3
%2314 = fcmp oeq float %temp18.0, 3.000000e+00
%2315 = select i1 %2314, float 1.000000e+00, float 0.000000e+00
%2316 = bitcast float %2287 to i32
%2317 = bitcast float %2289 to i32
%2318 = bitcast float %238 to i32
%2319 = insertelement <4 x i32> undef, i32 %2316, i32 0
%2320 = insertelement <4 x i32> %2319, i32 %2317, i32 1
%2321 = insertelement <4 x i32> %2320, i32 %2318, i32 2
%2322 = bitcast <8 x i32> %76 to <32 x i8>
%2323 = bitcast <4 x i32> %78 to <16 x i8>
%2324 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2321, <32 x i8> %2322, <16 x i8> %2323, i32 2)
%2325 = extractelement <4 x float> %2324, i32 1
%2326 = extractelement <4 x float> %2324, i32 3
%2327 = fcmp oeq float %temp18.0, 2.000000e+00
%2328 = select i1 %2327, float 1.000000e+00, float 0.000000e+00
%2329 = bitcast float %2287 to i32
%2330 = bitcast float %2289 to i32
%2331 = bitcast float %238 to i32
%2332 = insertelement <4 x i32> undef, i32 %2329, i32 0
%2333 = insertelement <4 x i32> %2332, i32 %2330, i32 1
%2334 = insertelement <4 x i32> %2333, i32 %2331, i32 2
%2335 = bitcast <8 x i32> %68 to <32 x i8>
%2336 = bitcast <4 x i32> %70 to <16 x i8>
%2337 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2334, <32 x i8> %2335, <16 x i8> %2336, i32 2)
%2338 = extractelement <4 x float> %2337, i32 1
%2339 = extractelement <4 x float> %2337, i32 3
%2340 = fcmp oeq float %temp18.0, 1.000000e+00
%2341 = select i1 %2340, float 1.000000e+00, float 0.000000e+00
%2342 = bitcast float %2287 to i32
%2343 = bitcast float %2289 to i32
%2344 = bitcast float %238 to i32
%2345 = insertelement <4 x i32> undef, i32 %2342, i32 0
%2346 = insertelement <4 x i32> %2345, i32 %2343, i32 1
%2347 = insertelement <4 x i32> %2346, i32 %2344, i32 2
%2348 = bitcast <8 x i32> %60 to <32 x i8>
%2349 = bitcast <4 x i32> %62 to <16 x i8>
%2350 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %2347, <32 x i8> %2348, <16 x i8> %2349, i32 2)
%2351 = extractelement <4 x float> %2350, i32 1
%2352 = extractelement <4 x float> %2350, i32 3
%2353 = fcmp oeq float %temp18.0, 0.000000e+00
%2354 = select i1 %2353, float 1.000000e+00, float 0.000000e+00
%2355 = fmul float %2351, %2354
%2356 = fmul float %2352, %2354
%2357 = fmul float %2338, %2341
%2358 = fadd float %2357, %2355
%2359 = fmul float %2339, %2341
%2360 = fadd float %2359, %2356
%2361 = fmul float %2325, %2328
%2362 = fadd float %2361, %2358
%2363 = fmul float %2326, %2328
%2364 = fadd float %2363, %2360
%2365 = fmul float %2312, %2315
%2366 = fadd float %2365, %2362
%2367 = fmul float %2313, %2315
%2368 = fadd float %2367, %2364
%2369 = fmul float %2299, %2302
%2370 = fadd float %2369, %2366
%2371 = fmul float %2300, %2302
%2372 = fadd float %2371, %2368
%2373 = fmul float %2372, 2.000000e+00
%2374 = fadd float %2373, -1.000000e+00
%2375 = fmul float %2370, 2.000000e+00
%2376 = fadd float %2375, -1.000000e+00
%2377 = fmul float %2374, %2374
%2378 = fmul float %2376, %2376
%2379 = fadd float %2377, %2378
%2380 = call float @llvm.AMDIL.clamp.(float %2379, float 0.000000e+00, float 1.000000e+00)
%2381 = fmul float %161, 0.000000e+00
%2382 = fmul float %2152, %161
%2383 = fmul float %2154, %161
%2384 = fmul float %2265, %162
%2385 = fadd float %2384, %2381
%2386 = fmul float %162, 0.000000e+00
%2387 = fadd float %2386, %2382
%2388 = fmul float %2263, %162
%2389 = fadd float %2388, %2383
%2390 = fmul float %2374, %163
%2391 = fadd float %2390, %2385
%2392 = fmul float %2376, %163
%2393 = fadd float %2392, %2387
%2394 = fmul float %163, 0.000000e+00
%2395 = fadd float %2394, %2389
%2396 = fmul float %99, %1695
%2397 = fmul float %99, %1697
%2398 = fmul float %99, %1699
%2399 = fmul float %100, %2043
%2400 = fadd float %2399, %2396
%2401 = fmul float %100, %2045
%2402 = fadd float %2401, %2397
%2403 = fmul float %100, %2047
%2404 = fadd float %2403, %2398
%2405 = fmul float %101, %2391
%2406 = fadd float %2405, %2400
%2407 = fmul float %101, %2393
%2408 = fadd float %2407, %2402
%2409 = fmul float %101, %2395
%2410 = fadd float %2409, %2404
%2411 = fmul float %2406, %2406
%2412 = fmul float %2408, %2408
%2413 = fadd float %2411, %2412
%2414 = fmul float %2410, %2410
%2415 = fadd float %2413, %2414
%2416 = fadd float %2415, 1.000000e+00
%2417 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2416)
%2418 = fmul float %2406, %2417
%2419 = fmul float %2408, %2417
%2420 = fmul float %2410, %2417
%2421 = fmul float %2418, %98
%2422 = fmul float %2419, %98
%2423 = fmul float %2420, %98
%2424 = fsub float %107, %2421
%2425 = fsub float %108, %2422
%2426 = fsub float %109, %2423
%2427 = fmul float %2424, %2424
%2428 = fmul float %2425, %2425
%2429 = fadd float %2428, %2427
%2430 = fmul float %2426, %2426
%2431 = fadd float %2429, %2430
%2432 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2431)
%2433 = fmul float %2424, %2432
%2434 = fmul float %2425, %2432
%2435 = fmul float %2426, %2432
%2436 = fdiv float %103, %105
%2437 = fdiv float %104, %105
%2438 = bitcast float %2436 to i32
%2439 = bitcast float %2437 to i32
%2440 = insertelement <2 x i32> undef, i32 %2438, i32 0
%2441 = insertelement <2 x i32> %2440, i32 %2439, i32 1
%2442 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %2441, <32 x i8> %52, <16 x i8> %54, i32 2)
%2443 = extractelement <4 x float> %2442, i32 0
%2444 = fmul float %1347, %113
%2445 = fmul float %1349, %114
%2446 = fmul float %1351, %115
%2447 = fadd float %122, %134
%2448 = fadd float %123, %135
%2449 = fadd float %124, %136
%2450 = fmul float %2447, %2447
%2451 = fmul float %2448, %2448
%2452 = fadd float %2451, %2450
%2453 = fmul float %2449, %2449
%2454 = fadd float %2452, %2453
%2455 = call float @llvm.AMDGPU.rsq.clamped.f32(float %2454)
%2456 = fmul float %2447, %2455
%2457 = fmul float %2448, %2455
%2458 = fmul float %2449, %2455
%2459 = fmul float %2433, %2456
%2460 = fmul float %2434, %2457
%2461 = fadd float %2460, %2459
%2462 = fmul float %2435, %2458
%2463 = fadd float %2461, %2462
%2464 = call float @llvm.maxnum.f32(float %2463, float 0x3F1A36E2E0000000)
%2465 = fmul float %102, 3.200000e+01
%2466 = call float @llvm.pow.f32(float %2464, float %2465)
%2467 = call float @llvm.AMDIL.clamp.(float %2466, float 0.000000e+00, float 1.000000e+00)
%2468 = fmul float %2467, 2.000000e+00
%2469 = fsub float 3.000000e+00, %2468
%2470 = fmul float %2467, %2469
%2471 = fmul float %2467, %2470
%2472 = fmul float %2471, %102
%2473 = fmul float %1347, %35
%2474 = fmul float %1349, %36
%2475 = fmul float %1351, %37
%2476 = fmul float %2433, %122
%2477 = fmul float %2434, %123
%2478 = fadd float %2477, %2476
%2479 = fmul float %2435, %124
%2480 = fadd float %2478, %2479
%2481 = call float @llvm.AMDIL.clamp.(float %2480, float 0.000000e+00, float 1.000000e+00)
%2482 = fmul float %48, 2.000000e+00
%2483 = fmul float %49, 2.000000e+00
%2484 = fmul float %50, 2.000000e+00
%2485 = fmul float %2482, %2443
%2486 = fmul float %2483, %2443
%2487 = fmul float %2484, %2443
%2488 = call float @llvm.maxnum.f32(float %2485, float %45)
%2489 = call float @llvm.maxnum.f32(float %2486, float %46)
%2490 = call float @llvm.maxnum.f32(float %2487, float %47)
%2491 = call float @llvm.minnum.f32(float %2488, float 1.000000e+00)
%2492 = call float @llvm.minnum.f32(float %2489, float 1.000000e+00)
%2493 = call float @llvm.minnum.f32(float %2490, float 1.000000e+00)
%2494 = fmul float %2491, %1347
%2495 = fmul float %2492, %1349
%2496 = fmul float %2493, %1351
%2497 = fmul float %2473, %2481
%2498 = fadd float %2497, %2494
%2499 = fmul float %2474, %2481
%2500 = fadd float %2499, %2495
%2501 = fmul float %2475, %2481
%2502 = fadd float %2501, %2496
%2503 = fmul float %35, %2472
%2504 = fadd float %2503, %2498
%2505 = fmul float %36, %2472
%2506 = fadd float %2505, %2500
%2507 = fmul float %37, %2472
%2508 = fadd float %2507, %2502
%2509 = fmul float %2504, %2443
%2510 = fmul float %2506, %2443
%2511 = fmul float %2508, %2443
%2512 = fmul float %2509, 5.000000e-01
%2513 = fmul float %2510, 5.000000e-01
%2514 = fmul float %2511, 5.000000e-01
%2515 = fadd float %2444, %2512
%2516 = fadd float %2445, %2513
%2517 = fadd float %2446, %2514
%2518 = fmul float %106, %33
%2519 = fadd float %2518, %34
%2520 = call float @llvm.AMDIL.clamp.(float %2519, float 0.000000e+00, float 1.000000e+00)
%2521 = call float @llvm.AMDGPU.lrp(float %2520, float %2515, float %30)
%2522 = call float @llvm.AMDGPU.lrp(float %2520, float %2516, float %31)
%2523 = call float @llvm.AMDGPU.lrp(float %2520, float %2517, float %32)
%2524 = call i32 @llvm.SI.packf16(float %2521, float %2522)
%2525 = bitcast i32 %2524 to float
%2526 = call i32 @llvm.SI.packf16(float %2523, float 1.000000e+00)
%2527 = bitcast i32 %2526 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %2525, float %2527, float %2525, float %2527)
ret void
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.fabs.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.maxnum.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.floor.f32(float) #1
; Function Attrs: nounwind readnone
declare float @llvm.log2.f32(float) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: readnone
declare float @llvm.AMDIL.clamp.(float, float, float) #2
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
; Function Attrs: nounwind readnone
declare float @llvm.pow.f32(float, float) #1
; Function Attrs: nounwind readnone
declare float @llvm.minnum.f32(float, float) #1
; Function Attrs: readnone
declare float @llvm.AMDGPU.lrp(float, float, float) #2
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
!0 = !{!"const", null, i32 1}
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
LLVM triggered Diagnostic Handler: Ran out of VGPRs for spilling SGPR
Stacktrace:
Native stacktrace:
/media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libmono.so(+0x915c6) [0x7f48efa425c6]
/media/bigdata/games/steam/steamapps/common/Planet Explorers/PE_Client_Data/Mono/x86_64/libmono.so(+0x3481b) [0x7f48ef9e581b]
/usr/lib/libpthread.so.0(+0x10d60) [0x7f48f5840d60]
/usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter15getRegisterNameEj+0xa) [0x7f48e6bce59a]
/usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter15printRegOperandEjRNS_11raw_ostreamERKNS_14MCRegisterInfoE+0x6b1) [0x7f48e6bcec61]
/usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter12printOperandEPKNS_6MCInstEjRNS_11raw_ostreamE+0x86) [0x7f48e6bcecf6]
/usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter16printInstructionEPKNS_6MCInstERNS_11raw_ostreamE+0x9e0) [0x7f48e6bcfa70]
/usr/lib/libLLVM.so.3.7(_ZN4llvm17AMDGPUInstPrinter9printInstEPKNS_6MCInstERNS_11raw_ostreamENS_9StringRefERKNS_15MCSubtargetInfoE+0x37) [0x7f48e6bd1527]
/usr/lib/libLLVM.so.3.7(_ZN4llvm16AMDGPUAsmPrinter15EmitInstructionEPKNS_12MachineInstrE+0x29c) [0x7f48e6b3acec]
/usr/lib/libLLVM.so.3.7(_ZN4llvm10AsmPrinter16EmitFunctionBodyEv+0x8b1) [0x7f48e637ad91]
/usr/lib/libLLVM.so.3.7(_ZN4llvm16AMDGPUAsmPrinter20runOnMachineFunctionERNS_15MachineFunctionE+0x2fa) [0x7f48e6b3100a]
/usr/lib/libLLVM.so.3.7(_ZN4llvm13FPPassManager13runOnFunctionERNS_8FunctionE+0x27f) [0x7f48e5f7385f]
/usr/lib/libLLVM.so.3.7(_ZN4llvm13FPPassManager11runOnModuleERNS_6ModuleE+0x2b) [0x7f48e5f73bfb]
/usr/lib/libLLVM.so.3.7(_ZN4llvm6legacy15PassManagerImpl3runERNS_6ModuleE+0x2f6) [0x7f48e5f733e6]
/usr/lib/libLLVM.so.3.7(LLVMTargetMachineEmitToMemoryBuffer+0x1b8) [0x7f48e6a09eb8]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x652bbb) [0x7f48edd67bbb]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5b7cc8) [0x7f48edccccc8]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5b8759) [0x7f48edccd759]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5c1e67) [0x7f48edcd6e67]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5c268a) [0x7f48edcd768a]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x5bf4fb) [0x7f48edcd44fb]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x37b787) [0x7f48eda90787]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x1d94df) [0x7f48ed8ee4df]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x1ab448) [0x7f48ed8c0448]
/usr/lib/xorg/modules/dri/radeonsi_dri.so(+0x1ab8c9) [0x7f48ed8c08c9]
./PE_Client.x86_64() [0xd48b9d]
./PE_Client.x86_64() [0x5f78df]
./PE_Client.x86_64() [0x5f7956]
./PE_Client.x86_64() [0x4a2f0c]
./PE_Client.x86_64() [0x4a3ef0]
./PE_Client.x86_64() [0x4a593e]
./PE_Client.x86_64() [0x4b7b7a]
./PE_Client.x86_64() [0x4c4d5c]
./PE_Client.x86_64() [0x5157e4]
./PE_Client.x86_64() [0x706b88]
./PE_Client.x86_64() [0x46482f]
/usr/lib/libc.so.6(__libc_start_main+0xf0) [0x7f48f402f610]
./PE_Client.x86_64() [0x46dd0d]
Debug info from gdb:
ptrace: Operation not permitted.
No threads.
=================================================================
Got a SIGSEGV while executing native code. This usually indicates
a fatal error in the mono runtime or one of the native libraries
used by your application.
=================================================================