[S_API FAIL] SteamAPI_Init() failed; SteamAPI_IsSteamRunning() failed. [S_API FAIL] SteamAPI_Init() failed; unable to locate a running instance of Steam, or a local steamclient.so. SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 GUID Assets\dlc\dlc_01\mongol.civ5pkg 7a036b7fb9a80e8dea7b73fb58c5a288 GUID Assets\dlc\dlc_02\spaininca.civ5pkg 4f75e72761f4c6019b55a0a7b90444a2 GUID Assets\dlc\dlc_03\polynesia.civ5pkg 99ac9d5f6ca4b5bed0ab89c0fd3b9e6d GUID Assets\dlc\dlc_04\denmark.civ5pkg 0efb155307bd6d14c9290b49b5364a3e GUID Assets\dlc\dlc_05\korea.civ5pkg 9f4df81cf712ae9480737f816bf6f4c8 GUID Assets\dlc\dlc_06\ancientwonders.civ5pkg 92b102db9a3c7dc068030c3ce33bbb48 GUID Assets\dlc\dlc_07\civcomplete.civ5pkg eb01a0be4d8e5312f53b042c8a7c30b5 GUID Assets\dlc\dlc_deluxe\babylon.civ5pkg 712495341921f2b288746c6d44fd6867 GUID Assets\dlc\dlc_sp_maps\dlc_sp_maps.civ5pkg 52b285c37939913e0a5b72933bb06067 GUID Assets\dlc\dlc_sp_maps_2\dlc_sp_maps_2.civ5pkg 16a61e7a2a7bb4bc2d1f677b5bb58ff4 GUID Assets\dlc\dlc_sp_maps_3\dlc_sp_maps_3.civ5pkg 1954db58e0a60b018969c49440fa01ef GUID Assets\dlc\expansion\expansion1.civ5pkg 8bc30c58378345cb0911c5848926f1ff GUID Assets\dlc\expansion2\expansion2.civ5pkg 31dfaa9838c5b051d4c2112ddd9e7eb3 GUID Assets\dlc\shared\upgrade1.civ5pkg e818fa28902977b42ee5e3426f5112e6 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MOV TEMP[0].x, IN[0].xxxx 2: MOV TEMP[0].y, IN[0].yyyy 3: MOV TEMP[1].xy, IN[1].xyxx 4: MOV OUT[1], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v5, v5 ; F800020F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 12, 0, 1, 0, v0, v1, v5, v4 ; F80008CF 04050100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %35, float %36, float %37, float %38) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) br label %ENDIF ELSE: ; preds = %main_body %158 = bitcast float %42 to i32 %159 = bitcast float %43 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = bitcast <8 x i32> %35 to <32 x i8> %163 = bitcast <4 x i32> %37 to <16 x i8> %164 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %161, <32 x i8> %162, <16 x i8> %163, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %164, %ELSE ], [ %157, %IF ] %165 = extractelement <4 x float> %.sink, i32 0 %166 = extractelement <4 x float> %.sink, i32 1 %167 = extractelement <4 x float> %.sink, i32 2 %168 = extractelement <4 x float> %.sink, i32 3 %169 = fmul float %165, %38 %170 = fmul float %166, %39 %171 = fmul float %167, %40 %172 = fmul float %168, %41 %173 = call i32 @llvm.SI.packf16(float %169, float %170) %174 = bitcast i32 %173 to float %175 = call i32 @llvm.SI.packf16(float %171, float %172) %176 = bitcast i32 %175 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %174, float %176, float %174, float %176) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640806 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v7 ; 10000E19 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v8, s25 ; 7E105419 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mul_f32_e32 v1, s26, v6 ; 10020C1A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v12, 0.5, v1 ; 061802F0 v_rcp_f32_e32 v13, s26 ; 7E1A541A v_mul_f32_e32 v8, v13, v12 ; 1010190D v_mov_b32_e32 v10, 0 ; 7E140280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[12:13], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[32:39], s[28:31] ; F0900900 00E80C08 v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v13, v14, 0.5 ; D282000E 03C21D0D v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v12 ; 101E18FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v12, s27, v12, 0.5 ; D282000C 03C2181B v_fract_f32_e32 v13, v12 ; 7E1A410C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_mov_b32_e32 v13, 0x44800000 ; 7E1A02FF 44800000 v_subrev_f32_e32 v13, s27, v13 ; 0A1A1A1B v_cmp_ge_f32_e64 s[28:29], v13, 0 ; D00C001C 0001010D v_cndmask_b32_e64 v13, 0, -1, s[28:29] ; D200080D 00718280 v_cmp_ne_i32_e64 s[28:29], v13, 0 ; D10A001C 0001010D v_cndmask_b32_e64 v12, v14, v12, s[28:29] ; D200000C 1072190E v_rcp_f32_e32 v13, s8 ; 7E1A5408 v_mad_f32 v13, s24, v13, 0.5 ; D282000D 03C21A18 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v12, v13 ; 101C1B0C v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[28:29] ; D200000E 10721B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v12, v15 ; 101E1F0C v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v8, v14, v1 ; 1010030E v_rcp_f32_e32 v1, v13 ; 7E02550D v_mul_f32_e32 v13, v12, v1 ; 101A030C v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v1, v1, v12, -v13 ; D2820001 84361901 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[12:15] ; F0900F00 00640808 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v0, v2, v11 ; 10001702 v_mul_f32_e32 v1, v3, v10 ; 10021503 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mul_f32_e32 v1, v4, v9 ; 10021304 v_mul_f32_e32 v2, v5, v8 ; 10041105 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) br label %ENDIF ELSE: ; preds = %main_body %158 = bitcast float %42 to i32 %159 = bitcast float %43 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = bitcast <8 x i32> %35 to <32 x i8> %163 = bitcast <4 x i32> %37 to <16 x i8> %164 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %161, <32 x i8> %162, <16 x i8> %163, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %164, %ELSE ], [ %157, %IF ] %165 = extractelement <4 x float> %.sink, i32 0 %166 = extractelement <4 x float> %.sink, i32 1 %167 = extractelement <4 x float> %.sink, i32 2 %168 = extractelement <4 x float> %.sink, i32 3 %169 = fmul float %165, %38 %170 = fmul float %166, %39 %171 = fmul float %167, %40 %172 = fmul float %168, %41 %173 = call i32 @llvm.SI.packf16(float %169, float %170) %174 = bitcast i32 %173 to float %175 = call i32 @llvm.SI.packf16(float %171, float %172) %176 = bitcast i32 %175 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %174, float %176, float %174, float %176) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640806 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v7 ; 10000E19 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v8, s25 ; 7E105419 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mul_f32_e32 v1, s26, v6 ; 10020C1A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v12, 0.5, v1 ; 061802F0 v_rcp_f32_e32 v13, s26 ; 7E1A541A v_mul_f32_e32 v8, v13, v12 ; 1010190D v_mov_b32_e32 v10, 0 ; 7E140280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[12:13], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[32:39], s[28:31] ; F0900900 00E80C08 v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v13, v14, 0.5 ; D282000E 03C21D0D v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v12 ; 101E18FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v12, s27, v12, 0.5 ; D282000C 03C2181B v_fract_f32_e32 v13, v12 ; 7E1A410C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_mov_b32_e32 v13, 0x44800000 ; 7E1A02FF 44800000 v_subrev_f32_e32 v13, s27, v13 ; 0A1A1A1B v_cmp_ge_f32_e64 s[28:29], v13, 0 ; D00C001C 0001010D v_cndmask_b32_e64 v13, 0, -1, s[28:29] ; D200080D 00718280 v_cmp_ne_i32_e64 s[28:29], v13, 0 ; D10A001C 0001010D v_cndmask_b32_e64 v12, v14, v12, s[28:29] ; D200000C 1072190E v_rcp_f32_e32 v13, s8 ; 7E1A5408 v_mad_f32 v13, s24, v13, 0.5 ; D282000D 03C21A18 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v12, v13 ; 101C1B0C v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[28:29] ; D200000E 10721B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v12, v15 ; 101E1F0C v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v8, v14, v1 ; 1010030E v_rcp_f32_e32 v1, v13 ; 7E02550D v_mul_f32_e32 v13, v12, v1 ; 101A030C v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v1, v1, v12, -v13 ; D2820001 84361901 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[12:15] ; F0900F00 00640808 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v0, v2, v11 ; 10001702 v_mul_f32_e32 v1, v3, v10 ; 10021503 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mul_f32_e32 v1, v4, v9 ; 10021304 v_mul_f32_e32 v2, v5, v8 ; 10041105 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table /media/ssd_plain/sliedes/_steam/steam/SteamApps/common/Sid Meier's Civilization V/steamassets/controller.vdf SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[5] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[4], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[7] 4: ADD TEMP[1].x, CONST[3].xxxx, CONST[3].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[0], CONST[1], IN[1] 7: MUL TEMP[2].w, TEMP[0].wwww, CONST[2].wwww 8: MOV TEMP[2].w, TEMP[2].wwww 9: MOV TEMP[2].xyz, TEMP[0].xyzx 10: MOV OUT[1], TEMP[2] 11: MOV OUT[0], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %40, %13 %51 = fadd float %50, %15 %52 = fmul float %41, %14 %53 = fadd float %52, %16 %54 = fmul float %53, %28 %55 = fmul float %53, %29 %56 = fmul float %53, %30 %57 = fmul float %53, %31 %58 = fmul float %51, %24 %59 = fadd float %58, %54 %60 = fmul float %51, %25 %61 = fadd float %60, %55 %62 = fmul float %51, %26 %63 = fadd float %62, %56 %64 = fmul float %51, %27 %65 = fadd float %64, %57 %66 = fadd float %59, %32 %67 = fadd float %61, %33 %68 = fadd float %63, %34 %69 = fadd float %65, %35 %70 = fadd float %22, %23 %71 = fmul float %66, %70 %72 = fmul float %67, %70 %73 = fmul float %68, %70 %74 = fmul float %69, %70 %75 = fmul float %17, %46 %76 = fmul float %18, %47 %77 = fmul float %19, %48 %78 = fmul float %20, %49 %79 = fmul float %78, %21 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %79) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %71, float %72, float %73, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..24] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[3].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+4].zwzw 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xy, IN[0], CONST[ADDR[0].x+4], TEMP[1] 6: UARL ADDR[0].x, TEMP[0].xxxx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[2], CONST[ADDR[0].x+13].zwzw 9: UARL ADDR[0].x, TEMP[0].xxxx 10: MAD TEMP[2].xy, IN[1], CONST[ADDR[0].x+13], TEMP[2] 11: MOV TEMP[2].xy, TEMP[2].xyxx 12: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 13: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[3] 14: ADD TEMP[0], TEMP[0], CONST[3] 15: ADD TEMP[1].x, CONST[24].xxxx, CONST[24].yyyy 16: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 17: MUL TEMP[0], CONST[22], IN[2] 18: MUL TEMP[3].w, TEMP[0].wwww, CONST[23].wwww 19: MOV TEMP[3].w, TEMP[3].wwww 20: MOV TEMP[3].xyz, TEMP[0].xyzx 21: MOV TEMP[0].xy, IN[3].yzyy 22: MOV TEMP[2].zw, IMM[0].yyxy 23: MOV TEMP[0].zw, IMM[0].yyxy 24: MOV OUT[1], TEMP[3] 25: MOV OUT[2], TEMP[2] 26: MOV OUT[0], TEMP[1] 27: MOV OUT[3], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = fptosi float %56 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 72 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 76 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = bitcast float %60 to i32 %69 = shl i32 %68, 4 %70 = add i32 %69, 64 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %36, %71 %73 = fadd float %72, %64 %74 = shl i32 %68, 4 %75 = add i32 %74, 68 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %37, %76 %78 = fadd float %77, %67 %79 = bitcast float %60 to i32 %80 = shl i32 %79, 4 %81 = add i32 %80, 216 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %79, 4 %84 = add i32 %83, 220 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = bitcast float %60 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 208 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %42, %89 %91 = fadd float %90, %82 %92 = shl i32 %86, 4 %93 = add i32 %92, 212 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fmul float %43, %94 %96 = fadd float %95, %85 %97 = fmul float %78, %17 %98 = fmul float %78, %18 %99 = fmul float %78, %19 %100 = fmul float %78, %20 %101 = fmul float %73, %13 %102 = fadd float %101, %97 %103 = fmul float %73, %14 %104 = fadd float %103, %98 %105 = fmul float %73, %15 %106 = fadd float %105, %99 %107 = fmul float %73, %16 %108 = fadd float %107, %100 %109 = fadd float %102, %21 %110 = fadd float %104, %22 %111 = fadd float %106, %23 %112 = fadd float %108, %24 %113 = fadd float %30, %31 %114 = fmul float %109, %113 %115 = fmul float %110, %113 %116 = fmul float %111, %113 %117 = fmul float %112, %113 %118 = fmul float %25, %48 %119 = fmul float %26, %49 %120 = fmul float %27, %50 %121 = fmul float %28, %51 %122 = fmul float %121, %29 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %118, float %119, float %120, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %91, float %96, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %58, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[16:19], 0 idxen ; E00C2000 80040100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s16, s[0:3], 0x5a ; C208015A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s16, v3 ; 100A0610 s_buffer_load_dword s16, s[0:3], 0x59 ; C2080159 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s16, v2 ; 100C0410 s_buffer_load_dword s16, s[0:3], 0x58 ; C2080158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s16, v1 ; 100E0210 s_buffer_load_dword s16, s[0:3], 0x5b ; C208015B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v4 ; 10020810 s_buffer_load_dword s16, s[0:3], 0x5f ; C208015F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v1 ; 10020210 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v5, v1 ; 7E0A1101 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_add_i32_e32 v6, 0xdc, v5 ; 4A0C0AFF 000000DC buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 v_add_i32_e32 v7, 0xd4, v5 ; 4A0E0AFF 000000D4 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v9, v7, v6 ; D2820006 041A0F09 v_add_i32_e32 v7, 0xd8, v5 ; 4A0E0AFF 000000D8 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_add_i32_e32 v12, 0xd0, v5 ; 4A180AFF 000000D0 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v12, v7 ; D2820007 041E1908 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 33, 0, 0, 0, v7, v6, v9, v8 ; F800021F 08090607 exp 15, 34, 0, 0, 0, v2, v3, v9, v8 ; F800022F 08090302 s_waitcnt expcnt(0) ; BF8C070F v_add_i32_e32 v1, 0x48, v5 ; 4A020AFF 00000048 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 64, v5 ; 4A040AC0 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v2, v1 ; D2820000 04060506 v_add_i32_e32 v1, 0x4c, v5 ; 4A020AFF 0000004C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 0x44, v5 ; 4A040AFF 00000044 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v1 ; 10040204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x61 ; C2020161 s_buffer_load_dword s5, s[0:3], 0x60 ; C2028160 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_add_f32_e32 v3, s5, v3 ; 06060605 v_mul_f32_e32 v2, v3, v2 ; 10040503 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 v_mul_f32_e32 v4, v3, v4 ; 10080903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], IN[2].yyyy 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MAD TEMP[0], IN[2].xxxx, TEMP[1], TEMP[0] 6: MUL TEMP[0], TEMP[0], IN[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %38 = bitcast float %34 to i32 %39 = bitcast float %35 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <8 x i32> %27 to <32 x i8> %43 = bitcast <4 x i32> %29 to <16 x i8> %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %42, <16 x i8> %43, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %45, %37 %50 = fmul float %46, %37 %51 = fmul float %47, %37 %52 = fmul float %48, %37 %53 = bitcast float %34 to i32 %54 = bitcast float %35 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = bitcast <8 x i32> %23 to <32 x i8> %58 = bitcast <4 x i32> %25 to <16 x i8> %59 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %57, <16 x i8> %58, i32 2) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fmul float %36, %60 %65 = fadd float %64, %49 %66 = fmul float %36, %61 %67 = fadd float %66, %50 %68 = fmul float %36, %62 %69 = fadd float %68, %51 %70 = fmul float %36, %63 %71 = fadd float %70, %52 %72 = fmul float %65, %30 %73 = fmul float %67, %31 %74 = fmul float %69, %32 %75 = fmul float %71, %33 %76 = call i32 @llvm.SI.packf16(float %72, float %73) %77 = bitcast i32 %76 to float %78 = call i32 @llvm.SI.packf16(float %74, float %75) %79 = bitcast i32 %78 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %77, float %79, float %77, float %79) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800F00 00450402 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v8, v7 ; 10120F08 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030A02 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v2, v13, v9 ; D2820003 04261B02 v_interp_p1_f32 v9, v0, 3, 0, [m0] ; C8240300 v_interp_p2_f32 v9, [v9], v1, 3, 0, [m0] ; C8250301 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v9, v8, v6 ; 10120D08 v_mad_f32 v9, v2, v12, v9 ; D2820009 04261902 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_cvt_pkrtz_f16_f32_e32 v3, v9, v3 ; 5E060709 v_mul_f32_e32 v9, v8, v5 ; 10120B08 v_mad_f32 v9, v2, v11, v9 ; D2820009 04261702 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mad_f32 v2, v2, v10, v4 ; D2820002 04121502 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mul_f32_e32 v0, v4, v2 ; 10000504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v9 ; 5E001300 exp 15, 0, 1, 1, 1, v0, v3, v0, v3 ; F8001C0F 03000300 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v1 ; 06000204 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..4] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0].y, IMM[0].xxxx 1: ADD TEMP[1].x, IMM[0].yyyy, -CONST[2].xxxx 2: MOV TEMP[0].x, TEMP[1].xxxx 3: FSNE TEMP[2].x, CONST[0].xxxx, -CONST[0].xxxx 4: UIF TEMP[2].xxxx :0 5: FSLT TEMP[2].x, IMM[0].xxxx, CONST[1].xxxx 6: UIF TEMP[2].xxxx :0 7: MUL TEMP[2].zw, CONST[3].xyxy, IN[1].xyxy 8: MOV TEMP[0].zw, TEMP[2].wwzw 9: FRC TEMP[2].xy, TEMP[2].zwzw 10: ADD TEMP[3].zw, TEMP[0], -TEMP[2].xyxy 11: MOV TEMP[0].zw, TEMP[3].wwzw 12: MAD TEMP[3].xy, IN[1], CONST[3], -TEMP[3].zwzw 13: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 14: RCP TEMP[5].x, CONST[3].xxxx 15: RCP TEMP[6].x, CONST[3].yyyy 16: MOV TEMP[5].y, TEMP[6].xxxx 17: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[5] 18: MOV TEMP[4].xy, TEMP[4].xyyy 19: MOV TEMP[4].w, IMM[0].zzzz 20: TXL TEMP[4], TEMP[4], SAMP[0], 2D 21: MOV TEMP[5].zw, TEMP[4] 22: DP2 TEMP[6].x, TEMP[4].wxxx, IMM[1].xyyy 23: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 24: FRC TEMP[7].w, TEMP[6].xxxx 25: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].xxxx 26: MOV TEMP[0].z, TEMP[6].zzzz 27: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 28: FRC TEMP[6].z, TEMP[4].wwww 29: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 30: MOV TEMP[0].w, TEMP[4].wwww 31: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 32: FSGE TEMP[6].x, TEMP[1].xxxx, IMM[0].zzzz 33: UIF TEMP[6].xxxx :0 34: MOV TEMP[6].x, TEMP[4].wwww 35: ELSE :0 36: MOV TEMP[6].x, TEMP[4].zzzz 37: ENDIF 38: RCP TEMP[4].x, CONST[1].xxxx 39: MAD TEMP[4].w, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].xxxx 40: FRC TEMP[7].z, TEMP[4].wwww 41: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[7].zzzz 42: MUL TEMP[7].z, TEMP[4].wwww, TEMP[6].xxxx 43: MOV TEMP[8].x, -TEMP[4].wwww 44: FSGE TEMP[7].x, TEMP[7].zzzz, IMM[0].zzzz 45: UIF TEMP[7].xxxx :0 46: MOV TEMP[7].x, TEMP[4].wwww 47: ELSE :0 48: MOV TEMP[7].x, TEMP[8].xxxx 49: ENDIF 50: MOV TEMP[2].z, TEMP[7].xxxx 51: RCP TEMP[8].x, TEMP[7].xxxx 52: MUL TEMP[8].w, TEMP[6].xxxx, TEMP[8].xxxx 53: FRC TEMP[8].w, TEMP[8].wwww 54: MOV TEMP[2].w, TEMP[8].wwww 55: MUL TEMP[7].x, TEMP[8].wwww, TEMP[7].xxxx 56: MOV TEMP[5].x, TEMP[7].xxxx 57: RCP TEMP[4].x, TEMP[4].wwww 58: MUL TEMP[4].y, TEMP[4].xxxx, TEMP[6].xxxx 59: MOV TEMP[5].y, TEMP[4].yyyy 60: FRC TEMP[4].zw, TEMP[5].xyxy 61: MOV TEMP[0].zw, TEMP[4].wwzw 62: ADD TEMP[4].zw, -TEMP[0], TEMP[5].xyxy 63: MOV TEMP[0].zw, TEMP[4].wwzw 64: ADD TEMP[3].zw, TEMP[3].xyxy, TEMP[0] 65: MOV TEMP[0].zw, TEMP[3].wwzw 66: MUL TEMP[3].zw, TEMP[0], CONST[1].xxxx 67: MOV TEMP[0].zw, TEMP[3].wwzw 68: FRC TEMP[3].xy, TEMP[3].zwzw 69: ADD TEMP[3].zw, TEMP[0], -TEMP[3].xyxy 70: MOV TEMP[0].zw, TEMP[3].wwzw 71: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 72: MOV TEMP[0].w, TEMP[3].wwzw 73: RCP TEMP[2].x, CONST[4].xxxx 74: RCP TEMP[4].x, CONST[4].yyyy 75: MOV TEMP[2].y, TEMP[4].xxxx 76: MUL TEMP[3].xy, TEMP[3].zwzw, TEMP[2] 77: MOV TEMP[3].xy, TEMP[3].xyyy 78: MOV TEMP[3].w, IMM[0].zzzz 79: TXL TEMP[3], TEMP[3], SAMP[1], 2D 80: MOV TEMP[2], TEMP[3] 81: MOV TEMP[0].z, TEMP[3].wwww 82: ELSE :0 83: MOV TEMP[3].xy, IN[1].xyyy 84: TEX TEMP[3], TEMP[3], SAMP[1], 2D 85: MOV TEMP[2], TEMP[3] 86: MOV TEMP[0].z, TEMP[3].wwww 87: ENDIF 88: MUL TEMP[3].w, TEMP[0].zzzz, IN[0].wwww 89: MOV TEMP[3].w, TEMP[3].wwww 90: MOV TEMP[3].xyz, IN[0].xyzx 91: ELSE :0 92: FSLT TEMP[4].x, IMM[0].xxxx, CONST[1].xxxx 93: UIF TEMP[4].xxxx :0 94: MUL TEMP[4].zw, CONST[3].xyxy, IN[1].xyxy 95: MOV TEMP[0].zw, TEMP[4].wwzw 96: FRC TEMP[4].xy, TEMP[4].zwzw 97: MOV TEMP[2].xy, TEMP[4].xyxx 98: ADD TEMP[4].zw, TEMP[0], -TEMP[4].xyxy 99: MOV TEMP[0].zw, TEMP[4].wwzw 100: MAD TEMP[4].xy, IN[1], CONST[3], -TEMP[4].zwzw 101: MOV TEMP[2].xy, TEMP[4].xyxx 102: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 103: RCP TEMP[5].x, CONST[3].xxxx 104: RCP TEMP[6].x, CONST[3].yyyy 105: MOV TEMP[5].y, TEMP[6].xxxx 106: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[5] 107: MOV TEMP[4].xy, TEMP[4].xyyy 108: MOV TEMP[4].w, IMM[0].zzzz 109: TXL TEMP[4], TEMP[4], SAMP[0], 2D 110: MOV TEMP[5].zw, TEMP[4] 111: DP2 TEMP[6].x, TEMP[4].wxxx, IMM[1].xyyy 112: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 113: FRC TEMP[7].w, TEMP[6].xxxx 114: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].xxxx 115: MOV TEMP[0].z, TEMP[6].zzzz 116: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 117: FRC TEMP[6].z, TEMP[4].wwww 118: MOV TEMP[2].z, TEMP[6].zzzz 119: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 120: MOV TEMP[0].w, TEMP[4].wwww 121: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 122: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 123: UIF TEMP[1].xxxx :0 124: MOV TEMP[1].x, TEMP[4].wwww 125: ELSE :0 126: MOV TEMP[1].x, TEMP[4].zzzz 127: ENDIF 128: MOV TEMP[0].x, TEMP[1].xxxx 129: RCP TEMP[4].x, CONST[1].xxxx 130: MAD TEMP[4].y, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].xxxx 131: FRC TEMP[6].z, TEMP[4].yyyy 132: ADD TEMP[4].y, -TEMP[6].zzzz, TEMP[4].yyyy 133: MUL TEMP[6].z, TEMP[4].yyyy, TEMP[1].xxxx 134: MOV TEMP[7].x, -TEMP[4].yyyy 135: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].zzzz 136: UIF TEMP[6].xxxx :0 137: MOV TEMP[6].x, TEMP[4].yyyy 138: ELSE :0 139: MOV TEMP[6].x, TEMP[7].xxxx 140: ENDIF 141: MOV TEMP[0].z, TEMP[6].xxxx 142: RCP TEMP[6].x, TEMP[6].xxxx 143: MUL TEMP[1].w, TEMP[6].xxxx, TEMP[1].xxxx 144: FRC TEMP[1].w, TEMP[1].wwww 145: MOV TEMP[0].w, TEMP[1].wwww 146: RCP TEMP[1].x, TEMP[4].yyyy 147: MOV TEMP[0].y, TEMP[1].xxxx 148: MUL TEMP[1].xy, TEMP[0].wyzw, TEMP[0].zxzw 149: MOV TEMP[5].xy, TEMP[1].xyxx 150: FRC TEMP[1].xy, TEMP[5] 151: MOV TEMP[0].xy, TEMP[1].xyxx 152: ADD TEMP[1].xy, -TEMP[0], TEMP[5] 153: MOV TEMP[0].xy, TEMP[1].xyxx 154: ADD TEMP[1].xy, TEMP[2], TEMP[0] 155: MOV TEMP[0].xy, TEMP[1].xyxx 156: MUL TEMP[1].xy, TEMP[0], CONST[1].xxxx 157: MOV TEMP[0].xy, TEMP[1].xyxx 158: FRC TEMP[1].zw, TEMP[1].xyxy 159: MOV TEMP[0].zw, TEMP[1].wwzw 160: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[0] 161: MOV TEMP[0].xy, TEMP[1].xyxx 162: ADD TEMP[1].xy, TEMP[0], IMM[0].xxxx 163: MOV TEMP[0].xy, TEMP[1].xyxx 164: RCP TEMP[2].x, CONST[4].xxxx 165: RCP TEMP[1].x, CONST[4].yyyy 166: MOV TEMP[2].y, TEMP[1].xxxx 167: MUL TEMP[1].xy, TEMP[0], TEMP[2] 168: MOV TEMP[1].xy, TEMP[1].xyyy 169: MOV TEMP[1].w, IMM[0].zzzz 170: TXL TEMP[1], TEMP[1], SAMP[1], 2D 171: MOV TEMP[0].xyz, TEMP[1] 172: MOV TEMP[2].x, TEMP[1].wwww 173: ELSE :0 174: MOV TEMP[1].xy, IN[1].xyyy 175: TEX TEMP[1], TEMP[1], SAMP[1], 2D 176: MOV TEMP[0].xyz, TEMP[1] 177: MOV TEMP[2].x, TEMP[1].wwww 178: ENDIF 179: MUL TEMP[1].w, TEMP[2].xxxx, IN[0].wwww 180: MOV TEMP[0].w, TEMP[1].wwww 181: MOV TEMP[3], TEMP[0] 182: ENDIF 183: MOV OUT[0], TEMP[3] 184: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %31 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %32 = load <8 x i32> addrspace(2)* %31, !tbaa !0 %33 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %34 = load <4 x i32> addrspace(2)* %33, !tbaa !0 %35 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %36 = load <8 x i32> addrspace(2)* %35, !tbaa !0 %37 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %38 = load <4 x i32> addrspace(2)* %37, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = fsub float -0.000000e+00, %26 %46 = fadd float 1.024000e+03, %45 %47 = fsub float -0.000000e+00, %24 %48 = fcmp une float %24, %47 %49 = sext i1 %48 to i32 %50 = bitcast i32 %49 to float %51 = bitcast float %50 to i32 %52 = icmp ne i32 %51, 0 %53 = fcmp olt float 5.000000e-01, %25 %54 = sext i1 %53 to i32 %55 = bitcast i32 %54 to float %56 = bitcast float %55 to i32 %57 = icmp ne i32 %56, 0 br i1 %52, label %IF, label %ELSE IF: ; preds = %main_body br i1 %57, label %IF37, label %ELSE38 ELSE: ; preds = %main_body br i1 %57, label %IF46, label %ELSE47 ENDIF: ; preds = %IF37, %ELSE38, %ENDIF45 %.sink56.sink = phi <4 x float> [ %.sink56, %ENDIF45 ], [ %172, %ELSE38 ], [ %165, %IF37 ] %temp12.0 = phi float [ %282, %ENDIF45 ], [ %39, %ELSE38 ], [ %39, %IF37 ] %temp13.0 = phi float [ %283, %ENDIF45 ], [ %40, %ELSE38 ], [ %40, %IF37 ] %temp14.0 = phi float [ %284, %ENDIF45 ], [ %41, %ELSE38 ], [ %41, %IF37 ] %58 = extractelement <4 x float> %.sink56.sink, i32 3 %59 = fmul float %58, %42 %60 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp13.0) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %temp14.0, float %59) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void IF37: ; preds = %IF %64 = fmul float %27, %43 %65 = fmul float %28, %44 %66 = call float @llvm.AMDIL.fraction.(float %64) %67 = call float @llvm.AMDIL.fraction.(float %65) %68 = fsub float -0.000000e+00, %66 %69 = fadd float %64, %68 %70 = fsub float -0.000000e+00, %67 %71 = fadd float %65, %70 %72 = fsub float -0.000000e+00, %69 %73 = fmul float %43, %27 %74 = fadd float %73, %72 %75 = fsub float -0.000000e+00, %71 %76 = fmul float %44, %28 %77 = fadd float %76, %75 %78 = fadd float %69, 5.000000e-01 %79 = fadd float %71, 5.000000e-01 %80 = fdiv float 1.000000e+00, %27 %81 = fdiv float 1.000000e+00, %28 %82 = fmul float %78, %80 %83 = fmul float %79, %81 %84 = bitcast float %82 to i32 %85 = bitcast float %83 to i32 %86 = bitcast float 0.000000e+00 to i32 %87 = insertelement <4 x i32> undef, i32 %84, i32 0 %88 = insertelement <4 x i32> %87, i32 %85, i32 1 %89 = insertelement <4 x i32> %88, i32 %86, i32 2 %90 = insertelement <4 x i32> %89, i32 undef, i32 3 %91 = bitcast <8 x i32> %32 to <32 x i8> %92 = bitcast <4 x i32> %34 to <16 x i8> %93 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 3 %96 = fmul float %95, 6.528000e+04 %97 = fmul float %94, 2.550000e+02 %98 = fadd float %96, %97 %99 = fadd float %98, 5.000000e-01 %100 = call float @llvm.AMDIL.fraction.(float %99) %101 = fsub float -0.000000e+00, %100 %102 = fadd float %101, %99 %103 = fmul float %26, %94 %104 = fadd float %103, 5.000000e-01 %105 = call float @llvm.AMDIL.fraction.(float %104) %106 = fsub float -0.000000e+00, %105 %107 = fadd float %104, %106 %108 = fadd float %102, 5.000000e-01 %109 = fadd float %107, 5.000000e-01 %110 = fcmp oge float %46, 0.000000e+00 %111 = sext i1 %110 to i32 %112 = bitcast i32 %111 to float %113 = bitcast float %112 to i32 %114 = icmp ne i32 %113, 0 %. = select i1 %114, float %109, float %108 %115 = fdiv float 1.000000e+00, %25 %116 = fmul float %29, %115 %117 = fadd float %116, 5.000000e-01 %118 = call float @llvm.AMDIL.fraction.(float %117) %119 = fsub float -0.000000e+00, %118 %120 = fadd float %117, %119 %121 = fmul float %120, %. %122 = fsub float -0.000000e+00, %120 %123 = fcmp oge float %121, 0.000000e+00 %124 = sext i1 %123 to i32 %125 = bitcast i32 %124 to float %126 = bitcast float %125 to i32 %127 = icmp ne i32 %126, 0 %temp28.0 = select i1 %127, float %120, float %122 %128 = fdiv float 1.000000e+00, %temp28.0 %129 = fmul float %., %128 %130 = call float @llvm.AMDIL.fraction.(float %129) %131 = fmul float %130, %temp28.0 %132 = fdiv float 1.000000e+00, %120 %133 = fmul float %132, %. %134 = call float @llvm.AMDIL.fraction.(float %131) %135 = call float @llvm.AMDIL.fraction.(float %133) %136 = fsub float -0.000000e+00, %134 %137 = fadd float %136, %131 %138 = fsub float -0.000000e+00, %135 %139 = fadd float %138, %133 %140 = fadd float %74, %137 %141 = fadd float %77, %139 %142 = fmul float %140, %25 %143 = fmul float %141, %25 %144 = call float @llvm.AMDIL.fraction.(float %142) %145 = call float @llvm.AMDIL.fraction.(float %143) %146 = fsub float -0.000000e+00, %144 %147 = fadd float %142, %146 %148 = fsub float -0.000000e+00, %145 %149 = fadd float %143, %148 %150 = fadd float %147, 5.000000e-01 %151 = fadd float %149, 5.000000e-01 %152 = fdiv float 1.000000e+00, %29 %153 = fdiv float 1.000000e+00, %30 %154 = fmul float %150, %152 %155 = fmul float %151, %153 %156 = bitcast float %154 to i32 %157 = bitcast float %155 to i32 %158 = bitcast float 0.000000e+00 to i32 %159 = insertelement <4 x i32> undef, i32 %156, i32 0 %160 = insertelement <4 x i32> %159, i32 %157, i32 1 %161 = insertelement <4 x i32> %160, i32 %158, i32 2 %162 = insertelement <4 x i32> %161, i32 undef, i32 3 %163 = bitcast <8 x i32> %36 to <32 x i8> %164 = bitcast <4 x i32> %38 to <16 x i8> %165 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) br label %ENDIF ELSE38: ; preds = %IF %166 = bitcast float %43 to i32 %167 = bitcast float %44 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %36 to <32 x i8> %171 = bitcast <4 x i32> %38 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) br label %ENDIF IF46: ; preds = %ELSE %173 = fmul float %27, %43 %174 = fmul float %28, %44 %175 = call float @llvm.AMDIL.fraction.(float %173) %176 = call float @llvm.AMDIL.fraction.(float %174) %177 = fsub float -0.000000e+00, %175 %178 = fadd float %173, %177 %179 = fsub float -0.000000e+00, %176 %180 = fadd float %174, %179 %181 = fsub float -0.000000e+00, %178 %182 = fmul float %43, %27 %183 = fadd float %182, %181 %184 = fsub float -0.000000e+00, %180 %185 = fmul float %44, %28 %186 = fadd float %185, %184 %187 = fadd float %178, 5.000000e-01 %188 = fadd float %180, 5.000000e-01 %189 = fdiv float 1.000000e+00, %27 %190 = fdiv float 1.000000e+00, %28 %191 = fmul float %187, %189 %192 = fmul float %188, %190 %193 = bitcast float %191 to i32 %194 = bitcast float %192 to i32 %195 = bitcast float 0.000000e+00 to i32 %196 = insertelement <4 x i32> undef, i32 %193, i32 0 %197 = insertelement <4 x i32> %196, i32 %194, i32 1 %198 = insertelement <4 x i32> %197, i32 %195, i32 2 %199 = insertelement <4 x i32> %198, i32 undef, i32 3 %200 = bitcast <8 x i32> %32 to <32 x i8> %201 = bitcast <4 x i32> %34 to <16 x i8> %202 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %199, <32 x i8> %200, <16 x i8> %201, i32 2) %203 = extractelement <4 x float> %202, i32 0 %204 = extractelement <4 x float> %202, i32 3 %205 = fmul float %204, 6.528000e+04 %206 = fmul float %203, 2.550000e+02 %207 = fadd float %205, %206 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %210, %208 %212 = fmul float %26, %203 %213 = fadd float %212, 5.000000e-01 %214 = call float @llvm.AMDIL.fraction.(float %213) %215 = fsub float -0.000000e+00, %214 %216 = fadd float %213, %215 %217 = fadd float %211, 5.000000e-01 %218 = fadd float %216, 5.000000e-01 %219 = fcmp oge float %46, 0.000000e+00 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 %.55 = select i1 %223, float %218, float %217 %224 = fdiv float 1.000000e+00, %25 %225 = fmul float %29, %224 %226 = fadd float %225, 5.000000e-01 %227 = call float @llvm.AMDIL.fraction.(float %226) %228 = fsub float -0.000000e+00, %227 %229 = fadd float %228, %226 %230 = fmul float %229, %.55 %231 = fsub float -0.000000e+00, %229 %232 = fcmp oge float %230, 0.000000e+00 %233 = sext i1 %232 to i32 %234 = bitcast i32 %233 to float %235 = bitcast float %234 to i32 %236 = icmp ne i32 %235, 0 %temp24.1 = select i1 %236, float %229, float %231 %237 = fdiv float 1.000000e+00, %temp24.1 %238 = fmul float %237, %.55 %239 = call float @llvm.AMDIL.fraction.(float %238) %240 = fdiv float 1.000000e+00, %229 %241 = fmul float %239, %temp24.1 %242 = fmul float %240, %.55 %243 = call float @llvm.AMDIL.fraction.(float %241) %244 = call float @llvm.AMDIL.fraction.(float %242) %245 = fsub float -0.000000e+00, %243 %246 = fadd float %245, %241 %247 = fsub float -0.000000e+00, %244 %248 = fadd float %247, %242 %249 = fadd float %183, %246 %250 = fadd float %186, %248 %251 = fmul float %249, %25 %252 = fmul float %250, %25 %253 = call float @llvm.AMDIL.fraction.(float %251) %254 = call float @llvm.AMDIL.fraction.(float %252) %255 = fsub float -0.000000e+00, %253 %256 = fadd float %255, %251 %257 = fsub float -0.000000e+00, %254 %258 = fadd float %257, %252 %259 = fadd float %256, 5.000000e-01 %260 = fadd float %258, 5.000000e-01 %261 = fdiv float 1.000000e+00, %29 %262 = fdiv float 1.000000e+00, %30 %263 = fmul float %259, %261 %264 = fmul float %260, %262 %265 = bitcast float %263 to i32 %266 = bitcast float %264 to i32 %267 = bitcast float 0.000000e+00 to i32 %268 = insertelement <4 x i32> undef, i32 %265, i32 0 %269 = insertelement <4 x i32> %268, i32 %266, i32 1 %270 = insertelement <4 x i32> %269, i32 %267, i32 2 %271 = insertelement <4 x i32> %270, i32 undef, i32 3 %272 = bitcast <8 x i32> %36 to <32 x i8> %273 = bitcast <4 x i32> %38 to <16 x i8> %274 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %271, <32 x i8> %272, <16 x i8> %273, i32 2) br label %ENDIF45 ELSE47: ; preds = %ELSE %275 = bitcast float %43 to i32 %276 = bitcast float %44 to i32 %277 = insertelement <2 x i32> undef, i32 %275, i32 0 %278 = insertelement <2 x i32> %277, i32 %276, i32 1 %279 = bitcast <8 x i32> %36 to <32 x i8> %280 = bitcast <4 x i32> %38 to <16 x i8> %281 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %278, <32 x i8> %279, <16 x i8> %280, i32 2) br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink56 = phi <4 x float> [ %281, %ELSE47 ], [ %274, %IF46 ] %282 = extractelement <4 x float> %.sink56, i32 0 %283 = extractelement <4 x float> %.sink56, i32 1 %284 = extractelement <4 x float> %.sink56, i32 2 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[32:35], 0x0 ; C2002100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_neq_f32_e64 s[0:1], s0, -s0 ; D01A0000 40000000 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_eq_i32_e64 s[36:37], v0, 0 ; D1040024 00010100 s_buffer_load_dword s0, s[32:35], 0x4 ; C2002104 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[2:3], s0, 0.5 ; D0080002 0001E000 v_cndmask_b32_e64 v1, 0, -1, s[2:3] ; D2000801 00098280 s_buffer_load_dword s1, s[32:35], 0x8 ; C200A108 v_mov_b32_e32 v0, 0x44800000 ; 7E0002FF 44800000 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s1, v0 ; 0A000001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_buffer_load_dword s2, s[32:35], 0x11 ; C2012111 s_buffer_load_dword s3, s[32:35], 0x10 ; C201A110 s_buffer_load_dword s6, s[32:35], 0xd ; C203210D s_buffer_load_dword s7, s[32:35], 0xc ; C203A10C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[4:5], s[36:37] ; BE842424 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_11 ; BF880000 v_cmp_eq_i32_e64 s[32:33], v1, 0 ; D1040020 00010101 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[12:15] ; F0800F00 00660803 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_xor_b64 exec, exec, s[32:33] ; 89FE207E s_cbranch_execz BB0_12 ; BF880000 v_mul_f32_e32 v8, s6, v4 ; 10100806 v_fract_f32_e32 v8, v8 ; 7E104108 v_mad_f32 v8, s6, v4, -v8 ; D2820008 84220806 v_add_f32_e32 v9, 0.5, v8 ; 061210F0 v_rcp_f32_e32 v10, s6 ; 7E145406 v_mul_f32_e32 v10, v10, v9 ; 1014130A v_mul_f32_e32 v13, s7, v3 ; 101A0607 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v13, s7, v3, -v13 ; D282000D 84360607 v_add_f32_e32 v14, 0.5, v13 ; 061C1AF0 v_rcp_f32_e32 v15, s7 ; 7E1E5407 v_mul_f32_e32 v9, v15, v14 ; 10121D0F v_mov_b32_e32 v11, 0 ; 7E160280 image_sample_l v[14:15], 9, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[16:23], s[8:11] ; F0900900 00440E09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, 0x437f0000, v14 ; 10201CFF 437F0000 v_mov_b32_e32 v17, 0x477f0000 ; 7E2202FF 477F0000 v_mad_f32 v16, v17, v15, v16 ; D2820010 04421F11 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_fract_f32_e32 v17, v16 ; 7E224110 v_subrev_f32_e32 v16, v17, v16 ; 0A202111 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_mad_f32 v14, s1, v14, 0.5 ; D282000E 03C21C01 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_cmp_ge_f32_e64 s[34:35], v0, 0 ; D00C0022 00010100 v_cndmask_b32_e64 v15, 0, -1, s[34:35] ; D200080F 00898280 v_cmp_ne_i32_e64 s[34:35], v15, 0 ; D10A0022 0001010F v_cndmask_b32_e64 v14, v16, v14, s[34:35] ; D200000E 008A1D10 v_rcp_f32_e32 v15, s0 ; 7E1E5400 v_mad_f32 v15, s3, v15, 0.5 ; D282000F 03C21E03 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_cmp_ge_f32_e64 s[34:35], v16, 0 ; D00C0022 00010110 v_cndmask_b32_e64 v16, 0, -1, s[34:35] ; D2000010 00898280 v_cmp_ne_i32_e64 s[34:35], v16, 0 ; D10A0022 00010110 v_xor_b32_e32 v16, 0x80000000, v15 ; 3A201EFF 80000000 v_cndmask_b32_e64 v16, v16, v15, s[34:35] ; D2000010 008A1F10 v_rcp_f32_e32 v17, v16 ; 7E225510 v_mul_f32_e32 v17, v14, v17 ; 1022230E v_fract_f32_e32 v17, v17 ; 7E224111 v_mul_f32_e32 v18, v16, v17 ; 10242310 v_fract_f32_e32 v18, v18 ; 7E244112 v_mad_f32 v16, v17, v16, -v18 ; D2820010 844A2111 v_mad_f32 v13, s7, v3, -v13 ; D282000D 84360607 v_add_f32_e32 v13, v16, v13 ; 061A1B10 v_mul_f32_e32 v16, s0, v13 ; 10201A00 v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v13, v13, s0, -v16 ; D282000D 8440010D v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_rcp_f32_e32 v16, s3 ; 7E205403 v_mul_f32_e32 v9, v16, v13 ; 10121B10 v_rcp_f32_e32 v13, v15 ; 7E1A550F v_mul_f32_e32 v15, v14, v13 ; 101E1B0E v_fract_f32_e32 v15, v15 ; 7E1E410F v_mad_f32 v13, v13, v14, -v15 ; D282000D 843E1D0D v_mad_f32 v8, s6, v4, -v8 ; D2820008 84220806 v_add_f32_e32 v8, v13, v8 ; 0610110D v_mul_f32_e32 v13, s0, v8 ; 101A1000 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v8, v8, s0, -v13 ; D2820008 84340108 v_add_f32_e32 v8, 0.5, v8 ; 061010F0 v_rcp_f32_e32 v13, s2 ; 7E1A5402 v_mul_f32_e32 v10, v13, v8 ; 1014110D image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[24:31], s[12:15] ; F0900F00 00660809 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mov_b32_e32 v12, v8 ; 7E180308 v_mov_b32_e32 v13, v9 ; 7E1A0309 v_mov_b32_e32 v14, v10 ; 7E1C030A v_mov_b32_e32 v15, v11 ; 7E1E030B s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E s_cbranch_execz BB0_4 ; BF880000 v_cmp_eq_i32_e64 s[32:33], v1, 0 ; D1040020 00010101 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[12:15] ; F0800F00 00660C03 v_mov_b32_e32 v10, v5 ; 7E140305 v_mov_b32_e32 v9, v6 ; 7E120306 v_mov_b32_e32 v8, v7 ; 7E100307 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_waitcnt expcnt(0) ; BF8C070F s_xor_b64 exec, exec, s[32:33] ; 89FE207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v1, s6, v4 ; 10020806 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s6, v4, -v1 ; D2820001 84060806 v_add_f32_e32 v8, 0.5, v1 ; 061002F0 v_rcp_f32_e32 v9, s6 ; 7E125406 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mul_f32_e32 v12, s7, v3 ; 10180607 v_fract_f32_e32 v12, v12 ; 7E18410C v_mad_f32 v12, s7, v3, -v12 ; D282000C 84320607 v_add_f32_e32 v13, 0.5, v12 ; 061A18F0 v_rcp_f32_e32 v14, s7 ; 7E1C5407 v_mul_f32_e32 v8, v14, v13 ; 10101B0E v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_l v[13:14], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[8:11] ; F0900900 00440D08 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, 0x437f0000, v13 ; 101E1AFF 437F0000 v_mov_b32_e32 v16, 0x477f0000 ; 7E2002FF 477F0000 v_mad_f32 v15, v16, v14, v15 ; D282000F 043E1D10 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_mad_f32 v13, s1, v13, 0.5 ; D282000D 03C21A01 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_cmp_ge_f32_e64 s[34:35], v0, 0 ; D00C0022 00010100 v_cndmask_b32_e64 v0, 0, -1, s[34:35] ; D2000000 00898280 v_cmp_ne_i32_e64 s[34:35], v0, 0 ; D10A0022 00010100 v_cndmask_b32_e64 v0, v15, v13, s[34:35] ; D2000000 188A1B0F v_rcp_f32_e32 v13, s0 ; 7E1A5400 v_mad_f32 v13, s3, v13, 0.5 ; D282000D 03C21A03 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v0, v13 ; 101C1B00 v_cmp_ge_f32_e64 s[34:35], v14, 0 ; D00C0022 0001010E v_cndmask_b32_e64 v14, 0, -1, s[34:35] ; D200000E 00898280 v_cmp_ne_i32_e64 s[34:35], v14, 0 ; D10A0022 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[34:35] ; D200000E 108A1B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v15, v0 ; 101E010F v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v12, s7, v3, -v12 ; D282000C 84320607 v_add_f32_e32 v12, v14, v12 ; 0618190E v_mul_f32_e32 v14, s0, v12 ; 101C1800 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v12, v12, s0, -v14 ; D282000C 8438010C v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_rcp_f32_e32 v14, s3 ; 7E1C5403 v_mul_f32_e32 v8, v14, v12 ; 1010190E v_rcp_f32_e32 v12, v13 ; 7E18550D v_mul_f32_e32 v13, v0, v12 ; 101A1900 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v0, v12, v0, -v13 ; D2820000 8436010C v_mad_f32 v1, s6, v4, -v1 ; D2820001 84060806 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s0, -v1 ; D2820000 84040100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s2 ; 7E025402 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[24:31], s[12:15] ; F0900F00 00660C08 v_mov_b32_e32 v10, v5 ; 7E140305 v_mov_b32_e32 v9, v6 ; 7E120306 v_mov_b32_e32 v8, v7 ; 7E100307 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[32:33] ; 88FE207E s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 v_mul_f32_e32 v1, v2, v15 ; 10021F02 v_cvt_pkrtz_f16_f32_e32 v1, v10, v1 ; 5E02030A exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..24] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[3].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+4].zwzw 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xy, IN[0], CONST[ADDR[0].x+4], TEMP[1] 6: UARL ADDR[0].x, TEMP[0].xxxx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[2], CONST[ADDR[0].x+13].zwzw 9: UARL ADDR[0].x, TEMP[0].xxxx 10: MAD TEMP[2].xy, IN[1], CONST[ADDR[0].x+13], TEMP[2] 11: MOV TEMP[2].xy, TEMP[2].xyxx 12: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 13: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[3] 14: ADD TEMP[0], TEMP[0], CONST[3] 15: ADD TEMP[1].x, CONST[24].xxxx, CONST[24].yyyy 16: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 17: MUL TEMP[0], CONST[22], IN[2] 18: MUL TEMP[3].w, TEMP[0].wwww, CONST[23].wwww 19: MOV TEMP[3].w, TEMP[3].wwww 20: MOV TEMP[3].xyz, TEMP[0].xyzx 21: MOV TEMP[0].xy, IN[3].yzyy 22: MOV TEMP[2].zw, IMM[0].yyxy 23: MOV TEMP[0].zw, IMM[0].yyxy 24: MOV OUT[1], TEMP[3] 25: MOV OUT[2], TEMP[2] 26: MOV OUT[0], TEMP[1] 27: MOV OUT[3], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = fptosi float %56 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 72 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 76 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = bitcast float %60 to i32 %69 = shl i32 %68, 4 %70 = add i32 %69, 64 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %36, %71 %73 = fadd float %72, %64 %74 = shl i32 %68, 4 %75 = add i32 %74, 68 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %37, %76 %78 = fadd float %77, %67 %79 = bitcast float %60 to i32 %80 = shl i32 %79, 4 %81 = add i32 %80, 216 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %79, 4 %84 = add i32 %83, 220 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = bitcast float %60 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 208 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %42, %89 %91 = fadd float %90, %82 %92 = shl i32 %86, 4 %93 = add i32 %92, 212 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fmul float %43, %94 %96 = fadd float %95, %85 %97 = fmul float %78, %17 %98 = fmul float %78, %18 %99 = fmul float %78, %19 %100 = fmul float %78, %20 %101 = fmul float %73, %13 %102 = fadd float %101, %97 %103 = fmul float %73, %14 %104 = fadd float %103, %98 %105 = fmul float %73, %15 %106 = fadd float %105, %99 %107 = fmul float %73, %16 %108 = fadd float %107, %100 %109 = fadd float %102, %21 %110 = fadd float %104, %22 %111 = fadd float %106, %23 %112 = fadd float %108, %24 %113 = fadd float %30, %31 %114 = fmul float %109, %113 %115 = fmul float %110, %113 %116 = fmul float %111, %113 %117 = fmul float %112, %113 %118 = fmul float %25, %48 %119 = fmul float %26, %49 %120 = fmul float %27, %50 %121 = fmul float %28, %51 %122 = fmul float %121, %29 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %118, float %119, float %120, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %91, float %96, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %58, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[16:19], 0 idxen ; E00C2000 80040100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s16, s[0:3], 0x5a ; C208015A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s16, v3 ; 100A0610 s_buffer_load_dword s16, s[0:3], 0x59 ; C2080159 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s16, v2 ; 100C0410 s_buffer_load_dword s16, s[0:3], 0x58 ; C2080158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s16, v1 ; 100E0210 s_buffer_load_dword s16, s[0:3], 0x5b ; C208015B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v4 ; 10020810 s_buffer_load_dword s16, s[0:3], 0x5f ; C208015F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v1 ; 10020210 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v5, v1 ; 7E0A1101 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_add_i32_e32 v6, 0xdc, v5 ; 4A0C0AFF 000000DC buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 v_add_i32_e32 v7, 0xd4, v5 ; 4A0E0AFF 000000D4 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v9, v7, v6 ; D2820006 041A0F09 v_add_i32_e32 v7, 0xd8, v5 ; 4A0E0AFF 000000D8 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_add_i32_e32 v12, 0xd0, v5 ; 4A180AFF 000000D0 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v12, v7 ; D2820007 041E1908 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 33, 0, 0, 0, v7, v6, v9, v8 ; F800021F 08090607 exp 15, 34, 0, 0, 0, v2, v3, v9, v8 ; F800022F 08090302 s_waitcnt expcnt(0) ; BF8C070F v_add_i32_e32 v1, 0x48, v5 ; 4A020AFF 00000048 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 64, v5 ; 4A040AC0 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v2, v1 ; D2820000 04060506 v_add_i32_e32 v1, 0x4c, v5 ; 4A020AFF 0000004C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 0x44, v5 ; 4A040AFF 00000044 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v1 ; 10040204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x61 ; C2020161 s_buffer_load_dword s5, s[0:3], 0x60 ; C2028160 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_add_f32_e32 v3, s5, v3 ; 06060605 v_mul_f32_e32 v2, v3, v2 ; 10040503 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 v_mul_f32_e32 v4, v3, v4 ; 10080903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL IN[3], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[4] DCL CONST[0..1] DCL TEMP[0] DCL TEMP[1..5], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[1].xyxx, CONST[1].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3].xyz, CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[1].xy, IN[2].xyyy 33: TEX TEMP[1], TEMP[1], SAMP[1], 2D 34: MUL TEMP[3], TEMP[1], IN[3].yyyy 35: MOV TEMP[1].xy, IN[2].xyyy 36: TEX TEMP[1], TEMP[1], SAMP[0], 2D 37: MAD TEMP[3], IN[3].xxxx, TEMP[1], TEMP[3] 38: MUL TEMP[1], TEMP[3], IN[1] 39: MOV OUT[0], TEMP[1] 40: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %50 = fmul float %15, %32 %51 = fadd float %50, %33 %52 = fmul float %14, %28 %53 = fadd float %52, %30 %54 = fmul float %51, %29 %55 = fadd float %54, %31 %56 = fsub float -0.000000e+00, %24 %57 = fsub float -0.000000e+00, %26 %58 = fadd float %56, %53 %59 = fadd float %56, %53 %60 = fadd float %56, %53 %61 = fcmp olt float %58, 0.000000e+00 %62 = sext i1 %61 to i32 %63 = fcmp olt float %59, 0.000000e+00 %64 = sext i1 %63 to i32 %65 = fcmp olt float %60, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = bitcast i32 %62 to float %68 = bitcast i32 %64 to float %69 = bitcast i32 %66 to float %70 = bitcast float %67 to i32 %71 = bitcast float %69 to i32 %72 = or i32 %70, %71 %73 = bitcast i32 %72 to float %74 = bitcast float %73 to i32 %75 = bitcast float %68 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = icmp ne i32 %78, 0 br i1 %79, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %80 = fsub float -0.000000e+00, %53 %81 = fadd float %25, %80 %82 = fsub float -0.000000e+00, %53 %83 = fadd float %25, %82 %84 = fsub float -0.000000e+00, %53 %85 = fadd float %25, %84 %86 = fcmp olt float %81, 0.000000e+00 %87 = sext i1 %86 to i32 %88 = fcmp olt float %83, 0.000000e+00 %89 = sext i1 %88 to i32 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = bitcast i32 %87 to float %93 = bitcast i32 %89 to float %94 = bitcast i32 %91 to float %95 = bitcast float %92 to i32 %96 = bitcast float %94 to i32 %97 = or i32 %95, %96 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = bitcast float %93 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = icmp ne i32 %103, 0 br i1 %104, label %IF25, label %ENDIF24 IF25: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF24 ENDIF24: ; preds = %ENDIF, %IF25 %105 = fadd float %57, %55 %106 = fadd float %57, %55 %107 = fadd float %57, %55 %108 = fcmp olt float %105, 0.000000e+00 %109 = sext i1 %108 to i32 %110 = fcmp olt float %106, 0.000000e+00 %111 = sext i1 %110 to i32 %112 = fcmp olt float %107, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = bitcast i32 %109 to float %115 = bitcast i32 %111 to float %116 = bitcast i32 %113 to float %117 = bitcast float %114 to i32 %118 = bitcast float %116 to i32 %119 = or i32 %117, %118 %120 = bitcast i32 %119 to float %121 = bitcast float %120 to i32 %122 = bitcast float %115 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = icmp ne i32 %125, 0 br i1 %126, label %IF28, label %ENDIF27 IF28: ; preds = %ENDIF24 call void @llvm.AMDGPU.kilp() br label %ENDIF27 ENDIF27: ; preds = %ENDIF24, %IF28 %127 = fsub float -0.000000e+00, %55 %128 = fadd float %27, %127 %129 = fsub float -0.000000e+00, %55 %130 = fadd float %27, %129 %131 = fsub float -0.000000e+00, %55 %132 = fadd float %27, %131 %133 = fcmp olt float %128, 0.000000e+00 %134 = sext i1 %133 to i32 %135 = fcmp olt float %130, 0.000000e+00 %136 = sext i1 %135 to i32 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = bitcast i32 %134 to float %140 = bitcast i32 %136 to float %141 = bitcast i32 %138 to float %142 = bitcast float %139 to i32 %143 = bitcast float %141 to i32 %144 = or i32 %142, %143 %145 = bitcast i32 %144 to float %146 = bitcast float %145 to i32 %147 = bitcast float %140 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = icmp ne i32 %150, 0 br i1 %151, label %IF31, label %ENDIF30 IF31: ; preds = %ENDIF27 call void @llvm.AMDGPU.kilp() br label %ENDIF30 ENDIF30: ; preds = %ENDIF27, %IF31 %152 = bitcast float %46 to i32 %153 = bitcast float %47 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = bitcast <8 x i32> %39 to <32 x i8> %157 = bitcast <4 x i32> %41 to <16 x i8> %158 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %156, <16 x i8> %157, i32 2) %159 = extractelement <4 x float> %158, i32 0 %160 = extractelement <4 x float> %158, i32 1 %161 = extractelement <4 x float> %158, i32 2 %162 = extractelement <4 x float> %158, i32 3 %163 = fmul float %159, %49 %164 = fmul float %160, %49 %165 = fmul float %161, %49 %166 = fmul float %162, %49 %167 = bitcast float %46 to i32 %168 = bitcast float %47 to i32 %169 = insertelement <2 x i32> undef, i32 %167, i32 0 %170 = insertelement <2 x i32> %169, i32 %168, i32 1 %171 = bitcast <8 x i32> %35 to <32 x i8> %172 = bitcast <4 x i32> %37 to <16 x i8> %173 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %170, <32 x i8> %171, <16 x i8> %172, i32 2) %174 = extractelement <4 x float> %173, i32 0 %175 = extractelement <4 x float> %173, i32 1 %176 = extractelement <4 x float> %173, i32 2 %177 = extractelement <4 x float> %173, i32 3 %178 = fmul float %48, %174 %179 = fadd float %178, %163 %180 = fmul float %48, %175 %181 = fadd float %180, %164 %182 = fmul float %48, %176 %183 = fadd float %182, %165 %184 = fmul float %48, %177 %185 = fadd float %184, %166 %186 = fmul float %179, %42 %187 = fmul float %181, %43 %188 = fmul float %183, %44 %189 = fmul float %185, %45 %190 = call i32 @llvm.SI.packf16(float %186, float %187) %191 = bitcast i32 %190 to float %192 = call i32 @llvm.SI.packf16(float %188, float %189) %193 = bitcast i32 %192 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %191, float %193, float %191, float %193) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 2, [m0] ; C8100900 v_interp_p2_f32 v4, [v4], v1, 1, 2, [m0] ; C8110901 v_interp_p1_f32 v5, v0, 0, 2, [m0] ; C8140800 v_interp_p2_f32 v5, [v5], v1, 0, 2, [m0] ; C8150801 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v11, v0, 3, 0, [m0] ; C82C0300 v_interp_p2_f32 v11, [v11], v1, 3, 0, [m0] ; C82D0301 v_interp_p1_f32 v10, v0, 2, 0, [m0] ; C8280200 v_interp_p2_f32 v10, [v10], v1, 2, 0, [m0] ; C8290201 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v0, s9, v2, v0 ; D2820000 04020409 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s8, v0 ; 0A020008 v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_buffer_load_dword s12, s[0:3], 0x10 ; C2060110 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[14:15], s[8:9] ; BE8E2408 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x7 ; C2048107 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 v_mov_b32_e32 v2, s12 ; 7E04020C v_mov_b32_e32 v1, s13 ; 7E02020D s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[12:13], v0, 0 ; D002000C 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mad_f32 v0, v2, v3, s8 ; D2820000 00220702 v_mad_f32 v0, v1, v0, s9 ; D2820000 00260101 v_subrev_f32_e32 v1, s10, v0 ; 0A02000A v_cmp_lt_f32_e64 s[2:3], v1, 0 ; D0020002 00010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[2:3], s[2:3] ; BE822402 s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[2:3] ; 88FE027E v_sub_f32_e32 v0, s0, v0 ; 08000000 v_cmp_lt_f32_e64 s[0:1], v0, 0 ; D0020000 00010100 s_and_saveexec_b64 s[0:1], s[0:1] ; BE802400 s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[0:1] ; 88FE007E s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[20:27], s[8:11] ; F0800F00 00450006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v4, v3 ; 10180704 image_sample v[13:16], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[12:19], s[0:3] ; F0800F00 00030D06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v5, v16, v12 ; D2820006 04322105 v_mul_f32_e32 v6, v11, v6 ; 100C0D0B v_mul_f32_e32 v7, v4, v2 ; 100E0504 v_mad_f32 v7, v5, v15, v7 ; D2820007 041E1F05 v_mul_f32_e32 v7, v10, v7 ; 100E0F0A v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_mul_f32_e32 v7, v4, v1 ; 100E0304 v_mad_f32 v7, v5, v14, v7 ; D2820007 041E1D05 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_mad_f32 v0, v5, v13, v0 ; D2820000 04021B05 v_mul_f32_e32 v0, v8, v0 ; 10000108 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v1 ; 06000204 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[9] DCL CONST[0..6] DCL TEMP[0] DCL TEMP[1..9], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[6].xyxx, CONST[6].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].y, IMM[0].yyyy 33: ADD TEMP[1].x, IMM[0].zzzz, -CONST[3].xxxx 34: MOV TEMP[3].x, TEMP[1].xxxx 35: FSNE TEMP[2].x, CONST[1].xxxx, -CONST[1].xxxx 36: UIF TEMP[2].xxxx :2 37: FSLT TEMP[2].x, IMM[0].yyyy, CONST[2].xxxx 38: UIF TEMP[2].xxxx :2 39: MUL TEMP[2].zw, CONST[4].xyxy, IN[2].xyxy 40: MOV TEMP[3].zw, TEMP[2].wwzw 41: FRC TEMP[2].xy, TEMP[2].zwzw 42: ADD TEMP[4].zw, TEMP[3], -TEMP[2].xyxy 43: MOV TEMP[3].zw, TEMP[4].wwzw 44: MAD TEMP[4].xy, IN[2], CONST[4], -TEMP[4].zwzw 45: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 46: RCP TEMP[6].x, CONST[4].xxxx 47: RCP TEMP[7].x, CONST[4].yyyy 48: MOV TEMP[6].y, TEMP[7].xxxx 49: MUL TEMP[5].xy, TEMP[5].zwzw, TEMP[6] 50: MOV TEMP[5].xy, TEMP[5].xyyy 51: MOV TEMP[5].w, IMM[0].xxxx 52: TXL TEMP[5], TEMP[5], SAMP[0], 2D 53: MOV TEMP[6].zw, TEMP[5] 54: DP2 TEMP[7].x, TEMP[5].wxxx, IMM[1].xyyy 55: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy 56: FRC TEMP[8].w, TEMP[7].xxxx 57: ADD TEMP[7].z, -TEMP[8].wwww, TEMP[7].xxxx 58: MOV TEMP[3].z, TEMP[7].zzzz 59: MAD TEMP[5].w, CONST[3].xxxx, TEMP[5].xxxx, IMM[0].yyyy 60: FRC TEMP[7].z, TEMP[5].wwww 61: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[7].zzzz 62: MOV TEMP[3].w, TEMP[5].wwww 63: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 64: FSGE TEMP[7].x, TEMP[1].xxxx, IMM[0].xxxx 65: UIF TEMP[7].xxxx :2 66: MOV TEMP[7].x, TEMP[5].wwww 67: ELSE :2 68: MOV TEMP[7].x, TEMP[5].zzzz 69: ENDIF 70: RCP TEMP[5].x, CONST[2].xxxx 71: MAD TEMP[5].w, CONST[5].xxxx, TEMP[5].xxxx, IMM[0].yyyy 72: FRC TEMP[8].z, TEMP[5].wwww 73: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[8].zzzz 74: MUL TEMP[8].z, TEMP[5].wwww, TEMP[7].xxxx 75: MOV TEMP[9].x, -TEMP[5].wwww 76: FSGE TEMP[8].x, TEMP[8].zzzz, IMM[0].xxxx 77: UIF TEMP[8].xxxx :2 78: MOV TEMP[8].x, TEMP[5].wwww 79: ELSE :2 80: MOV TEMP[8].x, TEMP[9].xxxx 81: ENDIF 82: MOV TEMP[2].z, TEMP[8].xxxx 83: RCP TEMP[9].x, TEMP[8].xxxx 84: MUL TEMP[9].w, TEMP[7].xxxx, TEMP[9].xxxx 85: FRC TEMP[9].w, TEMP[9].wwww 86: MOV TEMP[2].w, TEMP[9].wwww 87: MUL TEMP[8].x, TEMP[9].wwww, TEMP[8].xxxx 88: MOV TEMP[6].x, TEMP[8].xxxx 89: RCP TEMP[5].x, TEMP[5].wwww 90: MUL TEMP[5].y, TEMP[5].xxxx, TEMP[7].xxxx 91: MOV TEMP[6].y, TEMP[5].yyyy 92: FRC TEMP[5].zw, TEMP[6].xyxy 93: MOV TEMP[3].zw, TEMP[5].wwzw 94: ADD TEMP[5].zw, -TEMP[3], TEMP[6].xyxy 95: MOV TEMP[3].zw, TEMP[5].wwzw 96: ADD TEMP[4].zw, TEMP[4].xyxy, TEMP[3] 97: MOV TEMP[3].zw, TEMP[4].wwzw 98: MUL TEMP[4].zw, TEMP[3], CONST[2].xxxx 99: MOV TEMP[3].zw, TEMP[4].wwzw 100: FRC TEMP[4].xy, TEMP[4].zwzw 101: ADD TEMP[4].zw, TEMP[3], -TEMP[4].xyxy 102: MOV TEMP[3].zw, TEMP[4].wwzw 103: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 104: MOV TEMP[3].w, TEMP[4].wwzw 105: RCP TEMP[2].x, CONST[5].xxxx 106: RCP TEMP[5].x, CONST[5].yyyy 107: MOV TEMP[2].y, TEMP[5].xxxx 108: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[2] 109: MOV TEMP[4].xy, TEMP[4].xyyy 110: MOV TEMP[4].w, IMM[0].xxxx 111: TXL TEMP[4], TEMP[4], SAMP[1], 2D 112: MOV TEMP[2], TEMP[4] 113: MOV TEMP[3].z, TEMP[4].wwww 114: ELSE :2 115: MOV TEMP[4].xy, IN[2].xyyy 116: TEX TEMP[4], TEMP[4], SAMP[1], 2D 117: MOV TEMP[2], TEMP[4] 118: MOV TEMP[3].z, TEMP[4].wwww 119: ENDIF 120: MUL TEMP[4].w, TEMP[3].zzzz, IN[1].wwww 121: MOV TEMP[4].w, TEMP[4].wwww 122: MOV TEMP[4].xyz, IN[1].xyzx 123: ELSE :2 124: FSLT TEMP[5].x, IMM[0].yyyy, CONST[2].xxxx 125: UIF TEMP[5].xxxx :2 126: MUL TEMP[5].zw, CONST[4].xyxy, IN[2].xyxy 127: MOV TEMP[3].zw, TEMP[5].wwzw 128: FRC TEMP[5].xy, TEMP[5].zwzw 129: MOV TEMP[2].xy, TEMP[5].xyxx 130: ADD TEMP[5].zw, TEMP[3], -TEMP[5].xyxy 131: MOV TEMP[3].zw, TEMP[5].wwzw 132: MAD TEMP[5].xy, IN[2], CONST[4], -TEMP[5].zwzw 133: MOV TEMP[2].xy, TEMP[5].xyxx 134: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 135: RCP TEMP[6].x, CONST[4].xxxx 136: RCP TEMP[7].x, CONST[4].yyyy 137: MOV TEMP[6].y, TEMP[7].xxxx 138: MUL TEMP[5].xy, TEMP[5].zwzw, TEMP[6] 139: MOV TEMP[5].xy, TEMP[5].xyyy 140: MOV TEMP[5].w, IMM[0].xxxx 141: TXL TEMP[5], TEMP[5], SAMP[0], 2D 142: MOV TEMP[6].zw, TEMP[5] 143: DP2 TEMP[7].x, TEMP[5].wxxx, IMM[1].xyyy 144: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].yyyy 145: FRC TEMP[8].w, TEMP[7].xxxx 146: ADD TEMP[7].z, -TEMP[8].wwww, TEMP[7].xxxx 147: MOV TEMP[3].z, TEMP[7].zzzz 148: MAD TEMP[5].w, CONST[3].xxxx, TEMP[5].xxxx, IMM[0].yyyy 149: FRC TEMP[7].z, TEMP[5].wwww 150: MOV TEMP[2].z, TEMP[7].zzzz 151: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[7].zzzz 152: MOV TEMP[3].w, TEMP[5].wwww 153: ADD TEMP[5].zw, TEMP[3], IMM[0].yyyy 154: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 155: UIF TEMP[1].xxxx :2 156: MOV TEMP[1].x, TEMP[5].wwww 157: ELSE :2 158: MOV TEMP[1].x, TEMP[5].zzzz 159: ENDIF 160: MOV TEMP[3].x, TEMP[1].xxxx 161: RCP TEMP[5].x, CONST[2].xxxx 162: MAD TEMP[5].y, CONST[5].xxxx, TEMP[5].xxxx, IMM[0].yyyy 163: FRC TEMP[7].z, TEMP[5].yyyy 164: ADD TEMP[5].y, -TEMP[7].zzzz, TEMP[5].yyyy 165: MUL TEMP[7].z, TEMP[5].yyyy, TEMP[1].xxxx 166: MOV TEMP[8].x, -TEMP[5].yyyy 167: FSGE TEMP[7].x, TEMP[7].zzzz, IMM[0].xxxx 168: UIF TEMP[7].xxxx :2 169: MOV TEMP[7].x, TEMP[5].yyyy 170: ELSE :2 171: MOV TEMP[7].x, TEMP[8].xxxx 172: ENDIF 173: MOV TEMP[3].z, TEMP[7].xxxx 174: RCP TEMP[7].x, TEMP[7].xxxx 175: MUL TEMP[1].w, TEMP[7].xxxx, TEMP[1].xxxx 176: FRC TEMP[1].w, TEMP[1].wwww 177: MOV TEMP[3].w, TEMP[1].wwww 178: RCP TEMP[1].x, TEMP[5].yyyy 179: MOV TEMP[3].y, TEMP[1].xxxx 180: MUL TEMP[1].xy, TEMP[3].wyzw, TEMP[3].zxzw 181: MOV TEMP[6].xy, TEMP[1].xyxx 182: FRC TEMP[1].xy, TEMP[6] 183: MOV TEMP[3].xy, TEMP[1].xyxx 184: ADD TEMP[1].xy, -TEMP[3], TEMP[6] 185: MOV TEMP[3].xy, TEMP[1].xyxx 186: ADD TEMP[1].xy, TEMP[2], TEMP[3] 187: MOV TEMP[3].xy, TEMP[1].xyxx 188: MUL TEMP[1].xy, TEMP[3], CONST[2].xxxx 189: MOV TEMP[3].xy, TEMP[1].xyxx 190: FRC TEMP[1].zw, TEMP[1].xyxy 191: MOV TEMP[3].zw, TEMP[1].wwzw 192: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[3] 193: MOV TEMP[3].xy, TEMP[1].xyxx 194: ADD TEMP[1].xy, TEMP[3], IMM[0].yyyy 195: MOV TEMP[3].xy, TEMP[1].xyxx 196: RCP TEMP[2].x, CONST[5].xxxx 197: RCP TEMP[1].x, CONST[5].yyyy 198: MOV TEMP[2].y, TEMP[1].xxxx 199: MUL TEMP[1].xy, TEMP[3], TEMP[2] 200: MOV TEMP[1].xy, TEMP[1].xyyy 201: MOV TEMP[1].w, IMM[0].xxxx 202: TXL TEMP[1], TEMP[1], SAMP[1], 2D 203: MOV TEMP[3].xyz, TEMP[1] 204: MOV TEMP[2].x, TEMP[1].wwww 205: ELSE :2 206: MOV TEMP[1].xy, IN[2].xyyy 207: TEX TEMP[1], TEMP[1], SAMP[1], 2D 208: MOV TEMP[3].xyz, TEMP[1] 209: MOV TEMP[2].x, TEMP[1].wwww 210: ENDIF 211: MUL TEMP[1].w, TEMP[2].xxxx, IN[1].wwww 212: MOV TEMP[3].w, TEMP[1].wwww 213: MOV TEMP[4], TEMP[3] 214: ENDIF 215: MOV OUT[0], TEMP[4] 216: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %41 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %42 = load <8 x i32> addrspace(2)* %41, !tbaa !0 %43 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %44 = load <4 x i32> addrspace(2)* %43, !tbaa !0 %45 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %46 = load <8 x i32> addrspace(2)* %45, !tbaa !0 %47 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %48 = load <4 x i32> addrspace(2)* %47, !tbaa !0 %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %55 = fmul float %15, %39 %56 = fadd float %55, %40 %57 = fmul float %14, %35 %58 = fadd float %57, %37 %59 = fmul float %56, %36 %60 = fadd float %59, %38 %61 = fsub float -0.000000e+00, %24 %62 = fsub float -0.000000e+00, %26 %63 = fadd float %61, %58 %64 = fadd float %61, %58 %65 = fadd float %61, %58 %66 = fcmp olt float %63, 0.000000e+00 %67 = sext i1 %66 to i32 %68 = fcmp olt float %64, 0.000000e+00 %69 = sext i1 %68 to i32 %70 = fcmp olt float %65, 0.000000e+00 %71 = sext i1 %70 to i32 %72 = bitcast i32 %67 to float %73 = bitcast i32 %69 to float %74 = bitcast i32 %71 to float %75 = bitcast float %72 to i32 %76 = bitcast float %74 to i32 %77 = or i32 %75, %76 %78 = bitcast i32 %77 to float %79 = bitcast float %78 to i32 %80 = bitcast float %73 to i32 %81 = or i32 %79, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = icmp ne i32 %83, 0 br i1 %84, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %85 = fsub float -0.000000e+00, %58 %86 = fadd float %25, %85 %87 = fsub float -0.000000e+00, %58 %88 = fadd float %25, %87 %89 = fsub float -0.000000e+00, %58 %90 = fadd float %25, %89 %91 = fcmp olt float %86, 0.000000e+00 %92 = sext i1 %91 to i32 %93 = fcmp olt float %88, 0.000000e+00 %94 = sext i1 %93 to i32 %95 = fcmp olt float %90, 0.000000e+00 %96 = sext i1 %95 to i32 %97 = bitcast i32 %92 to float %98 = bitcast i32 %94 to float %99 = bitcast i32 %96 to float %100 = bitcast float %97 to i32 %101 = bitcast float %99 to i32 %102 = or i32 %100, %101 %103 = bitcast i32 %102 to float %104 = bitcast float %103 to i32 %105 = bitcast float %98 to i32 %106 = or i32 %104, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = icmp ne i32 %108, 0 br i1 %109, label %IF41, label %ENDIF40 IF41: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF40 ENDIF40: ; preds = %ENDIF, %IF41 %110 = fadd float %62, %60 %111 = fadd float %62, %60 %112 = fadd float %62, %60 %113 = fcmp olt float %110, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %111, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = fcmp olt float %112, 0.000000e+00 %118 = sext i1 %117 to i32 %119 = bitcast i32 %114 to float %120 = bitcast i32 %116 to float %121 = bitcast i32 %118 to float %122 = bitcast float %119 to i32 %123 = bitcast float %121 to i32 %124 = or i32 %122, %123 %125 = bitcast i32 %124 to float %126 = bitcast float %125 to i32 %127 = bitcast float %120 to i32 %128 = or i32 %126, %127 %129 = bitcast i32 %128 to float %130 = bitcast float %129 to i32 %131 = icmp ne i32 %130, 0 br i1 %131, label %IF44, label %ENDIF43 IF44: ; preds = %ENDIF40 call void @llvm.AMDGPU.kilp() br label %ENDIF43 ENDIF43: ; preds = %ENDIF40, %IF44 %132 = fsub float -0.000000e+00, %60 %133 = fadd float %27, %132 %134 = fsub float -0.000000e+00, %60 %135 = fadd float %27, %134 %136 = fsub float -0.000000e+00, %60 %137 = fadd float %27, %136 %138 = fcmp olt float %133, 0.000000e+00 %139 = sext i1 %138 to i32 %140 = fcmp olt float %135, 0.000000e+00 %141 = sext i1 %140 to i32 %142 = fcmp olt float %137, 0.000000e+00 %143 = sext i1 %142 to i32 %144 = bitcast i32 %139 to float %145 = bitcast i32 %141 to float %146 = bitcast i32 %143 to float %147 = bitcast float %144 to i32 %148 = bitcast float %146 to i32 %149 = or i32 %147, %148 %150 = bitcast i32 %149 to float %151 = bitcast float %150 to i32 %152 = bitcast float %145 to i32 %153 = or i32 %151, %152 %154 = bitcast i32 %153 to float %155 = bitcast float %154 to i32 %156 = icmp ne i32 %155, 0 br i1 %156, label %IF47, label %ENDIF46 IF47: ; preds = %ENDIF43 call void @llvm.AMDGPU.kilp() br label %ENDIF46 ENDIF46: ; preds = %ENDIF43, %IF47 %157 = fsub float -0.000000e+00, %30 %158 = fadd float 1.024000e+03, %157 %159 = fsub float -0.000000e+00, %28 %160 = fcmp une float %28, %159 %161 = sext i1 %160 to i32 %162 = bitcast i32 %161 to float %163 = bitcast float %162 to i32 %164 = icmp ne i32 %163, 0 %165 = fcmp olt float 5.000000e-01, %29 %166 = sext i1 %165 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 br i1 %164, label %IF50, label %ELSE51 IF50: ; preds = %ENDIF46 br i1 %169, label %IF53, label %ELSE54 ELSE51: ; preds = %ENDIF46 br i1 %169, label %IF62, label %ELSE63 ENDIF49: ; preds = %IF53, %ELSE54, %ENDIF61 %.sink72.sink = phi <4 x float> [ %.sink72, %ENDIF61 ], [ %284, %ELSE54 ], [ %277, %IF53 ] %temp16.0 = phi float [ %394, %ENDIF61 ], [ %49, %ELSE54 ], [ %49, %IF53 ] %temp17.0 = phi float [ %395, %ENDIF61 ], [ %50, %ELSE54 ], [ %50, %IF53 ] %temp18.0 = phi float [ %396, %ENDIF61 ], [ %51, %ELSE54 ], [ %51, %IF53 ] %170 = extractelement <4 x float> %.sink72.sink, i32 3 %171 = fmul float %170, %52 %172 = call i32 @llvm.SI.packf16(float %temp16.0, float %temp17.0) %173 = bitcast i32 %172 to float %174 = call i32 @llvm.SI.packf16(float %temp18.0, float %171) %175 = bitcast i32 %174 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %173, float %175, float %173, float %175) ret void IF53: ; preds = %IF50 %176 = fmul float %31, %53 %177 = fmul float %32, %54 %178 = call float @llvm.AMDIL.fraction.(float %176) %179 = call float @llvm.AMDIL.fraction.(float %177) %180 = fsub float -0.000000e+00, %178 %181 = fadd float %176, %180 %182 = fsub float -0.000000e+00, %179 %183 = fadd float %177, %182 %184 = fsub float -0.000000e+00, %181 %185 = fmul float %53, %31 %186 = fadd float %185, %184 %187 = fsub float -0.000000e+00, %183 %188 = fmul float %54, %32 %189 = fadd float %188, %187 %190 = fadd float %181, 5.000000e-01 %191 = fadd float %183, 5.000000e-01 %192 = fdiv float 1.000000e+00, %31 %193 = fdiv float 1.000000e+00, %32 %194 = fmul float %190, %192 %195 = fmul float %191, %193 %196 = bitcast float %194 to i32 %197 = bitcast float %195 to i32 %198 = bitcast float 0.000000e+00 to i32 %199 = insertelement <4 x i32> undef, i32 %196, i32 0 %200 = insertelement <4 x i32> %199, i32 %197, i32 1 %201 = insertelement <4 x i32> %200, i32 %198, i32 2 %202 = insertelement <4 x i32> %201, i32 undef, i32 3 %203 = bitcast <8 x i32> %42 to <32 x i8> %204 = bitcast <4 x i32> %44 to <16 x i8> %205 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %202, <32 x i8> %203, <16 x i8> %204, i32 2) %206 = extractelement <4 x float> %205, i32 0 %207 = extractelement <4 x float> %205, i32 3 %208 = fmul float %207, 6.528000e+04 %209 = fmul float %206, 2.550000e+02 %210 = fadd float %208, %209 %211 = fadd float %210, 5.000000e-01 %212 = call float @llvm.AMDIL.fraction.(float %211) %213 = fsub float -0.000000e+00, %212 %214 = fadd float %213, %211 %215 = fmul float %30, %206 %216 = fadd float %215, 5.000000e-01 %217 = call float @llvm.AMDIL.fraction.(float %216) %218 = fsub float -0.000000e+00, %217 %219 = fadd float %216, %218 %220 = fadd float %214, 5.000000e-01 %221 = fadd float %219, 5.000000e-01 %222 = fcmp oge float %158, 0.000000e+00 %223 = sext i1 %222 to i32 %224 = bitcast i32 %223 to float %225 = bitcast float %224 to i32 %226 = icmp ne i32 %225, 0 %. = select i1 %226, float %221, float %220 %227 = fdiv float 1.000000e+00, %29 %228 = fmul float %33, %227 %229 = fadd float %228, 5.000000e-01 %230 = call float @llvm.AMDIL.fraction.(float %229) %231 = fsub float -0.000000e+00, %230 %232 = fadd float %229, %231 %233 = fmul float %232, %. %234 = fsub float -0.000000e+00, %232 %235 = fcmp oge float %233, 0.000000e+00 %236 = sext i1 %235 to i32 %237 = bitcast i32 %236 to float %238 = bitcast float %237 to i32 %239 = icmp ne i32 %238, 0 %temp32.0 = select i1 %239, float %232, float %234 %240 = fdiv float 1.000000e+00, %temp32.0 %241 = fmul float %., %240 %242 = call float @llvm.AMDIL.fraction.(float %241) %243 = fmul float %242, %temp32.0 %244 = fdiv float 1.000000e+00, %232 %245 = fmul float %244, %. %246 = call float @llvm.AMDIL.fraction.(float %243) %247 = call float @llvm.AMDIL.fraction.(float %245) %248 = fsub float -0.000000e+00, %246 %249 = fadd float %248, %243 %250 = fsub float -0.000000e+00, %247 %251 = fadd float %250, %245 %252 = fadd float %186, %249 %253 = fadd float %189, %251 %254 = fmul float %252, %29 %255 = fmul float %253, %29 %256 = call float @llvm.AMDIL.fraction.(float %254) %257 = call float @llvm.AMDIL.fraction.(float %255) %258 = fsub float -0.000000e+00, %256 %259 = fadd float %254, %258 %260 = fsub float -0.000000e+00, %257 %261 = fadd float %255, %260 %262 = fadd float %259, 5.000000e-01 %263 = fadd float %261, 5.000000e-01 %264 = fdiv float 1.000000e+00, %33 %265 = fdiv float 1.000000e+00, %34 %266 = fmul float %262, %264 %267 = fmul float %263, %265 %268 = bitcast float %266 to i32 %269 = bitcast float %267 to i32 %270 = bitcast float 0.000000e+00 to i32 %271 = insertelement <4 x i32> undef, i32 %268, i32 0 %272 = insertelement <4 x i32> %271, i32 %269, i32 1 %273 = insertelement <4 x i32> %272, i32 %270, i32 2 %274 = insertelement <4 x i32> %273, i32 undef, i32 3 %275 = bitcast <8 x i32> %46 to <32 x i8> %276 = bitcast <4 x i32> %48 to <16 x i8> %277 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 2) br label %ENDIF49 ELSE54: ; preds = %IF50 %278 = bitcast float %53 to i32 %279 = bitcast float %54 to i32 %280 = insertelement <2 x i32> undef, i32 %278, i32 0 %281 = insertelement <2 x i32> %280, i32 %279, i32 1 %282 = bitcast <8 x i32> %46 to <32 x i8> %283 = bitcast <4 x i32> %48 to <16 x i8> %284 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %281, <32 x i8> %282, <16 x i8> %283, i32 2) br label %ENDIF49 IF62: ; preds = %ELSE51 %285 = fmul float %31, %53 %286 = fmul float %32, %54 %287 = call float @llvm.AMDIL.fraction.(float %285) %288 = call float @llvm.AMDIL.fraction.(float %286) %289 = fsub float -0.000000e+00, %287 %290 = fadd float %285, %289 %291 = fsub float -0.000000e+00, %288 %292 = fadd float %286, %291 %293 = fsub float -0.000000e+00, %290 %294 = fmul float %53, %31 %295 = fadd float %294, %293 %296 = fsub float -0.000000e+00, %292 %297 = fmul float %54, %32 %298 = fadd float %297, %296 %299 = fadd float %290, 5.000000e-01 %300 = fadd float %292, 5.000000e-01 %301 = fdiv float 1.000000e+00, %31 %302 = fdiv float 1.000000e+00, %32 %303 = fmul float %299, %301 %304 = fmul float %300, %302 %305 = bitcast float %303 to i32 %306 = bitcast float %304 to i32 %307 = bitcast float 0.000000e+00 to i32 %308 = insertelement <4 x i32> undef, i32 %305, i32 0 %309 = insertelement <4 x i32> %308, i32 %306, i32 1 %310 = insertelement <4 x i32> %309, i32 %307, i32 2 %311 = insertelement <4 x i32> %310, i32 undef, i32 3 %312 = bitcast <8 x i32> %42 to <32 x i8> %313 = bitcast <4 x i32> %44 to <16 x i8> %314 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %311, <32 x i8> %312, <16 x i8> %313, i32 2) %315 = extractelement <4 x float> %314, i32 0 %316 = extractelement <4 x float> %314, i32 3 %317 = fmul float %316, 6.528000e+04 %318 = fmul float %315, 2.550000e+02 %319 = fadd float %317, %318 %320 = fadd float %319, 5.000000e-01 %321 = call float @llvm.AMDIL.fraction.(float %320) %322 = fsub float -0.000000e+00, %321 %323 = fadd float %322, %320 %324 = fmul float %30, %315 %325 = fadd float %324, 5.000000e-01 %326 = call float @llvm.AMDIL.fraction.(float %325) %327 = fsub float -0.000000e+00, %326 %328 = fadd float %325, %327 %329 = fadd float %323, 5.000000e-01 %330 = fadd float %328, 5.000000e-01 %331 = fcmp oge float %158, 0.000000e+00 %332 = sext i1 %331 to i32 %333 = bitcast i32 %332 to float %334 = bitcast float %333 to i32 %335 = icmp ne i32 %334, 0 %.71 = select i1 %335, float %330, float %329 %336 = fdiv float 1.000000e+00, %29 %337 = fmul float %33, %336 %338 = fadd float %337, 5.000000e-01 %339 = call float @llvm.AMDIL.fraction.(float %338) %340 = fsub float -0.000000e+00, %339 %341 = fadd float %340, %338 %342 = fmul float %341, %.71 %343 = fsub float -0.000000e+00, %341 %344 = fcmp oge float %342, 0.000000e+00 %345 = sext i1 %344 to i32 %346 = bitcast i32 %345 to float %347 = bitcast float %346 to i32 %348 = icmp ne i32 %347, 0 %temp28.1 = select i1 %348, float %341, float %343 %349 = fdiv float 1.000000e+00, %temp28.1 %350 = fmul float %349, %.71 %351 = call float @llvm.AMDIL.fraction.(float %350) %352 = fdiv float 1.000000e+00, %341 %353 = fmul float %351, %temp28.1 %354 = fmul float %352, %.71 %355 = call float @llvm.AMDIL.fraction.(float %353) %356 = call float @llvm.AMDIL.fraction.(float %354) %357 = fsub float -0.000000e+00, %355 %358 = fadd float %357, %353 %359 = fsub float -0.000000e+00, %356 %360 = fadd float %359, %354 %361 = fadd float %295, %358 %362 = fadd float %298, %360 %363 = fmul float %361, %29 %364 = fmul float %362, %29 %365 = call float @llvm.AMDIL.fraction.(float %363) %366 = call float @llvm.AMDIL.fraction.(float %364) %367 = fsub float -0.000000e+00, %365 %368 = fadd float %367, %363 %369 = fsub float -0.000000e+00, %366 %370 = fadd float %369, %364 %371 = fadd float %368, 5.000000e-01 %372 = fadd float %370, 5.000000e-01 %373 = fdiv float 1.000000e+00, %33 %374 = fdiv float 1.000000e+00, %34 %375 = fmul float %371, %373 %376 = fmul float %372, %374 %377 = bitcast float %375 to i32 %378 = bitcast float %376 to i32 %379 = bitcast float 0.000000e+00 to i32 %380 = insertelement <4 x i32> undef, i32 %377, i32 0 %381 = insertelement <4 x i32> %380, i32 %378, i32 1 %382 = insertelement <4 x i32> %381, i32 %379, i32 2 %383 = insertelement <4 x i32> %382, i32 undef, i32 3 %384 = bitcast <8 x i32> %46 to <32 x i8> %385 = bitcast <4 x i32> %48 to <16 x i8> %386 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %383, <32 x i8> %384, <16 x i8> %385, i32 2) br label %ENDIF61 ELSE63: ; preds = %ELSE51 %387 = bitcast float %53 to i32 %388 = bitcast float %54 to i32 %389 = insertelement <2 x i32> undef, i32 %387, i32 0 %390 = insertelement <2 x i32> %389, i32 %388, i32 1 %391 = bitcast <8 x i32> %46 to <32 x i8> %392 = bitcast <4 x i32> %48 to <16 x i8> %393 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %390, <32 x i8> %391, <16 x i8> %392, i32 2) br label %ENDIF61 ENDIF61: ; preds = %ELSE63, %IF62 %.sink72 = phi <4 x float> [ %393, %ELSE63 ], [ %386, %IF62 ] %394 = extractelement <4 x float> %.sink72, i32 0 %395 = extractelement <4 x float> %.sink72, i32 1 %396 = extractelement <4 x float> %.sink72, i32 2 br label %ENDIF49 } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v5, v0, 0, 1, [m0] ; C8140400 v_interp_p2_f32 v5, [v5], v1, 0, 1, [m0] ; C8150401 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100 v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x1a ; C2000D1A s_buffer_load_dword s1, s[12:15], 0x18 ; C2008D18 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s0 ; 7E000200 v_mad_f32 v0, s1, v2, v0 ; D2820000 04020401 s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s0, v0 ; 0A020000 v_cmp_lt_f32_e64 s[0:1], v1, 0 ; D0020000 00010101 s_buffer_load_dword s8, s[12:15], 0x24 ; C2040D24 s_buffer_load_dword s9, s[12:15], 0x19 ; C2048D19 s_buffer_load_dword s2, s[12:15], 0x1 ; C2010D01 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[0:1] ; BE8A2400 s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_buffer_load_dword s0, s[12:15], 0x25 ; C2000D25 s_buffer_load_dword s1, s[12:15], 0x1b ; C2008D1B s_buffer_load_dword s3, s[12:15], 0x2 ; C2018D02 v_mov_b32_e32 v2, s8 ; 7E040208 v_mov_b32_e32 v1, s9 ; 7E020209 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s2, v0 ; 08000002 v_cmp_lt_f32_e64 s[8:9], v0, 0 ; D0020008 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[8:9], s[8:9] ; BE882408 s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[8:9] ; 88FE087E s_buffer_load_dword s2, s[12:15], 0x3 ; C2010D03 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mad_f32 v0, v2, v3, s0 ; D2820000 00020702 v_mad_f32 v0, v1, v0, s1 ; D2820000 00060101 v_subrev_f32_e32 v1, s3, v0 ; 0A020003 v_cmp_lt_f32_e64 s[0:1], v1, 0 ; D0020000 00010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[8:9], s[0:1] ; BE882400 s_xor_b64 s[8:9], exec, s[8:9] ; 8988087E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[8:9] ; 88FE087E s_buffer_load_dword s0, s[12:15], 0xc ; C2000D0C s_buffer_load_dword s1, s[12:15], 0x8 ; C2008D08 s_buffer_load_dword s10, s[12:15], 0x4 ; C2050D04 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s2, v0 ; 08000002 v_cmp_lt_f32_e64 s[2:3], v0, 0 ; D0020002 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[16:17], s[2:3] ; BE902402 s_xor_b64 s[16:17], exec, s[16:17] ; 8990107E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[16:17] ; 88FE107E s_buffer_load_dword s2, s[12:15], 0x15 ; C2010D15 s_buffer_load_dword s3, s[12:15], 0x14 ; C2018D14 s_buffer_load_dword s8, s[12:15], 0x11 ; C2040D11 s_buffer_load_dword s9, s[12:15], 0x10 ; C2048D10 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmp_neq_f32_e64 s[10:11], s10, -s10 ; D01A000A 4000140A v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 v_cmp_gt_f32_e64 s[12:13], s1, 0.5 ; D008000C 0001E001 v_cndmask_b32_e64 v1, 0, -1, s[12:13] ; D2000801 00318280 v_mov_b32_e32 v0, 0x44800000 ; 7E0002FF 44800000 v_subrev_f32_e32 v0, s0, v0 ; 0A000000 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx8 s[20:27], s[6:7], 0x0 ; C0CA0700 s_load_dwordx8 s[28:35], s[6:7], 0x8 ; C0CE0708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[4:5], s[10:11] ; BE84240A s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_19 ; BF880000 v_cmp_eq_i32_e64 s[6:7], v1, 0 ; D1040006 00010101 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[28:35], s[16:19] ; F0800F00 00870A05 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_20 ; BF880000 v_mul_f32_e32 v2, s8, v6 ; 10040C08 v_fract_f32_e32 v2, v2 ; 7E044102 v_mad_f32 v2, s8, v6, -v2 ; D2820002 840A0C08 v_add_f32_e32 v3, 0.5, v2 ; 060604F0 v_rcp_f32_e32 v10, s8 ; 7E145408 v_mul_f32_e32 v11, v10, v3 ; 1016070A v_mul_f32_e32 v3, s9, v5 ; 10060A09 v_fract_f32_e32 v3, v3 ; 7E064103 v_mad_f32 v3, s9, v5, -v3 ; D2820003 840E0A09 v_add_f32_e32 v14, 0.5, v3 ; 061C06F0 v_rcp_f32_e32 v15, s9 ; 7E1E5409 v_mul_f32_e32 v10, v15, v14 ; 10141D0F v_mov_b32_e32 v12, 0 ; 7E180280 image_sample_l v[14:15], 9, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[20:27], s[12:15] ; F0900900 00650E0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, 0x437f0000, v14 ; 10201CFF 437F0000 v_mov_b32_e32 v17, 0x477f0000 ; 7E2202FF 477F0000 v_mad_f32 v16, v17, v15, v16 ; D2820010 04421F11 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_fract_f32_e32 v17, v16 ; 7E224110 v_subrev_f32_e32 v16, v17, v16 ; 0A202111 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_mad_f32 v14, s0, v14, 0.5 ; D282000E 03C21C00 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_cmp_ge_f32_e64 s[10:11], v0, 0 ; D00C000A 00010100 v_cndmask_b32_e64 v15, 0, -1, s[10:11] ; D200080F 00298280 v_cmp_ne_i32_e64 s[10:11], v15, 0 ; D10A000A 0001010F v_cndmask_b32_e64 v14, v16, v14, s[10:11] ; D200000E 002A1D10 v_rcp_f32_e32 v15, s1 ; 7E1E5401 v_mad_f32 v15, s3, v15, 0.5 ; D282000F 03C21E03 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_cmp_ge_f32_e64 s[10:11], v16, 0 ; D00C000A 00010110 v_cndmask_b32_e64 v16, 0, -1, s[10:11] ; D2000010 00298280 v_cmp_ne_i32_e64 s[10:11], v16, 0 ; D10A000A 00010110 v_xor_b32_e32 v16, 0x80000000, v15 ; 3A201EFF 80000000 v_cndmask_b32_e64 v16, v16, v15, s[10:11] ; D2000010 002A1F10 v_rcp_f32_e32 v17, v16 ; 7E225510 v_mul_f32_e32 v17, v14, v17 ; 1022230E v_fract_f32_e32 v17, v17 ; 7E224111 v_mul_f32_e32 v18, v16, v17 ; 10242310 v_fract_f32_e32 v18, v18 ; 7E244112 v_mad_f32 v16, v17, v16, -v18 ; D2820010 844A2111 v_mad_f32 v3, s9, v5, -v3 ; D2820003 840E0A09 v_add_f32_e32 v3, v16, v3 ; 06060710 v_mul_f32_e32 v16, s1, v3 ; 10200601 v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v3, v3, s1, -v16 ; D2820003 84400303 v_add_f32_e32 v3, 0.5, v3 ; 060606F0 v_rcp_f32_e32 v16, s3 ; 7E205403 v_mul_f32_e32 v10, v16, v3 ; 10140710 v_rcp_f32_e32 v3, v15 ; 7E06550F v_mul_f32_e32 v15, v14, v3 ; 101E070E v_fract_f32_e32 v15, v15 ; 7E1E410F v_mad_f32 v3, v3, v14, -v15 ; D2820003 843E1D03 v_mad_f32 v2, s8, v6, -v2 ; D2820002 840A0C08 v_add_f32_e32 v2, v3, v2 ; 06040503 v_mul_f32_e32 v3, s1, v2 ; 10060401 v_fract_f32_e32 v3, v3 ; 7E064103 v_mad_f32 v2, v2, s1, -v3 ; D2820002 840C0302 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_rcp_f32_e32 v3, s2 ; 7E065402 v_mul_f32_e32 v11, v3, v2 ; 10160503 image_sample_l v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[28:35], s[16:19] ; F0900F00 00870A0A s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[6:7] ; 88FE067E v_mov_b32_e32 v14, v10 ; 7E1C030A v_mov_b32_e32 v15, v11 ; 7E1E030B v_mov_b32_e32 v16, v12 ; 7E20030C v_mov_b32_e32 v17, v13 ; 7E22030D s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E s_cbranch_execz BB0_12 ; BF880000 v_cmp_eq_i32_e64 s[6:7], v1, 0 ; D1040006 00010101 s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[28:35], s[16:19] ; F0800F00 00870E05 v_mov_b32_e32 v12, v7 ; 7E180307 v_mov_b32_e32 v11, v8 ; 7E160308 v_mov_b32_e32 v10, v9 ; 7E140309 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_waitcnt expcnt(0) ; BF8C070F s_xor_b64 exec, exec, s[6:7] ; 89FE067E s_cbranch_execz BB0_11 ; BF880000 v_mul_f32_e32 v1, s8, v6 ; 10020C08 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s8, v6, -v1 ; D2820001 84060C08 v_add_f32_e32 v2, 0.5, v1 ; 060402F0 v_rcp_f32_e32 v3, s8 ; 7E065408 v_mul_f32_e32 v11, v3, v2 ; 10160503 v_mul_f32_e32 v2, s9, v5 ; 10040A09 v_fract_f32_e32 v2, v2 ; 7E044102 v_mad_f32 v2, s9, v5, -v2 ; D2820002 840A0A09 v_add_f32_e32 v3, 0.5, v2 ; 060604F0 v_rcp_f32_e32 v14, s9 ; 7E1C5409 v_mul_f32_e32 v10, v14, v3 ; 1014070E v_mov_b32_e32 v12, 0 ; 7E180280 image_sample_l v[14:15], 9, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[20:27], s[12:15] ; F0900900 00650E0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, 0x437f0000, v14 ; 10061CFF 437F0000 v_mov_b32_e32 v16, 0x477f0000 ; 7E2002FF 477F0000 v_mad_f32 v3, v16, v15, v3 ; D2820003 040E1F10 v_add_f32_e32 v3, 0.5, v3 ; 060606F0 v_fract_f32_e32 v16, v3 ; 7E204103 v_subrev_f32_e32 v3, v16, v3 ; 0A060710 v_add_f32_e32 v3, 0.5, v3 ; 060606F0 v_mad_f32 v14, s0, v14, 0.5 ; D282000E 03C21C00 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_cmp_ge_f32_e64 s[10:11], v0, 0 ; D00C000A 00010100 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_ne_i32_e64 s[10:11], v0, 0 ; D10A000A 00010100 v_cndmask_b32_e64 v0, v3, v14, s[10:11] ; D2000000 182A1D03 v_rcp_f32_e32 v3, s1 ; 7E065401 v_mad_f32 v3, s3, v3, 0.5 ; D2820003 03C20603 v_fract_f32_e32 v14, v3 ; 7E1C4103 v_subrev_f32_e32 v3, v14, v3 ; 0A06070E v_mul_f32_e32 v14, v0, v3 ; 101C0700 v_cmp_ge_f32_e64 s[10:11], v14, 0 ; D00C000A 0001010E v_cndmask_b32_e64 v14, 0, -1, s[10:11] ; D200000E 00298280 v_cmp_ne_i32_e64 s[10:11], v14, 0 ; D10A000A 0001010E v_xor_b32_e32 v14, 0x80000000, v3 ; 3A1C06FF 80000000 v_cndmask_b32_e64 v14, v14, v3, s[10:11] ; D200000E 102A070E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v15, v0 ; 101E010F v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v2, s9, v5, -v2 ; D2820002 840A0A09 v_add_f32_e32 v2, v14, v2 ; 0604050E v_mul_f32_e32 v14, s1, v2 ; 101C0401 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v2, v2, s1, -v14 ; D2820002 84380302 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_rcp_f32_e32 v14, s3 ; 7E1C5403 v_mul_f32_e32 v10, v14, v2 ; 1014050E v_rcp_f32_e32 v2, v3 ; 7E045503 v_mul_f32_e32 v3, v0, v2 ; 10060500 v_fract_f32_e32 v3, v3 ; 7E064103 v_mad_f32 v0, v2, v0, -v3 ; D2820000 840E0102 v_mad_f32 v1, s8, v6, -v1 ; D2820001 84060C08 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, s1, v0 ; 10020001 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s1, -v1 ; D2820000 84040300 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s2 ; 7E025402 v_mul_f32_e32 v11, v1, v0 ; 10160101 image_sample_l v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[28:35], s[16:19] ; F0900F00 00870E0A v_mov_b32_e32 v12, v7 ; 7E180307 v_mov_b32_e32 v11, v8 ; 7E160308 v_mov_b32_e32 v10, v9 ; 7E140309 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_pkrtz_f16_f32_e32 v0, v10, v11 ; 5E00170A v_mul_f32_e32 v1, v4, v17 ; 10022304 v_cvt_pkrtz_f16_f32_e32 v1, v12, v1 ; 5E02030C exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) br label %ENDIF ELSE: ; preds = %main_body %158 = bitcast float %42 to i32 %159 = bitcast float %43 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = bitcast <8 x i32> %35 to <32 x i8> %163 = bitcast <4 x i32> %37 to <16 x i8> %164 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %161, <32 x i8> %162, <16 x i8> %163, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %164, %ELSE ], [ %157, %IF ] %165 = extractelement <4 x float> %.sink, i32 0 %166 = extractelement <4 x float> %.sink, i32 1 %167 = extractelement <4 x float> %.sink, i32 2 %168 = extractelement <4 x float> %.sink, i32 3 %169 = fmul float %165, %38 %170 = fmul float %166, %39 %171 = fmul float %167, %40 %172 = fmul float %168, %41 %173 = call i32 @llvm.SI.packf16(float %169, float %170) %174 = bitcast i32 %173 to float %175 = call i32 @llvm.SI.packf16(float %171, float %172) %176 = bitcast i32 %175 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %174, float %176, float %174, float %176) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640806 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v7 ; 10000E19 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v8, s25 ; 7E105419 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mul_f32_e32 v1, s26, v6 ; 10020C1A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v12, 0.5, v1 ; 061802F0 v_rcp_f32_e32 v13, s26 ; 7E1A541A v_mul_f32_e32 v8, v13, v12 ; 1010190D v_mov_b32_e32 v10, 0 ; 7E140280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[12:13], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[32:39], s[28:31] ; F0900900 00E80C08 v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v13, v14, 0.5 ; D282000E 03C21D0D v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v12 ; 101E18FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v12, s27, v12, 0.5 ; D282000C 03C2181B v_fract_f32_e32 v13, v12 ; 7E1A410C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_mov_b32_e32 v13, 0x44800000 ; 7E1A02FF 44800000 v_subrev_f32_e32 v13, s27, v13 ; 0A1A1A1B v_cmp_ge_f32_e64 s[28:29], v13, 0 ; D00C001C 0001010D v_cndmask_b32_e64 v13, 0, -1, s[28:29] ; D200080D 00718280 v_cmp_ne_i32_e64 s[28:29], v13, 0 ; D10A001C 0001010D v_cndmask_b32_e64 v12, v14, v12, s[28:29] ; D200000C 1072190E v_rcp_f32_e32 v13, s8 ; 7E1A5408 v_mad_f32 v13, s24, v13, 0.5 ; D282000D 03C21A18 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v12, v13 ; 101C1B0C v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[28:29] ; D200000E 10721B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v12, v15 ; 101E1F0C v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v8, v14, v1 ; 1010030E v_rcp_f32_e32 v1, v13 ; 7E02550D v_mul_f32_e32 v13, v12, v1 ; 101A030C v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v1, v1, v12, -v13 ; D2820001 84361901 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[12:15] ; F0900F00 00640808 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v0, v2, v11 ; 10001702 v_mul_f32_e32 v1, v3, v10 ; 10021503 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mul_f32_e32 v1, v4, v9 ; 10021304 v_mul_f32_e32 v2, v5, v8 ; 10041105 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV TEMP[1].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxyx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_add_f32_e32 v0, 0, v0 ; 06000080 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..3] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 17.0000, 3721.0000} IMM[1] FLT32 { 13.0000, 930.2500, 1860.5000, -0.5000} IMM[2] FLT32 { -0.0000, -1.0000, -6.0000, 6.0000} IMM[3] FLT32 { -2.0000, 3.0000, 0.5000, -1.0000} IMM[4] FLT32 { -1.0000, 1.0000, 0.0000, 0.5000} IMM[5] FLT32 { 1.0000, -1.0000, -2.0000, 2.0000} IMM[6] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, CONST[2].xxxx 1: MUL TEMP[0], TEMP[0].xxxx, CONST[0].yyxx 2: FRC TEMP[1], TEMP[0].yyww 3: MOV TEMP[2].w, TEMP[1].wwww 4: ADD TEMP[0], TEMP[0], -TEMP[1] 5: RCP TEMP[2].x, TEMP[0].yyyy 6: MUL TEMP[1].yz, TEMP[0].xyww, IN[0].xyxw 7: MOV TEMP[2].yz, TEMP[1].zyzz 8: FRC TEMP[1].xy, TEMP[2].zyzw 9: MOV TEMP[3].xy, TEMP[1].xyxx 10: MAD TEMP[4], IN[0].yyxx, TEMP[0], -TEMP[1].yyxx 11: RCP TEMP[0].x, TEMP[0].wwww 12: ADD TEMP[4], TEMP[4], IMM[0].xyxy 13: MUL TEMP[5], TEMP[4].xyxy, IMM[0].zzzz 14: MUL TEMP[2], TEMP[2].xxxx, TEMP[5] 15: FRC TEMP[6], TEMP[2] 16: MUL TEMP[2], TEMP[6], TEMP[6] 17: MAD TEMP[2], TEMP[2], IMM[0].wwww, TEMP[4].zzww 18: MUL TEMP[2], TEMP[2], IMM[1].xxxx 19: MUL TEMP[0], TEMP[0].xxxx, TEMP[2] 20: FRC TEMP[6], TEMP[0] 21: MUL TEMP[0], TEMP[6], TEMP[6] 22: MUL TEMP[2], TEMP[0], IMM[1].yyyy 23: MUL TEMP[0], TEMP[0], IMM[1].zzzz 24: FRC TEMP[6], TEMP[0] 25: ADD TEMP[0], TEMP[6], IMM[1].wwww 26: FRC TEMP[6], TEMP[2] 27: ADD TEMP[2], TEMP[6], IMM[1].wwww 28: MOV TEMP[6], -TEMP[2] 29: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 30: UIF TEMP[7].xxxx :0 31: MOV TEMP[7].x, IMM[0].xxxx 32: ELSE :0 33: MOV TEMP[7].x, IMM[0].yyyy 34: ENDIF 35: MOV TEMP[7].x, TEMP[7].xxxx 36: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].xxxx 37: UIF TEMP[8].xxxx :0 38: MOV TEMP[8].x, IMM[0].xxxx 39: ELSE :0 40: MOV TEMP[8].x, IMM[0].yyyy 41: ENDIF 42: MOV TEMP[7].y, TEMP[8].xxxx 43: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].xxxx 44: UIF TEMP[8].xxxx :0 45: MOV TEMP[8].x, IMM[0].xxxx 46: ELSE :0 47: MOV TEMP[8].x, IMM[0].yyyy 48: ENDIF 49: MOV TEMP[7].z, TEMP[8].xxxx 50: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].xxxx 51: UIF TEMP[6].xxxx :0 52: MOV TEMP[6].x, IMM[0].xxxx 53: ELSE :0 54: MOV TEMP[6].x, IMM[0].yyyy 55: ENDIF 56: MOV TEMP[7].w, TEMP[6].xxxx 57: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[0].xxxx 58: UIF TEMP[6].xxxx :0 59: MOV TEMP[6].x, IMM[2].xxxx 60: ELSE :0 61: MOV TEMP[6].x, IMM[2].yyyy 62: ENDIF 63: MOV TEMP[6].x, TEMP[6].xxxx 64: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[0].xxxx 65: UIF TEMP[8].xxxx :0 66: MOV TEMP[8].x, IMM[2].xxxx 67: ELSE :0 68: MOV TEMP[8].x, IMM[2].yyyy 69: ENDIF 70: MOV TEMP[6].y, TEMP[8].xxxx 71: FSGE TEMP[8].x, TEMP[2].zzzz, IMM[0].xxxx 72: UIF TEMP[8].xxxx :0 73: MOV TEMP[8].x, IMM[2].xxxx 74: ELSE :0 75: MOV TEMP[8].x, IMM[2].yyyy 76: ENDIF 77: MOV TEMP[6].z, TEMP[8].xxxx 78: FSGE TEMP[8].x, TEMP[2].wwww, IMM[0].xxxx 79: UIF TEMP[8].xxxx :0 80: MOV TEMP[8].x, IMM[2].xxxx 81: ELSE :0 82: MOV TEMP[8].x, IMM[2].yyyy 83: ENDIF 84: MOV TEMP[6].w, TEMP[8].xxxx 85: ADD TEMP[2], TEMP[6], TEMP[7] 86: ADD TEMP[4], TEMP[1].xxyy, IMM[2].xyxy 87: MUL TEMP[5], TEMP[2], TEMP[4].zwzw 88: MOV TEMP[6], -TEMP[0] 89: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 90: UIF TEMP[7].xxxx :0 91: MOV TEMP[7].x, IMM[0].xxxx 92: ELSE :0 93: MOV TEMP[7].x, IMM[0].yyyy 94: ENDIF 95: MOV TEMP[7].x, TEMP[7].xxxx 96: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].xxxx 97: UIF TEMP[8].xxxx :0 98: MOV TEMP[8].x, IMM[0].xxxx 99: ELSE :0 100: MOV TEMP[8].x, IMM[0].yyyy 101: ENDIF 102: MOV TEMP[7].y, TEMP[8].xxxx 103: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].xxxx 104: UIF TEMP[8].xxxx :0 105: MOV TEMP[8].x, IMM[0].xxxx 106: ELSE :0 107: MOV TEMP[8].x, IMM[0].yyyy 108: ENDIF 109: MOV TEMP[7].z, TEMP[8].xxxx 110: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].xxxx 111: UIF TEMP[6].xxxx :0 112: MOV TEMP[6].x, IMM[0].xxxx 113: ELSE :0 114: MOV TEMP[6].x, IMM[0].yyyy 115: ENDIF 116: MOV TEMP[7].w, TEMP[6].xxxx 117: FSGE TEMP[6].x, TEMP[0].xxxx, IMM[0].xxxx 118: UIF TEMP[6].xxxx :0 119: MOV TEMP[6].x, IMM[2].xxxx 120: ELSE :0 121: MOV TEMP[6].x, IMM[2].yyyy 122: ENDIF 123: MOV TEMP[6].x, TEMP[6].xxxx 124: FSGE TEMP[8].x, TEMP[0].yyyy, IMM[0].xxxx 125: UIF TEMP[8].xxxx :0 126: MOV TEMP[8].x, IMM[2].xxxx 127: ELSE :0 128: MOV TEMP[8].x, IMM[2].yyyy 129: ENDIF 130: MOV TEMP[6].y, TEMP[8].xxxx 131: FSGE TEMP[8].x, TEMP[0].zzzz, IMM[0].xxxx 132: UIF TEMP[8].xxxx :0 133: MOV TEMP[8].x, IMM[2].xxxx 134: ELSE :0 135: MOV TEMP[8].x, IMM[2].yyyy 136: ENDIF 137: MOV TEMP[6].z, TEMP[8].xxxx 138: FSGE TEMP[8].x, TEMP[0].wwww, IMM[0].xxxx 139: UIF TEMP[8].xxxx :0 140: MOV TEMP[8].x, IMM[2].xxxx 141: ELSE :0 142: MOV TEMP[8].x, IMM[2].yyyy 143: ENDIF 144: MOV TEMP[6].w, TEMP[8].xxxx 145: ADD TEMP[0], TEMP[6], TEMP[7] 146: MAD TEMP[4], TEMP[4].xxyy, TEMP[0], TEMP[5] 147: ADD TEMP[4], -TEMP[4].xyxz, TEMP[4].zwyw 148: MAD TEMP[6].zw, TEMP[1].xyxy, IMM[2].zzzz, IMM[2].wwww 149: MOV TEMP[3].zw, TEMP[6].wwzw 150: MUL TEMP[5], TEMP[1].xyxy, TEMP[3] 151: MAD TEMP[3], TEMP[1].xyxx, IMM[3].xxxx, IMM[3].yyyy 152: MUL TEMP[3], TEMP[3], TEMP[5].xyxx 153: LRP TEMP[6].xy, TEMP[3].wwww, TEMP[0].zwzw, TEMP[0] 154: MOV TEMP[1].xy, TEMP[6].xyxx 155: LRP TEMP[6].zw, TEMP[3].yyyy, TEMP[2].xyyw, TEMP[2].xyxz 156: MOV TEMP[1].zw, TEMP[6].wwzw 157: MAD TEMP[0], TEMP[4], TEMP[5].zzww, TEMP[1] 158: LRP TEMP[1], TEMP[3], TEMP[0].wyww, TEMP[0].zxzz 159: MOV TEMP[2].z, TEMP[1].zyzz 160: MUL TEMP[0], TEMP[1], CONST[1].xxxx 161: MUL TEMP[0], TEMP[0], IMM[3].zzzz 162: MAD TEMP[0], TEMP[0], IMM[4].xyzz, IMM[4].wwzy 163: MOV TEMP[2].w, IMM[0].xxxx 164: RCP TEMP[2].x, CONST[0].xxxx 165: ADD TEMP[1].xy, TEMP[2].xwzw, IN[0] 166: MOV TEMP[1].xy, TEMP[1].xyyy 167: TEX TEMP[1].xw, TEMP[1], SAMP[0], 2D 168: MOV TEMP[3].x, TEMP[1].xxxw 169: MOV TEMP[2].y, -TEMP[2].xxxx 170: MOV TEMP[4].yw, TEMP[2].xyxx 171: RCP TEMP[4].x, CONST[0].yyyy 172: ADD TEMP[1].xy, TEMP[4].yxzw, IN[0] 173: ADD TEMP[6].yw, TEMP[4].xwzx, IN[0].xxzy 174: MOV TEMP[6].xy, TEMP[6].ywww 175: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 176: MOV TEMP[3].z, TEMP[6].xxxx 177: MOV TEMP[1].xy, TEMP[1].xyyy 178: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 179: MOV TEMP[3].y, TEMP[1].xxxx 180: DP3 TEMP[1].x, TEMP[3].yzxx, IMM[5].xyzz 181: ADD TEMP[6].yw, TEMP[2], IN[0].xxzy 182: MOV TEMP[6].xy, TEMP[6].ywww 183: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 184: MOV TEMP[5].z, TEMP[6].xxxx 185: MOV TEMP[2].z, -TEMP[4].xxxx 186: ADD TEMP[6].yw, TEMP[2].xxzz, IN[0].xxzy 187: MOV TEMP[4].yw, TEMP[6].wyww 188: MOV TEMP[6].xy, TEMP[6].ywww 189: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 190: MOV TEMP[5].y, TEMP[6].xxxx 191: ADD TEMP[6].xy, TEMP[2].yzzw, IN[0] 192: MOV TEMP[2].xy, TEMP[6].xyxx 193: ADD TEMP[7].zw, TEMP[2].xywz, IN[0].xyxy 194: MOV TEMP[7].xy, TEMP[7].zwww 195: TEX TEMP[7].x, TEMP[7], SAMP[0], 2D 196: MOV TEMP[5].w, TEMP[7].xxxx 197: MOV TEMP[6].xy, TEMP[6].xyyy 198: TEX TEMP[6].xw, TEMP[6], SAMP[0], 2D 199: MOV TEMP[2].w, TEMP[6].wwww 200: MOV TEMP[5].x, TEMP[6].xxxx 201: DP3 TEMP[6].x, TEMP[5].xyww, IMM[5].xxww 202: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[5].xyww 203: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[7].xxxx 204: MOV TEMP[5].x, TEMP[1].xxxx 205: MOV TEMP[4].z, IMM[0].xxxx 206: ADD TEMP[1].xz, TEMP[4].zyxw, IN[0].xyyw 207: MOV TEMP[1].xy, TEMP[1].xzzz 208: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 209: MOV TEMP[3].w, TEMP[1].xxxx 210: DP3 TEMP[1].x, TEMP[3].yzww, IMM[3].wwxx 211: ADD TEMP[1].y, TEMP[1].xxxx, TEMP[6].xxxx 212: MOV TEMP[5].y, TEMP[1].yyyy 213: MUL TEMP[1].xy, TEMP[5], CONST[3].xxxx 214: MOV TEMP[2].xy, TEMP[1].xyxx 215: MUL TEMP[4].xyz, TEMP[2].yxyw, IMM[3].zzzz 216: DP2 TEMP[2].x, TEMP[1].xyyy, TEMP[1].xyyy 217: MAX TEMP[1].x, TEMP[2].xxxx, IMM[6].xxxx 218: RSQ TEMP[5].x, TEMP[1].xxxx 219: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx 220: CMP TEMP[2].x, -TEMP[1].xxxx, TEMP[5].xxxx, IMM[0].xxxx 221: MAD TEMP[1].yzw, TEMP[4].xxyz, IMM[4].xxyz, IMM[4].xwwz 222: MOV TEMP[2].yzw, TEMP[1].zyzw 223: ADD TEMP[3], -TEMP[0], TEMP[2].yzwx 224: MOV_SAT TEMP[1].x, TEMP[2].xxxx 225: MAD TEMP[0], TEMP[1].xxxx, TEMP[3], TEMP[0] 226: MOV OUT[0], TEMP[0] 227: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fdiv float 1.000000e+00, %27 %36 = fmul float %35, %25 %37 = fmul float %35, %25 %38 = fmul float %35, %24 %39 = fmul float %35, %24 %40 = call float @llvm.AMDIL.fraction.(float %37) %41 = call float @llvm.AMDIL.fraction.(float %37) %42 = call float @llvm.AMDIL.fraction.(float %39) %43 = call float @llvm.AMDIL.fraction.(float %39) %44 = fsub float -0.000000e+00, %40 %45 = fadd float %36, %44 %46 = fsub float -0.000000e+00, %41 %47 = fadd float %37, %46 %48 = fsub float -0.000000e+00, %42 %49 = fadd float %38, %48 %50 = fsub float -0.000000e+00, %43 %51 = fadd float %39, %50 %52 = fdiv float 1.000000e+00, %47 %53 = fmul float %47, %34 %54 = fmul float %51, %33 %55 = call float @llvm.AMDIL.fraction.(float %54) %56 = call float @llvm.AMDIL.fraction.(float %53) %57 = fsub float -0.000000e+00, %56 %58 = fmul float %34, %45 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %34, %47 %62 = fadd float %61, %60 %63 = fsub float -0.000000e+00, %55 %64 = fmul float %33, %49 %65 = fadd float %64, %63 %66 = fsub float -0.000000e+00, %55 %67 = fmul float %33, %51 %68 = fadd float %67, %66 %69 = fdiv float 1.000000e+00, %51 %70 = fadd float %59, 0.000000e+00 %71 = fadd float %62, 1.000000e+00 %72 = fadd float %65, 0.000000e+00 %73 = fadd float %68, 1.000000e+00 %74 = fmul float %70, 1.700000e+01 %75 = fmul float %71, 1.700000e+01 %76 = fmul float %70, 1.700000e+01 %77 = fmul float %71, 1.700000e+01 %78 = fmul float %52, %74 %79 = fmul float %52, %75 %80 = fmul float %52, %76 %81 = fmul float %52, %77 %82 = call float @llvm.AMDIL.fraction.(float %78) %83 = call float @llvm.AMDIL.fraction.(float %79) %84 = call float @llvm.AMDIL.fraction.(float %80) %85 = call float @llvm.AMDIL.fraction.(float %81) %86 = fmul float %82, %82 %87 = fmul float %83, %83 %88 = fmul float %84, %84 %89 = fmul float %85, %85 %90 = fmul float %86, 3.721000e+03 %91 = fadd float %90, %72 %92 = fmul float %87, 3.721000e+03 %93 = fadd float %92, %72 %94 = fmul float %88, 3.721000e+03 %95 = fadd float %94, %73 %96 = fmul float %89, 3.721000e+03 %97 = fadd float %96, %73 %98 = fmul float %91, 1.300000e+01 %99 = fmul float %93, 1.300000e+01 %100 = fmul float %95, 1.300000e+01 %101 = fmul float %97, 1.300000e+01 %102 = fmul float %69, %98 %103 = fmul float %69, %99 %104 = fmul float %69, %100 %105 = fmul float %69, %101 %106 = call float @llvm.AMDIL.fraction.(float %102) %107 = call float @llvm.AMDIL.fraction.(float %103) %108 = call float @llvm.AMDIL.fraction.(float %104) %109 = call float @llvm.AMDIL.fraction.(float %105) %110 = fmul float %106, %106 %111 = fmul float %107, %107 %112 = fmul float %108, %108 %113 = fmul float %109, %109 %114 = fmul float %110, 9.302500e+02 %115 = fmul float %111, 9.302500e+02 %116 = fmul float %112, 9.302500e+02 %117 = fmul float %113, 9.302500e+02 %118 = fmul float %110, 1.860500e+03 %119 = fmul float %111, 1.860500e+03 %120 = fmul float %112, 1.860500e+03 %121 = fmul float %113, 1.860500e+03 %122 = call float @llvm.AMDIL.fraction.(float %118) %123 = call float @llvm.AMDIL.fraction.(float %119) %124 = call float @llvm.AMDIL.fraction.(float %120) %125 = call float @llvm.AMDIL.fraction.(float %121) %126 = fadd float %122, -5.000000e-01 %127 = fadd float %123, -5.000000e-01 %128 = fadd float %124, -5.000000e-01 %129 = fadd float %125, -5.000000e-01 %130 = call float @llvm.AMDIL.fraction.(float %114) %131 = call float @llvm.AMDIL.fraction.(float %115) %132 = call float @llvm.AMDIL.fraction.(float %116) %133 = call float @llvm.AMDIL.fraction.(float %117) %134 = fadd float %130, -5.000000e-01 %135 = fadd float %131, -5.000000e-01 %136 = fadd float %132, -5.000000e-01 %137 = fadd float %133, -5.000000e-01 %138 = fsub float -0.000000e+00, %134 %139 = fsub float -0.000000e+00, %135 %140 = fsub float -0.000000e+00, %136 %141 = fsub float -0.000000e+00, %137 %142 = fcmp oge float %138, 0.000000e+00 %143 = sext i1 %142 to i32 %144 = bitcast i32 %143 to float %145 = bitcast float %144 to i32 %146 = icmp ne i32 %145, 0 %. = select i1 %146, float 0.000000e+00, float 1.000000e+00 %147 = fcmp oge float %139, 0.000000e+00 %148 = sext i1 %147 to i32 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = icmp ne i32 %150, 0 %temp32.0 = select i1 %151, float 0.000000e+00, float 1.000000e+00 %152 = fcmp oge float %140, 0.000000e+00 %153 = sext i1 %152 to i32 %154 = bitcast i32 %153 to float %155 = bitcast float %154 to i32 %156 = icmp ne i32 %155, 0 %.81 = select i1 %156, float 0.000000e+00, float 1.000000e+00 %157 = fcmp oge float %141, 0.000000e+00 %158 = sext i1 %157 to i32 %159 = bitcast i32 %158 to float %160 = bitcast float %159 to i32 %161 = icmp ne i32 %160, 0 %temp24.0 = select i1 %161, float 0.000000e+00, float 1.000000e+00 %162 = fcmp oge float %134, 0.000000e+00 %163 = sext i1 %162 to i32 %164 = bitcast i32 %163 to float %165 = bitcast float %164 to i32 %166 = icmp ne i32 %165, 0 %.82 = select i1 %166, float -0.000000e+00, float -1.000000e+00 %167 = fcmp oge float %135, 0.000000e+00 %168 = sext i1 %167 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = icmp ne i32 %170, 0 %temp32.2 = select i1 %171, float -0.000000e+00, float -1.000000e+00 %172 = fcmp oge float %136, 0.000000e+00 %173 = sext i1 %172 to i32 %174 = bitcast i32 %173 to float %175 = bitcast float %174 to i32 %176 = icmp ne i32 %175, 0 %.83 = select i1 %176, float -0.000000e+00, float -1.000000e+00 %177 = fcmp oge float %137, 0.000000e+00 %178 = sext i1 %177 to i32 %179 = bitcast i32 %178 to float %180 = bitcast float %179 to i32 %181 = icmp ne i32 %180, 0 %temp32.4 = select i1 %181, float -0.000000e+00, float -1.000000e+00 %182 = fadd float %.82, %. %183 = fadd float %temp32.2, %temp32.0 %184 = fadd float %.83, %.81 %185 = fadd float %temp32.4, %temp24.0 %186 = fadd float %55, -0.000000e+00 %187 = fadd float %55, -1.000000e+00 %188 = fadd float %56, -0.000000e+00 %189 = fadd float %56, -1.000000e+00 %190 = fmul float %182, %188 %191 = fmul float %183, %189 %192 = fmul float %184, %188 %193 = fmul float %185, %189 %194 = fsub float -0.000000e+00, %126 %195 = fsub float -0.000000e+00, %127 %196 = fsub float -0.000000e+00, %128 %197 = fsub float -0.000000e+00, %129 %198 = fcmp oge float %194, 0.000000e+00 %199 = sext i1 %198 to i32 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = icmp ne i32 %201, 0 %.84 = select i1 %202, float 0.000000e+00, float 1.000000e+00 %203 = fcmp oge float %195, 0.000000e+00 %204 = sext i1 %203 to i32 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = icmp ne i32 %206, 0 %temp32.5 = select i1 %207, float 0.000000e+00, float 1.000000e+00 %208 = fcmp oge float %196, 0.000000e+00 %209 = sext i1 %208 to i32 %210 = bitcast i32 %209 to float %211 = bitcast float %210 to i32 %212 = icmp ne i32 %211, 0 %.85 = select i1 %212, float 0.000000e+00, float 1.000000e+00 %213 = fcmp oge float %197, 0.000000e+00 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 %temp24.2 = select i1 %217, float 0.000000e+00, float 1.000000e+00 %218 = fcmp oge float %126, 0.000000e+00 %219 = sext i1 %218 to i32 %220 = bitcast i32 %219 to float %221 = bitcast float %220 to i32 %222 = icmp ne i32 %221, 0 %.86 = select i1 %222, float -0.000000e+00, float -1.000000e+00 %223 = fcmp oge float %127, 0.000000e+00 %224 = sext i1 %223 to i32 %225 = bitcast i32 %224 to float %226 = bitcast float %225 to i32 %227 = icmp ne i32 %226, 0 %temp32.7 = select i1 %227, float -0.000000e+00, float -1.000000e+00 %228 = fcmp oge float %128, 0.000000e+00 %229 = sext i1 %228 to i32 %230 = bitcast i32 %229 to float %231 = bitcast float %230 to i32 %232 = icmp ne i32 %231, 0 %.87 = select i1 %232, float -0.000000e+00, float -1.000000e+00 %233 = fcmp oge float %129, 0.000000e+00 %234 = sext i1 %233 to i32 %235 = bitcast i32 %234 to float %236 = bitcast float %235 to i32 %237 = icmp ne i32 %236, 0 %temp32.9 = select i1 %237, float -0.000000e+00, float -1.000000e+00 %238 = fadd float %.86, %.84 %239 = fadd float %temp32.7, %temp32.5 %240 = fadd float %.87, %.85 %241 = fadd float %temp32.9, %temp24.2 %242 = fmul float %186, %238 %243 = fadd float %242, %190 %244 = fmul float %186, %239 %245 = fadd float %244, %191 %246 = fmul float %187, %240 %247 = fadd float %246, %192 %248 = fmul float %187, %241 %249 = fadd float %248, %193 %250 = fsub float -0.000000e+00, %243 %251 = fadd float %250, %247 %252 = fsub float -0.000000e+00, %245 %253 = fadd float %252, %249 %254 = fsub float -0.000000e+00, %243 %255 = fadd float %254, %245 %256 = fsub float -0.000000e+00, %247 %257 = fadd float %256, %249 %258 = fmul float %55, -6.000000e+00 %259 = fadd float %258, 6.000000e+00 %260 = fmul float %56, -6.000000e+00 %261 = fadd float %260, 6.000000e+00 %262 = fmul float %55, %55 %263 = fmul float %56, %56 %264 = fmul float %55, %259 %265 = fmul float %56, %261 %266 = fmul float %55, -2.000000e+00 %267 = fadd float %266, 3.000000e+00 %268 = fmul float %56, -2.000000e+00 %269 = fadd float %268, 3.000000e+00 %270 = fmul float %55, -2.000000e+00 %271 = fadd float %270, 3.000000e+00 %272 = fmul float %55, -2.000000e+00 %273 = fadd float %272, 3.000000e+00 %274 = fmul float %267, %262 %275 = fmul float %269, %263 %276 = fmul float %271, %262 %277 = fmul float %273, %262 %278 = call float @llvm.AMDGPU.lrp(float %277, float %240, float %238) %279 = call float @llvm.AMDGPU.lrp(float %277, float %241, float %239) %280 = call float @llvm.AMDGPU.lrp(float %275, float %183, float %182) %281 = call float @llvm.AMDGPU.lrp(float %275, float %185, float %184) %282 = fmul float %251, %264 %283 = fadd float %282, %278 %284 = fmul float %253, %264 %285 = fadd float %284, %279 %286 = fmul float %255, %265 %287 = fadd float %286, %280 %288 = fmul float %257, %265 %289 = fadd float %288, %281 %290 = call float @llvm.AMDGPU.lrp(float %274, float %289, float %287) %291 = call float @llvm.AMDGPU.lrp(float %275, float %285, float %283) %292 = call float @llvm.AMDGPU.lrp(float %276, float %289, float %287) %293 = call float @llvm.AMDGPU.lrp(float %277, float %289, float %287) %294 = fmul float %290, %26 %295 = fmul float %291, %26 %296 = fmul float %292, %26 %297 = fmul float %293, %26 %298 = fmul float %294, 5.000000e-01 %299 = fmul float %295, 5.000000e-01 %300 = fmul float %296, 5.000000e-01 %301 = fmul float %297, 5.000000e-01 %302 = fmul float %298, -1.000000e+00 %303 = fadd float %302, 5.000000e-01 %304 = fmul float %299, 1.000000e+00 %305 = fadd float %304, 5.000000e-01 %306 = fmul float %300, 0.000000e+00 %307 = fadd float %306, 0.000000e+00 %308 = fmul float %301, 0.000000e+00 %309 = fadd float %308, 1.000000e+00 %310 = fdiv float 1.000000e+00, %24 %311 = fadd float %310, %33 %312 = fadd float 0.000000e+00, %34 %313 = bitcast float %311 to i32 %314 = bitcast float %312 to i32 %315 = insertelement <2 x i32> undef, i32 %313, i32 0 %316 = insertelement <2 x i32> %315, i32 %314, i32 1 %317 = bitcast <8 x i32> %30 to <32 x i8> %318 = bitcast <4 x i32> %32 to <16 x i8> %319 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %316, <32 x i8> %317, <16 x i8> %318, i32 2) %320 = extractelement <4 x float> %319, i32 0 %321 = fsub float -0.000000e+00, %310 %322 = fdiv float 1.000000e+00, %25 %323 = fadd float %321, %33 %324 = fadd float %322, %34 %325 = fadd float %310, %33 %326 = fadd float %322, %34 %327 = bitcast float %325 to i32 %328 = bitcast float %326 to i32 %329 = insertelement <2 x i32> undef, i32 %327, i32 0 %330 = insertelement <2 x i32> %329, i32 %328, i32 1 %331 = bitcast <8 x i32> %30 to <32 x i8> %332 = bitcast <4 x i32> %32 to <16 x i8> %333 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %330, <32 x i8> %331, <16 x i8> %332, i32 2) %334 = extractelement <4 x float> %333, i32 0 %335 = bitcast float %323 to i32 %336 = bitcast float %324 to i32 %337 = insertelement <2 x i32> undef, i32 %335, i32 0 %338 = insertelement <2 x i32> %337, i32 %336, i32 1 %339 = bitcast <8 x i32> %30 to <32 x i8> %340 = bitcast <4 x i32> %32 to <16 x i8> %341 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %338, <32 x i8> %339, <16 x i8> %340, i32 2) %342 = extractelement <4 x float> %341, i32 0 %343 = fmul float %342, 1.000000e+00 %344 = fmul float %334, -1.000000e+00 %345 = fadd float %344, %343 %346 = fmul float %320, -2.000000e+00 %347 = fadd float %345, %346 %348 = fadd float %321, %33 %349 = fadd float 0.000000e+00, %34 %350 = bitcast float %348 to i32 %351 = bitcast float %349 to i32 %352 = insertelement <2 x i32> undef, i32 %350, i32 0 %353 = insertelement <2 x i32> %352, i32 %351, i32 1 %354 = bitcast <8 x i32> %30 to <32 x i8> %355 = bitcast <4 x i32> %32 to <16 x i8> %356 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %353, <32 x i8> %354, <16 x i8> %355, i32 2) %357 = extractelement <4 x float> %356, i32 0 %358 = fsub float -0.000000e+00, %322 %359 = fadd float %310, %33 %360 = fadd float %358, %34 %361 = bitcast float %359 to i32 %362 = bitcast float %360 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %30 to <32 x i8> %366 = bitcast <4 x i32> %32 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = fadd float %321, %33 %370 = fadd float %358, %34 %371 = fadd float 0.000000e+00, %33 %372 = fadd float %358, %34 %373 = bitcast float %371 to i32 %374 = bitcast float %372 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %30 to <32 x i8> %378 = bitcast <4 x i32> %32 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 0 %381 = bitcast float %369 to i32 %382 = bitcast float %370 to i32 %383 = insertelement <2 x i32> undef, i32 %381, i32 0 %384 = insertelement <2 x i32> %383, i32 %382, i32 1 %385 = bitcast <8 x i32> %30 to <32 x i8> %386 = bitcast <4 x i32> %32 to <16 x i8> %387 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %384, <32 x i8> %385, <16 x i8> %386, i32 2) %388 = extractelement <4 x float> %387, i32 0 %389 = fmul float %388, 1.000000e+00 %390 = fmul float %368, 1.000000e+00 %391 = fadd float %390, %389 %392 = fmul float %380, 2.000000e+00 %393 = fadd float %391, %392 %394 = fmul float %388, 1.000000e+00 %395 = fmul float %368, -1.000000e+00 %396 = fadd float %395, %394 %397 = fmul float %357, 2.000000e+00 %398 = fadd float %396, %397 %399 = fadd float %347, %398 %400 = fadd float 0.000000e+00, %33 %401 = fadd float %322, %34 %402 = bitcast float %400 to i32 %403 = bitcast float %401 to i32 %404 = insertelement <2 x i32> undef, i32 %402, i32 0 %405 = insertelement <2 x i32> %404, i32 %403, i32 1 %406 = bitcast <8 x i32> %30 to <32 x i8> %407 = bitcast <4 x i32> %32 to <16 x i8> %408 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %405, <32 x i8> %406, <16 x i8> %407, i32 2) %409 = extractelement <4 x float> %408, i32 0 %410 = fmul float %342, -1.000000e+00 %411 = fmul float %334, -1.000000e+00 %412 = fadd float %411, %410 %413 = fmul float %409, -2.000000e+00 %414 = fadd float %412, %413 %415 = fadd float %414, %393 %416 = fmul float %399, %28 %417 = fmul float %415, %28 %418 = fmul float %417, 5.000000e-01 %419 = fmul float %416, 5.000000e-01 %420 = fmul float %417, 5.000000e-01 %421 = fmul float %416, %416 %422 = fmul float %417, %417 %423 = fadd float %421, %422 %424 = call float @llvm.maxnum.f32(float %423, float 0x3E7AD7F2A0000000) %425 = call float @llvm.AMDGPU.rsq.clamped.f32(float %424) %426 = fmul float %425, %424 %427 = fsub float -0.000000e+00, %424 %428 = call float @llvm.AMDGPU.cndlt(float %427, float %426, float 0.000000e+00) %429 = fmul float %418, -1.000000e+00 %430 = fadd float %429, 5.000000e-01 %431 = fmul float %419, 1.000000e+00 %432 = fadd float %431, 5.000000e-01 %433 = fmul float %420, 0.000000e+00 %434 = fadd float %433, 0.000000e+00 %435 = fsub float -0.000000e+00, %303 %436 = fadd float %435, %430 %437 = fsub float -0.000000e+00, %305 %438 = fadd float %437, %432 %439 = fsub float -0.000000e+00, %307 %440 = fadd float %439, %434 %441 = fsub float -0.000000e+00, %309 %442 = fadd float %441, %428 %443 = call float @llvm.AMDIL.clamp.(float %428, float 0.000000e+00, float 1.000000e+00) %444 = fmul float %443, %436 %445 = fadd float %444, %303 %446 = fmul float %443, %438 %447 = fadd float %446, %305 %448 = fmul float %443, %440 %449 = fadd float %448, %307 %450 = fmul float %443, %442 %451 = fadd float %450, %309 %452 = call i32 @llvm.SI.packf16(float %445, float %447) %453 = bitcast i32 %452 to float %454 = call i32 @llvm.SI.packf16(float %449, float %451) %455 = bitcast i32 %454 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %453, float %455, float %453, float %455) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x8 ; C2048108 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v3, s9 ; 7E065409 v_mul_f32_e32 v4, s8, v3 ; 10080608 v_fract_f32_e32 v4, v4 ; 7E084104 v_mad_f32 v4, v3, s8, -v4 ; D2820004 84101103 v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_fract_f32_e32 v5, v5 ; 7E0A4105 v_mad_f32 v6, v4, v2, -v5 ; D2820006 84160504 v_add_f32_e32 v7, 0, v6 ; 060E0C80 v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100 v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s9, v3 ; 10000609 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, v3, s9, -v0 ; D2820000 84001303 v_mul_f32_e32 v1, v8, v0 ; 10020108 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v3, v0, v8, -v1 ; D2820003 84061100 v_add_f32_e32 v9, 1.0, v3 ; 061206F2 v_mul_f32_e32 v9, 0x41880000, v9 ; 101212FF 41880000 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mul_f32_e32 v9, v9, v0 ; 10120109 v_fract_f32_e32 v9, v9 ; 7E124109 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mov_b32_e32 v10, 0x45689000 ; 7E1402FF 45689000 v_mad_f32 v11, v9, v10, v7 ; D282000B 041E1509 v_mul_f32_e32 v11, 0x41500000, v11 ; 101616FF 41500000 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v11, v11, v4 ; 1016090B v_fract_f32_e32 v11, v11 ; 7E16410B v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mul_f32_e32 v12, 0x44e89000, v11 ; 101816FF 44E89000 v_fract_f32_e32 v12, v12 ; 7E18410C v_add_f32_e32 v12, -0.5, v12 ; 061818F1 v_cmp_ge_f32_e64 s[10:11], -v12, 0 ; D00C000A 2001010C v_cndmask_b32_e64 v13, 0, -1, s[10:11] ; D200080D 00298280 v_cmp_ne_i32_e64 s[10:11], v13, 0 ; D10A000A 0001010D v_cndmask_b32_e64 v13, 1.0, 0, s[10:11] ; D200080D 102900F2 v_cmp_ge_f32_e64 s[10:11], v12, 0 ; D00C000A 0001010C v_cndmask_b32_e64 v12, 0, -1, s[10:11] ; D200000C 00298280 v_cmp_ne_i32_e64 s[10:11], v12, 0 ; D10A000A 0001010C v_mov_b32_e32 v12, 0x80000000 ; 7E1802FF 80000000 v_cndmask_b32_e64 v14, -1.0, v12, s[10:11] ; D200000E 182A18F3 v_add_f32_e32 v13, v13, v14 ; 061A1D0D v_mul_f32_e32 v11, 0x44689000, v11 ; 101616FF 44689000 v_fract_f32_e32 v11, v11 ; 7E16410B v_add_f32_e32 v11, -0.5, v11 ; 061616F1 v_cmp_ge_f32_e64 s[10:11], -v11, 0 ; D00C000A 2001010B v_cndmask_b32_e64 v14, 0, -1, s[10:11] ; D200000E 00298280 v_cmp_ne_i32_e64 s[10:11], v14, 0 ; D10A000A 0001010E v_cndmask_b32_e64 v14, 1.0, 0, s[10:11] ; D200000E 102900F2 v_cmp_ge_f32_e64 s[10:11], v11, 0 ; D00C000A 0001010B v_cndmask_b32_e64 v11, 0, -1, s[10:11] ; D200080B 00298280 v_cmp_ne_i32_e64 s[10:11], v11, 0 ; D10A000A 0001010B v_cndmask_b32_e64 v11, -1.0, v12, s[10:11] ; D200080B 182A18F3 v_add_f32_e32 v11, v14, v11 ; 0616170E v_add_f32_e32 v14, -1.0, v1 ; 061C02F3 v_mul_f32_e32 v15, v14, v11 ; 101E170E v_add_f32_e32 v16, v12, v5 ; 06200B0C v_mad_f32 v15, v16, v13, v15 ; D282000F 043E1B10 v_add_f32_e32 v6, 1.0, v6 ; 060C0CF2 v_mad_f32 v9, v9, v10, v6 ; D2820009 041A1509 v_mul_f32_e32 v9, 0x41500000, v9 ; 101212FF 41500000 v_mul_f32_e32 v9, v9, v4 ; 10120909 v_fract_f32_e32 v9, v9 ; 7E124109 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mul_f32_e32 v17, 0x44e89000, v9 ; 102212FF 44E89000 v_fract_f32_e32 v17, v17 ; 7E224111 v_add_f32_e32 v17, -0.5, v17 ; 062222F1 v_cmp_ge_f32_e64 s[10:11], -v17, 0 ; D00C000A 20010111 v_cndmask_b32_e64 v18, 0, -1, s[10:11] ; D2000012 00298280 v_cmp_ne_i32_e64 s[10:11], v18, 0 ; D10A000A 00010112 v_cndmask_b32_e64 v18, 1.0, 0, s[10:11] ; D2000012 102900F2 v_cmp_ge_f32_e64 s[10:11], v17, 0 ; D00C000A 00010111 v_cndmask_b32_e64 v17, 0, -1, s[10:11] ; D2000811 00298280 v_cmp_ne_i32_e64 s[10:11], v17, 0 ; D10A000A 00010111 v_cndmask_b32_e64 v17, -1.0, v12, s[10:11] ; D2000811 182A18F3 v_add_f32_e32 v17, v18, v17 ; 06222312 v_mul_f32_e32 v9, 0x44689000, v9 ; 101212FF 44689000 v_fract_f32_e32 v9, v9 ; 7E124109 v_add_f32_e32 v9, -0.5, v9 ; 061212F1 v_cmp_ge_f32_e64 s[10:11], -v9, 0 ; D00C000A 20010109 v_cndmask_b32_e64 v18, 0, -1, s[10:11] ; D2000012 00298280 v_cmp_ne_i32_e64 s[10:11], v18, 0 ; D10A000A 00010112 v_cndmask_b32_e64 v18, 1.0, 0, s[10:11] ; D2000012 102900F2 v_cmp_ge_f32_e64 s[10:11], v9, 0 ; D00C000A 00010109 v_cndmask_b32_e64 v9, 0, -1, s[10:11] ; D2000809 00298280 v_cmp_ne_i32_e64 s[10:11], v9, 0 ; D10A000A 00010109 v_cndmask_b32_e64 v9, -1.0, v12, s[10:11] ; D2000809 182A18F3 v_add_f32_e32 v9, v18, v9 ; 06121312 v_mul_f32_e32 v14, v14, v9 ; 101C130E v_add_f32_e32 v18, -1.0, v5 ; 06240AF3 v_mad_f32 v14, v18, v17, v14 ; D282000E 043A2312 v_subrev_f32_e32 v19, v15, v14 ; 0A261D0F v_mov_b32_e32 v20, 0x40400000 ; 7E2802FF 40400000 v_mad_f32 v21, -2.0, v5, v20 ; D2820015 04520AF5 v_mul_f32_e32 v22, v5, v5 ; 102C0B05 v_mad_f32 v23, -v21, v22, 1.0 ; D2820017 23CA2D15 v_mul_f32_e32 v13, v13, v23 ; 101A2F0D v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_mad_f32 v13, v21, v17, v13 ; D282000D 04362315 v_mov_b32_e32 v17, 0x40c00000 ; 7E2202FF 40C00000 v_mov_b32_e32 v22, 0xc0c00000 ; 7E2C02FF C0C00000 v_mad_f32 v24, v5, v22, v17 ; D2820018 04462D05 v_mul_f32_e32 v5, v24, v5 ; 100A0B18 v_mad_f32 v13, v19, v5, v13 ; D282000D 04360B13 v_add_f32_e32 v3, 0, v3 ; 06060680 v_mul_f32_e32 v3, 0x41880000, v3 ; 100606FF 41880000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_fract_f32_e32 v0, v0 ; 7E004100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v3, v0, v10, v7 ; D2820003 041E1500 v_mul_f32_e32 v3, 0x41500000, v3 ; 100606FF 41500000 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_fract_f32_e32 v3, v3 ; 7E064103 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v7, 0x44e89000, v3 ; 100E06FF 44E89000 v_fract_f32_e32 v7, v7 ; 7E0E4107 v_add_f32_e32 v7, -0.5, v7 ; 060E0EF1 v_cmp_ge_f32_e64 s[10:11], -v7, 0 ; D00C000A 20010107 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[10:11], v19, 0 ; D10A000A 00010113 v_cndmask_b32_e64 v19, 1.0, 0, s[10:11] ; D2000813 102900F2 v_cmp_ge_f32_e64 s[10:11], v7, 0 ; D00C000A 00010107 v_cndmask_b32_e64 v7, 0, -1, s[10:11] ; D2000807 00298280 v_cmp_ne_i32_e64 s[10:11], v7, 0 ; D10A000A 00010107 v_cndmask_b32_e64 v7, -1.0, v12, s[10:11] ; D2000807 182A18F3 v_add_f32_e32 v7, v19, v7 ; 060E0F13 v_mul_f32_e32 v3, 0x44689000, v3 ; 100606FF 44689000 v_fract_f32_e32 v3, v3 ; 7E064103 v_add_f32_e32 v3, -0.5, v3 ; 060606F1 v_cmp_ge_f32_e64 s[10:11], -v3, 0 ; D00C000A 20010103 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[10:11], v19, 0 ; D10A000A 00010113 v_cndmask_b32_e64 v19, 1.0, 0, s[10:11] ; D2000813 102900F2 v_cmp_ge_f32_e64 s[10:11], v3, 0 ; D00C000A 00010103 v_cndmask_b32_e64 v3, 0, -1, s[10:11] ; D2000803 00298280 v_cmp_ne_i32_e64 s[10:11], v3, 0 ; D10A000A 00010103 v_cndmask_b32_e64 v3, -1.0, v12, s[10:11] ; D2000803 182A18F3 v_add_f32_e32 v3, v19, v3 ; 06060713 v_add_f32_e32 v19, v12, v1 ; 0626030C v_mul_f32_e32 v24, v19, v3 ; 10300713 v_mad_f32 v16, v16, v7, v24 ; D2820010 04620F10 v_mad_f32 v0, v0, v10, v6 ; D2820000 041A1500 v_mul_f32_e32 v0, 0x41500000, v0 ; 100000FF 41500000 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_fract_f32_e32 v0, v0 ; 7E004100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v4, 0x44e89000, v0 ; 100800FF 44E89000 v_fract_f32_e32 v4, v4 ; 7E084104 v_add_f32_e32 v4, -0.5, v4 ; 060808F1 v_cmp_ge_f32_e64 s[10:11], -v4, 0 ; D00C000A 20010104 v_cndmask_b32_e64 v6, 0, -1, s[10:11] ; D2000006 00298280 v_cmp_ne_i32_e64 s[10:11], v6, 0 ; D10A000A 00010106 v_cndmask_b32_e64 v6, 1.0, 0, s[10:11] ; D2000006 102900F2 v_cmp_ge_f32_e64 s[10:11], v4, 0 ; D00C000A 00010104 v_cndmask_b32_e64 v4, 0, -1, s[10:11] ; D2000004 00298280 v_cmp_ne_i32_e64 s[10:11], v4, 0 ; D10A000A 00010104 v_cndmask_b32_e64 v4, -1.0, v12, s[10:11] ; D2000004 182A18F3 v_add_f32_e32 v4, v6, v4 ; 06080906 v_mul_f32_e32 v0, 0x44689000, v0 ; 100000FF 44689000 v_fract_f32_e32 v0, v0 ; 7E004100 v_add_f32_e32 v0, -0.5, v0 ; 060000F1 v_cmp_ge_f32_e64 s[10:11], -v0, 0 ; D00C000A 20010100 v_cndmask_b32_e64 v6, 0, -1, s[10:11] ; D2000006 00298280 v_cmp_ne_i32_e64 s[10:11], v6, 0 ; D10A000A 00010106 v_cndmask_b32_e64 v6, 1.0, 0, s[10:11] ; D2000006 102900F2 v_cmp_ge_f32_e64 s[10:11], v0, 0 ; D00C000A 00010100 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_ne_i32_e64 s[10:11], v0, 0 ; D10A000A 00010100 v_cndmask_b32_e64 v0, -1.0, v12, s[10:11] ; D2000000 182A18F3 v_add_f32_e32 v0, v6, v0 ; 06000106 v_mul_f32_e32 v6, v19, v0 ; 100C0113 v_mad_f32 v6, v18, v4, v6 ; D2820006 041A0912 v_subrev_f32_e32 v10, v16, v6 ; 0A140D10 v_mul_f32_e32 v7, v7, v23 ; 100E2F07 v_mad_f32 v4, v21, v4, v7 ; D2820004 041E0915 v_mad_f32 v4, v10, v5, v4 ; D2820004 04120B0A v_mad_f32 v5, -2.0, v1, v20 ; D2820005 045202F5 v_mul_f32_e32 v7, v1, v1 ; 100E0301 v_mad_f32 v10, -v5, v7, 1.0 ; D282000A 23CA0F05 v_mul_f32_e32 v4, v4, v10 ; 10081504 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mad_f32 v4, v5, v13, v4 ; D2820004 04121B05 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s10, v4 ; 1008080A v_mad_f32 v4, 0.5, v4, 0.5 ; D2820004 03C208F0 v_rcp_f32_e32 v7, s9 ; 7E0E5409 v_subrev_f32_e32 v13, v7, v8 ; 0A1A1107 v_rcp_f32_e32 v18, s8 ; 7E245408 v_add_f32_e32 v12, v2, v18 ; 06182502 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800100 0064130C v_subrev_f32_e32 v24, v18, v2 ; 0A300512 v_mov_b32_e32 v25, v13 ; 7E32030D image_sample v18, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800100 00641218 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v20, v19, v18 ; 0A282513 v_add_f32_e32 v26, 0, v8 ; 06341080 v_mov_b32_e32 v25, v26 ; 7E32031A image_sample v27, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800100 00641B18 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, 2.0, v27, v20 ; D2820014 045236F4 v_add_f32_e32 v25, v8, v7 ; 06320F08 v_mov_b32_e32 v7, v12 ; 7E0E030C v_mov_b32_e32 v8, v13 ; 7E10030D v_mov_b32_e32 v8, v25 ; 7E100319 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0800100 00640707 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800100 00640818 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v27, v7, v8 ; 0A361107 v_mov_b32_e32 v28, v12 ; 7E38030C v_mov_b32_e32 v29, v13 ; 7E3A030D v_mov_b32_e32 v29, v26 ; 7E3A031A image_sample v26, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800100 00641A1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, -2.0, v26, v27 ; D282001A 046E34F5 v_add_f32_e32 v20, v20, v26 ; 06283514 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s0, v20 ; 10282800 v_mad_f32 v26, 0.5, v20, 0.5 ; D282001A 03C228F0 v_subrev_f32_e32 v26, v4, v26 ; 0A343504 v_add_f32_e32 v18, v18, v19 ; 06242712 v_add_f32_e32 v12, 0, v2 ; 06180480 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800100 0064020C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v2, v18 ; D2820002 044A04F4 v_sub_f32_e64 v7, -v7, v8 ; D2080007 20021107 v_mov_b32_e32 v13, v25 ; 7E1A0319 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800100 0064080C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, -2.0, v8, v7 ; D2820007 041E10F5 v_add_f32_e32 v2, v2, v7 ; 06040F02 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v7, v2, v2 ; 100E0502 v_mad_f32 v7, v20, v20, v7 ; D2820007 041E2914 v_max_f32_e32 v7, 0x33d6bf95, v7 ; 200E0EFF 33D6BF95 v_rsq_clamp_f32_e32 v8, v7 ; 7E105907 v_mul_f32_e32 v8, v7, v8 ; 10101107 v_xor_b32_e32 v7, 0x80000000, v7 ; 3A0E0EFF 80000000 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v7, 0, v8, vcc ; D2000807 01AA1080 v_add_f32_e64 v8, 0, v7 clamp ; D2060808 00020E80 v_mad_f32 v4, v8, v26, v4 ; D2820004 04123508 v_subrev_f32_e32 v6, v6, v14 ; 0A0C1D06 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v5, v9, v0 ; D2820000 04021305 v_mad_f32 v9, v1, v22, v17 ; D2820009 04462D01 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mad_f32 v0, v6, v1, v0 ; D2820000 04020306 v_subrev_f32_e32 v6, v16, v15 ; 0A0C1F10 v_mul_f32_e32 v3, v3, v10 ; 10061503 v_mad_f32 v3, v5, v11, v3 ; D2820003 040E1705 v_mad_f32 v1, v6, v1, v3 ; D2820001 040E0306 v_mul_f32_e32 v1, v1, v23 ; 10022F01 v_mad_f32 v0, v21, v0, v1 ; D2820000 04060115 v_mul_f32_e32 v0, s10, v0 ; 1000000A v_mad_f32 v1, 0.5, -v0, 0.5 ; D2820001 43C200F0 v_mad_f32 v3, 0.5, -v2, 0.5 ; D2820003 43C204F0 v_subrev_f32_e32 v3, v1, v3 ; 0A060701 v_mad_f32 v1, v8, v3, v1 ; D2820001 04060708 v_cvt_pkrtz_f16_f32_e32 v1, v1, v4 ; 5E020901 v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mad_f32 v3, 0, v0, 1.0 ; D2820003 03CA0080 v_subrev_f32_e32 v4, v3, v7 ; 0A080F03 v_mad_f32 v3, v8, v4, v3 ; D2820003 040E0908 v_mad_f32 v0, 0, v0, 0 ; D2820000 02020080 v_mul_f32_e32 v2, 0.5, v2 ; 100404F0 v_mad_f32 v2, 0, v2, 0 ; D2820002 02020480 v_subrev_f32_e32 v2, v0, v2 ; 0A040500 v_mad_f32 v0, v8, v2, v0 ; D2820000 04020508 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 1 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %23) %31 = bitcast i32 %30 to float %32 = call i32 @llvm.SI.packf16(float %24, float %25) %33 = bitcast i32 %32 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %31, float %33, float %31, float %33) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL OUT[8], GENERIC[16] DCL CONST[0..96] DCL TEMP[0..9], LOCAL DCL ADDR[0] IMM[0] FLT32 { -1.0000, -2.0000, 0.6600, 0.3300} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MOV TEMP[1].xy, IMM[0].xyxx 3: F2I TEMP[2].x, IN[4].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: ADD TEMP[2].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 6: MOV TEMP[1].zw, TEMP[2].wwzw 7: ABS TEMP[2].z, TEMP[1] 8: ABS TEMP[3].z, TEMP[1] 9: FSGE TEMP[2].x, -TEMP[2].zzzz, TEMP[3].zzzz 10: UIF TEMP[2].xxxx :0 11: F2I TEMP[2].x, IN[4].xxxx 12: UARL ADDR[0].x, TEMP[2].xxxx 13: MUL TEMP[2].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 14: MOV TEMP[2].xy, TEMP[2].xyxx 15: MUL TEMP[3].xy, TEMP[2], IMM[0].zzzz 16: MOV TEMP[2].xy, TEMP[3].xyxx 17: ELSE :0 18: F2I TEMP[3].x, IN[4].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: MUL TEMP[3].xy, IN[1], CONST[ADDR[0].x+1].xxxx 21: MOV TEMP[3].xy, TEMP[3].xyxx 22: MUL TEMP[4].xy, TEMP[3], IMM[0].wwww 23: MOV TEMP[2].xy, TEMP[4].xyxx 24: ENDIF 25: ABS TEMP[4].w, TEMP[1] 26: ABS TEMP[5].w, TEMP[1] 27: FSGE TEMP[4].x, -TEMP[4].wwww, TEMP[5].wwww 28: UIF TEMP[4].xxxx :0 29: F2I TEMP[4].x, IN[4].xxxx 30: UARL ADDR[0].x, TEMP[4].xxxx 31: MUL TEMP[4].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 32: MUL TEMP[4].xy, TEMP[4].zwzw, IMM[0].zzzz 33: MOV TEMP[2].xy, TEMP[4].xyxx 34: ENDIF 35: F2I TEMP[4].x, IN[4].yyyy 36: UARL ADDR[0].x, TEMP[4].xxxx 37: ADD TEMP[4].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 38: MOV TEMP[1].zw, TEMP[4].wwzw 39: ABS TEMP[4].z, TEMP[1] 40: ABS TEMP[5].z, TEMP[1] 41: FSGE TEMP[4].x, -TEMP[4].zzzz, TEMP[5].zzzz 42: UIF TEMP[4].xxxx :0 43: F2I TEMP[4].x, IN[4].yyyy 44: UARL ADDR[0].x, TEMP[4].xxxx 45: MUL TEMP[4].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 46: MOV TEMP[3].xy, TEMP[4].xyxx 47: MUL TEMP[4].xy, TEMP[3], IMM[0].zzzz 48: MOV TEMP[3].xy, TEMP[4].xyxx 49: ELSE :0 50: F2I TEMP[4].x, IN[4].yyyy 51: UARL ADDR[0].x, TEMP[4].xxxx 52: MUL TEMP[4].xy, IN[1], CONST[ADDR[0].x+1].xxxx 53: MOV TEMP[4].xy, TEMP[4].xyxx 54: MUL TEMP[5].xy, TEMP[4], IMM[0].wwww 55: MOV TEMP[3].xy, TEMP[5].xyxx 56: ENDIF 57: ABS TEMP[5].w, TEMP[1] 58: ABS TEMP[6].w, TEMP[1] 59: FSGE TEMP[5].x, -TEMP[5].wwww, TEMP[6].wwww 60: UIF TEMP[5].xxxx :0 61: F2I TEMP[5].x, IN[4].yyyy 62: UARL ADDR[0].x, TEMP[5].xxxx 63: MUL TEMP[5].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 64: MUL TEMP[5].xy, TEMP[5].zwzw, IMM[0].zzzz 65: MOV TEMP[3].xy, TEMP[5].xyxx 66: ENDIF 67: F2I TEMP[5].x, IN[4].zzzz 68: UARL ADDR[0].x, TEMP[5].xxxx 69: ADD TEMP[5].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 70: MOV TEMP[1].zw, TEMP[5].wwzw 71: ABS TEMP[5].z, TEMP[1] 72: ABS TEMP[6].z, TEMP[1] 73: FSGE TEMP[5].x, -TEMP[5].zzzz, TEMP[6].zzzz 74: UIF TEMP[5].xxxx :0 75: F2I TEMP[5].x, IN[4].zzzz 76: UARL ADDR[0].x, TEMP[5].xxxx 77: MUL TEMP[5].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 78: MOV TEMP[4].xy, TEMP[5].xyxx 79: MUL TEMP[5].xy, TEMP[4], IMM[0].zzzz 80: MOV TEMP[4].xy, TEMP[5].xyxx 81: ELSE :0 82: F2I TEMP[5].x, IN[4].zzzz 83: UARL ADDR[0].x, TEMP[5].xxxx 84: MUL TEMP[5].xy, IN[1], CONST[ADDR[0].x+1].xxxx 85: MOV TEMP[5].xy, TEMP[5].xyxx 86: MUL TEMP[6].xy, TEMP[5], IMM[0].wwww 87: MOV TEMP[4].xy, TEMP[6].xyxx 88: ENDIF 89: ABS TEMP[6].w, TEMP[1] 90: ABS TEMP[7].w, TEMP[1] 91: FSGE TEMP[6].x, -TEMP[6].wwww, TEMP[7].wwww 92: UIF TEMP[6].xxxx :0 93: F2I TEMP[6].x, IN[4].zzzz 94: UARL ADDR[0].x, TEMP[6].xxxx 95: MUL TEMP[6].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 96: MUL TEMP[6].xy, TEMP[6].zwzw, IMM[0].zzzz 97: MOV TEMP[4].xy, TEMP[6].xyxx 98: ENDIF 99: F2I TEMP[6].x, IN[4].wwww 100: UARL ADDR[0].x, TEMP[6].xxxx 101: ADD TEMP[6].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 102: MOV TEMP[1].zw, TEMP[6].wwzw 103: ABS TEMP[6].z, TEMP[1] 104: ABS TEMP[7].z, TEMP[1] 105: FSGE TEMP[6].x, -TEMP[6].zzzz, TEMP[7].zzzz 106: UIF TEMP[6].xxxx :0 107: F2I TEMP[6].x, IN[4].wwww 108: UARL ADDR[0].x, TEMP[6].xxxx 109: MUL TEMP[6].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 110: MOV TEMP[5].xy, TEMP[6].xyxx 111: MUL TEMP[6].xy, TEMP[5], IMM[0].zzzz 112: MOV TEMP[5].xy, TEMP[6].xyxx 113: ELSE :0 114: F2I TEMP[6].x, IN[4].wwww 115: UARL ADDR[0].x, TEMP[6].xxxx 116: MUL TEMP[6].xy, IN[1], CONST[ADDR[0].x+1].xxxx 117: MOV TEMP[6].xy, TEMP[6].xyxx 118: MUL TEMP[7].xy, TEMP[6], IMM[0].wwww 119: MOV TEMP[5].xy, TEMP[7].xyxx 120: ENDIF 121: ABS TEMP[7].w, TEMP[1] 122: ABS TEMP[8].w, TEMP[1] 123: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[8].wwww 124: UIF TEMP[7].xxxx :0 125: F2I TEMP[7].x, IN[4].wwww 126: UARL ADDR[0].x, TEMP[7].xxxx 127: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 128: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 129: MOV TEMP[5].xy, TEMP[7].xyxx 130: ENDIF 131: F2I TEMP[7].x, IN[5].xxxx 132: UARL ADDR[0].x, TEMP[7].xxxx 133: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 134: MOV TEMP[1].zw, TEMP[7].wwzw 135: ABS TEMP[7].z, TEMP[1] 136: ABS TEMP[8].z, TEMP[1] 137: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[8].zzzz 138: UIF TEMP[7].xxxx :0 139: F2I TEMP[7].x, IN[5].xxxx 140: UARL ADDR[0].x, TEMP[7].xxxx 141: MUL TEMP[7].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 142: MOV TEMP[6].xy, TEMP[7].xyxx 143: MUL TEMP[7].xy, TEMP[6], IMM[0].zzzz 144: MOV TEMP[6].xy, TEMP[7].xyxx 145: ELSE :0 146: F2I TEMP[7].x, IN[5].xxxx 147: UARL ADDR[0].x, TEMP[7].xxxx 148: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 149: MOV TEMP[6].zw, TEMP[7].wwzw 150: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].wwww 151: MOV TEMP[6].xy, TEMP[7].xyxx 152: ENDIF 153: ABS TEMP[7].w, TEMP[1] 154: ABS TEMP[8].w, TEMP[1] 155: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[8].wwww 156: UIF TEMP[7].xxxx :0 157: F2I TEMP[7].x, IN[5].xxxx 158: UARL ADDR[0].x, TEMP[7].xxxx 159: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 160: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 161: MOV TEMP[6].xy, TEMP[7].xyxx 162: ENDIF 163: F2I TEMP[7].x, IN[5].yyyy 164: UARL ADDR[0].x, TEMP[7].xxxx 165: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 166: MOV TEMP[1].zw, TEMP[7].wwzw 167: ABS TEMP[7].z, TEMP[1] 168: ABS TEMP[8].z, TEMP[1] 169: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[8].zzzz 170: UIF TEMP[7].xxxx :0 171: F2I TEMP[7].x, IN[5].yyyy 172: UARL ADDR[0].x, TEMP[7].xxxx 173: MUL TEMP[7].zw, IN[1].xyxz, CONST[ADDR[0].x+1].xxxx 174: MOV TEMP[6].zw, TEMP[7].wwzw 175: MUL TEMP[7].zw, TEMP[6], IMM[0].zzzz 176: MOV TEMP[6].zw, TEMP[7].wwzw 177: ELSE :0 178: F2I TEMP[7].x, IN[5].yyyy 179: UARL ADDR[0].x, TEMP[7].xxxx 180: MUL TEMP[7].xy, IN[1], CONST[ADDR[0].x+1].xxxx 181: MOV TEMP[8].xy, TEMP[7].xyxx 182: MUL TEMP[7].zw, TEMP[7].xyxy, IMM[0].wwww 183: MOV TEMP[6].zw, TEMP[7].wwzw 184: ENDIF 185: ABS TEMP[7].w, TEMP[1] 186: ABS TEMP[9].w, TEMP[1] 187: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[9].wwww 188: UIF TEMP[7].xxxx :0 189: F2I TEMP[7].x, IN[5].yyyy 190: UARL ADDR[0].x, TEMP[7].xxxx 191: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 192: MOV TEMP[1].zw, TEMP[7].wwzw 193: MUL TEMP[7].zw, TEMP[1], IMM[0].zzzz 194: MOV TEMP[6].zw, TEMP[7].wwzw 195: ENDIF 196: F2I TEMP[7].x, IN[5].zzzz 197: UARL ADDR[0].x, TEMP[7].xxxx 198: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 199: MOV TEMP[1].zw, TEMP[7].wwzw 200: ABS TEMP[7].z, TEMP[1] 201: ABS TEMP[9].z, TEMP[1] 202: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[9].zzzz 203: UIF TEMP[7].xxxx :0 204: F2I TEMP[7].x, IN[5].zzzz 205: UARL ADDR[0].x, TEMP[7].xxxx 206: MUL TEMP[7].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 207: MOV TEMP[8].xy, TEMP[7].xyxx 208: MUL TEMP[7].xy, TEMP[8], IMM[0].zzzz 209: MOV TEMP[8].xy, TEMP[7].xyxx 210: ELSE :0 211: F2I TEMP[7].x, IN[5].zzzz 212: UARL ADDR[0].x, TEMP[7].xxxx 213: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 214: MOV TEMP[8].zw, TEMP[7].wwzw 215: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].wwww 216: MOV TEMP[8].xy, TEMP[7].xyxx 217: ENDIF 218: ABS TEMP[7].w, TEMP[1] 219: ABS TEMP[9].w, TEMP[1] 220: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[9].wwww 221: UIF TEMP[7].xxxx :0 222: F2I TEMP[7].x, IN[5].zzzz 223: UARL ADDR[0].x, TEMP[7].xxxx 224: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 225: MOV TEMP[1].zw, TEMP[7].wwzw 226: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 227: MOV TEMP[8].xy, TEMP[7].xyxx 228: ENDIF 229: F2I TEMP[7].x, IN[5].wwww 230: UARL ADDR[0].x, TEMP[7].xxxx 231: ADD TEMP[7].xy, TEMP[1], CONST[ADDR[0].x+1].yyyy 232: MOV TEMP[1].xy, TEMP[7].xyxx 233: ABS TEMP[7].x, TEMP[1] 234: ABS TEMP[9].x, TEMP[1] 235: FSGE TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx 236: UIF TEMP[7].xxxx :0 237: F2I TEMP[7].x, IN[5].wwww 238: UARL ADDR[0].x, TEMP[7].xxxx 239: MUL TEMP[7].xz, IN[1], CONST[ADDR[0].x+1].xxxx 240: MOV TEMP[1].xz, TEMP[7].xxzx 241: MUL TEMP[7].xz, TEMP[1], IMM[0].zzzz 242: MOV TEMP[1].xz, TEMP[7].xxzx 243: ELSE :0 244: F2I TEMP[7].x, IN[5].wwww 245: UARL ADDR[0].x, TEMP[7].xxxx 246: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 247: MOV TEMP[8].zw, TEMP[7].wwzw 248: MUL TEMP[7].xz, TEMP[8].zyww, IMM[0].wwww 249: MOV TEMP[1].xz, TEMP[7].xxzx 250: ENDIF 251: ABS TEMP[7].y, TEMP[1] 252: ABS TEMP[9].y, TEMP[1] 253: FSGE TEMP[7].x, -TEMP[7].yyyy, TEMP[9].yyyy 254: UIF TEMP[7].xxxx :0 255: F2I TEMP[7].x, IN[5].wwww 256: UARL ADDR[0].x, TEMP[7].xxxx 257: MUL TEMP[7].yw, IN[1].xyzz, CONST[ADDR[0].x+1].xxxx 258: MUL TEMP[7].xz, TEMP[7].yyww, IMM[0].zzzz 259: MOV TEMP[1].xz, TEMP[7].xxzx 260: ENDIF 261: MAD TEMP[7].zw, IN[0].zzzz, IMM[1].xyxy, IMM[1].xyyx 262: MOV TEMP[0].zw, TEMP[7].wwzw 263: MOV TEMP[2].zw, IN[2].yyxy 264: MOV TEMP[3].zw, IN[2].wwzw 265: MOV TEMP[4].zw, IN[3].yyxy 266: MOV TEMP[5].zw, IN[3].wwzw 267: MOV TEMP[7].xy, TEMP[6].xyxx 268: MOV TEMP[7].zw, IMM[1].yyyy 269: MOV TEMP[6].xy, TEMP[6].zwzz 270: MOV TEMP[6].zw, IMM[1].yyyy 271: MOV TEMP[8].xy, TEMP[8].xyxx 272: MOV TEMP[8].zw, IMM[1].yyyy 273: MOV TEMP[1].xy, TEMP[1].xzxx 274: MOV TEMP[1].zw, IMM[1].yyyy 275: MOV OUT[6], TEMP[6] 276: MOV OUT[7], TEMP[8] 277: MOV OUT[8], TEMP[1] 278: MOV OUT[1], TEMP[2] 279: MOV OUT[2], TEMP[3] 280: MOV OUT[0], TEMP[0] 281: MOV OUT[3], TEMP[4] 282: MOV OUT[4], TEMP[5] 283: MOV OUT[5], TEMP[7] 284: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = add i32 %5, %7 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %17) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = add i32 %5, %7 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 5 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fadd float %13, %19 %62 = fadd float %14, %20 %63 = fptosi float %49 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = shl i32 %65, 4 %67 = add i32 %66, 20 %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %67) %69 = fadd float -1.000000e+00, %68 %70 = shl i32 %65, 4 %71 = add i32 %70, 20 %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %71) %73 = fadd float -2.000000e+00, %72 %74 = call float @fabs(float %69) %75 = call float @fabs(float %69) %76 = fsub float -0.000000e+00, %74 %77 = fcmp oge float %76, %75 %78 = sext i1 %77 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = icmp ne i32 %80, 0 %82 = fptosi float %49 to i32 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = shl i32 %84, 4 %86 = add i32 %85, 16 %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %86) %88 = fmul float %26, %87 %89 = shl i32 %84, 4 %90 = add i32 %89, 16 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) br i1 %81, label %IF, label %ELSE IF: ; preds = %main_body %92 = fmul float %28, %91 %93 = fmul float %88, 0x3FE51EB860000000 %94 = fmul float %92, 0x3FE51EB860000000 br label %ENDIF ELSE: ; preds = %main_body %95 = fmul float %27, %91 %96 = fmul float %88, 0x3FD51EB860000000 %97 = fmul float %95, 0x3FD51EB860000000 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp8.0 = phi float [ %93, %IF ], [ %96, %ELSE ] %temp9.0 = phi float [ %94, %IF ], [ %97, %ELSE ] %98 = call float @fabs(float %73) %99 = call float @fabs(float %73) %100 = fsub float -0.000000e+00, %98 %101 = fcmp oge float %100, %99 %102 = sext i1 %101 to i32 %103 = bitcast i32 %102 to float %104 = bitcast float %103 to i32 %105 = icmp ne i32 %104, 0 br i1 %105, label %IF46, label %ENDIF45 IF46: ; preds = %ENDIF %106 = fptosi float %49 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = add i32 %109, 16 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = fmul float %27, %111 %113 = shl i32 %108, 4 %114 = add i32 %113, 16 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %28, %115 %117 = fmul float %112, 0x3FE51EB860000000 %118 = fmul float %116, 0x3FE51EB860000000 br label %ENDIF45 ENDIF45: ; preds = %ENDIF, %IF46 %temp8.1 = phi float [ %117, %IF46 ], [ %temp8.0, %ENDIF ] %temp9.1 = phi float [ %118, %IF46 ], [ %temp9.0, %ENDIF ] %119 = fptosi float %50 to i32 %120 = bitcast i32 %119 to float %121 = bitcast float %120 to i32 %122 = shl i32 %121, 4 %123 = add i32 %122, 20 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fadd float -1.000000e+00, %124 %126 = shl i32 %121, 4 %127 = add i32 %126, 20 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fadd float -2.000000e+00, %128 %130 = call float @fabs(float %125) %131 = call float @fabs(float %125) %132 = fsub float -0.000000e+00, %130 %133 = fcmp oge float %132, %131 %134 = sext i1 %133 to i32 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = icmp ne i32 %136, 0 %138 = fptosi float %50 to i32 %139 = bitcast i32 %138 to float %140 = bitcast float %139 to i32 %141 = shl i32 %140, 4 %142 = add i32 %141, 16 %143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %142) %144 = fmul float %26, %143 %145 = shl i32 %140, 4 %146 = add i32 %145, 16 %147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %146) br i1 %137, label %IF53, label %ELSE54 IF53: ; preds = %ENDIF45 %148 = fmul float %28, %147 %149 = fmul float %144, 0x3FE51EB860000000 %150 = fmul float %148, 0x3FE51EB860000000 br label %ENDIF52 ELSE54: ; preds = %ENDIF45 %151 = fmul float %27, %147 %152 = fmul float %144, 0x3FD51EB860000000 %153 = fmul float %151, 0x3FD51EB860000000 br label %ENDIF52 ENDIF52: ; preds = %ELSE54, %IF53 %temp12.0 = phi float [ %149, %IF53 ], [ %152, %ELSE54 ] %temp13.0 = phi float [ %150, %IF53 ], [ %153, %ELSE54 ] %154 = call float @fabs(float %129) %155 = call float @fabs(float %129) %156 = fsub float -0.000000e+00, %154 %157 = fcmp oge float %156, %155 %158 = sext i1 %157 to i32 %159 = bitcast i32 %158 to float %160 = bitcast float %159 to i32 %161 = icmp ne i32 %160, 0 br i1 %161, label %IF60, label %ENDIF59 IF60: ; preds = %ENDIF52 %162 = fptosi float %50 to i32 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = shl i32 %164, 4 %166 = add i32 %165, 16 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = fmul float %27, %167 %169 = shl i32 %164, 4 %170 = add i32 %169, 16 %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %170) %172 = fmul float %28, %171 %173 = fmul float %168, 0x3FE51EB860000000 %174 = fmul float %172, 0x3FE51EB860000000 br label %ENDIF59 ENDIF59: ; preds = %ENDIF52, %IF60 %temp12.1 = phi float [ %173, %IF60 ], [ %temp12.0, %ENDIF52 ] %temp13.1 = phi float [ %174, %IF60 ], [ %temp13.0, %ENDIF52 ] %175 = fptosi float %51 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = shl i32 %177, 4 %179 = add i32 %178, 20 %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %179) %181 = fadd float -1.000000e+00, %180 %182 = shl i32 %177, 4 %183 = add i32 %182, 20 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = fadd float -2.000000e+00, %184 %186 = call float @fabs(float %181) %187 = call float @fabs(float %181) %188 = fsub float -0.000000e+00, %186 %189 = fcmp oge float %188, %187 %190 = sext i1 %189 to i32 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = icmp ne i32 %192, 0 %194 = fptosi float %51 to i32 %195 = bitcast i32 %194 to float %196 = bitcast float %195 to i32 %197 = shl i32 %196, 4 %198 = add i32 %197, 16 %199 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %198) %200 = fmul float %26, %199 %201 = shl i32 %196, 4 %202 = add i32 %201, 16 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) br i1 %193, label %IF67, label %ELSE68 IF67: ; preds = %ENDIF59 %204 = fmul float %28, %203 %205 = fmul float %200, 0x3FE51EB860000000 %206 = fmul float %204, 0x3FE51EB860000000 br label %ENDIF66 ELSE68: ; preds = %ENDIF59 %207 = fmul float %27, %203 %208 = fmul float %200, 0x3FD51EB860000000 %209 = fmul float %207, 0x3FD51EB860000000 br label %ENDIF66 ENDIF66: ; preds = %ELSE68, %IF67 %temp16.0 = phi float [ %205, %IF67 ], [ %208, %ELSE68 ] %temp17.0 = phi float [ %206, %IF67 ], [ %209, %ELSE68 ] %210 = call float @fabs(float %185) %211 = call float @fabs(float %185) %212 = fsub float -0.000000e+00, %210 %213 = fcmp oge float %212, %211 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 br i1 %217, label %IF74, label %ENDIF73 IF74: ; preds = %ENDIF66 %218 = fptosi float %51 to i32 %219 = bitcast i32 %218 to float %220 = bitcast float %219 to i32 %221 = shl i32 %220, 4 %222 = add i32 %221, 16 %223 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %222) %224 = fmul float %27, %223 %225 = shl i32 %220, 4 %226 = add i32 %225, 16 %227 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %226) %228 = fmul float %28, %227 %229 = fmul float %224, 0x3FE51EB860000000 %230 = fmul float %228, 0x3FE51EB860000000 br label %ENDIF73 ENDIF73: ; preds = %ENDIF66, %IF74 %temp16.1 = phi float [ %229, %IF74 ], [ %temp16.0, %ENDIF66 ] %temp17.1 = phi float [ %230, %IF74 ], [ %temp17.0, %ENDIF66 ] %231 = fptosi float %52 to i32 %232 = bitcast i32 %231 to float %233 = bitcast float %232 to i32 %234 = shl i32 %233, 4 %235 = add i32 %234, 20 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = fadd float -1.000000e+00, %236 %238 = shl i32 %233, 4 %239 = add i32 %238, 20 %240 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %239) %241 = fadd float -2.000000e+00, %240 %242 = call float @fabs(float %237) %243 = call float @fabs(float %237) %244 = fsub float -0.000000e+00, %242 %245 = fcmp oge float %244, %243 %246 = sext i1 %245 to i32 %247 = bitcast i32 %246 to float %248 = bitcast float %247 to i32 %249 = icmp ne i32 %248, 0 %250 = fptosi float %52 to i32 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 16 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = fmul float %26, %255 %257 = shl i32 %252, 4 %258 = add i32 %257, 16 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) br i1 %249, label %IF81, label %ELSE82 IF81: ; preds = %ENDIF73 %260 = fmul float %28, %259 %261 = fmul float %256, 0x3FE51EB860000000 %262 = fmul float %260, 0x3FE51EB860000000 br label %ENDIF80 ELSE82: ; preds = %ENDIF73 %263 = fmul float %27, %259 %264 = fmul float %256, 0x3FD51EB860000000 %265 = fmul float %263, 0x3FD51EB860000000 br label %ENDIF80 ENDIF80: ; preds = %ELSE82, %IF81 %temp20.0 = phi float [ %261, %IF81 ], [ %264, %ELSE82 ] %temp21.0 = phi float [ %262, %IF81 ], [ %265, %ELSE82 ] %266 = call float @fabs(float %241) %267 = call float @fabs(float %241) %268 = fsub float -0.000000e+00, %266 %269 = fcmp oge float %268, %267 %270 = sext i1 %269 to i32 %271 = bitcast i32 %270 to float %272 = bitcast float %271 to i32 %273 = icmp ne i32 %272, 0 br i1 %273, label %IF88, label %ENDIF87 IF88: ; preds = %ENDIF80 %274 = fptosi float %52 to i32 %275 = bitcast i32 %274 to float %276 = bitcast float %275 to i32 %277 = shl i32 %276, 4 %278 = add i32 %277, 16 %279 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %278) %280 = fmul float %27, %279 %281 = shl i32 %276, 4 %282 = add i32 %281, 16 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = fmul float %28, %283 %285 = fmul float %280, 0x3FE51EB860000000 %286 = fmul float %284, 0x3FE51EB860000000 br label %ENDIF87 ENDIF87: ; preds = %ENDIF80, %IF88 %temp20.1 = phi float [ %285, %IF88 ], [ %temp20.0, %ENDIF80 ] %temp21.1 = phi float [ %286, %IF88 ], [ %temp21.0, %ENDIF80 ] %287 = fptosi float %57 to i32 %288 = bitcast i32 %287 to float %289 = bitcast float %288 to i32 %290 = shl i32 %289, 4 %291 = add i32 %290, 20 %292 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %291) %293 = fadd float -1.000000e+00, %292 %294 = shl i32 %289, 4 %295 = add i32 %294, 20 %296 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %295) %297 = fadd float -2.000000e+00, %296 %298 = call float @fabs(float %293) %299 = call float @fabs(float %293) %300 = fsub float -0.000000e+00, %298 %301 = fcmp oge float %300, %299 %302 = sext i1 %301 to i32 %303 = bitcast i32 %302 to float %304 = bitcast float %303 to i32 %305 = icmp ne i32 %304, 0 %306 = fptosi float %57 to i32 %307 = bitcast i32 %306 to float %308 = bitcast float %307 to i32 %309 = shl i32 %308, 4 %310 = add i32 %309, 16 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = fmul float %26, %311 %313 = shl i32 %308, 4 %314 = add i32 %313, 16 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) br i1 %305, label %IF95, label %ELSE96 IF95: ; preds = %ENDIF87 %316 = fmul float %28, %315 %317 = fmul float %312, 0x3FE51EB860000000 %318 = fmul float %316, 0x3FE51EB860000000 br label %ENDIF94 ELSE96: ; preds = %ENDIF87 %319 = fmul float %27, %315 %320 = fmul float %312, 0x3FD51EB860000000 %321 = fmul float %319, 0x3FD51EB860000000 br label %ENDIF94 ENDIF94: ; preds = %ELSE96, %IF95 %temp24.0 = phi float [ %317, %IF95 ], [ %320, %ELSE96 ] %temp25.0 = phi float [ %318, %IF95 ], [ %321, %ELSE96 ] %322 = call float @fabs(float %297) %323 = call float @fabs(float %297) %324 = fsub float -0.000000e+00, %322 %325 = fcmp oge float %324, %323 %326 = sext i1 %325 to i32 %327 = bitcast i32 %326 to float %328 = bitcast float %327 to i32 %329 = icmp ne i32 %328, 0 br i1 %329, label %IF102, label %ENDIF101 IF102: ; preds = %ENDIF94 %330 = fptosi float %57 to i32 %331 = bitcast i32 %330 to float %332 = bitcast float %331 to i32 %333 = shl i32 %332, 4 %334 = add i32 %333, 16 %335 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %334) %336 = fmul float %27, %335 %337 = shl i32 %332, 4 %338 = add i32 %337, 16 %339 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %338) %340 = fmul float %28, %339 %341 = fmul float %336, 0x3FE51EB860000000 %342 = fmul float %340, 0x3FE51EB860000000 br label %ENDIF101 ENDIF101: ; preds = %ENDIF94, %IF102 %temp24.1 = phi float [ %341, %IF102 ], [ %temp24.0, %ENDIF94 ] %temp25.1 = phi float [ %342, %IF102 ], [ %temp25.0, %ENDIF94 ] %343 = fptosi float %58 to i32 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 20 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = fadd float -1.000000e+00, %348 %350 = shl i32 %345, 4 %351 = add i32 %350, 20 %352 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %351) %353 = fadd float -2.000000e+00, %352 %354 = call float @fabs(float %349) %355 = call float @fabs(float %349) %356 = fsub float -0.000000e+00, %354 %357 = fcmp oge float %356, %355 %358 = sext i1 %357 to i32 %359 = bitcast i32 %358 to float %360 = bitcast float %359 to i32 %361 = icmp ne i32 %360, 0 %362 = fptosi float %58 to i32 %363 = bitcast i32 %362 to float %364 = bitcast float %363 to i32 %365 = shl i32 %364, 4 %366 = add i32 %365, 16 %367 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %366) %368 = fmul float %26, %367 %369 = shl i32 %364, 4 %370 = add i32 %369, 16 %371 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %370) br i1 %361, label %IF109, label %ELSE110 IF109: ; preds = %ENDIF101 %372 = fmul float %28, %371 %373 = fmul float %368, 0x3FE51EB860000000 %374 = fmul float %372, 0x3FE51EB860000000 br label %ENDIF108 ELSE110: ; preds = %ENDIF101 %375 = fmul float %27, %371 %376 = fmul float %368, 0x3FD51EB860000000 %377 = fmul float %375, 0x3FD51EB860000000 br label %ENDIF108 ENDIF108: ; preds = %ELSE110, %IF109 %temp26.0 = phi float [ %373, %IF109 ], [ %376, %ELSE110 ] %temp27.0 = phi float [ %374, %IF109 ], [ %377, %ELSE110 ] %378 = call float @fabs(float %353) %379 = call float @fabs(float %353) %380 = fsub float -0.000000e+00, %378 %381 = fcmp oge float %380, %379 %382 = sext i1 %381 to i32 %383 = bitcast i32 %382 to float %384 = bitcast float %383 to i32 %385 = icmp ne i32 %384, 0 br i1 %385, label %IF116, label %ENDIF115 IF116: ; preds = %ENDIF108 %386 = fptosi float %58 to i32 %387 = bitcast i32 %386 to float %388 = bitcast float %387 to i32 %389 = shl i32 %388, 4 %390 = add i32 %389, 16 %391 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %390) %392 = fmul float %27, %391 %393 = shl i32 %388, 4 %394 = add i32 %393, 16 %395 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %394) %396 = fmul float %28, %395 %397 = fmul float %392, 0x3FE51EB860000000 %398 = fmul float %396, 0x3FE51EB860000000 br label %ENDIF115 ENDIF115: ; preds = %ENDIF108, %IF116 %temp26.1 = phi float [ %397, %IF116 ], [ %temp26.0, %ENDIF108 ] %temp27.1 = phi float [ %398, %IF116 ], [ %temp27.0, %ENDIF108 ] %399 = fptosi float %59 to i32 %400 = bitcast i32 %399 to float %401 = bitcast float %400 to i32 %402 = shl i32 %401, 4 %403 = add i32 %402, 20 %404 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %403) %405 = fadd float -1.000000e+00, %404 %406 = shl i32 %401, 4 %407 = add i32 %406, 20 %408 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %407) %409 = fadd float -2.000000e+00, %408 %410 = call float @fabs(float %405) %411 = call float @fabs(float %405) %412 = fsub float -0.000000e+00, %410 %413 = fcmp oge float %412, %411 %414 = sext i1 %413 to i32 %415 = bitcast i32 %414 to float %416 = bitcast float %415 to i32 %417 = icmp ne i32 %416, 0 %418 = fptosi float %59 to i32 %419 = bitcast i32 %418 to float %420 = bitcast float %419 to i32 %421 = shl i32 %420, 4 %422 = add i32 %421, 16 %423 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %422) %424 = fmul float %26, %423 %425 = shl i32 %420, 4 %426 = add i32 %425, 16 %427 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %426) br i1 %417, label %IF123, label %ELSE124 IF123: ; preds = %ENDIF115 %428 = fmul float %28, %427 %429 = fmul float %424, 0x3FE51EB860000000 %430 = fmul float %428, 0x3FE51EB860000000 br label %ENDIF122 ELSE124: ; preds = %ENDIF115 %431 = fmul float %27, %427 %432 = fmul float %424, 0x3FD51EB860000000 %433 = fmul float %431, 0x3FD51EB860000000 br label %ENDIF122 ENDIF122: ; preds = %ELSE124, %IF123 %temp32.0 = phi float [ %429, %IF123 ], [ %432, %ELSE124 ] %temp33.0 = phi float [ %430, %IF123 ], [ %433, %ELSE124 ] %434 = call float @fabs(float %409) %435 = call float @fabs(float %409) %436 = fsub float -0.000000e+00, %434 %437 = fcmp oge float %436, %435 %438 = sext i1 %437 to i32 %439 = bitcast i32 %438 to float %440 = bitcast float %439 to i32 %441 = icmp ne i32 %440, 0 br i1 %441, label %IF130, label %ENDIF129 IF130: ; preds = %ENDIF122 %442 = fptosi float %59 to i32 %443 = bitcast i32 %442 to float %444 = bitcast float %443 to i32 %445 = shl i32 %444, 4 %446 = add i32 %445, 16 %447 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %446) %448 = fmul float %27, %447 %449 = shl i32 %444, 4 %450 = add i32 %449, 16 %451 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %450) %452 = fmul float %28, %451 %453 = fmul float %448, 0x3FE51EB860000000 %454 = fmul float %452, 0x3FE51EB860000000 br label %ENDIF129 ENDIF129: ; preds = %ENDIF122, %IF130 %temp32.1 = phi float [ %453, %IF130 ], [ %temp32.0, %ENDIF122 ] %temp33.1 = phi float [ %454, %IF130 ], [ %temp33.0, %ENDIF122 ] %455 = fptosi float %60 to i32 %456 = bitcast i32 %455 to float %457 = bitcast float %456 to i32 %458 = shl i32 %457, 4 %459 = add i32 %458, 20 %460 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %459) %461 = fadd float -1.000000e+00, %460 %462 = shl i32 %457, 4 %463 = add i32 %462, 20 %464 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %463) %465 = fadd float -2.000000e+00, %464 %466 = call float @fabs(float %461) %467 = call float @fabs(float %461) %468 = fsub float -0.000000e+00, %466 %469 = fcmp oge float %468, %467 %470 = sext i1 %469 to i32 %471 = bitcast i32 %470 to float %472 = bitcast float %471 to i32 %473 = icmp ne i32 %472, 0 %474 = fptosi float %60 to i32 %475 = bitcast i32 %474 to float %476 = bitcast float %475 to i32 %477 = shl i32 %476, 4 %478 = add i32 %477, 16 %479 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %478) %480 = fmul float %26, %479 %481 = shl i32 %476, 4 %482 = add i32 %481, 16 %483 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %482) br i1 %473, label %IF137, label %ELSE138 IF137: ; preds = %ENDIF129 %484 = fmul float %28, %483 %485 = fmul float %480, 0x3FE51EB860000000 %486 = fmul float %484, 0x3FE51EB860000000 br label %ENDIF136 ELSE138: ; preds = %ENDIF129 %487 = fmul float %27, %483 %488 = fmul float %480, 0x3FD51EB860000000 %489 = fmul float %487, 0x3FD51EB860000000 br label %ENDIF136 ENDIF136: ; preds = %ELSE138, %IF137 %temp4.0 = phi float [ %485, %IF137 ], [ %488, %ELSE138 ] %temp6.0 = phi float [ %486, %IF137 ], [ %489, %ELSE138 ] %490 = call float @fabs(float %465) %491 = call float @fabs(float %465) %492 = fsub float -0.000000e+00, %490 %493 = fcmp oge float %492, %491 %494 = sext i1 %493 to i32 %495 = bitcast i32 %494 to float %496 = bitcast float %495 to i32 %497 = icmp ne i32 %496, 0 br i1 %497, label %IF144, label %ENDIF143 IF144: ; preds = %ENDIF136 %498 = fptosi float %60 to i32 %499 = bitcast i32 %498 to float %500 = bitcast float %499 to i32 %501 = shl i32 %500, 4 %502 = add i32 %501, 16 %503 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %502) %504 = fmul float %27, %503 %505 = shl i32 %500, 4 %506 = add i32 %505, 16 %507 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %506) %508 = fmul float %28, %507 %509 = fmul float %504, 0x3FE51EB860000000 %510 = fmul float %508, 0x3FE51EB860000000 br label %ENDIF143 ENDIF143: ; preds = %ENDIF136, %IF144 %temp4.1 = phi float [ %509, %IF144 ], [ %temp4.0, %ENDIF136 ] %temp6.1 = phi float [ %510, %IF144 ], [ %temp6.0, %ENDIF136 ] %511 = fmul float %21, 1.000000e+00 %512 = fadd float %511, 0.000000e+00 %513 = fmul float %21, 0.000000e+00 %514 = fadd float %513, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %temp8.1, float %temp9.1, float %33, float %34) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %temp12.1, float %temp13.1, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp16.1, float %temp17.1, float %41, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %temp20.1, float %temp21.1, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %temp24.1, float %temp25.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %temp26.1, float %temp27.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %temp32.1, float %temp33.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %temp4.1, float %temp6.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %62, float %512, float %514) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[24:27], s[8:9], 0x10 ; C08C0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[28:31], v0, s[24:27], 0 idxen ; E00C2000 80061C00 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v1, v28 ; 7E02111C v_lshlrev_b32_e32 v20, 4, v1 ; 34280284 v_add_i32_e32 v1, 16, v20 ; 4A022890 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v22, v1, s[0:3], 0 offen ; E0301000 80001601 buffer_load_format_xyzw v[12:15], v0, s[12:15], 0 idxen ; E00C2000 80030C00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, v22, v12 ; 10301916 buffer_load_format_xyzw v[16:19], v0, s[8:11], 0 idxen ; E00C2000 80021000 buffer_load_format_xyzw v[4:7], v0, s[20:23], 0 idxen ; E00C2000 80050400 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_add_i32_e32 v20, 20, v20 ; 4A282894 buffer_load_dword v23, v20, s[0:3], 0 offen ; E0301000 80001714 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v20, -1.0, v23 ; 06282EF3 v_cmp_ge_f32_e64 s[4:5], -|v20|, |v20| ; D00C0304 20022914 v_cndmask_b32_e64 v20, 0, -1, s[4:5] ; D2000014 00118280 v_cmp_eq_i32_e64 s[4:5], v20, 0 ; D1040004 00010114 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v20, 0x3ea8f5c3, v24 ; 102830FF 3EA8F5C3 v_mul_f32_e32 v21, v22, v13 ; 102A1B16 v_mul_f32_e32 v21, 0x3ea8f5c3, v21 ; 102A2AFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v20, 0x3f28f5c3, v24 ; 102830FF 3F28F5C3 v_mul_f32_e32 v21, v22, v14 ; 102A1D16 v_mul_f32_e32 v21, 0x3f28f5c3, v21 ; 102A2AFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v22, -2.0, v23 ; 062C2EF5 v_cmp_ge_f32_e64 s[4:5], -|v22|, |v22| ; D00C0304 20022D16 v_cndmask_b32_e64 v22, 0, -1, s[4:5] ; D2000016 00118280 v_cmp_ne_i32_e64 s[4:5], v22, 0 ; D10A0004 00010116 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v20, v28 ; 7E28111C v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v20, 16, v20 ; 4A282890 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v20, v14 ; 102A1D14 v_mul_f32_e32 v21, 0x3f28f5c3, v21 ; 102A2AFF 3F28F5C3 v_mul_f32_e32 v20, v20, v13 ; 10281B14 v_mul_f32_e32 v20, 0x3f28f5c3, v20 ; 102828FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v22, v29 ; 7E2C111D v_lshlrev_b32_e32 v22, 4, v22 ; 342C2C84 v_add_i32_e32 v23, 16, v22 ; 4A2E2C90 buffer_load_dword v25, v23, s[0:3], 0 offen ; E0301000 80001917 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v25, v12 ; 10341919 v_add_i32_e32 v22, 20, v22 ; 4A2C2C94 buffer_load_dword v24, v22, s[0:3], 0 offen ; E0301000 80001816 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v22, -1.0, v24 ; 062C30F3 v_cmp_ge_f32_e64 s[4:5], -|v22|, |v22| ; D00C0304 20022D16 v_cndmask_b32_e64 v22, 0, -1, s[4:5] ; D2000016 00118280 v_cmp_eq_i32_e64 s[4:5], v22, 0 ; D1040004 00010116 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v22, 0x3ea8f5c3, v26 ; 102C34FF 3EA8F5C3 v_mul_f32_e32 v23, v25, v13 ; 102E1B19 v_mul_f32_e32 v23, 0x3ea8f5c3, v23 ; 102E2EFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v22, 0x3f28f5c3, v26 ; 102C34FF 3F28F5C3 v_mul_f32_e32 v23, v25, v14 ; 102E1D19 v_mul_f32_e32 v23, 0x3f28f5c3, v23 ; 102E2EFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v24, -2.0, v24 ; 063030F5 v_cmp_ge_f32_e64 s[4:5], -|v24|, |v24| ; D00C0304 20023118 v_cndmask_b32_e64 v24, 0, -1, s[4:5] ; D2000018 00118280 v_cmp_ne_i32_e64 s[4:5], v24, 0 ; D10A0004 00010118 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v22, v29 ; 7E2C111D v_lshlrev_b32_e32 v22, 4, v22 ; 342C2C84 v_add_i32_e32 v22, 16, v22 ; 4A2C2C90 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v23, v22, v14 ; 102E1D16 v_mul_f32_e32 v23, 0x3f28f5c3, v23 ; 102E2EFF 3F28F5C3 v_mul_f32_e32 v22, v22, v13 ; 102C1B16 v_mul_f32_e32 v22, 0x3f28f5c3, v22 ; 102C2CFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v24, v30 ; 7E30111E v_lshlrev_b32_e32 v24, 4, v24 ; 34303084 v_add_i32_e32 v25, 16, v24 ; 4A323090 buffer_load_dword v27, v25, s[0:3], 0 offen ; E0301000 80001B19 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v27, v12 ; 1040191B v_add_i32_e32 v24, 20, v24 ; 4A303094 buffer_load_dword v26, v24, s[0:3], 0 offen ; E0301000 80001A18 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v24, -1.0, v26 ; 063034F3 v_cmp_ge_f32_e64 s[4:5], -|v24|, |v24| ; D00C0304 20023118 v_cndmask_b32_e64 v24, 0, -1, s[4:5] ; D2000018 00118280 v_cmp_eq_i32_e64 s[4:5], v24, 0 ; D1040004 00010118 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v24, 0x3ea8f5c3, v32 ; 103040FF 3EA8F5C3 v_mul_f32_e32 v25, v27, v13 ; 10321B1B v_mul_f32_e32 v25, 0x3ea8f5c3, v25 ; 103232FF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v24, 0x3f28f5c3, v32 ; 103040FF 3F28F5C3 v_mul_f32_e32 v25, v27, v14 ; 10321D1B v_mul_f32_e32 v25, 0x3f28f5c3, v25 ; 103232FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v26, -2.0, v26 ; 063434F5 v_cmp_ge_f32_e64 s[4:5], -|v26|, |v26| ; D00C0304 2002351A v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_ne_i32_e64 s[4:5], v26, 0 ; D10A0004 0001011A s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v24, v30 ; 7E30111E v_lshlrev_b32_e32 v24, 4, v24 ; 34303084 v_add_i32_e32 v24, 16, v24 ; 4A303090 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v25, v24, v14 ; 10321D18 v_mul_f32_e32 v25, 0x3f28f5c3, v25 ; 103232FF 3F28F5C3 v_mul_f32_e32 v24, v24, v13 ; 10301B18 v_mul_f32_e32 v24, 0x3f28f5c3, v24 ; 103030FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v26, v31 ; 7E34111F v_lshlrev_b32_e32 v26, 4, v26 ; 34343484 v_add_i32_e32 v27, 16, v26 ; 4A363490 buffer_load_dword v33, v27, s[0:3], 0 offen ; E0301000 8000211B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v34, v33, v12 ; 10441921 v_add_i32_e32 v26, 20, v26 ; 4A343494 buffer_load_dword v32, v26, s[0:3], 0 offen ; E0301000 8000201A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v26, -1.0, v32 ; 063440F3 v_cmp_ge_f32_e64 s[4:5], -|v26|, |v26| ; D00C0304 2002351A v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_eq_i32_e64 s[4:5], v26, 0 ; D1040004 0001011A s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v26, 0x3ea8f5c3, v34 ; 103444FF 3EA8F5C3 v_mul_f32_e32 v27, v33, v13 ; 10361B21 v_mul_f32_e32 v27, 0x3ea8f5c3, v27 ; 103636FF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v26, 0x3f28f5c3, v34 ; 103444FF 3F28F5C3 v_mul_f32_e32 v27, v33, v14 ; 10361D21 v_mul_f32_e32 v27, 0x3f28f5c3, v27 ; 103636FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v32, -2.0, v32 ; 064040F5 v_cmp_ge_f32_e64 s[4:5], -|v32|, |v32| ; D00C0304 20024120 v_cndmask_b32_e64 v32, 0, -1, s[4:5] ; D2000020 00118280 v_cmp_ne_i32_e64 s[4:5], v32, 0 ; D10A0004 00010120 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v26, v31 ; 7E34111F v_lshlrev_b32_e32 v26, 4, v26 ; 34343484 v_add_i32_e32 v26, 16, v26 ; 4A343490 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, v26, v14 ; 10361D1A v_mul_f32_e32 v27, 0x3f28f5c3, v27 ; 103636FF 3F28F5C3 v_mul_f32_e32 v26, v26, v13 ; 10341B1A v_mul_f32_e32 v26, 0x3f28f5c3, v26 ; 103434FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v28, v16 ; 7E381110 v_lshlrev_b32_e32 v28, 4, v28 ; 34383884 v_add_i32_e32 v29, 16, v28 ; 4A3A3890 buffer_load_dword v31, v29, s[0:3], 0 offen ; E0301000 80001F1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v12 ; 1040191F v_add_i32_e32 v28, 20, v28 ; 4A383894 buffer_load_dword v30, v28, s[0:3], 0 offen ; E0301000 80001E1C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v28, -1.0, v30 ; 06383CF3 v_cmp_ge_f32_e64 s[4:5], -|v28|, |v28| ; D00C0304 2002391C v_cndmask_b32_e64 v28, 0, -1, s[4:5] ; D200001C 00118280 v_cmp_eq_i32_e64 s[4:5], v28, 0 ; D1040004 0001011C s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v28, 0x3ea8f5c3, v32 ; 103840FF 3EA8F5C3 v_mul_f32_e32 v29, v31, v13 ; 103A1B1F v_mul_f32_e32 v29, 0x3ea8f5c3, v29 ; 103A3AFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v28, 0x3f28f5c3, v32 ; 103840FF 3F28F5C3 v_mul_f32_e32 v29, v31, v14 ; 103A1D1F v_mul_f32_e32 v29, 0x3f28f5c3, v29 ; 103A3AFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v30, -2.0, v30 ; 063C3CF5 v_cmp_ge_f32_e64 s[4:5], -|v30|, |v30| ; D00C0304 20023D1E v_cndmask_b32_e64 v30, 0, -1, s[4:5] ; D200001E 00118280 v_cmp_ne_i32_e64 s[4:5], v30, 0 ; D10A0004 0001011E s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v28, v16 ; 7E381110 v_lshlrev_b32_e32 v28, 4, v28 ; 34383884 v_add_i32_e32 v28, 16, v28 ; 4A383890 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v14 ; 103A1D1C v_mul_f32_e32 v29, 0x3f28f5c3, v29 ; 103A3AFF 3F28F5C3 v_mul_f32_e32 v28, v28, v13 ; 10381B1C v_mul_f32_e32 v28, 0x3f28f5c3, v28 ; 103838FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v30, v17 ; 7E3C1111 v_lshlrev_b32_e32 v30, 4, v30 ; 343C3C84 v_add_i32_e32 v31, 16, v30 ; 4A3E3C90 buffer_load_dword v33, v31, s[0:3], 0 offen ; E0301000 8000211F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v34, v33, v12 ; 10441921 v_add_i32_e32 v30, 20, v30 ; 4A3C3C94 buffer_load_dword v32, v30, s[0:3], 0 offen ; E0301000 8000201E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v30, -1.0, v32 ; 063C40F3 v_cmp_ge_f32_e64 s[4:5], -|v30|, |v30| ; D00C0304 20023D1E v_cndmask_b32_e64 v30, 0, -1, s[4:5] ; D200001E 00118280 v_cmp_eq_i32_e64 s[4:5], v30, 0 ; D1040004 0001011E s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v30, 0x3ea8f5c3, v34 ; 103C44FF 3EA8F5C3 v_mul_f32_e32 v31, v33, v13 ; 103E1B21 v_mul_f32_e32 v31, 0x3ea8f5c3, v31 ; 103E3EFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v30, 0x3f28f5c3, v34 ; 103C44FF 3F28F5C3 v_mul_f32_e32 v31, v33, v14 ; 103E1D21 v_mul_f32_e32 v31, 0x3f28f5c3, v31 ; 103E3EFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v32, -2.0, v32 ; 064040F5 v_cmp_ge_f32_e64 s[4:5], -|v32|, |v32| ; D00C0304 20024120 v_cndmask_b32_e64 v32, 0, -1, s[4:5] ; D2000020 00118280 v_cmp_ne_i32_e64 s[4:5], v32, 0 ; D10A0004 00010120 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v30, v17 ; 7E3C1111 v_lshlrev_b32_e32 v30, 4, v30 ; 343C3C84 v_add_i32_e32 v30, 16, v30 ; 4A3C3C90 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v31, v30, v14 ; 103E1D1E v_mul_f32_e32 v31, 0x3f28f5c3, v31 ; 103E3EFF 3F28F5C3 v_mul_f32_e32 v30, v30, v13 ; 103C1B1E v_mul_f32_e32 v30, 0x3f28f5c3, v30 ; 103C3CFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v32, v18 ; 7E401112 v_lshlrev_b32_e32 v32, 4, v32 ; 34404084 v_add_i32_e32 v33, 16, v32 ; 4A424090 buffer_load_dword v35, v33, s[0:3], 0 offen ; E0301000 80002321 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v35, v12 ; 10481923 v_add_i32_e32 v32, 20, v32 ; 4A404094 buffer_load_dword v34, v32, s[0:3], 0 offen ; E0301000 80002220 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v32, -1.0, v34 ; 064044F3 v_cmp_ge_f32_e64 s[4:5], -|v32|, |v32| ; D00C0304 20024120 v_cndmask_b32_e64 v32, 0, -1, s[4:5] ; D2000020 00118280 v_cmp_eq_i32_e64 s[4:5], v32, 0 ; D1040004 00010120 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v32, 0x3ea8f5c3, v36 ; 104048FF 3EA8F5C3 v_mul_f32_e32 v33, v35, v13 ; 10421B23 v_mul_f32_e32 v33, 0x3ea8f5c3, v33 ; 104242FF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v32, 0x3f28f5c3, v36 ; 104048FF 3F28F5C3 v_mul_f32_e32 v33, v35, v14 ; 10421D23 v_mul_f32_e32 v33, 0x3f28f5c3, v33 ; 104242FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v34, -2.0, v34 ; 064444F5 v_cmp_ge_f32_e64 s[4:5], -|v34|, |v34| ; D00C0304 20024522 v_cndmask_b32_e64 v34, 0, -1, s[4:5] ; D2000022 00118280 v_cmp_ne_i32_e64 s[4:5], v34, 0 ; D10A0004 00010122 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v32, v18 ; 7E401112 v_lshlrev_b32_e32 v32, 4, v32 ; 34404084 v_add_i32_e32 v32, 16, v32 ; 4A404090 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v32, v14 ; 10421D20 v_mul_f32_e32 v33, 0x3f28f5c3, v33 ; 104242FF 3F28F5C3 v_mul_f32_e32 v32, v32, v13 ; 10401B20 v_mul_f32_e32 v32, 0x3f28f5c3, v32 ; 104040FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v34, v19 ; 7E441113 v_lshlrev_b32_e32 v34, 4, v34 ; 34444484 v_add_i32_e32 v35, 16, v34 ; 4A464490 buffer_load_dword v37, v35, s[0:3], 0 offen ; E0301000 80002523 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v38, v37, v12 ; 104C1925 v_add_i32_e32 v34, 20, v34 ; 4A444494 buffer_load_dword v36, v34, s[0:3], 0 offen ; E0301000 80002422 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v34, -1.0, v36 ; 064448F3 v_cmp_ge_f32_e64 s[4:5], -|v34|, |v34| ; D00C0304 20024522 v_cndmask_b32_e64 v34, 0, -1, s[4:5] ; D2000022 00118280 v_cmp_eq_i32_e64 s[4:5], v34, 0 ; D1040004 00010122 s_and_saveexec_b64 s[6:7], s[4:5] ; BE862404 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E v_mul_f32_e32 v34, 0x3ea8f5c3, v38 ; 10444CFF 3EA8F5C3 v_mul_f32_e32 v35, v37, v13 ; 10461B25 v_mul_f32_e32 v35, 0x3ea8f5c3, v35 ; 104646FF 3EA8F5C3 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_xor_b64 exec, exec, s[6:7] ; 89FE067E v_mul_f32_e32 v34, 0x3f28f5c3, v38 ; 10444CFF 3F28F5C3 v_mul_f32_e32 v35, v37, v14 ; 10461D25 v_mul_f32_e32 v35, 0x3f28f5c3, v35 ; 104646FF 3F28F5C3 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_add_f32_e32 v36, -2.0, v36 ; 064848F5 v_cmp_ge_f32_e64 s[6:7], -|v36|, |v36| ; D00C0306 20024924 v_cndmask_b32_e64 v36, 0, -1, s[6:7] ; D2000024 00198280 v_cmp_ne_i32_e64 s[6:7], v36, 0 ; D10A0006 00010124 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E v_cvt_i32_f32_e32 v16, v19 ; 7E201113 v_lshlrev_b32_e32 v16, 4, v16 ; 34202084 v_add_i32_e32 v16, 16, v16 ; 4A202090 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v16, v14 ; 10221D10 v_mul_f32_e32 v35, 0x3f28f5c3, v17 ; 104622FF 3F28F5C3 v_mul_f32_e32 v12, v16, v13 ; 10181B10 v_mul_f32_e32 v34, 0x3f28f5c3, v12 ; 104418FF 3F28F5C3 s_or_b64 exec, exec, s[6:7] ; 88FE067E exp 15, 32, 0, 0, 0, v20, v21, v8, v9 ; F800020F 09081514 exp 15, 33, 0, 0, 0, v22, v23, v10, v11 ; F800021F 0B0A1716 exp 15, 34, 0, 0, 0, v24, v25, v4, v5 ; F800022F 05041918 exp 15, 35, 0, 0, 0, v26, v27, v6, v7 ; F800023F 07061B1A s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, 0 ; 7E080280 exp 15, 36, 0, 0, 0, v28, v29, v4, v4 ; F800024F 04041D1C exp 15, 37, 0, 0, 0, v30, v31, v4, v4 ; F800025F 04041F1E exp 15, 38, 0, 0, 0, v32, v33, v4, v4 ; F800026F 04042120 exp 15, 39, 0, 0, 0, v34, v35, v4, v4 ; F800027F 04042322 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v4, 0, v2, 1.0 ; D2820004 03CA0480 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, s5, v1 ; 060C0205 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL IN[7], GENERIC[16], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SAMP[12] DCL SAMP[13] DCL SAMP[14] DCL SAMP[15] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], TEMP[0] 3: MOV TEMP[1].y, TEMP[0].wwww 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2], TEMP[2], SAMP[0], 2D 6: MUL TEMP[2], TEMP[2], TEMP[2] 7: MOV TEMP[1].x, TEMP[2].wwww 8: MOV TEMP[3].xy, IN[2].xyyy 9: TEX TEMP[3], TEMP[3], SAMP[2], 2D 10: MUL TEMP[3], TEMP[3], TEMP[3] 11: MOV TEMP[1].z, TEMP[3].wwww 12: MOV TEMP[4].xy, IN[3].xyyy 13: TEX TEMP[4], TEMP[4], SAMP[3], 2D 14: MUL TEMP[4], TEMP[4], TEMP[4] 15: MOV TEMP[1].w, TEMP[4].wwww 16: MUL TEMP[1], TEMP[1], TEMP[1] 17: MUL TEMP[1], TEMP[1], TEMP[1] 18: MOV TEMP[5].xy, IN[0].zwzz 19: MOV TEMP[5].zw, IN[1].wwzw 20: MUL TEMP[1], TEMP[1], TEMP[5] 21: MUL TEMP[0], TEMP[0], TEMP[1].yyyy 22: MAD TEMP[0], TEMP[1].xxxx, TEMP[2], TEMP[0] 23: MAD TEMP[0], TEMP[1].zzzz, TEMP[3], TEMP[0] 24: MAD TEMP[0], TEMP[1].wwww, TEMP[4], TEMP[0] 25: MOV TEMP[6].xy, IN[4].xyyy 26: TEX TEMP[6], TEMP[6], SAMP[4], 2D 27: MUL TEMP[2], TEMP[6], TEMP[6] 28: MOV TEMP[3].x, TEMP[2].wwww 29: MOV TEMP[6].xy, IN[5].xyyy 30: TEX TEMP[6], TEMP[6], SAMP[5], 2D 31: MUL TEMP[4], TEMP[6], TEMP[6] 32: MOV TEMP[3].y, TEMP[4].wwww 33: MOV TEMP[6].xy, IN[6].xyyy 34: TEX TEMP[6], TEMP[6], SAMP[6], 2D 35: MUL TEMP[5], TEMP[6], TEMP[6] 36: MOV TEMP[3].z, TEMP[5].wwww 37: MOV TEMP[6].xy, IN[7].xyyy 38: TEX TEMP[6], TEMP[6], SAMP[7], 2D 39: MUL TEMP[6], TEMP[6], TEMP[6] 40: MOV TEMP[3].w, TEMP[6].wwww 41: MUL TEMP[3], TEMP[3], TEMP[3] 42: MUL TEMP[3], TEMP[3], TEMP[3] 43: MOV TEMP[7].xy, IN[2].zwzz 44: MOV TEMP[7].zw, IN[3].wwzw 45: MUL TEMP[3], TEMP[3], TEMP[7] 46: MAD TEMP[0], TEMP[3].xxxx, TEMP[2], TEMP[0] 47: MAD TEMP[0], TEMP[3].yyyy, TEMP[4], TEMP[0] 48: MAD TEMP[0], TEMP[3].zzzz, TEMP[5], TEMP[0] 49: MAD TEMP[0], TEMP[3].wwww, TEMP[6], TEMP[0] 50: DP4 TEMP[4].x, TEMP[3], IMM[0].xxxx 51: DP4 TEMP[5].x, TEMP[1], IMM[0].xxxx 52: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 53: RCP TEMP[2].x, TEMP[4].xxxx 54: MUL TEMP[0], TEMP[0], TEMP[2].xxxx 55: MOV TEMP[4].xy, IN[1].xyyy 56: TEX TEMP[4].x, TEMP[4], SAMP[9], 2D 57: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].yyyy 58: MOV TEMP[5].xy, IN[0].xyyy 59: TEX TEMP[5].x, TEMP[5], SAMP[8], 2D 60: MAD TEMP[4].x, TEMP[1].xxxx, TEMP[5].xxxx, TEMP[4].xxxx 61: MOV TEMP[5].xy, IN[2].xyyy 62: TEX TEMP[5].x, TEMP[5], SAMP[10], 2D 63: MAD TEMP[4].x, TEMP[1].zzzz, TEMP[5].xxxx, TEMP[4].xxxx 64: MOV TEMP[5].xy, IN[3].xyyy 65: TEX TEMP[5].x, TEMP[5], SAMP[11], 2D 66: MAD TEMP[1].x, TEMP[1].wwww, TEMP[5].xxxx, TEMP[4].xxxx 67: MOV TEMP[4].xy, IN[4].xyyy 68: TEX TEMP[4].x, TEMP[4], SAMP[12], 2D 69: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx 70: MOV TEMP[4].xy, IN[5].xyyy 71: TEX TEMP[4].x, TEMP[4], SAMP[13], 2D 72: MAD TEMP[1].x, TEMP[3].yyyy, TEMP[4].xxxx, TEMP[1].xxxx 73: MOV TEMP[4].xy, IN[6].xyyy 74: TEX TEMP[4].x, TEMP[4], SAMP[14], 2D 75: MAD TEMP[1].x, TEMP[3].zzzz, TEMP[4].xxxx, TEMP[1].xxxx 76: MOV TEMP[4].xy, IN[7].xyyy 77: TEX TEMP[4].x, TEMP[4], SAMP[15], 2D 78: MAD TEMP[1].x, TEMP[3].wwww, TEMP[4].xxxx, TEMP[1].xxxx 79: MUL TEMP[1], TEMP[2].xxxx, TEMP[1].xxxx 80: MOV OUT[1], TEMP[1] 81: MOV OUT[0], TEMP[0] 82: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 11 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 11 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 12 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 12 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 13 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 13 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14 %79 = load <8 x i32> addrspace(2)* %78, !tbaa !0 %80 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 14 %81 = load <4 x i32> addrspace(2)* %80, !tbaa !0 %82 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15 %83 = load <8 x i32> addrspace(2)* %82, !tbaa !0 %84 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 15 %85 = load <4 x i32> addrspace(2)* %84, !tbaa !0 %86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %110 = bitcast float %90 to i32 %111 = bitcast float %91 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = bitcast <8 x i32> %27 to <32 x i8> %115 = bitcast <4 x i32> %29 to <16 x i8> %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %114, <16 x i8> %115, i32 2) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = extractelement <4 x float> %116, i32 3 %121 = fmul float %117, %117 %122 = fmul float %118, %118 %123 = fmul float %119, %119 %124 = fmul float %120, %120 %125 = bitcast float %86 to i32 %126 = bitcast float %87 to i32 %127 = insertelement <2 x i32> undef, i32 %125, i32 0 %128 = insertelement <2 x i32> %127, i32 %126, i32 1 %129 = bitcast <8 x i32> %23 to <32 x i8> %130 = bitcast <4 x i32> %25 to <16 x i8> %131 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = extractelement <4 x float> %131, i32 3 %136 = fmul float %132, %132 %137 = fmul float %133, %133 %138 = fmul float %134, %134 %139 = fmul float %135, %135 %140 = bitcast float %94 to i32 %141 = bitcast float %95 to i32 %142 = insertelement <2 x i32> undef, i32 %140, i32 0 %143 = insertelement <2 x i32> %142, i32 %141, i32 1 %144 = bitcast <8 x i32> %31 to <32 x i8> %145 = bitcast <4 x i32> %33 to <16 x i8> %146 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %144, <16 x i8> %145, i32 2) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = fmul float %147, %147 %152 = fmul float %148, %148 %153 = fmul float %149, %149 %154 = fmul float %150, %150 %155 = bitcast float %98 to i32 %156 = bitcast float %99 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %35 to <32 x i8> %160 = bitcast <4 x i32> %37 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, %162 %167 = fmul float %163, %163 %168 = fmul float %164, %164 %169 = fmul float %165, %165 %170 = fmul float %139, %139 %171 = fmul float %124, %124 %172 = fmul float %154, %154 %173 = fmul float %169, %169 %174 = fmul float %170, %170 %175 = fmul float %171, %171 %176 = fmul float %172, %172 %177 = fmul float %173, %173 %178 = fmul float %174, %88 %179 = fmul float %175, %89 %180 = fmul float %176, %92 %181 = fmul float %177, %93 %182 = fmul float %121, %179 %183 = fmul float %122, %179 %184 = fmul float %123, %179 %185 = fmul float %124, %179 %186 = fmul float %178, %136 %187 = fadd float %186, %182 %188 = fmul float %178, %137 %189 = fadd float %188, %183 %190 = fmul float %178, %138 %191 = fadd float %190, %184 %192 = fmul float %178, %139 %193 = fadd float %192, %185 %194 = fmul float %180, %151 %195 = fadd float %194, %187 %196 = fmul float %180, %152 %197 = fadd float %196, %189 %198 = fmul float %180, %153 %199 = fadd float %198, %191 %200 = fmul float %180, %154 %201 = fadd float %200, %193 %202 = fmul float %181, %166 %203 = fadd float %202, %195 %204 = fmul float %181, %167 %205 = fadd float %204, %197 %206 = fmul float %181, %168 %207 = fadd float %206, %199 %208 = fmul float %181, %169 %209 = fadd float %208, %201 %210 = bitcast float %102 to i32 %211 = bitcast float %103 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = bitcast <8 x i32> %39 to <32 x i8> %215 = bitcast <4 x i32> %41 to <16 x i8> %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %214, <16 x i8> %215, i32 2) %217 = extractelement <4 x float> %216, i32 0 %218 = extractelement <4 x float> %216, i32 1 %219 = extractelement <4 x float> %216, i32 2 %220 = extractelement <4 x float> %216, i32 3 %221 = fmul float %217, %217 %222 = fmul float %218, %218 %223 = fmul float %219, %219 %224 = fmul float %220, %220 %225 = bitcast float %104 to i32 %226 = bitcast float %105 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = bitcast <8 x i32> %43 to <32 x i8> %230 = bitcast <4 x i32> %45 to <16 x i8> %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %229, <16 x i8> %230, i32 2) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = extractelement <4 x float> %231, i32 3 %236 = fmul float %232, %232 %237 = fmul float %233, %233 %238 = fmul float %234, %234 %239 = fmul float %235, %235 %240 = bitcast float %106 to i32 %241 = bitcast float %107 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = bitcast <8 x i32> %47 to <32 x i8> %245 = bitcast <4 x i32> %49 to <16 x i8> %246 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %244, <16 x i8> %245, i32 2) %247 = extractelement <4 x float> %246, i32 0 %248 = extractelement <4 x float> %246, i32 1 %249 = extractelement <4 x float> %246, i32 2 %250 = extractelement <4 x float> %246, i32 3 %251 = fmul float %247, %247 %252 = fmul float %248, %248 %253 = fmul float %249, %249 %254 = fmul float %250, %250 %255 = bitcast float %108 to i32 %256 = bitcast float %109 to i32 %257 = insertelement <2 x i32> undef, i32 %255, i32 0 %258 = insertelement <2 x i32> %257, i32 %256, i32 1 %259 = bitcast <8 x i32> %51 to <32 x i8> %260 = bitcast <4 x i32> %53 to <16 x i8> %261 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %258, <32 x i8> %259, <16 x i8> %260, i32 2) %262 = extractelement <4 x float> %261, i32 0 %263 = extractelement <4 x float> %261, i32 1 %264 = extractelement <4 x float> %261, i32 2 %265 = extractelement <4 x float> %261, i32 3 %266 = fmul float %262, %262 %267 = fmul float %263, %263 %268 = fmul float %264, %264 %269 = fmul float %265, %265 %270 = fmul float %224, %224 %271 = fmul float %239, %239 %272 = fmul float %254, %254 %273 = fmul float %269, %269 %274 = fmul float %270, %270 %275 = fmul float %271, %271 %276 = fmul float %272, %272 %277 = fmul float %273, %273 %278 = fmul float %274, %96 %279 = fmul float %275, %97 %280 = fmul float %276, %100 %281 = fmul float %277, %101 %282 = fmul float %278, %221 %283 = fadd float %282, %203 %284 = fmul float %278, %222 %285 = fadd float %284, %205 %286 = fmul float %278, %223 %287 = fadd float %286, %207 %288 = fmul float %278, %224 %289 = fadd float %288, %209 %290 = fmul float %279, %236 %291 = fadd float %290, %283 %292 = fmul float %279, %237 %293 = fadd float %292, %285 %294 = fmul float %279, %238 %295 = fadd float %294, %287 %296 = fmul float %279, %239 %297 = fadd float %296, %289 %298 = fmul float %280, %251 %299 = fadd float %298, %291 %300 = fmul float %280, %252 %301 = fadd float %300, %293 %302 = fmul float %280, %253 %303 = fadd float %302, %295 %304 = fmul float %280, %254 %305 = fadd float %304, %297 %306 = fmul float %281, %266 %307 = fadd float %306, %299 %308 = fmul float %281, %267 %309 = fadd float %308, %301 %310 = fmul float %281, %268 %311 = fadd float %310, %303 %312 = fmul float %281, %269 %313 = fadd float %312, %305 %314 = fmul float %278, 1.000000e+00 %315 = fmul float %279, 1.000000e+00 %316 = fadd float %314, %315 %317 = fmul float %280, 1.000000e+00 %318 = fadd float %316, %317 %319 = fmul float %281, 1.000000e+00 %320 = fadd float %318, %319 %321 = fmul float %178, 1.000000e+00 %322 = fmul float %179, 1.000000e+00 %323 = fadd float %321, %322 %324 = fmul float %180, 1.000000e+00 %325 = fadd float %323, %324 %326 = fmul float %181, 1.000000e+00 %327 = fadd float %325, %326 %328 = fadd float %320, %327 %329 = fdiv float 1.000000e+00, %328 %330 = fmul float %307, %329 %331 = fmul float %309, %329 %332 = fmul float %311, %329 %333 = fmul float %313, %329 %334 = bitcast float %90 to i32 %335 = bitcast float %91 to i32 %336 = insertelement <2 x i32> undef, i32 %334, i32 0 %337 = insertelement <2 x i32> %336, i32 %335, i32 1 %338 = bitcast <8 x i32> %59 to <32 x i8> %339 = bitcast <4 x i32> %61 to <16 x i8> %340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %337, <32 x i8> %338, <16 x i8> %339, i32 2) %341 = extractelement <4 x float> %340, i32 0 %342 = fmul float %341, %179 %343 = bitcast float %86 to i32 %344 = bitcast float %87 to i32 %345 = insertelement <2 x i32> undef, i32 %343, i32 0 %346 = insertelement <2 x i32> %345, i32 %344, i32 1 %347 = bitcast <8 x i32> %55 to <32 x i8> %348 = bitcast <4 x i32> %57 to <16 x i8> %349 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %346, <32 x i8> %347, <16 x i8> %348, i32 2) %350 = extractelement <4 x float> %349, i32 0 %351 = fmul float %178, %350 %352 = fadd float %351, %342 %353 = bitcast float %94 to i32 %354 = bitcast float %95 to i32 %355 = insertelement <2 x i32> undef, i32 %353, i32 0 %356 = insertelement <2 x i32> %355, i32 %354, i32 1 %357 = bitcast <8 x i32> %63 to <32 x i8> %358 = bitcast <4 x i32> %65 to <16 x i8> %359 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %356, <32 x i8> %357, <16 x i8> %358, i32 2) %360 = extractelement <4 x float> %359, i32 0 %361 = fmul float %180, %360 %362 = fadd float %361, %352 %363 = bitcast float %98 to i32 %364 = bitcast float %99 to i32 %365 = insertelement <2 x i32> undef, i32 %363, i32 0 %366 = insertelement <2 x i32> %365, i32 %364, i32 1 %367 = bitcast <8 x i32> %67 to <32 x i8> %368 = bitcast <4 x i32> %69 to <16 x i8> %369 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %366, <32 x i8> %367, <16 x i8> %368, i32 2) %370 = extractelement <4 x float> %369, i32 0 %371 = fmul float %181, %370 %372 = fadd float %371, %362 %373 = bitcast float %102 to i32 %374 = bitcast float %103 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %71 to <32 x i8> %378 = bitcast <4 x i32> %73 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 0 %381 = fmul float %278, %380 %382 = fadd float %381, %372 %383 = bitcast float %104 to i32 %384 = bitcast float %105 to i32 %385 = insertelement <2 x i32> undef, i32 %383, i32 0 %386 = insertelement <2 x i32> %385, i32 %384, i32 1 %387 = bitcast <8 x i32> %75 to <32 x i8> %388 = bitcast <4 x i32> %77 to <16 x i8> %389 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %386, <32 x i8> %387, <16 x i8> %388, i32 2) %390 = extractelement <4 x float> %389, i32 0 %391 = fmul float %279, %390 %392 = fadd float %391, %382 %393 = bitcast float %106 to i32 %394 = bitcast float %107 to i32 %395 = insertelement <2 x i32> undef, i32 %393, i32 0 %396 = insertelement <2 x i32> %395, i32 %394, i32 1 %397 = bitcast <8 x i32> %79 to <32 x i8> %398 = bitcast <4 x i32> %81 to <16 x i8> %399 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = fmul float %280, %400 %402 = fadd float %401, %392 %403 = bitcast float %108 to i32 %404 = bitcast float %109 to i32 %405 = insertelement <2 x i32> undef, i32 %403, i32 0 %406 = insertelement <2 x i32> %405, i32 %404, i32 1 %407 = bitcast <8 x i32> %83 to <32 x i8> %408 = bitcast <4 x i32> %85 to <16 x i8> %409 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %406, <32 x i8> %407, <16 x i8> %408, i32 2) %410 = extractelement <4 x float> %409, i32 0 %411 = fmul float %281, %410 %412 = fadd float %411, %402 %413 = fmul float %329, %412 %414 = fmul float %329, %412 %415 = fmul float %329, %412 %416 = fmul float %329, %412 %417 = call i32 @llvm.SI.packf16(float %330, float %331) %418 = bitcast i32 %417 to float %419 = call i32 @llvm.SI.packf16(float %332, float %333) %420 = bitcast i32 %419 to float %421 = call i32 @llvm.SI.packf16(float %413, float %414) %422 = bitcast i32 %421 to float %423 = call i32 @llvm.SI.packf16(float %415, float %416) %424 = bitcast i32 %423 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %418, float %420, float %418, float %420) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %422, float %424, float %422, float %424) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[100:101], s[6:7] ; BEE40406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[96:99], s[4:5], 0x8 ; C0B00508 s_load_dwordx4 s[84:87], s[4:5], 0xc ; C0AA050C s_load_dwordx4 s[60:63], s[4:5], 0x10 ; C09E0510 s_load_dwordx4 s[72:75], s[4:5], 0x14 ; C0A40514 s_load_dwordx4 s[48:51], s[4:5], 0x18 ; C0980518 s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C s_load_dwordx4 s[12:15], s[4:5], 0x20 ; C0860520 s_load_dwordx4 s[0:3], s[4:5], 0x24 ; C0800524 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 24 ; 04953000 v_writelane_b32 v74, s1, 25 ; 04953201 v_writelane_b32 v74, s2, 26 ; 04953402 v_writelane_b32 v74, s3, 27 ; 04953603 s_load_dwordx4 s[0:3], s[4:5], 0x28 ; C0800528 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 20 ; 04952800 v_writelane_b32 v74, s1, 21 ; 04952A01 v_writelane_b32 v74, s2, 22 ; 04952C02 v_writelane_b32 v74, s3, 23 ; 04952E03 s_load_dwordx4 s[0:3], s[4:5], 0x2c ; C080052C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 16 ; 04952000 v_writelane_b32 v74, s1, 17 ; 04952201 v_writelane_b32 v74, s2, 18 ; 04952402 v_writelane_b32 v74, s3, 19 ; 04952603 s_load_dwordx4 s[0:3], s[4:5], 0x30 ; C0800530 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 12 ; 04951800 v_writelane_b32 v74, s1, 13 ; 04951A01 v_writelane_b32 v74, s2, 14 ; 04951C02 v_writelane_b32 v74, s3, 15 ; 04951E03 s_load_dwordx4 s[0:3], s[4:5], 0x34 ; C0800534 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 8 ; 04951000 v_writelane_b32 v74, s1, 9 ; 04951201 v_writelane_b32 v74, s2, 10 ; 04951402 v_writelane_b32 v74, s3, 11 ; 04951603 s_load_dwordx4 s[0:3], s[4:5], 0x38 ; C0800538 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 4 ; 04950800 v_writelane_b32 v74, s1, 5 ; 04950A01 v_writelane_b32 v74, s2, 6 ; 04950C02 v_writelane_b32 v74, s3, 7 ; 04950E03 s_load_dwordx4 s[0:3], s[4:5], 0x3c ; C080053C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 0 ; 04950000 v_writelane_b32 v74, s1, 1 ; 04950201 v_writelane_b32 v74, s2, 2 ; 04950402 v_writelane_b32 v74, s3, 3 ; 04950603 s_load_dwordx8 s[28:35], s[100:101], 0x0 ; C0CE6500 s_load_dwordx8 s[0:7], s[100:101], 0x8 ; C0C06508 s_load_dwordx8 s[16:23], s[100:101], 0x10 ; C0C86510 s_load_dwordx8 s[88:95], s[100:101], 0x18 ; C0EC6518 s_load_dwordx8 s[64:71], s[100:101], 0x20 ; C0E06520 s_load_dwordx8 s[76:83], s[100:101], 0x28 ; C0E66528 s_load_dwordx8 s[52:59], s[100:101], 0x30 ; C0DA6530 s_load_dwordx8 s[40:47], s[100:101], 0x38 ; C0D46538 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800F00 00400402 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mul_f32_e32 v9, v8, v8 ; 10121108 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_interp_p1_f32 v10, v0, 3, 0, [m0] ; C8280300 v_interp_p2_f32 v10, [v10], v1, 3, 0, [m0] ; C8290301 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[28:35], s[24:27] ; F0800F00 00C70C0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, v15, v15 ; 10201F0F v_mul_f32_e32 v17, v16, v16 ; 10222110 v_mul_f32_e32 v17, v17, v17 ; 10222311 v_interp_p1_f32 v18, v0, 2, 0, [m0] ; C8480200 v_interp_p2_f32 v18, [v18], v1, 2, 0, [m0] ; C8490201 v_mad_f32 v19, v17, v18, v9 ; D2820013 04262511 v_interp_p1_f32 v21, v0, 1, 2, [m0] ; C8540900 v_interp_p2_f32 v21, [v21], v1, 1, 2, [m0] ; C8550901 v_interp_p1_f32 v20, v0, 0, 2, [m0] ; C8500800 v_interp_p2_f32 v20, [v20], v1, 0, 2, [m0] ; C8510801 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[96:99] ; F0800F00 03041614 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v25, v25 ; 10343319 v_mul_f32_e32 v27, v26, v26 ; 1036351A v_mul_f32_e32 v27, v27, v27 ; 1036371B v_interp_p1_f32 v28, v0, 2, 1, [m0] ; C8700600 v_interp_p2_f32 v28, [v28], v1, 2, 1, [m0] ; C8710601 v_mad_f32 v19, v27, v28, v19 ; D2820013 044E391B v_interp_p1_f32 v30, v0, 1, 3, [m0] ; C8780D00 v_interp_p2_f32 v30, [v30], v1, 1, 3, [m0] ; C8790D01 v_interp_p1_f32 v29, v0, 0, 3, [m0] ; C8740C00 v_interp_p2_f32 v29, [v29], v1, 0, 3, [m0] ; C8750C01 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[88:95], s[84:87] ; F0800F00 02B61F1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v35, v34, v34 ; 10464522 v_mul_f32_e32 v36, v35, v35 ; 10484723 v_mul_f32_e32 v36, v36, v36 ; 10484924 v_interp_p1_f32 v37, v0, 3, 1, [m0] ; C8940700 v_interp_p2_f32 v37, [v37], v1, 3, 1, [m0] ; C8950701 v_mad_f32 v19, v36, v37, v19 ; D2820013 044E4B24 v_interp_p1_f32 v39, v0, 1, 5, [m0] ; C89C1500 v_interp_p2_f32 v39, [v39], v1, 1, 5, [m0] ; C89D1501 v_interp_p1_f32 v38, v0, 0, 5, [m0] ; C8981400 v_interp_p2_f32 v38, [v38], v1, 0, 5, [m0] ; C8991401 image_sample v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[76:83], s[72:75] ; F0800F00 02532826 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v44, v43, v43 ; 1058572B v_mul_f32_e32 v45, v44, v44 ; 105A592C v_mul_f32_e32 v45, v45, v45 ; 105A5B2D v_interp_p1_f32 v46, v0, 3, 2, [m0] ; C8B80B00 v_interp_p2_f32 v46, [v46], v1, 3, 2, [m0] ; C8B90B01 v_mul_f32_e32 v45, v46, v45 ; 105A5B2E v_interp_p1_f32 v47, v0, 1, 4, [m0] ; C8BC1100 v_interp_p2_f32 v47, [v47], v1, 1, 4, [m0] ; C8BD1101 v_interp_p1_f32 v46, v0, 0, 4, [m0] ; C8B81000 v_interp_p2_f32 v46, [v46], v1, 0, 4, [m0] ; C8B91001 image_sample v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[64:71], s[60:63] ; F0800F00 01F0302E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v52, v51, v51 ; 10686733 v_mul_f32_e32 v53, v52, v52 ; 106A6934 v_mul_f32_e32 v53, v53, v53 ; 106A6B35 v_interp_p1_f32 v54, v0, 2, 2, [m0] ; C8D80A00 v_interp_p2_f32 v54, [v54], v1, 2, 2, [m0] ; C8D90A01 v_mad_f32 v55, v53, v54, v45 ; D2820037 04B66D35 v_interp_p1_f32 v57, v0, 1, 6, [m0] ; C8E41900 v_interp_p2_f32 v57, [v57], v1, 1, 6, [m0] ; C8E51901 v_interp_p1_f32 v56, v0, 0, 6, [m0] ; C8E01800 v_interp_p2_f32 v56, [v56], v1, 0, 6, [m0] ; C8E11801 image_sample v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[52:59], s[48:51] ; F0800F00 018D3A38 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v62, v61, v61 ; 107C7B3D v_mul_f32_e32 v63, v62, v62 ; 107E7D3E v_mul_f32_e32 v63, v63, v63 ; 107E7F3F v_interp_p1_f32 v64, v0, 2, 3, [m0] ; C9000E00 v_interp_p2_f32 v64, [v64], v1, 2, 3, [m0] ; C9010E01 v_mad_f32 v55, v63, v64, v55 ; D2820037 04DE813F v_interp_p1_f32 v66, v0, 1, 7, [m0] ; C9081D00 v_interp_p2_f32 v66, [v66], v1, 1, 7, [m0] ; C9091D01 v_interp_p1_f32 v65, v0, 0, 7, [m0] ; C9041C00 v_interp_p2_f32 v65, [v65], v1, 0, 7, [m0] ; C9051C01 image_sample v[67:70], 15, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[40:47], s[36:39] ; F0800F00 012A4341 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v71, v70, v70 ; 108E8D46 v_mul_f32_e32 v72, v71, v71 ; 10908F47 v_mul_f32_e32 v72, v72, v72 ; 10909148 v_interp_p1_f32 v73, v0, 3, 3, [m0] ; C9240F00 v_interp_p2_f32 v73, [v73], v1, 3, 3, [m0] ; C9250F01 v_mad_f32 v0, v72, v73, v55 ; D2820000 04DE9348 v_add_f32_e32 v0, v19, v0 ; 06000113 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mul_f32_e32 v1, v9, v8 ; 10021109 v_mul_f32_e32 v8, v18, v17 ; 10102312 v_mad_f32 v1, v8, v16, v1 ; D2820001 04062108 v_mul_f32_e32 v16, v28, v27 ; 1020371C v_mad_f32 v1, v16, v26, v1 ; D2820001 04063510 v_mul_f32_e32 v17, v37, v36 ; 10224925 v_mad_f32 v1, v17, v35, v1 ; D2820001 04064711 v_mul_f32_e32 v18, v54, v53 ; 10246B36 v_mad_f32 v1, v18, v52, v1 ; D2820001 04066912 v_mad_f32 v1, v45, v44, v1 ; D2820001 0406592D v_mul_f32_e32 v19, v64, v63 ; 10267F40 v_mad_f32 v1, v19, v62, v1 ; D2820001 04067D13 v_mul_f32_e32 v26, v73, v72 ; 10349149 v_mad_f32 v1, v26, v71, v1 ; D2820001 04068F1A v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v27, v6, v6 ; 10360D06 v_mul_f32_e32 v27, v9, v27 ; 10363709 v_mul_f32_e32 v28, v14, v14 ; 10381D0E v_mad_f32 v27, v8, v28, v27 ; D282001B 046E3908 v_mul_f32_e32 v28, v24, v24 ; 10383118 v_mad_f32 v27, v16, v28, v27 ; D282001B 046E3910 v_mul_f32_e32 v28, v33, v33 ; 10384321 v_mad_f32 v27, v17, v28, v27 ; D282001B 046E3911 v_mul_f32_e32 v28, v50, v50 ; 10386532 v_mad_f32 v27, v18, v28, v27 ; D282001B 046E3912 v_mul_f32_e32 v28, v42, v42 ; 1038552A v_mad_f32 v27, v45, v28, v27 ; D282001B 046E392D v_mul_f32_e32 v28, v60, v60 ; 1038793C v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_mul_f32_e32 v28, v69, v69 ; 10388B45 v_mad_f32 v27, v26, v28, v27 ; D282001B 046E391A v_mul_f32_e32 v27, v0, v27 ; 10363700 v_cvt_pkrtz_f16_f32_e32 v1, v27, v1 ; 5E02031B v_mul_f32_e32 v27, v5, v5 ; 10360B05 v_mul_f32_e32 v27, v9, v27 ; 10363709 v_mul_f32_e32 v28, v13, v13 ; 10381B0D v_mad_f32 v27, v8, v28, v27 ; D282001B 046E3908 v_mul_f32_e32 v28, v23, v23 ; 10382F17 v_mad_f32 v27, v16, v28, v27 ; D282001B 046E3910 v_mul_f32_e32 v28, v32, v32 ; 10384120 v_mad_f32 v27, v17, v28, v27 ; D282001B 046E3911 v_mul_f32_e32 v28, v49, v49 ; 10386331 v_mad_f32 v27, v18, v28, v27 ; D282001B 046E3912 v_mul_f32_e32 v28, v41, v41 ; 10385329 v_mad_f32 v27, v45, v28, v27 ; D282001B 046E392D v_mul_f32_e32 v28, v59, v59 ; 1038773B v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_mul_f32_e32 v28, v68, v68 ; 10388944 v_mad_f32 v27, v26, v28, v27 ; D282001B 046E391A v_mul_f32_e32 v27, v0, v27 ; 10363700 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v5, v12, v12 ; 100A190C v_mad_f32 v4, v8, v5, v4 ; D2820004 04120B08 v_mul_f32_e32 v5, v22, v22 ; 100A2D16 v_mad_f32 v4, v16, v5, v4 ; D2820004 04120B10 v_mul_f32_e32 v5, v31, v31 ; 100A3F1F v_mad_f32 v4, v17, v5, v4 ; D2820004 04120B11 v_mul_f32_e32 v5, v48, v48 ; 100A6130 v_mad_f32 v4, v18, v5, v4 ; D2820004 04120B12 v_mul_f32_e32 v5, v40, v40 ; 100A5128 v_mad_f32 v4, v45, v5, v4 ; D2820004 04120B2D v_mul_f32_e32 v5, v58, v58 ; 100A753A v_mad_f32 v4, v19, v5, v4 ; D2820004 04120B13 v_mul_f32_e32 v5, v67, v67 ; 100A8743 v_mad_f32 v4, v26, v5, v4 ; D2820004 04120B1A v_mul_f32_e32 v4, v0, v4 ; 10080900 v_cvt_pkrtz_f16_f32_e32 v4, v4, v27 ; 5E083704 exp 15, 0, 1, 0, 0, v4, v1, v4, v1 ; F800040F 01040104 s_load_dwordx8 s[0:7], s[100:101], 0x48 ; C0C06548 v_readlane_b32 s8, v74, 24 ; 0211314A v_readlane_b32 s9, v74, 25 ; 0213334A v_readlane_b32 s10, v74, 26 ; 0215354A v_readlane_b32 s11, v74, 27 ; 0217374A s_nop 2 ; BF800002 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800100 00400102 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v9, v1 ; 10020309 s_load_dwordx8 s[0:7], s[100:101], 0x40 ; C0C06540 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[0:7], s[12:15] ; F0800100 0060020A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 s_load_dwordx8 s[0:7], s[100:101], 0x50 ; C0C06550 v_readlane_b32 s8, v74, 20 ; 0211294A v_readlane_b32 s9, v74, 21 ; 02132B4A v_readlane_b32 s10, v74, 22 ; 02152D4A v_readlane_b32 s11, v74, 23 ; 02172F4A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[0:7], s[8:11] ; F0800100 00400214 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v16, v2, v1 ; D2820001 04060510 s_load_dwordx8 s[0:7], s[100:101], 0x58 ; C0C06558 v_readlane_b32 s8, v74, 16 ; 0211214A v_readlane_b32 s9, v74, 17 ; 0213234A v_readlane_b32 s10, v74, 18 ; 0215254A v_readlane_b32 s11, v74, 19 ; 0217274A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[0:7], s[8:11] ; F0800100 0040021D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v17, v2, v1 ; D2820001 04060511 s_load_dwordx8 s[0:7], s[100:101], 0x60 ; C0C06560 v_readlane_b32 s8, v74, 12 ; 0211194A v_readlane_b32 s9, v74, 13 ; 02131B4A v_readlane_b32 s10, v74, 14 ; 02151D4A v_readlane_b32 s11, v74, 15 ; 02171F4A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[0:7], s[8:11] ; F0800100 0040022E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v18, v2, v1 ; D2820001 04060512 s_load_dwordx8 s[0:7], s[100:101], 0x68 ; C0C06568 v_readlane_b32 s8, v74, 8 ; 0211114A v_readlane_b32 s9, v74, 9 ; 0213134A v_readlane_b32 s10, v74, 10 ; 0215154A v_readlane_b32 s11, v74, 11 ; 0217174A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[0:7], s[8:11] ; F0800100 00400226 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v45, v2, v1 ; D2820001 0406052D s_load_dwordx8 s[0:7], s[100:101], 0x70 ; C0C06570 v_readlane_b32 s8, v74, 4 ; 0211094A v_readlane_b32 s9, v74, 5 ; 02130B4A v_readlane_b32 s10, v74, 6 ; 02150D4A v_readlane_b32 s11, v74, 7 ; 02170F4A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[0:7], s[8:11] ; F0800100 00400238 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v19, v2, v1 ; D2820001 04060513 s_load_dwordx8 s[0:7], s[100:101], 0x78 ; C0C06578 v_readlane_b32 s8, v74, 0 ; 0211014A v_readlane_b32 s9, v74, 1 ; 0213034A v_readlane_b32 s10, v74, 2 ; 0215054A v_readlane_b32 s11, v74, 3 ; 0217074A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[0:7], s[8:11] ; F0800100 00400241 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v26, v2, v1 ; D2820001 0406051A v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV OUT[1], IN[1].xyxy 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v1, v2 ; F800020F 02010201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_add_f32_e32 v0, 0, v0 ; 06000080 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.0000} IMM[1] FLT32 { 1.0000, -1.0000, -2.0000, 0.5000} IMM[2] FLT32 { 2.0000, -2.0000, 0.0000, 4.0000} IMM[3] FLT32 { 0.0000, 0.5000, 2.0000, 0.0000} 0: MOV TEMP[0].xy, IMM[0].xyxx 1: MAD TEMP[1].zw, CONST[0].xyxz, IMM[0].xyyx, IN[0].xyxy 2: MAD TEMP[1].xy, TEMP[1].zwzw, IMM[0].xyxx, IMM[0].zxzz 3: MOV TEMP[2].xy, TEMP[1].xyyy 4: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D 5: MOV TEMP[2].z, TEMP[2].xxxx 6: ADD TEMP[3].zw, -CONST[0].xyxy, IN[0].xyxy 7: MAD TEMP[1].xy, TEMP[3].zwzw, IMM[0].xyxx, IMM[0].zxzz 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 10: MOV TEMP[2].x, TEMP[3].xxxx 11: MAD TEMP[3].zw, CONST[0].xyxy, IMM[0].xyxy, IN[0].xyxy 12: MOV TEMP[0].w, TEMP[3].wwzw 13: MAD TEMP[1].xy, TEMP[3].zwzw, IMM[0].xyxx, IMM[0].zxzz 14: MOV TEMP[3].xy, TEMP[1].xyyy 15: TEX TEMP[3].xzw, TEMP[3], SAMP[0], 2D 16: MOV TEMP[4].zw, TEMP[3].wwzw 17: MOV TEMP[2].y, TEMP[3].xxxx 18: DP3 TEMP[3].x, TEMP[2].xyzz, IMM[0].xyww 19: MOV TEMP[0].z, TEMP[3].xxxx 20: ADD TEMP[5].xy, CONST[0].xzzw, IN[0] 21: MOV TEMP[4].xy, TEMP[5].xyxx 22: MAD TEMP[1].xy, TEMP[4], IMM[0].xyxx, IMM[0].zxzz 23: MOV TEMP[5].xy, TEMP[1].xyyy 24: TEX TEMP[5].xw, TEMP[5], SAMP[0], 2D 25: MOV TEMP[4].x, TEMP[5].xxxw 26: MAD TEMP[5].xy, CONST[0], TEMP[0].yxzw, IN[0] 27: MOV TEMP[5].xy, TEMP[5].xyxx 28: MAD TEMP[1].xy, TEMP[5], IMM[0].xyxx, IMM[0].zxzz 29: MOV TEMP[6].xy, TEMP[1].xyyy 30: TEX TEMP[6].xzw, TEMP[6], SAMP[0], 2D 31: MOV TEMP[5].zw, TEMP[6].wwzw 32: MOV TEMP[4].y, TEMP[6].xxxx 33: ADD TEMP[6].xy, CONST[0], IN[0] 34: MOV TEMP[5].xy, TEMP[6].xyxx 35: MAD TEMP[1].xy, TEMP[5], IMM[0].xyxx, IMM[0].zxzz 36: MOV TEMP[6].xy, TEMP[1].xyyy 37: TEX TEMP[6].xzw, TEMP[6], SAMP[0], 2D 38: MOV TEMP[5].zw, TEMP[6].wwzw 39: MOV TEMP[4].z, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[4].yzxx, IMM[1].xyzz 41: ADD TEMP[3].z, TEMP[6].xxxx, TEMP[3].xxxx 42: MOV TEMP[0].z, TEMP[3].zzzz 43: MUL TEMP[6].w, IMM[1].wwww, CONST[1].xxxx 44: MOV TEMP[0].w, TEMP[6].wwww 45: MUL TEMP[3].x, TEMP[6].wwww, TEMP[3].zzzz 46: MOV TEMP[5].x, TEMP[3].xxxx 47: MAD TEMP[3].xy, CONST[0].zyzw, TEMP[0], IN[0] 48: MOV TEMP[0].xy, TEMP[3].xyxx 49: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 50: MOV TEMP[3].xy, TEMP[1].xyyy 51: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 52: MOV TEMP[2].w, TEMP[3].xxxx 53: DP3 TEMP[3].x, TEMP[2].xyww, IMM[0].xxww 54: ADD TEMP[7].yz, CONST[0].xzyw, IN[0].xxyw 55: MOV TEMP[0].yz, TEMP[7].zyzz 56: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 57: MOV TEMP[1].xy, TEMP[1].xyyy 58: TEX TEMP[1].xw, TEMP[1], SAMP[0], 2D 59: MOV TEMP[2].w, TEMP[1].wwww 60: MOV TEMP[4].w, TEMP[1].xxxx 61: DP3 TEMP[1].x, TEMP[4].yzww, IMM[1].yyzz 62: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 63: MUL TEMP[1].y, TEMP[6].wwww, TEMP[1].xxxx 64: MOV TEMP[5].y, TEMP[1].yyyy 65: MUL TEMP[1].xy, TEMP[5], IMM[2].xyxy 66: MOV TEMP[0].xy, TEMP[1].xyxx 67: MOV TEMP[0].z, IMM[0].zzzz 68: ADD TEMP[1].xyz, -TEMP[0], IMM[2].zzwx 69: MOV TEMP[0].xy, TEMP[1].xyzx 70: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 71: MOV TEMP[0].z, TEMP[1].xxxx 72: MAX TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx 73: RSQ TEMP[1].x, TEMP[1].xxxx 74: MOV TEMP[2].z, TEMP[1].xxxx 75: MUL TEMP[0].xy, TEMP[0], TEMP[1].xxxx 76: MOV TEMP[2].xy, TEMP[0].xyxx 77: MAD TEMP[0].xyz, TEMP[2], IMM[3].yyzz, IMM[1].wwww 78: MOV TEMP[0].xyz, TEMP[0].xyzx 79: MOV TEMP[0].w, IMM[0].zzzz 80: MOV OUT[0], TEMP[0] 81: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fmul float %24, -1.000000e+00 %35 = fadd float %34, %32 %36 = fmul float %26, 1.000000e+00 %37 = fadd float %36, %33 %38 = fmul float %35, 1.000000e+00 %39 = fadd float %38, 0.000000e+00 %40 = fmul float %37, -1.000000e+00 %41 = fadd float %40, 1.000000e+00 %42 = bitcast float %39 to i32 %43 = bitcast float %41 to i32 %44 = insertelement <2 x i32> undef, i32 %42, i32 0 %45 = insertelement <2 x i32> %44, i32 %43, i32 1 %46 = bitcast <8 x i32> %29 to <32 x i8> %47 = bitcast <4 x i32> %31 to <16 x i8> %48 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %46, <16 x i8> %47, i32 2) %49 = extractelement <4 x float> %48, i32 0 %50 = fsub float -0.000000e+00, %24 %51 = fadd float %50, %32 %52 = fsub float -0.000000e+00, %25 %53 = fadd float %52, %33 %54 = fmul float %51, 1.000000e+00 %55 = fadd float %54, 0.000000e+00 %56 = fmul float %53, -1.000000e+00 %57 = fadd float %56, 1.000000e+00 %58 = bitcast float %55 to i32 %59 = bitcast float %57 to i32 %60 = insertelement <2 x i32> undef, i32 %58, i32 0 %61 = insertelement <2 x i32> %60, i32 %59, i32 1 %62 = bitcast <8 x i32> %29 to <32 x i8> %63 = bitcast <4 x i32> %31 to <16 x i8> %64 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %62, <16 x i8> %63, i32 2) %65 = extractelement <4 x float> %64, i32 0 %66 = fmul float %24, 1.000000e+00 %67 = fadd float %66, %32 %68 = fmul float %25, -1.000000e+00 %69 = fadd float %68, %33 %70 = fmul float %67, 1.000000e+00 %71 = fadd float %70, 0.000000e+00 %72 = fmul float %69, -1.000000e+00 %73 = fadd float %72, 1.000000e+00 %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %29 to <32 x i8> %79 = bitcast <4 x i32> %31 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = fmul float %65, 1.000000e+00 %83 = fmul float %81, -1.000000e+00 %84 = fadd float %83, %82 %85 = fmul float %49, 2.000000e+00 %86 = fadd float %84, %85 %87 = fadd float %24, %32 %88 = fadd float %26, %33 %89 = fmul float %87, 1.000000e+00 %90 = fadd float %89, 0.000000e+00 %91 = fmul float %88, -1.000000e+00 %92 = fadd float %91, 1.000000e+00 %93 = bitcast float %90 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %29 to <32 x i8> %98 = bitcast <4 x i32> %31 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = fmul float %24, -1.000000e+00 %102 = fadd float %101, %32 %103 = fmul float %25, 1.000000e+00 %104 = fadd float %103, %33 %105 = fmul float %102, 1.000000e+00 %106 = fadd float %105, 0.000000e+00 %107 = fmul float %104, -1.000000e+00 %108 = fadd float %107, 1.000000e+00 %109 = bitcast float %106 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %29 to <32 x i8> %114 = bitcast <4 x i32> %31 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = fadd float %24, %32 %118 = fadd float %25, %33 %119 = fmul float %117, 1.000000e+00 %120 = fadd float %119, 0.000000e+00 %121 = fmul float %118, -1.000000e+00 %122 = fadd float %121, 1.000000e+00 %123 = bitcast float %120 to i32 %124 = bitcast float %122 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = bitcast <8 x i32> %29 to <32 x i8> %128 = bitcast <4 x i32> %31 to <16 x i8> %129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 0 %131 = fmul float %116, 1.000000e+00 %132 = fmul float %130, -1.000000e+00 %133 = fadd float %132, %131 %134 = fmul float %100, -2.000000e+00 %135 = fadd float %133, %134 %136 = fadd float %135, %86 %137 = fmul float 5.000000e-01, %27 %138 = fmul float %137, %136 %139 = fmul float %26, 1.000000e+00 %140 = fadd float %139, %32 %141 = fmul float %25, -1.000000e+00 %142 = fadd float %141, %33 %143 = fmul float %140, 1.000000e+00 %144 = fadd float %143, 0.000000e+00 %145 = fmul float %142, -1.000000e+00 %146 = fadd float %145, 1.000000e+00 %147 = bitcast float %144 to i32 %148 = bitcast float %146 to i32 %149 = insertelement <2 x i32> undef, i32 %147, i32 0 %150 = insertelement <2 x i32> %149, i32 %148, i32 1 %151 = bitcast <8 x i32> %29 to <32 x i8> %152 = bitcast <4 x i32> %31 to <16 x i8> %153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %150, <32 x i8> %151, <16 x i8> %152, i32 2) %154 = extractelement <4 x float> %153, i32 0 %155 = fmul float %65, 1.000000e+00 %156 = fmul float %81, 1.000000e+00 %157 = fadd float %156, %155 %158 = fmul float %154, 2.000000e+00 %159 = fadd float %157, %158 %160 = fadd float %26, %32 %161 = fadd float %25, %33 %162 = fmul float %160, 1.000000e+00 %163 = fadd float %162, 0.000000e+00 %164 = fmul float %161, -1.000000e+00 %165 = fadd float %164, 1.000000e+00 %166 = bitcast float %163 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %29 to <32 x i8> %171 = bitcast <4 x i32> %31 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) %173 = extractelement <4 x float> %172, i32 0 %174 = fmul float %116, -1.000000e+00 %175 = fmul float %130, -1.000000e+00 %176 = fadd float %175, %174 %177 = fmul float %173, -2.000000e+00 %178 = fadd float %176, %177 %179 = fadd float %178, %159 %180 = fmul float %137, %179 %181 = fmul float %138, 2.000000e+00 %182 = fmul float %180, -2.000000e+00 %183 = fsub float -0.000000e+00, %181 %184 = fadd float %183, 0.000000e+00 %185 = fsub float -0.000000e+00, %182 %186 = fadd float %185, 0.000000e+00 %187 = fsub float -0.000000e+00, 0.000000e+00 %188 = fadd float %187, 4.000000e+00 %189 = fmul float %184, %184 %190 = fmul float %186, %186 %191 = fadd float %190, %189 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = call float @llvm.maxnum.f32(float %193, float 0x3E7AD7F2A0000000) %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %184, %195 %197 = fmul float %186, %195 %198 = fmul float %196, 5.000000e-01 %199 = fadd float %198, 5.000000e-01 %200 = fmul float %197, 5.000000e-01 %201 = fadd float %200, 5.000000e-01 %202 = fmul float %195, 2.000000e+00 %203 = fadd float %202, 5.000000e-01 %204 = call i32 @llvm.SI.packf16(float %199, float %201) %205 = bitcast i32 %204 to float %206 = call i32 @llvm.SI.packf16(float %203, float 0.000000e+00) %207 = bitcast i32 %206 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %205, float %207, float %205, float %207) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s8, v2 ; 0A060408 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s9, v5 ; 0A000A09 v_add_f32_e32 v3, 0, v0 ; 06060080 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640003 v_add_f32_e32 v1, s9, v5 ; 06020A09 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_mov_b32_e32 v7, v4 ; 7E0E0304 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800100 00640106 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v8, v0, v1 ; 06100300 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_add_f32_e32 v9, 0, v5 ; 06120A80 v_mov_b32_e32 v10, v4 ; 7E140304 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[12:15] ; F0800100 00640509 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 2.0, v5, v8 ; D2820005 04220AF4 v_add_f32_e32 v8, s8, v2 ; 06100408 v_sub_f32_e32 v10, 1.0, v8 ; 081410F2 v_mov_b32_e32 v4, v10 ; 7E08030A image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640803 v_mov_b32_e32 v7, v10 ; 7E0E030A image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800100 00640B06 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e64 v12, -v11, v8 ; D208000C 2002110B image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[12:15] ; F0800100 00640909 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, -2.0, v9, v12 ; D2820009 043212F5 v_add_f32_e32 v5, v5, v9 ; 060A1305 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v9, 0.5, s0 ; D2100009 000000F0 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v5, -2.0, -v5, 0 ; D2820005 42020AF5 v_subrev_f32_e32 v0, v1, v0 ; 0A000101 v_add_f32_e32 v1, s4, v2 ; 06020404 v_sub_f32_e32 v4, 1.0, v1 ; 080802F2 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640103 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 2.0, v1, v0 ; D2820000 040202F4 v_subrev_f32_e32 v1, v11, v8 ; 0A02110B v_mov_b32_e32 v7, v4 ; 7E0E0304 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800100 00640206 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, -2.0, v2, v1 ; D2820001 040604F5 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v0, -2.0, v0, 0 ; D2820000 020200F5 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v1, v5, v5, v1 ; D2820001 04060B05 v_add_f32_e32 v1, 0x41800000, v1 ; 060202FF 41800000 v_max_f32_e32 v1, 0x33d6bf95, v1 ; 200202FF 33D6BF95 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v2, v1, v5 ; 10040B01 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_mad_f32 v1, 2.0, v1, 0.5 ; D2820001 03C202F4 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1].zw, IN[0].zzzz, IMM[0].xyxy, IMM[0].xyyx 3: MOV TEMP[0].zw, TEMP[1].wwzw 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV TEMP[1].zw, IMM[0].xxyx 6: MOV OUT[0], TEMP[0] 7: MOV OUT[1], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = add i32 %5, %7 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %17) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = add i32 %5, %7 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = fadd float %13, %19 %29 = fadd float %14, %20 %30 = fmul float %21, 1.000000e+00 %31 = fadd float %30, 0.000000e+00 %32 = fmul float %21, 0.000000e+00 %33 = fadd float %32, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %28, float %29, float %31, float %33) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v2, 1.0 ; D2820004 03CA0480 v_add_f32_e32 v5, 0, v2 ; 060A0480 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v1 ; 060C0204 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..4] DCL TEMP[0..21], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, 0.0000, 0.3145} IMM[1] FLT32 { 1.0000, -64.0000, 0.0104, 0.0000} IMM[2] INT32 {40, 0, -1, 0} 0: ABS TEMP[0], CONST[1] 1: ADD TEMP[1], -TEMP[0].yyyy, TEMP[0].xxxx 2: MOV TEMP[2].z, TEMP[1].zzzz 3: RCP TEMP[3].x, TEMP[0].yyyy 4: MUL TEMP[3], TEMP[3].xxxx, CONST[1].xxzy 5: MOV TEMP[2].yw, TEMP[3].wyww 6: RCP TEMP[2].x, TEMP[0].xxxx 7: MUL TEMP[0], TEMP[2].xxxx, CONST[1] 8: MOV TEMP[4], TEMP[2].ywzw 9: FSGE TEMP[3].x, TEMP[1].zzzz, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].x, TEMP[0].xxxx 12: ELSE :0 13: MOV TEMP[3].x, TEMP[2].yyyy 14: ENDIF 15: MOV TEMP[3].x, TEMP[3].xxxx 16: FSGE TEMP[5].x, TEMP[1].zzzz, IMM[0].xxxx 17: UIF TEMP[5].xxxx :0 18: MOV TEMP[5].x, TEMP[0].yyyy 19: ELSE :0 20: MOV TEMP[5].x, TEMP[2].wwww 21: ENDIF 22: MOV TEMP[3].y, TEMP[5].xxxx 23: FSGE TEMP[5].x, TEMP[1].zzzz, IMM[0].xxxx 24: UIF TEMP[5].xxxx :0 25: MOV TEMP[5].x, TEMP[0].zzzz 26: ELSE :0 27: MOV TEMP[5].x, TEMP[2].zzzz 28: ENDIF 29: MOV TEMP[3].z, TEMP[5].xxxx 30: FSGE TEMP[1].x, TEMP[1].zzzz, IMM[0].xxxx 31: UIF TEMP[1].xxxx :0 32: MOV TEMP[1].x, TEMP[0].wwww 33: ELSE :0 34: MOV TEMP[1].x, TEMP[2].wwww 35: ENDIF 36: MOV TEMP[3].w, TEMP[1].xxxx 37: MOV TEMP[2].xy, TEMP[3].xyxx 38: ADD TEMP[1], CONST[3].xyxy, IN[0].xyxy 39: MOV TEMP[2].zw, TEMP[1].wwzw 40: MUL TEMP[1], TEMP[2], CONST[2].xyxy 41: MOV TEMP[2].w, TEMP[1].wwzw 42: MOV TEMP[1].xy, TEMP[1].zwww 43: TEX TEMP[1], TEMP[1], SAMP[0], 2D 44: MUL TEMP[1], TEMP[1].wwww, CONST[0].zzzz 45: MAD TEMP[1], TEMP[1].zzzz, IMM[0].yyyy, CONST[4].zzzz 46: MOV TEMP[2].z, TEMP[1].zzzz 47: ADD TEMP[5], TEMP[2], IN[0] 48: MOV TEMP[0].xy, TEMP[5].xyxx 49: MUL TEMP[3], TEMP[3].xyxy, CONST[0].xyxy 50: MOV TEMP[0].zw, TEMP[3].wwzw 51: DP2 TEMP[3].x, TEMP[3].zwww, TEMP[3].zwww 52: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz 53: RSQ TEMP[5].x, TEMP[3].xxxx 54: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 55: CMP TEMP[5].x, -TEMP[3].xxxx, TEMP[5].xxxx, IMM[0].xxxx 56: MOV TEMP[2].w, TEMP[5].xxxx 57: MAD TEMP[1], TEMP[5].xxxx, IMM[0].wwww, TEMP[1].zzzz 58: MOV TEMP[2].z, TEMP[1].zzzz 59: ADD TEMP[3], TEMP[0], CONST[3] 60: MOV TEMP[0].xy, TEMP[3].xyxx 61: MUL TEMP[3], TEMP[0], CONST[2] 62: MOV TEMP[0].xy, TEMP[3].xyxx 63: MOV TEMP[0].zw, TEMP[3].yyxy 64: MOV TEMP[3].x, IMM[1].xxxx 65: MOV TEMP[3].y, TEMP[1].zzzz 66: MOV TEMP[1].x, IMM[2].xxxx 67: BGNLOOP :0 68: ISGE TEMP[5].x, IMM[2].yyyy, TEMP[1].xxxx 69: UIF TEMP[5].xxxx :0 70: BRK 71: ENDIF 72: MAD TEMP[6], TEMP[2].xyxy, CONST[2].xyxy, TEMP[0] 73: MOV TEMP[0].zw, TEMP[6].wwzw 74: MAD TEMP[7], TEMP[2].wwww, IMM[0].wwww, TEMP[3].yyyy 75: MOV TEMP[3].y, TEMP[7].yyyy 76: MOV TEMP[8].xy, TEMP[6].zwww 77: TEX TEMP[9], TEMP[8], SAMP[0], 2D 78: MOV TEMP[10], TEMP[9] 79: MOV TEMP[11].yzw, TEMP[9].zyzw 80: MUL TEMP[12], TEMP[9].wwww, IMM[0].yyyy 81: MAD TEMP[13], TEMP[9].wwww, IMM[0].yyyy, IMM[1].yyyy 82: MUL TEMP[4], TEMP[13].wwww, IMM[1].zzzz 83: MOV_SAT TEMP[14], TEMP[4] 84: ADD TEMP[15], -TEMP[14].wwww, IMM[1].xxxx 85: MOV TEMP[3].w, TEMP[15].wwww 86: MAD TEMP[16], TEMP[12].zzzz, -CONST[0].zzzz, TEMP[7].yyyy 87: MOV TEMP[3].z, TEMP[16].zzzz 88: MIN TEMP[17], TEMP[15].wwww, TEMP[3].xxxx 89: MOV TEMP[11].x, TEMP[17].xxxx 90: MOV TEMP[18].x, TEMP[3].xxxx 91: MOV TEMP[19].x, TEMP[17].xxxx 92: FSGE TEMP[20].x, TEMP[16].zzzz, IMM[0].xxxx 93: UIF TEMP[20].xxxx :0 94: MOV TEMP[21].x, TEMP[3].xxxx 95: ELSE :0 96: MOV TEMP[21].x, TEMP[17].xxxx 97: ENDIF 98: MOV TEMP[3].x, TEMP[21].xxxx 99: UADD TEMP[1].x, TEMP[1].xxxx, IMM[2].zzzz 100: ENDLOOP :0 101: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[1].xxxx 102: MUL TEMP[0], TEMP[0].xxxx, CONST[4].zzzz 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %36 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %37 = load <8 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %39 = load <4 x i32> addrspace(2)* %38, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @fabs(float %27) %43 = call float @fabs(float %28) %44 = call float @fabs(float %29) %45 = call float @fabs(float %30) %46 = fsub float -0.000000e+00, %43 %47 = fadd float %46, %42 %48 = fdiv float 1.000000e+00, %43 %49 = fmul float %48, %27 %50 = fmul float %48, %28 %51 = fdiv float 1.000000e+00, %42 %52 = fmul float %51, %27 %53 = fmul float %51, %28 %54 = fcmp oge float %47, 0.000000e+00 %55 = sext i1 %54 to i32 %56 = bitcast i32 %55 to float %57 = bitcast float %56 to i32 %58 = icmp ne i32 %57, 0 %. = select i1 %58, float %52, float %49 %59 = fcmp oge float %47, 0.000000e+00 %60 = sext i1 %59 to i32 %61 = bitcast i32 %60 to float %62 = bitcast float %61 to i32 %63 = icmp ne i32 %62, 0 %temp20.0 = select i1 %63, float %53, float %50 %64 = fadd float %33, %40 %65 = fadd float %34, %41 %66 = fmul float %64, %31 %67 = fmul float %65, %32 %68 = bitcast float %66 to i32 %69 = bitcast float %67 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = bitcast <8 x i32> %37 to <32 x i8> %73 = bitcast <4 x i32> %39 to <16 x i8> %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %72, <16 x i8> %73, i32 2) %75 = extractelement <4 x float> %74, i32 3 %76 = fmul float %75, %26 %77 = fmul float %76, 2.550000e+02 %78 = fadd float %77, %35 %79 = fadd float %., %40 %80 = fadd float %temp20.0, %41 %81 = fmul float %., %24 %82 = fmul float %temp20.0, %25 %83 = fmul float %81, %81 %84 = fmul float %82, %82 %85 = fadd float %83, %84 %86 = call float @llvm.maxnum.f32(float %85, float 0x3E7AD7F2A0000000) %87 = call float @llvm.AMDGPU.rsq.clamped.f32(float %86) %88 = fmul float %87, %86 %89 = fsub float -0.000000e+00, %86 %90 = call float @llvm.AMDGPU.cndlt(float %89, float %88, float 0.000000e+00) %91 = fmul float %90, 0x3FD42085C0000000 %92 = fadd float %91, %78 %93 = fadd float %79, %33 %94 = fadd float %80, %34 %95 = fmul float %93, %31 %96 = fmul float %94, %32 %97 = fmul float %., %31 %98 = fmul float %temp20.0, %32 %99 = fmul float %90, 0x3FD42085C0000000 %100 = bitcast <8 x i32> %37 to <32 x i8> %101 = bitcast <4 x i32> %39 to <16 x i8> %102 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %ENDIF97, %main_body %temp12.1 = phi float [ 1.000000e+00, %main_body ], [ %temp12.1., %ENDIF97 ] %temp13.0 = phi float [ %92, %main_body ], [ %121, %ENDIF97 ] %temp4.1 = phi float [ 0x36F4000000000000, %main_body ], [ %151, %ENDIF97 ] %temp3.0 = phi float [ %96, %main_body ], [ %120, %ENDIF97 ] %temp2.0 = phi float [ %95, %main_body ], [ %119, %ENDIF97 ] %103 = bitcast float %temp4.1 to i32 %104 = icmp sge i32 0, %103 %105 = sext i1 %104 to i32 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF98, label %ENDIF97 IF98: ; preds = %LOOP %temp12.1.lcssa = phi float [ %temp12.1, %LOOP ] %109 = fsub float -0.000000e+00, %temp12.1.lcssa %110 = fadd float %109, 1.000000e+00 %111 = fmul float %110, %35 %112 = fmul float %110, %35 %113 = fmul float %110, %35 %114 = fmul float %110, %35 %115 = call i32 @llvm.SI.packf16(float %111, float %112) %116 = bitcast i32 %115 to float %117 = call i32 @llvm.SI.packf16(float %113, float %114) %118 = bitcast i32 %117 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %116, float %118, float %116, float %118) ret void ENDIF97: ; preds = %LOOP %119 = fadd float %97, %temp2.0 %120 = fadd float %98, %temp3.0 %121 = fadd float %99, %temp13.0 %122 = bitcast float %119 to i32 %123 = bitcast float %120 to i32 %124 = insertelement <2 x i32> undef, i32 %122, i32 0 %125 = insertelement <2 x i32> %124, i32 %123, i32 1 %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %100, <16 x i8> %101, i32 2) %127 = extractelement <4 x float> %126, i32 3 %128 = fmul float %127, 2.550000e+02 %129 = fmul float %127, 2.550000e+02 %130 = fadd float %129, -6.400000e+01 %131 = fmul float %130, 0x3F855559C0000000 %132 = fmul float %130, 0x3F855559C0000000 %133 = fmul float %130, 0x3F855559C0000000 %134 = fmul float %130, 0x3F855559C0000000 %135 = call float @llvm.AMDIL.clamp.(float %131, float 0.000000e+00, float 1.000000e+00) %136 = call float @llvm.AMDIL.clamp.(float %132, float 0.000000e+00, float 1.000000e+00) %137 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00) %138 = call float @llvm.AMDIL.clamp.(float %134, float 0.000000e+00, float 1.000000e+00) %139 = fsub float -0.000000e+00, %138 %140 = fadd float %139, 1.000000e+00 %141 = fmul float %128, %102 %142 = fadd float %141, %121 %143 = call float @llvm.minnum.f32(float %140, float %temp12.1) %144 = fcmp oge float %142, 0.000000e+00 %145 = sext i1 %144 to i32 %146 = bitcast i32 %145 to float %147 = bitcast float %146 to i32 %148 = icmp ne i32 %147, 0 %temp12.1. = select i1 %148, float %temp12.1, float %143 %149 = bitcast float %temp4.1 to i32 %150 = add i32 %149, -1 %151 = bitcast i32 %150 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x4 ; C2000D04 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e64 v2, |s0| ; D3540102 00000000 v_mul_f32_e32 v3, s0, v2 ; 10060400 s_buffer_load_dword s1, s[12:15], 0x5 ; C2008D05 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e64 v4, |s1| ; D3540104 00000001 v_mul_f32_e32 v5, s0, v4 ; 100A0800 v_mov_b32_e32 v6, s1 ; 7E0C0201 v_sub_f32_e64 v6, |s0|, |v6| ; D2080306 00020C00 v_cmp_ge_f32_e64 s[2:3], v6, 0 ; D00C0002 00010106 v_cndmask_b32_e64 v6, 0, -1, s[2:3] ; D2000006 00098280 v_cmp_ne_i32_e64 s[2:3], v6, 0 ; D10A0002 00010106 v_cndmask_b32_e64 v6, v5, v3, s[2:3] ; D2000006 080A0705 s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s0, v6 ; 10060C00 v_mul_f32_e32 v2, s1, v2 ; 10040401 v_mul_f32_e32 v4, s1, v4 ; 10080801 v_cndmask_b32_e64 v4, v4, v2, s[2:3] ; D2000004 000A0504 s_buffer_load_dword s0, s[12:15], 0x1 ; C2000D01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_mov_b32_e32 v7, 0x80000000 ; 7E0E02FF 80000000 v_xor_b32_e32 v2, v2, v7 ; 3A040F02 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, v3, vcc ; D2000002 01AA0680 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_buffer_load_dword s16, s[12:15], 0xd ; C2080D0D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s16, v5 ; 06060A10 s_buffer_load_dword s17, s[12:15], 0x9 ; C2088D09 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s17, v3 ; 10120611 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s18, v10 ; 06001412 s_buffer_load_dword s19, s[12:15], 0x8 ; C2098D08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s19, v0 ; 10100013 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[0:3] ; F0800800 00010008 s_buffer_load_dword s20, s[12:15], 0x2 ; C20A0D02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s20, v0 ; 10000014 s_buffer_load_dword s12, s[12:15], 0x12 ; C2060D12 v_mov_b32_e32 v1, 0x437f0000 ; 7E0202FF 437F0000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, v0, s12 ; D2820000 00320101 v_mov_b32_e32 v1, 0x3ea1042e ; 7E0202FF 3EA1042E v_mad_f32 v0, v2, v1, v0 ; D2820000 04020302 v_mul_f32_e32 v1, 0x3ea1042e, v2 ; 100204FF 3EA1042E v_mul_f32_e32 v2, s17, v4 ; 10040811 v_mul_f32_e32 v3, s19, v6 ; 10060C13 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v4, s16, v4 ; 06080810 v_mul_f32_e32 v5, s17, v4 ; 100A0811 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v6, s18, v6 ; 060C0C12 v_mul_f32_e32 v4, s19, v6 ; 10080C13 v_xor_b32_e32 v6, s20, v7 ; 3A0C0E14 v_mov_b32_e32 v7, 40 ; 7E0E02A8 v_mov_b32_e32 v9, 1.0 ; 7E1202F2 s_mov_b64 s[14:15], 0 ; BE8E0480 v_mov_b32_e32 v8, v9 ; 7E100309 v_cmp_lt_i32_e64 s[16:17], v7, 1 ; D1020010 00010307 v_cndmask_b32_e64 v9, 0, -1, s[16:17] ; D2000809 00418280 v_cmp_eq_i32_e64 s[16:17], v9, 0 ; D1040010 00010109 s_and_saveexec_b64 s[16:17], s[16:17] ; BE902410 s_xor_b64 s[16:17], exec, s[16:17] ; 8990107E s_cbranch_execz BB0_2 ; BF880000 v_add_f32_e32 v5, v5, v2 ; 060A0505 v_add_f32_e32 v4, v4, v3 ; 06080704 image_sample v9, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800800 00010904 v_mov_b32_e32 v10, 0x437f0000 ; 7E1402FF 437F0000 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v11, 0x437f0000, v9 ; 101612FF 437F0000 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mad_f32 v11, v11, v6, v0 ; D282000B 04020D0B v_cmp_ge_f32_e64 s[18:19], v11, 0 ; D00C0012 0001010B v_cndmask_b32_e64 v11, 0, -1, s[18:19] ; D200080B 00498280 v_cmp_ne_i32_e64 s[18:19], v11, 0 ; D10A0012 0001010B v_mov_b32_e32 v11, 0xc2800000 ; 7E1602FF C2800000 v_mad_f32 v9, v10, v9, v11 ; D2820009 042E130A v_mul_f32_e32 v9, 0x3c2aaace, v9 ; 101212FF 3C2AAACE v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_min_f32_e32 v9, v8, v9 ; 1E121308 v_cndmask_b32_e64 v9, v9, v8, s[18:19] ; D2000809 084A1109 v_add_i32_e32 v7, -1, v7 ; 4A0E0EC1 s_or_b64 exec, exec, s[16:17] ; 88FE107E s_or_b64 s[14:15], s[16:17], s[14:15] ; 888E0E10 s_andn2_b64 exec, exec, s[14:15] ; 8AFE0E7E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_sub_f32_e32 v0, 1.0, v8 ; 080010F2 v_mul_f32_e32 v0, s12, v0 ; 1000000C v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.0000, -2.0000, 0.0000, 1.0000} IMM[1] FLT32 { -1.0000, 1.0000, 0.5000, 0.0000} 0: MAD TEMP[0], IN[1].xyxx, IMM[0].xyzz, IMM[1].xyzy 1: MOV TEMP[1].xyz, IN[0].xyzx 2: MUL TEMP[2], IMM[0].wwzz, IN[1].xyxx 3: MOV OUT[2], TEMP[2] 4: MOV OUT[1], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %22, 2.000000e+00 %25 = fadd float %24, -1.000000e+00 %26 = fmul float %23, -2.000000e+00 %27 = fadd float %26, 1.000000e+00 %28 = fmul float %22, 0.000000e+00 %29 = fadd float %28, 5.000000e-01 %30 = fmul float %22, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 %32 = fmul float 1.000000e+00, %22 %33 = fmul float 1.000000e+00, %23 %34 = fmul float 0.000000e+00, %22 %35 = fmul float 0.000000e+00, %22 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %15, float %16, float %17, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %32, float %33, float %34, float %35) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, 0, v0 ; 10080080 exp 15, 33, 0, 0, 0, v0, v1, v4, v4 ; F800021F 04040100 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_mad_f32 v5, 0, v0, 0.5 ; D2820005 03C20080 v_mad_f32 v6, -2.0, v1, 1.0 ; D2820006 03CA02F5 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -0.0000, -1.0000} IMM[1] FLT32 { 0.0000, 2.0000, 2.2000, 1.0000} IMM[2] FLT32 { 1.7000, 0.0010, 0.4545, 0.0000} 0: MUL TEMP[0].xyz, IMM[0].xyxy, IN[0].yzxw 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].xyz, IN[0].yzxw, IMM[0].xxyy, -TEMP[0].yzxw 3: MOV TEMP[2].xyz, TEMP[1].xyzx 4: MAD TEMP[3].xyz, IN[0].zxyw, IMM[0].zzww, TEMP[0] 5: MOV TEMP[0].xyz, TEMP[3].xyzx 6: DP2 TEMP[1].x, TEMP[1].xzzz, TEMP[1].xzzz 7: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2] 10: MOV TEMP[2].xyz, TEMP[1].xyzx 11: DP2 TEMP[1].x, TEMP[3].yzzz, TEMP[3].yzzz 12: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 13: RSQ TEMP[1].x, TEMP[1].xxxx 14: MOV TEMP[0].w, TEMP[1].xxxx 15: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0] 16: MOV TEMP[0].xyz, TEMP[1].xyzx 17: MUL TEMP[1], IN[1], IMM[0].ywyy 18: ADD TEMP[3].xy, TEMP[1], IMM[0].xyxx 19: MOV TEMP[3].xy, TEMP[3].xyyy 20: TEX TEMP[3], TEMP[3], SAMP[0], 2D 21: MAD TEMP[3].xyz, TEMP[3], IMM[1].yyyy, IMM[0].wwww 22: MUL TEMP[4].xyz, TEMP[0], TEMP[3].yyyy 23: MOV TEMP[0].xyz, TEMP[4].xyzx 24: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[2], TEMP[0] 25: MOV TEMP[0].xyz, TEMP[4].xyzx 26: MAD TEMP[3].xyz, TEMP[3].zzzz, IN[0], TEMP[0] 27: MOV TEMP[4].xyz, TEMP[3].xyzz 28: TEX TEMP[4], TEMP[4], SAMP[4], CUBE 29: POW TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz 30: POW TEMP[5].y, TEMP[4].yyyy, IMM[1].zzzz 31: POW TEMP[5].z, TEMP[4].zzzz, IMM[1].zzzz 32: POW TEMP[5].w, TEMP[4].wwww, IMM[1].wwww 33: MOV TEMP[2].w, TEMP[5].wwww 34: MOV TEMP[3].xyz, TEMP[3].xyzz 35: TEX TEMP[3], TEMP[3], SAMP[3], CUBE 36: POW TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz 37: POW TEMP[4].y, TEMP[3].yyyy, IMM[1].zzzz 38: POW TEMP[4].z, TEMP[3].zzzz, IMM[1].zzzz 39: POW TEMP[4].w, TEMP[3].wwww, IMM[1].wwww 40: MOV TEMP[0].w, TEMP[4].wwww 41: MUL TEMP[3].xyz, TEMP[4], IMM[2].xxxx 42: MOV TEMP[0].xyz, TEMP[3].xyzx 43: MAD TEMP[3].xyz, TEMP[5], IMM[2].xxxx, -TEMP[0] 44: MOV TEMP[2].xyz, TEMP[3].xyzx 45: ADD TEMP[3].xy, TEMP[1], IMM[0].xyxx 46: MOV TEMP[3].xy, TEMP[3].xyyy 47: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 48: MAD TEMP[3].xyz, TEMP[3].xxxx, TEMP[2], TEMP[0] 49: MOV TEMP[0].xyz, TEMP[3].xyzx 50: ADD TEMP[1].xy, TEMP[1], IMM[0].xyxx 51: MOV TEMP[1].xy, TEMP[1].xyyy 52: TEX TEMP[1], TEMP[1], SAMP[1], 2D 53: MOV TEMP[2].w, TEMP[1].wwww 54: MAD TEMP[0].xyz, TEMP[1], TEMP[0], IMM[2].yyyy 55: ABS TEMP[1].x, TEMP[0].xxxx 56: LG2 TEMP[2].x, TEMP[1].xxxx 57: ABS TEMP[1].x, TEMP[0].yyyy 58: LG2 TEMP[1].x, TEMP[1].xxxx 59: MOV TEMP[2].y, TEMP[1].xxxx 60: ABS TEMP[0].x, TEMP[0].zzzz 61: LG2 TEMP[0].x, TEMP[0].xxxx 62: MOV TEMP[2].z, TEMP[0].xxxx 63: MUL TEMP[0].xyz, TEMP[2], IMM[2].zzzz 64: EX2 TEMP[2].x, TEMP[0].xxxx 65: EX2 TEMP[1].x, TEMP[0].yyyy 66: MOV TEMP[2].y, TEMP[1].xxxx 67: EX2 TEMP[0].x, TEMP[0].zzzz 68: MOV TEMP[2].z, TEMP[0].xxxx 69: MOV TEMP[0].xyz, TEMP[2].xyzz 70: TEX TEMP[0].xyz, TEMP[0], SAMP[5], 3D 71: MOV TEMP[0].xyz, TEMP[0].xyzx 72: MOV TEMP[0].w, IMM[0].yyyy 73: MOV OUT[0], TEMP[0] 74: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %51 = fmul float 0.000000e+00, %47 %52 = fmul float 1.000000e+00, %48 %53 = fmul float 0.000000e+00, %46 %54 = fsub float -0.000000e+00, %52 %55 = fmul float %47, 0.000000e+00 %56 = fadd float %55, %54 %57 = fsub float -0.000000e+00, %53 %58 = fmul float %48, 0.000000e+00 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %51 %61 = fmul float %46, 1.000000e+00 %62 = fadd float %61, %60 %63 = fmul float %48, -0.000000e+00 %64 = fadd float %63, %51 %65 = fmul float %46, -0.000000e+00 %66 = fadd float %65, %52 %67 = fmul float %47, -1.000000e+00 %68 = fadd float %67, %53 %69 = fmul float %56, %56 %70 = fmul float %62, %62 %71 = fadd float %69, %70 %72 = call float @llvm.maxnum.f32(float %71, float 0x3E7AD7F2A0000000) %73 = call float @llvm.AMDGPU.rsq.clamped.f32(float %72) %74 = fmul float %73, %56 %75 = fmul float %73, %59 %76 = fmul float %73, %62 %77 = fmul float %66, %66 %78 = fmul float %68, %68 %79 = fadd float %77, %78 %80 = call float @llvm.maxnum.f32(float %79, float 0x3E7AD7F2A0000000) %81 = call float @llvm.AMDGPU.rsq.clamped.f32(float %80) %82 = fmul float %81, %64 %83 = fmul float %81, %66 %84 = fmul float %81, %68 %85 = fmul float %49, 1.000000e+00 %86 = fmul float %50, -1.000000e+00 %87 = fadd float %85, 0.000000e+00 %88 = fadd float %86, 1.000000e+00 %89 = bitcast float %87 to i32 %90 = bitcast float %88 to i32 %91 = insertelement <2 x i32> undef, i32 %89, i32 0 %92 = insertelement <2 x i32> %91, i32 %90, i32 1 %93 = bitcast <8 x i32> %23 to <32 x i8> %94 = bitcast <4 x i32> %25 to <16 x i8> %95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %93, <16 x i8> %94, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = extractelement <4 x float> %95, i32 3 %100 = fmul float %96, 2.000000e+00 %101 = fadd float %100, -1.000000e+00 %102 = fmul float %97, 2.000000e+00 %103 = fadd float %102, -1.000000e+00 %104 = fmul float %98, 2.000000e+00 %105 = fadd float %104, -1.000000e+00 %106 = fmul float %82, %103 %107 = fmul float %83, %103 %108 = fmul float %84, %103 %109 = fmul float %101, %74 %110 = fadd float %109, %106 %111 = fmul float %101, %75 %112 = fadd float %111, %107 %113 = fmul float %101, %76 %114 = fadd float %113, %108 %115 = fmul float %105, %46 %116 = fadd float %115, %110 %117 = fmul float %105, %47 %118 = fadd float %117, %112 %119 = fmul float %105, %48 %120 = fadd float %119, %114 %121 = insertelement <4 x float> undef, float %116, i32 0 %122 = insertelement <4 x float> %121, float %118, i32 1 %123 = insertelement <4 x float> %122, float %120, i32 2 %124 = insertelement <4 x float> %123, float 0.000000e+00, i32 3 %125 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %124) %126 = extractelement <4 x float> %125, i32 0 %127 = extractelement <4 x float> %125, i32 1 %128 = extractelement <4 x float> %125, i32 2 %129 = extractelement <4 x float> %125, i32 3 %130 = call float @fabs(float %128) %131 = fdiv float 1.000000e+00, %130 %132 = fmul float %126, %131 %133 = fadd float %132, 1.500000e+00 %134 = fmul float %127, %131 %135 = fadd float %134, 1.500000e+00 %136 = bitcast float %135 to i32 %137 = bitcast float %133 to i32 %138 = bitcast float %129 to i32 %139 = insertelement <4 x i32> undef, i32 %136, i32 0 %140 = insertelement <4 x i32> %139, i32 %137, i32 1 %141 = insertelement <4 x i32> %140, i32 %138, i32 2 %142 = insertelement <4 x i32> %141, i32 undef, i32 3 %143 = bitcast <8 x i32> %39 to <32 x i8> %144 = bitcast <4 x i32> %41 to <16 x i8> %145 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %142, <32 x i8> %143, <16 x i8> %144, i32 4) %146 = extractelement <4 x float> %145, i32 0 %147 = extractelement <4 x float> %145, i32 1 %148 = extractelement <4 x float> %145, i32 2 %149 = call float @llvm.pow.f32(float %146, float 0x40019999A0000000) %150 = call float @llvm.pow.f32(float %147, float 0x40019999A0000000) %151 = call float @llvm.pow.f32(float %148, float 0x40019999A0000000) %152 = insertelement <4 x float> undef, float %116, i32 0 %153 = insertelement <4 x float> %152, float %118, i32 1 %154 = insertelement <4 x float> %153, float %120, i32 2 %155 = insertelement <4 x float> %154, float %99, i32 3 %156 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %155) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = call float @fabs(float %159) %162 = fdiv float 1.000000e+00, %161 %163 = fmul float %157, %162 %164 = fadd float %163, 1.500000e+00 %165 = fmul float %158, %162 %166 = fadd float %165, 1.500000e+00 %167 = bitcast float %166 to i32 %168 = bitcast float %164 to i32 %169 = bitcast float %160 to i32 %170 = insertelement <4 x i32> undef, i32 %167, i32 0 %171 = insertelement <4 x i32> %170, i32 %168, i32 1 %172 = insertelement <4 x i32> %171, i32 %169, i32 2 %173 = insertelement <4 x i32> %172, i32 undef, i32 3 %174 = bitcast <8 x i32> %35 to <32 x i8> %175 = bitcast <4 x i32> %37 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 4) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = call float @llvm.pow.f32(float %177, float 0x40019999A0000000) %181 = call float @llvm.pow.f32(float %178, float 0x40019999A0000000) %182 = call float @llvm.pow.f32(float %179, float 0x40019999A0000000) %183 = fmul float %180, 0x3FFB333340000000 %184 = fmul float %181, 0x3FFB333340000000 %185 = fmul float %182, 0x3FFB333340000000 %186 = fsub float -0.000000e+00, %183 %187 = fmul float %149, 0x3FFB333340000000 %188 = fadd float %187, %186 %189 = fsub float -0.000000e+00, %184 %190 = fmul float %150, 0x3FFB333340000000 %191 = fadd float %190, %189 %192 = fsub float -0.000000e+00, %185 %193 = fmul float %151, 0x3FFB333340000000 %194 = fadd float %193, %192 %195 = fadd float %85, 0.000000e+00 %196 = fadd float %86, 1.000000e+00 %197 = bitcast float %195 to i32 %198 = bitcast float %196 to i32 %199 = insertelement <2 x i32> undef, i32 %197, i32 0 %200 = insertelement <2 x i32> %199, i32 %198, i32 1 %201 = bitcast <8 x i32> %31 to <32 x i8> %202 = bitcast <4 x i32> %33 to <16 x i8> %203 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %200, <32 x i8> %201, <16 x i8> %202, i32 2) %204 = extractelement <4 x float> %203, i32 0 %205 = fmul float %204, %188 %206 = fadd float %205, %183 %207 = fmul float %204, %191 %208 = fadd float %207, %184 %209 = fmul float %204, %194 %210 = fadd float %209, %185 %211 = fadd float %85, 0.000000e+00 %212 = fadd float %86, 1.000000e+00 %213 = bitcast float %211 to i32 %214 = bitcast float %212 to i32 %215 = insertelement <2 x i32> undef, i32 %213, i32 0 %216 = insertelement <2 x i32> %215, i32 %214, i32 1 %217 = bitcast <8 x i32> %27 to <32 x i8> %218 = bitcast <4 x i32> %29 to <16 x i8> %219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %216, <32 x i8> %217, <16 x i8> %218, i32 2) %220 = extractelement <4 x float> %219, i32 0 %221 = extractelement <4 x float> %219, i32 1 %222 = extractelement <4 x float> %219, i32 2 %223 = fmul float %220, %206 %224 = fadd float %223, 0x3F50624DE0000000 %225 = fmul float %221, %208 %226 = fadd float %225, 0x3F50624DE0000000 %227 = fmul float %222, %210 %228 = fadd float %227, 0x3F50624DE0000000 %229 = call float @fabs(float %224) %230 = call float @llvm.log2.f32(float %229) %231 = call float @fabs(float %226) %232 = call float @llvm.log2.f32(float %231) %233 = call float @fabs(float %228) %234 = call float @llvm.log2.f32(float %233) %235 = fmul float %230, 0x3FDD1743E0000000 %236 = fmul float %232, 0x3FDD1743E0000000 %237 = fmul float %234, 0x3FDD1743E0000000 %238 = call float @llvm.AMDIL.exp.(float %235) %239 = call float @llvm.AMDIL.exp.(float %236) %240 = call float @llvm.AMDIL.exp.(float %237) %241 = bitcast float %238 to i32 %242 = bitcast float %239 to i32 %243 = bitcast float %240 to i32 %244 = insertelement <4 x i32> undef, i32 %241, i32 0 %245 = insertelement <4 x i32> %244, i32 %242, i32 1 %246 = insertelement <4 x i32> %245, i32 %243, i32 2 %247 = insertelement <4 x i32> %246, i32 undef, i32 3 %248 = bitcast <8 x i32> %43 to <32 x i8> %249 = bitcast <4 x i32> %45 to <16 x i8> %250 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %247, <32 x i8> %248, <16 x i8> %249, i32 3) %251 = extractelement <4 x float> %250, i32 0 %252 = extractelement <4 x float> %250, i32 1 %253 = extractelement <4 x float> %250, i32 2 %254 = call i32 @llvm.SI.packf16(float %251, float %252) %255 = bitcast i32 %254 to float %256 = call i32 @llvm.SI.packf16(float %253, float 1.000000e+00) %257 = bitcast i32 %256 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %255, float %257, float %255, float %257) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v4, 0 ; 7E080280 v_mad_f32 v5, 0, v3, -v2 ; D2820005 840A0680 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_mad_f32 v7, 0, -v3, v6 ; D2820007 441A0680 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mad_f32 v8, v5, v5, v8 ; D2820008 04220B05 v_max_f32_e32 v8, 0x33d6bf95, v8 ; 201010FF 33D6BF95 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mov_b32_e32 v9, 0x80000000 ; 7E1202FF 80000000 v_mad_f32 v10, v6, v9, v2 ; D282000A 040A1306 v_mad_f32 v11, 0, v6, -v3 ; D282000B 840E0C80 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mad_f32 v12, v10, v10, v12 ; D282000C 0432150A v_max_f32_e32 v12, 0x33d6bf95, v12 ; 201818FF 33D6BF95 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v11, v11, v12 ; 1016190B v_interp_p1_f32 v13, v0, 1, 1, [m0] ; C8340500 v_interp_p2_f32 v13, [v13], v1, 1, 1, [m0] ; C8350501 v_sub_f32_e32 v14, 1.0, v13 ; 081C1AF2 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 v_add_f32_e32 v13, 0, v15 ; 061A1E80 s_load_dwordx4 s[60:63], s[4:5], 0x0 ; C09E0500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[48:51], s[4:5], 0xc ; C098050C s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_load_dwordx8 s[64:71], s[6:7], 0x0 ; C0E00700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[40:47], s[6:7], 0x20 ; C0D40720 s_load_dwordx8 s[4:11], s[6:7], 0x28 ; C0C20728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[64:71], s[60:63] ; F0800F00 01F00F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 2.0, v16, -1.0 ; D2820000 03CE20F4 v_mul_f32_e32 v1, v0, v11 ; 10021700 v_mad_f32 v11, 2.0, v15, -1.0 ; D282000B 03CE1EF4 v_mad_f32 v1, v11, v7, v1 ; D2820001 04060F0B v_mad_f32 v7, 2.0, v17, -1.0 ; D2820007 03CE22F4 v_mad_f32 v17, v7, v2, v1 ; D2820011 04060507 v_mul_f32_e32 v1, v9, v6 ; 10020D09 v_mad_f32 v1, 0, v2, v1 ; D2820001 04060480 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v10, v0, v10 ; 10141500 v_mad_f32 v1, v11, v1, v10 ; D2820001 042A030B v_mad_f32 v16, v7, v3, v1 ; D2820010 04060707 v_mul_f32_e32 v1, v5, v8 ; 10021105 v_mul_f32_e32 v3, 0, v3 ; 10060680 v_mad_f32 v2, v2, v9, v3 ; D2820002 040E1302 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mad_f32 v0, v11, v1, v0 ; D2820000 0402030B v_mad_f32 v15, v7, v6, v0 ; D282000F 04020D07 v_cubeid_f32 v8, v15, v16, v17 ; D2880008 0446210F v_cubema_f32 v7, v15, v16, v17 ; D28E0007 0446210F v_cubesc_f32 v6, v15, v16, v17 ; D28A0006 0446210F v_cubetc_f32 v5, v15, v16, v17 ; D28C0005 0446210F v_rcp_f32_e64 v0, |v7| ; D3540100 00000107 v_mov_b32_e32 v1, 0x3fc00000 ; 7E0202FF 3FC00000 v_mad_f32 v7, v5, v0, v1 ; D2820007 04060105 v_mad_f32 v6, v6, v0, v1 ; D2820006 04060106 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[52:59], s[48:51] ; F0800700 018D0506 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v0, v7 ; 7E004F07 v_mov_b32_e32 v2, 0x400ccccd ; 7E0402FF 400CCCCD v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mov_b32_e32 v3, 0xbfd9999a ; 7E0602FF BFD9999A v_mul_f32_e32 v8, v3, v0 ; 10100103 v_mov_b32_e32 v18, v4 ; 7E240304 v_cubeid_f32 v22, v15, v16, v17 ; D2880016 0446210F v_cubema_f32 v21, v15, v16, v17 ; D28E0015 0446210F v_cubesc_f32 v20, v15, v16, v17 ; D28A0014 0446210F v_cubetc_f32 v19, v15, v16, v17 ; D28C0013 0446210F v_rcp_f32_e64 v4, |v21| ; D3540104 00000115 v_mad_f32 v21, v19, v4, v1 ; D2820015 04060913 v_mad_f32 v20, v20, v4, v1 ; D2820014 04060914 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[40:47], s[36:39] ; F0800700 012A0914 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v1, v11 ; 7E024F0B v_mul_legacy_f32_e32 v1, v2, v1 ; 0E020302 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mov_b32_e32 v4, 0x3fd9999a ; 7E0802FF 3FD9999A v_mad_f32 v1, v1, v4, v8 ; D2820001 04220901 v_mul_f32_e32 v0, 0x3fd9999a, v0 ; 100000FF 3FD9999A image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[28:35], s[16:19] ; F0800100 0087080D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v1, v0 ; D2820000 04020308 image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[12:15] ; F0800700 00650C0D v_mov_b32_e32 v1, 0x3a83126f ; 7E0202FF 3A83126F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v14, v0, v1 ; D2820000 0406010E v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v17, v0 ; 7E224B00 v_log_f32_e32 v0, v6 ; 7E004F06 v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v19, v3, v0 ; 10260103 v_log_f32_e32 v20, v10 ; 7E284F0A v_mul_legacy_f32_e32 v20, v2, v20 ; 0E282902 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mad_f32 v19, v20, v4, v19 ; D2820013 044E0914 v_mul_f32_e32 v0, 0x3fd9999a, v0 ; 100000FF 3FD9999A v_mad_f32 v0, v8, v19, v0 ; D2820000 04022708 v_mad_f32 v0, v13, v0, v1 ; D2820000 0406010D v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v16, v0 ; 7E204B00 v_log_f32_e32 v0, v5 ; 7E004F05 v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v3, v3, v0 ; 10060103 v_log_f32_e32 v5, v9 ; 7E0A4F09 v_mul_legacy_f32_e32 v2, v2, v5 ; 0E040B02 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mad_f32 v2, v2, v4, v3 ; D2820002 040E0902 v_mul_f32_e32 v0, 0x3fd9999a, v0 ; 100000FF 3FD9999A v_mad_f32 v0, v8, v2, v0 ; D2820000 04020508 v_mad_f32 v0, v12, v0, v1 ; D2820000 0406010C v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v15, v0 ; 7E1E4B00 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[0:3] ; F0800700 0001000F s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v3, v0, v1 ; 5E060300 v_cvt_pkrtz_f16_f32_e64 v0, v2, 1.0 ; D25E0000 0001E502 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: LRP TEMP[0].xy, IN[0], CONST[5].zwzw, CONST[5] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: ADD TEMP[1].xy, TEMP[0], IMM[0].xxxx 3: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 4: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[2] 5: MAD TEMP[0], CONST[6].xxxx, CONST[2], TEMP[0] 6: ADD TEMP[0], TEMP[0], CONST[3] 7: MAD TEMP[1].xy, IN[1], CONST[4], CONST[4].zwzw 8: MOV TEMP[1].xy, TEMP[1].xyxx 9: MOV TEMP[1].zw, IMM[0].yyyy 10: MOV OUT[1], TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = call float @llvm.AMDGPU.lrp(float %42, float %35, float %33) %51 = call float @llvm.AMDGPU.lrp(float %43, float %36, float %34) %52 = fadd float %50, 1.000000e+00 %53 = fadd float %51, 1.000000e+00 %54 = fmul float %53, %17 %55 = fmul float %53, %18 %56 = fmul float %53, %19 %57 = fmul float %53, %20 %58 = fmul float %52, %13 %59 = fadd float %58, %54 %60 = fmul float %52, %14 %61 = fadd float %60, %55 %62 = fmul float %52, %15 %63 = fadd float %62, %56 %64 = fmul float %52, %16 %65 = fadd float %64, %57 %66 = fmul float %37, %21 %67 = fadd float %66, %59 %68 = fmul float %37, %22 %69 = fadd float %68, %61 %70 = fmul float %37, %23 %71 = fadd float %70, %63 %72 = fmul float %37, %24 %73 = fadd float %72, %65 %74 = fadd float %67, %25 %75 = fadd float %69, %26 %76 = fadd float %71, %27 %77 = fadd float %73, %28 %78 = fmul float %48, %29 %79 = fadd float %78, %31 %80 = fmul float %49, %30 %81 = fadd float %80, %32 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %81, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %75, float %76, float %77) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112 s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v1, v5, v2, v2 ; F800020F 02020501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v4, 1.0, v0 ; 080800F2 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 v_add_f32_e32 v4, 1.0, v4 ; 060808F2 v_sub_f32_e32 v5, 1.0, v1 ; 080A02F2 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v5 ; D2820000 04140901 v_add_f32_e32 v0, 1.0, v0 ; 060000F2 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_mad_f32 v1, v2, s5, v1 ; D2820001 04040B02 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v0 ; 10040004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v2, v3, s5, v2 ; D2820002 04080B03 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v3, v5, s5, v3 ; D2820003 040C0B05 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v0, v4, s5, v0 ; D2820000 04000B04 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 2.0000, -0.9999} IMM[1] FLT32 { 0.0000, -1.0000, -2.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].xy, IN[0].xyyy 7: TEX TEMP[0].xy, TEMP[0], SAMP[1], 2D 8: MAD TEMP[2].z, TEMP[0].yyyy, IMM[0].zzzz, IMM[0].wwww 9: MAD TEMP[3], TEMP[0].xxxy, IMM[0].yyyz, IMM[1].xxxy 10: MAD TEMP[0], TEMP[0].xxxy, IMM[1].yyyz, IMM[0].yyyy 11: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[1].xxxx 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[4].x, TEMP[3].xxxx 14: ELSE :0 15: MOV TEMP[4].x, TEMP[0].xxxx 16: ENDIF 17: MOV TEMP[4].x, TEMP[4].xxxx 18: FSGE TEMP[5].x, TEMP[2].zzzz, IMM[1].xxxx 19: UIF TEMP[5].xxxx :0 20: MOV TEMP[5].x, TEMP[3].yyyy 21: ELSE :0 22: MOV TEMP[5].x, TEMP[0].yyyy 23: ENDIF 24: MOV TEMP[4].y, TEMP[5].xxxx 25: FSGE TEMP[5].x, TEMP[2].zzzz, IMM[1].xxxx 26: UIF TEMP[5].xxxx :0 27: MOV TEMP[5].x, TEMP[3].zzzz 28: ELSE :0 29: MOV TEMP[5].x, TEMP[0].zzzz 30: ENDIF 31: MOV TEMP[4].z, TEMP[5].xxxx 32: FSGE TEMP[2].x, TEMP[2].zzzz, IMM[1].xxxx 33: UIF TEMP[2].xxxx :0 34: MOV TEMP[2].x, TEMP[3].wwww 35: ELSE :0 36: MOV TEMP[2].x, TEMP[0].wwww 37: ENDIF 38: MOV TEMP[4].w, TEMP[2].xxxx 39: MOV OUT[1], TEMP[4] 40: MOV OUT[0], TEMP[1] 41: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %44 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %42, float 1.000000e+00) %47 = bitcast float %30 to i32 %48 = bitcast float %31 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = bitcast <8 x i32> %27 to <32 x i8> %52 = bitcast <4 x i32> %29 to <16 x i8> %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float %55, 2.000000e+00 %57 = fadd float %56, 0xBFEFFF2E40000000 %58 = fmul float %54, 1.000000e+00 %59 = fadd float %58, 0.000000e+00 %60 = fmul float %54, 1.000000e+00 %61 = fadd float %60, 0.000000e+00 %62 = fmul float %54, 1.000000e+00 %63 = fadd float %62, 0.000000e+00 %64 = fmul float %55, 2.000000e+00 %65 = fadd float %64, -1.000000e+00 %66 = fmul float %54, -1.000000e+00 %67 = fadd float %66, 1.000000e+00 %68 = fmul float %54, -1.000000e+00 %69 = fadd float %68, 1.000000e+00 %70 = fmul float %54, -1.000000e+00 %71 = fadd float %70, 1.000000e+00 %72 = fmul float %55, -2.000000e+00 %73 = fadd float %72, 1.000000e+00 %74 = fcmp oge float %57, 0.000000e+00 %75 = sext i1 %74 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = icmp ne i32 %77, 0 %. = select i1 %78, float %59, float %67 %79 = fcmp oge float %57, 0.000000e+00 %80 = sext i1 %79 to i32 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 %temp20.0 = select i1 %83, float %61, float %69 %84 = fcmp oge float %57, 0.000000e+00 %85 = sext i1 %84 to i32 %86 = bitcast i32 %85 to float %87 = bitcast float %86 to i32 %88 = icmp ne i32 %87, 0 %.33 = select i1 %88, float %63, float %71 %89 = fcmp oge float %57, 0.000000e+00 %90 = sext i1 %89 to i32 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = icmp ne i32 %92, 0 %temp8.0 = select i1 %93, float %65, float %73 %94 = call i32 @llvm.SI.packf16(float %43, float %44) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %45, float %46) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %., float %temp20.0) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %.33, float %temp8.0) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %95, float %97, float %95, float %97) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030402 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v0, v7 ; 7E004F07 v_mul_legacy_f32_e32 v0, 1.0, v0 ; 0E0000F2 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e32 v1, v6 ; 7E024F06 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v1, v8, v1 ; 0E020308 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_log_f32_e32 v1, v5 ; 7E024F05 v_mul_legacy_f32_e32 v1, v8, v1 ; 0E020308 v_exp_f32_e32 v1, v1 ; 7E024B01 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v4, v8, v4 ; 0E080908 v_exp_f32_e32 v4, v4 ; 7E084B04 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 s_waitcnt expcnt(0) ; BF8C070F image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800300 00450002 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v1, -1.0 ; D2820002 03CE02F4 v_mad_f32 v3, -2.0, v1, 1.0 ; D2820003 03CA02F5 v_mov_b32_e32 v4, 0xbf7ff972 ; 7E0802FF BF7FF972 v_mad_f32 v4, 2.0, v1, v4 ; D2820004 041202F4 v_cmp_ge_f32_e64 s[0:1], v4, 0 ; D00C0000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_cmp_ne_i32_e64 s[0:1], v4, 0 ; D10A0000 00010104 v_cndmask_b32_e64 v2, v3, v2, s[0:1] ; D2000002 18020503 v_add_f32_e32 v3, 0, v0 ; 06060080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_cndmask_b32_e64 v0, v0, v3, s[0:1] ; D2000000 00020700 v_cvt_pkrtz_f16_f32_e32 v1, v0, v2 ; 5E020500 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 1, 1, 1, 1, v0, v1, v0, v1 ; F8001C1F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[2].xyxx 1: ADD TEMP[1].xy, -TEMP[0], CONST[3] 2: MOV TEMP[0].xy, TEMP[1].xyxx 3: MAD TEMP[0].xy, IN[0], TEMP[0], CONST[2] 4: MAD TEMP[1].zw, TEMP[0].xyxy, CONST[1].xyxy, -CONST[0].xyxy 5: MOV TEMP[0].xy, TEMP[0].xyxx 6: MOV TEMP[1].xy, TEMP[1].zwzz 7: MOV TEMP[1].zw, IMM[0].yyxy 8: MOV TEMP[0].zw, IMM[0].yyxy 9: MOV OUT[1], TEMP[0] 10: MOV OUT[0], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = fsub float -0.000000e+00, %17 %28 = fadd float %27, %19 %29 = fsub float -0.000000e+00, %18 %30 = fadd float %29, %20 %31 = fmul float %25, %28 %32 = fadd float %31, %17 %33 = fmul float %26, %30 %34 = fadd float %33, %18 %35 = fsub float -0.000000e+00, %13 %36 = fmul float %32, %15 %37 = fadd float %36, %35 %38 = fsub float -0.000000e+00, %14 %39 = fmul float %34, %16 %40 = fadd float %39, %38 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %32, float %34, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %37, float %40, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x0 ; C0840900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[8:11], 0 idxen ; E00C2000 80020200 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v3, v1, s4 ; D2820000 00120303 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_mad_f32 v1, v2, v1, s4 ; D2820001 00120302 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v1, v0, v3, v2 ; F800020F 02030001 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v0, s5, v0, -v4 ; D2820000 84120005 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v1, s0, v1, -v4 ; D2820001 84120200 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: ABS TEMP[0].x, TEMP[1].xxxx 7: LG2 TEMP[0].x, TEMP[0].xxxx 8: ABS TEMP[2].x, TEMP[1].yyyy 9: LG2 TEMP[2].x, TEMP[2].xxxx 10: MOV TEMP[0].y, TEMP[2].xxxx 11: ABS TEMP[1].x, TEMP[1].zzzz 12: LG2 TEMP[1].x, TEMP[1].xxxx 13: MOV TEMP[0].z, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 15: EX2 TEMP[1].x, TEMP[0].xxxx 16: EX2 TEMP[2].x, TEMP[0].yyyy 17: MOV TEMP[1].y, TEMP[2].xxxx 18: EX2 TEMP[0].x, TEMP[0].zzzz 19: MOV TEMP[1].z, TEMP[0].xxxx 20: MOV TEMP[1].w, IMM[0].yyyy 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %49 = call float @fabs(float %46) %50 = call float @llvm.log2.f32(float %49) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = fmul float %50, 0x3FDD1743E0000000 %56 = fmul float %52, 0x3FDD1743E0000000 %57 = fmul float %54, 0x3FDD1743E0000000 %58 = call float @llvm.AMDIL.exp.(float %55) %59 = call float @llvm.AMDIL.exp.(float %56) %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float 1.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800700 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v3 ; 7E084F03 v_mul_legacy_f32_e32 v4, 1.0, v4 ; 0E0808F2 v_exp_f32_e32 v4, v4 ; 7E084B04 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_log_f32_e32 v5, v1 ; 7E0A4F01 v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v3 ; 7E084F03 v_mul_legacy_f32_e32 v4, 1.0, v4 ; 0E0808F2 v_exp_f32_e32 v4, v4 ; 7E084B04 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_log_f32_e32 v5, v1 ; 7E0A4F01 v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[1].xxxx, CONST[1].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: MUL TEMP[0].xyz, TEMP[1], CONST[0] 8: ABS TEMP[1].x, TEMP[0].xxxx 9: LG2 TEMP[1].x, TEMP[1].xxxx 10: ABS TEMP[2].x, TEMP[0].yyyy 11: LG2 TEMP[2].x, TEMP[2].xxxx 12: MOV TEMP[1].y, TEMP[2].xxxx 13: ABS TEMP[0].x, TEMP[0].zzzz 14: LG2 TEMP[0].x, TEMP[0].xxxx 15: MOV TEMP[1].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[1], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV TEMP[1].w, IMM[0].yyyy 23: MOV OUT[0], TEMP[1] 24: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fmul float %33, %27 %36 = fadd float %35, %28 %37 = fmul float %34, %27 %38 = fadd float %37, %28 %39 = bitcast float %36 to i32 %40 = bitcast float %38 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = bitcast <8 x i32> %30 to <32 x i8> %44 = bitcast <4 x i32> %32 to <16 x i8> %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %43, <16 x i8> %44, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = call float @llvm.pow.f32(float %46, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %47, float 0x40019999A0000000) %51 = call float @llvm.pow.f32(float %48, float 0x40019999A0000000) %52 = fmul float %49, %24 %53 = fmul float %50, %25 %54 = fmul float %51, %26 %55 = call float @fabs(float %52) %56 = call float @llvm.log2.f32(float %55) %57 = call float @fabs(float %53) %58 = call float @llvm.log2.f32(float %57) %59 = call float @fabs(float %54) %60 = call float @llvm.log2.f32(float %59) %61 = fmul float %56, 0x3FDD1743E0000000 %62 = fmul float %58, 0x3FDD1743E0000000 %63 = fmul float %60, 0x3FDD1743E0000000 %64 = call float @llvm.AMDIL.exp.(float %61) %65 = call float @llvm.AMDIL.exp.(float %62) %66 = call float @llvm.AMDIL.exp.(float %63) %67 = call i32 @llvm.SI.packf16(float %64, float %65) %68 = bitcast i32 %67 to float %69 = call i32 @llvm.SI.packf16(float %66, float 1.000000e+00) %70 = bitcast i32 %69 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %68, float %70, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s9, v2, v3 ; D2820003 040E0409 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s9, v4, v0 ; D2820002 04020809 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v0 ; 10000000 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..4] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} 0: MAD TEMP[0].x, IN[0].xxxx, IMM[0].xxxx, IMM[0].xxxx 1: MOV TEMP[1].xy, CONST[3].xyxx 2: ADD TEMP[1].yz, -TEMP[1].xxyw, CONST[4].xxyw 3: MAD TEMP[2].x, TEMP[0].xxxx, TEMP[1].yyyy, CONST[3].xxxx 4: MOV TEMP[0].x, TEMP[2].xxxx 5: MAD TEMP[2].w, IN[0].yyyy, IMM[0].xxxx, IMM[0].xxxx 6: MOV TEMP[0].w, TEMP[2].wwww 7: MAD TEMP[1].z, TEMP[2].wwww, TEMP[1].zzzz, CONST[3].yyyy 8: MOV TEMP[0].z, TEMP[1].zzzz 9: ADD TEMP[0].xy, TEMP[0].xzzw, CONST[2] 10: MAD TEMP[1].zw, TEMP[0].xyxy, CONST[1].xyxy, -CONST[0].xyxy 11: MOV TEMP[0].xy, TEMP[0].xyxx 12: MOV TEMP[1].xy, TEMP[1].zwzz 13: MOV TEMP[1].zw, IMM[0].zzyz 14: MOV TEMP[0].zw, IMM[0].zzyz 15: MOV OUT[1], TEMP[0] 16: MOV OUT[0], TEMP[1] 17: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = fmul float %27, 5.000000e-01 %30 = fadd float %29, 5.000000e-01 %31 = fsub float -0.000000e+00, %19 %32 = fadd float %31, %21 %33 = fsub float -0.000000e+00, %20 %34 = fadd float %33, %22 %35 = fmul float %30, %32 %36 = fadd float %35, %19 %37 = fmul float %28, 5.000000e-01 %38 = fadd float %37, 5.000000e-01 %39 = fmul float %38, %34 %40 = fadd float %39, %20 %41 = fadd float %36, %17 %42 = fadd float %40, %18 %43 = fsub float -0.000000e+00, %13 %44 = fmul float %41, %15 %45 = fadd float %44, %43 %46 = fsub float -0.000000e+00, %14 %47 = fmul float %42, %16 %48 = fadd float %47, %46 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %45, float %48, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x0 ; C0840900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[8:11], 0 idxen ; E00C2000 80020200 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 0.5, v3, 0.5 ; D2820000 03C206F0 v_mad_f32 v0, v0, v1, s4 ; D2820000 00120300 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_mad_f32 v1, v2, v1, s4 ; D2820001 00120302 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v1, v0, v3, v2 ; F800020F 02030001 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v0, s5, v0, -v4 ; D2820000 84120005 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v1, s0, v1, -v4 ; D2820001 84120200 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: ABS TEMP[0].x, TEMP[1].xxxx 7: LG2 TEMP[0].x, TEMP[0].xxxx 8: ABS TEMP[2].x, TEMP[1].yyyy 9: LG2 TEMP[2].x, TEMP[2].xxxx 10: MOV TEMP[0].y, TEMP[2].xxxx 11: ABS TEMP[1].x, TEMP[1].zzzz 12: LG2 TEMP[1].x, TEMP[1].xxxx 13: MOV TEMP[0].z, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 15: EX2 TEMP[1].x, TEMP[0].xxxx 16: EX2 TEMP[2].x, TEMP[0].yyyy 17: MOV TEMP[1].y, TEMP[2].xxxx 18: EX2 TEMP[0].x, TEMP[0].zzzz 19: MOV TEMP[1].z, TEMP[0].xxxx 20: MOV TEMP[1].w, IMM[0].yyyy 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %49 = call float @fabs(float %46) %50 = call float @llvm.log2.f32(float %49) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = fmul float %50, 0x3FDD1743E0000000 %56 = fmul float %52, 0x3FDD1743E0000000 %57 = fmul float %54, 0x3FDD1743E0000000 %58 = call float @llvm.AMDIL.exp.(float %55) %59 = call float @llvm.AMDIL.exp.(float %56) %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float 1.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800700 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v3 ; 7E084F03 v_mul_legacy_f32_e32 v4, 1.0, v4 ; 0E0808F2 v_exp_f32_e32 v4, v4 ; 7E084B04 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_log_f32_e32 v5, v1 ; 7E0A4F01 v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 1.0000, 0.0000} 0: ABS TEMP[0].x, IN[2].xxxx 1: LG2 TEMP[0].x, TEMP[0].xxxx 2: ABS TEMP[1].x, IN[2].yyyy 3: LG2 TEMP[1].x, TEMP[1].xxxx 4: MOV TEMP[0].y, TEMP[1].xxxx 5: ABS TEMP[1].x, IN[2].zzzz 6: LG2 TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0], IMM[0].xxxx 9: MOV TEMP[0].z, TEMP[1].xyzx 10: EX2 TEMP[2].x, TEMP[1].xxxx 11: EX2 TEMP[3].x, TEMP[1].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[1].x, TEMP[1].zzzz 14: MOV TEMP[2].z, TEMP[1].xxxx 15: MOV TEMP[0].xy, CONST[1].xyxx 16: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 17: MOV TEMP[0].xy, TEMP[0].xyxx 18: MOV TEMP[1].xy, IN[1].xyxx 19: MOV TEMP[2].w, IN[2].wwww 20: MOV TEMP[0].zw, IMM[0].zzyz 21: MOV TEMP[1].zw, IMM[0].zzyz 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call float @fabs(float %33) %38 = call float @llvm.log2.f32(float %37) %39 = call float @fabs(float %34) %40 = call float @llvm.log2.f32(float %39) %41 = call float @fabs(float %35) %42 = call float @llvm.log2.f32(float %41) %43 = fmul float %38, 0x40019999A0000000 %44 = fmul float %40, 0x40019999A0000000 %45 = fmul float %42, 0x40019999A0000000 %46 = call float @llvm.AMDIL.exp.(float %43) %47 = call float @llvm.AMDIL.exp.(float %44) %48 = call float @llvm.AMDIL.exp.(float %45) %49 = fsub float -0.000000e+00, %13 %50 = fmul float %21, %15 %51 = fadd float %50, %49 %52 = fsub float -0.000000e+00, %14 %53 = fmul float %22, %16 %54 = fadd float %53, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %54, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e64 v5, |v3| ; D34E0105 00000103 v_mul_f32_e32 v5, 0x400ccccd, v5 ; 100A0AFF 400CCCCD v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v6, |v2| ; D34E0106 00000102 v_mul_f32_e32 v6, 0x400ccccd, v6 ; 100C0CFF 400CCCCD v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_log_f32_e64 v7, |v1| ; D34E0107 00000101 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 32, 0, 0, 0, v7, v6, v5, v4 ; F800020F 04050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { -0.5000, 0.0000, 0.0000, 2.0000} IMM[1] FLT32 { -2.0000, 3.0000, 1.0000, 0.4545} 0: ADD TEMP[0].xy, IMM[0].xxxx, IN[1] 1: DP2 TEMP[0].x, TEMP[0].xyyy, TEMP[0].xyyy 2: MAX TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy 3: RSQ TEMP[2].x, TEMP[1].xxxx 4: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx 5: CMP TEMP[0].x, -TEMP[1].xxxx, TEMP[2].xxxx, IMM[0].zzzz 6: MOV TEMP[1].x, -CONST[0] 7: MAD TEMP[2].x, TEMP[0].xxxx, IMM[0].wwww, TEMP[1].xxxx 8: ADD TEMP[1].y, TEMP[1].xxxx, CONST[0].yyyy 9: RCP TEMP[1].x, TEMP[1].yyyy 10: MUL TEMP[1], TEMP[1].xxxx, TEMP[2].xxxx 11: MOV_SAT TEMP[1], TEMP[1] 12: MAD TEMP[2].y, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 14: MAD TEMP[1].x, TEMP[2].yyyy, -TEMP[1].xxxx, IMM[1].zzzz 15: MUL TEMP[1].w, TEMP[1].xxxx, IN[0].wwww 16: MOV TEMP[1].w, TEMP[1].wwww 17: ABS TEMP[2].x, IN[0].xxxx 18: LG2 TEMP[0].x, TEMP[2].xxxx 19: ABS TEMP[2].x, IN[0].yyyy 20: LG2 TEMP[2].x, TEMP[2].xxxx 21: MOV TEMP[0].y, TEMP[2].xxxx 22: ABS TEMP[2].x, IN[0].zzzz 23: LG2 TEMP[2].x, TEMP[2].xxxx 24: MOV TEMP[0].z, TEMP[2].xxxx 25: MUL TEMP[0].xyz, TEMP[0], IMM[1].wwww 26: EX2 TEMP[1].x, TEMP[0].xxxx 27: EX2 TEMP[2].x, TEMP[0].yyyy 28: MOV TEMP[1].y, TEMP[2].xxxx 29: EX2 TEMP[0].x, TEMP[0].zzzz 30: MOV TEMP[1].z, TEMP[0].xxxx 31: MOV OUT[0], TEMP[1] 32: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = fadd float -5.000000e-01, %30 %33 = fadd float -5.000000e-01, %31 %34 = fmul float %32, %32 %35 = fmul float %33, %33 %36 = fadd float %34, %35 %37 = call float @llvm.maxnum.f32(float %36, float 0x3E7AD7F2A0000000) %38 = call float @llvm.AMDGPU.rsq.clamped.f32(float %37) %39 = fmul float %38, %37 %40 = fsub float -0.000000e+00, %37 %41 = call float @llvm.AMDGPU.cndlt(float %40, float %39, float 0.000000e+00) %42 = fsub float -0.000000e+00, %24 %43 = fmul float %41, 2.000000e+00 %44 = fadd float %43, %42 %45 = fadd float %42, %25 %46 = fdiv float 1.000000e+00, %45 %47 = fmul float %46, %44 %48 = fmul float %46, %44 %49 = fmul float %46, %44 %50 = fmul float %46, %44 %51 = call float @llvm.AMDIL.clamp.(float %47, float 0.000000e+00, float 1.000000e+00) %52 = call float @llvm.AMDIL.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %53 = call float @llvm.AMDIL.clamp.(float %49, float 0.000000e+00, float 1.000000e+00) %54 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %55 = fmul float %51, -2.000000e+00 %56 = fadd float %55, 3.000000e+00 %57 = fmul float %51, %51 %58 = fsub float -0.000000e+00, %57 %59 = fmul float %56, %58 %60 = fadd float %59, 1.000000e+00 %61 = fmul float %60, %29 %62 = call float @fabs(float %26) %63 = call float @llvm.log2.f32(float %62) %64 = call float @fabs(float %27) %65 = call float @llvm.log2.f32(float %64) %66 = call float @fabs(float %28) %67 = call float @llvm.log2.f32(float %66) %68 = fmul float %63, 0x3FDD1743E0000000 %69 = fmul float %65, 0x3FDD1743E0000000 %70 = fmul float %67, 0x3FDD1743E0000000 %71 = call float @llvm.AMDIL.exp.(float %68) %72 = call float @llvm.AMDIL.exp.(float %69) %73 = call float @llvm.AMDIL.exp.(float %70) %74 = call i32 @llvm.SI.packf16(float %71, float %72) %75 = bitcast i32 %74 to float %76 = call i32 @llvm.SI.packf16(float %73, float %61) %77 = bitcast i32 %76 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %75, float %77, float %75, float %77) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { readonly } attributes #4 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_add_f32_e32 v2, -0.5, v2 ; 060404F1 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_add_f32_e32 v3, -0.5, v3 ; 060606F1 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v2, v2, v2, v3 ; D2820002 040E0502 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, v3, vcc ; D2000002 01AA0680 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, 2.0, v2, -s4 ; D2820002 801204F4 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 v_mov_b32_e32 v3, s4 ; 7E060204 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v3 ; 08060600 v_rcp_f32_e32 v3, v3 ; 7E065503 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mad_f32 v3, -2.0, v2, v3 ; D2820003 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v2, -v3, v2, 1.0 ; D2820002 23CA0503 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_log_f32_e64 v0, |v4| ; D34E0100 00000104 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 1.0000, 0.0000} 0: ABS TEMP[0].x, IN[2].xxxx 1: LG2 TEMP[0].x, TEMP[0].xxxx 2: ABS TEMP[1].x, IN[2].yyyy 3: LG2 TEMP[1].x, TEMP[1].xxxx 4: MOV TEMP[0].y, TEMP[1].xxxx 5: ABS TEMP[1].x, IN[2].zzzz 6: LG2 TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0], IMM[0].xxxx 9: MOV TEMP[0].z, TEMP[1].xyzx 10: EX2 TEMP[2].x, TEMP[1].xxxx 11: EX2 TEMP[3].x, TEMP[1].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[1].x, TEMP[1].zzzz 14: MOV TEMP[2].z, TEMP[1].xxxx 15: MOV TEMP[0].xy, CONST[1].xyxx 16: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 17: MOV TEMP[0].xy, TEMP[0].xyxx 18: MOV TEMP[1].xy, IN[1].xyxx 19: MOV TEMP[2].w, IN[2].wwww 20: MOV TEMP[0].zw, IMM[0].zzyz 21: MOV TEMP[1].zw, IMM[0].zzyz 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call float @fabs(float %33) %38 = call float @llvm.log2.f32(float %37) %39 = call float @fabs(float %34) %40 = call float @llvm.log2.f32(float %39) %41 = call float @fabs(float %35) %42 = call float @llvm.log2.f32(float %41) %43 = fmul float %38, 0x40019999A0000000 %44 = fmul float %40, 0x40019999A0000000 %45 = fmul float %42, 0x40019999A0000000 %46 = call float @llvm.AMDIL.exp.(float %43) %47 = call float @llvm.AMDIL.exp.(float %44) %48 = call float @llvm.AMDIL.exp.(float %45) %49 = fsub float -0.000000e+00, %13 %50 = fmul float %21, %15 %51 = fadd float %50, %49 %52 = fsub float -0.000000e+00, %14 %53 = fmul float %22, %16 %54 = fadd float %53, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %54, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e64 v5, |v3| ; D34E0105 00000103 v_mul_f32_e32 v5, 0x400ccccd, v5 ; 100A0AFF 400CCCCD v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v6, |v2| ; D34E0106 00000102 v_mul_f32_e32 v6, 0x400ccccd, v6 ; 100C0CFF 400CCCCD v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_log_f32_e64 v7, |v1| ; D34E0107 00000101 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 32, 0, 0, 0, v7, v6, v5, v4 ; F800020F 04050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MUL TEMP[0].w, TEMP[1].wwww, IN[0].wwww 7: MOV TEMP[0].w, TEMP[0].wwww 8: MUL TEMP[1].xyz, TEMP[1], IN[0] 9: ABS TEMP[2].x, TEMP[1].xxxx 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: ABS TEMP[3].x, TEMP[1].yyyy 12: LG2 TEMP[3].x, TEMP[3].xxxx 13: MOV TEMP[2].y, TEMP[3].xxxx 14: ABS TEMP[1].x, TEMP[1].zzzz 15: LG2 TEMP[1].x, TEMP[1].xxxx 16: MOV TEMP[2].z, TEMP[1].xxxx 17: MUL TEMP[1].xyz, TEMP[2], IMM[0].zzzz 18: EX2 TEMP[0].x, TEMP[1].xxxx 19: EX2 TEMP[2].x, TEMP[1].yyyy 20: MOV TEMP[0].y, TEMP[2].xxxx 21: EX2 TEMP[1].x, TEMP[1].zzzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: MOV OUT[0], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %44 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %42, float 1.000000e+00) %47 = fmul float %46, %29 %48 = fmul float %43, %26 %49 = fmul float %44, %27 %50 = fmul float %45, %28 %51 = call float @fabs(float %48) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %49) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %50) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %47) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v6, v5 ; 7E0C4F05 v_mul_legacy_f32_e32 v6, 1.0, v6 ; 0E0C0CF2 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_log_f32_e32 v7, v4 ; 7E0E4F04 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v7, v8, v7 ; 0E0E0F08 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_interp_p1_f32 v9, v0, 2, 0, [m0] ; C8240200 v_interp_p2_f32 v9, [v9], v1, 2, 0, [m0] ; C8250201 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_log_f32_e32 v7, v3 ; 7E0E4F03 v_mul_legacy_f32_e32 v7, v8, v7 ; 0E0E0F08 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v8, v2 ; 0E040508 v_exp_f32_e32 v2, v2 ; 7E044B02 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_mul_f32_e32 v0, v3, v2 ; 10000503 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[1].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[1].xyxx 3: MAD TEMP[0].xy, TEMP[0], IMM[0], IMM[0].xxxx 4: MOV TEMP[0].xy, TEMP[0].xyxx 5: MOV TEMP[1].xy, TEMP[1].xyxx 6: MOV TEMP[1].zw, IMM[0].wwzw 7: MOV TEMP[0].zw, IMM[0].wwzw 8: MOV OUT[0], TEMP[1] 9: MOV OUT[1], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 %29 = fmul float %25, 5.000000e-01 %30 = fadd float %29, 5.000000e-01 %31 = fmul float %28, -5.000000e-01 %32 = fadd float %31, 5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %32, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 v_mad_f32 v5, -0.5, v4, 0.5 ; D2820005 03C208F1 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v0, s0, v0, -v6 ; D2820000 841A0000 v_mad_f32 v1, 0.5, v0, 0.5 ; D2820001 03C200F0 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v1, v5, v3, v2 ; F800020F 02030501 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: ABS TEMP[0].x, TEMP[1].xxxx 6: LG2 TEMP[0].x, TEMP[0].xxxx 7: ABS TEMP[2].x, TEMP[1].yyyy 8: LG2 TEMP[2].x, TEMP[2].xxxx 9: MOV TEMP[0].y, TEMP[2].xxxx 10: ABS TEMP[1].x, TEMP[1].zzzz 11: LG2 TEMP[1].x, TEMP[1].xxxx 12: MOV TEMP[0].z, TEMP[1].xxxx 13: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 14: EX2 TEMP[1].x, TEMP[0].xxxx 15: EX2 TEMP[2].x, TEMP[0].yyyy 16: MOV TEMP[1].y, TEMP[2].xxxx 17: EX2 TEMP[0].x, TEMP[0].zzzz 18: MOV TEMP[1].z, TEMP[0].xxxx 19: MOV TEMP[1].w, CONST[0].xxxx 20: MOV OUT[0], TEMP[1] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = call float @llvm.pow.f32(float %38, float 0x40019999A0000000) %42 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %43 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %44 = call float @fabs(float %41) %45 = call float @llvm.log2.f32(float %44) %46 = call float @fabs(float %42) %47 = call float @llvm.log2.f32(float %46) %48 = call float @fabs(float %43) %49 = call float @llvm.log2.f32(float %48) %50 = fmul float %45, 0x3FDD1743E0000000 %51 = fmul float %47, 0x3FDD1743E0000000 %52 = fmul float %49, 0x3FDD1743E0000000 %53 = call float @llvm.AMDIL.exp.(float %50) %54 = call float @llvm.AMDIL.exp.(float %51) %55 = call float @llvm.AMDIL.exp.(float %52) %56 = call i32 @llvm.SI.packf16(float %53, float %54) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %55, float %24) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cvt_pkrtz_f16_f32_e64 v0, v0, s0 ; D25E0000 00000100 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].x, IN[1].xxxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = fsub float -0.000000e+00, %13 %29 = fmul float %21, %15 %30 = fadd float %29, %28 %31 = fsub float -0.000000e+00, %14 %32 = fmul float %22, %16 %33 = fadd float %32, %31 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %30, float %33, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v6, v6, v5 ; F800020F 05060601 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MAX TEMP[0].x, IN[0].xxxx, IMM[0].xxxx 1: RSQ TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 3: CMP TEMP[1].x, -TEMP[0].xxxx, TEMP[1].xxxx, IMM[0].yyyy 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV TEMP[0].xyz, IN[0].xxxx 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.maxnum.f32(float %22, float 0x3E7AD7F2A0000000) %24 = call float @llvm.AMDGPU.rsq.clamped.f32(float %23) %25 = fmul float %24, %23 %26 = fsub float -0.000000e+00, %23 %27 = call float @llvm.AMDGPU.cndlt(float %26, float %25, float 0.000000e+00) %28 = call i32 @llvm.SI.packf16(float %22, float %22) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_max_f32_e32 v0, 0x33d6bf95, v2 ; 200004FF 33D6BF95 v_rsq_clamp_f32_e32 v1, v0 ; 7E025900 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, 0, v1, vcc ; D2000000 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 v_cvt_pkrtz_f16_f32_e32 v1, v2, v2 ; 5E020502 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..99] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+4] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[0].x, TEMP[0].xxxx 12: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 13: UARL ADDR[0].x, TEMP[0].xxxx 14: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 15: MOV TEMP[2].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[0].xyzx 18: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 19: UIF TEMP[0].xxxx :0 20: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 21: MOV TEMP[2].w, TEMP[0].wwww 22: F2I TEMP[3].x, TEMP[0].wwww 23: UARL ADDR[0].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 26: F2I TEMP[4].x, TEMP[0].wwww 27: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 28: UARL ADDR[0].x, TEMP[4].xxxx 29: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 30: MOV TEMP[3].y, TEMP[4].xxxx 31: F2I TEMP[0].x, TEMP[0].wwww 32: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 33: UARL ADDR[0].x, TEMP[0].xxxx 34: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 35: MOV TEMP[3].z, TEMP[0].xxxx 36: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[3], TEMP[2] 37: MOV TEMP[2].xyz, TEMP[0].xyzx 38: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 39: UIF TEMP[0].xxxx :0 40: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 41: MOV TEMP[2].w, TEMP[0].wwww 42: F2I TEMP[4].x, TEMP[0].wwww 43: UARL ADDR[0].x, TEMP[4].xxxx 44: UARL ADDR[0].x, TEMP[4].xxxx 45: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 46: F2I TEMP[4].x, TEMP[0].wwww 47: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 48: UARL ADDR[0].x, TEMP[4].xxxx 49: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 50: MOV TEMP[3].y, TEMP[4].xxxx 51: F2I TEMP[0].x, TEMP[0].wwww 52: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 53: UARL ADDR[0].x, TEMP[0].xxxx 54: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 55: MOV TEMP[3].z, TEMP[0].xxxx 56: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[3], TEMP[2] 57: MOV TEMP[2].xyz, TEMP[0].xyzx 58: ENDIF 59: ENDIF 60: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 61: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 62: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 63: ADD TEMP[0], TEMP[1], CONST[3] 64: MOV TEMP[2].xy, IN[3].xyxx 65: MOV TEMP[1].xyz, TEMP[2].xyzx 66: MOV TEMP[1].w, IMM[0].yyyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float 3.000000e+00, %47 %57 = fmul float %33, 1.000000e+00 %58 = fadd float %57, 0.000000e+00 %59 = fmul float %34, 1.000000e+00 %60 = fadd float %59, 0.000000e+00 %61 = fmul float %35, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %33, 0.000000e+00 %64 = fadd float %63, 1.000000e+00 %65 = fptosi float %56 to i32 %66 = bitcast i32 %65 to float %67 = bitcast float %66 to i32 %68 = shl i32 %67, 4 %69 = add i32 %68, 64 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %67, 4 %72 = add i32 %71, 68 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %67, 4 %75 = add i32 %74, 72 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %67, 4 %78 = add i32 %77, 76 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %58, %70 %81 = fmul float %60, %73 %82 = fadd float %80, %81 %83 = fmul float %62, %76 %84 = fadd float %82, %83 %85 = fmul float %64, %79 %86 = fadd float %84, %85 %87 = fptosi float %56 to i32 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = add i32 1, %89 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = shl i32 %92, 4 %94 = add i32 %93, 64 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %92, 4 %97 = add i32 %96, 68 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = shl i32 %92, 4 %100 = add i32 %99, 72 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %92, 4 %103 = add i32 %102, 76 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = fmul float %58, %95 %106 = fmul float %60, %98 %107 = fadd float %105, %106 %108 = fmul float %62, %101 %109 = fadd float %107, %108 %110 = fmul float %64, %104 %111 = fadd float %109, %110 %112 = fptosi float %56 to i32 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = add i32 2, %114 %116 = bitcast i32 %115 to float %117 = bitcast float %116 to i32 %118 = shl i32 %117, 4 %119 = add i32 %118, 64 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %117, 4 %122 = add i32 %121, 68 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = shl i32 %117, 4 %125 = add i32 %124, 72 %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %125) %127 = shl i32 %117, 4 %128 = add i32 %127, 76 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = fmul float %58, %120 %131 = fmul float %60, %123 %132 = fadd float %130, %131 %133 = fmul float %62, %126 %134 = fadd float %132, %133 %135 = fmul float %64, %129 %136 = fadd float %134, %135 %137 = fmul float %86, %40 %138 = fmul float %111, %40 %139 = fmul float %136, %40 %140 = fcmp olt float 0.000000e+00, %41 %141 = sext i1 %140 to i32 %142 = bitcast i32 %141 to float %143 = bitcast float %142 to i32 %144 = icmp ne i32 %143, 0 br i1 %144, label %IF, label %ENDIF IF: ; preds = %main_body %145 = fmul float 3.000000e+00, %48 %146 = fptosi float %145 to i32 %147 = bitcast i32 %146 to float %148 = bitcast float %147 to i32 %149 = shl i32 %148, 4 %150 = add i32 %149, 64 %151 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %150) %152 = shl i32 %148, 4 %153 = add i32 %152, 68 %154 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %153) %155 = shl i32 %148, 4 %156 = add i32 %155, 72 %157 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %156) %158 = shl i32 %148, 4 %159 = add i32 %158, 76 %160 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %159) %161 = fmul float %58, %151 %162 = fmul float %60, %154 %163 = fadd float %161, %162 %164 = fmul float %62, %157 %165 = fadd float %163, %164 %166 = fmul float %64, %160 %167 = fadd float %165, %166 %168 = fptosi float %145 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = add i32 1, %170 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = shl i32 %173, 4 %175 = add i32 %174, 64 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = shl i32 %173, 4 %178 = add i32 %177, 68 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = shl i32 %173, 4 %181 = add i32 %180, 72 %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %181) %183 = shl i32 %173, 4 %184 = add i32 %183, 76 %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %184) %186 = fmul float %58, %176 %187 = fmul float %60, %179 %188 = fadd float %186, %187 %189 = fmul float %62, %182 %190 = fadd float %188, %189 %191 = fmul float %64, %185 %192 = fadd float %190, %191 %193 = fptosi float %145 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = add i32 2, %195 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = shl i32 %198, 4 %200 = add i32 %199, 64 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = shl i32 %198, 4 %203 = add i32 %202, 68 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %198, 4 %206 = add i32 %205, 72 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %198, 4 %209 = add i32 %208, 76 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %58, %201 %212 = fmul float %60, %204 %213 = fadd float %211, %212 %214 = fmul float %62, %207 %215 = fadd float %213, %214 %216 = fmul float %64, %210 %217 = fadd float %215, %216 %218 = fmul float %41, %167 %219 = fadd float %218, %137 %220 = fmul float %41, %192 %221 = fadd float %220, %138 %222 = fmul float %41, %217 %223 = fadd float %222, %139 %224 = fcmp olt float 0.000000e+00, %42 %225 = sext i1 %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = icmp ne i32 %227, 0 br i1 %228, label %IF44, label %ENDIF ENDIF: ; preds = %IF44, %IF, %main_body %temp8.0 = phi float [ %137, %main_body ], [ %327, %IF44 ], [ %219, %IF ] %temp9.0 = phi float [ %138, %main_body ], [ %329, %IF44 ], [ %221, %IF ] %temp10.0 = phi float [ %139, %main_body ], [ %331, %IF44 ], [ %223, %IF ] %229 = fmul float %temp9.0, %17 %230 = fmul float %temp9.0, %18 %231 = fmul float %temp9.0, %19 %232 = fmul float %temp9.0, %20 %233 = fmul float %temp8.0, %13 %234 = fadd float %233, %229 %235 = fmul float %temp8.0, %14 %236 = fadd float %235, %230 %237 = fmul float %temp8.0, %15 %238 = fadd float %237, %231 %239 = fmul float %temp8.0, %16 %240 = fadd float %239, %232 %241 = fmul float %temp10.0, %21 %242 = fadd float %241, %234 %243 = fmul float %temp10.0, %22 %244 = fadd float %243, %236 %245 = fmul float %temp10.0, %23 %246 = fadd float %245, %238 %247 = fmul float %temp10.0, %24 %248 = fadd float %247, %240 %249 = fadd float %242, %25 %250 = fadd float %244, %26 %251 = fadd float %246, %27 %252 = fadd float %248, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %54, float %55, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void IF44: ; preds = %IF %253 = fmul float 3.000000e+00, %49 %254 = fptosi float %253 to i32 %255 = bitcast i32 %254 to float %256 = bitcast float %255 to i32 %257 = shl i32 %256, 4 %258 = add i32 %257, 64 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) %260 = shl i32 %256, 4 %261 = add i32 %260, 68 %262 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %261) %263 = shl i32 %256, 4 %264 = add i32 %263, 72 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = shl i32 %256, 4 %267 = add i32 %266, 76 %268 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %267) %269 = fmul float %58, %259 %270 = fmul float %60, %262 %271 = fadd float %269, %270 %272 = fmul float %62, %265 %273 = fadd float %271, %272 %274 = fmul float %64, %268 %275 = fadd float %273, %274 %276 = fptosi float %253 to i32 %277 = bitcast i32 %276 to float %278 = bitcast float %277 to i32 %279 = add i32 1, %278 %280 = bitcast i32 %279 to float %281 = bitcast float %280 to i32 %282 = shl i32 %281, 4 %283 = add i32 %282, 64 %284 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %283) %285 = shl i32 %281, 4 %286 = add i32 %285, 68 %287 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %286) %288 = shl i32 %281, 4 %289 = add i32 %288, 72 %290 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %289) %291 = shl i32 %281, 4 %292 = add i32 %291, 76 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %58, %284 %295 = fmul float %60, %287 %296 = fadd float %294, %295 %297 = fmul float %62, %290 %298 = fadd float %296, %297 %299 = fmul float %64, %293 %300 = fadd float %298, %299 %301 = fptosi float %253 to i32 %302 = bitcast i32 %301 to float %303 = bitcast float %302 to i32 %304 = add i32 2, %303 %305 = bitcast i32 %304 to float %306 = bitcast float %305 to i32 %307 = shl i32 %306, 4 %308 = add i32 %307, 64 %309 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %308) %310 = shl i32 %306, 4 %311 = add i32 %310, 68 %312 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %311) %313 = shl i32 %306, 4 %314 = add i32 %313, 72 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) %316 = shl i32 %306, 4 %317 = add i32 %316, 76 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %58, %309 %320 = fmul float %60, %312 %321 = fadd float %319, %320 %322 = fmul float %62, %315 %323 = fadd float %321, %322 %324 = fmul float %64, %318 %325 = fadd float %323, %324 %326 = fmul float %42, %275 %327 = fadd float %326, %219 %328 = fmul float %42, %300 %329 = fadd float %328, %221 %330 = fmul float %42, %325 %331 = fadd float %330, %223 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[4:7], s[8:9], 0xc ; C082090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v11, s[12:15], 0 idxen ; E00C2000 8003060B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, 0, v7 ; 06040E80 buffer_load_format_xyzw v[15:18], v11, s[20:23], 0 idxen ; E00C2000 80050F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v15 ; 10001EFF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0x64, v3 ; 4A0006FF 00000064 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_add_f32_e32 v4, 0, v6 ; 06080C80 v_add_i32_e32 v1, 0x60, v3 ; 4A0206FF 00000060 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v4, v1, v0 ; D2820000 04020304 v_add_f32_e32 v5, 0, v8 ; 060A1080 v_add_i32_e32 v1, 0x68, v3 ; 4A0206FF 00000068 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v5, v1, v0 ; D2820000 04020305 v_mad_f32 v6, 0, v6, 1.0 ; D2820006 03CA0C80 v_add_i32_e32 v1, 0x6c, v3 ; 4A0206FF 0000006C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v1, v0 ; D2820000 04020306 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_add_i32_e32 v1, 0x54, v3 ; 4A0206FF 00000054 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_add_i32_e32 v12, 0x50, v3 ; 4A1806FF 00000050 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v12, v1 ; D2820001 04061904 v_add_i32_e32 v12, 0x58, v3 ; 4A1806FF 00000058 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v5, v12, v1 ; D2820001 04061905 v_add_i32_e32 v12, 0x5c, v3 ; 4A1806FF 0000005C buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v6, v12, v1 ; D2820001 04061906 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v12, 0x44, v3 ; 4A1806FF 00000044 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v2 ; 1018050C v_add_i32_e32 v13, 64, v3 ; 4A1A06C0 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v4, v13, v12 ; D282000C 04321B04 v_add_i32_e32 v13, 0x48, v3 ; 4A1A06FF 00000048 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v5, v13, v12 ; D282000C 04321B05 v_add_i32_e32 v3, 0x4c, v3 ; 4A0606FF 0000004C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v6, v3, v12 ; D2820003 04320706 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v19, 0, -1, s[4:5] ; D2000813 00118280 v_cmp_ne_i32_e64 s[20:21], v19, 0 ; D10A0014 00010113 s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_buffer_load_dword s7, s[0:3], 0xe ; C203810E s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s12, s[0:3], 0xb ; C206010B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s17, s[0:3], 0x7 ; C2088107 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s16, s[0:3], 0x1 ; C2080101 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[20:21], s[20:21] ; BE942414 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v19, 0x40400000, v16 ; 102620FF 40400000 v_cvt_i32_f32_e32 v19, v19 ; 7E261113 v_lshlrev_b32_e32 v19, 4, v19 ; 34262684 v_add_i32_e32 v20, 0x60, v19 ; 4A2826FF 00000060 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x64, v19 ; 4A2A26FF 00000064 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x68, v19 ; 4A2A26FF 00000068 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x6c, v19 ; 4A2A26FF 0000006C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v0, v8, v20, v0 ; D2820000 04022908 v_add_i32_e32 v20, 0x50, v19 ; 4A2826FF 00000050 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x54, v19 ; 4A2A26FF 00000054 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x58, v19 ; 4A2A26FF 00000058 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x5c, v19 ; 4A2A26FF 0000005C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v1, v8, v20, v1 ; D2820001 04062908 v_add_i32_e32 v20, 64, v19 ; 4A2826C0 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x44, v19 ; 4A2A26FF 00000044 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x48, v19 ; 4A2A26FF 00000048 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v19, 0x4c, v19 ; 4A2626FF 0000004C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v6, v19, v20 ; D2820013 04522706 v_mad_f32 v3, v8, v19, v3 ; D2820003 040E2708 v_cmp_gt_f32_e64 s[22:23], v9, 0 ; D0080016 00010109 v_cndmask_b32_e64 v19, 0, -1, s[22:23] ; D2000813 00598280 v_cmp_ne_i32_e64 s[22:23], v19, 0 ; D10A0016 00010113 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v15, 0x40400000, v17 ; 101E22FF 40400000 v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_add_i32_e32 v16, 0x60, v15 ; 4A201EFF 00000060 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x64, v15 ; 4A221EFF 00000064 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x68, v15 ; 4A221EFF 00000068 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x6c, v15 ; 4A221EFF 0000006C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v0, v9, v16, v0 ; D2820000 04022109 v_add_i32_e32 v16, 0x50, v15 ; 4A201EFF 00000050 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x54, v15 ; 4A221EFF 00000054 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x58, v15 ; 4A221EFF 00000058 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x5c, v15 ; 4A221EFF 0000005C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v1, v9, v16, v1 ; D2820001 04062109 v_add_i32_e32 v16, 64, v15 ; 4A201EC0 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x44, v15 ; 4A221EFF 00000044 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mad_f32 v2, v4, v16, v2 ; D2820002 040A2104 v_add_i32_e32 v4, 0x48, v15 ; 4A081EFF 00000048 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v5, v4, v2 ; D2820002 040A0905 v_add_i32_e32 v4, 0x4c, v15 ; 4A081EFF 0000004C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v6, v4, v2 ; D2820002 040A0906 v_mad_f32 v3, v9, v2, v3 ; D2820003 040E0509 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_b64 exec, exec, s[20:21] ; 88FE147E v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 32, 0, 0, 0, v11, v12, v0, v2 ; F800020F 02000C0B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s17, v1 ; 10040211 v_mad_f32 v2, v3, s19, v2 ; D2820002 04082703 v_mad_f32 v2, v0, s12, v2 ; D2820002 04081900 v_add_f32_e32 v2, s9, v2 ; 06040409 v_mul_f32_e32 v4, s15, v1 ; 1008020F v_mad_f32 v4, v3, s18, v4 ; D2820004 04102503 v_mad_f32 v4, v0, s10, v4 ; D2820004 04101500 v_add_f32_e32 v4, s7, v4 ; 06080807 v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mad_f32 v5, v3, s16, v5 ; D2820005 04142103 v_mad_f32 v5, v0, s8, v5 ; D2820005 04141100 v_add_f32_e32 v5, s5, v5 ; 060A0A05 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mad_f32 v1, v3, s14, v1 ; D2820001 04041D03 v_mad_f32 v0, v0, s6, v1 ; D2820000 04040D00 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.3300, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 2: UIF TEMP[0].xxxx :0 3: MOV TEMP[0].x, IMM[0].yyyy 4: ELSE :0 5: MOV TEMP[0].x, IMM[0].zzzz 6: ENDIF 7: ADD TEMP[1].y, -CONST[1].xxxx, CONST[1].yyyy 8: MAD TEMP[1].y, CONST[1].zzzz, TEMP[1].yyyy, CONST[1].xxxx 9: MUL TEMP[0].w, TEMP[1].yyyy, TEMP[0].xxxx 10: MOV TEMP[0].w, TEMP[0].wwww 11: MOV TEMP[0].xyz, IMM[0].yyyy 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = fsub float -0.000000e+00, %24 %30 = fadd float %29, %28 %31 = fcmp oge float %30, 0.000000e+00 %32 = sext i1 %31 to i32 %33 = bitcast i32 %32 to float %34 = bitcast float %33 to i32 %35 = icmp ne i32 %34, 0 %. = select i1 %35, float 1.000000e+00, float 0x3FD51EB860000000 %36 = fsub float -0.000000e+00, %25 %37 = fadd float %36, %26 %38 = fmul float %27, %37 %39 = fadd float %38, %25 %40 = fmul float %39, %. %41 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s4, v2 ; 0A000404 v_cmp_ge_f32_e64 s[4:5], v0, 0 ; D00C0004 00010100 v_cndmask_b32_e64 v0, 0, -1, s[4:5] ; D2000000 00118280 v_cmp_ne_i32_e64 s[4:5], v0, 0 ; D10A0004 00010100 v_mov_b32_e32 v0, 0x3ea8f5c3 ; 7E0002FF 3EA8F5C3 v_cndmask_b32_e64 v0, v0, 1.0, s[4:5] ; D2000000 0011E500 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_mov_b32_e32 v2, s4 ; 7E040204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v1, s0, v2 ; D2820001 04080101 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v0, 1.0, v0 ; 5E0000F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[6], TEMP[0] 3: ADD TEMP[0], TEMP[0], CONST[7] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 6: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 7: MAD TEMP[1], TEMP[0].wwww, CONST[3], TEMP[1] 8: MOV TEMP[0].z, TEMP[0].zzzz 9: MOV TEMP[0].xy, IN[1].xyxx 10: MOV TEMP[0].w, IMM[0].xxxx 11: MOV OUT[1], TEMP[0] 12: MOV OUT[0], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = fmul float %33, %50 %59 = fmul float %34, %50 %60 = fmul float %35, %50 %61 = fmul float %36, %50 %62 = fmul float %49, %29 %63 = fadd float %62, %58 %64 = fmul float %49, %30 %65 = fadd float %64, %59 %66 = fmul float %49, %31 %67 = fadd float %66, %60 %68 = fmul float %49, %32 %69 = fadd float %68, %61 %70 = fmul float %51, %37 %71 = fadd float %70, %63 %72 = fmul float %51, %38 %73 = fadd float %72, %65 %74 = fmul float %51, %39 %75 = fadd float %74, %67 %76 = fmul float %51, %40 %77 = fadd float %76, %69 %78 = fadd float %71, %41 %79 = fadd float %73, %42 %80 = fadd float %75, %43 %81 = fadd float %77, %44 %82 = fmul float %79, %17 %83 = fmul float %79, %18 %84 = fmul float %79, %19 %85 = fmul float %79, %20 %86 = fmul float %78, %13 %87 = fadd float %86, %82 %88 = fmul float %78, %14 %89 = fadd float %88, %83 %90 = fmul float %78, %15 %91 = fadd float %90, %84 %92 = fmul float %78, %16 %93 = fadd float %92, %85 %94 = fmul float %80, %21 %95 = fadd float %94, %87 %96 = fmul float %80, %22 %97 = fadd float %96, %89 %98 = fmul float %80, %23 %99 = fadd float %98, %91 %100 = fmul float %80, %24 %101 = fadd float %100, %93 %102 = fmul float %81, %25 %103 = fadd float %102, %95 %104 = fmul float %81, %26 %105 = fadd float %104, %97 %106 = fmul float %81, %27 %107 = fadd float %106, %99 %108 = fmul float %81, %28 %109 = fadd float %108, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float %80, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %105, float %107, float %109) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v2 ; 100A0404 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s4, v5 ; D2820005 04140901 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v6, v7, v5, v0 ; F800020F 00050706 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v2 ; 10000404 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v0 ; D2820000 04000903 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v2 ; 100C0404 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v6 ; 100E0C04 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s4, v7 ; D2820007 041C0905 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v2 ; 10100404 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v1, s4, v8 ; D2820008 04200901 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v3, s4, v8 ; D2820001 04200903 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v7 ; D2820002 041C0901 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v6 ; 10060C04 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s4, v3 ; D2820003 040C0905 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s4, v3 ; D2820003 040C0901 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v6 ; 10080C04 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v6 ; 100C0C04 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v6 ; D2820000 04180900 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: POW TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy 4: MUL TEMP[2].y, TEMP[1].wwww, IMM[0].zzzz 5: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 6: UIF TEMP[0].xxxx :0 7: MOV TEMP[0].x, TEMP[1].wwww 8: ELSE :0 9: MOV TEMP[0].x, TEMP[2].yyyy 10: ENDIF 11: ADD TEMP[1].y, -TEMP[1].wwww, IMM[1].xxxx 12: FSGE TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww 13: UIF TEMP[1].xxxx :0 14: MOV TEMP[1].x, IMM[0].wwww 15: ELSE :0 16: MOV TEMP[1].x, TEMP[0].xxxx 17: ENDIF 18: MOV TEMP[0].w, TEMP[1].xxxx 19: MOV TEMP[0].xyz, IMM[0].yyyy 20: MOV OUT[0], TEMP[0] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %32 = fsub float -0.000000e+00, %24 %33 = fadd float %32, %31 %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %26 to <32 x i8> %39 = bitcast <4 x i32> %28 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 3 %42 = call float @llvm.pow.f32(float %41, float 1.000000e+00) %43 = fmul float %42, 5.000000e-01 %44 = fcmp oge float %33, 0.000000e+00 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 %. = select i1 %48, float %42, float %43 %49 = fsub float -0.000000e+00, %42 %50 = fadd float %49, 0x3FB99999A0000000 %51 = fcmp oge float %50, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %53 to i32 %55 = icmp ne i32 %54, 0 %temp4.0 = select i1 %55, float 0.000000e+00, float %. %56 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %temp4.0) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800800 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s0, v4 ; 0A000800 v_cmp_ge_f32_e64 s[0:1], v0, 0 ; D00C0000 00010100 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_ne_i32_e64 s[0:1], v0, 0 ; D10A0000 00010100 v_cndmask_b32_e64 v0, v3, v2, s[0:1] ; D2000000 18020503 v_sub_f32_e32 v1, 0x3dcccccd, v2 ; 080204FF 3DCCCCCD v_cmp_ge_f32_e64 s[0:1], v1, 0 ; D00C0000 00010101 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; D2000000 00010100 v_cvt_pkrtz_f16_f32_e32 v0, 1.0, v0 ; 5E0000F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..99] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+4] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[0].x, TEMP[0].xxxx 12: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 13: UARL ADDR[0].x, TEMP[0].xxxx 14: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 15: MOV TEMP[2].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[0].xyzx 18: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 19: UIF TEMP[0].xxxx :0 20: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 21: MOV TEMP[2].w, TEMP[0].wwww 22: F2I TEMP[3].x, TEMP[0].wwww 23: UARL ADDR[0].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 26: F2I TEMP[4].x, TEMP[0].wwww 27: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 28: UARL ADDR[0].x, TEMP[4].xxxx 29: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 30: MOV TEMP[3].y, TEMP[4].xxxx 31: F2I TEMP[0].x, TEMP[0].wwww 32: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 33: UARL ADDR[0].x, TEMP[0].xxxx 34: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 35: MOV TEMP[3].z, TEMP[0].xxxx 36: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[3], TEMP[2] 37: MOV TEMP[2].xyz, TEMP[0].xyzx 38: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 39: UIF TEMP[0].xxxx :0 40: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 41: MOV TEMP[2].w, TEMP[0].wwww 42: F2I TEMP[4].x, TEMP[0].wwww 43: UARL ADDR[0].x, TEMP[4].xxxx 44: UARL ADDR[0].x, TEMP[4].xxxx 45: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 46: F2I TEMP[4].x, TEMP[0].wwww 47: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 48: UARL ADDR[0].x, TEMP[4].xxxx 49: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 50: MOV TEMP[3].y, TEMP[4].xxxx 51: F2I TEMP[0].x, TEMP[0].wwww 52: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 53: UARL ADDR[0].x, TEMP[0].xxxx 54: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 55: MOV TEMP[3].z, TEMP[0].xxxx 56: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[3], TEMP[2] 57: MOV TEMP[2].xyz, TEMP[0].xyzx 58: ENDIF 59: ENDIF 60: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 61: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 62: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 63: ADD TEMP[0], TEMP[1], CONST[3] 64: MOV TEMP[2].xy, IN[3].xyxx 65: MOV TEMP[1].xyz, TEMP[2].xyzx 66: MOV TEMP[1].w, IMM[0].yyyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float 3.000000e+00, %47 %57 = fmul float %33, 1.000000e+00 %58 = fadd float %57, 0.000000e+00 %59 = fmul float %34, 1.000000e+00 %60 = fadd float %59, 0.000000e+00 %61 = fmul float %35, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %33, 0.000000e+00 %64 = fadd float %63, 1.000000e+00 %65 = fptosi float %56 to i32 %66 = bitcast i32 %65 to float %67 = bitcast float %66 to i32 %68 = shl i32 %67, 4 %69 = add i32 %68, 64 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %67, 4 %72 = add i32 %71, 68 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %67, 4 %75 = add i32 %74, 72 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %67, 4 %78 = add i32 %77, 76 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %58, %70 %81 = fmul float %60, %73 %82 = fadd float %80, %81 %83 = fmul float %62, %76 %84 = fadd float %82, %83 %85 = fmul float %64, %79 %86 = fadd float %84, %85 %87 = fptosi float %56 to i32 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = add i32 1, %89 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = shl i32 %92, 4 %94 = add i32 %93, 64 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %92, 4 %97 = add i32 %96, 68 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = shl i32 %92, 4 %100 = add i32 %99, 72 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %92, 4 %103 = add i32 %102, 76 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = fmul float %58, %95 %106 = fmul float %60, %98 %107 = fadd float %105, %106 %108 = fmul float %62, %101 %109 = fadd float %107, %108 %110 = fmul float %64, %104 %111 = fadd float %109, %110 %112 = fptosi float %56 to i32 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = add i32 2, %114 %116 = bitcast i32 %115 to float %117 = bitcast float %116 to i32 %118 = shl i32 %117, 4 %119 = add i32 %118, 64 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %117, 4 %122 = add i32 %121, 68 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = shl i32 %117, 4 %125 = add i32 %124, 72 %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %125) %127 = shl i32 %117, 4 %128 = add i32 %127, 76 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = fmul float %58, %120 %131 = fmul float %60, %123 %132 = fadd float %130, %131 %133 = fmul float %62, %126 %134 = fadd float %132, %133 %135 = fmul float %64, %129 %136 = fadd float %134, %135 %137 = fmul float %86, %40 %138 = fmul float %111, %40 %139 = fmul float %136, %40 %140 = fcmp olt float 0.000000e+00, %41 %141 = sext i1 %140 to i32 %142 = bitcast i32 %141 to float %143 = bitcast float %142 to i32 %144 = icmp ne i32 %143, 0 br i1 %144, label %IF, label %ENDIF IF: ; preds = %main_body %145 = fmul float 3.000000e+00, %48 %146 = fptosi float %145 to i32 %147 = bitcast i32 %146 to float %148 = bitcast float %147 to i32 %149 = shl i32 %148, 4 %150 = add i32 %149, 64 %151 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %150) %152 = shl i32 %148, 4 %153 = add i32 %152, 68 %154 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %153) %155 = shl i32 %148, 4 %156 = add i32 %155, 72 %157 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %156) %158 = shl i32 %148, 4 %159 = add i32 %158, 76 %160 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %159) %161 = fmul float %58, %151 %162 = fmul float %60, %154 %163 = fadd float %161, %162 %164 = fmul float %62, %157 %165 = fadd float %163, %164 %166 = fmul float %64, %160 %167 = fadd float %165, %166 %168 = fptosi float %145 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = add i32 1, %170 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = shl i32 %173, 4 %175 = add i32 %174, 64 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = shl i32 %173, 4 %178 = add i32 %177, 68 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = shl i32 %173, 4 %181 = add i32 %180, 72 %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %181) %183 = shl i32 %173, 4 %184 = add i32 %183, 76 %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %184) %186 = fmul float %58, %176 %187 = fmul float %60, %179 %188 = fadd float %186, %187 %189 = fmul float %62, %182 %190 = fadd float %188, %189 %191 = fmul float %64, %185 %192 = fadd float %190, %191 %193 = fptosi float %145 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = add i32 2, %195 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = shl i32 %198, 4 %200 = add i32 %199, 64 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = shl i32 %198, 4 %203 = add i32 %202, 68 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %198, 4 %206 = add i32 %205, 72 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %198, 4 %209 = add i32 %208, 76 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %58, %201 %212 = fmul float %60, %204 %213 = fadd float %211, %212 %214 = fmul float %62, %207 %215 = fadd float %213, %214 %216 = fmul float %64, %210 %217 = fadd float %215, %216 %218 = fmul float %41, %167 %219 = fadd float %218, %137 %220 = fmul float %41, %192 %221 = fadd float %220, %138 %222 = fmul float %41, %217 %223 = fadd float %222, %139 %224 = fcmp olt float 0.000000e+00, %42 %225 = sext i1 %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = icmp ne i32 %227, 0 br i1 %228, label %IF44, label %ENDIF ENDIF: ; preds = %IF44, %IF, %main_body %temp8.0 = phi float [ %137, %main_body ], [ %327, %IF44 ], [ %219, %IF ] %temp9.0 = phi float [ %138, %main_body ], [ %329, %IF44 ], [ %221, %IF ] %temp10.0 = phi float [ %139, %main_body ], [ %331, %IF44 ], [ %223, %IF ] %229 = fmul float %temp9.0, %17 %230 = fmul float %temp9.0, %18 %231 = fmul float %temp9.0, %19 %232 = fmul float %temp9.0, %20 %233 = fmul float %temp8.0, %13 %234 = fadd float %233, %229 %235 = fmul float %temp8.0, %14 %236 = fadd float %235, %230 %237 = fmul float %temp8.0, %15 %238 = fadd float %237, %231 %239 = fmul float %temp8.0, %16 %240 = fadd float %239, %232 %241 = fmul float %temp10.0, %21 %242 = fadd float %241, %234 %243 = fmul float %temp10.0, %22 %244 = fadd float %243, %236 %245 = fmul float %temp10.0, %23 %246 = fadd float %245, %238 %247 = fmul float %temp10.0, %24 %248 = fadd float %247, %240 %249 = fadd float %242, %25 %250 = fadd float %244, %26 %251 = fadd float %246, %27 %252 = fadd float %248, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %54, float %55, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void IF44: ; preds = %IF %253 = fmul float 3.000000e+00, %49 %254 = fptosi float %253 to i32 %255 = bitcast i32 %254 to float %256 = bitcast float %255 to i32 %257 = shl i32 %256, 4 %258 = add i32 %257, 64 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) %260 = shl i32 %256, 4 %261 = add i32 %260, 68 %262 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %261) %263 = shl i32 %256, 4 %264 = add i32 %263, 72 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = shl i32 %256, 4 %267 = add i32 %266, 76 %268 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %267) %269 = fmul float %58, %259 %270 = fmul float %60, %262 %271 = fadd float %269, %270 %272 = fmul float %62, %265 %273 = fadd float %271, %272 %274 = fmul float %64, %268 %275 = fadd float %273, %274 %276 = fptosi float %253 to i32 %277 = bitcast i32 %276 to float %278 = bitcast float %277 to i32 %279 = add i32 1, %278 %280 = bitcast i32 %279 to float %281 = bitcast float %280 to i32 %282 = shl i32 %281, 4 %283 = add i32 %282, 64 %284 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %283) %285 = shl i32 %281, 4 %286 = add i32 %285, 68 %287 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %286) %288 = shl i32 %281, 4 %289 = add i32 %288, 72 %290 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %289) %291 = shl i32 %281, 4 %292 = add i32 %291, 76 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %58, %284 %295 = fmul float %60, %287 %296 = fadd float %294, %295 %297 = fmul float %62, %290 %298 = fadd float %296, %297 %299 = fmul float %64, %293 %300 = fadd float %298, %299 %301 = fptosi float %253 to i32 %302 = bitcast i32 %301 to float %303 = bitcast float %302 to i32 %304 = add i32 2, %303 %305 = bitcast i32 %304 to float %306 = bitcast float %305 to i32 %307 = shl i32 %306, 4 %308 = add i32 %307, 64 %309 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %308) %310 = shl i32 %306, 4 %311 = add i32 %310, 68 %312 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %311) %313 = shl i32 %306, 4 %314 = add i32 %313, 72 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) %316 = shl i32 %306, 4 %317 = add i32 %316, 76 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %58, %309 %320 = fmul float %60, %312 %321 = fadd float %319, %320 %322 = fmul float %62, %315 %323 = fadd float %321, %322 %324 = fmul float %64, %318 %325 = fadd float %323, %324 %326 = fmul float %42, %275 %327 = fadd float %326, %219 %328 = fmul float %42, %300 %329 = fadd float %328, %221 %330 = fmul float %42, %325 %331 = fadd float %330, %223 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[4:7], s[8:9], 0xc ; C082090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v11, s[12:15], 0 idxen ; E00C2000 8003060B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, 0, v7 ; 06040E80 buffer_load_format_xyzw v[15:18], v11, s[20:23], 0 idxen ; E00C2000 80050F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v15 ; 10001EFF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0x64, v3 ; 4A0006FF 00000064 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_add_f32_e32 v4, 0, v6 ; 06080C80 v_add_i32_e32 v1, 0x60, v3 ; 4A0206FF 00000060 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v4, v1, v0 ; D2820000 04020304 v_add_f32_e32 v5, 0, v8 ; 060A1080 v_add_i32_e32 v1, 0x68, v3 ; 4A0206FF 00000068 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v5, v1, v0 ; D2820000 04020305 v_mad_f32 v6, 0, v6, 1.0 ; D2820006 03CA0C80 v_add_i32_e32 v1, 0x6c, v3 ; 4A0206FF 0000006C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v1, v0 ; D2820000 04020306 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_add_i32_e32 v1, 0x54, v3 ; 4A0206FF 00000054 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_add_i32_e32 v12, 0x50, v3 ; 4A1806FF 00000050 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v12, v1 ; D2820001 04061904 v_add_i32_e32 v12, 0x58, v3 ; 4A1806FF 00000058 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v5, v12, v1 ; D2820001 04061905 v_add_i32_e32 v12, 0x5c, v3 ; 4A1806FF 0000005C buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v6, v12, v1 ; D2820001 04061906 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v12, 0x44, v3 ; 4A1806FF 00000044 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v2 ; 1018050C v_add_i32_e32 v13, 64, v3 ; 4A1A06C0 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v4, v13, v12 ; D282000C 04321B04 v_add_i32_e32 v13, 0x48, v3 ; 4A1A06FF 00000048 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v5, v13, v12 ; D282000C 04321B05 v_add_i32_e32 v3, 0x4c, v3 ; 4A0606FF 0000004C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v6, v3, v12 ; D2820003 04320706 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v19, 0, -1, s[4:5] ; D2000813 00118280 v_cmp_ne_i32_e64 s[20:21], v19, 0 ; D10A0014 00010113 s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_buffer_load_dword s7, s[0:3], 0xe ; C203810E s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s12, s[0:3], 0xb ; C206010B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s17, s[0:3], 0x7 ; C2088107 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s16, s[0:3], 0x1 ; C2080101 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[20:21], s[20:21] ; BE942414 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v19, 0x40400000, v16 ; 102620FF 40400000 v_cvt_i32_f32_e32 v19, v19 ; 7E261113 v_lshlrev_b32_e32 v19, 4, v19 ; 34262684 v_add_i32_e32 v20, 0x60, v19 ; 4A2826FF 00000060 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x64, v19 ; 4A2A26FF 00000064 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x68, v19 ; 4A2A26FF 00000068 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x6c, v19 ; 4A2A26FF 0000006C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v0, v8, v20, v0 ; D2820000 04022908 v_add_i32_e32 v20, 0x50, v19 ; 4A2826FF 00000050 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x54, v19 ; 4A2A26FF 00000054 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x58, v19 ; 4A2A26FF 00000058 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x5c, v19 ; 4A2A26FF 0000005C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v1, v8, v20, v1 ; D2820001 04062908 v_add_i32_e32 v20, 64, v19 ; 4A2826C0 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x44, v19 ; 4A2A26FF 00000044 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x48, v19 ; 4A2A26FF 00000048 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v19, 0x4c, v19 ; 4A2626FF 0000004C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v6, v19, v20 ; D2820013 04522706 v_mad_f32 v3, v8, v19, v3 ; D2820003 040E2708 v_cmp_gt_f32_e64 s[22:23], v9, 0 ; D0080016 00010109 v_cndmask_b32_e64 v19, 0, -1, s[22:23] ; D2000813 00598280 v_cmp_ne_i32_e64 s[22:23], v19, 0 ; D10A0016 00010113 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v15, 0x40400000, v17 ; 101E22FF 40400000 v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_add_i32_e32 v16, 0x60, v15 ; 4A201EFF 00000060 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x64, v15 ; 4A221EFF 00000064 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x68, v15 ; 4A221EFF 00000068 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x6c, v15 ; 4A221EFF 0000006C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v0, v9, v16, v0 ; D2820000 04022109 v_add_i32_e32 v16, 0x50, v15 ; 4A201EFF 00000050 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x54, v15 ; 4A221EFF 00000054 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x58, v15 ; 4A221EFF 00000058 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x5c, v15 ; 4A221EFF 0000005C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v1, v9, v16, v1 ; D2820001 04062109 v_add_i32_e32 v16, 64, v15 ; 4A201EC0 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x44, v15 ; 4A221EFF 00000044 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mad_f32 v2, v4, v16, v2 ; D2820002 040A2104 v_add_i32_e32 v4, 0x48, v15 ; 4A081EFF 00000048 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v5, v4, v2 ; D2820002 040A0905 v_add_i32_e32 v4, 0x4c, v15 ; 4A081EFF 0000004C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v6, v4, v2 ; D2820002 040A0906 v_mad_f32 v3, v9, v2, v3 ; D2820003 040E0509 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_b64 exec, exec, s[20:21] ; 88FE147E v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 32, 0, 0, 0, v11, v12, v0, v2 ; F800020F 02000C0B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s17, v1 ; 10040211 v_mad_f32 v2, v3, s19, v2 ; D2820002 04082703 v_mad_f32 v2, v0, s12, v2 ; D2820002 04081900 v_add_f32_e32 v2, s9, v2 ; 06040409 v_mul_f32_e32 v4, s15, v1 ; 1008020F v_mad_f32 v4, v3, s18, v4 ; D2820004 04102503 v_mad_f32 v4, v0, s10, v4 ; D2820004 04101500 v_add_f32_e32 v4, s7, v4 ; 06080807 v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mad_f32 v5, v3, s16, v5 ; D2820005 04142103 v_mad_f32 v5, v0, s8, v5 ; D2820005 04141100 v_add_f32_e32 v5, s5, v5 ; 060A0A05 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mad_f32 v1, v3, s14, v1 ; D2820001 04041D03 v_mad_f32 v0, v0, s6, v1 ; D2820000 04040D00 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: POW TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy 4: MUL TEMP[2].y, TEMP[1].wwww, IMM[0].zzzz 5: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 6: UIF TEMP[0].xxxx :0 7: MOV TEMP[0].x, TEMP[1].wwww 8: ELSE :0 9: MOV TEMP[0].x, TEMP[2].yyyy 10: ENDIF 11: ADD TEMP[1].y, -TEMP[1].wwww, IMM[1].xxxx 12: FSGE TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww 13: UIF TEMP[1].xxxx :0 14: MOV TEMP[1].x, IMM[0].wwww 15: ELSE :0 16: MOV TEMP[1].x, TEMP[0].xxxx 17: ENDIF 18: MOV TEMP[0].w, TEMP[1].xxxx 19: MOV TEMP[0].xyz, IMM[0].yyyy 20: MOV OUT[0], TEMP[0] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %32 = fsub float -0.000000e+00, %24 %33 = fadd float %32, %31 %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %26 to <32 x i8> %39 = bitcast <4 x i32> %28 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 3 %42 = call float @llvm.pow.f32(float %41, float 1.000000e+00) %43 = fmul float %42, 5.000000e-01 %44 = fcmp oge float %33, 0.000000e+00 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 %. = select i1 %48, float %42, float %43 %49 = fsub float -0.000000e+00, %42 %50 = fadd float %49, 0x3FB99999A0000000 %51 = fcmp oge float %50, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %53 to i32 %55 = icmp ne i32 %54, 0 %temp4.0 = select i1 %55, float 0.000000e+00, float %. %56 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %temp4.0) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800800 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s0, v4 ; 0A000800 v_cmp_ge_f32_e64 s[0:1], v0, 0 ; D00C0000 00010100 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_ne_i32_e64 s[0:1], v0, 0 ; D10A0000 00010100 v_cndmask_b32_e64 v0, v3, v2, s[0:1] ; D2000000 18020503 v_sub_f32_e32 v1, 0x3dcccccd, v2 ; 080204FF 3DCCCCCD v_cmp_ge_f32_e64 s[0:1], v1, 0 ; D00C0000 00010101 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; D2000000 00010100 v_cvt_pkrtz_f16_f32_e32 v0, 1.0, v0 ; 5E0000F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV TEMP[1].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxyx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_add_f32_e32 v0, 0, v0 ; 06000080 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.5000} IMM[1] FLT32 { 0.1250, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 3: MOV TEMP[2].xy, TEMP[1].xyyy 4: TEX TEMP[2].zw, TEMP[2], SAMP[0], 2D 5: MOV TEMP[0].zw, TEMP[2].wwzw 6: ADD TEMP[3].xy, CONST[0].xwzw, IN[0] 7: MOV TEMP[0].xy, TEMP[3].xyxx 8: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 9: MOV TEMP[3].xy, TEMP[1].xyyy 10: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 11: ADD TEMP[2].x, TEMP[2].wwww, TEMP[3].wwww 12: ADD TEMP[3].yz, CONST[0].xzyw, IN[0].xxyw 13: MOV TEMP[0].yz, TEMP[3].zyzz 14: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 15: MOV TEMP[3].xy, TEMP[1].xyyy 16: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 17: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 18: ADD TEMP[3].yz, CONST[0].xzww, IN[0].xxyw 19: MOV TEMP[0].yz, TEMP[3].zyzz 20: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 21: MOV TEMP[3].xy, TEMP[1].xyyy 22: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 23: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 24: MAD TEMP[3].yz, CONST[0].xxyw, IMM[0].wwww, IN[0].xxyw 25: MOV TEMP[0].yz, TEMP[3].zyzz 26: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 27: MOV TEMP[3].xy, TEMP[1].xyyy 28: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 29: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 30: MAD TEMP[3].yz, CONST[0].xxww, IMM[0].wwww, IN[0].xxyw 31: MOV TEMP[0].yz, TEMP[3].zyzz 32: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 33: MOV TEMP[3].xy, TEMP[1].xyyy 34: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 35: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 36: MAD TEMP[3].yz, CONST[0].xzyw, IMM[0].wwww, IN[0].xxyw 37: MOV TEMP[0].yz, TEMP[3].zyzz 38: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 39: MOV TEMP[3].xy, TEMP[1].xyyy 40: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 41: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 42: MAD TEMP[3].yz, CONST[0].xzww, IMM[0].wwww, IN[0].xxyw 43: MOV TEMP[0].yz, TEMP[3].zyzz 44: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 45: MOV TEMP[0].xy, TEMP[1].xyyy 46: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 47: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[0].wwww 48: MUL TEMP[0], TEMP[0].xxxx, IMM[1].xxxx 49: MOV OUT[0], TEMP[0] 50: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fadd float %24, %32 %35 = fadd float %25, %33 %36 = fmul float %34, 1.000000e+00 %37 = fadd float %36, 0.000000e+00 %38 = fmul float %35, -1.000000e+00 %39 = fadd float %38, 1.000000e+00 %40 = bitcast float %37 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = bitcast <8 x i32> %29 to <32 x i8> %45 = bitcast <4 x i32> %31 to <16 x i8> %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %44, <16 x i8> %45, i32 2) %47 = extractelement <4 x float> %46, i32 3 %48 = fadd float %24, %32 %49 = fadd float %27, %33 %50 = fmul float %48, 1.000000e+00 %51 = fadd float %50, 0.000000e+00 %52 = fmul float %49, -1.000000e+00 %53 = fadd float %52, 1.000000e+00 %54 = bitcast float %51 to i32 %55 = bitcast float %53 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = bitcast <8 x i32> %29 to <32 x i8> %59 = bitcast <4 x i32> %31 to <16 x i8> %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %58, <16 x i8> %59, i32 2) %61 = extractelement <4 x float> %60, i32 3 %62 = fadd float %47, %61 %63 = fadd float %26, %32 %64 = fadd float %25, %33 %65 = fmul float %63, 1.000000e+00 %66 = fadd float %65, 0.000000e+00 %67 = fmul float %64, -1.000000e+00 %68 = fadd float %67, 1.000000e+00 %69 = bitcast float %66 to i32 %70 = bitcast float %68 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = bitcast <8 x i32> %29 to <32 x i8> %74 = bitcast <4 x i32> %31 to <16 x i8> %75 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %73, <16 x i8> %74, i32 2) %76 = extractelement <4 x float> %75, i32 3 %77 = fadd float %62, %76 %78 = fadd float %26, %32 %79 = fadd float %27, %33 %80 = fmul float %78, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %79, -1.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = bitcast float %81 to i32 %85 = bitcast float %83 to i32 %86 = insertelement <2 x i32> undef, i32 %84, i32 0 %87 = insertelement <2 x i32> %86, i32 %85, i32 1 %88 = bitcast <8 x i32> %29 to <32 x i8> %89 = bitcast <4 x i32> %31 to <16 x i8> %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %88, <16 x i8> %89, i32 2) %91 = extractelement <4 x float> %90, i32 3 %92 = fadd float %77, %91 %93 = fmul float %24, 5.000000e-01 %94 = fadd float %93, %32 %95 = fmul float %25, 5.000000e-01 %96 = fadd float %95, %33 %97 = fmul float %94, 1.000000e+00 %98 = fadd float %97, 0.000000e+00 %99 = fmul float %96, -1.000000e+00 %100 = fadd float %99, 1.000000e+00 %101 = bitcast float %98 to i32 %102 = bitcast float %100 to i32 %103 = insertelement <2 x i32> undef, i32 %101, i32 0 %104 = insertelement <2 x i32> %103, i32 %102, i32 1 %105 = bitcast <8 x i32> %29 to <32 x i8> %106 = bitcast <4 x i32> %31 to <16 x i8> %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %105, <16 x i8> %106, i32 2) %108 = extractelement <4 x float> %107, i32 3 %109 = fadd float %92, %108 %110 = fmul float %24, 5.000000e-01 %111 = fadd float %110, %32 %112 = fmul float %27, 5.000000e-01 %113 = fadd float %112, %33 %114 = fmul float %111, 1.000000e+00 %115 = fadd float %114, 0.000000e+00 %116 = fmul float %113, -1.000000e+00 %117 = fadd float %116, 1.000000e+00 %118 = bitcast float %115 to i32 %119 = bitcast float %117 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = bitcast <8 x i32> %29 to <32 x i8> %123 = bitcast <4 x i32> %31 to <16 x i8> %124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %122, <16 x i8> %123, i32 2) %125 = extractelement <4 x float> %124, i32 3 %126 = fadd float %109, %125 %127 = fmul float %26, 5.000000e-01 %128 = fadd float %127, %32 %129 = fmul float %25, 5.000000e-01 %130 = fadd float %129, %33 %131 = fmul float %128, 1.000000e+00 %132 = fadd float %131, 0.000000e+00 %133 = fmul float %130, -1.000000e+00 %134 = fadd float %133, 1.000000e+00 %135 = bitcast float %132 to i32 %136 = bitcast float %134 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = bitcast <8 x i32> %29 to <32 x i8> %140 = bitcast <4 x i32> %31 to <16 x i8> %141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %138, <32 x i8> %139, <16 x i8> %140, i32 2) %142 = extractelement <4 x float> %141, i32 3 %143 = fadd float %126, %142 %144 = fmul float %26, 5.000000e-01 %145 = fadd float %144, %32 %146 = fmul float %27, 5.000000e-01 %147 = fadd float %146, %33 %148 = fmul float %145, 1.000000e+00 %149 = fadd float %148, 0.000000e+00 %150 = fmul float %147, -1.000000e+00 %151 = fadd float %150, 1.000000e+00 %152 = bitcast float %149 to i32 %153 = bitcast float %151 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = bitcast <8 x i32> %29 to <32 x i8> %157 = bitcast <4 x i32> %31 to <16 x i8> %158 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %156, <16 x i8> %157, i32 2) %159 = extractelement <4 x float> %158, i32 3 %160 = fadd float %143, %159 %161 = fmul float %160, 1.250000e-01 %162 = fmul float %160, 1.250000e-01 %163 = fmul float %160, 1.250000e-01 %164 = fmul float %160, 1.250000e-01 %165 = call i32 @llvm.SI.packf16(float %161, float %162) %166 = bitcast i32 %165 to float %167 = call i32 @llvm.SI.packf16(float %163, float %164) %168 = bitcast i32 %167 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %166, float %168, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s8, v2 ; 06060408 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s9, v5 ; 06000A09 v_add_f32_e32 v3, 0, v0 ; 06060080 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640003 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v1, s4, v2 ; 06020404 v_sub_f32_e32 v7, 1.0, v1 ; 080E02F2 v_mov_b32_e32 v8, v3 ; 7E100303 v_mov_b32_e32 v9, v4 ; 7E120304 v_mov_b32_e32 v9, v7 ; 7E120307 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[12:15] ; F0800800 00640108 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v0, v1 ; 06000300 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s0, v5 ; 06020A00 v_add_f32_e32 v6, 0, v1 ; 060C0280 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800800 00640106 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mov_b32_e32 v7, v4 ; 7E0E0304 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800800 00640106 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mad_f32 v1, 0.5, s4, v2 ; D2820001 040808F0 v_sub_f32_e32 v4, 1.0, v1 ; 080802F2 v_mad_f32 v1, 0.5, s9, v5 ; D2820001 041412F0 v_add_f32_e32 v3, 0, v1 ; 06060280 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640103 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mad_f32 v1, 0.5, s8, v2 ; D2820001 040810F0 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_mov_b32_e32 v6, v3 ; 7E0C0303 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v7, v1 ; 7E0E0301 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800800 00640206 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v2, v0 ; 06000102 v_mad_f32 v2, 0.5, s0, v5 ; D2820002 041400F0 v_add_f32_e32 v3, 0, v2 ; 06060480 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640203 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v2, v0 ; 06000102 v_mov_b32_e32 v4, v1 ; 7E080301 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640103 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v0, 0x3e000000, v0 ; 100000FF 3E000000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: ADD TEMP[0], TEMP[0], CONST[7] 3: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 4: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 5: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 6: MAD TEMP[0], TEMP[0].wwww, CONST[3], TEMP[1] 7: MOV OUT[1], IN[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = fmul float %33, %46 %56 = fmul float %34, %46 %57 = fmul float %35, %46 %58 = fmul float %36, %46 %59 = fmul float %45, %29 %60 = fadd float %59, %55 %61 = fmul float %45, %30 %62 = fadd float %61, %56 %63 = fmul float %45, %31 %64 = fadd float %63, %57 %65 = fmul float %45, %32 %66 = fadd float %65, %58 %67 = fadd float %60, %37 %68 = fadd float %62, %38 %69 = fadd float %64, %39 %70 = fadd float %66, %40 %71 = fmul float %68, %17 %72 = fmul float %68, %18 %73 = fmul float %68, %19 %74 = fmul float %68, %20 %75 = fmul float %67, %13 %76 = fadd float %75, %71 %77 = fmul float %67, %14 %78 = fadd float %77, %72 %79 = fmul float %67, %15 %80 = fadd float %79, %73 %81 = fmul float %67, %16 %82 = fadd float %81, %74 %83 = fmul float %69, %21 %84 = fadd float %83, %76 %85 = fmul float %69, %22 %86 = fadd float %85, %78 %87 = fmul float %69, %23 %88 = fadd float %87, %80 %89 = fmul float %69, %24 %90 = fadd float %89, %82 %91 = fmul float %70, %25 %92 = fadd float %91, %84 %93 = fmul float %70, %26 %94 = fadd float %93, %86 %95 = fmul float %70, %27 %96 = fadd float %95, %88 %97 = fmul float %70, %28 %98 = fadd float %97, %90 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %51, float %52, float %53, float %54) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %92, float %94, float %96, float %98) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v7, s4, v6 ; D2820006 04180907 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v1 ; 10100204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v8 ; D2820000 04200900 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v6 ; D2820001 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v5 ; 10040A04 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v7, s4, v2 ; D2820002 04080907 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v5 ; 10060A04 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v7, s4, v3 ; D2820003 040C0907 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v4, s4, v5 ; D2820004 04140904 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v7, s4, v4 ; D2820004 04100907 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v4 ; D2820000 04100100 exp 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0] 1: MAD TEMP[1], TEMP[0].wwww, IMM[0].xxxy, IMM[0].yyyx 2: MUL TEMP[0], TEMP[0], TEMP[1] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %24, %28 %33 = fmul float %25, %29 %34 = fmul float %26, %30 %35 = fmul float %27, %31 %36 = fmul float %35, 1.000000e+00 %37 = fadd float %36, 0.000000e+00 %38 = fmul float %35, 1.000000e+00 %39 = fadd float %38, 0.000000e+00 %40 = fmul float %35, 1.000000e+00 %41 = fadd float %40, 0.000000e+00 %42 = fmul float %35, 0.000000e+00 %43 = fadd float %42, 1.000000e+00 %44 = fmul float %32, %37 %45 = fmul float %33, %39 %46 = fmul float %34, %41 %47 = fmul float %35, %43 %48 = call i32 @llvm.SI.packf16(float %44, float %45) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %46, float %47) %51 = bitcast i32 %50 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, 0 ; D2820003 02020404 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s5, v4 ; 10080805 v_mul_f32_e32 v4, v3, v4 ; 10080903 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s5, v5 ; 100A0A05 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v5 ; 10000A00 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, s4, v2 ; 10020404 v_mad_f32 v2, 0, v1, 1.0 ; D2820002 03CA0280 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..15] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[6], TEMP[0] 3: MAD TEMP[0], IN[0].wwww, CONST[7], TEMP[0] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 6: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 7: MAD TEMP[1], TEMP[0].wwww, CONST[3], TEMP[1] 8: MUL TEMP[2].xy, CONST[13], IN[1].yyyy 9: MOV TEMP[0].xy, TEMP[2].xyxx 10: MAD TEMP[2].xy, IN[1].xxxx, CONST[12], TEMP[0] 11: MOV TEMP[0].xy, TEMP[2].xyxx 12: ADD TEMP[2].xy, TEMP[0], CONST[15] 13: MOV TEMP[2].xy, TEMP[2].xyxx 14: MUL TEMP[3].xy, CONST[9], IN[1].yyyy 15: MOV TEMP[0].xy, TEMP[3].xyxx 16: MAD TEMP[0].xy, IN[1].xxxx, CONST[8], TEMP[0] 17: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[11].xyxy 18: MOV TEMP[2].zw, TEMP[0].wwzw 19: MOV OUT[1], TEMP[2] 20: MOV OUT[0], TEMP[1] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = fmul float %33, %62 %72 = fmul float %34, %62 %73 = fmul float %35, %62 %74 = fmul float %36, %62 %75 = fmul float %61, %29 %76 = fadd float %75, %71 %77 = fmul float %61, %30 %78 = fadd float %77, %72 %79 = fmul float %61, %31 %80 = fadd float %79, %73 %81 = fmul float %61, %32 %82 = fadd float %81, %74 %83 = fmul float %63, %37 %84 = fadd float %83, %76 %85 = fmul float %63, %38 %86 = fadd float %85, %78 %87 = fmul float %63, %39 %88 = fadd float %87, %80 %89 = fmul float %63, %40 %90 = fadd float %89, %82 %91 = fmul float %64, %41 %92 = fadd float %91, %84 %93 = fmul float %64, %42 %94 = fadd float %93, %86 %95 = fmul float %64, %43 %96 = fadd float %95, %88 %97 = fmul float %64, %44 %98 = fadd float %97, %90 %99 = fmul float %94, %17 %100 = fmul float %94, %18 %101 = fmul float %94, %19 %102 = fmul float %94, %20 %103 = fmul float %92, %13 %104 = fadd float %103, %99 %105 = fmul float %92, %14 %106 = fadd float %105, %100 %107 = fmul float %92, %15 %108 = fadd float %107, %101 %109 = fmul float %92, %16 %110 = fadd float %109, %102 %111 = fmul float %96, %21 %112 = fadd float %111, %104 %113 = fmul float %96, %22 %114 = fadd float %113, %106 %115 = fmul float %96, %23 %116 = fadd float %115, %108 %117 = fmul float %96, %24 %118 = fadd float %117, %110 %119 = fmul float %98, %25 %120 = fadd float %119, %112 %121 = fmul float %98, %26 %122 = fadd float %121, %114 %123 = fmul float %98, %27 %124 = fadd float %123, %116 %125 = fmul float %98, %28 %126 = fadd float %125, %118 %127 = fmul float %53, %70 %128 = fmul float %54, %70 %129 = fmul float %69, %51 %130 = fadd float %129, %127 %131 = fmul float %69, %52 %132 = fadd float %131, %128 %133 = fadd float %130, %55 %134 = fadd float %132, %56 %135 = fmul float %47, %70 %136 = fmul float %48, %70 %137 = fmul float %69, %45 %138 = fadd float %137, %135 %139 = fmul float %69, %46 %140 = fadd float %139, %136 %141 = fadd float %138, %49 %142 = fadd float %140, %50 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %133, float %134, float %141, float %142) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %120, float %122, float %124, float %126) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x25 ; C2040125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v2 ; 100A0408 s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s8, v5 ; D2820005 04141101 s_buffer_load_dword s8, s[0:3], 0x2d ; C204012D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s8, v5 ; 060A0A08 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x20 ; C2040120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s8, v6 ; D2820006 04181101 s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s8, v6 ; 060C0C08 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v2 ; 100E0408 s_buffer_load_dword s8, s[0:3], 0x31 ; C2040131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s8, v7 ; D2820007 041C1101 s_buffer_load_dword s8, s[0:3], 0x3d ; C204013D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s8, v7 ; 060E0E08 s_buffer_load_dword s8, s[0:3], 0x34 ; C2040134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s8, v2 ; 10100408 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v1, s8, v8 ; D2820001 04201101 s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s8, v1 ; 06020208 exp 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v3, s4, v7 ; D2820007 041C0903 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v7, s4, v6 ; D2820006 04180907 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v1 ; 10100204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v0, s4, v8 ; D2820008 04200900 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v2, s4, v8 ; D2820008 04200902 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v8 ; D2820000 04200903 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v6 ; D2820001 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v5 ; 10040A04 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v7, s4, v2 ; D2820002 04080907 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v5 ; 10060A04 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v7, s4, v3 ; D2820003 040C0907 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v4, s4, v5 ; D2820004 04140904 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v7, s4, v4 ; D2820004 04100907 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v4 ; D2820000 04100100 exp 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.0010, 0.0000, -1.0000, -0.0000} IMM[1] FLT32 { 1.0000, -0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx 3: FSGE TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy 4: UIF TEMP[2].xxxx :0 5: MOV TEMP[2].x, IMM[0].yyyy 6: ELSE :0 7: MOV TEMP[2].x, IMM[0].zzzz 8: ENDIF 9: MOV TEMP[2].x, TEMP[2].xxxx 10: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 11: UIF TEMP[3].xxxx :0 12: MOV TEMP[3].x, IMM[0].yyyy 13: ELSE :0 14: MOV TEMP[3].x, IMM[0].zzzz 15: ENDIF 16: MOV TEMP[2].y, TEMP[3].xxxx 17: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 18: UIF TEMP[3].xxxx :0 19: MOV TEMP[3].x, IMM[0].yyyy 20: ELSE :0 21: MOV TEMP[3].x, IMM[0].zzzz 22: ENDIF 23: MOV TEMP[2].z, TEMP[3].xxxx 24: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 25: UIF TEMP[3].xxxx :0 26: ELSE :0 27: ENDIF 28: FSLT TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy 29: OR TEMP[3].x, TEMP[2].xxxx, TEMP[2].zzzz 30: OR TEMP[3].x, TEMP[3].xxxx, TEMP[2].yyyy 31: UIF TEMP[3].xxxx :0 32: KILL 33: ENDIF 34: MAD TEMP[1], TEMP[0].wwww, IMM[1].xxxy, IMM[1].yyyx 35: MUL TEMP[0], TEMP[0], TEMP[1] 36: MOV OUT[0], TEMP[0] 37: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = fadd float %38, 0xBF50624DE0000000 %40 = fcmp oge float %39, 0.000000e+00 %41 = sext i1 %40 to i32 %42 = bitcast i32 %41 to float %43 = bitcast float %42 to i32 %44 = icmp ne i32 %43, 0 %. = select i1 %44, float 0.000000e+00, float -1.000000e+00 %45 = fcmp oge float %39, 0.000000e+00 %46 = sext i1 %45 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = icmp ne i32 %48, 0 %temp12.0 = select i1 %49, float 0.000000e+00, float -1.000000e+00 %50 = fcmp oge float %39, 0.000000e+00 %51 = sext i1 %50 to i32 %52 = bitcast i32 %51 to float %53 = bitcast float %52 to i32 %54 = icmp ne i32 %53, 0 %.28 = select i1 %54, float 0.000000e+00, float -1.000000e+00 %55 = fcmp oge float %39, 0.000000e+00 %56 = sext i1 %55 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = icmp ne i32 %58, 0 %60 = fcmp olt float %., 0.000000e+00 %61 = sext i1 %60 to i32 %62 = fcmp olt float %temp12.0, 0.000000e+00 %63 = sext i1 %62 to i32 %64 = fcmp olt float %.28, 0.000000e+00 %65 = sext i1 %64 to i32 %66 = bitcast i32 %61 to float %67 = bitcast i32 %63 to float %68 = bitcast i32 %65 to float %69 = bitcast float %66 to i32 %70 = bitcast float %68 to i32 %71 = or i32 %69, %70 %72 = bitcast i32 %71 to float %73 = bitcast float %72 to i32 %74 = bitcast float %67 to i32 %75 = or i32 %73, %74 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = icmp ne i32 %77, 0 br i1 %78, label %IF26, label %ENDIF25 IF26: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF25 ENDIF25: ; preds = %main_body, %IF26 %79 = fmul float %38, 1.000000e+00 %80 = fadd float %79, -0.000000e+00 %81 = fmul float %38, 1.000000e+00 %82 = fadd float %81, -0.000000e+00 %83 = fmul float %38, 1.000000e+00 %84 = fadd float %83, -0.000000e+00 %85 = fmul float %38, -0.000000e+00 %86 = fadd float %85, 1.000000e+00 %87 = fmul float %35, %80 %88 = fmul float %36, %82 %89 = fmul float %37, %84 %90 = fmul float %38, %86 %91 = call i32 @llvm.SI.packf16(float %87, float %88) %92 = bitcast i32 %91 to float %93 = call i32 @llvm.SI.packf16(float %89, float %90) %94 = bitcast i32 %93 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %92, float %94, float %92, float %94) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 v_mov_b32_e32 v4, 0xba83126f ; 7E0802FF BA83126F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v3, v4 ; 06080903 v_cmp_ge_f32_e64 s[0:1], v4, 0 ; D00C0000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_cmp_ne_i32_e64 s[0:1], v4, 0 ; D10A0000 00010104 v_cndmask_b32_e64 v4, -1.0, 0, s[0:1] ; D2000004 180100F3 v_cmp_lt_f32_e64 s[0:1], v4, 0 ; D0020000 00010104 s_and_saveexec_b64 s[0:1], s[0:1] ; BE802400 s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mad_f32 v5, v3, v4, 1.0 ; D2820005 03CA0903 v_mul_f32_e32 v5, v5, v3 ; 100A0705 v_add_f32_e32 v4, v4, v3 ; 06080704 v_mul_f32_e32 v6, v4, v2 ; 100C0504 v_cvt_pkrtz_f16_f32_e32 v5, v6, v5 ; 5E0A0B06 v_mul_f32_e32 v6, v4, v1 ; 100C0304 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 exp 15, 0, 1, 1, 1, v0, v5, v0, v5 ; F8001C0F 05000500 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v1 ; 7E0A4101 v_subrev_f32_e32 v1, v5, v1 ; 0A020305 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v9, v0 ; D2820000 04021308 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v8, v10, v9 ; D2820009 04261508 v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v6 ; 10160D0B v_mad_f32 v10, v5, v10, v11 ; D282000A 042E1505 v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v7, v11, v10 ; D282000A 042A1707 v_or_b32_e32 v11, 8, v4 ; 38160888 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v8, v11, v10 ; D282000A 042A1708 v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyyy, IMM[0].yyyx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 1.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 0.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 0.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mad_f32 v0, 0, v2, 1.0 ; D2820000 03CA0480 v_mad_f32 v1, 0, v2, 0 ; D2820001 02020480 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_add_f32_e32 v2, 0, v2 ; 06040480 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v1 ; 7E0A4101 v_subrev_f32_e32 v1, v5, v1 ; 0A020305 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v9, v0 ; D2820000 04021308 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v8, v10, v9 ; D2820009 04261508 v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v6 ; 10160D0B v_mad_f32 v10, v5, v10, v11 ; D282000A 042E1505 v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v7, v11, v10 ; D282000A 042A1707 v_or_b32_e32 v11, 8, v4 ; 38160888 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v8, v11, v10 ; D282000A 042A1708 v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyxy, IMM[0].yyyx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 1.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 0.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 1.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mad_f32 v0, 0, v2, 1.0 ; D2820000 03CA0480 v_add_f32_e32 v1, 0, v2 ; 06020480 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mad_f32 v2, 0, v2, 0 ; D2820002 02020480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL SAMP[0] DCL CONST[0..15] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 255.0000, 1.0000} IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[15], IN[0] 1: MOV TEMP[1].xy, TEMP[0].xyxx 2: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[15] 3: MOV TEMP[1].zw, TEMP[0].wwzw 4: MUL TEMP[0].xy, TEMP[1], CONST[9] 5: MOV TEMP[1].xy, TEMP[0].xyxx 6: ADD TEMP[2].zw, TEMP[1], IMM[0].xxxx 7: MUL TEMP[2].xy, TEMP[2].zwzw, CONST[10] 8: MOV TEMP[3].xy, TEMP[2].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 11: MUL TEMP[3].z, TEMP[3].wwww, CONST[9].zzzz 12: MUL TEMP[3].z, TEMP[3].zzzz, IMM[0].zzzz 13: MUL TEMP[2], TEMP[0].yyyy, CONST[12] 14: MAD TEMP[2], TEMP[0].xxxx, CONST[11], TEMP[2] 15: MAD TEMP[1], TEMP[3].zzzz, CONST[13], TEMP[2] 16: ADD TEMP[1], TEMP[1], CONST[14] 17: ADD TEMP[0].xy, TEMP[1], CONST[8].zwzw 18: MUL TEMP[0].zw, TEMP[0].xyxy, CONST[8].xyxy 19: MOV TEMP[0].zw, TEMP[0].wwzw 20: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 21: MAD TEMP[2], TEMP[1].xxxx, CONST[0], TEMP[2] 22: MAD TEMP[2], TEMP[1].zzzz, CONST[2], TEMP[2] 23: MAD TEMP[2], TEMP[1].wwww, CONST[3], TEMP[2] 24: RCP TEMP[3].x, TEMP[2].wwww 25: MUL TEMP[3].xy, TEMP[2], TEMP[3].xxxx 26: MOV TEMP[3].xy, TEMP[3].xyxx 27: MOV TEMP[4], TEMP[2] 28: MOV TEMP[0].xy, IN[1].xyxx 29: MUL TEMP[2], TEMP[1].yyyy, CONST[5] 30: MAD TEMP[2], TEMP[1].xxxx, CONST[4], TEMP[2] 31: MAD TEMP[2], TEMP[1].zzzz, CONST[6], TEMP[2] 32: MAD TEMP[2], TEMP[1].wwww, CONST[7], TEMP[2] 33: MOV TEMP[1].xyz, TEMP[1].xyzx 34: MAD TEMP[2], TEMP[2], IMM[1].xyzz, IMM[0].xxyy 35: MOV TEMP[1].w, IMM[0].wwww 36: MOV TEMP[3].zw, IMM[0].wwyw 37: MOV OUT[1], TEMP[0] 38: MOV OUT[2], TEMP[2] 39: MOV OUT[0], TEMP[4] 40: MOV OUT[3], TEMP[1] 41: MOV OUT[4], TEMP[3] 42: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0 %80 = add i32 %5, %7 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0 %86 = add i32 %5, %7 %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %86) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = fadd float %70, %82 %91 = fadd float %71, %83 %92 = fadd float %90, %72 %93 = fadd float %91, %73 %94 = fmul float %90, %49 %95 = fmul float %91, %50 %96 = fadd float %92, 5.000000e-01 %97 = fadd float %93, 5.000000e-01 %98 = fmul float %96, %52 %99 = fmul float %97, %53 %100 = bitcast float %98 to i32 %101 = bitcast float %99 to i32 %102 = bitcast float 0.000000e+00 to i32 %103 = insertelement <4 x i32> undef, i32 %100, i32 0 %104 = insertelement <4 x i32> %103, i32 %101, i32 1 %105 = insertelement <4 x i32> %104, i32 %102, i32 2 %106 = insertelement <4 x i32> %105, i32 undef, i32 3 %107 = bitcast <8 x i32> %75 to <32 x i8> %108 = bitcast <4 x i32> %77 to <16 x i8> %109 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2) %110 = extractelement <4 x float> %109, i32 3 %111 = fmul float %110, %51 %112 = fmul float %111, 2.550000e+02 %113 = fmul float %95, %58 %114 = fmul float %95, %59 %115 = fmul float %95, %60 %116 = fmul float %95, %61 %117 = fmul float %94, %54 %118 = fadd float %117, %113 %119 = fmul float %94, %55 %120 = fadd float %119, %114 %121 = fmul float %94, %56 %122 = fadd float %121, %115 %123 = fmul float %94, %57 %124 = fadd float %123, %116 %125 = fmul float %112, %62 %126 = fadd float %125, %118 %127 = fmul float %112, %63 %128 = fadd float %127, %120 %129 = fmul float %112, %64 %130 = fadd float %129, %122 %131 = fmul float %112, %65 %132 = fadd float %131, %124 %133 = fadd float %126, %66 %134 = fadd float %128, %67 %135 = fadd float %130, %68 %136 = fadd float %132, %69 %137 = fadd float %133, %47 %138 = fadd float %134, %48 %139 = fmul float %137, %45 %140 = fmul float %138, %46 %141 = fmul float %134, %17 %142 = fmul float %134, %18 %143 = fmul float %134, %19 %144 = fmul float %134, %20 %145 = fmul float %133, %13 %146 = fadd float %145, %141 %147 = fmul float %133, %14 %148 = fadd float %147, %142 %149 = fmul float %133, %15 %150 = fadd float %149, %143 %151 = fmul float %133, %16 %152 = fadd float %151, %144 %153 = fmul float %135, %21 %154 = fadd float %153, %146 %155 = fmul float %135, %22 %156 = fadd float %155, %148 %157 = fmul float %135, %23 %158 = fadd float %157, %150 %159 = fmul float %135, %24 %160 = fadd float %159, %152 %161 = fmul float %136, %25 %162 = fadd float %161, %154 %163 = fmul float %136, %26 %164 = fadd float %163, %156 %165 = fmul float %136, %27 %166 = fadd float %165, %158 %167 = fmul float %136, %28 %168 = fadd float %167, %160 %169 = fdiv float 1.000000e+00, %168 %170 = fmul float %162, %169 %171 = fmul float %164, %169 %172 = fmul float %134, %33 %173 = fmul float %134, %34 %174 = fmul float %134, %35 %175 = fmul float %134, %36 %176 = fmul float %133, %29 %177 = fadd float %176, %172 %178 = fmul float %133, %30 %179 = fadd float %178, %173 %180 = fmul float %133, %31 %181 = fadd float %180, %174 %182 = fmul float %133, %32 %183 = fadd float %182, %175 %184 = fmul float %135, %37 %185 = fadd float %184, %177 %186 = fmul float %135, %38 %187 = fadd float %186, %179 %188 = fmul float %135, %39 %189 = fadd float %188, %181 %190 = fmul float %135, %40 %191 = fadd float %190, %183 %192 = fmul float %136, %41 %193 = fadd float %192, %185 %194 = fmul float %136, %42 %195 = fadd float %194, %187 %196 = fmul float %136, %43 %197 = fadd float %196, %189 %198 = fmul float %136, %44 %199 = fadd float %198, %191 %200 = fmul float %193, 5.000000e-01 %201 = fadd float %200, 5.000000e-01 %202 = fmul float %195, -5.000000e-01 %203 = fadd float %202, 5.000000e-01 %204 = fmul float %197, 1.000000e+00 %205 = fadd float %204, 0.000000e+00 %206 = fmul float %199, 1.000000e+00 %207 = fadd float %206, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float %139, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %201, float %203, float %205, float %207) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %133, float %134, float %135, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %170, float %171, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s12, s[0:3], 0x3c ; C206013C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s12, v1 ; 060A020C s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s12, v5 ; 100C0A0C s_buffer_load_dword s12, s[0:3], 0x3d ; C206013D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v2 ; 0602040C s_buffer_load_dword s12, s[0:3], 0x25 ; C2060125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s12, v1 ; 1004020C s_buffer_load_dword s12, s[0:3], 0x31 ; C2060131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s12, v2 ; 1006040C s_buffer_load_dword s12, s[0:3], 0x2d ; C206012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v6, s12, v3 ; D2820003 040C1906 s_buffer_load_dword s12, s[0:3], 0x3f ; C206013F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v1 ; 0602020C v_add_f32_e32 v1, 0.5, v1 ; 060202F0 s_buffer_load_dword s12, s[0:3], 0x29 ; C2060129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s12, v1 ; 1010020C s_buffer_load_dword s12, s[0:3], 0x3e ; C206013E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v5 ; 06020A0C v_add_f32_e32 v1, 0.5, v1 ; 060202F0 s_buffer_load_dword s12, s[0:3], 0x28 ; C2060128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s12, v1 ; 100E020C v_mov_b32_e32 v9, 0 ; 7E120280 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v1, 8, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900800 00640107 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v1, 0x437f0000, v1 ; 100202FF 437F0000 s_buffer_load_dword s4, s[0:3], 0x35 ; C2020135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s4, v3 ; D2820003 040C0901 s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v3 ; 06080604 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v2 ; 100A0404 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s4, v5 ; D2820005 04140901 s_buffer_load_dword s4, s[0:3], 0x38 ; C2020138 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v11, s4, v5 ; 06160A04 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v11 ; 10161604 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v12, v13, v11, v4 ; F800020F 040B0D0C s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v2 ; 10080404 s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0x3a ; C202013A s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 s_buffer_load_dword s4, s[0:3], 0x2f ; C202012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s4, v2 ; D2820002 04080906 s_buffer_load_dword s4, s[0:3], 0x37 ; C2020137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v1, s4, v2 ; D2820001 04080901 s_buffer_load_dword s4, s[0:3], 0x3b ; C202013B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 v_mad_f32 v0, -0.5, v0, 0.5 ; D2820000 03C200F1 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v3 ; 10040604 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s4, v2 ; D2820002 04080905 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v2 ; D2820002 04080901 v_add_f32_e32 v2, 0, v2 ; 06040480 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v3 ; 100C0604 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 v_add_f32_e32 v6, 0, v6 ; 060C0C80 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v3 ; 10160604 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v5, s4, v11 ; D282000B 042C0905 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v4, s4, v11 ; D282000B 042C0904 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v1, s4, v11 ; D282000B 042C0901 v_mad_f32 v11, 0.5, v11, 0.5 ; D282000B 03C216F0 exp 15, 33, 0, 0, 0, v11, v0, v6, v2 ; F800021F 0206000B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v5, v3, v4, v0 ; F800022F 00040305 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v3 ; 10040604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s4, v2 ; D2820002 04080905 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v2 ; D2820002 04080901 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v3 ; 100C0604 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 v_rcp_f32_e32 v11, v6 ; 7E165506 v_mul_f32_e32 v12, v11, v2 ; 1018050B s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s4, v3 ; 101A0604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v5, s4, v13 ; D282000D 04340905 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v4, s4, v13 ; D282000D 04340904 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v1, s4, v13 ; D282000D 04340901 v_mul_f32_e32 v11, v11, v13 ; 10161B0B exp 15, 35, 0, 0, 0, v11, v12, v9, v0 ; F800023F 00090C0B s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 exp 15, 12, 0, 1, 0, v13, v2, v0, v6 ; F80008CF 0600020D s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[0..8] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.4427, 1.0000, 0.5000, -0.5000} IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[0], IN[2] 1: MUL TEMP[1].w, TEMP[0].zzzz, CONST[8].xxxx 2: MUL TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 3: EX2 TEMP[1].x, TEMP[1].wwww 4: ADD TEMP[1].w, -TEMP[1].xxxx, IMM[0].yyyy 5: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 6: RCP TEMP[3].x, TEMP[0].zzzz 7: MUL TEMP[2].x, TEMP[2].xxxx, CONST[8].yyyy 8: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 9: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MOV TEMP[0].x, TEMP[1].xxxx 12: EX2 TEMP[1].x, TEMP[1].xxxx 13: MOV_SAT TEMP[0].x, TEMP[1].xxxx 14: ADD TEMP[1].x, -TEMP[0].xxxx, IMM[0].yyyy 15: MAD TEMP[2].x, IN[3].yyyy, IMM[0].zzzz, IMM[0].zzzz 16: MOV TEMP[2].x, TEMP[2].xxxx 17: MOV TEMP[2].y, CONST[8].wwww 18: MOV TEMP[3].xy, TEMP[2].xyyy 19: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 20: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 21: MOV TEMP[0].x, TEMP[1].xxxx 22: MUL TEMP[3].yzw, CONST[4].xxyw, IN[2].yyyy 23: MOV TEMP[0].yzw, TEMP[3].zyzw 24: MAD TEMP[3].yzw, IN[2].xxxx, CONST[3].xxyw, TEMP[0] 25: MOV TEMP[0].yzw, TEMP[3].zyzw 26: MAD TEMP[3].yzw, IN[2].zzzz, CONST[5].xxyw, TEMP[0] 27: MOV TEMP[0].yzw, TEMP[3].zyzw 28: ADD TEMP[3].yzw, TEMP[0], CONST[6].xxyw 29: MOV TEMP[0].yz, TEMP[3].zyzw 30: RCP TEMP[3].x, TEMP[3].wwww 31: MOV TEMP[0].w, TEMP[3].xxxx 32: MUL TEMP[3].yz, TEMP[3].xxxx, TEMP[0] 33: MOV TEMP[0].yz, TEMP[3].zyzz 34: MAD TEMP[3].yz, TEMP[0], IMM[0].zzwy, IMM[0].wwww 35: MOV TEMP[0].yz, TEMP[3].zyzz 36: MAD TEMP[0].xy, TEMP[0].yzzw, IMM[1].xyxx, IMM[1].zxzz 37: MOV TEMP[0].xy, TEMP[0].xyyy 38: TEX TEMP[0].x, TEMP[0], SAMP[3], 2D 39: MOV_SAT TEMP[0].x, TEMP[0].xxxx 40: MUL TEMP[0].y, TEMP[0].xxxx, CONST[1].xxxx 41: MAD TEMP[3].xy, IN[0], IMM[1].xyxx, IMM[1].zxzz 42: MOV TEMP[3].xy, TEMP[3].xyyy 43: TEX TEMP[3], TEMP[3], SAMP[0], 2D 44: MOV TEMP[2].w, TEMP[3] 45: MOV TEMP[4], -CONST[2].xxxx 46: FSGE TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz 47: UIF TEMP[5].xxxx :0 48: MOV TEMP[5].x, IMM[1].zzzz 49: ELSE :0 50: MOV TEMP[5].x, TEMP[3].xxxx 51: ENDIF 52: MOV TEMP[5].x, TEMP[5].xxxx 53: FSGE TEMP[6].x, TEMP[4].yyyy, IMM[1].zzzz 54: UIF TEMP[6].xxxx :0 55: MOV TEMP[6].x, IMM[1].zzzz 56: ELSE :0 57: MOV TEMP[6].x, TEMP[3].yyyy 58: ENDIF 59: MOV TEMP[5].y, TEMP[6].xxxx 60: FSGE TEMP[6].x, TEMP[4].zzzz, IMM[1].zzzz 61: UIF TEMP[6].xxxx :0 62: MOV TEMP[6].x, IMM[1].zzzz 63: ELSE :0 64: MOV TEMP[6].x, TEMP[3].zzzz 65: ENDIF 66: MOV TEMP[5].z, TEMP[6].xxxx 67: FSGE TEMP[3].x, TEMP[4].wwww, IMM[1].zzzz 68: UIF TEMP[3].xxxx :0 69: ELSE :0 70: ENDIF 71: MOV TEMP[2].xyz, TEMP[5].xyzx 72: MOV TEMP[3].xyz, TEMP[5].xyzz 73: TEX TEMP[3], TEMP[3], SAMP[7], 3D 74: LRP TEMP[0].xyz, TEMP[0].yyyy, TEMP[3], TEMP[2] 75: MUL TEMP[2], IN[1], IMM[1].xyxx 76: ADD TEMP[3].xy, TEMP[2], IMM[1].zxzz 77: MOV TEMP[3].xy, TEMP[3].xyyy 78: TEX TEMP[3], TEMP[3], SAMP[1], 2D 79: ADD TEMP[4].y, -TEMP[3].wwww, IMM[0].yyyy 80: MAD TEMP[0].yzw, TEMP[0].xxyz, TEMP[4].yyyy, TEMP[3].xxyz 81: ADD TEMP[2].xy, TEMP[2], IMM[1].zxzz 82: MOV TEMP[2].xy, TEMP[2].xyyy 83: TEX TEMP[2].xzw, TEMP[2], SAMP[4], 2D 84: MOV TEMP[3].w, TEMP[2].wwww 85: MOV TEMP[4].xy, IN[0].zwww 86: TEX TEMP[4], TEMP[4], SAMP[5], 2D 87: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].yzww, TEMP[4] 88: MOV TEMP[4].xyz, TEMP[0].xyzx 89: MAD TEMP[2].y, TEMP[2].zzzz, -TEMP[2].xxxx, TEMP[2].xxxx 90: MOV TEMP[0].xyz, TEMP[0].xyzz 91: TEX TEMP[0], TEMP[0], SAMP[6], 3D 92: LRP TEMP[0].xyz, TEMP[2].yyyy, TEMP[0], TEMP[4] 93: MOV TEMP[3].xyz, TEMP[0].xyzx 94: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[7].xxyz 95: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[3] 96: MOV TEMP[0].xyz, TEMP[0].xyzx 97: MOV TEMP[0].w, IMM[0].yyyy 98: MOV OUT[0], TEMP[0] 99: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %47 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %48 = load <8 x i32> addrspace(2)* %47, !tbaa !0 %49 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %50 = load <4 x i32> addrspace(2)* %49, !tbaa !0 %51 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %52 = load <8 x i32> addrspace(2)* %51, !tbaa !0 %53 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %54 = load <4 x i32> addrspace(2)* %53, !tbaa !0 %55 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %56 = load <8 x i32> addrspace(2)* %55, !tbaa !0 %57 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %58 = load <4 x i32> addrspace(2)* %57, !tbaa !0 %59 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %60 = load <8 x i32> addrspace(2)* %59, !tbaa !0 %61 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %62 = load <4 x i32> addrspace(2)* %61, !tbaa !0 %63 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %64 = load <8 x i32> addrspace(2)* %63, !tbaa !0 %65 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %66 = load <4 x i32> addrspace(2)* %65, !tbaa !0 %67 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %68 = load <8 x i32> addrspace(2)* %67, !tbaa !0 %69 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %70 = load <4 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = fsub float -0.000000e+00, %24 %90 = fadd float %89, %85 %91 = fsub float -0.000000e+00, %25 %92 = fadd float %91, %86 %93 = fsub float -0.000000e+00, %26 %94 = fadd float %93, %87 %95 = fmul float %94, %44 %96 = fmul float %95, 0x3FF7154CA0000000 %97 = call float @llvm.AMDIL.exp.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %98, 1.000000e+00 %100 = fmul float %90, %90 %101 = fmul float %92, %92 %102 = fadd float %101, %100 %103 = fmul float %94, %94 %104 = fadd float %102, %103 %105 = fdiv float 1.000000e+00, %94 %106 = fmul float %104, %45 %107 = fmul float %99, %106 %108 = fmul float %105, %107 %109 = fmul float %108, 0x3FF7154CA0000000 %110 = call float @llvm.AMDIL.exp.(float %109) %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00) %112 = fsub float -0.000000e+00, %111 %113 = fadd float %112, 1.000000e+00 %114 = fmul float %88, 5.000000e-01 %115 = fadd float %114, 5.000000e-01 %116 = bitcast float %115 to i32 %117 = bitcast float %46 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %56 to <32 x i8> %121 = bitcast <4 x i32> %58 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = fmul float %113, %123 %125 = fmul float %32, %86 %126 = fmul float %33, %86 %127 = fmul float %34, %86 %128 = fmul float %85, %29 %129 = fadd float %128, %125 %130 = fmul float %85, %30 %131 = fadd float %130, %126 %132 = fmul float %85, %31 %133 = fadd float %132, %127 %134 = fmul float %87, %35 %135 = fadd float %134, %129 %136 = fmul float %87, %36 %137 = fadd float %136, %131 %138 = fmul float %87, %37 %139 = fadd float %138, %133 %140 = fadd float %135, %38 %141 = fadd float %137, %39 %142 = fadd float %139, %40 %143 = fdiv float 1.000000e+00, %142 %144 = fmul float %143, %140 %145 = fmul float %143, %141 %146 = fmul float %144, 5.000000e-01 %147 = fadd float %146, -5.000000e-01 %148 = fmul float %145, -5.000000e-01 %149 = fadd float %148, -5.000000e-01 %150 = fmul float %147, 1.000000e+00 %151 = fadd float %150, 0.000000e+00 %152 = fmul float %149, -1.000000e+00 %153 = fadd float %152, 1.000000e+00 %154 = bitcast float %151 to i32 %155 = bitcast float %153 to i32 %156 = insertelement <2 x i32> undef, i32 %154, i32 0 %157 = insertelement <2 x i32> %156, i32 %155, i32 1 %158 = bitcast <8 x i32> %60 to <32 x i8> %159 = bitcast <4 x i32> %62 to <16 x i8> %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fmul float %162, %27 %164 = fmul float %79, 1.000000e+00 %165 = fadd float %164, 0.000000e+00 %166 = fmul float %80, -1.000000e+00 %167 = fadd float %166, 1.000000e+00 %168 = bitcast float %165 to i32 %169 = bitcast float %167 to i32 %170 = insertelement <2 x i32> undef, i32 %168, i32 0 %171 = insertelement <2 x i32> %170, i32 %169, i32 1 %172 = bitcast <8 x i32> %48 to <32 x i8> %173 = bitcast <4 x i32> %50 to <16 x i8> %174 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 2) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = fsub float -0.000000e+00, %28 %179 = fsub float -0.000000e+00, %28 %180 = fsub float -0.000000e+00, %28 %181 = fsub float -0.000000e+00, %28 %182 = fcmp oge float %178, 0.000000e+00 %183 = sext i1 %182 to i32 %184 = bitcast i32 %183 to float %185 = bitcast float %184 to i32 %186 = icmp ne i32 %185, 0 %. = select i1 %186, float 0.000000e+00, float %175 %187 = fcmp oge float %179, 0.000000e+00 %188 = sext i1 %187 to i32 %189 = bitcast i32 %188 to float %190 = bitcast float %189 to i32 %191 = icmp ne i32 %190, 0 %temp24.0 = select i1 %191, float 0.000000e+00, float %176 %192 = fcmp oge float %180, 0.000000e+00 %193 = sext i1 %192 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = icmp ne i32 %195, 0 %.37 = select i1 %196, float 0.000000e+00, float %177 %197 = fcmp oge float %181, 0.000000e+00 %198 = sext i1 %197 to i32 %199 = bitcast i32 %198 to float %200 = bitcast float %199 to i32 %201 = icmp ne i32 %200, 0 %202 = bitcast float %. to i32 %203 = bitcast float %temp24.0 to i32 %204 = bitcast float %.37 to i32 %205 = insertelement <4 x i32> undef, i32 %202, i32 0 %206 = insertelement <4 x i32> %205, i32 %203, i32 1 %207 = insertelement <4 x i32> %206, i32 %204, i32 2 %208 = insertelement <4 x i32> %207, i32 undef, i32 3 %209 = bitcast <8 x i32> %76 to <32 x i8> %210 = bitcast <4 x i32> %78 to <16 x i8> %211 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 3) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = call float @llvm.AMDGPU.lrp(float %163, float %212, float %.) %216 = call float @llvm.AMDGPU.lrp(float %163, float %213, float %temp24.0) %217 = call float @llvm.AMDGPU.lrp(float %163, float %214, float %.37) %218 = fmul float %83, 1.000000e+00 %219 = fmul float %84, -1.000000e+00 %220 = fadd float %218, 0.000000e+00 %221 = fadd float %219, 1.000000e+00 %222 = bitcast float %220 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %52 to <32 x i8> %227 = bitcast <4 x i32> %54 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fsub float -0.000000e+00, %232 %234 = fadd float %233, 1.000000e+00 %235 = fmul float %215, %234 %236 = fadd float %235, %229 %237 = fmul float %216, %234 %238 = fadd float %237, %230 %239 = fmul float %217, %234 %240 = fadd float %239, %231 %241 = fadd float %218, 0.000000e+00 %242 = fadd float %219, 1.000000e+00 %243 = bitcast float %241 to i32 %244 = bitcast float %242 to i32 %245 = insertelement <2 x i32> undef, i32 %243, i32 0 %246 = insertelement <2 x i32> %245, i32 %244, i32 1 %247 = bitcast <8 x i32> %64 to <32 x i8> %248 = bitcast <4 x i32> %66 to <16 x i8> %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 2 %252 = bitcast float %81 to i32 %253 = bitcast float %82 to i32 %254 = insertelement <2 x i32> undef, i32 %252, i32 0 %255 = insertelement <2 x i32> %254, i32 %253, i32 1 %256 = bitcast <8 x i32> %68 to <32 x i8> %257 = bitcast <4 x i32> %70 to <16 x i8> %258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %255, <32 x i8> %256, <16 x i8> %257, i32 2) %259 = extractelement <4 x float> %258, i32 0 %260 = extractelement <4 x float> %258, i32 1 %261 = extractelement <4 x float> %258, i32 2 %262 = call float @llvm.AMDGPU.lrp(float %250, float %236, float %259) %263 = call float @llvm.AMDGPU.lrp(float %250, float %238, float %260) %264 = call float @llvm.AMDGPU.lrp(float %250, float %240, float %261) %265 = fsub float -0.000000e+00, %250 %266 = fmul float %251, %265 %267 = fadd float %266, %250 %268 = bitcast float %262 to i32 %269 = bitcast float %263 to i32 %270 = bitcast float %264 to i32 %271 = insertelement <4 x i32> undef, i32 %268, i32 0 %272 = insertelement <4 x i32> %271, i32 %269, i32 1 %273 = insertelement <4 x i32> %272, i32 %270, i32 2 %274 = insertelement <4 x i32> %273, i32 undef, i32 3 %275 = bitcast <8 x i32> %72 to <32 x i8> %276 = bitcast <4 x i32> %74 to <16 x i8> %277 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 3) %278 = extractelement <4 x float> %277, i32 0 %279 = extractelement <4 x float> %277, i32 1 %280 = extractelement <4 x float> %277, i32 2 %281 = call float @llvm.AMDGPU.lrp(float %267, float %278, float %262) %282 = call float @llvm.AMDGPU.lrp(float %267, float %279, float %263) %283 = call float @llvm.AMDGPU.lrp(float %267, float %280, float %264) %284 = fsub float -0.000000e+00, %281 %285 = fadd float %284, %41 %286 = fsub float -0.000000e+00, %282 %287 = fadd float %286, %42 %288 = fsub float -0.000000e+00, %283 %289 = fadd float %288, %43 %290 = fmul float %124, %285 %291 = fadd float %290, %281 %292 = fmul float %124, %287 %293 = fadd float %292, %282 %294 = fmul float %124, %289 %295 = fadd float %294, %283 %296 = call i32 @llvm.SI.packf16(float %291, float %293) %297 = bitcast i32 %296 to float %298 = call i32 @llvm.SI.packf16(float %295, float 1.000000e+00) %299 = bitcast i32 %298 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %297, float %299, float %297, float %299) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s8, v3 ; 10080608 s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s8, v4 ; D2820004 04101102 v_interp_p1_f32 v5, v0, 2, 2, [m0] ; C8140A00 v_interp_p2_f32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s8, v4 ; D2820004 04101105 s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s8, v4 ; 06080808 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v3 ; 100C0608 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s8, v6 ; D2820006 04181102 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s8, v6 ; D2820006 04181105 s_buffer_load_dword s8, s[0:3], 0x1b ; C204011B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s8, v6 ; 060C0C08 v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mad_f32 v4, -0.5, v4, -0.5 ; D2820004 03C608F1 v_sub_f32_e32 v8, 1.0, v4 ; 081008F2 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s8, v3 ; 10080608 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s8, v4 ; D2820004 04101102 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s8, v4 ; D2820004 04101105 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s8, v4 ; 06080808 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mad_f32 v4, 0.5, v4, -0.5 ; D2820004 03C608F0 v_add_f32_e32 v7, 0, v4 ; 060E0880 s_load_dwordx4 s[68:71], s[4:5], 0x0 ; C0A20500 s_load_dwordx4 s[48:51], s[4:5], 0x4 ; C0980504 s_load_dwordx4 s[96:99], s[4:5], 0x8 ; C0B00508 s_load_dwordx4 s[72:75], s[4:5], 0xc ; C0A4050C s_load_dwordx4 s[44:47], s[4:5], 0x10 ; C0960510 s_load_dwordx4 s[32:35], s[4:5], 0x14 ; C0900514 s_load_dwordx4 s[20:23], s[4:5], 0x18 ; C08A0518 s_load_dwordx4 s[76:79], s[4:5], 0x1c ; C0A6051C s_load_dwordx8 s[80:87], s[6:7], 0x0 ; C0E80700 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_load_dwordx8 s[88:95], s[6:7], 0x18 ; C0EC0718 s_load_dwordx8 s[52:59], s[6:7], 0x20 ; C0DA0720 s_load_dwordx8 s[36:43], s[6:7], 0x28 ; C0D20728 s_load_dwordx8 s[24:31], s[6:7], 0x30 ; C0CC0730 s_load_dwordx8 s[4:11], s[6:7], 0x38 ; C0C20738 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[88:95], s[72:75] ; F0800100 02560407 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 s_buffer_load_dword s72, s[0:3], 0x4 ; C2240104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s72, v4 ; 100C0848 v_mad_f32 v4, -v4, s72, 1.0 ; D2820004 23C89104 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_add_f32_e32 v7, 0, v9 ; 060E1280 image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[80:87], s[68:71] ; F0800700 02340707 s_buffer_load_dword s68, s[0:3], 0x8 ; C2220108 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_ge_f32_e64 s[68:69], -s68, 0 ; D00C0044 20010044 v_cndmask_b32_e64 v10, 0, -1, s[68:69] ; D200000A 01118280 v_cmp_ne_i32_e64 s[68:69], v10, 0 ; D10A0044 0001010A v_cndmask_b32_e64 v12, v9, 0, s[68:69] ; D200000C 09110109 v_mul_f32_e32 v14, v12, v4 ; 101C090C v_cndmask_b32_e64 v11, v8, 0, s[68:69] ; D200080B 01110108 v_cndmask_b32_e64 v10, v7, 0, s[68:69] ; D200000A 19110107 image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[4:11], s[76:79] ; F0800700 0261070A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v6, v9, v14 ; D282000E 043A1306 v_interp_p1_f32 v15, v0, 1, 1, [m0] ; C83C0500 v_interp_p2_f32 v15, [v15], v1, 1, 1, [m0] ; C83D0501 v_sub_f32_e32 v16, 1.0, v15 ; 08201EF2 v_interp_p1_f32 v17, v0, 0, 1, [m0] ; C8440400 v_interp_p2_f32 v17, [v17], v1, 0, 1, [m0] ; C8450401 v_add_f32_e32 v15, 0, v17 ; 061E2280 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[60:67], s[48:51] ; F0800F00 018F110F s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v21, 1.0, v20 ; 082A28F2 v_mad_f32 v14, v14, v21, v19 ; D282000E 044E2B0E image_sample v[15:16], 5, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[52:59], s[44:47] ; F0800500 016D0F0F s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v22, 1.0, v15 ; 082C1EF2 v_interp_p1_f32 v24, v0, 3, 0, [m0] ; C8600300 v_interp_p2_f32 v24, [v24], v1, 3, 0, [m0] ; C8610301 v_interp_p1_f32 v23, v0, 2, 0, [m0] ; C85C0200 v_interp_p2_f32 v23, [v23], v1, 2, 0, [m0] ; C85D0201 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[36:43], s[32:35] ; F0800700 01091717 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v25, v22 ; 10342D19 v_mad_f32 v28, v15, v14, v26 ; D282001C 046A1D0F v_mul_f32_e32 v14, v11, v4 ; 101C090B v_mad_f32 v14, v6, v8, v14 ; D282000E 043A1106 v_mad_f32 v14, v14, v21, v18 ; D282000E 044A2B0E v_mul_f32_e32 v30, v24, v22 ; 103C2D18 v_mad_f32 v27, v15, v14, v30 ; D282001B 047A1D0F v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mad_f32 v4, v6, v7, v4 ; D2820004 04120F06 v_mad_f32 v4, v4, v21, v17 ; D2820004 04462B04 v_mul_f32_e32 v6, v23, v22 ; 100C2D17 v_mad_f32 v26, v15, v4, v6 ; D282001A 041A090F image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[24:31], s[20:23] ; F0800700 00A6061A v_mad_f32 v4, -v16, v15, v15 ; D2820004 243E1F10 v_sub_f32_e32 v9, 1.0, v4 ; 081208F2 v_mul_f32_e32 v10, v27, v9 ; 1014131B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v4, v7, v10 ; D282000A 042A0F04 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v11, s4, v10 ; 08161404 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s4, v3 ; 0A060604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v2, s4, v2 ; 0A040404 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s4, v5 ; 0A060A04 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 v_mul_f32_e32 v5, 0x3fb8aa65, v5 ; 100A0AFF 3FB8AA65 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_rcp_f32_e32 v3, v3 ; 7E065503 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mul_f32_e32 v2, 0x3fb8aa65, v2 ; 100404FF 3FB8AA65 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_mad_f32 v0, 0.5, v3, 0.5 ; D2820000 03C206F0 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[96:99] ; F0800100 03030000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mad_f32 v1, v0, v11, v10 ; D2820001 042A1700 v_mul_f32_e32 v2, v26, v9 ; 1004131A v_mad_f32 v2, v4, v6, v2 ; D2820002 040A0D04 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v2, v0, v3, v2 ; D2820002 040A0700 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 v_mul_f32_e32 v2, v28, v9 ; 1004131C v_mad_f32 v2, v4, v8, v2 ; D2820002 040A1104 s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL CONST[0..10] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, -1.0000, -0.5000} IMM[1] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[8].xxxx, CONST[9].xxxx 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1], CONST[8].yyyy, IMM[0].xyyx, IMM[0].yxzy 3: MUL TEMP[2].xy, TEMP[1], IN[0] 4: MOV TEMP[1].xy, TEMP[2].xyxx 5: MAD TEMP[1].xy, TEMP[1], TEMP[1].zwzw, CONST[9].xxxx 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: MAD TEMP[2].xy, IN[0], -CONST[8].zzzz, CONST[9].xxxx 8: MOV TEMP[2].xy, TEMP[2].xyxx 9: MAD TEMP[3], CONST[8].ywwy, IMM[0].yxxy, IMM[0].xyyz 10: MUL TEMP[4].xy, TEMP[3], IN[0] 11: MOV TEMP[3].xy, TEMP[4].xyxx 12: MAD TEMP[4].xy, TEMP[3], TEMP[3].zwzw, CONST[9].xxxx 13: MOV TEMP[4].xy, TEMP[4].xyxx 14: MUL TEMP[5].xy, CONST[5], IN[0].yyyy 15: MOV TEMP[3].xy, TEMP[5].xyxx 16: MAD TEMP[5].xy, IN[0].xxxx, CONST[4], TEMP[3] 17: MOV TEMP[3].xy, TEMP[5].xyxx 18: MAD TEMP[5].xy, CONST[10].xxxx, CONST[6], TEMP[3] 19: MOV TEMP[3].xy, TEMP[5].xyxx 20: ADD TEMP[5].xy, TEMP[3], CONST[7] 21: MOV TEMP[3].xy, TEMP[5].xyxx 22: MAD TEMP[5].xy, TEMP[3], IMM[1].xyzz, IMM[0].wwww 23: MOV TEMP[5].xy, TEMP[5].xyxx 24: MUL TEMP[3], CONST[1], IN[0].yyyy 25: MAD TEMP[3], IN[0].xxxx, CONST[0], TEMP[3] 26: MAD TEMP[3], CONST[10].xxxx, CONST[2], TEMP[3] 27: ADD TEMP[3], TEMP[3], CONST[3] 28: MOV TEMP[6].xyz, TEMP[3].xywx 29: MOV TEMP[7].xy, IN[0].xyxx 30: MOV TEMP[7].z, CONST[10].xxxx 31: MOV TEMP[5].zw, IN[0].wwzw 32: MOV TEMP[0].zw, IMM[0].xxyx 33: MOV TEMP[1].zw, IMM[0].xxyx 34: MOV TEMP[2].zw, IMM[0].xxyx 35: MOV TEMP[4].zw, IMM[0].xxyx 36: MOV TEMP[7].w, IMM[0].xxxx 37: MOV TEMP[6].w, IMM[0].xxxx 38: MOV OUT[6], TEMP[6] 39: MOV OUT[7], TEMP[5] 40: MOV OUT[1], TEMP[0] 41: MOV OUT[0], TEMP[3] 42: MOV OUT[2], TEMP[1] 43: MOV OUT[3], TEMP[2] 44: MOV OUT[4], TEMP[4] 45: MOV OUT[5], TEMP[7] 46: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %37 %52 = fadd float %51, %41 %53 = fmul float %48, %37 %54 = fadd float %53, %41 %55 = fmul float %38, 1.000000e+00 %56 = fadd float %55, 0.000000e+00 %57 = fmul float %38, 0.000000e+00 %58 = fadd float %57, 1.000000e+00 %59 = fmul float %38, 0.000000e+00 %60 = fadd float %59, -1.000000e+00 %61 = fmul float %38, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %56, %47 %64 = fmul float %58, %48 %65 = fmul float %63, %60 %66 = fadd float %65, %41 %67 = fmul float %64, %62 %68 = fadd float %67, %41 %69 = fsub float -0.000000e+00, %39 %70 = fmul float %47, %69 %71 = fadd float %70, %41 %72 = fsub float -0.000000e+00, %39 %73 = fmul float %48, %72 %74 = fadd float %73, %41 %75 = fmul float %38, 0.000000e+00 %76 = fadd float %75, 1.000000e+00 %77 = fmul float %40, 1.000000e+00 %78 = fadd float %77, 0.000000e+00 %79 = fmul float %40, 1.000000e+00 %80 = fadd float %79, 0.000000e+00 %81 = fmul float %38, 0.000000e+00 %82 = fadd float %81, -1.000000e+00 %83 = fmul float %76, %47 %84 = fmul float %78, %48 %85 = fmul float %83, %80 %86 = fadd float %85, %41 %87 = fmul float %84, %82 %88 = fadd float %87, %41 %89 = fmul float %31, %48 %90 = fmul float %32, %48 %91 = fmul float %47, %29 %92 = fadd float %91, %89 %93 = fmul float %47, %30 %94 = fadd float %93, %90 %95 = fmul float %42, %33 %96 = fadd float %95, %92 %97 = fmul float %42, %34 %98 = fadd float %97, %94 %99 = fadd float %96, %35 %100 = fadd float %98, %36 %101 = fmul float %99, 5.000000e-01 %102 = fadd float %101, -5.000000e-01 %103 = fmul float %100, -5.000000e-01 %104 = fadd float %103, -5.000000e-01 %105 = fmul float %17, %48 %106 = fmul float %18, %48 %107 = fmul float %19, %48 %108 = fmul float %20, %48 %109 = fmul float %47, %13 %110 = fadd float %109, %105 %111 = fmul float %47, %14 %112 = fadd float %111, %106 %113 = fmul float %47, %15 %114 = fadd float %113, %107 %115 = fmul float %47, %16 %116 = fadd float %115, %108 %117 = fmul float %42, %21 %118 = fadd float %117, %110 %119 = fmul float %42, %22 %120 = fadd float %119, %112 %121 = fmul float %42, %23 %122 = fadd float %121, %114 %123 = fmul float %42, %24 %124 = fadd float %123, %116 %125 = fadd float %118, %25 %126 = fadd float %120, %26 %127 = fadd float %122, %27 %128 = fadd float %124, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %54, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %66, float %68, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %71, float %74, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %47, float %48, float %42, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %125, float %126, float %128, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %102, float %104, float %49, float %50) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %125, float %126, float %127, float %128) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, v4 ; D2820004 04120205 v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, s5, v0, v5 ; D2820005 04160005 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 v_mov_b32_e32 v7, 0 ; 7E0E0280 exp 15, 32, 0, 0, 0, v5, v4, v7, v6 ; F800020F 06070405 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mad_f32 v4, 0, s5, -1.0 ; D2820004 03CC0A80 v_add_f32_e64 v5, 0, s5 ; D2060005 00000A80 v_mul_f32_e32 v8, v0, v5 ; 10100B00 v_mad_f32 v8, v8, v4, s4 ; D2820008 00120908 v_mad_f32 v9, 0, s5, 1.0 ; D2820009 03C80A80 v_mul_f32_e32 v10, v1, v9 ; 10141301 v_mad_f32 v5, v10, v5, s4 ; D2820005 00120B0A exp 15, 33, 0, 0, 0, v8, v5, v7, v6 ; F800021F 06070508 s_buffer_load_dword s5, s[0:3], 0x22 ; C2028122 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, s4 ; 7E0A0204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, -v1, s5, v5 ; D2820005 24140B01 v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v8, -v0, s5, v8 ; D2820008 24200B00 exp 15, 34, 0, 0, 0, v8, v5, v7, v6 ; F800022F 06070508 s_buffer_load_dword s5, s[0:3], 0x23 ; C2028123 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e64 v5, 0, s5 ; D2060005 00000A80 v_mul_f32_e32 v8, v1, v5 ; 10100B01 v_mad_f32 v4, v8, v4, s4 ; D2820004 00120908 v_mul_f32_e32 v8, v0, v9 ; 10101300 v_mad_f32 v5, v8, v5, s4 ; D2820005 00120B08 exp 15, 35, 0, 0, 0, v5, v4, v7, v6 ; F800023F 06070405 s_buffer_load_dword s4, s[0:3], 0x28 ; C2020128 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 36, 0, 0, 0, v0, v1, v4, v6 ; F800024F 06040100 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s5, v1 ; 10080205 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s5, v4 ; D2820004 04100B00 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 s_buffer_load_dword s5, s[0:3], 0xf ; C202810F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s5, v4 ; 06080805 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s5, v1 ; 100A0205 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s5, v5 ; D2820005 04140B00 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mad_f32 v5, v7, s4, v5 ; D2820005 04140907 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s5, v5 ; 060A0A05 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s5, v1 ; 100E0205 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s5, v7 ; D2820007 041C0B00 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v7, v8, s4, v7 ; D2820007 041C0908 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s5, v7 ; 060E0E05 exp 15, 37, 0, 0, 0, v7, v5, v4, v6 ; F800025F 06040507 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v6, s5, v1 ; 100C0205 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s5, v6 ; D2820006 04180B00 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v6, v8, s4, v6 ; D2820006 04180908 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s5, v1 ; 10100205 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v0, s5, v8 ; D2820008 04200B00 s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s5 ; 7E120205 v_mad_f32 v8, v9, s4, v8 ; D2820008 04200909 s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s5, v8 ; 06101005 v_mad_f32 v8, -0.5, v8, -0.5 ; D2820008 03C610F1 exp 15, 38, 0, 0, 0, v6, v8, v2, v3 ; F800026F 03020806 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v6, s5, v1 ; 100C0205 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s5, v6 ; D2820000 04180B00 s_buffer_load_dword s5, s[0:3], 0xa ; C202810A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s5 ; 7E020205 v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v7, v5, v0, v4 ; F80008CF 04000507 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL CONST[0..23] DCL TEMP[0..24], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 4.0000, 2.2000, 1.0000, 2.0000} IMM[2] FLT32 { -1.0000, 1.0000, 4.0000, -2.0000} IMM[3] FLT32 { 0.0000, 0.5000, -16.0000, -1.4427} IMM[4] FLT32 { 16.0000, -1.4427, 0.0050, -0.5000} IMM[5] FLT32 { 0.5000, -0.5000, 6.0000, 0.2500} IMM[6] FLT32 { 2.0000, -2.0000, 6.0000, 2.2000} IMM[7] FLT32 { -0.6700, 0.4545, -0.4000, 0.4000} IMM[8] FLT32 { 1.4427, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[8], 2D 3: MOV TEMP[1].xz, TEMP[0] 4: ABS TEMP[2].x, TEMP[0] 5: MOV TEMP[2], -TEMP[2].xxxx 6: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 7: UIF TEMP[3].xxxx :0 8: MOV TEMP[3].x, IMM[0].yyyy 9: ELSE :0 10: MOV TEMP[3].x, IMM[0].wwww 11: ENDIF 12: MOV TEMP[3].x, TEMP[3].xxxx 13: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 14: UIF TEMP[4].xxxx :0 15: MOV TEMP[4].x, IMM[0].yyyy 16: ELSE :0 17: MOV TEMP[4].x, IMM[0].wwww 18: ENDIF 19: MOV TEMP[3].y, TEMP[4].xxxx 20: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 21: UIF TEMP[4].xxxx :0 22: MOV TEMP[4].x, IMM[0].yyyy 23: ELSE :0 24: MOV TEMP[4].x, IMM[0].wwww 25: ENDIF 26: MOV TEMP[3].z, TEMP[4].xxxx 27: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 28: UIF TEMP[2].xxxx :0 29: ELSE :0 30: ENDIF 31: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 32: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 33: OR TEMP[4].x, TEMP[4].xxxx, TEMP[3].yyyy 34: UIF TEMP[4].xxxx :0 35: KILL 36: ENDIF 37: MUL TEMP[3].yw, CONST[18].xxzy, IN[4].yyyy 38: MOV TEMP[1].yw, TEMP[3].wyww 39: MAD TEMP[3].yw, IN[4].xxxx, CONST[17].xxzy, TEMP[1] 40: MOV TEMP[1].yw, TEMP[3].wyww 41: MAD TEMP[3].yw, IN[4].zzzz, CONST[19].xxzy, TEMP[1] 42: MOV TEMP[1].yw, TEMP[3].wyww 43: ADD TEMP[3].yw, TEMP[1], CONST[20].xxzy 44: MOV TEMP[1].yw, TEMP[3].wyww 45: MOV TEMP[3].xy, IN[6].zwww 46: TEX TEMP[3].yzw, TEMP[3], SAMP[5], 2D 47: MOV TEMP[2].w, TEMP[3].zyzw 48: MUL TEMP[3], TEMP[3].wwww, IMM[1].xxxx 49: MOV_SAT TEMP[3], TEMP[3] 50: MOV TEMP[2].x, TEMP[3].xxxx 51: ADD TEMP[4].x, -TEMP[3].xxxx, IMM[0].xxxx 52: MOV TEMP[4].x, TEMP[4].xxxx 53: MOV TEMP[4].y, IMM[0].zzzz 54: MOV TEMP[5].xy, TEMP[4].xyyy 55: MOV TEMP[5].w, IMM[0].zzzz 56: TXL TEMP[5], TEMP[5], SAMP[4], 2D 57: POW TEMP[6].x, TEMP[5].xxxx, IMM[1].yyyy 58: POW TEMP[6].y, TEMP[5].yyyy, IMM[1].yyyy 59: POW TEMP[6].z, TEMP[5].zzzz, IMM[1].yyyy 60: POW TEMP[6].w, TEMP[5].wwww, IMM[1].zzzz 61: MOV TEMP[4].w, TEMP[6] 62: MAD TEMP[5].xy, TEMP[1].ywzw, IMM[0].xyxx, IMM[0].zxzz 63: MOV TEMP[5].xy, TEMP[5].xyyy 64: TEX TEMP[5].xyz, TEMP[5], SAMP[3], 2D 65: MAD TEMP[7].yw, TEMP[5].xxzy, IMM[1].wwww, IMM[0].yyyy 66: DP2 TEMP[5].x, IMM[0].xyyy, TEMP[7].ywww 67: DP2 TEMP[7].x, IMM[0].yyyy, TEMP[7].ywww 68: MOV TEMP[5].y, TEMP[7].xxxx 69: MOV TEMP[5].zw, -TEMP[5].yyxy 70: ADD TEMP[5], TEMP[5], IMM[1].wwww 71: MUL TEMP[5], TEMP[5], TEMP[5] 72: MUL TEMP[5], TEMP[5], TEMP[5] 73: DP4 TEMP[7].x, IMM[0].xxxx, TEMP[5] 74: RCP TEMP[7].x, TEMP[7].xxxx 75: MUL TEMP[5], TEMP[7].xxxx, TEMP[5] 76: RCP TEMP[7].x, CONST[0].xxxx 77: MUL TEMP[5], TEMP[7].xxxx, TEMP[5] 78: MUL TEMP[7], TEMP[5], TEMP[5] 79: MOV TEMP[8].xy, IN[0].xyyy 80: TEX TEMP[8], TEMP[8], SAMP[0], 2D 81: MOV TEMP[9].xy, IN[0].xyyy 82: TEX TEMP[9].xw, TEMP[9], SAMP[1], 2D 83: MOV TEMP[10].w, TEMP[9].wwww 84: MAD TEMP[11].yw, TEMP[8].xxzy, IMM[1].wwww, IMM[0].yyyy 85: MOV TEMP[1].yw, TEMP[11].wyww 86: MUL TEMP[11].yz, TEMP[5].xxxx, TEMP[1].xyww 87: MOV TEMP[2].yz, TEMP[11].zyzz 88: MAD TEMP[9].z, TEMP[9].xxxx, IMM[1].wwww, IMM[0].yyyy 89: MOV TEMP[10].z, TEMP[9].zzzz 90: MOV TEMP[10].xy, TEMP[8].zwzz 91: MOV TEMP[8].xy, IN[1].xyyy 92: TEX TEMP[8], TEMP[8], SAMP[0], 2D 93: MOV TEMP[9].zw, TEMP[8].wwzw 94: MOV TEMP[12].xy, IN[1].xyyy 95: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 96: MOV TEMP[13].w, TEMP[12].wwww 97: ADD TEMP[14].xy, TEMP[8], TEMP[8] 98: MOV TEMP[9].xy, TEMP[14].xyxx 99: MAD TEMP[14].xy, TEMP[9], IMM[2].xyzy, IMM[2].yxzy 100: MOV TEMP[9].xy, TEMP[14].xyxx 101: MUL TEMP[14].xy, TEMP[5].yyyy, TEMP[9] 102: MOV TEMP[9].xy, TEMP[14].xyxx 103: MAD TEMP[15].yw, TEMP[1], TEMP[5].xxxx, TEMP[9].xxzy 104: MOV TEMP[1].yw, TEMP[15].wyww 105: MAD TEMP[12].z, TEMP[12].xxxx, IMM[2].wwww, IMM[0].xxxx 106: MOV TEMP[13].z, TEMP[12].zzzz 107: MOV TEMP[13].xy, TEMP[8].zwzz 108: MUL TEMP[8].xyz, TEMP[7].yyyy, TEMP[13] 109: MOV TEMP[13].xyz, TEMP[8].xyzx 110: MAD TEMP[8].xyz, TEMP[10], TEMP[7].xxxx, TEMP[13] 111: MOV TEMP[10].xyz, TEMP[8].xyzx 112: ADD TEMP[8].xy, TEMP[2].yzzw, TEMP[2].yzzw 113: MOV TEMP[5].xy, TEMP[8].xyxx 114: MUL TEMP[8].xy, TEMP[9], TEMP[5] 115: MOV TEMP[13].xy, TEMP[8].xyxx 116: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[14].yxxx 117: MOV TEMP[13].z, TEMP[8].xxxx 118: ADD TEMP[8].xyz, TEMP[10], TEMP[13] 119: MOV TEMP[10].xyz, TEMP[8].xyzx 120: MOV TEMP[8].xy, IN[2].xyyy 121: TEX TEMP[8], TEMP[8], SAMP[0], 2D 122: MOV TEMP[13].w, TEMP[8].wwww 123: MOV TEMP[12].xy, IN[2].xyyy 124: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 125: MOV TEMP[15].w, TEMP[12].wwww 126: MAD TEMP[16].xy, TEMP[8], IMM[2].wwww, IMM[0].xxxx 127: MOV TEMP[7].xy, TEMP[16].xyxx 128: MUL TEMP[16].zw, TEMP[5].zzzz, TEMP[16].xyxy 129: MOV TEMP[9].zw, TEMP[16].wwzw 130: MAD TEMP[17].yw, TEMP[7].xxzy, TEMP[5].zzzz, TEMP[1] 131: MOV TEMP[1].yw, TEMP[17].wyww 132: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].wwww, IMM[0].yyyy 133: MOV TEMP[15].z, TEMP[12].zzzz 134: MOV TEMP[15].xy, TEMP[8].zwzz 135: MAD TEMP[8].xyz, TEMP[15], TEMP[7].zzzz, TEMP[10] 136: MOV TEMP[7].xyz, TEMP[8].xyzx 137: MUL TEMP[8].xy, TEMP[5], TEMP[16].zwzw 138: MOV TEMP[10].xy, TEMP[8].xyxx 139: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[16].wzzz 140: MOV TEMP[10].z, TEMP[8].xxxx 141: ADD TEMP[8].xyz, TEMP[7], TEMP[10] 142: MOV TEMP[7].xyz, TEMP[8].xyzx 143: ADD TEMP[8].xy, TEMP[9], TEMP[9] 144: MOV TEMP[10].xy, TEMP[8].xyxx 145: MUL TEMP[8].xy, TEMP[16].zwzw, TEMP[10] 146: MOV TEMP[13].xy, TEMP[8].xyxx 147: DP2 TEMP[8].x, TEMP[14].xyyy, TEMP[16].wzzz 148: MOV TEMP[13].z, TEMP[8].xxxx 149: ADD TEMP[8].xyz, TEMP[7], TEMP[13] 150: MOV TEMP[7].xyz, TEMP[8].xyzx 151: MOV TEMP[8].xy, IN[3].xyyy 152: TEX TEMP[8], TEMP[8], SAMP[0], 2D 153: MOV TEMP[13].zw, TEMP[8].wwzw 154: MOV TEMP[12].xy, IN[3].xyyy 155: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 156: MOV TEMP[15].w, TEMP[12].wwww 157: ADD TEMP[17].zw, TEMP[8].xyyx, TEMP[8].xyyx 158: MOV TEMP[10].zw, TEMP[17].wwzw 159: MAD TEMP[17].zw, TEMP[10], IMM[0].ywyx, IMM[0].ywxy 160: MOV TEMP[10].zw, TEMP[17].wwzw 161: MUL TEMP[18].xy, TEMP[5].wwww, TEMP[17].zwzw 162: MOV TEMP[13].xy, TEMP[18].xyxx 163: MAD TEMP[17].xy, TEMP[17].wzzw, TEMP[5].wwww, TEMP[1].ywzw 164: MOV TEMP[19].xy, TEMP[17].xyxx 165: MAD TEMP[12].z, TEMP[12].xxxx, IMM[2].wwww, IMM[0].xxxx 166: MOV TEMP[15].z, TEMP[12].zzzz 167: MOV TEMP[15].xy, TEMP[8].zwzz 168: MAD TEMP[7].xyz, TEMP[15], TEMP[7].wwww, TEMP[7] 169: MUL TEMP[8].xy, TEMP[5], TEMP[13].yxzw 170: MOV TEMP[5].xy, TEMP[8].xyxx 171: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[18].xyyy 172: MOV TEMP[5].z, TEMP[8].xxxx 173: ADD TEMP[7].yzw, TEMP[5].xxyz, TEMP[7].xxyz 174: MOV TEMP[2].yzw, TEMP[7].zyzw 175: MUL TEMP[7].xy, TEMP[10], TEMP[13].yxzw 176: MOV TEMP[5].xy, TEMP[7].xyxx 177: DP2 TEMP[7].x, TEMP[14].xyyy, TEMP[18].xyyy 178: MOV TEMP[5].z, TEMP[7].xxxx 179: ADD TEMP[7].yzw, TEMP[2], TEMP[5].xxyz 180: MOV TEMP[2].yzw, TEMP[7].zyzw 181: MUL TEMP[7].yw, TEMP[9].xzzw, TEMP[13].xyzx 182: MOV TEMP[1].yw, TEMP[7].wyww 183: ADD TEMP[7].xy, TEMP[1].ywzw, TEMP[1].ywzw 184: MOV TEMP[5].xy, TEMP[7].xyxx 185: DP2 TEMP[7].x, TEMP[16].zwww, TEMP[18].xyyy 186: MOV TEMP[5].z, TEMP[7].xxxx 187: ADD TEMP[7].yzw, TEMP[2], TEMP[5].xxyz 188: MOV TEMP[2].yzw, TEMP[7].zyzw 189: MAD TEMP[7].yzw, TEMP[17].xxyx, -TEMP[17].xxyy, TEMP[2] 190: MOV TEMP[2].yz, TEMP[7].zyzw 191: MOV TEMP[19].z, IMM[0].xxxx 192: DP3 TEMP[8].x, CONST[1].xyzz, TEMP[19].xyzz 193: MOV_SAT TEMP[8].x, TEMP[8].xxxx 194: ADD TEMP[11].xyz, CONST[8], -IN[4] 195: MOV TEMP[5].xyz, TEMP[11].xyzx 196: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 197: MAX TEMP[11].x, TEMP[11].xxxx, IMM[3].xxxx 198: RSQ TEMP[11].x, TEMP[11].xxxx 199: MUL TEMP[12].xyz, TEMP[11].xxxx, TEMP[5] 200: MAD TEMP[14].xyz, TEMP[5], TEMP[11].xxxx, -CONST[5] 201: MOV TEMP[9].xyz, TEMP[14].xyzx 202: MAD TEMP[14].xyz, CONST[5].wwww, TEMP[9], CONST[5] 203: MOV TEMP[9].xyz, TEMP[14].xyzx 204: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[14].xyzz 205: MAX TEMP[14].x, TEMP[14].xxxx, IMM[3].xxxx 206: RSQ TEMP[14].x, TEMP[14].xxxx 207: MAD TEMP[14].xyz, TEMP[9], TEMP[14].xxxx, CONST[1] 208: MOV TEMP[9].xyz, TEMP[14].xyzx 209: RCP TEMP[14].x, TEMP[14].zzzz 210: MAD TEMP[14].xy, TEMP[9], TEMP[14].xxxx, -TEMP[19] 211: RCP TEMP[15].x, CONST[3].wwww 212: ADD TEMP[16].zw, TEMP[2].xyyz, TEMP[15].xxxx 213: MUL TEMP[17].w, TEMP[7].wwww, TEMP[7].wwww 214: MAD TEMP[18].x, TEMP[16].zzzz, TEMP[16].wwww, -TEMP[17].wwww 215: MUL TEMP[20].y, TEMP[14].xxxx, TEMP[14].xxxx 216: ADD TEMP[7].w, TEMP[7].wwww, TEMP[7].wwww 217: MOV TEMP[2].w, TEMP[7].wwww 218: MUL TEMP[21].x, TEMP[14].xxxx, TEMP[7].wwww 219: MAD TEMP[22].z, TEMP[16].zzzz, TEMP[14].yyyy, -TEMP[21].xxxx 220: MUL TEMP[22].z, TEMP[14].yyyy, TEMP[22].zzzz 221: MAD TEMP[16].z, TEMP[20].yyyy, TEMP[16].wwww, TEMP[22].zzzz 222: MUL TEMP[16].z, TEMP[16].zzzz, IMM[3].yyyy 223: RCP TEMP[22].x, TEMP[18].xxxx 224: MUL TEMP[23].z, TEMP[22].xxxx, TEMP[16].zzzz 225: MOV TEMP[24].x, -TEMP[18].xxxx 226: FSGE TEMP[24].x, TEMP[24].xxxx, IMM[0].zzzz 227: UIF TEMP[24].xxxx :0 228: MOV TEMP[24].x, IMM[0].xxxx 229: ELSE :0 230: MOV TEMP[24].x, IMM[0].zzzz 231: ENDIF 232: MOV TEMP[10].w, TEMP[24].xxxx 233: MAD TEMP[16].z, TEMP[16].zzzz, TEMP[22].xxxx, IMM[3].zzzz 234: FSGE TEMP[16].x, TEMP[16].zzzz, IMM[0].zzzz 235: UIF TEMP[16].xxxx :0 236: MOV TEMP[16].x, IMM[0].xxxx 237: ELSE :0 238: MOV TEMP[16].x, IMM[0].zzzz 239: ENDIF 240: MOV TEMP[9].z, TEMP[16].xxxx 241: MUL TEMP[16].w, TEMP[23].zzzz, IMM[3].wwww 242: EX2 TEMP[16].x, TEMP[16].wwww 243: MAX TEMP[18].x, TEMP[18].xxxx, IMM[3].xxxx 244: RSQ TEMP[18].x, TEMP[18].xxxx 245: MUL TEMP[16].w, TEMP[16].xxxx, TEMP[18].xxxx 246: MOV TEMP[9].w, TEMP[16].wwww 247: MAD TEMP[15].xz, TEMP[15].xxxx, IMM[4].xxxx, TEMP[2].yyzw 248: MAD TEMP[18].w, TEMP[15].xxxx, TEMP[15].zzzz, -TEMP[17].wwww 249: MAD TEMP[21].x, TEMP[15].xxxx, TEMP[14].yyyy, -TEMP[21].xxxx 250: MUL TEMP[14].x, TEMP[14].yyyy, TEMP[21].xxxx 251: MAD TEMP[14].x, TEMP[20].yyyy, TEMP[15].zzzz, TEMP[14].xxxx 252: MUL TEMP[14].x, TEMP[14].xxxx, IMM[3].yyyy 253: RCP TEMP[15].x, TEMP[18].wwww 254: MOV TEMP[9].y, TEMP[15].xxxx 255: MUL TEMP[20].x, TEMP[15].xxxx, TEMP[14].xxxx 256: MOV TEMP[21].x, -TEMP[18].wwww 257: FSGE TEMP[21].x, TEMP[21].xxxx, IMM[0].zzzz 258: UIF TEMP[21].xxxx :0 259: MOV TEMP[21].x, IMM[0].xxxx 260: ELSE :0 261: MOV TEMP[21].x, IMM[0].zzzz 262: ENDIF 263: MOV TEMP[10].y, TEMP[21].xxxx 264: MAD TEMP[14].x, TEMP[14].xxxx, TEMP[15].xxxx, IMM[3].zzzz 265: FSGE TEMP[14].x, TEMP[14].xxxx, IMM[0].zzzz 266: UIF TEMP[14].xxxx :0 267: MOV TEMP[14].x, IMM[0].xxxx 268: ELSE :0 269: MOV TEMP[14].x, IMM[0].zzzz 270: ENDIF 271: MOV TEMP[9].x, TEMP[14].xxxx 272: ADD TEMP[9].xz, TEMP[9], TEMP[10].yyww 273: MUL TEMP[14].y, TEMP[20].xxxx, IMM[3].wwww 274: EX2 TEMP[14].x, TEMP[14].yyyy 275: MAX TEMP[15].x, TEMP[18].wwww, IMM[3].xxxx 276: RSQ TEMP[15].x, TEMP[15].xxxx 277: MUL TEMP[14].w, TEMP[15].xxxx, TEMP[14].xxxx 278: MOV TEMP[5].w, TEMP[14].wwww 279: MAD TEMP[11].xyz, TEMP[5], TEMP[11].xxxx, -CONST[6] 280: MOV TEMP[5].xyz, TEMP[11].xyzx 281: MAD TEMP[11].xyz, CONST[6].wwww, TEMP[5], CONST[6] 282: MOV TEMP[5].xyz, TEMP[11].xyzx 283: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 284: MAX TEMP[11].x, TEMP[11].xxxx, IMM[3].xxxx 285: RSQ TEMP[11].x, TEMP[11].xxxx 286: MAD TEMP[11].xyz, TEMP[5], TEMP[11].xxxx, CONST[2] 287: MOV TEMP[5].xyz, TEMP[11].xyzx 288: RCP TEMP[11].x, TEMP[11].zzzz 289: MAD TEMP[11].xy, TEMP[5], TEMP[11].xxxx, -TEMP[19] 290: RCP TEMP[14].x, CONST[4].wwww 291: ADD TEMP[14].xy, TEMP[14].xxxx, TEMP[2].yzzw 292: MAD TEMP[15].w, TEMP[14].xxxx, TEMP[14].yyyy, -TEMP[17].wwww 293: MUL TEMP[17].z, TEMP[11].xxxx, TEMP[11].xxxx 294: MUL TEMP[7].w, TEMP[7].wwww, TEMP[11].xxxx 295: MAD TEMP[7].w, TEMP[14].xxxx, TEMP[11].yyyy, -TEMP[7].wwww 296: MUL TEMP[7].w, TEMP[11].yyyy, TEMP[7].wwww 297: MAD TEMP[7].w, TEMP[17].zzzz, TEMP[14].yyyy, TEMP[7].wwww 298: MUL TEMP[7].w, TEMP[7].wwww, IMM[3].yyyy 299: RCP TEMP[5].x, TEMP[15].wwww 300: MUL TEMP[11].y, TEMP[7].wwww, TEMP[5].xxxx 301: MOV TEMP[5].y, TEMP[11].yyyy 302: MOV TEMP[14].x, -TEMP[15].wwww 303: FSGE TEMP[14].x, TEMP[14].xxxx, IMM[0].zzzz 304: UIF TEMP[14].xxxx :0 305: MOV TEMP[14].x, IMM[0].xxxx 306: ELSE :0 307: MOV TEMP[14].x, IMM[0].zzzz 308: ENDIF 309: MOV TEMP[5].z, TEMP[14].xxxx 310: MAD TEMP[7].w, TEMP[7].wwww, TEMP[5].xxxx, IMM[3].zzzz 311: FSGE TEMP[7].x, TEMP[7].wwww, IMM[0].zzzz 312: UIF TEMP[7].xxxx :0 313: MOV TEMP[7].x, IMM[0].xxxx 314: ELSE :0 315: MOV TEMP[7].x, IMM[0].zzzz 316: ENDIF 317: ADD TEMP[7].w, TEMP[7].xxxx, TEMP[14].xxxx 318: MOV TEMP[2].w, TEMP[7].wwww 319: MUL TEMP[14].xw, TEMP[5].yyzw, IMM[4].yyxz 320: MAX TEMP[17].x, TEMP[15].wwww, IMM[3].xxxx 321: RSQ TEMP[17].x, TEMP[17].xxxx 322: EX2 TEMP[18].x, TEMP[14].xxxx 323: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[18].xxxx 324: MUL TEMP[18].xyz, CONST[10].xyww, IN[4].yyyy 325: MOV TEMP[10].xyz, TEMP[18].xyzx 326: MAD TEMP[18].xyz, IN[4].xxxx, CONST[9].xyww, TEMP[10] 327: MOV TEMP[10].xyz, TEMP[18].xyzx 328: MAD TEMP[18].xyz, IN[4].zzzz, CONST[11].xyww, TEMP[10] 329: MOV TEMP[10].xyz, TEMP[18].xyzx 330: ADD TEMP[18].xyz, TEMP[10], CONST[12].xyww 331: MOV TEMP[10].xyz, TEMP[18].xyzx 332: RCP TEMP[18].x, TEMP[18].zzzz 333: MUL TEMP[18].xy, TEMP[18].xxxx, TEMP[10] 334: MOV TEMP[10].xy, TEMP[18].xyxx 335: MAD TEMP[18].xy, TEMP[10], IMM[5], IMM[4].wwww 336: MOV TEMP[10].xy, TEMP[18].xyxx 337: MAD TEMP[18].yz, TEMP[2], IMM[5].zzzz, TEMP[10].xxyw 338: MOV TEMP[2].yz, TEMP[18].zyzz 339: MAD TEMP[18].xy, TEMP[15].wwww, IMM[1].wwww, TEMP[2].yzzw 340: MAD TEMP[20].xy, TEMP[15].wwww, IMM[6], TEMP[2].yzzw 341: MOV TEMP[20].xy, TEMP[20].xyyy 342: TEX TEMP[20], TEMP[20], SAMP[2], 2D 343: MUL TEMP[13], TEMP[20], IMM[5].wwww 344: MOV TEMP[18].xy, TEMP[18].xyyy 345: TEX TEMP[18], TEMP[18], SAMP[2], 2D 346: MAD TEMP[10], TEMP[18], IMM[5].wwww, TEMP[13] 347: MAD TEMP[18].xy, TEMP[15].wwww, IMM[6].yzzw, TEMP[2].yzzw 348: MOV TEMP[18].xy, TEMP[18].xyyy 349: TEX TEMP[18], TEMP[18], SAMP[2], 2D 350: MAD TEMP[10], TEMP[18], IMM[5].wwww, TEMP[10] 351: MAD TEMP[15].yz, TEMP[15].wwww, IMM[2].wwww, TEMP[2] 352: MOV TEMP[15].xy, TEMP[15].yzzz 353: TEX TEMP[15], TEMP[15], SAMP[2], 2D 354: MOV TEMP[13].w, TEMP[15].wwww 355: MAD TEMP[10], TEMP[15], IMM[5].wwww, TEMP[10] 356: ABS TEMP[15].x, TEMP[10].xxxx 357: LG2 TEMP[13].x, TEMP[15].xxxx 358: ABS TEMP[15].x, TEMP[10].yyyy 359: LG2 TEMP[15].x, TEMP[15].xxxx 360: MOV TEMP[13].y, TEMP[15].xxxx 361: ABS TEMP[15].x, TEMP[10].zzzz 362: LG2 TEMP[15].x, TEMP[15].xxxx 363: MOV TEMP[13].z, TEMP[15].xxxx 364: MUL TEMP[15].xyz, TEMP[13], IMM[1].yyyy 365: EX2 TEMP[13].x, TEMP[15].xxxx 366: EX2 TEMP[18].x, TEMP[15].yyyy 367: MOV TEMP[13].y, TEMP[18].xxxx 368: EX2 TEMP[15].x, TEMP[15].zzzz 369: MOV TEMP[13].z, TEMP[15].xxxx 370: DP3 TEMP[12].x, TEMP[19].xyzz, TEMP[12].xyzz 371: ADD TEMP[12].w, -TEMP[12].xxxx, IMM[0].xxxx 372: MUL TEMP[15].y, TEMP[12].wwww, TEMP[12].wwww 373: MUL TEMP[15].y, TEMP[15].yyyy, TEMP[15].yyyy 374: MUL TEMP[12].w, TEMP[12].wwww, TEMP[15].yyyy 375: MUL TEMP[15].xyz, TEMP[16].wwww, CONST[3] 376: MOV TEMP[16], -TEMP[9].zzzz 377: FSGE TEMP[18].x, TEMP[16].xxxx, IMM[0].zzzz 378: UIF TEMP[18].xxxx :0 379: MOV TEMP[18].x, TEMP[15].xxxx 380: ELSE :0 381: MOV TEMP[18].x, IMM[0].zzzz 382: ENDIF 383: MOV TEMP[18].x, TEMP[18].xxxx 384: FSGE TEMP[19].x, TEMP[16].yyyy, IMM[0].zzzz 385: UIF TEMP[19].xxxx :0 386: MOV TEMP[19].x, TEMP[15].yyyy 387: ELSE :0 388: MOV TEMP[19].x, IMM[0].zzzz 389: ENDIF 390: MOV TEMP[18].y, TEMP[19].xxxx 391: FSGE TEMP[19].x, TEMP[16].zzzz, IMM[0].zzzz 392: UIF TEMP[19].xxxx :0 393: MOV TEMP[15].x, TEMP[15].zzzz 394: ELSE :0 395: MOV TEMP[15].x, IMM[0].zzzz 396: ENDIF 397: MOV TEMP[18].z, TEMP[15].xxxx 398: FSGE TEMP[15].x, TEMP[16].wwww, IMM[0].zzzz 399: UIF TEMP[15].xxxx :0 400: ELSE :0 401: ENDIF 402: MUL TEMP[15].yzw, TEMP[17].xxxx, CONST[4].xxyz 403: MOV TEMP[7], -TEMP[7].wwww 404: FSGE TEMP[16].x, TEMP[7].xxxx, IMM[0].zzzz 405: UIF TEMP[16].xxxx :0 406: ELSE :0 407: ENDIF 408: FSGE TEMP[17].x, TEMP[7].yyyy, IMM[0].zzzz 409: UIF TEMP[17].xxxx :0 410: MOV TEMP[17].x, TEMP[15].yyyy 411: ELSE :0 412: MOV TEMP[17].x, IMM[0].zzzz 413: ENDIF 414: MOV TEMP[16].y, TEMP[17].xxxx 415: FSGE TEMP[17].x, TEMP[7].zzzz, IMM[0].zzzz 416: UIF TEMP[17].xxxx :0 417: MOV TEMP[17].x, TEMP[15].zzzz 418: ELSE :0 419: MOV TEMP[17].x, IMM[0].zzzz 420: ENDIF 421: MOV TEMP[16].z, TEMP[17].xxxx 422: FSGE TEMP[7].x, TEMP[7].wwww, IMM[0].zzzz 423: UIF TEMP[7].xxxx :0 424: MOV TEMP[7].x, TEMP[15].wwww 425: ELSE :0 426: MOV TEMP[7].x, IMM[0].zzzz 427: ENDIF 428: MOV TEMP[16].w, TEMP[7].xxxx 429: MOV TEMP[2].yzw, TEMP[16].zyzw 430: ADD TEMP[7].yzw, TEMP[2], TEMP[18].xxyz 431: MOV TEMP[2].yzw, TEMP[7].zyzw 432: MAD TEMP[7].yzw, TEMP[2], TEMP[12].wwww, TEMP[6].xxyz 433: MOV TEMP[2].yzw, TEMP[7].zyzw 434: MUL TEMP[7].xyz, TEMP[6], CONST[3] 435: MUL TEMP[7].xzw, TEMP[14].wwww, TEMP[7].xyyz 436: MOV TEMP[9], -TEMP[9].xxxx 437: FSGE TEMP[12].x, TEMP[9].xxxx, IMM[0].zzzz 438: UIF TEMP[12].xxxx :0 439: MOV TEMP[12].x, TEMP[7].xxxx 440: ELSE :0 441: MOV TEMP[12].x, IMM[0].zzzz 442: ENDIF 443: MOV TEMP[12].x, TEMP[12].xxxx 444: FSGE TEMP[14].x, TEMP[9].yyyy, IMM[0].zzzz 445: UIF TEMP[14].xxxx :0 446: ELSE :0 447: ENDIF 448: FSGE TEMP[14].x, TEMP[9].zzzz, IMM[0].zzzz 449: UIF TEMP[14].xxxx :0 450: MOV TEMP[14].x, TEMP[7].zzzz 451: ELSE :0 452: MOV TEMP[14].x, IMM[0].zzzz 453: ENDIF 454: MOV TEMP[12].z, TEMP[14].xxxx 455: FSGE TEMP[9].x, TEMP[9].wwww, IMM[0].zzzz 456: UIF TEMP[9].xxxx :0 457: MOV TEMP[7].x, TEMP[7].wwww 458: ELSE :0 459: MOV TEMP[7].x, IMM[0].zzzz 460: ENDIF 461: MOV TEMP[12].w, TEMP[7].xxxx 462: MAD TEMP[7].yzw, TEMP[2], TEMP[8].xxxx, TEMP[12].xxzw 463: MOV TEMP[2].yzw, TEMP[7].zyzw 464: MAD TEMP[7].yzw, TEMP[13].xxyz, CONST[23].xxxx, TEMP[2] 465: MOV TEMP[2].w, TEMP[7].zyzw 466: MUL TEMP[8].y, TEMP[11].yyyy, TEMP[11].yyyy 467: MUL TEMP[8].y, TEMP[11].yyyy, TEMP[8].yyyy 468: MUL TEMP[3].y, TEMP[3].xxxx, TEMP[8].yyyy 469: MAD TEMP[3].xyz, TEMP[3].yyyy, TEMP[6], TEMP[7].yzww 470: MOV TEMP[2].xyz, TEMP[3].xyzx 471: MUL TEMP[3].xyz, CONST[14].xyww, IN[4].yyyy 472: MOV TEMP[4].xyz, TEMP[3].xyzx 473: MAD TEMP[3].xyz, IN[4].xxxx, CONST[13].xyww, TEMP[4] 474: MOV TEMP[4].xyz, TEMP[3].xyzx 475: MAD TEMP[3].xyz, IN[4].zzzz, CONST[15].xyww, TEMP[4] 476: MOV TEMP[4].xyz, TEMP[3].xyzx 477: ADD TEMP[3].xyz, TEMP[4], CONST[16].xyww 478: RCP TEMP[7].x, TEMP[3].zzzz 479: MUL TEMP[3].yw, TEMP[7].xxxx, TEMP[3].xxzy 480: MOV TEMP[1].yw, TEMP[3].wyww 481: MAD TEMP[3].yw, TEMP[1], IMM[5].xxzy, IMM[4].wwww 482: MOV TEMP[1].yw, TEMP[3].wyww 483: MAD TEMP[3].xy, TEMP[1].ywzw, IMM[0].xyxx, IMM[0].zxzz 484: MOV TEMP[3].xy, TEMP[3].xyyy 485: TEX TEMP[3].xw, TEMP[3], SAMP[10], 2D 486: MOV TEMP[5].w, TEMP[3].wwww 487: MOV_SAT TEMP[3].x, TEMP[3].xxxx 488: MUL TEMP[3].y, TEMP[3].xxxx, CONST[21].xxxx 489: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[2] 490: MOV TEMP[4].xyz, TEMP[3].xyzx 491: MAD TEMP[3].xyz, TEMP[4], IMM[7].xxxx, TEMP[2] 492: MAD TEMP[6], TEMP[10].wwww, TEMP[6].wwww, TEMP[6].wwww 493: MOV_SAT TEMP[6], TEMP[6] 494: MOV TEMP[6].w, TEMP[6].wwww 495: ABS TEMP[7].x, TEMP[3].xxxx 496: LG2 TEMP[4].x, TEMP[7].xxxx 497: ABS TEMP[7].x, TEMP[3].yyyy 498: LG2 TEMP[7].x, TEMP[7].xxxx 499: MOV TEMP[4].y, TEMP[7].xxxx 500: ABS TEMP[3].x, TEMP[3].zzzz 501: LG2 TEMP[3].x, TEMP[3].xxxx 502: MOV TEMP[4].z, TEMP[3].xxxx 503: MUL TEMP[3].xyz, TEMP[4], IMM[7].yyyy 504: EX2 TEMP[4].x, TEMP[3].xxxx 505: EX2 TEMP[7].x, TEMP[3].yyyy 506: MOV TEMP[4].y, TEMP[7].xxxx 507: EX2 TEMP[3].x, TEMP[3].zzzz 508: MOV TEMP[4].z, TEMP[3].xxxx 509: MAD TEMP[3].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 510: MOV TEMP[3].xy, TEMP[3].xyyy 511: TEX TEMP[3], TEMP[3], SAMP[7], 2D 512: MOV TEMP[2].w, TEMP[3].wwww 513: ADD TEMP[7].y, -TEMP[3].wwww, IMM[0].xxxx 514: MAD TEMP[3].xyz, TEMP[4], TEMP[7].yyyy, TEMP[3] 515: MOV TEMP[2].xyz, TEMP[3].xyzx 516: ADD TEMP[3].xyz, TEMP[2], IMM[7].zzzz 517: MOV TEMP[2].xyz, TEMP[3].xyzx 518: MAD TEMP[3].xyz, TEMP[0].xxxx, TEMP[2], IMM[7].wwww 519: MOV TEMP[2].xyz, TEMP[3].xyzx 520: MOV TEMP[3].xyz, TEMP[3].xyzz 521: TEX TEMP[3], TEMP[3], SAMP[9], 3D 522: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 523: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[2] 524: MOV TEMP[5].xyz, TEMP[0].xyzx 525: RCP TEMP[1].x, IN[5].zzzz 526: MUL TEMP[2].x, TEMP[1].xxxx, IN[5].yyyy 527: MAD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy, IMM[3].yyyy 528: MOV TEMP[1].x, TEMP[2].xxxx 529: MOV TEMP[1].y, CONST[7].wwww 530: MOV TEMP[2].xy, TEMP[1].xyyy 531: TEX TEMP[2].x, TEMP[2], SAMP[6], 2D 532: MOV TEMP[1].x, TEMP[2].xxxx 533: ADD TEMP[3].yzw, -CONST[8].xxyz, IN[4].xxyz 534: MOV TEMP[1].w, TEMP[3].zyzw 535: DP3 TEMP[4].x, TEMP[3].yzww, TEMP[3].yzww 536: MOV TEMP[1].y, TEMP[4].xxxx 537: MUL TEMP[1].yz, TEMP[1].xyww, CONST[7].xyxw 538: MUL TEMP[4].z, TEMP[1].zzzz, IMM[8].xxxx 539: EX2 TEMP[4].x, TEMP[4].zzzz 540: ADD TEMP[4].z, -TEMP[4].xxxx, IMM[0].xxxx 541: MUL TEMP[1].y, TEMP[4].zzzz, TEMP[1].yyyy 542: RCP TEMP[3].x, TEMP[3].wwww 543: MUL TEMP[1].y, TEMP[3].xxxx, TEMP[1].yyyy 544: MUL TEMP[1].y, TEMP[1].yyyy, IMM[8].xxxx 545: EX2 TEMP[1].x, TEMP[1].yyyy 546: MOV_SAT TEMP[1].x, TEMP[1].xxxx 547: ADD TEMP[1].y, -TEMP[1].xxxx, IMM[0].xxxx 548: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[2].xxxx 549: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[22].xxyz 550: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[5] 551: MOV TEMP[6].xyz, TEMP[0].xyzx 552: MOV OUT[0], TEMP[6] 553: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 220) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 252) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %85 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %90 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %91 = load <8 x i32> addrspace(2)* %90, !tbaa !0 %92 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %93 = load <4 x i32> addrspace(2)* %92, !tbaa !0 %94 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %95 = load <8 x i32> addrspace(2)* %94, !tbaa !0 %96 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %97 = load <4 x i32> addrspace(2)* %96, !tbaa !0 %98 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %99 = load <8 x i32> addrspace(2)* %98, !tbaa !0 %100 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %101 = load <4 x i32> addrspace(2)* %100, !tbaa !0 %102 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %103 = load <8 x i32> addrspace(2)* %102, !tbaa !0 %104 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %105 = load <4 x i32> addrspace(2)* %104, !tbaa !0 %106 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %107 = load <8 x i32> addrspace(2)* %106, !tbaa !0 %108 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %109 = load <4 x i32> addrspace(2)* %108, !tbaa !0 %110 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %111 = load <8 x i32> addrspace(2)* %110, !tbaa !0 %112 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %113 = load <4 x i32> addrspace(2)* %112, !tbaa !0 %114 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %115 = load <8 x i32> addrspace(2)* %114, !tbaa !0 %116 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %117 = load <4 x i32> addrspace(2)* %116, !tbaa !0 %118 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %119 = load <8 x i32> addrspace(2)* %118, !tbaa !0 %120 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %121 = load <4 x i32> addrspace(2)* %120, !tbaa !0 %122 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %123 = load <8 x i32> addrspace(2)* %122, !tbaa !0 %124 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %125 = load <4 x i32> addrspace(2)* %124, !tbaa !0 %126 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %127 = load <8 x i32> addrspace(2)* %126, !tbaa !0 %128 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %129 = load <4 x i32> addrspace(2)* %128, !tbaa !0 %130 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %131 = load <8 x i32> addrspace(2)* %130, !tbaa !0 %132 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %133 = load <4 x i32> addrspace(2)* %132, !tbaa !0 %134 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %141 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %142 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %143 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %144 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %145 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %146 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %147 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %148 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %149 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %150 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %151 = fmul float %147, 1.000000e+00 %152 = fadd float %151, 0.000000e+00 %153 = fmul float %148, -1.000000e+00 %154 = fadd float %153, 1.000000e+00 %155 = bitcast float %152 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %123 to <32 x i8> %160 = bitcast <4 x i32> %125 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 2 %164 = call float @fabs(float %162) %165 = fsub float -0.000000e+00, %164 %166 = fsub float -0.000000e+00, %164 %167 = fsub float -0.000000e+00, %164 %168 = fsub float -0.000000e+00, %164 %169 = fcmp oge float %165, 0.000000e+00 %170 = sext i1 %169 to i32 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = icmp ne i32 %172, 0 %. = select i1 %173, float -1.000000e+00, float -0.000000e+00 %174 = fcmp oge float %166, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = icmp ne i32 %177, 0 %temp16.0 = select i1 %178, float -1.000000e+00, float -0.000000e+00 %179 = fcmp oge float %167, 0.000000e+00 %180 = sext i1 %179 to i32 %181 = bitcast i32 %180 to float %182 = bitcast float %181 to i32 %183 = icmp ne i32 %182, 0 %.166 = select i1 %183, float -1.000000e+00, float -0.000000e+00 %184 = fcmp oge float %168, 0.000000e+00 %185 = sext i1 %184 to i32 %186 = bitcast i32 %185 to float %187 = bitcast float %186 to i32 %188 = icmp ne i32 %187, 0 %189 = fcmp olt float %., 0.000000e+00 %190 = sext i1 %189 to i32 %191 = fcmp olt float %temp16.0, 0.000000e+00 %192 = sext i1 %191 to i32 %193 = fcmp olt float %.166, 0.000000e+00 %194 = sext i1 %193 to i32 %195 = bitcast i32 %190 to float %196 = bitcast i32 %192 to float %197 = bitcast i32 %194 to float %198 = bitcast float %195 to i32 %199 = bitcast float %197 to i32 %200 = or i32 %198, %199 %201 = bitcast i32 %200 to float %202 = bitcast float %201 to i32 %203 = bitcast float %196 to i32 %204 = or i32 %202, %203 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = icmp ne i32 %206, 0 br i1 %207, label %IF110, label %ENDIF109 IF110: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF109 ENDIF109: ; preds = %main_body, %IF110 %208 = fmul float %79, %143 %209 = fmul float %80, %143 %210 = fmul float %142, %77 %211 = fadd float %210, %208 %212 = fmul float %142, %78 %213 = fadd float %212, %209 %214 = fmul float %144, %81 %215 = fadd float %214, %211 %216 = fmul float %144, %82 %217 = fadd float %216, %213 %218 = fadd float %215, %83 %219 = fadd float %217, %84 %220 = bitcast float %149 to i32 %221 = bitcast float %150 to i32 %222 = insertelement <2 x i32> undef, i32 %220, i32 0 %223 = insertelement <2 x i32> %222, i32 %221, i32 1 %224 = bitcast <8 x i32> %111 to <32 x i8> %225 = bitcast <4 x i32> %113 to <16 x i8> %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %224, <16 x i8> %225, i32 2) %227 = extractelement <4 x float> %226, i32 3 %228 = fmul float %227, 4.000000e+00 %229 = fmul float %227, 4.000000e+00 %230 = fmul float %227, 4.000000e+00 %231 = fmul float %227, 4.000000e+00 %232 = call float @llvm.AMDIL.clamp.(float %228, float 0.000000e+00, float 1.000000e+00) %233 = call float @llvm.AMDIL.clamp.(float %229, float 0.000000e+00, float 1.000000e+00) %234 = call float @llvm.AMDIL.clamp.(float %230, float 0.000000e+00, float 1.000000e+00) %235 = call float @llvm.AMDIL.clamp.(float %231, float 0.000000e+00, float 1.000000e+00) %236 = fsub float -0.000000e+00, %232 %237 = fadd float %236, 1.000000e+00 %238 = bitcast float %237 to i32 %239 = bitcast float 0.000000e+00 to i32 %240 = bitcast float 0.000000e+00 to i32 %241 = insertelement <4 x i32> undef, i32 %238, i32 0 %242 = insertelement <4 x i32> %241, i32 %239, i32 1 %243 = insertelement <4 x i32> %242, i32 %240, i32 2 %244 = insertelement <4 x i32> %243, i32 undef, i32 3 %245 = bitcast <8 x i32> %107 to <32 x i8> %246 = bitcast <4 x i32> %109 to <16 x i8> %247 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %244, <32 x i8> %245, <16 x i8> %246, i32 2) %248 = extractelement <4 x float> %247, i32 0 %249 = extractelement <4 x float> %247, i32 1 %250 = extractelement <4 x float> %247, i32 2 %251 = extractelement <4 x float> %247, i32 3 %252 = call float @llvm.pow.f32(float %248, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %254 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %255 = call float @llvm.pow.f32(float %251, float 1.000000e+00) %256 = fmul float %218, 1.000000e+00 %257 = fadd float %256, 0.000000e+00 %258 = fmul float %219, -1.000000e+00 %259 = fadd float %258, 1.000000e+00 %260 = bitcast float %257 to i32 %261 = bitcast float %259 to i32 %262 = insertelement <2 x i32> undef, i32 %260, i32 0 %263 = insertelement <2 x i32> %262, i32 %261, i32 1 %264 = bitcast <8 x i32> %103 to <32 x i8> %265 = bitcast <4 x i32> %105 to <16 x i8> %266 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %263, <32 x i8> %264, <16 x i8> %265, i32 2) %267 = extractelement <4 x float> %266, i32 0 %268 = extractelement <4 x float> %266, i32 1 %269 = fmul float %267, 2.000000e+00 %270 = fadd float %269, -1.000000e+00 %271 = fmul float %268, 2.000000e+00 %272 = fadd float %271, -1.000000e+00 %273 = fmul float 1.000000e+00, %270 %274 = fmul float -1.000000e+00, %272 %275 = fadd float %273, %274 %276 = fmul float -1.000000e+00, %270 %277 = fmul float -1.000000e+00, %272 %278 = fadd float %276, %277 %279 = fsub float -0.000000e+00, %275 %280 = fsub float -0.000000e+00, %278 %281 = fadd float %275, 2.000000e+00 %282 = fadd float %278, 2.000000e+00 %283 = fadd float %279, 2.000000e+00 %284 = fadd float %280, 2.000000e+00 %285 = fmul float %281, %281 %286 = fmul float %282, %282 %287 = fmul float %283, %283 %288 = fmul float %284, %284 %289 = fmul float %285, %285 %290 = fmul float %286, %286 %291 = fmul float %287, %287 %292 = fmul float %288, %288 %293 = fmul float 1.000000e+00, %289 %294 = fmul float 1.000000e+00, %290 %295 = fadd float %293, %294 %296 = fmul float 1.000000e+00, %291 %297 = fadd float %295, %296 %298 = fmul float 1.000000e+00, %292 %299 = fadd float %297, %298 %300 = fdiv float 1.000000e+00, %299 %301 = fmul float %300, %289 %302 = fmul float %300, %290 %303 = fmul float %300, %291 %304 = fmul float %300, %292 %305 = fdiv float 1.000000e+00, %24 %306 = fmul float %305, %301 %307 = fmul float %305, %302 %308 = fmul float %305, %303 %309 = fmul float %305, %304 %310 = fmul float %306, %306 %311 = fmul float %307, %307 %312 = fmul float %308, %308 %313 = fmul float %309, %309 %314 = bitcast float %134 to i32 %315 = bitcast float %135 to i32 %316 = insertelement <2 x i32> undef, i32 %314, i32 0 %317 = insertelement <2 x i32> %316, i32 %315, i32 1 %318 = bitcast <8 x i32> %91 to <32 x i8> %319 = bitcast <4 x i32> %93 to <16 x i8> %320 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %317, <32 x i8> %318, <16 x i8> %319, i32 2) %321 = extractelement <4 x float> %320, i32 0 %322 = extractelement <4 x float> %320, i32 1 %323 = extractelement <4 x float> %320, i32 2 %324 = extractelement <4 x float> %320, i32 3 %325 = bitcast float %134 to i32 %326 = bitcast float %135 to i32 %327 = insertelement <2 x i32> undef, i32 %325, i32 0 %328 = insertelement <2 x i32> %327, i32 %326, i32 1 %329 = bitcast <8 x i32> %95 to <32 x i8> %330 = bitcast <4 x i32> %97 to <16 x i8> %331 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %328, <32 x i8> %329, <16 x i8> %330, i32 2) %332 = extractelement <4 x float> %331, i32 0 %333 = fmul float %321, 2.000000e+00 %334 = fadd float %333, -1.000000e+00 %335 = fmul float %322, 2.000000e+00 %336 = fadd float %335, -1.000000e+00 %337 = fmul float %306, %334 %338 = fmul float %306, %336 %339 = fmul float %332, 2.000000e+00 %340 = fadd float %339, -1.000000e+00 %341 = bitcast float %136 to i32 %342 = bitcast float %137 to i32 %343 = insertelement <2 x i32> undef, i32 %341, i32 0 %344 = insertelement <2 x i32> %343, i32 %342, i32 1 %345 = bitcast <8 x i32> %91 to <32 x i8> %346 = bitcast <4 x i32> %93 to <16 x i8> %347 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %344, <32 x i8> %345, <16 x i8> %346, i32 2) %348 = extractelement <4 x float> %347, i32 0 %349 = extractelement <4 x float> %347, i32 1 %350 = extractelement <4 x float> %347, i32 2 %351 = extractelement <4 x float> %347, i32 3 %352 = bitcast float %136 to i32 %353 = bitcast float %137 to i32 %354 = insertelement <2 x i32> undef, i32 %352, i32 0 %355 = insertelement <2 x i32> %354, i32 %353, i32 1 %356 = bitcast <8 x i32> %95 to <32 x i8> %357 = bitcast <4 x i32> %97 to <16 x i8> %358 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %355, <32 x i8> %356, <16 x i8> %357, i32 2) %359 = extractelement <4 x float> %358, i32 0 %360 = fadd float %348, %348 %361 = fadd float %349, %349 %362 = fmul float %360, -1.000000e+00 %363 = fadd float %362, 1.000000e+00 %364 = fmul float %361, 1.000000e+00 %365 = fadd float %364, -1.000000e+00 %366 = fmul float %307, %363 %367 = fmul float %307, %365 %368 = fmul float %334, %306 %369 = fadd float %368, %366 %370 = fmul float %336, %306 %371 = fadd float %370, %367 %372 = fmul float %359, -2.000000e+00 %373 = fadd float %372, 1.000000e+00 %374 = fmul float %311, %350 %375 = fmul float %311, %351 %376 = fmul float %311, %373 %377 = fmul float %323, %310 %378 = fadd float %377, %374 %379 = fmul float %324, %310 %380 = fadd float %379, %375 %381 = fmul float %340, %310 %382 = fadd float %381, %376 %383 = fadd float %337, %337 %384 = fadd float %338, %338 %385 = fmul float %366, %383 %386 = fmul float %367, %384 %387 = fmul float %337, %367 %388 = fmul float %338, %366 %389 = fadd float %387, %388 %390 = fadd float %378, %385 %391 = fadd float %380, %386 %392 = fadd float %382, %389 %393 = bitcast float %138 to i32 %394 = bitcast float %139 to i32 %395 = insertelement <2 x i32> undef, i32 %393, i32 0 %396 = insertelement <2 x i32> %395, i32 %394, i32 1 %397 = bitcast <8 x i32> %91 to <32 x i8> %398 = bitcast <4 x i32> %93 to <16 x i8> %399 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = extractelement <4 x float> %399, i32 1 %402 = extractelement <4 x float> %399, i32 2 %403 = extractelement <4 x float> %399, i32 3 %404 = bitcast float %138 to i32 %405 = bitcast float %139 to i32 %406 = insertelement <2 x i32> undef, i32 %404, i32 0 %407 = insertelement <2 x i32> %406, i32 %405, i32 1 %408 = bitcast <8 x i32> %95 to <32 x i8> %409 = bitcast <4 x i32> %97 to <16 x i8> %410 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %407, <32 x i8> %408, <16 x i8> %409, i32 2) %411 = extractelement <4 x float> %410, i32 0 %412 = fmul float %400, -2.000000e+00 %413 = fadd float %412, 1.000000e+00 %414 = fmul float %401, -2.000000e+00 %415 = fadd float %414, 1.000000e+00 %416 = fmul float %308, %413 %417 = fmul float %308, %415 %418 = fmul float %413, %308 %419 = fadd float %418, %369 %420 = fmul float %415, %308 %421 = fadd float %420, %371 %422 = fmul float %411, 2.000000e+00 %423 = fadd float %422, -1.000000e+00 %424 = fmul float %402, %312 %425 = fadd float %424, %390 %426 = fmul float %403, %312 %427 = fadd float %426, %391 %428 = fmul float %423, %312 %429 = fadd float %428, %392 %430 = fmul float %383, %416 %431 = fmul float %384, %417 %432 = fmul float %337, %417 %433 = fmul float %338, %416 %434 = fadd float %432, %433 %435 = fadd float %425, %430 %436 = fadd float %427, %431 %437 = fadd float %429, %434 %438 = fadd float %366, %366 %439 = fadd float %367, %367 %440 = fmul float %416, %438 %441 = fmul float %417, %439 %442 = fmul float %366, %417 %443 = fmul float %367, %416 %444 = fadd float %442, %443 %445 = fadd float %435, %440 %446 = fadd float %436, %441 %447 = fadd float %437, %444 %448 = bitcast float %140 to i32 %449 = bitcast float %141 to i32 %450 = insertelement <2 x i32> undef, i32 %448, i32 0 %451 = insertelement <2 x i32> %450, i32 %449, i32 1 %452 = bitcast <8 x i32> %91 to <32 x i8> %453 = bitcast <4 x i32> %93 to <16 x i8> %454 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %451, <32 x i8> %452, <16 x i8> %453, i32 2) %455 = extractelement <4 x float> %454, i32 0 %456 = extractelement <4 x float> %454, i32 1 %457 = extractelement <4 x float> %454, i32 2 %458 = extractelement <4 x float> %454, i32 3 %459 = bitcast float %140 to i32 %460 = bitcast float %141 to i32 %461 = insertelement <2 x i32> undef, i32 %459, i32 0 %462 = insertelement <2 x i32> %461, i32 %460, i32 1 %463 = bitcast <8 x i32> %95 to <32 x i8> %464 = bitcast <4 x i32> %97 to <16 x i8> %465 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %462, <32 x i8> %463, <16 x i8> %464, i32 2) %466 = extractelement <4 x float> %465, i32 0 %467 = fadd float %456, %456 %468 = fadd float %455, %455 %469 = fmul float %467, -1.000000e+00 %470 = fadd float %469, 1.000000e+00 %471 = fmul float %468, 1.000000e+00 %472 = fadd float %471, -1.000000e+00 %473 = fmul float %309, %470 %474 = fmul float %309, %472 %475 = fmul float %472, %309 %476 = fadd float %475, %419 %477 = fmul float %470, %309 %478 = fadd float %477, %421 %479 = fmul float %466, -2.000000e+00 %480 = fadd float %479, 1.000000e+00 %481 = fmul float %457, %313 %482 = fadd float %481, %445 %483 = fmul float %458, %313 %484 = fadd float %483, %446 %485 = fmul float %480, %313 %486 = fadd float %485, %447 %487 = fmul float %383, %474 %488 = fmul float %384, %473 %489 = fmul float %337, %473 %490 = fmul float %338, %474 %491 = fadd float %489, %490 %492 = fadd float %487, %482 %493 = fadd float %488, %484 %494 = fadd float %491, %486 %495 = fmul float %438, %474 %496 = fmul float %439, %473 %497 = fmul float %366, %473 %498 = fmul float %367, %474 %499 = fadd float %497, %498 %500 = fadd float %492, %495 %501 = fadd float %493, %496 %502 = fadd float %494, %499 %503 = fmul float %416, %474 %504 = fmul float %417, %473 %505 = fadd float %503, %503 %506 = fadd float %504, %504 %507 = fmul float %416, %473 %508 = fmul float %417, %474 %509 = fadd float %507, %508 %510 = fadd float %500, %505 %511 = fadd float %501, %506 %512 = fadd float %502, %509 %513 = fsub float -0.000000e+00, %476 %514 = fmul float %476, %513 %515 = fadd float %514, %510 %516 = fsub float -0.000000e+00, %478 %517 = fmul float %478, %516 %518 = fadd float %517, %511 %519 = fsub float -0.000000e+00, %478 %520 = fmul float %476, %519 %521 = fadd float %520, %512 %522 = fmul float %25, %476 %523 = fmul float %26, %478 %524 = fadd float %523, %522 %525 = fmul float %27, 1.000000e+00 %526 = fadd float %524, %525 %527 = call float @llvm.AMDIL.clamp.(float %526, float 0.000000e+00, float 1.000000e+00) %528 = fsub float -0.000000e+00, %142 %529 = fadd float %50, %528 %530 = fsub float -0.000000e+00, %143 %531 = fadd float %51, %530 %532 = fsub float -0.000000e+00, %144 %533 = fadd float %52, %532 %534 = fmul float %529, %529 %535 = fmul float %531, %531 %536 = fadd float %535, %534 %537 = fmul float %533, %533 %538 = fadd float %536, %537 %539 = call float @llvm.maxnum.f32(float %538, float 0x3E7AD7F2A0000000) %540 = call float @llvm.AMDGPU.rsq.clamped.f32(float %539) %541 = fmul float %540, %529 %542 = fmul float %540, %531 %543 = fmul float %540, %533 %544 = fsub float -0.000000e+00, %39 %545 = fmul float %529, %540 %546 = fadd float %545, %544 %547 = fsub float -0.000000e+00, %40 %548 = fmul float %531, %540 %549 = fadd float %548, %547 %550 = fsub float -0.000000e+00, %41 %551 = fmul float %533, %540 %552 = fadd float %551, %550 %553 = fmul float %42, %546 %554 = fadd float %553, %39 %555 = fmul float %42, %549 %556 = fadd float %555, %40 %557 = fmul float %42, %552 %558 = fadd float %557, %41 %559 = fmul float %554, %554 %560 = fmul float %556, %556 %561 = fadd float %560, %559 %562 = fmul float %558, %558 %563 = fadd float %561, %562 %564 = call float @llvm.maxnum.f32(float %563, float 0x3E7AD7F2A0000000) %565 = call float @llvm.AMDGPU.rsq.clamped.f32(float %564) %566 = fmul float %554, %565 %567 = fadd float %566, %25 %568 = fmul float %556, %565 %569 = fadd float %568, %26 %570 = fmul float %558, %565 %571 = fadd float %570, %27 %572 = fdiv float 1.000000e+00, %571 %573 = fsub float -0.000000e+00, %476 %574 = fmul float %567, %572 %575 = fadd float %574, %573 %576 = fsub float -0.000000e+00, %478 %577 = fmul float %569, %572 %578 = fadd float %577, %576 %579 = fdiv float 1.000000e+00, %34 %580 = fadd float %515, %579 %581 = fadd float %518, %579 %582 = fmul float %521, %521 %583 = fsub float -0.000000e+00, %582 %584 = fmul float %580, %581 %585 = fadd float %584, %583 %586 = fmul float %575, %575 %587 = fadd float %521, %521 %588 = fmul float %575, %587 %589 = fsub float -0.000000e+00, %588 %590 = fmul float %580, %578 %591 = fadd float %590, %589 %592 = fmul float %578, %591 %593 = fmul float %586, %581 %594 = fadd float %593, %592 %595 = fmul float %594, 5.000000e-01 %596 = fdiv float 1.000000e+00, %585 %597 = fmul float %596, %595 %598 = fsub float -0.000000e+00, %585 %599 = fcmp oge float %598, 0.000000e+00 %600 = sext i1 %599 to i32 %601 = bitcast i32 %600 to float %602 = bitcast float %601 to i32 %603 = icmp ne i32 %602, 0 %.167 = select i1 %603, float 1.000000e+00, float 0.000000e+00 %604 = fmul float %595, %596 %605 = fadd float %604, -1.600000e+01 %606 = fcmp oge float %605, 0.000000e+00 %607 = sext i1 %606 to i32 %608 = bitcast i32 %607 to float %609 = bitcast float %608 to i32 %610 = icmp ne i32 %609, 0 %temp64.0 = select i1 %610, float 1.000000e+00, float 0.000000e+00 %611 = fmul float %597, 0xBFF7154CA0000000 %612 = call float @llvm.AMDIL.exp.(float %611) %613 = call float @llvm.maxnum.f32(float %585, float 0x3E7AD7F2A0000000) %614 = call float @llvm.AMDGPU.rsq.clamped.f32(float %613) %615 = fmul float %612, %614 %616 = fmul float %579, 1.600000e+01 %617 = fadd float %616, %515 %618 = fmul float %579, 1.600000e+01 %619 = fadd float %618, %518 %620 = fsub float -0.000000e+00, %582 %621 = fmul float %617, %619 %622 = fadd float %621, %620 %623 = fsub float -0.000000e+00, %588 %624 = fmul float %617, %578 %625 = fadd float %624, %623 %626 = fmul float %578, %625 %627 = fmul float %586, %619 %628 = fadd float %627, %626 %629 = fmul float %628, 5.000000e-01 %630 = fdiv float 1.000000e+00, %622 %631 = fmul float %630, %629 %632 = fsub float -0.000000e+00, %622 %633 = fcmp oge float %632, 0.000000e+00 %634 = sext i1 %633 to i32 %635 = bitcast i32 %634 to float %636 = bitcast float %635 to i32 %637 = icmp ne i32 %636, 0 %.168 = select i1 %637, float 1.000000e+00, float 0.000000e+00 %638 = fmul float %629, %630 %639 = fadd float %638, -1.600000e+01 %640 = fcmp oge float %639, 0.000000e+00 %641 = sext i1 %640 to i32 %642 = bitcast i32 %641 to float %643 = bitcast float %642 to i32 %644 = icmp ne i32 %643, 0 %temp56.0 = select i1 %644, float 1.000000e+00, float 0.000000e+00 %645 = fadd float %temp56.0, %.168 %646 = fadd float %temp64.0, %.167 %647 = fmul float %631, 0xBFF7154CA0000000 %648 = call float @llvm.AMDIL.exp.(float %647) %649 = call float @llvm.maxnum.f32(float %622, float 0x3E7AD7F2A0000000) %650 = call float @llvm.AMDGPU.rsq.clamped.f32(float %649) %651 = fmul float %650, %648 %652 = fsub float -0.000000e+00, %43 %653 = fmul float %529, %540 %654 = fadd float %653, %652 %655 = fsub float -0.000000e+00, %44 %656 = fmul float %531, %540 %657 = fadd float %656, %655 %658 = fsub float -0.000000e+00, %45 %659 = fmul float %533, %540 %660 = fadd float %659, %658 %661 = fmul float %46, %654 %662 = fadd float %661, %43 %663 = fmul float %46, %657 %664 = fadd float %663, %44 %665 = fmul float %46, %660 %666 = fadd float %665, %45 %667 = fmul float %662, %662 %668 = fmul float %664, %664 %669 = fadd float %668, %667 %670 = fmul float %666, %666 %671 = fadd float %669, %670 %672 = call float @llvm.maxnum.f32(float %671, float 0x3E7AD7F2A0000000) %673 = call float @llvm.AMDGPU.rsq.clamped.f32(float %672) %674 = fmul float %662, %673 %675 = fadd float %674, %28 %676 = fmul float %664, %673 %677 = fadd float %676, %29 %678 = fmul float %666, %673 %679 = fadd float %678, %30 %680 = fdiv float 1.000000e+00, %679 %681 = fsub float -0.000000e+00, %476 %682 = fmul float %675, %680 %683 = fadd float %682, %681 %684 = fsub float -0.000000e+00, %478 %685 = fmul float %677, %680 %686 = fadd float %685, %684 %687 = fdiv float 1.000000e+00, %38 %688 = fadd float %687, %515 %689 = fadd float %687, %518 %690 = fsub float -0.000000e+00, %582 %691 = fmul float %688, %689 %692 = fadd float %691, %690 %693 = fmul float %683, %683 %694 = fmul float %587, %683 %695 = fsub float -0.000000e+00, %694 %696 = fmul float %688, %686 %697 = fadd float %696, %695 %698 = fmul float %686, %697 %699 = fmul float %693, %689 %700 = fadd float %699, %698 %701 = fmul float %700, 5.000000e-01 %702 = fdiv float 1.000000e+00, %692 %703 = fmul float %701, %702 %704 = fsub float -0.000000e+00, %692 %705 = fcmp oge float %704, 0.000000e+00 %706 = sext i1 %705 to i32 %707 = bitcast i32 %706 to float %708 = bitcast float %707 to i32 %709 = icmp ne i32 %708, 0 %.169 = select i1 %709, float 1.000000e+00, float 0.000000e+00 %710 = fmul float %701, %702 %711 = fadd float %710, -1.600000e+01 %712 = fcmp oge float %711, 0.000000e+00 %713 = sext i1 %712 to i32 %714 = bitcast i32 %713 to float %715 = bitcast float %714 to i32 %716 = icmp ne i32 %715, 0 %temp28.0 = select i1 %716, float 1.000000e+00, float 0.000000e+00 %717 = fadd float %temp28.0, %.169 %718 = fmul float %703, 0xBFF7154CA0000000 %719 = fmul float %651, 0x3F747AE140000000 %720 = call float @llvm.maxnum.f32(float %692, float 0x3E7AD7F2A0000000) %721 = call float @llvm.AMDGPU.rsq.clamped.f32(float %720) %722 = call float @llvm.AMDIL.exp.(float %718) %723 = fmul float %721, %722 %724 = fmul float %56, %143 %725 = fmul float %57, %143 %726 = fmul float %58, %143 %727 = fmul float %142, %53 %728 = fadd float %727, %724 %729 = fmul float %142, %54 %730 = fadd float %729, %725 %731 = fmul float %142, %55 %732 = fadd float %731, %726 %733 = fmul float %144, %59 %734 = fadd float %733, %728 %735 = fmul float %144, %60 %736 = fadd float %735, %730 %737 = fmul float %144, %61 %738 = fadd float %737, %732 %739 = fadd float %734, %62 %740 = fadd float %736, %63 %741 = fadd float %738, %64 %742 = fdiv float 1.000000e+00, %741 %743 = fmul float %742, %739 %744 = fmul float %742, %740 %745 = fmul float %743, 5.000000e-01 %746 = fadd float %745, -5.000000e-01 %747 = fmul float %744, -5.000000e-01 %748 = fadd float %747, -5.000000e-01 %749 = fmul float %515, 6.000000e+00 %750 = fadd float %749, %746 %751 = fmul float %518, 6.000000e+00 %752 = fadd float %751, %748 %753 = fmul float %692, 2.000000e+00 %754 = fadd float %753, %750 %755 = fmul float %692, 2.000000e+00 %756 = fadd float %755, %752 %757 = fmul float %692, 2.000000e+00 %758 = fadd float %757, %750 %759 = fmul float %692, -2.000000e+00 %760 = fadd float %759, %752 %761 = bitcast float %758 to i32 %762 = bitcast float %760 to i32 %763 = insertelement <2 x i32> undef, i32 %761, i32 0 %764 = insertelement <2 x i32> %763, i32 %762, i32 1 %765 = bitcast <8 x i32> %99 to <32 x i8> %766 = bitcast <4 x i32> %101 to <16 x i8> %767 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %764, <32 x i8> %765, <16 x i8> %766, i32 2) %768 = extractelement <4 x float> %767, i32 0 %769 = extractelement <4 x float> %767, i32 1 %770 = extractelement <4 x float> %767, i32 2 %771 = extractelement <4 x float> %767, i32 3 %772 = fmul float %768, 2.500000e-01 %773 = fmul float %769, 2.500000e-01 %774 = fmul float %770, 2.500000e-01 %775 = fmul float %771, 2.500000e-01 %776 = bitcast float %754 to i32 %777 = bitcast float %756 to i32 %778 = insertelement <2 x i32> undef, i32 %776, i32 0 %779 = insertelement <2 x i32> %778, i32 %777, i32 1 %780 = bitcast <8 x i32> %99 to <32 x i8> %781 = bitcast <4 x i32> %101 to <16 x i8> %782 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %779, <32 x i8> %780, <16 x i8> %781, i32 2) %783 = extractelement <4 x float> %782, i32 0 %784 = extractelement <4 x float> %782, i32 1 %785 = extractelement <4 x float> %782, i32 2 %786 = extractelement <4 x float> %782, i32 3 %787 = fmul float %783, 2.500000e-01 %788 = fadd float %787, %772 %789 = fmul float %784, 2.500000e-01 %790 = fadd float %789, %773 %791 = fmul float %785, 2.500000e-01 %792 = fadd float %791, %774 %793 = fmul float %786, 2.500000e-01 %794 = fadd float %793, %775 %795 = fmul float %692, -2.000000e+00 %796 = fadd float %795, %750 %797 = fmul float %692, 6.000000e+00 %798 = fadd float %797, %752 %799 = bitcast float %796 to i32 %800 = bitcast float %798 to i32 %801 = insertelement <2 x i32> undef, i32 %799, i32 0 %802 = insertelement <2 x i32> %801, i32 %800, i32 1 %803 = bitcast <8 x i32> %99 to <32 x i8> %804 = bitcast <4 x i32> %101 to <16 x i8> %805 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %802, <32 x i8> %803, <16 x i8> %804, i32 2) %806 = extractelement <4 x float> %805, i32 0 %807 = extractelement <4 x float> %805, i32 1 %808 = extractelement <4 x float> %805, i32 2 %809 = extractelement <4 x float> %805, i32 3 %810 = fmul float %806, 2.500000e-01 %811 = fadd float %810, %788 %812 = fmul float %807, 2.500000e-01 %813 = fadd float %812, %790 %814 = fmul float %808, 2.500000e-01 %815 = fadd float %814, %792 %816 = fmul float %809, 2.500000e-01 %817 = fadd float %816, %794 %818 = fmul float %692, -2.000000e+00 %819 = fadd float %818, %750 %820 = fmul float %692, -2.000000e+00 %821 = fadd float %820, %752 %822 = bitcast float %819 to i32 %823 = bitcast float %821 to i32 %824 = insertelement <2 x i32> undef, i32 %822, i32 0 %825 = insertelement <2 x i32> %824, i32 %823, i32 1 %826 = bitcast <8 x i32> %99 to <32 x i8> %827 = bitcast <4 x i32> %101 to <16 x i8> %828 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %825, <32 x i8> %826, <16 x i8> %827, i32 2) %829 = extractelement <4 x float> %828, i32 0 %830 = extractelement <4 x float> %828, i32 1 %831 = extractelement <4 x float> %828, i32 2 %832 = extractelement <4 x float> %828, i32 3 %833 = fmul float %829, 2.500000e-01 %834 = fadd float %833, %811 %835 = fmul float %830, 2.500000e-01 %836 = fadd float %835, %813 %837 = fmul float %831, 2.500000e-01 %838 = fadd float %837, %815 %839 = fmul float %832, 2.500000e-01 %840 = fadd float %839, %817 %841 = call float @fabs(float %834) %842 = call float @llvm.log2.f32(float %841) %843 = call float @fabs(float %836) %844 = call float @llvm.log2.f32(float %843) %845 = call float @fabs(float %838) %846 = call float @llvm.log2.f32(float %845) %847 = fmul float %842, 0x40019999A0000000 %848 = fmul float %844, 0x40019999A0000000 %849 = fmul float %846, 0x40019999A0000000 %850 = call float @llvm.AMDIL.exp.(float %847) %851 = call float @llvm.AMDIL.exp.(float %848) %852 = call float @llvm.AMDIL.exp.(float %849) %853 = fmul float %476, %541 %854 = fmul float %478, %542 %855 = fadd float %854, %853 %856 = fmul float 1.000000e+00, %543 %857 = fadd float %855, %856 %858 = fsub float -0.000000e+00, %857 %859 = fadd float %858, 1.000000e+00 %860 = fmul float %859, %859 %861 = fmul float %860, %860 %862 = fmul float %859, %861 %863 = fmul float %615, %31 %864 = fmul float %615, %32 %865 = fmul float %615, %33 %866 = fsub float -0.000000e+00, %646 %867 = fsub float -0.000000e+00, %646 %868 = fsub float -0.000000e+00, %646 %869 = fsub float -0.000000e+00, %646 %870 = fcmp oge float %866, 0.000000e+00 %871 = sext i1 %870 to i32 %872 = bitcast i32 %871 to float %873 = bitcast float %872 to i32 %874 = icmp ne i32 %873, 0 %.170 = select i1 %874, float %863, float 0.000000e+00 %875 = fcmp oge float %867, 0.000000e+00 %876 = sext i1 %875 to i32 %877 = bitcast i32 %876 to float %878 = bitcast float %877 to i32 %879 = icmp ne i32 %878, 0 %temp76.0 = select i1 %879, float %864, float 0.000000e+00 %880 = fcmp oge float %868, 0.000000e+00 %881 = sext i1 %880 to i32 %882 = bitcast i32 %881 to float %883 = bitcast float %882 to i32 %884 = icmp ne i32 %883, 0 %.171 = select i1 %884, float %865, float 0.000000e+00 %885 = fcmp oge float %869, 0.000000e+00 %886 = sext i1 %885 to i32 %887 = bitcast i32 %886 to float %888 = bitcast float %887 to i32 %889 = icmp ne i32 %888, 0 %890 = fmul float %723, %35 %891 = fmul float %723, %36 %892 = fmul float %723, %37 %893 = fsub float -0.000000e+00, %717 %894 = fsub float -0.000000e+00, %717 %895 = fsub float -0.000000e+00, %717 %896 = fsub float -0.000000e+00, %717 %897 = fcmp oge float %893, 0.000000e+00 %898 = sext i1 %897 to i32 %899 = bitcast i32 %898 to float %900 = bitcast float %899 to i32 %901 = icmp ne i32 %900, 0 %902 = fcmp oge float %894, 0.000000e+00 %903 = sext i1 %902 to i32 %904 = bitcast i32 %903 to float %905 = bitcast float %904 to i32 %906 = icmp ne i32 %905, 0 %temp68.0 = select i1 %906, float %890, float 0.000000e+00 %907 = fcmp oge float %895, 0.000000e+00 %908 = sext i1 %907 to i32 %909 = bitcast i32 %908 to float %910 = bitcast float %909 to i32 %911 = icmp ne i32 %910, 0 %.172 = select i1 %911, float %891, float 0.000000e+00 %912 = fcmp oge float %896, 0.000000e+00 %913 = sext i1 %912 to i32 %914 = bitcast i32 %913 to float %915 = bitcast float %914 to i32 %916 = icmp ne i32 %915, 0 %temp28.1 = select i1 %916, float %892, float 0.000000e+00 %917 = fadd float %temp68.0, %.170 %918 = fadd float %.172, %temp76.0 %919 = fadd float %temp28.1, %.171 %920 = fmul float %917, %862 %921 = fadd float %920, %252 %922 = fmul float %918, %862 %923 = fadd float %922, %253 %924 = fmul float %919, %862 %925 = fadd float %924, %254 %926 = fmul float %252, %31 %927 = fmul float %253, %32 %928 = fmul float %254, %33 %929 = fmul float %719, %926 %930 = fmul float %719, %927 %931 = fmul float %719, %928 %932 = fsub float -0.000000e+00, %645 %933 = fsub float -0.000000e+00, %645 %934 = fsub float -0.000000e+00, %645 %935 = fsub float -0.000000e+00, %645 %936 = fcmp oge float %932, 0.000000e+00 %937 = sext i1 %936 to i32 %938 = bitcast i32 %937 to float %939 = bitcast float %938 to i32 %940 = icmp ne i32 %939, 0 %.173 = select i1 %940, float %929, float 0.000000e+00 %941 = fcmp oge float %933, 0.000000e+00 %942 = sext i1 %941 to i32 %943 = bitcast i32 %942 to float %944 = bitcast float %943 to i32 %945 = icmp ne i32 %944, 0 %946 = fcmp oge float %934, 0.000000e+00 %947 = sext i1 %946 to i32 %948 = bitcast i32 %947 to float %949 = bitcast float %948 to i32 %950 = icmp ne i32 %949, 0 %temp56.2 = select i1 %950, float %930, float 0.000000e+00 %951 = fcmp oge float %935, 0.000000e+00 %952 = sext i1 %951 to i32 %953 = bitcast i32 %952 to float %954 = bitcast float %953 to i32 %955 = icmp ne i32 %954, 0 %.174 = select i1 %955, float %931, float 0.000000e+00 %956 = fmul float %921, %527 %957 = fadd float %956, %.173 %958 = fmul float %923, %527 %959 = fadd float %958, %temp56.2 %960 = fmul float %925, %527 %961 = fadd float %960, %.174 %962 = fmul float %850, %89 %963 = fadd float %962, %957 %964 = fmul float %851, %89 %965 = fadd float %964, %959 %966 = fmul float %852, %89 %967 = fadd float %966, %961 %968 = fmul float %703, %703 %969 = fmul float %703, %968 %970 = fmul float %232, %969 %971 = fmul float %970, %252 %972 = fadd float %971, %963 %973 = fmul float %970, %253 %974 = fadd float %973, %965 %975 = fmul float %970, %254 %976 = fadd float %975, %967 %977 = fmul float %68, %143 %978 = fmul float %69, %143 %979 = fmul float %70, %143 %980 = fmul float %142, %65 %981 = fadd float %980, %977 %982 = fmul float %142, %66 %983 = fadd float %982, %978 %984 = fmul float %142, %67 %985 = fadd float %984, %979 %986 = fmul float %144, %71 %987 = fadd float %986, %981 %988 = fmul float %144, %72 %989 = fadd float %988, %983 %990 = fmul float %144, %73 %991 = fadd float %990, %985 %992 = fadd float %987, %74 %993 = fadd float %989, %75 %994 = fadd float %991, %76 %995 = fdiv float 1.000000e+00, %994 %996 = fmul float %995, %992 %997 = fmul float %995, %993 %998 = fmul float %996, 5.000000e-01 %999 = fadd float %998, -5.000000e-01 %1000 = fmul float %997, -5.000000e-01 %1001 = fadd float %1000, -5.000000e-01 %1002 = fmul float %999, 1.000000e+00 %1003 = fadd float %1002, 0.000000e+00 %1004 = fmul float %1001, -1.000000e+00 %1005 = fadd float %1004, 1.000000e+00 %1006 = bitcast float %1003 to i32 %1007 = bitcast float %1005 to i32 %1008 = insertelement <2 x i32> undef, i32 %1006, i32 0 %1009 = insertelement <2 x i32> %1008, i32 %1007, i32 1 %1010 = bitcast <8 x i32> %131 to <32 x i8> %1011 = bitcast <4 x i32> %133 to <16 x i8> %1012 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1009, <32 x i8> %1010, <16 x i8> %1011, i32 2) %1013 = extractelement <4 x float> %1012, i32 0 %1014 = call float @llvm.AMDIL.clamp.(float %1013, float 0.000000e+00, float 1.000000e+00) %1015 = fmul float %1014, %85 %1016 = fmul float %1015, %972 %1017 = fmul float %1015, %974 %1018 = fmul float %1015, %976 %1019 = fmul float %1016, 0xBFE570A3E0000000 %1020 = fadd float %1019, %972 %1021 = fmul float %1017, 0xBFE570A3E0000000 %1022 = fadd float %1021, %974 %1023 = fmul float %1018, 0xBFE570A3E0000000 %1024 = fadd float %1023, %976 %1025 = fmul float %840, %255 %1026 = fadd float %1025, %255 %1027 = fmul float %840, %255 %1028 = fadd float %1027, %255 %1029 = fmul float %840, %255 %1030 = fadd float %1029, %255 %1031 = fmul float %840, %255 %1032 = fadd float %1031, %255 %1033 = call float @llvm.AMDIL.clamp.(float %1026, float 0.000000e+00, float 1.000000e+00) %1034 = call float @llvm.AMDIL.clamp.(float %1028, float 0.000000e+00, float 1.000000e+00) %1035 = call float @llvm.AMDIL.clamp.(float %1030, float 0.000000e+00, float 1.000000e+00) %1036 = call float @llvm.AMDIL.clamp.(float %1032, float 0.000000e+00, float 1.000000e+00) %1037 = call float @fabs(float %1020) %1038 = call float @llvm.log2.f32(float %1037) %1039 = call float @fabs(float %1022) %1040 = call float @llvm.log2.f32(float %1039) %1041 = call float @fabs(float %1024) %1042 = call float @llvm.log2.f32(float %1041) %1043 = fmul float %1038, 0x3FDD1743E0000000 %1044 = fmul float %1040, 0x3FDD1743E0000000 %1045 = fmul float %1042, 0x3FDD1743E0000000 %1046 = call float @llvm.AMDIL.exp.(float %1043) %1047 = call float @llvm.AMDIL.exp.(float %1044) %1048 = call float @llvm.AMDIL.exp.(float %1045) %1049 = fmul float %147, 1.000000e+00 %1050 = fadd float %1049, 0.000000e+00 %1051 = fmul float %148, -1.000000e+00 %1052 = fadd float %1051, 1.000000e+00 %1053 = bitcast float %1050 to i32 %1054 = bitcast float %1052 to i32 %1055 = insertelement <2 x i32> undef, i32 %1053, i32 0 %1056 = insertelement <2 x i32> %1055, i32 %1054, i32 1 %1057 = bitcast <8 x i32> %119 to <32 x i8> %1058 = bitcast <4 x i32> %121 to <16 x i8> %1059 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1056, <32 x i8> %1057, <16 x i8> %1058, i32 2) %1060 = extractelement <4 x float> %1059, i32 0 %1061 = extractelement <4 x float> %1059, i32 1 %1062 = extractelement <4 x float> %1059, i32 2 %1063 = extractelement <4 x float> %1059, i32 3 %1064 = fsub float -0.000000e+00, %1063 %1065 = fadd float %1064, 1.000000e+00 %1066 = fmul float %1046, %1065 %1067 = fadd float %1066, %1060 %1068 = fmul float %1047, %1065 %1069 = fadd float %1068, %1061 %1070 = fmul float %1048, %1065 %1071 = fadd float %1070, %1062 %1072 = fadd float %1067, 0xBFD99999A0000000 %1073 = fadd float %1069, 0xBFD99999A0000000 %1074 = fadd float %1071, 0xBFD99999A0000000 %1075 = fmul float %162, %1072 %1076 = fadd float %1075, 0x3FD99999A0000000 %1077 = fmul float %162, %1073 %1078 = fadd float %1077, 0x3FD99999A0000000 %1079 = fmul float %162, %1074 %1080 = fadd float %1079, 0x3FD99999A0000000 %1081 = bitcast float %1076 to i32 %1082 = bitcast float %1078 to i32 %1083 = bitcast float %1080 to i32 %1084 = insertelement <4 x i32> undef, i32 %1081, i32 0 %1085 = insertelement <4 x i32> %1084, i32 %1082, i32 1 %1086 = insertelement <4 x i32> %1085, i32 %1083, i32 2 %1087 = insertelement <4 x i32> %1086, i32 undef, i32 3 %1088 = bitcast <8 x i32> %127 to <32 x i8> %1089 = bitcast <4 x i32> %129 to <16 x i8> %1090 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1087, <32 x i8> %1088, <16 x i8> %1089, i32 3) %1091 = extractelement <4 x float> %1090, i32 0 %1092 = extractelement <4 x float> %1090, i32 1 %1093 = extractelement <4 x float> %1090, i32 2 %1094 = fsub float -0.000000e+00, %162 %1095 = fmul float %163, %1094 %1096 = fadd float %1095, %162 %1097 = call float @llvm.AMDGPU.lrp(float %1096, float %1091, float %1076) %1098 = call float @llvm.AMDGPU.lrp(float %1096, float %1092, float %1078) %1099 = call float @llvm.AMDGPU.lrp(float %1096, float %1093, float %1080) %1100 = fdiv float 1.000000e+00, %146 %1101 = fmul float %1100, %145 %1102 = fmul float %1101, 5.000000e-01 %1103 = fadd float %1102, 5.000000e-01 %1104 = bitcast float %1103 to i32 %1105 = bitcast float %49 to i32 %1106 = insertelement <2 x i32> undef, i32 %1104, i32 0 %1107 = insertelement <2 x i32> %1106, i32 %1105, i32 1 %1108 = bitcast <8 x i32> %115 to <32 x i8> %1109 = bitcast <4 x i32> %117 to <16 x i8> %1110 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1107, <32 x i8> %1108, <16 x i8> %1109, i32 2) %1111 = extractelement <4 x float> %1110, i32 0 %1112 = fsub float -0.000000e+00, %50 %1113 = fadd float %1112, %142 %1114 = fsub float -0.000000e+00, %51 %1115 = fadd float %1114, %143 %1116 = fsub float -0.000000e+00, %52 %1117 = fadd float %1116, %144 %1118 = fmul float %1113, %1113 %1119 = fmul float %1115, %1115 %1120 = fadd float %1119, %1118 %1121 = fmul float %1117, %1117 %1122 = fadd float %1120, %1121 %1123 = fmul float %1122, %48 %1124 = fmul float %1117, %47 %1125 = fmul float %1124, 0x3FF7154CA0000000 %1126 = call float @llvm.AMDIL.exp.(float %1125) %1127 = fsub float -0.000000e+00, %1126 %1128 = fadd float %1127, 1.000000e+00 %1129 = fmul float %1128, %1123 %1130 = fdiv float 1.000000e+00, %1117 %1131 = fmul float %1130, %1129 %1132 = fmul float %1131, 0x3FF7154CA0000000 %1133 = call float @llvm.AMDIL.exp.(float %1132) %1134 = call float @llvm.AMDIL.clamp.(float %1133, float 0.000000e+00, float 1.000000e+00) %1135 = fsub float -0.000000e+00, %1134 %1136 = fadd float %1135, 1.000000e+00 %1137 = fmul float %1136, %1111 %1138 = fsub float -0.000000e+00, %1097 %1139 = fadd float %1138, %86 %1140 = fsub float -0.000000e+00, %1098 %1141 = fadd float %1140, %87 %1142 = fsub float -0.000000e+00, %1099 %1143 = fadd float %1142, %88 %1144 = fmul float %1137, %1139 %1145 = fadd float %1144, %1097 %1146 = fmul float %1137, %1141 %1147 = fadd float %1146, %1098 %1148 = fmul float %1137, %1143 %1149 = fadd float %1148, %1099 %1150 = call i32 @llvm.SI.packf16(float %1145, float %1147) %1151 = bitcast i32 %1150 to float %1152 = call i32 @llvm.SI.packf16(float %1149, float %1036) %1153 = bitcast i32 %1152 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1151, float %1153, float %1151, float %1153) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #4 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } attributes #4 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v11, v0, 1, 6, [m0] ; C82C1900 v_interp_p2_f32 v11, [v11], v1, 1, 6, [m0] ; C82D1901 v_sub_f32_e32 v3, 1.0, v11 ; 080616F2 v_interp_p1_f32 v12, v0, 0, 6, [m0] ; C8301800 v_interp_p2_f32 v12, [v12], v1, 0, 6, [m0] ; C8311801 v_add_f32_e32 v2, 0, v12 ; 06041880 s_load_dwordx4 s[8:11], s[4:5], 0x20 ; C0840520 s_load_dwordx8 s[12:19], s[6:7], 0x40 ; C0C60740 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430402 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v4|, 0 ; D00C0100 20010104 v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; D2000002 00018280 v_cmp_ne_i32_e64 s[0:1], v2, 0 ; D10A0000 00010102 v_mov_b32_e32 v2, 0x80000000 ; 7E0402FF 80000000 v_cndmask_b32_e64 v2, v2, -1.0, s[0:1] ; D2000002 1001E702 v_cmp_lt_f32_e64 s[16:17], v2, 0 ; D0020010 00010102 v_interp_p1_f32 v14, v0, 3, 6, [m0] ; C8381B00 v_interp_p2_f32 v14, [v14], v1, 3, 6, [m0] ; C8391B01 v_interp_p1_f32 v13, v0, 2, 6, [m0] ; C8341A00 v_interp_p2_f32 v13, [v13], v1, 2, 6, [m0] ; C8351A01 v_interp_p1_f32 v2, v0, 2, 5, [m0] ; C8081600 v_interp_p2_f32 v2, [v2], v1, 2, 5, [m0] ; C8091601 v_interp_p1_f32 v3, v0, 1, 5, [m0] ; C80C1500 v_interp_p2_f32 v3, [v3], v1, 1, 5, [m0] ; C80D1501 v_interp_p1_f32 v8, v0, 2, 4, [m0] ; C8201200 v_interp_p2_f32 v8, [v8], v1, 2, 4, [m0] ; C8211201 v_interp_p1_f32 v9, v0, 1, 4, [m0] ; C8241100 v_interp_p2_f32 v9, [v9], v1, 1, 4, [m0] ; C8251101 v_interp_p1_f32 v10, v0, 0, 4, [m0] ; C8281000 v_interp_p2_f32 v10, [v10], v1, 0, 4, [m0] ; C8291001 v_interp_p1_f32 v16, v0, 1, 3, [m0] ; C8400D00 v_interp_p2_f32 v16, [v16], v1, 1, 3, [m0] ; C8410D01 v_interp_p1_f32 v15, v0, 0, 3, [m0] ; C83C0C00 v_interp_p2_f32 v15, [v15], v1, 0, 3, [m0] ; C83D0C01 v_interp_p1_f32 v18, v0, 1, 2, [m0] ; C8480900 v_interp_p2_f32 v18, [v18], v1, 1, 2, [m0] ; C8490901 v_interp_p1_f32 v17, v0, 0, 2, [m0] ; C8440800 v_interp_p2_f32 v17, [v17], v1, 0, 2, [m0] ; C8450801 v_interp_p1_f32 v23, v0, 1, 1, [m0] ; C85C0500 v_interp_p2_f32 v23, [v23], v1, 1, 1, [m0] ; C85D0501 v_interp_p1_f32 v22, v0, 0, 1, [m0] ; C8580400 v_interp_p2_f32 v22, [v22], v1, 0, 1, [m0] ; C8590401 v_interp_p1_f32 v25, v0, 1, 0, [m0] ; C8640100 v_interp_p2_f32 v25, [v25], v1, 1, 0, [m0] ; C8650101 v_interp_p1_f32 v24, v0, 0, 0, [m0] ; C8600000 v_interp_p2_f32 v24, [v24], v1, 0, 0, [m0] ; C8610001 s_load_dwordx4 s[28:31], s[2:3], 0x0 ; C08E0300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s51, s[28:31], 0x5c ; C2199D5C s_buffer_load_dword s0, s[28:31], 0x5a ; C2001D5A s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 0 ; 04830000 s_buffer_load_dword s0, s[28:31], 0x59 ; C2001D59 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 2 ; 04830400 s_buffer_load_dword s0, s[28:31], 0x58 ; C2001D58 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 1 ; 04830200 s_buffer_load_dword s0, s[28:31], 0x54 ; C2001D54 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 22 ; 04832C00 s_buffer_load_dword s24, s[28:31], 0x51 ; C20C1D51 s_buffer_load_dword s18, s[28:31], 0x50 ; C2091D50 s_buffer_load_dword s8, s[28:31], 0x4d ; C2041D4D s_buffer_load_dword s19, s[28:31], 0x4c ; C2099D4C s_buffer_load_dword s11, s[28:31], 0x49 ; C2059D49 s_buffer_load_dword s9, s[28:31], 0x48 ; C2049D48 s_buffer_load_dword s12, s[28:31], 0x45 ; C2061D45 s_buffer_load_dword s10, s[28:31], 0x44 ; C2051D44 s_buffer_load_dword s0, s[28:31], 0x43 ; C2001D43 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 25 ; 04833200 s_buffer_load_dword s0, s[28:31], 0x41 ; C2001D41 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 29 ; 04833A00 s_buffer_load_dword s0, s[28:31], 0x40 ; C2001D40 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 23 ; 04832E00 s_buffer_load_dword s0, s[28:31], 0x3f ; C2001D3F s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 28 ; 04833800 s_buffer_load_dword s0, s[28:31], 0x3d ; C2001D3D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 31 ; 04833E00 s_buffer_load_dword s0, s[28:31], 0x3c ; C2001D3C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 24 ; 04833000 s_buffer_load_dword s0, s[28:31], 0x3b ; C2001D3B s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 32 ; 04834000 s_buffer_load_dword s0, s[28:31], 0x39 ; C2001D39 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 34 ; 04834400 s_buffer_load_dword s0, s[28:31], 0x38 ; C2001D38 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 27 ; 04833600 s_buffer_load_dword s0, s[28:31], 0x37 ; C2001D37 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 33 ; 04834200 s_buffer_load_dword s0, s[28:31], 0x35 ; C2001D35 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 35 ; 04834600 s_buffer_load_dword s0, s[28:31], 0x34 ; C2001D34 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 30 ; 04833C00 s_buffer_load_dword s0, s[28:31], 0x33 ; C2001D33 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 42 ; 04835400 s_buffer_load_dword s0, s[28:31], 0x31 ; C2001D31 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 50 ; 04836400 s_buffer_load_dword s0, s[28:31], 0x30 ; C2001D30 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 40 ; 04835000 s_buffer_load_dword s0, s[28:31], 0x2f ; C2001D2F s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 45 ; 04835A00 s_buffer_load_dword s0, s[28:31], 0x2d ; C2001D2D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 51 ; 04836600 s_buffer_load_dword s0, s[28:31], 0x2c ; C2001D2C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 41 ; 04835200 s_buffer_load_dword s0, s[28:31], 0x2b ; C2001D2B s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 52 ; 04836800 s_buffer_load_dword s0, s[28:31], 0x29 ; C2001D29 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 53 ; 04836A00 s_buffer_load_dword s0, s[28:31], 0x28 ; C2001D28 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 43 ; 04835600 s_buffer_load_dword s100, s[28:31], 0x27 ; C2321D27 s_buffer_load_dword s0, s[28:31], 0x25 ; C2001D25 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 54 ; 04836C00 s_buffer_load_dword s0, s[28:31], 0x24 ; C2001D24 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 44 ; 04835800 s_buffer_load_dword s67, s[28:31], 0x22 ; C2219D22 s_buffer_load_dword s99, s[28:31], 0x21 ; C2319D21 s_buffer_load_dword s98, s[28:31], 0x20 ; C2311D20 s_buffer_load_dword s13, s[28:31], 0x1f ; C2069D1F s_buffer_load_dword s0, s[28:31], 0x1d ; C2001D1D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 3 ; 04830600 s_buffer_load_dword s0, s[28:31], 0x1c ; C2001D1C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 4 ; 04830800 s_buffer_load_dword s91, s[28:31], 0x1b ; C22D9D1B s_buffer_load_dword s96, s[28:31], 0x1a ; C2301D1A s_buffer_load_dword s20, s[28:31], 0x19 ; C20A1D19 s_buffer_load_dword s101, s[28:31], 0x18 ; C2329D18 s_buffer_load_dword s35, s[28:31], 0x17 ; C2119D17 s_buffer_load_dword s66, s[28:31], 0x16 ; C2211D16 s_buffer_load_dword s34, s[28:31], 0x15 ; C2111D15 s_buffer_load_dword s32, s[28:31], 0x14 ; C2101D14 s_buffer_load_dword s23, s[28:31], 0x13 ; C20B9D13 s_buffer_load_dword s90, s[28:31], 0x12 ; C22D1D12 s_buffer_load_dword s0, s[28:31], 0x11 ; C2001D11 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 26 ; 04833400 s_buffer_load_dword s0, s[28:31], 0x10 ; C2001D10 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s0, 17 ; 04832200 s_buffer_load_dword s49, s[28:31], 0xf ; C2189D0F s_buffer_load_dword s89, s[28:31], 0xe ; C22C9D0E s_buffer_load_dword s33, s[28:31], 0xd ; C2109D0D s_buffer_load_dword s64, s[28:31], 0xc ; C2201D0C s_buffer_load_dword s21, s[28:31], 0xa ; C20A9D0A s_buffer_load_dword s65, s[28:31], 0x9 ; C2209D09 s_buffer_load_dword s22, s[28:31], 0x8 ; C20B1D08 s_buffer_load_dword s97, s[28:31], 0x6 ; C2309D06 s_buffer_load_dword s48, s[28:31], 0x5 ; C2181D05 s_buffer_load_dword s88, s[28:31], 0x4 ; C22C1D04 s_buffer_load_dword s50, s[28:31], 0x0 ; C2191D00 v_mov_b32_e32 v1, s13 ; 7E02020D v_mov_b32_e32 v19, s96 ; 7E260260 v_mov_b32_e32 v21, s20 ; 7E2A0214 v_mov_b32_e32 v20, s101 ; 7E280265 v_mov_b32_e32 v26, s66 ; 7E340242 v_mov_b32_e32 v28, s34 ; 7E380222 v_mov_b32_e32 v27, s32 ; 7E360220 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[14:15], s[16:17] ; BE8E2410 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_mul_f32_e32 v29, s11, v9 ; 103A120B v_mad_f32 v29, v10, s12, v29 ; D282001D 0474190A v_mad_f32 v29, v8, s8, v29 ; D282001D 04741108 v_add_f32_e32 v29, s24, v29 ; 063A3A18 v_sub_f32_e32 v30, 1.0, v29 ; 083C3AF2 v_mul_f32_e32 v31, s9, v9 ; 103E1209 v_mad_f32 v31, v10, s10, v31 ; D282001F 047C150A v_mad_f32 v31, v8, s19, v31 ; D282001F 047C2708 v_add_f32_e32 v31, s18, v31 ; 063E3E12 v_add_f32_e32 v29, 0, v31 ; 063A3E80 s_load_dwordx4 s[60:63], s[4:5], 0x0 ; C09E0500 s_load_dwordx4 s[16:19], s[4:5], 0x4 ; C0880504 s_load_dwordx4 s[76:79], s[4:5], 0x8 ; C0A60508 s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C s_load_dwordx4 s[92:95], s[4:5], 0x10 ; C0AE0510 s_load_dwordx4 s[44:47], s[4:5], 0x14 ; C0960514 s_load_dwordx4 s[8:11], s[4:5], 0x18 ; C0840518 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s8, 5 ; 04830A08 v_writelane_b32 v65, s9, 6 ; 04830C09 v_writelane_b32 v65, s10, 7 ; 04830E0A v_writelane_b32 v65, s11, 8 ; 0483100B s_load_dwordx4 s[8:11], s[4:5], 0x1c ; C084051C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s8, 36 ; 04834808 v_writelane_b32 v65, s9, 37 ; 04834A09 v_writelane_b32 v65, s10, 38 ; 04834C0A v_writelane_b32 v65, s11, 39 ; 04834E0B s_load_dwordx4 s[8:11], s[4:5], 0x24 ; C0840524 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s8, 18 ; 04832408 v_writelane_b32 v65, s9, 19 ; 04832609 v_writelane_b32 v65, s10, 20 ; 0483280A v_writelane_b32 v65, s11, 21 ; 04832A0B s_load_dwordx4 s[8:11], s[4:5], 0x28 ; C0840528 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s8, 46 ; 04835C08 v_writelane_b32 v65, s9, 47 ; 04835E09 v_writelane_b32 v65, s10, 48 ; 0483600A v_writelane_b32 v65, s11, 49 ; 0483620B s_load_dwordx8 s[8:15], s[6:7], 0x0 ; C0C40700 s_load_dwordx8 s[68:75], s[6:7], 0x8 ; C0E20708 s_load_dwordx8 s[80:87], s[6:7], 0x10 ; C0E80710 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[24:31], s[6:7], 0x20 ; C0CC0720 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s24, 55 ; 04836E18 v_writelane_b32 v65, s25, 56 ; 04837019 v_writelane_b32 v65, s26, 57 ; 0483721A v_writelane_b32 v65, s27, 58 ; 0483741B v_writelane_b32 v65, s28, 59 ; 0483761C v_writelane_b32 v65, s29, 60 ; 0483781D v_writelane_b32 v65, s30, 61 ; 04837A1E v_writelane_b32 v65, s31, 62 ; 04837C1F s_load_dwordx8 s[24:31], s[6:7], 0x28 ; C0CC0728 s_load_dwordx8 s[36:43], s[6:7], 0x30 ; C0D20730 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v65, s36, 9 ; 04831224 v_writelane_b32 v65, s37, 10 ; 04831425 v_writelane_b32 v65, s38, 11 ; 04831626 v_writelane_b32 v65, s39, 12 ; 04831827 v_writelane_b32 v65, s40, 13 ; 04831A28 v_writelane_b32 v65, s41, 14 ; 04831C29 v_writelane_b32 v65, s42, 15 ; 04831E2A v_writelane_b32 v65, s43, 16 ; 0483202B s_load_dwordx8 s[36:43], s[6:7], 0x38 ; C0D20738 image_sample v[29:30], 3, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[52:59], s[0:3] ; F0800300 000D1D1D s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v31, 2.0, v30, -1.0 ; D282001F 03CE3CF4 v_mad_f32 v29, 2.0, v29, -1.0 ; D282001D 03CE3AF4 v_subrev_f32_e32 v30, v31, v29 ; 0A3C3B1F v_add_f32_e32 v32, 2.0, v30 ; 06403CF4 v_mul_f32_e32 v32, v32, v32 ; 10404120 v_sub_f32_e64 v29, -v31, v29 ; D208001D 20023B1F v_add_f32_e32 v31, 2.0, v29 ; 063E3AF4 v_mul_f32_e32 v31, v31, v31 ; 103E3F1F v_mul_f32_e32 v31, v31, v31 ; 103E3F1F v_mad_f32 v33, v32, v32, v31 ; D2820021 047E4120 v_sub_f32_e32 v30, 2.0, v30 ; 083C3CF4 v_mul_f32_e32 v30, v30, v30 ; 103C3D1E v_mad_f32 v33, v30, v30, v33 ; D2820021 04863D1E v_sub_f32_e32 v29, 2.0, v29 ; 083A3AF4 v_mul_f32_e32 v29, v29, v29 ; 103A3B1D v_mad_f32 v33, v29, v29, v33 ; D2820021 04863B1D v_rcp_f32_e32 v33, v33 ; 7E425521 v_mul_f32_e32 v32, v32, v32 ; 10404120 v_mul_f32_e32 v32, v32, v33 ; 10404320 v_rcp_f32_e32 v34, s50 ; 7E445432 v_mul_f32_e32 v32, v32, v34 ; 10404520 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[8:15], s[60:63] ; F0800F00 01E22318 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v39, 2.0, v36, -1.0 ; D2820027 03CE48F4 v_mul_f32_e32 v40, v39, v32 ; 10504127 v_mad_f32 v41, v32, v39, v40 ; D2820029 04A24F20 v_mul_f32_e32 v31, v31, v33 ; 103E431F v_mul_f32_e32 v31, v31, v34 ; 103E451F v_mul_f32_e32 v42, v31, v31 ; 10543F1F image_sample v[43:46], 15, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[8:15], s[60:63] ; F0800F00 01E22B16 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v47, v46, v42 ; 105E552E v_mul_f32_e32 v48, v32, v32 ; 10604120 v_mad_f32 v47, v38, v48, v47 ; D282002F 04BE6126 v_mad_f32 v49, 2.0, v44, -1.0 ; D2820031 03CE58F4 v_mul_f32_e32 v50, v49, v31 ; 10643F31 v_mad_f32 v47, v50, v41, v47 ; D282002F 04BE5332 v_mul_f32_e32 v30, v30, v30 ; 103C3D1E v_mul_f32_e32 v30, v30, v33 ; 103C431E v_mul_f32_e32 v30, v30, v34 ; 103C451E v_mul_f32_e32 v51, v30, v30 ; 10663D1E image_sample v[52:55], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[8:15], s[60:63] ; F0800F00 01E23411 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v47, v55, v51, v47 ; D282002F 04BE6737 v_mad_f32 v56, -2.0, v53, 1.0 ; D2820038 03CA6AF5 v_mul_f32_e32 v57, v56, v30 ; 10723D38 v_mad_f32 v47, v41, v57, v47 ; D282002F 04BE7329 v_mad_f32 v49, v31, v49, v50 ; D2820031 04CA631F v_mad_f32 v47, v57, v49, v47 ; D282002F 04BE6339 v_mul_f32_e32 v29, v29, v29 ; 103A3B1D v_mul_f32_e32 v29, v29, v33 ; 103A431D v_mul_f32_e32 v29, v29, v34 ; 103A451D v_mul_f32_e32 v33, v29, v29 ; 10423B1D image_sample v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[8:15], s[60:63] ; F0800F00 01E23A0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v34, v61, v33, v47 ; D2820022 04BE433D v_mad_f32 v47, -2.0, v59, 1.0 ; D282002F 03CA76F5 v_mul_f32_e32 v62, v47, v29 ; 107C3B2F v_mad_f32 v34, v41, v62, v34 ; D2820022 048A7D29 v_mad_f32 v34, v49, v62, v34 ; D2820022 048A7D31 v_mul_f32_e32 v41, v62, v57 ; 1052733E v_mad_f32 v34, 2.0, v41, v34 ; D2820022 048A52F4 v_mad_f32 v39, v32, v39, v50 ; D2820027 04CA4F20 v_mad_f32 v39, v30, v56, v39 ; D2820027 049E711E v_mad_f32 v39, v29, v47, v39 ; D2820027 049E5F1D v_mad_f32 v34, -v39, v39, v34 ; D2820022 248A4F27 v_rcp_f32_e32 v41, s49 ; 7E525431 v_add_f32_e32 v47, v41, v34 ; 065E4529 v_mad_f32 v49, 2.0, v35, -1.0 ; D2820031 03CE46F4 v_mul_f32_e32 v56, v49, v32 ; 10704131 v_mad_f32 v63, v32, v49, v56 ; D282003F 04E26320 v_mul_f32_e32 v64, v45, v42 ; 1080552D v_mad_f32 v35, v37, v48, v64 ; D2820023 05026125 v_mad_f32 v36, -2.0, v43, 1.0 ; D2820024 03CA56F5 v_mul_f32_e32 v37, v36, v31 ; 104A3F24 v_mad_f32 v35, v37, v63, v35 ; D2820023 048E7F25 v_mad_f32 v35, v54, v51, v35 ; D2820023 048E6736 v_mad_f32 v38, -2.0, v52, 1.0 ; D2820026 03CA68F5 v_mul_f32_e32 v43, v38, v30 ; 10563D26 v_mad_f32 v35, v63, v43, v35 ; D2820023 048E573F v_mad_f32 v31, v31, v36, v37 ; D282001F 0496491F v_mad_f32 v35, v43, v31, v35 ; D2820023 048E3F2B v_mad_f32 v35, v60, v33, v35 ; D2820023 048E433C v_mad_f32 v36, 2.0, v58, -1.0 ; D2820024 03CE74F4 v_mul_f32_e32 v44, v36, v29 ; 10583B24 v_mad_f32 v35, v63, v44, v35 ; D2820023 048E593F v_mad_f32 v31, v31, v44, v35 ; D282001F 048E591F v_mul_f32_e32 v35, v44, v43 ; 1046572C v_mad_f32 v31, 2.0, v35, v31 ; D282001F 047E46F4 v_mad_f32 v32, v32, v49, v37 ; D2820020 04966320 v_mad_f32 v30, v30, v38, v32 ; D282001E 04824D1E v_mad_f32 v29, v29, v36, v30 ; D282001D 047A491D v_mad_f32 v30, -v29, v29, v31 ; D282001E 247E3B1D v_add_f32_e32 v31, v41, v30 ; 063E3D29 v_mul_f32_e32 v32, v37, v40 ; 10405125 v_mad_f32 v32, v56, v50, v32 ; D2820020 04826538 image_sample v22, 1, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[68:75], s[16:19] ; F0800100 00911616 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v22, -2.0, v22, 1.0 ; D2820016 03CA2CF5 v_mul_f32_e32 v22, v22, v42 ; 102C5516 image_sample v23, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[68:75], s[16:19] ; F0800100 00911718 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, 2.0, v23, -1.0 ; D2820017 03CE2EF4 v_mad_f32 v22, v23, v48, v22 ; D2820016 045A6117 v_add_f32_e32 v22, v32, v22 ; 062C2D20 image_sample v17, 1, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[68:75], s[16:19] ; F0800100 00911111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v17, 2.0, v17, -1.0 ; D2820011 03CE22F4 v_mad_f32 v17, v17, v51, v22 ; D2820011 045A6711 v_mul_f32_e32 v18, v43, v40 ; 1024512B v_mad_f32 v18, v56, v57, v18 ; D2820012 044A7338 v_add_f32_e32 v17, v18, v17 ; 06222312 v_mul_f32_e32 v18, v43, v50 ; 1024652B v_mad_f32 v18, v37, v57, v18 ; D2820012 044A7325 v_add_f32_e32 v17, v18, v17 ; 06222312 image_sample v15, 1, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[68:75], s[16:19] ; F0800100 00910F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, -2.0, v15, 1.0 ; D282000F 03CA1EF5 v_mad_f32 v15, v15, v33, v17 ; D282000F 0446430F v_mul_f32_e32 v16, v44, v40 ; 1020512C v_mad_f32 v16, v56, v62, v16 ; D2820010 04427D38 v_add_f32_e32 v15, v15, v16 ; 061E210F v_mul_f32_e32 v16, v44, v50 ; 1020652C v_mad_f32 v16, v37, v62, v16 ; D2820010 04427D25 v_add_f32_e32 v15, v16, v15 ; 061E1F10 v_mul_f32_e32 v16, v44, v57 ; 1020732C v_mad_f32 v16, v43, v62, v16 ; D2820010 04427D2B v_add_f32_e32 v15, v16, v15 ; 061E1F10 v_mad_f32 v15, -v29, v39, v15 ; D282000F 243E4F1D v_mul_f32_e32 v16, v15, v15 ; 10201F0F v_mad_f32 v17, v31, v47, -v16 ; D2820011 84425F1F v_max_f32_e32 v18, 0x33d6bf95, v17 ; 202422FF 33D6BF95 v_rsq_clamp_f32_e32 v18, v18 ; 7E245912 v_rcp_f32_e32 v22, v17 ; 7E2C5511 v_add_f32_e32 v15, v15, v15 ; 061E1F0F v_sub_f32_e32 v23, s99, v9 ; 082E1263 v_sub_f32_e32 v24, s98, v10 ; 08301462 v_mul_f32_e32 v25, v24, v24 ; 10323118 v_mad_f32 v25, v23, v23, v25 ; D2820019 04662F17 v_sub_f32_e32 v32, s67, v8 ; 08401043 v_mad_f32 v25, v32, v32, v25 ; D2820019 04664120 v_max_f32_e32 v25, 0x33d6bf95, v25 ; 203232FF 33D6BF95 v_rsq_clamp_f32_e32 v25, v25 ; 7E325919 v_mad_f32 v33, v25, v23, -s34 ; D2820021 808A2F19 v_mad_f32 v28, v33, s35, v28 ; D282001C 04704721 v_mad_f32 v33, v25, v24, -s32 ; D2820021 80823119 v_mad_f32 v27, v33, s35, v27 ; D282001B 046C4721 v_mul_f32_e32 v33, v27, v27 ; 1042371B v_mad_f32 v33, v28, v28, v33 ; D2820021 0486391C v_mad_f32 v35, v25, v32, -s66 ; D2820023 810A4119 v_mad_f32 v26, v35, s35, v26 ; D282001A 04684723 v_mad_f32 v33, v26, v26, v33 ; D2820021 0486351A v_max_f32_e32 v33, 0x33d6bf95, v33 ; 204242FF 33D6BF95 v_rsq_clamp_f32_e32 v33, v33 ; 7E425921 v_mad_f32 v27, v27, v33, s88 ; D282001B 0162431B v_mad_f32 v26, v26, v33, s97 ; D282001A 0186431A v_rcp_f32_e32 v26, v26 ; 7E34551A v_mad_f32 v27, v27, v26, -v29 ; D282001B 8476351B v_mul_f32_e32 v35, v15, v27 ; 1046370F v_mad_f32 v28, v28, v33, s48 ; D282001C 00C2431C v_mad_f32 v26, v28, v26, -v39 ; D282001A 849E351C v_mad_f32 v28, v31, v26, -v35 ; D282001C 848E351F v_mul_f32_e32 v28, v28, v26 ; 1038351C v_mul_f32_e32 v27, v27, v27 ; 1036371B v_mad_f32 v28, v27, v47, v28 ; D282001C 04725F1B v_mul_f32_e32 v28, 0.5, v28 ; 103838F0 v_mul_f32_e32 v31, v28, v22 ; 103E2D1C v_mov_b32_e32 v33, 0xbfb8aa65 ; 7E4202FF BFB8AA65 v_mul_f32_e32 v31, v33, v31 ; 103E3F21 v_exp_f32_e32 v31, v31 ; 7E3E4B1F v_mul_f32_e32 v18, v18, v31 ; 10243F12 v_mul_f32_e32 v31, s89, v18 ; 103E2459 v_cmp_ge_f32_e64 s[0:1], -v17, 0 ; D00C0000 20010111 v_mov_b32_e32 v43, 0 ; 7E560280 v_cndmask_b32_e64 v17, 0, -1, s[0:1] ; D2000811 00018280 v_cmp_ne_i32_e64 s[0:1], v17, 0 ; D10A0000 00010111 v_cndmask_b32_e64 v17, 0, 1.0, s[0:1] ; D2000811 0001E480 v_mov_b32_e32 v36, 0xc1800000 ; 7E4802FF C1800000 v_mad_f32 v22, v22, v28, v36 ; D2820016 04923916 v_cmp_ge_f32_e64 s[0:1], v22, 0 ; D00C0000 00010116 v_cndmask_b32_e64 v22, 0, -1, s[0:1] ; D2000016 00018280 v_cmp_ne_i32_e64 s[0:1], v22, 0 ; D10A0000 00010116 v_cndmask_b32_e64 v22, 0, 1.0, s[0:1] ; D2000016 0001E480 v_add_f32_e32 v17, v17, v22 ; 06222D11 v_cmp_ge_f32_e64 s[0:1], -v17, 0 ; D00C0000 20010111 v_cndmask_b32_e64 v17, 0, -1, s[0:1] ; D2000811 00018280 v_cmp_ne_i32_e64 s[4:5], v17, 0 ; D10A0004 00010111 v_cndmask_b32_e64 v17, 0, v31, s[4:5] ; D2000811 00123E80 v_rcp_f32_e32 v22, s23 ; 7E2C5417 v_add_f32_e32 v28, v34, v22 ; 06382D22 v_add_f32_e32 v22, v30, v22 ; 062C2D1E v_mad_f32 v31, v22, v28, -v16 ; D282001F 84423916 v_max_f32_e32 v37, 0x33d6bf95, v31 ; 204A3EFF 33D6BF95 v_rsq_clamp_f32_e32 v37, v37 ; 7E4A5925 v_rcp_f32_e32 v38, v31 ; 7E4C551F v_mad_f32 v40, v25, v23, -s20 ; D2820028 80522F19 v_mad_f32 v21, v40, s91, v21 ; D2820015 0454B728 v_mad_f32 v40, v25, v24, -s101 ; D2820028 81963119 v_mad_f32 v20, v40, s91, v20 ; D2820014 0450B728 v_mul_f32_e32 v40, v20, v20 ; 10502914 v_mad_f32 v40, v21, v21, v40 ; D2820028 04A22B15 v_mad_f32 v46, v25, v32, -s96 ; D282002E 81824119 v_mad_f32 v19, v46, s91, v19 ; D2820013 044CB72E v_mad_f32 v40, v19, v19, v40 ; D2820028 04A22713 v_max_f32_e32 v40, 0x33d6bf95, v40 ; 205050FF 33D6BF95 v_rsq_clamp_f32_e32 v40, v40 ; 7E505928 v_mad_f32 v20, v20, v40, s22 ; D2820014 005A5114 v_mad_f32 v19, v19, v40, s21 ; D2820013 00565113 v_rcp_f32_e32 v19, v19 ; 7E265513 v_mad_f32 v20, v20, v19, -v29 ; D2820014 84762714 v_mul_f32_e32 v15, v20, v15 ; 101E1F14 v_mad_f32 v21, v21, v40, s65 ; D2820015 01065115 v_mad_f32 v19, v21, v19, -v39 ; D2820013 849E2715 v_mad_f32 v15, v22, v19, -v15 ; D282000F 843E2716 v_mul_f32_e32 v15, v15, v19 ; 101E270F v_mul_f32_e32 v19, v20, v20 ; 10262914 v_mad_f32 v15, v19, v28, v15 ; D282000F 043E3913 v_mul_f32_e32 v15, 0.5, v15 ; 101E1EF0 v_mul_f32_e32 v19, v38, v15 ; 10261F26 v_mul_f32_e32 v20, v33, v19 ; 10282721 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v20, v20, v37 ; 10284B14 v_mul_f32_e32 v21, s90, v20 ; 102A285A v_cmp_ge_f32_e64 s[0:1], -v31, 0 ; D00C0000 2001011F v_cndmask_b32_e64 v22, 0, -1, s[0:1] ; D2000016 00018280 v_cmp_ne_i32_e64 s[0:1], v22, 0 ; D10A0000 00010116 v_cndmask_b32_e64 v22, 0, 1.0, s[0:1] ; D2000016 0001E480 v_mad_f32 v15, v15, v38, v36 ; D282000F 04924D0F v_cmp_ge_f32_e64 s[0:1], v15, 0 ; D00C0000 0001010F v_cndmask_b32_e64 v15, 0, -1, s[0:1] ; D200080F 00018280 v_cmp_ne_i32_e64 s[0:1], v15, 0 ; D10A0000 0001010F v_cndmask_b32_e64 v15, 0, 1.0, s[0:1] ; D200080F 0001E480 v_add_f32_e32 v15, v22, v15 ; 061E1F16 v_cmp_ge_f32_e64 s[0:1], -v15, 0 ; D00C0000 2001010F v_cndmask_b32_e64 v15, 0, -1, s[0:1] ; D200080F 00018280 v_cmp_ne_i32_e64 s[14:15], v15, 0 ; D10A000E 0001010F v_cndmask_b32_e64 v15, 0, v21, s[14:15] ; D200080F 003A2A80 v_add_f32_e32 v15, v17, v15 ; 061E1F11 v_mul_f32_e32 v17, v24, v25 ; 10223318 v_mul_f32_e32 v17, v17, v29 ; 10223B11 v_mul_f32_e32 v21, v23, v25 ; 102A3317 v_mad_f32 v17, v39, v21, v17 ; D2820011 04462B27 v_mad_f32 v17, v25, v32, v17 ; D2820011 04464119 v_sub_f32_e32 v17, 1.0, v17 ; 082222F2 v_mul_f32_e32 v21, v17, v17 ; 102A2311 v_mul_f32_e32 v21, v21, v21 ; 102A2B15 v_mul_f32_e32 v17, v21, v17 ; 10222315 image_sample v13, 8, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[24:31], s[44:47] ; F0800800 01660D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v13, 4.0, v13 ; 101A1AF6 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_sub_f32_e32 v42, 1.0, v13 ; 08541AF2 v_mov_b32_e32 v44, v43 ; 7E58032B v_readlane_b32 s16, v65, 55 ; 02216F41 v_readlane_b32 s17, v65, 56 ; 02237141 v_readlane_b32 s18, v65, 57 ; 02257341 v_readlane_b32 s19, v65, 58 ; 02277541 v_readlane_b32 s20, v65, 59 ; 02297741 v_readlane_b32 s21, v65, 60 ; 022B7941 v_readlane_b32 s22, v65, 61 ; 022D7B41 v_readlane_b32 s23, v65, 62 ; 022F7D41 s_nop 2 ; BF800002 image_sample_l v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[42:45], s[16:23], s[92:95] ; F0900F00 02E4152A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v14, v23 ; 7E1C4F17 v_mov_b32_e32 v25, 0x400ccccd ; 7E3202FF 400CCCCD v_mul_legacy_f32_e32 v14, v25, v14 ; 0E1C1D19 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mad_f32 v15, v15, v17, v14 ; D282000F 043A230F v_mov_b32_e32 v28, 0x41800000 ; 7E3802FF 41800000 v_mad_f32 v32, v41, v28, v34 ; D2820020 048A3929 v_mad_f32 v28, v41, v28, v30 ; D282001C 047A3929 v_mad_f32 v16, v28, v32, -v16 ; D2820010 8442411C v_max_f32_e32 v37, 0x33d6bf95, v16 ; 204A20FF 33D6BF95 v_rsq_clamp_f32_e32 v37, v37 ; 7E4A5925 v_rcp_f32_e32 v38, v16 ; 7E4C5510 v_mad_f32 v28, v28, v26, -v35 ; D282001C 848E351C v_mul_f32_e32 v26, v28, v26 ; 1034351C v_mad_f32 v26, v27, v32, v26 ; D282001A 046A411B v_mul_f32_e32 v26, 0.5, v26 ; 103434F0 v_mul_f32_e32 v27, v26, v38 ; 10364D1A v_mul_f32_e32 v27, v33, v27 ; 10363721 v_exp_f32_e32 v27, v27 ; 7E364B1B v_mul_f32_e32 v27, v27, v37 ; 10364B1B v_mul_f32_e32 v27, 0x3ba3d70a, v27 ; 103636FF 3BA3D70A v_mul_f32_e32 v28, s89, v14 ; 10381C59 v_mul_f32_e32 v28, v28, v27 ; 1038371C v_cmp_ge_f32_e64 s[0:1], -v16, 0 ; D00C0000 20010110 v_cndmask_b32_e64 v16, 0, -1, s[0:1] ; D2000010 00018280 v_cmp_ne_i32_e64 s[0:1], v16, 0 ; D10A0000 00010110 v_cndmask_b32_e64 v16, 0, 1.0, s[0:1] ; D2000010 0001E480 v_mad_f32 v26, v38, v26, v36 ; D282001A 04923526 v_cmp_ge_f32_e64 s[0:1], v26, 0 ; D00C0000 0001011A v_cndmask_b32_e64 v26, 0, -1, s[0:1] ; D200001A 00018280 v_cmp_ne_i32_e64 s[0:1], v26, 0 ; D10A0000 0001011A v_cndmask_b32_e64 v26, 0, 1.0, s[0:1] ; D200001A 0001E480 v_add_f32_e32 v16, v16, v26 ; 06203510 v_cmp_ge_f32_e64 s[0:1], -v16, 0 ; D00C0000 20010110 v_cndmask_b32_e64 v16, 0, -1, s[0:1] ; D2000010 00018280 v_cmp_ne_i32_e64 s[20:21], v16, 0 ; D10A0014 00010110 v_cndmask_b32_e64 v16, 0, v28, s[20:21] ; D2000010 00523880 v_mul_f32_e32 v26, s88, v29 ; 10343A58 v_mad_f32 v26, s48, v39, v26 ; D282001A 046A4E30 v_add_f32_e32 v26, s97, v26 ; 06343461 v_add_f32_e64 v26, 0, v26 clamp ; D206081A 00023480 v_mad_f32 v15, v15, v26, v16 ; D282000F 0442350F v_readlane_b32 s0, v65, 53 ; 02016B41 s_nop 2 ; BF800002 v_mul_f32_e32 v16, s0, v9 ; 10201200 v_readlane_b32 s0, v65, 54 ; 02016D41 s_nop 2 ; BF800002 v_mad_f32 v16, v10, s0, v16 ; D2820010 0440010A v_readlane_b32 s0, v65, 51 ; 02016741 s_nop 2 ; BF800002 v_mad_f32 v16, v8, s0, v16 ; D2820010 04400108 v_readlane_b32 s0, v65, 50 ; 02016541 s_nop 2 ; BF800002 v_add_f32_e32 v16, s0, v16 ; 06202000 v_readlane_b32 s0, v65, 52 ; 02016941 s_nop 2 ; BF800002 v_mul_f32_e32 v28, s0, v9 ; 10381200 v_mad_f32 v28, v10, s100, v28 ; D282001C 0470C90A v_readlane_b32 s0, v65, 45 ; 02015B41 s_nop 2 ; BF800002 v_mad_f32 v28, v8, s0, v28 ; D282001C 04700108 v_readlane_b32 s0, v65, 42 ; 02015541 s_nop 2 ; BF800002 v_add_f32_e32 v28, s0, v28 ; 06383800 v_rcp_f32_e32 v28, v28 ; 7E38551C v_mul_f32_e32 v16, v16, v28 ; 10203910 v_mad_f32 v16, -0.5, v16, -0.5 ; D2820010 03C620F1 v_mov_b32_e32 v29, 0x40c00000 ; 7E3A02FF 40C00000 v_mad_f32 v16, v34, v29, v16 ; D2820010 04423B22 v_mad_f32 v33, -2.0, v31, v16 ; D2820021 04423EF5 v_readlane_b32 s0, v65, 43 ; 02015741 s_nop 2 ; BF800002 v_mul_f32_e32 v34, s0, v9 ; 10441200 v_readlane_b32 s0, v65, 44 ; 02015941 s_nop 2 ; BF800002 v_mad_f32 v34, v10, s0, v34 ; D2820022 0488010A v_readlane_b32 s0, v65, 41 ; 02015341 s_nop 2 ; BF800002 v_mad_f32 v34, v8, s0, v34 ; D2820022 04880108 v_readlane_b32 s0, v65, 40 ; 02015141 s_nop 2 ; BF800002 v_add_f32_e32 v34, s0, v34 ; 06444400 v_mul_f32_e32 v28, v34, v28 ; 10383922 v_mad_f32 v28, 0.5, v28, -0.5 ; D282001C 03C638F0 v_mad_f32 v28, v30, v29, v28 ; D282001C 04723B1E v_mad_f32 v32, 2.0, v31, v28 ; D2820020 04723EF4 image_sample v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[32:33], s[80:87], s[76:79] ; F0800F00 02742220 v_mov_b32_e32 v30, 0x3e800000 ; 7E3C02FF 3E800000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v38, 0x3e800000, v36 ; 104C48FF 3E800000 v_mad_f32 v39, 2.0, v31, v16 ; D2820027 04423EF4 v_mov_b32_e32 v40, v32 ; 7E500320 v_mov_b32_e32 v41, v33 ; 7E520321 v_mov_b32_e32 v41, v39 ; 7E520327 image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[80:87], s[76:79] ; F0800F00 02742728 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v38, v41, v30, v38 ; D2820026 049A3D29 v_mad_f32 v44, v31, v29, v16 ; D282002C 04423B1F v_mad_f32 v43, -2.0, v31, v28 ; D282002B 04723EF5 image_sample v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[80:87], s[76:79] ; F0800F00 02742D2B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v47, v30, v38 ; D2820010 049A3D2F v_mov_b32_e32 v44, v33 ; 7E580321 image_sample v[49:52], 15, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[80:87], s[76:79] ; F0800F00 0274312B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v51, v30, v16 ; D2820010 04423D33 v_log_f32_e64 v16, |v16| ; D34E0110 00000110 v_mul_f32_e32 v16, 0x400ccccd, v16 ; 102020FF 400CCCCD v_exp_f32_e32 v16, v16 ; 7E204B10 v_mad_f32 v15, v16, s51, v15 ; D282000F 043C6710 v_mul_f32_e32 v16, v19, v19 ; 10202713 v_mul_f32_e32 v16, v16, v19 ; 10202710 v_mul_f32_e32 v13, v16, v13 ; 101A1B10 v_mad_f32 v14, v13, v14, v15 ; D282000E 043E1D0D v_readlane_b32 s0, v65, 34 ; 02014541 s_nop 2 ; BF800002 v_mul_f32_e32 v15, s0, v9 ; 101E1200 v_readlane_b32 s0, v65, 35 ; 02014741 s_nop 2 ; BF800002 v_mad_f32 v15, v10, s0, v15 ; D282000F 043C010A v_readlane_b32 s0, v65, 31 ; 02013F41 s_nop 2 ; BF800002 v_mad_f32 v15, v8, s0, v15 ; D282000F 043C0108 v_readlane_b32 s0, v65, 29 ; 02013B41 s_nop 2 ; BF800002 v_add_f32_e32 v15, s0, v15 ; 061E1E00 v_readlane_b32 s0, v65, 32 ; 02014141 s_nop 2 ; BF800002 v_mul_f32_e32 v16, s0, v9 ; 10201200 v_readlane_b32 s0, v65, 33 ; 02014341 s_nop 2 ; BF800002 v_mad_f32 v16, v10, s0, v16 ; D2820010 0440010A v_readlane_b32 s0, v65, 28 ; 02013941 s_nop 2 ; BF800002 v_mad_f32 v16, v8, s0, v16 ; D2820010 04400108 v_readlane_b32 s0, v65, 25 ; 02013341 s_nop 2 ; BF800002 v_add_f32_e32 v16, s0, v16 ; 06202000 v_rcp_f32_e32 v16, v16 ; 7E205510 v_mul_f32_e32 v15, v15, v16 ; 101E210F v_mad_f32 v15, -0.5, v15, -0.5 ; D282000F 03C61EF1 v_sub_f32_e32 v29, 1.0, v15 ; 083A1EF2 v_readlane_b32 s0, v65, 27 ; 02013741 s_nop 2 ; BF800002 v_mul_f32_e32 v15, s0, v9 ; 101E1200 v_readlane_b32 s0, v65, 30 ; 02013D41 s_nop 2 ; BF800002 v_mad_f32 v15, v10, s0, v15 ; D282000F 043C010A v_readlane_b32 s0, v65, 24 ; 02013141 s_nop 2 ; BF800002 v_mad_f32 v15, v8, s0, v15 ; D282000F 043C0108 v_readlane_b32 s0, v65, 23 ; 02012F41 s_nop 2 ; BF800002 v_add_f32_e32 v15, s0, v15 ; 061E1E00 v_mul_f32_e32 v15, v15, v16 ; 101E210F v_mad_f32 v15, 0.5, v15, -0.5 ; D282000F 03C61EF0 v_add_f32_e32 v28, 0, v15 ; 06381E80 s_load_dwordx8 s[24:31], s[6:7], 0x50 ; C0CC0750 v_readlane_b32 s0, v65, 46 ; 02015D41 v_readlane_b32 s1, v65, 47 ; 02035F41 v_readlane_b32 s2, v65, 48 ; 02056141 v_readlane_b32 s3, v65, 49 ; 02076341 s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v15, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[24:31], s[0:3] ; F0800100 00060F1C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e64 v15, 0, v15 clamp ; D206080F 00021E80 v_readlane_b32 s0, v65, 22 ; 02012D41 s_nop 2 ; BF800002 v_mul_f32_e32 v15, s0, v15 ; 101E1E00 v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_mov_b32_e32 v19, 0xbf2b851f ; 7E2602FF BF2B851F v_mad_f32 v14, v16, v19, v14 ; D282000E 043A2710 v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_mul_f32_e32 v14, 0x3ee8ba1f, v14 ; 101C1CFF 3EE8BA1F v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_sub_f32_e32 v29, 1.0, v11 ; 083A16F2 v_add_f32_e32 v28, 0, v12 ; 06381880 v_readlane_b32 s0, v65, 36 ; 02014941 v_readlane_b32 s1, v65, 37 ; 02034B41 v_readlane_b32 s2, v65, 38 ; 02054D41 v_readlane_b32 s3, v65, 39 ; 02074F41 s_nop 2 ; BF800002 image_sample v[53:56], 15, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[36:43], s[0:3] ; F0800F00 0009351C s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v11, 1.0, v56 ; 081670F2 v_mad_f32 v12, v14, v11, v55 ; D282000C 04DE170E v_mov_b32_e32 v14, 0xbecccccd ; 7E1C02FF BECCCCCD v_add_f32_e32 v12, v14, v12 ; 0618190E v_mov_b32_e32 v16, 0x3ecccccd ; 7E2002FF 3ECCCCCD v_mad_f32 v59, v4, v12, v16 ; D282003B 04421904 v_mul_f32_e32 v12, s33, v18 ; 10182421 v_cndmask_b32_e64 v12, 0, v12, s[4:5] ; D200000C 00121880 v_readlane_b32 s0, v65, 26 ; 02013541 s_nop 2 ; BF800002 v_mul_f32_e32 v28, s0, v20 ; 10382800 v_cndmask_b32_e64 v28, 0, v28, s[14:15] ; D200001C 003A3880 v_add_f32_e32 v12, v12, v28 ; 0618390C v_log_f32_e32 v28, v22 ; 7E384F16 v_mul_legacy_f32_e32 v28, v25, v28 ; 0E383919 v_exp_f32_e32 v28, v28 ; 7E384B1C v_mad_f32 v12, v12, v17, v28 ; D282000C 0472230C v_mul_f32_e32 v29, s33, v28 ; 103A3821 v_mul_f32_e32 v29, v29, v27 ; 103A371D v_cndmask_b32_e64 v29, 0, v29, s[20:21] ; D200081D 00523A80 v_mad_f32 v12, v12, v26, v29 ; D282000C 0476350C v_mul_f32_e32 v29, 0x3e800000, v35 ; 103A46FF 3E800000 v_mad_f32 v29, v40, v30, v29 ; D282001D 04763D28 v_mad_f32 v29, v46, v30, v29 ; D282001D 04763D2E v_mad_f32 v29, v50, v30, v29 ; D282001D 04763D32 v_log_f32_e64 v29, |v29| ; D34E011D 0000011D v_mul_f32_e32 v29, 0x400ccccd, v29 ; 103A3AFF 400CCCCD v_exp_f32_e32 v29, v29 ; 7E3A4B1D v_mad_f32 v12, v29, s51, v12 ; D282000C 0430671D v_mad_f32 v12, v13, v28, v12 ; D282000C 0432390D v_mul_f32_e32 v28, v12, v15 ; 10381F0C v_mad_f32 v12, v28, v19, v12 ; D282000C 0432271C v_log_f32_e64 v12, |v12| ; D34E010C 0000010C v_mul_f32_e32 v12, 0x3ee8ba1f, v12 ; 101818FF 3EE8BA1F v_exp_f32_e32 v12, v12 ; 7E184B0C v_mad_f32 v12, v12, v11, v54 ; D282000C 04DA170C v_add_f32_e32 v12, v14, v12 ; 0618190E v_mad_f32 v58, v4, v12, v16 ; D282003A 04421904 v_mul_f32_e32 v12, s64, v18 ; 10182440 v_cndmask_b32_e64 v12, 0, v12, s[4:5] ; D200000C 00121880 v_readlane_b32 s0, v65, 17 ; 02012341 s_nop 2 ; BF800002 v_mul_f32_e32 v18, s0, v20 ; 10242800 v_cndmask_b32_e64 v18, 0, v18, s[14:15] ; D2000012 003A2480 v_add_f32_e32 v12, v12, v18 ; 0618250C v_log_f32_e32 v18, v21 ; 7E244F15 v_mul_legacy_f32_e32 v18, v25, v18 ; 0E242519 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mad_f32 v12, v12, v17, v18 ; D282000C 044A230C v_mul_f32_e32 v17, s64, v18 ; 10222440 v_mul_f32_e32 v17, v17, v27 ; 10223711 v_cndmask_b32_e64 v17, 0, v17, s[20:21] ; D2000811 00522280 v_mad_f32 v12, v12, v26, v17 ; D282000C 0446350C v_mul_f32_e32 v17, 0x3e800000, v34 ; 102244FF 3E800000 v_mad_f32 v17, v39, v30, v17 ; D2820011 04463D27 v_mad_f32 v17, v45, v30, v17 ; D2820011 04463D2D v_mad_f32 v17, v49, v30, v17 ; D2820011 04463D31 v_log_f32_e64 v17, |v17| ; D34E0111 00000111 v_mul_f32_e32 v17, 0x400ccccd, v17 ; 102222FF 400CCCCD v_exp_f32_e32 v17, v17 ; 7E224B11 v_mad_f32 v12, v17, s51, v12 ; D282000C 04306711 v_mad_f32 v12, v13, v18, v12 ; D282000C 0432250D v_mul_f32_e32 v13, v12, v15 ; 101A1F0C v_mad_f32 v12, v13, v19, v12 ; D282000C 0432270D v_log_f32_e64 v12, |v12| ; D34E010C 0000010C v_mul_f32_e32 v12, 0x3ee8ba1f, v12 ; 101818FF 3EE8BA1F v_exp_f32_e32 v12, v12 ; 7E184B0C v_mad_f32 v11, v12, v11, v53 ; D282000B 04D6170C v_add_f32_e32 v11, v14, v11 ; 0616170E v_mad_f32 v57, v4, v11, v16 ; D2820039 04421704 s_load_dwordx8 s[0:7], s[6:7], 0x48 ; C0C00748 v_readlane_b32 s8, v65, 18 ; 02112541 v_readlane_b32 s9, v65, 19 ; 02132741 v_readlane_b32 s10, v65, 20 ; 02152941 v_readlane_b32 s11, v65, 21 ; 02172B41 s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[57:60], s[0:7], s[8:11] ; F0800700 00400B39 v_mad_f32 v4, -v6, v4, v4 ; D2820004 24120906 v_sub_f32_e32 v5, 1.0, v4 ; 080A08F2 v_mul_f32_e32 v6, v58, v5 ; 100C0B3A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v4, v12, v6 ; D2820006 041A1904 v_readlane_b32 s0, v65, 2 ; 02010541 s_nop 2 ; BF800002 v_sub_f32_e32 v7, s0, v6 ; 080E0C00 v_subrev_f32_e32 v9, s99, v9 ; 0A121263 v_subrev_f32_e32 v10, s98, v10 ; 0A141462 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mad_f32 v9, v9, v9, v10 ; D2820009 042A1309 v_subrev_f32_e32 v8, s67, v8 ; 0A101043 v_mad_f32 v9, v8, v8, v9 ; D2820009 04261108 v_readlane_b32 s0, v65, 3 ; 02010741 s_nop 2 ; BF800002 v_mul_f32_e32 v9, s0, v9 ; 10121200 v_readlane_b32 s0, v65, 4 ; 02010941 s_nop 2 ; BF800002 v_mul_f32_e32 v10, s0, v8 ; 10141000 v_mul_f32_e32 v10, 0x3fb8aa65, v10 ; 101414FF 3FB8AA65 v_exp_f32_e32 v10, v10 ; 7E144B0A v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_mul_f32_e32 v9, v9, v10 ; 10121509 v_rcp_f32_e32 v8, v8 ; 7E105508 v_mul_f32_e32 v8, v9, v8 ; 10101109 v_mul_f32_e32 v8, 0x3fb8aa65, v8 ; 101010FF 3FB8AA65 v_exp_f32_e32 v8, v8 ; 7E104B08 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_rcp_f32_e32 v2, v2 ; 7E045502 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mad_f32 v0, 0.5, v2, 0.5 ; D2820000 03C204F0 v_readlane_b32 s0, v65, 5 ; 02010B41 v_readlane_b32 s1, v65, 6 ; 02030D41 v_readlane_b32 s2, v65, 7 ; 02050F41 v_readlane_b32 s3, v65, 8 ; 02071141 s_nop 2 ; BF800002 v_readlane_b32 s4, v65, 9 ; 02091341 v_readlane_b32 s5, v65, 10 ; 020B1541 v_readlane_b32 s6, v65, 11 ; 020D1741 v_readlane_b32 s7, v65, 12 ; 020F1941 v_readlane_b32 s8, v65, 13 ; 02111B41 v_readlane_b32 s9, v65, 14 ; 02131D41 v_readlane_b32 s10, v65, 15 ; 02151F41 v_readlane_b32 s11, v65, 16 ; 02172141 s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0800100 00010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mad_f32 v1, v0, v7, v6 ; D2820001 041A0F00 v_mul_f32_e32 v2, v57, v5 ; 10040B39 v_mad_f32 v2, v4, v11, v2 ; D2820002 040A1704 v_readlane_b32 s0, v65, 1 ; 02010341 s_nop 2 ; BF800002 v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v2, v0, v3, v2 ; D2820002 040A0700 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 v_mul_f32_e32 v2, v59, v5 ; 10040B3B v_mad_f32 v2, v4, v13, v2 ; D2820002 040A1B04 v_readlane_b32 s0, v65, 0 ; 02010141 s_nop 2 ; BF800002 v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_mul_f32_e32 v2, 0x3e800000, v37 ; 10044AFF 3E800000 v_mad_f32 v2, v42, v30, v2 ; D2820002 040A3D2A v_mad_f32 v2, v48, v30, v2 ; D2820002 040A3D30 v_mad_f32 v2, v52, v30, v2 ; D2820002 040A3D34 v_log_f32_e32 v3, v24 ; 7E064F18 v_mul_legacy_f32_e32 v3, 1.0, v3 ; 0E0606F2 v_exp_f32_e32 v3, v3 ; 7E064B03 v_mad_f32 v2, v2, v3, v3 ; D2820002 040E0702 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL CONST[0..13] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: MUL TEMP[0].xy, CONST[13], IN[1] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: ADD TEMP[1], CONST[12], IN[1].xyxy 3: MUL TEMP[1], TEMP[1], CONST[13].xyxy 4: MUL TEMP[2].xy, TEMP[1], CONST[13].zzzz 5: MOV TEMP[2].xy, TEMP[2].xyxx 6: MUL TEMP[3].xy, TEMP[1].zwzw, CONST[13].wwww 7: MOV TEMP[3].xy, TEMP[3].xyxx 8: MUL TEMP[4].xyz, CONST[1], IN[0].yyyy 9: MOV TEMP[1].xyz, TEMP[4].xyzx 10: MAD TEMP[4].xyz, IN[0].xxxx, CONST[0], TEMP[1] 11: MOV TEMP[1].xyz, TEMP[4].xyzx 12: MAD TEMP[4].xyz, IN[0].zzzz, CONST[2], TEMP[1] 13: MOV TEMP[1].xyz, TEMP[4].xyzx 14: ADD TEMP[4].xyz, TEMP[1], CONST[3] 15: MUL TEMP[5].xy, TEMP[4].yyyy, CONST[9] 16: MOV TEMP[5].xy, TEMP[5].xyxx 17: MAD TEMP[6].xy, TEMP[4].xxxx, CONST[8], TEMP[5] 18: MOV TEMP[5].xy, TEMP[6].xyxx 19: MAD TEMP[6].xy, TEMP[4].zzzz, CONST[10], TEMP[5] 20: MOV TEMP[5].xy, TEMP[6].xyxx 21: ADD TEMP[6].xy, TEMP[5], CONST[11] 22: MOV TEMP[5].xy, TEMP[6].xyxx 23: MAD TEMP[6].xy, TEMP[5], IMM[0].xyzz, IMM[0].yyyy 24: MOV TEMP[6].xy, TEMP[6].xyxx 25: MUL TEMP[5], TEMP[4].yyyy, CONST[5] 26: MAD TEMP[5], TEMP[4].xxxx, CONST[4], TEMP[5] 27: MAD TEMP[5], TEMP[4].zzzz, CONST[6], TEMP[5] 28: MOV TEMP[4].xyz, TEMP[4].xyzx 29: ADD TEMP[1], TEMP[5], CONST[7] 30: MOV TEMP[5].xyz, TEMP[1].xywx 31: MOV TEMP[7].xy, IN[2].xyxx 32: MOV TEMP[6].zw, IMM[0].zzzz 33: MOV TEMP[0].zw, IMM[0].wwzw 34: MOV TEMP[2].zw, IMM[0].wwzw 35: MOV TEMP[3].zw, IMM[0].wwzw 36: MOV TEMP[7].zw, IMM[0].wwzw 37: MOV TEMP[4].w, IMM[0].wwww 38: MOV TEMP[5].w, IMM[0].wwww 39: MOV OUT[6], TEMP[5] 40: MOV OUT[7], TEMP[6] 41: MOV OUT[1], TEMP[0] 42: MOV OUT[0], TEMP[1] 43: MOV OUT[2], TEMP[2] 44: MOV OUT[3], TEMP[3] 45: MOV OUT[4], TEMP[7] 46: MOV OUT[5], TEMP[4] 47: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = add i32 %5, %7 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = fmul float %53, %68 %77 = fmul float %54, %69 %78 = fadd float %49, %68 %79 = fadd float %50, %69 %80 = fadd float %51, %68 %81 = fadd float %52, %69 %82 = fmul float %78, %53 %83 = fmul float %79, %54 %84 = fmul float %80, %53 %85 = fmul float %81, %54 %86 = fmul float %82, %55 %87 = fmul float %83, %55 %88 = fmul float %84, %56 %89 = fmul float %85, %56 %90 = fmul float %16, %62 %91 = fmul float %17, %62 %92 = fmul float %18, %62 %93 = fmul float %61, %13 %94 = fadd float %93, %90 %95 = fmul float %61, %14 %96 = fadd float %95, %91 %97 = fmul float %61, %15 %98 = fadd float %97, %92 %99 = fmul float %63, %19 %100 = fadd float %99, %94 %101 = fmul float %63, %20 %102 = fadd float %101, %96 %103 = fmul float %63, %21 %104 = fadd float %103, %98 %105 = fadd float %100, %22 %106 = fadd float %102, %23 %107 = fadd float %104, %24 %108 = fmul float %106, %43 %109 = fmul float %106, %44 %110 = fmul float %105, %41 %111 = fadd float %110, %108 %112 = fmul float %105, %42 %113 = fadd float %112, %109 %114 = fmul float %107, %45 %115 = fadd float %114, %111 %116 = fmul float %107, %46 %117 = fadd float %116, %113 %118 = fadd float %115, %47 %119 = fadd float %117, %48 %120 = fmul float %118, 5.000000e-01 %121 = fadd float %120, -5.000000e-01 %122 = fmul float %119, -5.000000e-01 %123 = fadd float %122, -5.000000e-01 %124 = fmul float %106, %29 %125 = fmul float %106, %30 %126 = fmul float %106, %31 %127 = fmul float %106, %32 %128 = fmul float %105, %25 %129 = fadd float %128, %124 %130 = fmul float %105, %26 %131 = fadd float %130, %125 %132 = fmul float %105, %27 %133 = fadd float %132, %126 %134 = fmul float %105, %28 %135 = fadd float %134, %127 %136 = fmul float %107, %33 %137 = fadd float %136, %129 %138 = fmul float %107, %34 %139 = fadd float %138, %131 %140 = fmul float %107, %35 %141 = fadd float %140, %133 %142 = fmul float %107, %36 %143 = fadd float %142, %135 %144 = fadd float %137, %37 %145 = fadd float %139, %38 %146 = fadd float %141, %39 %147 = fadd float %143, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %77, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %87, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %88, float %89, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %74, float %75, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %105, float %106, float %107, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %144, float %145, float %147, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %121, float %123, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %144, float %145, float %146, float %147) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s12, s[0:3], 0x35 ; C2060135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s12, v2 ; 100A040C s_buffer_load_dword s13, s[0:3], 0x34 ; C2068134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s13, v1 ; 100C020D v_mov_b32_e32 v7, 1.0 ; 7E0E02F2 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v6, v5, v8, v7 ; F800020F 07080506 s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v5, s14, v2 ; 060A040E v_mul_f32_e32 v5, s12, v5 ; 100A0A0C s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s15, s[0:3], 0x30 ; C2078130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s15, v1 ; 060C020F v_mul_f32_e32 v6, s13, v6 ; 100C0C0D v_mul_f32_e32 v6, s14, v6 ; 100C0C0E exp 15, 33, 0, 0, 0, v6, v5, v8, v7 ; F800021F 07080506 s_buffer_load_dword s14, s[0:3], 0x33 ; C2070133 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v5, s14, v2 ; 060A040E v_mul_f32_e32 v5, s12, v5 ; 100A0A0C s_buffer_load_dword s12, s[0:3], 0x37 ; C2060137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s12, v5 ; 100A0A0C s_buffer_load_dword s14, s[0:3], 0x32 ; C2070132 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s14, v1 ; 0602020E v_mul_f32_e32 v1, s13, v1 ; 1002020D v_mul_f32_e32 v1, s12, v1 ; 1002020C exp 15, 34, 0, 0, 0, v1, v5, v8, v7 ; F800022F 07080501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 35, 0, 0, 0, v1, v2, v8, v7 ; F800023F 07080201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v2, s4, v6 ; D2820000 04180902 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 36, 0, 0, 0, v0, v5, v4, v7 ; F800024F 07040500 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v5 ; 10020A04 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v1 ; D2820001 04040900 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v5 ; 10040A04 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v5 ; 10060A04 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 exp 15, 37, 0, 0, 0, v3, v2, v1, v7 ; F800025F 07010203 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x28 ; C2020128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 s_buffer_load_dword s4, s[0:3], 0x25 ; C2020125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v5 ; 100E0A04 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s4, v7 ; D2820007 041C0904 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mad_f32 v7, -0.5, v7, -0.5 ; D2820007 03C60EF1 exp 15, 38, 0, 0, 0, v6, v7, v8, v8 ; F800026F 08080706 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v5 ; D2820000 04140900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v3, v2, v0, v1 ; F80008CF 01000203 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL CONST[0..14] DCL TEMP[0..24], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.2000} IMM[1] FLT32 { 0.3330, 2.0000, -2.0000, 0.0000} IMM[2] FLT32 { 0.5000, -0.0000, -16.0000, -1.4427} IMM[3] FLT32 { 16.0000, 0.0050, 0.5000, -0.5000} IMM[4] FLT32 { 6.0000, 2.0000, -2.0000, 0.2500} IMM[5] FLT32 { 0.8000, 0.4545, 0.4000, 1.4427} 0: MAD TEMP[0].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[9], 2D 3: MOV TEMP[1].z, TEMP[0] 4: ABS TEMP[2].x, TEMP[0] 5: MOV TEMP[2], -TEMP[2].xxxx 6: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 7: UIF TEMP[3].xxxx :0 8: MOV TEMP[3].x, IMM[0].yyyy 9: ELSE :0 10: MOV TEMP[3].x, IMM[0].zzzz 11: ENDIF 12: MOV TEMP[3].x, TEMP[3].xxxx 13: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 14: UIF TEMP[4].xxxx :0 15: MOV TEMP[4].x, IMM[0].yyyy 16: ELSE :0 17: MOV TEMP[4].x, IMM[0].zzzz 18: ENDIF 19: MOV TEMP[3].y, TEMP[4].xxxx 20: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 21: UIF TEMP[4].xxxx :0 22: MOV TEMP[4].x, IMM[0].yyyy 23: ELSE :0 24: MOV TEMP[4].x, IMM[0].zzzz 25: ENDIF 26: MOV TEMP[3].z, TEMP[4].xxxx 27: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 28: UIF TEMP[2].xxxx :0 29: ELSE :0 30: ENDIF 31: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 32: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 33: OR TEMP[4].x, TEMP[4].xxxx, TEMP[3].yyyy 34: UIF TEMP[4].xxxx :0 35: KILL 36: ENDIF 37: MOV TEMP[3].xy, IN[3].xyyy 38: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 39: POW TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww 40: POW TEMP[4].y, TEMP[3].yyyy, IMM[0].wwww 41: POW TEMP[4].z, TEMP[3].zzzz, IMM[0].wwww 42: MOV TEMP[2].xyz, TEMP[4].xyzx 43: RCP TEMP[3].x, CONST[0].xxxx 44: MUL TEMP[3].y, TEMP[3].xxxx, IMM[1].xxxx 45: MUL TEMP[5].w, TEMP[3].yyyy, TEMP[3].yyyy 46: MOV TEMP[6].xy, IN[0].xyyy 47: TEX TEMP[6], TEMP[6], SAMP[0], 2D 48: MOV TEMP[7].w, TEMP[6].wwzw 49: MOV TEMP[8].xy, IN[0].xyyy 50: TEX TEMP[8].x, TEMP[8], SAMP[1], 2D 51: MAD TEMP[9].xy, TEMP[6], IMM[1].yyyy, IMM[0].yyyy 52: MOV TEMP[7].xy, TEMP[9].xyxx 53: MUL TEMP[10].yz, TEMP[3].yyyy, TEMP[7].xxyw 54: MOV TEMP[9].z, TEMP[10].zyzz 55: MAD TEMP[8].z, TEMP[8].xxxx, IMM[1].yyyy, IMM[0].yyyy 56: MOV TEMP[8].z, TEMP[8].zzzz 57: MOV TEMP[8].xy, TEMP[6].zwzz 58: MOV TEMP[6].xy, IN[1].xyyy 59: TEX TEMP[6], TEMP[6], SAMP[2], 2D 60: MOV TEMP[11].w, TEMP[6].wwww 61: MOV TEMP[12].xy, IN[1].xyyy 62: TEX TEMP[12].xw, TEMP[12], SAMP[3], 2D 63: MOV TEMP[13].w, TEMP[12].wwww 64: ADD TEMP[14].zw, TEMP[6].xyxy, TEMP[6].xyxy 65: MOV TEMP[7].zw, TEMP[14].wwzw 66: MAD TEMP[14].zw, TEMP[7], IMM[0].yxyx, IMM[0].yxxy 67: MOV TEMP[7].zw, TEMP[14].wwzw 68: MUL TEMP[14].zw, TEMP[3].yyyy, TEMP[7] 69: MOV TEMP[7].zw, TEMP[14].wwzw 70: MAD TEMP[15].xy, TEMP[7], TEMP[3].yyyy, TEMP[14].zwzw 71: MOV TEMP[7].xy, TEMP[15].xyxx 72: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].zzzz, IMM[0].xxxx 73: MOV TEMP[13].z, TEMP[12].zzzz 74: MOV TEMP[13].xy, TEMP[6].zwzz 75: MUL TEMP[6].xyz, TEMP[5].wwww, TEMP[13] 76: MOV TEMP[11].xyz, TEMP[6].xyzx 77: MAD TEMP[6].xyz, TEMP[8], TEMP[5].wwww, TEMP[11] 78: MOV TEMP[8].xyz, TEMP[6].xyzx 79: ADD TEMP[6].xw, TEMP[10].yyzz, TEMP[10].yyzz 80: MOV TEMP[9].xw, TEMP[6].xxxw 81: MUL TEMP[6].xy, TEMP[14].zwzw, TEMP[9].xwzw 82: MOV TEMP[11].xy, TEMP[6].xyxx 83: DP2 TEMP[6].x, TEMP[10].yzzz, TEMP[14].wzzz 84: MOV TEMP[11].z, TEMP[6].xxxx 85: ADD TEMP[6].xyz, TEMP[8], TEMP[11] 86: MOV TEMP[8].xyz, TEMP[6].xyzx 87: MOV TEMP[6].xy, IN[2].xyyy 88: TEX TEMP[6], TEMP[6], SAMP[2], 2D 89: MOV TEMP[11].zw, TEMP[6].wwzw 90: MOV TEMP[12].xy, IN[2].xyyy 91: TEX TEMP[12].xw, TEMP[12], SAMP[3], 2D 92: MOV TEMP[13].xw, TEMP[12].xxxw 93: MAD TEMP[15].xy, TEMP[6].yxzw, IMM[1].yyyy, IMM[0].yyyy 94: MOV TEMP[11].xy, TEMP[15].xyxx 95: MUL TEMP[15].yz, TEMP[3].yyyy, TEMP[11].xxyw 96: MOV TEMP[13].yz, TEMP[15].zyzz 97: MAD TEMP[3].xy, TEMP[11].yxzw, TEMP[3].yyyy, TEMP[7] 98: MOV TEMP[3].xy, TEMP[3].xyxx 99: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].yyyy, IMM[0].yyyy 100: MOV TEMP[12].z, TEMP[12].zzzz 101: MOV TEMP[12].xy, TEMP[6].zwzz 102: MAD TEMP[5].xyz, TEMP[12], TEMP[5].wwww, TEMP[8] 103: MOV TEMP[8].xyz, TEMP[5].xyzx 104: MUL TEMP[5].xy, TEMP[9].xwzw, TEMP[13].zyzw 105: MOV TEMP[11].xy, TEMP[5].xyxx 106: DP2 TEMP[5].x, TEMP[10].yzzz, TEMP[15].yzzz 107: MOV TEMP[11].z, TEMP[5].xxxx 108: ADD TEMP[5].xyz, TEMP[8], TEMP[11] 109: MOV TEMP[9].xyz, TEMP[5].xyzx 110: MUL TEMP[5].yw, TEMP[7].xzzw, TEMP[13].xzzy 111: MOV TEMP[1].yw, TEMP[5].wyww 112: ADD TEMP[5].xy, TEMP[1].ywzw, TEMP[1].ywzw 113: MOV TEMP[8].xy, TEMP[5].xyxx 114: DP2 TEMP[5].x, TEMP[14].zwww, TEMP[15].yzzz 115: MOV TEMP[8].z, TEMP[5].xxxx 116: ADD TEMP[5].xyz, TEMP[9], TEMP[8] 117: MOV TEMP[7].xyz, TEMP[5].xyzx 118: MAD TEMP[5].xyz, TEMP[3].xyxw, -TEMP[3].xyyw, TEMP[7] 119: MOV TEMP[7].xy, TEMP[5].xyzx 120: MOV TEMP[3].z, IMM[0].xxxx 121: DP3 TEMP[6].x, CONST[1].xyzz, TEMP[3].xyzz 122: MOV_SAT TEMP[6].x, TEMP[6].xxxx 123: ADD TEMP[10].xyz, CONST[8], -IN[4] 124: MOV TEMP[9].xyz, TEMP[10].xyzx 125: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 126: MAX TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 127: RSQ TEMP[10].x, TEMP[10].xxxx 128: MUL TEMP[14].xyz, TEMP[10].xxxx, TEMP[9] 129: MAD TEMP[15].xyz, TEMP[9], TEMP[10].xxxx, -CONST[5] 130: MOV TEMP[11].xyz, TEMP[15].xyzx 131: MAD TEMP[15].xyz, CONST[5].wwww, TEMP[11], CONST[5] 132: MOV TEMP[11].xyz, TEMP[15].xyzx 133: DP3 TEMP[15].x, TEMP[15].xyzz, TEMP[15].xyzz 134: MAX TEMP[15].x, TEMP[15].xxxx, IMM[1].wwww 135: RSQ TEMP[15].x, TEMP[15].xxxx 136: MAD TEMP[15].xyz, TEMP[11], TEMP[15].xxxx, CONST[1] 137: MOV TEMP[11].xyz, TEMP[15].xyzx 138: RCP TEMP[15].x, TEMP[15].zzzz 139: MAD TEMP[15].xy, TEMP[11], TEMP[15].xxxx, -TEMP[3] 140: RCP TEMP[16].x, CONST[3].wwww 141: ADD TEMP[17].zw, TEMP[16].xxxx, TEMP[5].xyxy 142: MUL TEMP[18].w, TEMP[5].zzzz, TEMP[5].zzzz 143: MOV TEMP[7].w, TEMP[18].wwww 144: MAD TEMP[19].w, TEMP[17].zzzz, TEMP[17].wwww, -TEMP[18].wwww 145: MUL TEMP[20].w, TEMP[15].xxxx, TEMP[15].xxxx 146: ADD TEMP[5].z, TEMP[5].zzzz, TEMP[5].zzzz 147: MOV TEMP[7].z, TEMP[5].zzzz 148: MUL TEMP[21].x, TEMP[15].xxxx, TEMP[5].zzzz 149: MAD TEMP[22].z, TEMP[17].zzzz, TEMP[15].yyyy, -TEMP[21].xxxx 150: MUL TEMP[22].z, TEMP[15].yyyy, TEMP[22].zzzz 151: MAD TEMP[17].z, TEMP[20].wwww, TEMP[17].wwww, TEMP[22].zzzz 152: MUL TEMP[17].z, TEMP[17].zzzz, IMM[2].xxxx 153: RCP TEMP[22].x, TEMP[19].wwww 154: MUL TEMP[23].x, TEMP[22].xxxx, TEMP[17].zzzz 155: MOV TEMP[24].x, -TEMP[19].wwww 156: FSGE TEMP[24].x, TEMP[24].xxxx, IMM[0].zzzz 157: UIF TEMP[24].xxxx :0 158: MOV TEMP[24].x, IMM[0].xxxx 159: ELSE :0 160: MOV TEMP[24].x, IMM[2].yyyy 161: ENDIF 162: MAD TEMP[17].z, TEMP[17].zzzz, TEMP[22].xxxx, IMM[2].zzzz 163: FSGE TEMP[17].x, TEMP[17].zzzz, IMM[0].zzzz 164: UIF TEMP[17].xxxx :0 165: MOV TEMP[17].x, IMM[0].xxxx 166: ELSE :0 167: MOV TEMP[17].x, IMM[2].yyyy 168: ENDIF 169: ADD TEMP[17].z, TEMP[17].xxxx, TEMP[24].xxxx 170: MUL TEMP[22].w, TEMP[23].xxxx, IMM[2].wwww 171: EX2 TEMP[22].x, TEMP[22].wwww 172: MAX TEMP[19].x, TEMP[19].wwww, IMM[1].wwww 173: RSQ TEMP[19].x, TEMP[19].xxxx 174: MUL TEMP[19].w, TEMP[19].xxxx, TEMP[22].xxxx 175: MOV TEMP[9].w, TEMP[19].wwww 176: MAD TEMP[16].xy, TEMP[16].xxxx, IMM[3].xxxx, TEMP[7] 177: MAD TEMP[22].w, TEMP[16].xxxx, TEMP[16].yyyy, -TEMP[18].wwww 178: MAD TEMP[21].x, TEMP[16].xxxx, TEMP[15].yyyy, -TEMP[21].xxxx 179: MUL TEMP[15].x, TEMP[15].yyyy, TEMP[21].xxxx 180: MAD TEMP[15].w, TEMP[20].wwww, TEMP[16].yyyy, TEMP[15].xxxx 181: MUL TEMP[15].w, TEMP[15].wwww, IMM[2].xxxx 182: RCP TEMP[11].x, TEMP[22].wwww 183: MUL TEMP[16].y, TEMP[15].wwww, TEMP[11].xxxx 184: MOV TEMP[20].x, -TEMP[22].wwww 185: FSGE TEMP[20].x, TEMP[20].xxxx, IMM[0].zzzz 186: UIF TEMP[20].xxxx :0 187: MOV TEMP[20].x, IMM[0].xxxx 188: ELSE :0 189: MOV TEMP[20].x, IMM[2].yyyy 190: ENDIF 191: MAD TEMP[11].w, TEMP[15].wwww, TEMP[11].xxxx, IMM[2].zzzz 192: FSGE TEMP[11].x, TEMP[11].wwww, IMM[0].zzzz 193: UIF TEMP[11].xxxx :0 194: MOV TEMP[11].x, IMM[0].xxxx 195: ELSE :0 196: MOV TEMP[11].x, IMM[2].yyyy 197: ENDIF 198: ADD TEMP[11].w, TEMP[11].xxxx, TEMP[20].xxxx 199: MOV TEMP[8].w, TEMP[11].wwww 200: MUL TEMP[15].x, TEMP[16].yyyy, IMM[2].wwww 201: MAX TEMP[16].x, TEMP[22].wwww, IMM[1].wwww 202: RSQ TEMP[16].x, TEMP[16].xxxx 203: EX2 TEMP[15].x, TEMP[15].xxxx 204: MUL TEMP[15].w, TEMP[16].xxxx, TEMP[15].xxxx 205: MUL TEMP[15].w, TEMP[15].wwww, IMM[3].yyyy 206: MOV TEMP[2].w, TEMP[15].wwww 207: MAD TEMP[10].xyz, TEMP[9], TEMP[10].xxxx, -CONST[6] 208: MOV TEMP[9].xyz, TEMP[10].xyzx 209: MAD TEMP[10].xyz, CONST[6].wwww, TEMP[9], CONST[6] 210: MOV TEMP[9].xyz, TEMP[10].xyzx 211: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 212: MAX TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 213: RSQ TEMP[10].x, TEMP[10].xxxx 214: MAD TEMP[10].xyz, TEMP[9], TEMP[10].xxxx, CONST[2] 215: MOV TEMP[9].xyz, TEMP[10].xyzx 216: RCP TEMP[10].x, TEMP[10].zzzz 217: MAD TEMP[10].xy, TEMP[9], TEMP[10].xxxx, -TEMP[3] 218: RCP TEMP[16].x, CONST[4].wwww 219: ADD TEMP[16].xy, TEMP[16].xxxx, TEMP[7] 220: MAD TEMP[18].w, TEMP[16].xxxx, TEMP[16].yyyy, -TEMP[18].wwww 221: MUL TEMP[20].w, TEMP[10].xxxx, TEMP[10].xxxx 222: MUL TEMP[5].z, TEMP[5].zzzz, TEMP[10].xxxx 223: MAD TEMP[5].z, TEMP[16].xxxx, TEMP[10].yyyy, -TEMP[5].zzzz 224: MUL TEMP[5].z, TEMP[10].yyyy, TEMP[5].zzzz 225: MAD TEMP[5].z, TEMP[20].wwww, TEMP[16].yyyy, TEMP[5].zzzz 226: MUL TEMP[5].z, TEMP[5].zzzz, IMM[2].xxxx 227: RCP TEMP[10].x, TEMP[18].wwww 228: MUL TEMP[16].x, TEMP[10].xxxx, TEMP[5].zzzz 229: MOV TEMP[20].x, -TEMP[18].wwww 230: FSGE TEMP[20].x, TEMP[20].xxxx, IMM[0].zzzz 231: UIF TEMP[20].xxxx :0 232: MOV TEMP[20].x, IMM[0].xxxx 233: ELSE :0 234: MOV TEMP[20].x, IMM[2].yyyy 235: ENDIF 236: MAD TEMP[5].z, TEMP[5].zzzz, TEMP[10].xxxx, IMM[2].zzzz 237: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 238: UIF TEMP[5].xxxx :0 239: MOV TEMP[5].x, IMM[0].xxxx 240: ELSE :0 241: MOV TEMP[5].x, IMM[2].yyyy 242: ENDIF 243: ADD TEMP[5].z, TEMP[5].xxxx, TEMP[20].xxxx 244: MOV TEMP[7].z, TEMP[5].zzzz 245: MUL TEMP[10].w, TEMP[16].xxxx, IMM[2].wwww 246: EX2 TEMP[10].x, TEMP[10].wwww 247: MAX TEMP[16].x, TEMP[18].wwww, IMM[1].wwww 248: RSQ TEMP[16].x, TEMP[16].xxxx 249: MUL TEMP[10].w, TEMP[10].xxxx, TEMP[16].xxxx 250: MOV TEMP[7].w, TEMP[10].wwww 251: MUL TEMP[16].xyz, CONST[10].xyww, IN[4].yyyy 252: MOV TEMP[9].xyz, TEMP[16].xyzx 253: MAD TEMP[16].xyz, IN[4].xxxx, CONST[9].xyww, TEMP[9] 254: MOV TEMP[9].xyz, TEMP[16].xyzx 255: MAD TEMP[16].xyz, IN[4].zzzz, CONST[11].xyww, TEMP[9] 256: MOV TEMP[9].xyz, TEMP[16].xyzx 257: ADD TEMP[16].xyz, TEMP[9], CONST[12].xyww 258: MOV TEMP[9].xy, TEMP[16].xyzx 259: RCP TEMP[16].x, TEMP[16].zzzz 260: MOV TEMP[9].z, TEMP[16].xxxx 261: MUL TEMP[16].xy, TEMP[16].xxxx, TEMP[9] 262: MOV TEMP[9].xy, TEMP[16].xyxx 263: MAD TEMP[16].xy, TEMP[9], IMM[3].zwzw, IMM[3].wwww 264: MOV TEMP[9].xy, TEMP[16].xyxx 265: MAD TEMP[16].xy, TEMP[7], IMM[4].xxxx, TEMP[9] 266: MOV TEMP[7].xy, TEMP[16].xyxx 267: MAD TEMP[16].xy, TEMP[18].wwww, IMM[1].yyyy, TEMP[7] 268: MOV TEMP[16].xy, TEMP[16].xyyy 269: TEX TEMP[16], TEMP[16], SAMP[4], 2D 270: MAD TEMP[20].xy, TEMP[18].wwww, IMM[4].yzzw, TEMP[7] 271: MOV TEMP[20].xy, TEMP[20].xyyy 272: TEX TEMP[20], TEMP[20], SAMP[4], 2D 273: MUL TEMP[12], TEMP[20], IMM[4].wwww 274: MAD TEMP[13], TEMP[16], IMM[4].wwww, TEMP[12] 275: MAD TEMP[12].xy, TEMP[18].wwww, IMM[4].zxzw, TEMP[7] 276: MOV TEMP[12].xy, TEMP[12].xyyy 277: TEX TEMP[12], TEMP[12], SAMP[4], 2D 278: MAD TEMP[13], TEMP[12], IMM[4].wwww, TEMP[13] 279: MAD TEMP[12].xy, TEMP[18].wwww, IMM[1].zzzz, TEMP[7] 280: MOV TEMP[12].xy, TEMP[12].xyyy 281: TEX TEMP[12], TEMP[12], SAMP[4], 2D 282: MAD TEMP[13], TEMP[12], IMM[4].wwww, TEMP[13] 283: ABS TEMP[12].x, TEMP[13].xxxx 284: LG2 TEMP[9].x, TEMP[12].xxxx 285: ABS TEMP[12].x, TEMP[13].yyyy 286: LG2 TEMP[12].x, TEMP[12].xxxx 287: MOV TEMP[9].y, TEMP[12].xxxx 288: ABS TEMP[12].x, TEMP[13].zzzz 289: LG2 TEMP[12].x, TEMP[12].xxxx 290: MOV TEMP[9].z, TEMP[12].xxxx 291: MUL TEMP[12].xyz, TEMP[9], IMM[0].wwww 292: EX2 TEMP[13].x, TEMP[12].xxxx 293: EX2 TEMP[16].x, TEMP[12].yyyy 294: MOV TEMP[13].y, TEMP[16].xxxx 295: EX2 TEMP[12].x, TEMP[12].zzzz 296: MOV TEMP[13].z, TEMP[12].xxxx 297: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[14].xyzz 298: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].xxxx 299: MUL TEMP[12].x, TEMP[3].wwww, TEMP[3].wwww 300: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 301: MUL TEMP[14].y, TEMP[3].wwww, TEMP[12].xxxx 302: MUL TEMP[16].xyz, TEMP[19].wwww, CONST[3] 303: MOV TEMP[17], -TEMP[17].zzzz 304: FSGE TEMP[18].x, TEMP[17].xxxx, IMM[0].zzzz 305: UIF TEMP[18].xxxx :0 306: MOV TEMP[18].x, TEMP[16].xxxx 307: ELSE :0 308: MOV TEMP[18].x, IMM[2].yyyy 309: ENDIF 310: MOV TEMP[18].x, TEMP[18].xxxx 311: FSGE TEMP[19].x, TEMP[17].yyyy, IMM[0].zzzz 312: UIF TEMP[19].xxxx :0 313: MOV TEMP[19].x, TEMP[16].yyyy 314: ELSE :0 315: MOV TEMP[19].x, IMM[2].yyyy 316: ENDIF 317: MOV TEMP[18].y, TEMP[19].xxxx 318: FSGE TEMP[19].x, TEMP[17].zzzz, IMM[0].zzzz 319: UIF TEMP[19].xxxx :0 320: MOV TEMP[16].x, TEMP[16].zzzz 321: ELSE :0 322: MOV TEMP[16].x, IMM[2].yyyy 323: ENDIF 324: MOV TEMP[18].z, TEMP[16].xxxx 325: FSGE TEMP[16].x, TEMP[17].wwww, IMM[0].zzzz 326: UIF TEMP[16].xxxx :0 327: ELSE :0 328: ENDIF 329: MOV TEMP[9].xyz, TEMP[18].xyzx 330: MUL TEMP[10].xyz, TEMP[10].wwww, CONST[4] 331: MOV TEMP[8].xyz, TEMP[10].xyzx 332: MUL TEMP[10].xyz, TEMP[8], IMM[5].xxxx 333: MOV TEMP[5], -TEMP[5].zzzz 334: FSGE TEMP[16].x, TEMP[5].xxxx, IMM[0].zzzz 335: UIF TEMP[16].xxxx :0 336: MOV TEMP[16].x, TEMP[10].xxxx 337: ELSE :0 338: MOV TEMP[16].x, IMM[2].yyyy 339: ENDIF 340: MOV TEMP[16].x, TEMP[16].xxxx 341: FSGE TEMP[17].x, TEMP[5].yyyy, IMM[0].zzzz 342: UIF TEMP[17].xxxx :0 343: MOV TEMP[17].x, TEMP[10].yyyy 344: ELSE :0 345: MOV TEMP[17].x, IMM[2].yyyy 346: ENDIF 347: MOV TEMP[16].y, TEMP[17].xxxx 348: FSGE TEMP[17].x, TEMP[5].zzzz, IMM[0].zzzz 349: UIF TEMP[17].xxxx :0 350: MOV TEMP[10].x, TEMP[10].zzzz 351: ELSE :0 352: MOV TEMP[10].x, IMM[2].yyyy 353: ENDIF 354: MOV TEMP[16].z, TEMP[10].xxxx 355: FSGE TEMP[5].x, TEMP[5].wwww, IMM[0].zzzz 356: UIF TEMP[5].xxxx :0 357: ELSE :0 358: ENDIF 359: MOV TEMP[8].xyz, TEMP[16].xyzx 360: ADD TEMP[5].xyz, TEMP[9], TEMP[8] 361: MAD TEMP[4].yzw, TEMP[5].xxyz, TEMP[14].yyyy, TEMP[4].xxyz 362: MOV TEMP[7].w, TEMP[4].zyzw 363: MUL TEMP[5].xyz, TEMP[2], CONST[3] 364: MOV TEMP[2].xyz, TEMP[5].xyzx 365: MUL TEMP[5].xyz, TEMP[15].wwww, TEMP[2] 366: MOV TEMP[8], -TEMP[11].wwww 367: FSGE TEMP[10].x, TEMP[8].xxxx, IMM[0].zzzz 368: UIF TEMP[10].xxxx :0 369: MOV TEMP[10].x, TEMP[5].xxxx 370: ELSE :0 371: MOV TEMP[10].x, IMM[2].yyyy 372: ENDIF 373: MOV TEMP[10].x, TEMP[10].xxxx 374: FSGE TEMP[11].x, TEMP[8].yyyy, IMM[0].zzzz 375: UIF TEMP[11].xxxx :0 376: MOV TEMP[11].x, TEMP[5].yyyy 377: ELSE :0 378: MOV TEMP[11].x, IMM[2].yyyy 379: ENDIF 380: MOV TEMP[10].y, TEMP[11].xxxx 381: FSGE TEMP[11].x, TEMP[8].zzzz, IMM[0].zzzz 382: UIF TEMP[11].xxxx :0 383: MOV TEMP[5].x, TEMP[5].zzzz 384: ELSE :0 385: MOV TEMP[5].x, IMM[2].yyyy 386: ENDIF 387: MOV TEMP[10].z, TEMP[5].xxxx 388: FSGE TEMP[5].x, TEMP[8].wwww, IMM[0].zzzz 389: UIF TEMP[5].xxxx :0 390: ELSE :0 391: ENDIF 392: MOV TEMP[2].xyz, TEMP[10].xyzx 393: MAD TEMP[4].xyz, TEMP[4].yzww, TEMP[6].xxxx, TEMP[2] 394: MOV TEMP[2].xyz, TEMP[4].xyzx 395: MAD TEMP[4].xyz, TEMP[13], CONST[14].xxxx, TEMP[2] 396: MOV TEMP[2].xyz, TEMP[4].xyzx 397: MOV TEMP[4].xy, IN[3].xyyy 398: TEX TEMP[4].w, TEMP[4], SAMP[5], 2D 399: MOV TEMP[9].w, TEMP[4].wwww 400: MAD TEMP[3].y, TEMP[3].wwww, TEMP[12].xxxx, IMM[2].xxxx 401: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[2] 402: ADD TEMP[5], TEMP[13].wwww, IMM[0].xxxx 403: MOV_SAT TEMP[5], TEMP[5] 404: MUL TEMP[4].w, TEMP[4].wwww, TEMP[5].yyyy 405: MOV TEMP[4].w, TEMP[4].wwww 406: ABS TEMP[5].x, TEMP[3].xxxx 407: LG2 TEMP[7].x, TEMP[5].xxxx 408: ABS TEMP[5].x, TEMP[3].yyyy 409: LG2 TEMP[5].x, TEMP[5].xxxx 410: MOV TEMP[7].y, TEMP[5].xxxx 411: ABS TEMP[3].x, TEMP[3].zzzz 412: LG2 TEMP[3].x, TEMP[3].xxxx 413: MOV TEMP[7].z, TEMP[3].xxxx 414: MUL TEMP[3].xyz, TEMP[7], IMM[5].yyyy 415: EX2 TEMP[7].x, TEMP[3].xxxx 416: EX2 TEMP[5].x, TEMP[3].yyyy 417: MOV TEMP[7].y, TEMP[5].xxxx 418: EX2 TEMP[3].x, TEMP[3].zzzz 419: MOV TEMP[7].z, TEMP[3].xxxx 420: MOV TEMP[3].xyz, TEMP[7].xyzz 421: TEX TEMP[3], TEMP[3], SAMP[11], 3D 422: MOV TEMP[2].w, TEMP[3].wwww 423: MAD TEMP[5].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 424: MOV TEMP[5].xy, TEMP[5].xyyy 425: TEX TEMP[5], TEMP[5], SAMP[8], 2D 426: MOV TEMP[7].w, TEMP[5].wwww 427: ADD TEMP[6].y, -TEMP[5].wwww, IMM[0].xxxx 428: MAD TEMP[3].xyz, TEMP[3], TEMP[6].yyyy, TEMP[5] 429: MOV TEMP[2].xyz, TEMP[3].xyzx 430: LRP TEMP[2].xyz, TEMP[0].xxxx, TEMP[2], IMM[5].zzzz 431: MOV TEMP[7].xyz, TEMP[2].xyzx 432: MOV TEMP[2].xyz, TEMP[2].xyzz 433: TEX TEMP[2], TEMP[2], SAMP[10], 3D 434: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 435: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[2], TEMP[7] 436: MOV TEMP[9].xyz, TEMP[0].xyzx 437: RCP TEMP[1].x, IN[5].zzzz 438: MUL TEMP[2].x, TEMP[1].xxxx, IN[5].yyyy 439: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx, IMM[2].xxxx 440: MOV TEMP[1].x, TEMP[2].xxxx 441: MOV TEMP[1].y, CONST[7].wwww 442: MOV TEMP[2].xy, TEMP[1].xyyy 443: TEX TEMP[2].x, TEMP[2], SAMP[7], 2D 444: MOV TEMP[1].x, TEMP[2].xxxx 445: ADD TEMP[3].yzw, -CONST[8].xxyz, IN[4].xxyz 446: MOV TEMP[1].w, TEMP[3].zyzw 447: DP3 TEMP[5].x, TEMP[3].yzww, TEMP[3].yzww 448: MOV TEMP[1].y, TEMP[5].xxxx 449: MUL TEMP[1].yz, TEMP[1].xyww, CONST[7].xyxw 450: MUL TEMP[5].z, TEMP[1].zzzz, IMM[5].wwww 451: EX2 TEMP[5].x, TEMP[5].zzzz 452: ADD TEMP[5].z, -TEMP[5].xxxx, IMM[0].xxxx 453: MUL TEMP[1].y, TEMP[5].zzzz, TEMP[1].yyyy 454: RCP TEMP[3].x, TEMP[3].wwww 455: MUL TEMP[1].y, TEMP[3].xxxx, TEMP[1].yyyy 456: MUL TEMP[1].y, TEMP[1].yyyy, IMM[5].wwww 457: EX2 TEMP[1].x, TEMP[1].yyyy 458: MOV_SAT TEMP[1].x, TEMP[1].xxxx 459: ADD TEMP[1].y, -TEMP[1].xxxx, IMM[0].xxxx 460: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[2].xxxx 461: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[13].xxyz 462: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[9] 463: MOV TEMP[4].xyz, TEMP[0].xyzx 464: MOV OUT[0], TEMP[4] 465: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %69 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %70 = load <8 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %72 = load <4 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %74 = load <8 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %76 = load <4 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %78 = load <8 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %80 = load <4 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %82 = load <8 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %84 = load <4 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %86 = load <8 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %88 = load <4 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %90 = load <8 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %92 = load <4 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %94 = load <8 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %96 = load <4 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %98 = load <8 x i32> addrspace(2)* %97, !tbaa !0 %99 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %100 = load <4 x i32> addrspace(2)* %99, !tbaa !0 %101 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %102 = load <8 x i32> addrspace(2)* %101, !tbaa !0 %103 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %104 = load <4 x i32> addrspace(2)* %103, !tbaa !0 %105 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %106 = load <8 x i32> addrspace(2)* %105, !tbaa !0 %107 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %108 = load <4 x i32> addrspace(2)* %107, !tbaa !0 %109 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %110 = load <8 x i32> addrspace(2)* %109, !tbaa !0 %111 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %112 = load <4 x i32> addrspace(2)* %111, !tbaa !0 %113 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 11 %114 = load <8 x i32> addrspace(2)* %113, !tbaa !0 %115 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 11 %116 = load <4 x i32> addrspace(2)* %115, !tbaa !0 %117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %132 = fmul float %130, 1.000000e+00 %133 = fadd float %132, 0.000000e+00 %134 = fmul float %131, -1.000000e+00 %135 = fadd float %134, 1.000000e+00 %136 = bitcast float %133 to i32 %137 = bitcast float %135 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <8 x i32> %106 to <32 x i8> %141 = bitcast <4 x i32> %108 to <16 x i8> %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 2 %145 = call float @fabs(float %143) %146 = fsub float -0.000000e+00, %145 %147 = fsub float -0.000000e+00, %145 %148 = fsub float -0.000000e+00, %145 %149 = fsub float -0.000000e+00, %145 %150 = fcmp oge float %146, 0.000000e+00 %151 = sext i1 %150 to i32 %152 = bitcast i32 %151 to float %153 = bitcast float %152 to i32 %154 = icmp ne i32 %153, 0 %. = select i1 %154, float -1.000000e+00, float 0.000000e+00 %155 = fcmp oge float %147, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = bitcast i32 %156 to float %158 = bitcast float %157 to i32 %159 = icmp ne i32 %158, 0 %temp16.0 = select i1 %159, float -1.000000e+00, float 0.000000e+00 %160 = fcmp oge float %148, 0.000000e+00 %161 = sext i1 %160 to i32 %162 = bitcast i32 %161 to float %163 = bitcast float %162 to i32 %164 = icmp ne i32 %163, 0 %.166 = select i1 %164, float -1.000000e+00, float 0.000000e+00 %165 = fcmp oge float %149, 0.000000e+00 %166 = sext i1 %165 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 %170 = fcmp olt float %., 0.000000e+00 %171 = sext i1 %170 to i32 %172 = fcmp olt float %temp16.0, 0.000000e+00 %173 = sext i1 %172 to i32 %174 = fcmp olt float %.166, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %171 to float %177 = bitcast i32 %173 to float %178 = bitcast i32 %175 to float %179 = bitcast float %176 to i32 %180 = bitcast float %178 to i32 %181 = or i32 %179, %180 %182 = bitcast i32 %181 to float %183 = bitcast float %182 to i32 %184 = bitcast float %177 to i32 %185 = or i32 %183, %184 %186 = bitcast i32 %185 to float %187 = bitcast float %186 to i32 %188 = icmp ne i32 %187, 0 br i1 %188, label %IF110, label %ENDIF109 IF110: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF109 ENDIF109: ; preds = %main_body, %IF110 %189 = bitcast float %123 to i32 %190 = bitcast float %124 to i32 %191 = insertelement <2 x i32> undef, i32 %189, i32 0 %192 = insertelement <2 x i32> %191, i32 %190, i32 1 %193 = bitcast <8 x i32> %94 to <32 x i8> %194 = bitcast <4 x i32> %96 to <16 x i8> %195 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %192, <32 x i8> %193, <16 x i8> %194, i32 2) %196 = extractelement <4 x float> %195, i32 0 %197 = extractelement <4 x float> %195, i32 1 %198 = extractelement <4 x float> %195, i32 2 %199 = call float @llvm.pow.f32(float %196, float 0x40019999A0000000) %200 = call float @llvm.pow.f32(float %197, float 0x40019999A0000000) %201 = call float @llvm.pow.f32(float %198, float 0x40019999A0000000) %202 = fdiv float 1.000000e+00, %24 %203 = fmul float %202, 0x3FD54FDF40000000 %204 = fmul float %203, %203 %205 = bitcast float %117 to i32 %206 = bitcast float %118 to i32 %207 = insertelement <2 x i32> undef, i32 %205, i32 0 %208 = insertelement <2 x i32> %207, i32 %206, i32 1 %209 = bitcast <8 x i32> %70 to <32 x i8> %210 = bitcast <4 x i32> %72 to <16 x i8> %211 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 2) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = extractelement <4 x float> %211, i32 3 %216 = bitcast float %117 to i32 %217 = bitcast float %118 to i32 %218 = insertelement <2 x i32> undef, i32 %216, i32 0 %219 = insertelement <2 x i32> %218, i32 %217, i32 1 %220 = bitcast <8 x i32> %74 to <32 x i8> %221 = bitcast <4 x i32> %76 to <16 x i8> %222 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %219, <32 x i8> %220, <16 x i8> %221, i32 2) %223 = extractelement <4 x float> %222, i32 0 %224 = fmul float %212, 2.000000e+00 %225 = fadd float %224, -1.000000e+00 %226 = fmul float %213, 2.000000e+00 %227 = fadd float %226, -1.000000e+00 %228 = fmul float %203, %225 %229 = fmul float %203, %227 %230 = fmul float %223, 2.000000e+00 %231 = fadd float %230, -1.000000e+00 %232 = bitcast float %119 to i32 %233 = bitcast float %120 to i32 %234 = insertelement <2 x i32> undef, i32 %232, i32 0 %235 = insertelement <2 x i32> %234, i32 %233, i32 1 %236 = bitcast <8 x i32> %78 to <32 x i8> %237 = bitcast <4 x i32> %80 to <16 x i8> %238 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %235, <32 x i8> %236, <16 x i8> %237, i32 2) %239 = extractelement <4 x float> %238, i32 0 %240 = extractelement <4 x float> %238, i32 1 %241 = extractelement <4 x float> %238, i32 2 %242 = extractelement <4 x float> %238, i32 3 %243 = bitcast float %119 to i32 %244 = bitcast float %120 to i32 %245 = insertelement <2 x i32> undef, i32 %243, i32 0 %246 = insertelement <2 x i32> %245, i32 %244, i32 1 %247 = bitcast <8 x i32> %82 to <32 x i8> %248 = bitcast <4 x i32> %84 to <16 x i8> %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = fadd float %239, %239 %252 = fadd float %240, %240 %253 = fmul float %251, -1.000000e+00 %254 = fadd float %253, 1.000000e+00 %255 = fmul float %252, 1.000000e+00 %256 = fadd float %255, -1.000000e+00 %257 = fmul float %203, %254 %258 = fmul float %203, %256 %259 = fmul float %225, %203 %260 = fadd float %259, %257 %261 = fmul float %227, %203 %262 = fadd float %261, %258 %263 = fmul float %250, -2.000000e+00 %264 = fadd float %263, 1.000000e+00 %265 = fmul float %204, %241 %266 = fmul float %204, %242 %267 = fmul float %204, %264 %268 = fmul float %214, %204 %269 = fadd float %268, %265 %270 = fmul float %215, %204 %271 = fadd float %270, %266 %272 = fmul float %231, %204 %273 = fadd float %272, %267 %274 = fadd float %228, %228 %275 = fadd float %229, %229 %276 = fmul float %257, %274 %277 = fmul float %258, %275 %278 = fmul float %228, %258 %279 = fmul float %229, %257 %280 = fadd float %278, %279 %281 = fadd float %269, %276 %282 = fadd float %271, %277 %283 = fadd float %273, %280 %284 = bitcast float %121 to i32 %285 = bitcast float %122 to i32 %286 = insertelement <2 x i32> undef, i32 %284, i32 0 %287 = insertelement <2 x i32> %286, i32 %285, i32 1 %288 = bitcast <8 x i32> %78 to <32 x i8> %289 = bitcast <4 x i32> %80 to <16 x i8> %290 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %287, <32 x i8> %288, <16 x i8> %289, i32 2) %291 = extractelement <4 x float> %290, i32 0 %292 = extractelement <4 x float> %290, i32 1 %293 = extractelement <4 x float> %290, i32 2 %294 = extractelement <4 x float> %290, i32 3 %295 = bitcast float %121 to i32 %296 = bitcast float %122 to i32 %297 = insertelement <2 x i32> undef, i32 %295, i32 0 %298 = insertelement <2 x i32> %297, i32 %296, i32 1 %299 = bitcast <8 x i32> %82 to <32 x i8> %300 = bitcast <4 x i32> %84 to <16 x i8> %301 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %298, <32 x i8> %299, <16 x i8> %300, i32 2) %302 = extractelement <4 x float> %301, i32 0 %303 = fmul float %292, 2.000000e+00 %304 = fadd float %303, -1.000000e+00 %305 = fmul float %291, 2.000000e+00 %306 = fadd float %305, -1.000000e+00 %307 = fmul float %203, %304 %308 = fmul float %203, %306 %309 = fmul float %306, %203 %310 = fadd float %309, %260 %311 = fmul float %304, %203 %312 = fadd float %311, %262 %313 = fmul float %302, 2.000000e+00 %314 = fadd float %313, -1.000000e+00 %315 = fmul float %293, %204 %316 = fadd float %315, %281 %317 = fmul float %294, %204 %318 = fadd float %317, %282 %319 = fmul float %314, %204 %320 = fadd float %319, %283 %321 = fmul float %274, %308 %322 = fmul float %275, %307 %323 = fmul float %228, %307 %324 = fmul float %229, %308 %325 = fadd float %323, %324 %326 = fadd float %316, %321 %327 = fadd float %318, %322 %328 = fadd float %320, %325 %329 = fmul float %257, %308 %330 = fmul float %258, %307 %331 = fadd float %329, %329 %332 = fadd float %330, %330 %333 = fmul float %257, %307 %334 = fmul float %258, %308 %335 = fadd float %333, %334 %336 = fadd float %326, %331 %337 = fadd float %327, %332 %338 = fadd float %328, %335 %339 = fsub float -0.000000e+00, %310 %340 = fmul float %310, %339 %341 = fadd float %340, %336 %342 = fsub float -0.000000e+00, %312 %343 = fmul float %312, %342 %344 = fadd float %343, %337 %345 = fsub float -0.000000e+00, %312 %346 = fmul float %310, %345 %347 = fadd float %346, %338 %348 = fmul float %25, %310 %349 = fmul float %26, %312 %350 = fadd float %349, %348 %351 = fmul float %27, 1.000000e+00 %352 = fadd float %350, %351 %353 = call float @llvm.AMDIL.clamp.(float %352, float 0.000000e+00, float 1.000000e+00) %354 = fsub float -0.000000e+00, %125 %355 = fadd float %50, %354 %356 = fsub float -0.000000e+00, %126 %357 = fadd float %51, %356 %358 = fsub float -0.000000e+00, %127 %359 = fadd float %52, %358 %360 = fmul float %355, %355 %361 = fmul float %357, %357 %362 = fadd float %361, %360 %363 = fmul float %359, %359 %364 = fadd float %362, %363 %365 = call float @llvm.maxnum.f32(float %364, float 0x3E7AD7F2A0000000) %366 = call float @llvm.AMDGPU.rsq.clamped.f32(float %365) %367 = fmul float %366, %355 %368 = fmul float %366, %357 %369 = fmul float %366, %359 %370 = fsub float -0.000000e+00, %39 %371 = fmul float %355, %366 %372 = fadd float %371, %370 %373 = fsub float -0.000000e+00, %40 %374 = fmul float %357, %366 %375 = fadd float %374, %373 %376 = fsub float -0.000000e+00, %41 %377 = fmul float %359, %366 %378 = fadd float %377, %376 %379 = fmul float %42, %372 %380 = fadd float %379, %39 %381 = fmul float %42, %375 %382 = fadd float %381, %40 %383 = fmul float %42, %378 %384 = fadd float %383, %41 %385 = fmul float %380, %380 %386 = fmul float %382, %382 %387 = fadd float %386, %385 %388 = fmul float %384, %384 %389 = fadd float %387, %388 %390 = call float @llvm.maxnum.f32(float %389, float 0x3E7AD7F2A0000000) %391 = call float @llvm.AMDGPU.rsq.clamped.f32(float %390) %392 = fmul float %380, %391 %393 = fadd float %392, %25 %394 = fmul float %382, %391 %395 = fadd float %394, %26 %396 = fmul float %384, %391 %397 = fadd float %396, %27 %398 = fdiv float 1.000000e+00, %397 %399 = fsub float -0.000000e+00, %310 %400 = fmul float %393, %398 %401 = fadd float %400, %399 %402 = fsub float -0.000000e+00, %312 %403 = fmul float %395, %398 %404 = fadd float %403, %402 %405 = fdiv float 1.000000e+00, %34 %406 = fadd float %405, %341 %407 = fadd float %405, %344 %408 = fmul float %347, %347 %409 = fsub float -0.000000e+00, %408 %410 = fmul float %406, %407 %411 = fadd float %410, %409 %412 = fmul float %401, %401 %413 = fadd float %347, %347 %414 = fmul float %401, %413 %415 = fsub float -0.000000e+00, %414 %416 = fmul float %406, %404 %417 = fadd float %416, %415 %418 = fmul float %404, %417 %419 = fmul float %412, %407 %420 = fadd float %419, %418 %421 = fmul float %420, 5.000000e-01 %422 = fdiv float 1.000000e+00, %411 %423 = fmul float %422, %421 %424 = fsub float -0.000000e+00, %411 %425 = fcmp oge float %424, 0.000000e+00 %426 = sext i1 %425 to i32 %427 = bitcast i32 %426 to float %428 = bitcast float %427 to i32 %429 = icmp ne i32 %428, 0 %.167 = select i1 %429, float 1.000000e+00, float -0.000000e+00 %430 = fmul float %421, %422 %431 = fadd float %430, -1.600000e+01 %432 = fcmp oge float %431, 0.000000e+00 %433 = sext i1 %432 to i32 %434 = bitcast i32 %433 to float %435 = bitcast float %434 to i32 %436 = icmp ne i32 %435, 0 %temp68.0 = select i1 %436, float 1.000000e+00, float -0.000000e+00 %437 = fadd float %temp68.0, %.167 %438 = fmul float %423, 0xBFF7154CA0000000 %439 = call float @llvm.AMDIL.exp.(float %438) %440 = call float @llvm.maxnum.f32(float %411, float 0x3E7AD7F2A0000000) %441 = call float @llvm.AMDGPU.rsq.clamped.f32(float %440) %442 = fmul float %441, %439 %443 = fmul float %405, 1.600000e+01 %444 = fadd float %443, %341 %445 = fmul float %405, 1.600000e+01 %446 = fadd float %445, %344 %447 = fsub float -0.000000e+00, %408 %448 = fmul float %444, %446 %449 = fadd float %448, %447 %450 = fsub float -0.000000e+00, %414 %451 = fmul float %444, %404 %452 = fadd float %451, %450 %453 = fmul float %404, %452 %454 = fmul float %412, %446 %455 = fadd float %454, %453 %456 = fmul float %455, 5.000000e-01 %457 = fdiv float 1.000000e+00, %449 %458 = fmul float %456, %457 %459 = fsub float -0.000000e+00, %449 %460 = fcmp oge float %459, 0.000000e+00 %461 = sext i1 %460 to i32 %462 = bitcast i32 %461 to float %463 = bitcast float %462 to i32 %464 = icmp ne i32 %463, 0 %.168 = select i1 %464, float 1.000000e+00, float -0.000000e+00 %465 = fmul float %456, %457 %466 = fadd float %465, -1.600000e+01 %467 = fcmp oge float %466, 0.000000e+00 %468 = sext i1 %467 to i32 %469 = bitcast i32 %468 to float %470 = bitcast float %469 to i32 %471 = icmp ne i32 %470, 0 %temp44.0 = select i1 %471, float 1.000000e+00, float -0.000000e+00 %472 = fadd float %temp44.0, %.168 %473 = fmul float %458, 0xBFF7154CA0000000 %474 = call float @llvm.maxnum.f32(float %449, float 0x3E7AD7F2A0000000) %475 = call float @llvm.AMDGPU.rsq.clamped.f32(float %474) %476 = call float @llvm.AMDIL.exp.(float %473) %477 = fmul float %475, %476 %478 = fmul float %477, 0x3F747AE140000000 %479 = fsub float -0.000000e+00, %43 %480 = fmul float %355, %366 %481 = fadd float %480, %479 %482 = fsub float -0.000000e+00, %44 %483 = fmul float %357, %366 %484 = fadd float %483, %482 %485 = fsub float -0.000000e+00, %45 %486 = fmul float %359, %366 %487 = fadd float %486, %485 %488 = fmul float %46, %481 %489 = fadd float %488, %43 %490 = fmul float %46, %484 %491 = fadd float %490, %44 %492 = fmul float %46, %487 %493 = fadd float %492, %45 %494 = fmul float %489, %489 %495 = fmul float %491, %491 %496 = fadd float %495, %494 %497 = fmul float %493, %493 %498 = fadd float %496, %497 %499 = call float @llvm.maxnum.f32(float %498, float 0x3E7AD7F2A0000000) %500 = call float @llvm.AMDGPU.rsq.clamped.f32(float %499) %501 = fmul float %489, %500 %502 = fadd float %501, %28 %503 = fmul float %491, %500 %504 = fadd float %503, %29 %505 = fmul float %493, %500 %506 = fadd float %505, %30 %507 = fdiv float 1.000000e+00, %506 %508 = fsub float -0.000000e+00, %310 %509 = fmul float %502, %507 %510 = fadd float %509, %508 %511 = fsub float -0.000000e+00, %312 %512 = fmul float %504, %507 %513 = fadd float %512, %511 %514 = fdiv float 1.000000e+00, %38 %515 = fadd float %514, %341 %516 = fadd float %514, %344 %517 = fsub float -0.000000e+00, %408 %518 = fmul float %515, %516 %519 = fadd float %518, %517 %520 = fmul float %510, %510 %521 = fmul float %413, %510 %522 = fsub float -0.000000e+00, %521 %523 = fmul float %515, %513 %524 = fadd float %523, %522 %525 = fmul float %513, %524 %526 = fmul float %520, %516 %527 = fadd float %526, %525 %528 = fmul float %527, 5.000000e-01 %529 = fdiv float 1.000000e+00, %519 %530 = fmul float %529, %528 %531 = fsub float -0.000000e+00, %519 %532 = fcmp oge float %531, 0.000000e+00 %533 = sext i1 %532 to i32 %534 = bitcast i32 %533 to float %535 = bitcast float %534 to i32 %536 = icmp ne i32 %535, 0 %.169 = select i1 %536, float 1.000000e+00, float -0.000000e+00 %537 = fmul float %528, %529 %538 = fadd float %537, -1.600000e+01 %539 = fcmp oge float %538, 0.000000e+00 %540 = sext i1 %539 to i32 %541 = bitcast i32 %540 to float %542 = bitcast float %541 to i32 %543 = icmp ne i32 %542, 0 %temp20.0 = select i1 %543, float 1.000000e+00, float -0.000000e+00 %544 = fadd float %temp20.0, %.169 %545 = fmul float %530, 0xBFF7154CA0000000 %546 = call float @llvm.AMDIL.exp.(float %545) %547 = call float @llvm.maxnum.f32(float %519, float 0x3E7AD7F2A0000000) %548 = call float @llvm.AMDGPU.rsq.clamped.f32(float %547) %549 = fmul float %546, %548 %550 = fmul float %56, %126 %551 = fmul float %57, %126 %552 = fmul float %58, %126 %553 = fmul float %125, %53 %554 = fadd float %553, %550 %555 = fmul float %125, %54 %556 = fadd float %555, %551 %557 = fmul float %125, %55 %558 = fadd float %557, %552 %559 = fmul float %127, %59 %560 = fadd float %559, %554 %561 = fmul float %127, %60 %562 = fadd float %561, %556 %563 = fmul float %127, %61 %564 = fadd float %563, %558 %565 = fadd float %560, %62 %566 = fadd float %562, %63 %567 = fadd float %564, %64 %568 = fdiv float 1.000000e+00, %567 %569 = fmul float %568, %565 %570 = fmul float %568, %566 %571 = fmul float %569, 5.000000e-01 %572 = fadd float %571, -5.000000e-01 %573 = fmul float %570, -5.000000e-01 %574 = fadd float %573, -5.000000e-01 %575 = fmul float %341, 6.000000e+00 %576 = fadd float %575, %572 %577 = fmul float %344, 6.000000e+00 %578 = fadd float %577, %574 %579 = fmul float %519, 2.000000e+00 %580 = fadd float %579, %576 %581 = fmul float %519, 2.000000e+00 %582 = fadd float %581, %578 %583 = bitcast float %580 to i32 %584 = bitcast float %582 to i32 %585 = insertelement <2 x i32> undef, i32 %583, i32 0 %586 = insertelement <2 x i32> %585, i32 %584, i32 1 %587 = bitcast <8 x i32> %86 to <32 x i8> %588 = bitcast <4 x i32> %88 to <16 x i8> %589 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %586, <32 x i8> %587, <16 x i8> %588, i32 2) %590 = extractelement <4 x float> %589, i32 0 %591 = extractelement <4 x float> %589, i32 1 %592 = extractelement <4 x float> %589, i32 2 %593 = extractelement <4 x float> %589, i32 3 %594 = fmul float %519, 2.000000e+00 %595 = fadd float %594, %576 %596 = fmul float %519, -2.000000e+00 %597 = fadd float %596, %578 %598 = bitcast float %595 to i32 %599 = bitcast float %597 to i32 %600 = insertelement <2 x i32> undef, i32 %598, i32 0 %601 = insertelement <2 x i32> %600, i32 %599, i32 1 %602 = bitcast <8 x i32> %86 to <32 x i8> %603 = bitcast <4 x i32> %88 to <16 x i8> %604 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %601, <32 x i8> %602, <16 x i8> %603, i32 2) %605 = extractelement <4 x float> %604, i32 0 %606 = extractelement <4 x float> %604, i32 1 %607 = extractelement <4 x float> %604, i32 2 %608 = extractelement <4 x float> %604, i32 3 %609 = fmul float %605, 2.500000e-01 %610 = fmul float %606, 2.500000e-01 %611 = fmul float %607, 2.500000e-01 %612 = fmul float %608, 2.500000e-01 %613 = fmul float %590, 2.500000e-01 %614 = fadd float %613, %609 %615 = fmul float %591, 2.500000e-01 %616 = fadd float %615, %610 %617 = fmul float %592, 2.500000e-01 %618 = fadd float %617, %611 %619 = fmul float %593, 2.500000e-01 %620 = fadd float %619, %612 %621 = fmul float %519, -2.000000e+00 %622 = fadd float %621, %576 %623 = fmul float %519, 6.000000e+00 %624 = fadd float %623, %578 %625 = bitcast float %622 to i32 %626 = bitcast float %624 to i32 %627 = insertelement <2 x i32> undef, i32 %625, i32 0 %628 = insertelement <2 x i32> %627, i32 %626, i32 1 %629 = bitcast <8 x i32> %86 to <32 x i8> %630 = bitcast <4 x i32> %88 to <16 x i8> %631 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %628, <32 x i8> %629, <16 x i8> %630, i32 2) %632 = extractelement <4 x float> %631, i32 0 %633 = extractelement <4 x float> %631, i32 1 %634 = extractelement <4 x float> %631, i32 2 %635 = extractelement <4 x float> %631, i32 3 %636 = fmul float %632, 2.500000e-01 %637 = fadd float %636, %614 %638 = fmul float %633, 2.500000e-01 %639 = fadd float %638, %616 %640 = fmul float %634, 2.500000e-01 %641 = fadd float %640, %618 %642 = fmul float %635, 2.500000e-01 %643 = fadd float %642, %620 %644 = fmul float %519, -2.000000e+00 %645 = fadd float %644, %576 %646 = fmul float %519, -2.000000e+00 %647 = fadd float %646, %578 %648 = bitcast float %645 to i32 %649 = bitcast float %647 to i32 %650 = insertelement <2 x i32> undef, i32 %648, i32 0 %651 = insertelement <2 x i32> %650, i32 %649, i32 1 %652 = bitcast <8 x i32> %86 to <32 x i8> %653 = bitcast <4 x i32> %88 to <16 x i8> %654 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %651, <32 x i8> %652, <16 x i8> %653, i32 2) %655 = extractelement <4 x float> %654, i32 0 %656 = extractelement <4 x float> %654, i32 1 %657 = extractelement <4 x float> %654, i32 2 %658 = extractelement <4 x float> %654, i32 3 %659 = fmul float %655, 2.500000e-01 %660 = fadd float %659, %637 %661 = fmul float %656, 2.500000e-01 %662 = fadd float %661, %639 %663 = fmul float %657, 2.500000e-01 %664 = fadd float %663, %641 %665 = fmul float %658, 2.500000e-01 %666 = fadd float %665, %643 %667 = call float @fabs(float %660) %668 = call float @llvm.log2.f32(float %667) %669 = call float @fabs(float %662) %670 = call float @llvm.log2.f32(float %669) %671 = call float @fabs(float %664) %672 = call float @llvm.log2.f32(float %671) %673 = fmul float %668, 0x40019999A0000000 %674 = fmul float %670, 0x40019999A0000000 %675 = fmul float %672, 0x40019999A0000000 %676 = call float @llvm.AMDIL.exp.(float %673) %677 = call float @llvm.AMDIL.exp.(float %674) %678 = call float @llvm.AMDIL.exp.(float %675) %679 = fmul float %310, %367 %680 = fmul float %312, %368 %681 = fadd float %680, %679 %682 = fmul float 1.000000e+00, %369 %683 = fadd float %681, %682 %684 = fsub float -0.000000e+00, %683 %685 = fadd float %684, 1.000000e+00 %686 = fmul float %685, %685 %687 = fmul float %686, %686 %688 = fmul float %685, %687 %689 = fmul float %442, %31 %690 = fmul float %442, %32 %691 = fmul float %442, %33 %692 = fsub float -0.000000e+00, %437 %693 = fsub float -0.000000e+00, %437 %694 = fsub float -0.000000e+00, %437 %695 = fsub float -0.000000e+00, %437 %696 = fcmp oge float %692, 0.000000e+00 %697 = sext i1 %696 to i32 %698 = bitcast i32 %697 to float %699 = bitcast float %698 to i32 %700 = icmp ne i32 %699, 0 %.170 = select i1 %700, float %689, float -0.000000e+00 %701 = fcmp oge float %693, 0.000000e+00 %702 = sext i1 %701 to i32 %703 = bitcast i32 %702 to float %704 = bitcast float %703 to i32 %705 = icmp ne i32 %704, 0 %temp76.0 = select i1 %705, float %690, float -0.000000e+00 %706 = fcmp oge float %694, 0.000000e+00 %707 = sext i1 %706 to i32 %708 = bitcast i32 %707 to float %709 = bitcast float %708 to i32 %710 = icmp ne i32 %709, 0 %.171 = select i1 %710, float %691, float -0.000000e+00 %711 = fcmp oge float %695, 0.000000e+00 %712 = sext i1 %711 to i32 %713 = bitcast i32 %712 to float %714 = bitcast float %713 to i32 %715 = icmp ne i32 %714, 0 %716 = fmul float %549, %35 %717 = fmul float %549, %36 %718 = fmul float %549, %37 %719 = fmul float %716, 0x3FE99999A0000000 %720 = fmul float %717, 0x3FE99999A0000000 %721 = fmul float %718, 0x3FE99999A0000000 %722 = fsub float -0.000000e+00, %544 %723 = fsub float -0.000000e+00, %544 %724 = fsub float -0.000000e+00, %544 %725 = fcmp oge float %722, 0.000000e+00 %726 = sext i1 %725 to i32 %727 = bitcast i32 %726 to float %728 = bitcast float %727 to i32 %729 = icmp ne i32 %728, 0 %temp64.1 = select i1 %729, float %719, float -0.000000e+00 %730 = fcmp oge float %723, 0.000000e+00 %731 = sext i1 %730 to i32 %732 = bitcast i32 %731 to float %733 = bitcast float %732 to i32 %734 = icmp ne i32 %733, 0 %.172 = select i1 %734, float %720, float -0.000000e+00 %735 = fcmp oge float %724, 0.000000e+00 %736 = sext i1 %735 to i32 %737 = bitcast i32 %736 to float %738 = bitcast float %737 to i32 %739 = icmp ne i32 %738, 0 %temp40.0 = select i1 %739, float %721, float -0.000000e+00 %740 = fadd float %.170, %temp64.1 %741 = fadd float %temp76.0, %.172 %742 = fadd float %.171, %temp40.0 %743 = fmul float %740, %688 %744 = fadd float %743, %199 %745 = fmul float %741, %688 %746 = fadd float %745, %200 %747 = fmul float %742, %688 %748 = fadd float %747, %201 %749 = fmul float %199, %31 %750 = fmul float %200, %32 %751 = fmul float %201, %33 %752 = fmul float %478, %749 %753 = fmul float %478, %750 %754 = fmul float %478, %751 %755 = fsub float -0.000000e+00, %472 %756 = fsub float -0.000000e+00, %472 %757 = fsub float -0.000000e+00, %472 %758 = fsub float -0.000000e+00, %472 %759 = fcmp oge float %755, 0.000000e+00 %760 = sext i1 %759 to i32 %761 = bitcast i32 %760 to float %762 = bitcast float %761 to i32 %763 = icmp ne i32 %762, 0 %.173 = select i1 %763, float %752, float -0.000000e+00 %764 = fcmp oge float %756, 0.000000e+00 %765 = sext i1 %764 to i32 %766 = bitcast i32 %765 to float %767 = bitcast float %766 to i32 %768 = icmp ne i32 %767, 0 %temp44.1 = select i1 %768, float %753, float -0.000000e+00 %769 = fcmp oge float %757, 0.000000e+00 %770 = sext i1 %769 to i32 %771 = bitcast i32 %770 to float %772 = bitcast float %771 to i32 %773 = icmp ne i32 %772, 0 %.174 = select i1 %773, float %754, float -0.000000e+00 %774 = fcmp oge float %758, 0.000000e+00 %775 = sext i1 %774 to i32 %776 = bitcast i32 %775 to float %777 = bitcast float %776 to i32 %778 = icmp ne i32 %777, 0 %779 = fmul float %744, %353 %780 = fadd float %779, %.173 %781 = fmul float %746, %353 %782 = fadd float %781, %temp44.1 %783 = fmul float %748, %353 %784 = fadd float %783, %.174 %785 = fmul float %676, %68 %786 = fadd float %785, %780 %787 = fmul float %677, %68 %788 = fadd float %787, %782 %789 = fmul float %678, %68 %790 = fadd float %789, %784 %791 = bitcast float %123 to i32 %792 = bitcast float %124 to i32 %793 = insertelement <2 x i32> undef, i32 %791, i32 0 %794 = insertelement <2 x i32> %793, i32 %792, i32 1 %795 = bitcast <8 x i32> %90 to <32 x i8> %796 = bitcast <4 x i32> %92 to <16 x i8> %797 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %794, <32 x i8> %795, <16 x i8> %796, i32 2) %798 = extractelement <4 x float> %797, i32 3 %799 = fmul float %685, %687 %800 = fadd float %799, 5.000000e-01 %801 = fmul float %800, %786 %802 = fmul float %800, %788 %803 = fmul float %800, %790 %804 = fadd float %666, 1.000000e+00 %805 = fadd float %666, 1.000000e+00 %806 = fadd float %666, 1.000000e+00 %807 = fadd float %666, 1.000000e+00 %808 = call float @llvm.AMDIL.clamp.(float %804, float 0.000000e+00, float 1.000000e+00) %809 = call float @llvm.AMDIL.clamp.(float %805, float 0.000000e+00, float 1.000000e+00) %810 = call float @llvm.AMDIL.clamp.(float %806, float 0.000000e+00, float 1.000000e+00) %811 = call float @llvm.AMDIL.clamp.(float %807, float 0.000000e+00, float 1.000000e+00) %812 = fmul float %798, %809 %813 = call float @fabs(float %801) %814 = call float @llvm.log2.f32(float %813) %815 = call float @fabs(float %802) %816 = call float @llvm.log2.f32(float %815) %817 = call float @fabs(float %803) %818 = call float @llvm.log2.f32(float %817) %819 = fmul float %814, 0x3FDD1743E0000000 %820 = fmul float %816, 0x3FDD1743E0000000 %821 = fmul float %818, 0x3FDD1743E0000000 %822 = call float @llvm.AMDIL.exp.(float %819) %823 = call float @llvm.AMDIL.exp.(float %820) %824 = call float @llvm.AMDIL.exp.(float %821) %825 = bitcast float %822 to i32 %826 = bitcast float %823 to i32 %827 = bitcast float %824 to i32 %828 = insertelement <4 x i32> undef, i32 %825, i32 0 %829 = insertelement <4 x i32> %828, i32 %826, i32 1 %830 = insertelement <4 x i32> %829, i32 %827, i32 2 %831 = insertelement <4 x i32> %830, i32 undef, i32 3 %832 = bitcast <8 x i32> %114 to <32 x i8> %833 = bitcast <4 x i32> %116 to <16 x i8> %834 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %831, <32 x i8> %832, <16 x i8> %833, i32 3) %835 = extractelement <4 x float> %834, i32 0 %836 = extractelement <4 x float> %834, i32 1 %837 = extractelement <4 x float> %834, i32 2 %838 = fmul float %130, 1.000000e+00 %839 = fadd float %838, 0.000000e+00 %840 = fmul float %131, -1.000000e+00 %841 = fadd float %840, 1.000000e+00 %842 = bitcast float %839 to i32 %843 = bitcast float %841 to i32 %844 = insertelement <2 x i32> undef, i32 %842, i32 0 %845 = insertelement <2 x i32> %844, i32 %843, i32 1 %846 = bitcast <8 x i32> %102 to <32 x i8> %847 = bitcast <4 x i32> %104 to <16 x i8> %848 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %845, <32 x i8> %846, <16 x i8> %847, i32 2) %849 = extractelement <4 x float> %848, i32 0 %850 = extractelement <4 x float> %848, i32 1 %851 = extractelement <4 x float> %848, i32 2 %852 = extractelement <4 x float> %848, i32 3 %853 = fsub float -0.000000e+00, %852 %854 = fadd float %853, 1.000000e+00 %855 = fmul float %835, %854 %856 = fadd float %855, %849 %857 = fmul float %836, %854 %858 = fadd float %857, %850 %859 = fmul float %837, %854 %860 = fadd float %859, %851 %861 = call float @llvm.AMDGPU.lrp(float %143, float %856, float 0x3FD99999A0000000) %862 = call float @llvm.AMDGPU.lrp(float %143, float %858, float 0x3FD99999A0000000) %863 = call float @llvm.AMDGPU.lrp(float %143, float %860, float 0x3FD99999A0000000) %864 = bitcast float %861 to i32 %865 = bitcast float %862 to i32 %866 = bitcast float %863 to i32 %867 = insertelement <4 x i32> undef, i32 %864, i32 0 %868 = insertelement <4 x i32> %867, i32 %865, i32 1 %869 = insertelement <4 x i32> %868, i32 %866, i32 2 %870 = insertelement <4 x i32> %869, i32 undef, i32 3 %871 = bitcast <8 x i32> %110 to <32 x i8> %872 = bitcast <4 x i32> %112 to <16 x i8> %873 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %870, <32 x i8> %871, <16 x i8> %872, i32 3) %874 = extractelement <4 x float> %873, i32 0 %875 = extractelement <4 x float> %873, i32 1 %876 = extractelement <4 x float> %873, i32 2 %877 = fsub float -0.000000e+00, %143 %878 = fmul float %144, %877 %879 = fadd float %878, %143 %880 = call float @llvm.AMDGPU.lrp(float %879, float %874, float %861) %881 = call float @llvm.AMDGPU.lrp(float %879, float %875, float %862) %882 = call float @llvm.AMDGPU.lrp(float %879, float %876, float %863) %883 = fdiv float 1.000000e+00, %129 %884 = fmul float %883, %128 %885 = fmul float %884, 5.000000e-01 %886 = fadd float %885, 5.000000e-01 %887 = bitcast float %886 to i32 %888 = bitcast float %49 to i32 %889 = insertelement <2 x i32> undef, i32 %887, i32 0 %890 = insertelement <2 x i32> %889, i32 %888, i32 1 %891 = bitcast <8 x i32> %98 to <32 x i8> %892 = bitcast <4 x i32> %100 to <16 x i8> %893 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %890, <32 x i8> %891, <16 x i8> %892, i32 2) %894 = extractelement <4 x float> %893, i32 0 %895 = fsub float -0.000000e+00, %50 %896 = fadd float %895, %125 %897 = fsub float -0.000000e+00, %51 %898 = fadd float %897, %126 %899 = fsub float -0.000000e+00, %52 %900 = fadd float %899, %127 %901 = fmul float %896, %896 %902 = fmul float %898, %898 %903 = fadd float %902, %901 %904 = fmul float %900, %900 %905 = fadd float %903, %904 %906 = fmul float %905, %48 %907 = fmul float %900, %47 %908 = fmul float %907, 0x3FF7154CA0000000 %909 = call float @llvm.AMDIL.exp.(float %908) %910 = fsub float -0.000000e+00, %909 %911 = fadd float %910, 1.000000e+00 %912 = fmul float %911, %906 %913 = fdiv float 1.000000e+00, %900 %914 = fmul float %913, %912 %915 = fmul float %914, 0x3FF7154CA0000000 %916 = call float @llvm.AMDIL.exp.(float %915) %917 = call float @llvm.AMDIL.clamp.(float %916, float 0.000000e+00, float 1.000000e+00) %918 = fsub float -0.000000e+00, %917 %919 = fadd float %918, 1.000000e+00 %920 = fmul float %919, %894 %921 = fsub float -0.000000e+00, %880 %922 = fadd float %921, %65 %923 = fsub float -0.000000e+00, %881 %924 = fadd float %923, %66 %925 = fsub float -0.000000e+00, %882 %926 = fadd float %925, %67 %927 = fmul float %920, %922 %928 = fadd float %927, %880 %929 = fmul float %920, %924 %930 = fadd float %929, %881 %931 = fmul float %920, %926 %932 = fadd float %931, %882 %933 = call i32 @llvm.SI.packf16(float %928, float %930) %934 = bitcast i32 %933 to float %935 = call i32 @llvm.SI.packf16(float %932, float %812) %936 = bitcast i32 %935 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %934, float %936, float %934, float %936) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v11, v0, 1, 6, [m0] ; C82C1900 v_interp_p2_f32 v11, [v11], v1, 1, 6, [m0] ; C82D1901 v_sub_f32_e32 v3, 1.0, v11 ; 080616F2 v_interp_p1_f32 v13, v0, 0, 6, [m0] ; C8341800 v_interp_p2_f32 v13, [v13], v1, 0, 6, [m0] ; C8351801 v_add_f32_e32 v2, 0, v13 ; 06041A80 s_load_dwordx4 s[8:11], s[4:5], 0x24 ; C0840524 s_load_dwordx8 s[12:19], vcc, 0x48 ; C0C66B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:9], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800F00 00430602 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v6|, 0 ; D00C0100 20010106 v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; D2000002 00018280 v_cmp_ne_i32_e64 s[0:1], v2, 0 ; D10A0000 00010102 v_cndmask_b32_e64 v2, 0, -1.0, s[0:1] ; D2000002 0001E680 v_cmp_lt_f32_e64 s[12:13], v2, 0 ; D002000C 00010102 v_interp_p1_f32 v4, v0, 2, 5, [m0] ; C8101600 v_interp_p2_f32 v4, [v4], v1, 2, 5, [m0] ; C8111601 v_interp_p1_f32 v5, v0, 1, 5, [m0] ; C8141500 v_interp_p2_f32 v5, [v5], v1, 1, 5, [m0] ; C8151501 v_interp_p1_f32 v10, v0, 2, 4, [m0] ; C8281200 v_interp_p2_f32 v10, [v10], v1, 2, 4, [m0] ; C8291201 v_interp_p1_f32 v12, v0, 1, 4, [m0] ; C8301100 v_interp_p2_f32 v12, [v12], v1, 1, 4, [m0] ; C8311101 v_interp_p1_f32 v14, v0, 0, 4, [m0] ; C8381000 v_interp_p2_f32 v14, [v14], v1, 0, 4, [m0] ; C8391001 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_interp_p1_f32 v2, v0, 0, 3, [m0] ; C8080C00 v_interp_p2_f32 v2, [v2], v1, 0, 3, [m0] ; C8090C01 v_interp_p1_f32 v17, v0, 1, 2, [m0] ; C8440900 v_interp_p2_f32 v17, [v17], v1, 1, 2, [m0] ; C8450901 v_interp_p1_f32 v16, v0, 0, 2, [m0] ; C8400800 v_interp_p2_f32 v16, [v16], v1, 0, 2, [m0] ; C8410801 v_interp_p1_f32 v21, v0, 1, 1, [m0] ; C8540500 v_interp_p2_f32 v21, [v21], v1, 1, 1, [m0] ; C8550501 v_interp_p1_f32 v20, v0, 0, 1, [m0] ; C8500400 v_interp_p2_f32 v20, [v20], v1, 0, 1, [m0] ; C8510401 v_interp_p1_f32 v23, v0, 1, 0, [m0] ; C85C0100 v_interp_p2_f32 v23, [v23], v1, 1, 0, [m0] ; C85D0101 v_interp_p1_f32 v22, v0, 0, 0, [m0] ; C8580000 v_interp_p2_f32 v22, [v22], v1, 0, 0, [m0] ; C8590001 s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s49, s[20:23], 0x38 ; C2189538 s_buffer_load_dword s0, s[20:23], 0x36 ; C2001536 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 0 ; 04750000 s_buffer_load_dword s0, s[20:23], 0x35 ; C2001535 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 2 ; 04750400 s_buffer_load_dword s0, s[20:23], 0x34 ; C2001534 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 1 ; 04750200 s_buffer_load_dword s0, s[20:23], 0x33 ; C2001533 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 45 ; 04755A00 s_buffer_load_dword s0, s[20:23], 0x31 ; C2001531 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 49 ; 04756200 s_buffer_load_dword s0, s[20:23], 0x30 ; C2001530 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 43 ; 04755600 s_buffer_load_dword s0, s[20:23], 0x2f ; C200152F s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 48 ; 04756000 s_buffer_load_dword s0, s[20:23], 0x2d ; C200152D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 50 ; 04756400 s_buffer_load_dword s0, s[20:23], 0x2c ; C200152C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 44 ; 04755800 s_buffer_load_dword s19, s[20:23], 0x2b ; C209952B s_buffer_load_dword s33, s[20:23], 0x29 ; C2109529 s_buffer_load_dword s0, s[20:23], 0x28 ; C2001528 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 46 ; 04755C00 s_buffer_load_dword s32, s[20:23], 0x27 ; C2101527 s_buffer_load_dword s64, s[20:23], 0x25 ; C2201525 s_buffer_load_dword s0, s[20:23], 0x24 ; C2001524 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 47 ; 04755E00 s_buffer_load_dword s16, s[20:23], 0x22 ; C2081522 s_buffer_load_dword s10, s[20:23], 0x21 ; C2051521 s_buffer_load_dword s11, s[20:23], 0x20 ; C2059520 s_buffer_load_dword s35, s[20:23], 0x1f ; C211951F s_buffer_load_dword s0, s[20:23], 0x1d ; C200151D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 3 ; 04750600 s_buffer_load_dword s0, s[20:23], 0x1c ; C200151C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 4 ; 04750800 s_buffer_load_dword s99, s[20:23], 0x1b ; C231951B s_buffer_load_dword s100, s[20:23], 0x1a ; C232151A s_buffer_load_dword s48, s[20:23], 0x19 ; C2181519 s_buffer_load_dword s9, s[20:23], 0x18 ; C2049518 s_buffer_load_dword s81, s[20:23], 0x17 ; C2289517 s_buffer_load_dword s82, s[20:23], 0x16 ; C2291516 s_buffer_load_dword s96, s[20:23], 0x15 ; C2301515 s_buffer_load_dword s83, s[20:23], 0x14 ; C2299514 s_buffer_load_dword s17, s[20:23], 0x13 ; C2089513 s_buffer_load_dword s98, s[20:23], 0x12 ; C2311512 s_buffer_load_dword s0, s[20:23], 0x11 ; C2001511 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 42 ; 04755400 s_buffer_load_dword s0, s[20:23], 0x10 ; C2001510 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 37 ; 04754A00 s_buffer_load_dword s97, s[20:23], 0xf ; C230950F s_buffer_load_dword s80, s[20:23], 0xe ; C228150E s_buffer_load_dword s51, s[20:23], 0xd ; C219950D s_buffer_load_dword s50, s[20:23], 0xc ; C219150C s_buffer_load_dword s18, s[20:23], 0xa ; C209150A s_buffer_load_dword s101, s[20:23], 0x9 ; C2329509 s_buffer_load_dword s8, s[20:23], 0x8 ; C2041508 s_buffer_load_dword s65, s[20:23], 0x6 ; C2209506 s_buffer_load_dword s66, s[20:23], 0x5 ; C2211505 s_buffer_load_dword s67, s[20:23], 0x4 ; C2219504 s_buffer_load_dword s34, s[20:23], 0x0 ; C2111500 v_mov_b32_e32 v1, s35 ; 7E020223 v_mov_b32_e32 v24, s100 ; 7E300264 v_mov_b32_e32 v26, s48 ; 7E340230 v_mov_b32_e32 v25, s9 ; 7E320209 v_mov_b32_e32 v15, s82 ; 7E1E0252 v_mov_b32_e32 v19, s96 ; 7E260260 v_mov_b32_e32 v18, s83 ; 7E240253 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx4 s[20:23], s[4:5], 0x4 ; C08A0504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[52:55], s[4:5], 0xc ; C09A050C s_load_dwordx4 s[68:71], s[4:5], 0x10 ; C0A20510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 5 ; 04750A00 v_writelane_b32 v58, s1, 6 ; 04750C01 v_writelane_b32 v58, s2, 7 ; 04750E02 v_writelane_b32 v58, s3, 8 ; 04751003 s_load_dwordx4 s[0:3], s[4:5], 0x18 ; C0800518 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 51 ; 04756600 v_writelane_b32 v58, s1, 52 ; 04756801 v_writelane_b32 v58, s2, 53 ; 04756A02 v_writelane_b32 v58, s3, 54 ; 04756C03 s_load_dwordx4 s[0:3], s[4:5], 0x1c ; C080051C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 17 ; 04752200 v_writelane_b32 v58, s1, 18 ; 04752401 v_writelane_b32 v58, s2, 19 ; 04752602 v_writelane_b32 v58, s3, 20 ; 04752803 s_load_dwordx4 s[0:3], s[4:5], 0x20 ; C0800520 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 33 ; 04754200 v_writelane_b32 v58, s1, 34 ; 04754401 v_writelane_b32 v58, s2, 35 ; 04754602 v_writelane_b32 v58, s3, 36 ; 04754803 s_load_dwordx4 s[0:3], s[4:5], 0x28 ; C0800528 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 29 ; 04753A00 v_writelane_b32 v58, s1, 30 ; 04753C01 v_writelane_b32 v58, s2, 31 ; 04753E02 v_writelane_b32 v58, s3, 32 ; 04754003 s_load_dwordx4 s[0:3], s[4:5], 0x2c ; C080052C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s0, 38 ; 04754C00 v_writelane_b32 v58, s1, 39 ; 04754E01 v_writelane_b32 v58, s2, 40 ; 04755002 v_writelane_b32 v58, s3, 41 ; 04755203 s_load_dwordx8 s[24:31], vcc, 0x0 ; C0CC6B00 s_load_dwordx8 s[36:43], vcc, 0x8 ; C0D26B08 s_load_dwordx8 s[0:7], vcc, 0x10 ; C0C06B10 s_load_dwordx8 s[56:63], vcc, 0x18 ; C0DC6B18 s_load_dwordx8 s[72:79], vcc, 0x20 ; C0E46B20 s_load_dwordx8 s[88:95], vcc, 0x28 ; C0EC6B28 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s88, 9 ; 04751258 v_writelane_b32 v58, s89, 10 ; 04751459 v_writelane_b32 v58, s90, 11 ; 0475165A v_writelane_b32 v58, s91, 12 ; 0475185B v_writelane_b32 v58, s92, 13 ; 04751A5C v_writelane_b32 v58, s93, 14 ; 04751C5D v_writelane_b32 v58, s94, 15 ; 04751E5E v_writelane_b32 v58, s95, 16 ; 0475205F s_load_dwordx8 s[84:91], vcc, 0x30 ; C0EA6B30 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s84, 55 ; 04756E54 v_writelane_b32 v58, s85, 56 ; 04757055 v_writelane_b32 v58, s86, 57 ; 04757256 v_writelane_b32 v58, s87, 58 ; 04757457 v_writelane_b32 v58, s88, 59 ; 04757658 v_writelane_b32 v58, s89, 60 ; 04757859 v_writelane_b32 v58, s90, 61 ; 04757A5A v_writelane_b32 v58, s91, 62 ; 04757C5B s_load_dwordx8 s[84:91], vcc, 0x38 ; C0EA6B38 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v58, s84, 21 ; 04752A54 v_writelane_b32 v58, s85, 22 ; 04752C55 v_writelane_b32 v58, s86, 23 ; 04752E56 v_writelane_b32 v58, s87, 24 ; 04753057 v_writelane_b32 v58, s88, 25 ; 04753258 v_writelane_b32 v58, s89, 26 ; 04753459 v_writelane_b32 v58, s90, 27 ; 0475365A v_writelane_b32 v58, s91, 28 ; 0475385B image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[0:7], s[44:47] ; F0800F00 01601B14 v_rcp_f32_e32 v31, s34 ; 7E3E5422 v_mul_f32_e32 v31, 0x3eaa7efa, v31 ; 103E3EFF 3EAA7EFA v_mul_f32_e32 v32, v31, v31 ; 10403F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v30, v32 ; 1042411E image_sample v[34:37], 15, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[24:31], s[12:15] ; F0800F00 00662216 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, v37, v32, v33 ; D2820021 04864125 v_mad_f32 v38, 2.0, v35, -1.0 ; D2820026 03CE46F4 v_mul_f32_e32 v39, v38, v31 ; 104E3F26 v_mad_f32 v40, v31, v38, v39 ; D2820028 049E4D1F v_mad_f32 v41, 2.0, v28, -1.0 ; D2820029 03CE38F4 v_mul_f32_e32 v41, v41, v31 ; 10523F29 v_mad_f32 v33, v41, v40, v33 ; D2820021 04865129 image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[0:7], s[44:47] ; F0800F00 01602A10 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, v45, v32, v33 ; D2820021 0486412D v_mad_f32 v46, 2.0, v43, -1.0 ; D282002E 03CE56F4 v_mul_f32_e32 v47, v46, v31 ; 105E3F2E v_mad_f32 v33, v40, v47, v33 ; D2820021 04865F28 v_mul_f32_e32 v40, v47, v41 ; 1050532F v_mad_f32 v33, 2.0, v40, v33 ; D2820021 048650F4 v_mad_f32 v38, v31, v38, v41 ; D2820026 04A64D1F v_mad_f32 v38, v31, v46, v38 ; D2820026 049A5D1F v_mad_f32 v33, -v38, v38, v33 ; D2820021 24864D26 v_rcp_f32_e32 v40, s17 ; 7E505411 v_add_f32_e32 v46, v33, v40 ; 065C5121 v_mul_f32_e32 v48, v29, v32 ; 1060411D v_mad_f32 v48, v36, v32, v48 ; D2820030 04C24124 v_mad_f32 v34, 2.0, v34, -1.0 ; D2820022 03CE44F4 v_mul_f32_e32 v35, v34, v31 ; 10463F22 v_mad_f32 v36, v31, v34, v35 ; D2820024 048E451F v_mad_f32 v27, -2.0, v27, 1.0 ; D282001B 03CA36F5 v_mul_f32_e32 v27, v27, v31 ; 10363F1B v_mad_f32 v28, v27, v36, v48 ; D282001C 04C2491B v_mad_f32 v28, v44, v32, v28 ; D282001C 0472412C v_mad_f32 v29, 2.0, v42, -1.0 ; D282001D 03CE54F4 v_mul_f32_e32 v30, v29, v31 ; 103C3F1D v_mad_f32 v28, v36, v30, v28 ; D282001C 04723D24 v_mul_f32_e32 v36, v30, v27 ; 1048371E v_mad_f32 v28, 2.0, v36, v28 ; D282001C 047248F4 v_mad_f32 v34, v31, v34, v27 ; D2820022 046E451F v_mad_f32 v29, v31, v29, v34 ; D282001D 048A3B1F v_mad_f32 v28, -v29, v29, v28 ; D282001C 24723B1D v_add_f32_e32 v31, v28, v40 ; 063E511C v_mul_f32_e32 v34, v30, v39 ; 10444F1E v_mad_f32 v34, v35, v47, v34 ; D2820022 048A5F23 v_mul_f32_e32 v36, v27, v39 ; 10484F1B v_mad_f32 v35, v35, v41, v36 ; D2820023 04925323 image_sample v20, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[56:63], s[52:55] ; F0800100 01AE1414 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, -2.0, v20, 1.0 ; D2820014 03CA28F5 v_mul_f32_e32 v20, v20, v32 ; 10284114 image_sample v21, 1, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[36:43], s[20:23] ; F0800100 00A91516 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, 2.0, v21, -1.0 ; D2820015 03CE2AF4 v_mad_f32 v20, v21, v32, v20 ; D2820014 04524115 v_add_f32_e32 v20, v35, v20 ; 06282923 image_sample v16, 1, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[56:63], s[52:55] ; F0800100 01AE1010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, 2.0, v16, -1.0 ; D2820010 03CE20F4 v_mad_f32 v16, v16, v32, v20 ; D2820010 04524110 v_add_f32_e32 v16, v34, v16 ; 06202122 v_mul_f32_e32 v17, v30, v41 ; 1022531E v_mad_f32 v17, v27, v47, v17 ; D2820011 04465F1B v_add_f32_e32 v16, v17, v16 ; 06202111 v_mad_f32 v16, -v29, v38, v16 ; D2820010 24424D1D v_mul_f32_e32 v17, v16, v16 ; 10222110 v_mad_f32 v20, v31, v46, -v17 ; D2820014 84465D1F v_max_f32_e32 v21, 0x33d6bf95, v20 ; 202A28FF 33D6BF95 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_rcp_f32_e32 v22, v20 ; 7E2C5514 v_sub_f32_e32 v23, s10, v12 ; 082E180A v_sub_f32_e32 v27, s11, v14 ; 08361C0B v_mul_f32_e32 v30, v27, v27 ; 103C371B v_mad_f32 v30, v23, v23, v30 ; D282001E 047A2F17 v_sub_f32_e32 v32, s16, v10 ; 08401410 v_mad_f32 v30, v32, v32, v30 ; D282001E 047A4120 v_max_f32_e32 v30, 0x33d6bf95, v30 ; 203C3CFF 33D6BF95 v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E v_mad_f32 v34, v30, v23, -s48 ; D2820022 80C22F1E v_mad_f32 v26, v34, s99, v26 ; D282001A 0468C722 v_mad_f32 v34, v30, v27, -s9 ; D2820022 8026371E v_mad_f32 v25, v34, s99, v25 ; D2820019 0464C722 v_mul_f32_e32 v34, v25, v25 ; 10443319 v_mad_f32 v34, v26, v26, v34 ; D2820022 048A351A v_mad_f32 v35, v30, v32, -s100 ; D2820023 8192411E v_mad_f32 v24, v35, s99, v24 ; D2820018 0460C723 v_mad_f32 v34, v24, v24, v34 ; D2820022 048A3118 v_max_f32_e32 v34, 0x33d6bf95, v34 ; 204444FF 33D6BF95 v_rsq_clamp_f32_e32 v34, v34 ; 7E445922 v_mad_f32 v25, v25, v34, s8 ; D2820019 00224519 v_mad_f32 v24, v24, v34, s18 ; D2820018 004A4518 v_rcp_f32_e32 v24, v24 ; 7E305518 v_mad_f32 v25, v25, v24, -v29 ; D2820019 84763119 v_add_f32_e32 v16, v16, v16 ; 06202110 v_mul_f32_e32 v35, v25, v16 ; 10462119 v_mad_f32 v26, v26, v34, s101 ; D282001A 0196451A v_mad_f32 v24, v26, v24, -v38 ; D2820018 849A311A v_mad_f32 v26, v31, v24, -v35 ; D282001A 848E311F v_mul_f32_e32 v24, v26, v24 ; 1030311A v_mul_f32_e32 v25, v25, v25 ; 10323319 v_mad_f32 v24, v25, v46, v24 ; D2820018 04625D19 v_mul_f32_e32 v24, 0.5, v24 ; 103030F0 v_mul_f32_e32 v25, v24, v22 ; 10322D18 v_mov_b32_e32 v26, 0xbfb8aa65 ; 7E3402FF BFB8AA65 v_mul_f32_e32 v25, v26, v25 ; 1032331A v_exp_f32_e32 v25, v25 ; 7E324B19 v_mul_f32_e32 v21, v21, v25 ; 102A3315 v_mul_f32_e32 v25, s98, v21 ; 10322A62 v_mul_f32_e32 v25, 0x3f4ccccd, v25 ; 103232FF 3F4CCCCD v_cmp_ge_f32_e64 s[0:1], -v20, 0 ; D00C0000 20010114 v_cndmask_b32_e64 v31, 0, -1, s[0:1] ; D200081F 00018280 v_cmp_ne_i32_e64 s[0:1], v31, 0 ; D10A0000 0001011F v_mov_b32_e32 v31, 0x80000000 ; 7E3E02FF 80000000 v_cndmask_b32_e64 v34, v31, 1.0, s[0:1] ; D2000022 1801E51F v_mov_b32_e32 v35, 0xc1800000 ; 7E4602FF C1800000 v_mad_f32 v22, v22, v24, v35 ; D2820016 048E3116 v_cmp_ge_f32_e64 s[0:1], v22, 0 ; D00C0000 00010116 v_cndmask_b32_e64 v22, 0, -1, s[0:1] ; D2000016 00018280 v_cmp_ne_i32_e64 s[0:1], v22, 0 ; D10A0000 00010116 v_cndmask_b32_e64 v22, v31, 1.0, s[0:1] ; D2000016 1801E51F v_add_f32_e32 v22, v34, v22 ; 062C2D22 v_cmp_ge_f32_e64 s[0:1], -v22, 0 ; D00C0000 20010116 v_cndmask_b32_e64 v22, 0, -1, s[0:1] ; D2000016 00018280 v_cmp_ne_i32_e64 s[4:5], v22, 0 ; D10A0004 00010116 v_cndmask_b32_e64 v22, v31, v25, s[4:5] ; D2000016 1812331F v_rcp_f32_e32 v24, s97 ; 7E305461 v_add_f32_e32 v25, v33, v24 ; 06323121 v_add_f32_e32 v34, v28, v24 ; 0644311C v_mad_f32 v36, v34, v25, -v17 ; D2820024 84463322 v_max_f32_e32 v37, 0x33d6bf95, v36 ; 204A48FF 33D6BF95 v_rsq_clamp_f32_e32 v37, v37 ; 7E4A5925 v_rcp_f32_e32 v39, v36 ; 7E4E5524 v_mad_f32 v40, v30, v23, -s96 ; D2820028 81822F1E v_mad_f32 v19, v40, s81, v19 ; D2820013 044CA328 v_mad_f32 v40, v30, v27, -s83 ; D2820028 814E371E v_mad_f32 v18, v40, s81, v18 ; D2820012 0448A328 v_mul_f32_e32 v40, v18, v18 ; 10502512 v_mad_f32 v40, v19, v19, v40 ; D2820028 04A22713 v_mad_f32 v41, v30, v32, -s82 ; D2820029 814A411E v_mad_f32 v15, v41, s81, v15 ; D282000F 043CA329 v_mad_f32 v40, v15, v15, v40 ; D2820028 04A21F0F v_max_f32_e32 v40, 0x33d6bf95, v40 ; 205050FF 33D6BF95 v_rsq_clamp_f32_e32 v40, v40 ; 7E505928 v_mad_f32 v18, v18, v40, s67 ; D2820012 010E5112 v_mad_f32 v15, v15, v40, s65 ; D282000F 0106510F v_rcp_f32_e32 v15, v15 ; 7E1E550F v_mad_f32 v18, v18, v15, -v29 ; D2820012 84761F12 v_mul_f32_e32 v16, v16, v18 ; 10202510 v_mad_f32 v19, v19, v40, s66 ; D2820013 010A5113 v_mad_f32 v15, v19, v15, -v38 ; D282000F 849A1F13 v_mad_f32 v19, v34, v15, -v16 ; D2820013 84421F22 v_mul_f32_e32 v19, v19, v15 ; 10261F13 v_mul_f32_e32 v18, v18, v18 ; 10242512 v_mad_f32 v19, v18, v25, v19 ; D2820013 044E3312 v_mul_f32_e32 v19, 0.5, v19 ; 102626F0 v_mul_f32_e32 v25, v19, v39 ; 10324F13 v_mul_f32_e32 v25, v26, v25 ; 1032331A v_exp_f32_e32 v25, v25 ; 7E324B19 v_mul_f32_e32 v25, v25, v37 ; 10324B19 v_mul_f32_e32 v34, s80, v25 ; 10443250 v_cmp_ge_f32_e64 s[0:1], -v36, 0 ; D00C0000 20010124 v_cndmask_b32_e64 v36, 0, -1, s[0:1] ; D2000024 00018280 v_cmp_ne_i32_e64 s[0:1], v36, 0 ; D10A0000 00010124 v_cndmask_b32_e64 v36, v31, 1.0, s[0:1] ; D2000024 1801E51F v_mad_f32 v19, v39, v19, v35 ; D2820013 048E2727 v_cmp_ge_f32_e64 s[0:1], v19, 0 ; D00C0000 00010113 v_cndmask_b32_e64 v19, 0, -1, s[0:1] ; D2000813 00018280 v_cmp_ne_i32_e64 s[0:1], v19, 0 ; D10A0000 00010113 v_cndmask_b32_e64 v19, v31, 1.0, s[0:1] ; D2000813 1801E51F v_add_f32_e32 v19, v36, v19 ; 06262724 v_cmp_ge_f32_e64 s[0:1], -v19, 0 ; D00C0000 20010113 v_cndmask_b32_e64 v19, 0, -1, s[0:1] ; D2000813 00018280 v_cmp_ne_i32_e64 s[82:83], v19, 0 ; D10A0052 00010113 v_cndmask_b32_e64 v19, v31, v34, s[82:83] ; D2000813 194A451F v_add_f32_e32 v19, v22, v19 ; 06262716 v_mul_f32_e32 v22, v27, v30 ; 102C3D1B v_mul_f32_e32 v22, v22, v29 ; 102C3B16 v_mul_f32_e32 v23, v23, v30 ; 102E3D17 v_mad_f32 v22, v38, v23, v22 ; D2820016 045A2F26 v_mad_f32 v22, v30, v32, v22 ; D2820016 045A411E v_sub_f32_e32 v22, 1.0, v22 ; 082C2CF2 v_mul_f32_e32 v23, v22, v22 ; 102E2D16 v_mul_f32_e32 v23, v23, v23 ; 102E2F17 v_mul_f32_e32 v27, v23, v22 ; 10362D17 v_readlane_b32 s0, v58, 51 ; 0201673A v_readlane_b32 s1, v58, 52 ; 0203693A v_readlane_b32 s2, v58, 53 ; 02056B3A v_readlane_b32 s3, v58, 54 ; 02076D3A s_nop 2 ; BF800002 v_readlane_b32 s20, v58, 55 ; 02296F3A v_readlane_b32 s21, v58, 56 ; 022B713A v_readlane_b32 s22, v58, 57 ; 022D733A v_readlane_b32 s23, v58, 58 ; 022F753A v_readlane_b32 s24, v58, 59 ; 0231773A v_readlane_b32 s25, v58, 60 ; 0233793A v_readlane_b32 s26, v58, 61 ; 02357B3A v_readlane_b32 s27, v58, 62 ; 02377D3A s_nop 2 ; BF800002 image_sample v[39:41], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[0:3] ; F0800700 00052702 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v30, v41 ; 7E3C4F29 v_mov_b32_e32 v32, 0x400ccccd ; 7E4002FF 400CCCCD v_mul_legacy_f32_e32 v30, v32, v30 ; 0E3C3D20 v_exp_f32_e32 v30, v30 ; 7E3C4B1E v_mad_f32 v19, v19, v27, v30 ; D2820013 047A3713 v_mov_b32_e32 v34, 0x41800000 ; 7E4402FF 41800000 v_mad_f32 v36, v24, v34, v33 ; D2820024 04864518 v_mad_f32 v24, v24, v34, v28 ; D2820018 04724518 v_mad_f32 v17, v24, v36, -v17 ; D2820011 84464918 v_max_f32_e32 v34, 0x33d6bf95, v17 ; 204422FF 33D6BF95 v_rsq_clamp_f32_e32 v34, v34 ; 7E445922 v_rcp_f32_e32 v37, v17 ; 7E4A5511 v_mad_f32 v16, v24, v15, -v16 ; D2820010 84421F18 v_mul_f32_e32 v15, v16, v15 ; 101E1F10 v_mad_f32 v15, v18, v36, v15 ; D282000F 043E4912 v_mul_f32_e32 v15, 0.5, v15 ; 101E1EF0 v_mul_f32_e32 v16, v37, v15 ; 10201F25 v_mul_f32_e32 v16, v26, v16 ; 1020211A v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v16, v16, v34 ; 10204510 v_mul_f32_e32 v16, 0x3ba3d70a, v16 ; 102020FF 3BA3D70A v_mul_f32_e32 v18, s80, v30 ; 10243C50 v_mul_f32_e32 v18, v18, v16 ; 10242112 v_cmp_ge_f32_e64 s[0:1], -v17, 0 ; D00C0000 20010111 v_cndmask_b32_e64 v17, 0, -1, s[0:1] ; D2000811 00018280 v_cmp_ne_i32_e64 s[0:1], v17, 0 ; D10A0000 00010111 v_cndmask_b32_e64 v17, v31, 1.0, s[0:1] ; D2000811 1801E51F v_mad_f32 v15, v15, v37, v35 ; D282000F 048E4B0F v_cmp_ge_f32_e64 s[0:1], v15, 0 ; D00C0000 0001010F v_cndmask_b32_e64 v15, 0, -1, s[0:1] ; D200080F 00018280 v_cmp_ne_i32_e64 s[0:1], v15, 0 ; D10A0000 0001010F v_cndmask_b32_e64 v15, v31, 1.0, s[0:1] ; D200080F 1801E51F v_add_f32_e32 v15, v17, v15 ; 061E1F11 v_cmp_ge_f32_e64 s[0:1], -v15, 0 ; D00C0000 2001010F v_cndmask_b32_e64 v15, 0, -1, s[0:1] ; D200080F 00018280 v_cmp_ne_i32_e64 s[0:1], v15, 0 ; D10A0000 0001010F v_cndmask_b32_e64 v15, v31, v18, s[0:1] ; D200080F 1802251F v_mul_f32_e32 v17, s67, v29 ; 10223A43 v_mad_f32 v17, s66, v38, v17 ; D2820011 04464C42 v_add_f32_e32 v17, s65, v17 ; 06222241 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_mad_f32 v15, v19, v17, v15 ; D282000F 043E2313 v_mul_f32_e32 v18, s33, v12 ; 10241821 v_mad_f32 v18, v14, s64, v18 ; D2820012 0448810E v_readlane_b32 s2, v58, 50 ; 0205653A s_nop 2 ; BF800002 v_mad_f32 v18, v10, s2, v18 ; D2820012 0448050A v_readlane_b32 s2, v58, 49 ; 0205633A s_nop 2 ; BF800002 v_add_f32_e32 v18, s2, v18 ; 06242402 v_mul_f32_e32 v19, s19, v12 ; 10261813 v_mad_f32 v19, v14, s32, v19 ; D2820013 044C410E v_readlane_b32 s2, v58, 48 ; 0205613A s_nop 2 ; BF800002 v_mad_f32 v19, v10, s2, v19 ; D2820013 044C050A v_readlane_b32 s2, v58, 45 ; 02055B3A s_nop 2 ; BF800002 v_add_f32_e32 v19, s2, v19 ; 06262602 v_rcp_f32_e32 v19, v19 ; 7E265513 v_mul_f32_e32 v18, v18, v19 ; 10242712 v_mad_f32 v18, -0.5, v18, -0.5 ; D2820012 03C624F1 v_mov_b32_e32 v24, 0x40c00000 ; 7E3002FF 40C00000 v_mad_f32 v18, v33, v24, v18 ; D2820012 044A3121 v_mad_f32 v30, -2.0, v20, v18 ; D282001E 044A28F5 v_readlane_b32 s2, v58, 46 ; 02055D3A s_nop 2 ; BF800002 v_mul_f32_e32 v26, s2, v12 ; 10341802 v_readlane_b32 s2, v58, 47 ; 02055F3A s_nop 2 ; BF800002 v_mad_f32 v26, v14, s2, v26 ; D282001A 0468050E v_readlane_b32 s2, v58, 44 ; 0205593A s_nop 2 ; BF800002 v_mad_f32 v26, v10, s2, v26 ; D282001A 0468050A v_readlane_b32 s2, v58, 43 ; 0205573A s_nop 2 ; BF800002 v_add_f32_e32 v26, s2, v26 ; 06343402 v_mul_f32_e32 v19, v26, v19 ; 1026271A v_mad_f32 v19, 0.5, v19, -0.5 ; D2820013 03C626F0 v_mad_f32 v19, v28, v24, v19 ; D2820013 044E311C v_mad_f32 v29, 2.0, v20, v19 ; D282001D 044E28F4 image_sample v[33:36], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[72:79], s[68:71] ; F0800F00 0232211D v_mov_b32_e32 v26, 0x3e800000 ; 7E3402FF 3E800000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v28, 0x3e800000, v35 ; 103846FF 3E800000 v_mad_f32 v37, 2.0, v20, v18 ; D2820025 044A28F4 v_mov_b32_e32 v42, v29 ; 7E54031D v_mov_b32_e32 v43, v30 ; 7E56031E v_mov_b32_e32 v43, v37 ; 7E560325 image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[42:43], s[72:79], s[68:71] ; F0800F00 02322A2A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v44, v26, v28 ; D282001C 0472352C v_mad_f32 v38, v20, v24, v18 ; D2820026 044A3114 v_mad_f32 v37, -2.0, v20, v19 ; D2820025 044E28F5 image_sample v[46:49], 15, 0, 0, 0, 0, 0, 0, 0, v[37:38], s[72:79], s[68:71] ; F0800F00 02322E25 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, v48, v26, v28 ; D2820012 04723530 v_mov_b32_e32 v38, v30 ; 7E4C031E image_sample v[50:53], 15, 0, 0, 0, 0, 0, 0, 0, v[37:38], s[72:79], s[68:71] ; F0800F00 02323225 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, v52, v26, v18 ; D2820012 044A3534 v_log_f32_e64 v18, |v18| ; D34E0112 00000112 v_mul_f32_e32 v18, 0x400ccccd, v18 ; 102424FF 400CCCCD v_exp_f32_e32 v18, v18 ; 7E244B12 v_mad_f32 v15, v18, s49, v15 ; D282000F 043C6312 v_mad_f32 v18, v22, v23, 0.5 ; D2820012 03C22F16 v_mul_f32_e32 v15, v15, v18 ; 101E250F v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_mul_f32_e32 v15, 0x3ee8ba1f, v15 ; 101E1EFF 3EE8BA1F v_exp_f32_e32 v56, v15 ; 7E704B0F v_readlane_b32 s2, v58, 42 ; 0205553A s_nop 2 ; BF800002 v_mul_f32_e32 v15, s2, v21 ; 101E2A02 v_mul_f32_e32 v15, 0x3f4ccccd, v15 ; 101E1EFF 3F4CCCCD v_cndmask_b32_e64 v15, v31, v15, s[4:5] ; D200080F 18121F1F v_mul_f32_e32 v19, s51, v25 ; 10263233 v_cndmask_b32_e64 v19, v31, v19, s[82:83] ; D2000813 194A271F v_add_f32_e32 v15, v15, v19 ; 061E270F v_log_f32_e32 v19, v40 ; 7E264F28 v_mul_legacy_f32_e32 v19, v32, v19 ; 0E262720 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mad_f32 v15, v15, v27, v19 ; D282000F 044E370F v_mul_f32_e32 v19, s51, v19 ; 10262633 v_mul_f32_e32 v19, v19, v16 ; 10262113 v_cndmask_b32_e64 v19, v31, v19, s[0:1] ; D2000813 1802271F v_mad_f32 v15, v15, v17, v19 ; D282000F 044E230F v_mul_f32_e32 v19, 0x3e800000, v34 ; 102644FF 3E800000 v_mad_f32 v19, v43, v26, v19 ; D2820013 044E352B v_mad_f32 v19, v47, v26, v19 ; D2820013 044E352F v_mad_f32 v19, v51, v26, v19 ; D2820013 044E3533 v_log_f32_e64 v19, |v19| ; D34E0113 00000113 v_mul_f32_e32 v19, 0x400ccccd, v19 ; 102626FF 400CCCCD v_exp_f32_e32 v19, v19 ; 7E264B13 v_mad_f32 v15, v19, s49, v15 ; D282000F 043C6313 v_mul_f32_e32 v15, v15, v18 ; 101E250F v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_mul_f32_e32 v15, 0x3ee8ba1f, v15 ; 101E1EFF 3EE8BA1F v_exp_f32_e32 v55, v15 ; 7E6E4B0F v_readlane_b32 s2, v58, 37 ; 02054B3A s_nop 2 ; BF800002 v_mul_f32_e32 v15, s2, v21 ; 101E2A02 v_mul_f32_e32 v15, 0x3f4ccccd, v15 ; 101E1EFF 3F4CCCCD v_cndmask_b32_e64 v15, v31, v15, s[4:5] ; D200080F 18121F1F v_mul_f32_e32 v19, s50, v25 ; 10263232 v_cndmask_b32_e64 v19, v31, v19, s[82:83] ; D2000813 194A271F v_add_f32_e32 v15, v15, v19 ; 061E270F v_log_f32_e32 v19, v39 ; 7E264F27 v_mul_legacy_f32_e32 v19, v32, v19 ; 0E262720 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mad_f32 v15, v15, v27, v19 ; D282000F 044E370F v_mul_f32_e32 v19, s50, v19 ; 10262632 v_mul_f32_e32 v16, v19, v16 ; 10202113 v_cndmask_b32_e64 v16, v31, v16, s[0:1] ; D2000010 1802211F v_mad_f32 v15, v15, v17, v16 ; D282000F 0442230F v_mul_f32_e32 v16, 0x3e800000, v33 ; 102042FF 3E800000 v_mad_f32 v16, v42, v26, v16 ; D2820010 0442352A v_mad_f32 v16, v46, v26, v16 ; D2820010 0442352E v_mad_f32 v16, v50, v26, v16 ; D2820010 04423532 v_log_f32_e64 v16, |v16| ; D34E0110 00000110 v_mul_f32_e32 v16, 0x400ccccd, v16 ; 102020FF 400CCCCD v_exp_f32_e32 v16, v16 ; 7E204B10 v_mad_f32 v15, v16, s49, v15 ; D282000F 043C6310 v_mul_f32_e32 v15, v15, v18 ; 101E250F v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_mul_f32_e32 v15, 0x3ee8ba1f, v15 ; 101E1EFF 3EE8BA1F v_exp_f32_e32 v54, v15 ; 7E6C4B0F s_load_dwordx8 s[0:7], vcc, 0x58 ; C0C06B58 v_readlane_b32 s12, v58, 38 ; 02194D3A v_readlane_b32 s13, v58, 39 ; 021B4F3A v_readlane_b32 s14, v58, 40 ; 021D513A v_readlane_b32 s15, v58, 41 ; 021F533A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[54:57], s[0:7], s[12:15] ; F0800700 00600F36 v_sub_f32_e32 v19, 1.0, v11 ; 082616F2 v_add_f32_e32 v18, 0, v13 ; 06241A80 s_load_dwordx8 s[0:7], vcc, 0x40 ; C0C06B40 v_readlane_b32 s12, v58, 33 ; 0219433A v_readlane_b32 s13, v58, 34 ; 021B453A v_readlane_b32 s14, v58, 35 ; 021D473A v_readlane_b32 s15, v58, 36 ; 021F493A s_nop 2 ; BF800002 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[18:21], 15, 0, 0, 0, 0, 0, 0, 0, v[18:19], s[0:7], s[12:15] ; F0800F00 00601212 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v11, 1.0, v21 ; 08162AF2 v_mad_f32 v13, v17, v11, v20 ; D282000D 04521711 v_sub_f32_e32 v22, 1.0, v6 ; 082C0CF2 v_mul_f32_e32 v22, 0x3ecccccd, v22 ; 102C2CFF 3ECCCCCD v_mad_f32 v29, v6, v13, v22 ; D282001D 045A1B06 v_mad_f32 v13, v16, v11, v19 ; D282000D 044E1710 v_mad_f32 v28, v6, v13, v22 ; D282001C 045A1B06 v_mad_f32 v11, v15, v11, v18 ; D282000B 044A170F v_mad_f32 v27, v6, v11, v22 ; D282001B 045A1706 s_load_dwordx8 s[0:7], vcc, 0x50 ; C0C06B50 v_readlane_b32 s12, v58, 29 ; 02193B3A v_readlane_b32 s13, v58, 30 ; 021B3D3A v_readlane_b32 s14, v58, 31 ; 021D3F3A v_readlane_b32 s15, v58, 32 ; 021F413A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[0:7], s[12:15] ; F0800700 00600F1B v_mad_f32 v6, -v8, v6, v6 ; D2820006 241A0D08 v_sub_f32_e32 v7, 1.0, v6 ; 080E0CF2 v_mul_f32_e32 v8, v28, v7 ; 10100F1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v6, v16, v8 ; D2820008 04222106 v_readlane_b32 s0, v58, 2 ; 0201053A s_nop 2 ; BF800002 v_sub_f32_e32 v9, s0, v8 ; 08121000 v_subrev_f32_e32 v11, s10, v12 ; 0A16180A v_subrev_f32_e32 v12, s11, v14 ; 0A181C0B v_mul_f32_e32 v12, v12, v12 ; 1018190C v_mad_f32 v11, v11, v11, v12 ; D282000B 0432170B v_subrev_f32_e32 v10, s16, v10 ; 0A141410 v_mad_f32 v11, v10, v10, v11 ; D282000B 042E150A v_readlane_b32 s0, v58, 3 ; 0201073A s_nop 2 ; BF800002 v_mul_f32_e32 v11, s0, v11 ; 10161600 v_readlane_b32 s0, v58, 4 ; 0201093A s_nop 2 ; BF800002 v_mul_f32_e32 v12, s0, v10 ; 10181400 v_mul_f32_e32 v12, 0x3fb8aa65, v12 ; 101818FF 3FB8AA65 v_exp_f32_e32 v12, v12 ; 7E184B0C v_sub_f32_e32 v12, 1.0, v12 ; 081818F2 v_mul_f32_e32 v11, v11, v12 ; 1016190B v_rcp_f32_e32 v10, v10 ; 7E14550A v_mul_f32_e32 v10, v11, v10 ; 1014150B v_mul_f32_e32 v10, 0x3fb8aa65, v10 ; 101414FF 3FB8AA65 v_exp_f32_e32 v10, v10 ; 7E144B0A v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v4, v5, v4 ; 10080905 v_mad_f32 v0, 0.5, v4, 0.5 ; D2820000 03C208F0 v_readlane_b32 s0, v58, 17 ; 0201233A v_readlane_b32 s1, v58, 18 ; 0203253A v_readlane_b32 s2, v58, 19 ; 0205273A v_readlane_b32 s3, v58, 20 ; 0207293A s_nop 2 ; BF800002 v_readlane_b32 s4, v58, 21 ; 02092B3A v_readlane_b32 s5, v58, 22 ; 020B2D3A v_readlane_b32 s6, v58, 23 ; 020D2F3A v_readlane_b32 s7, v58, 24 ; 020F313A v_readlane_b32 s8, v58, 25 ; 0211333A v_readlane_b32 s9, v58, 26 ; 0213353A v_readlane_b32 s10, v58, 27 ; 0215373A v_readlane_b32 s11, v58, 28 ; 0217393A s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[0:3] ; F0800100 00010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v1, v0, v9, v8 ; D2820001 04221300 v_mul_f32_e32 v4, v27, v7 ; 10080F1B v_mad_f32 v4, v6, v15, v4 ; D2820004 04121F06 v_readlane_b32 s0, v58, 1 ; 0201033A s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 v_mul_f32_e32 v4, v29, v7 ; 10080F1D v_mad_f32 v4, v6, v17, v4 ; D2820004 04122306 v_readlane_b32 s0, v58, 0 ; 0201013A s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v0, v0, v5, v4 ; D2820000 04120B00 v_mul_f32_e32 v4, 0x3e800000, v36 ; 100848FF 3E800000 v_mad_f32 v4, v45, v26, v4 ; D2820004 0412352D v_mad_f32 v4, v49, v26, v4 ; D2820004 04123531 v_mad_f32 v4, v53, v26, v4 ; D2820004 04123535 v_add_f32_e32 v4, 1.0, v4 ; 060808F2 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_readlane_b32 s0, v58, 5 ; 02010B3A v_readlane_b32 s1, v58, 6 ; 02030D3A v_readlane_b32 s2, v58, 7 ; 02050F3A v_readlane_b32 s3, v58, 8 ; 0207113A s_nop 2 ; BF800002 v_readlane_b32 s4, v58, 9 ; 0209133A v_readlane_b32 s5, v58, 10 ; 020B153A v_readlane_b32 s6, v58, 11 ; 020D173A v_readlane_b32 s7, v58, 12 ; 020F193A v_readlane_b32 s8, v58, 13 ; 02111B3A v_readlane_b32 s9, v58, 14 ; 02131D3A v_readlane_b32 s10, v58, 15 ; 02151F3A v_readlane_b32 s11, v58, 16 ; 0217213A s_nop 2 ; BF800002 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800800 00010202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL CONST[0..13] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.5000, -0.5000} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xyz, IN[1], IMM[0].xxxx, IMM[0].yyyy 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].x, IN[0].xxxx, IMM[0].zzzz, IMM[0].zzzz 3: MOV TEMP[2].xy, CONST[4].xyxx 4: ADD TEMP[3].yz, -TEMP[2].xxyw, CONST[5].xxyw 5: MAD TEMP[4].x, TEMP[1].xxxx, TEMP[3].yyyy, CONST[4].xxxx 6: MOV TEMP[2].x, TEMP[4].xxxx 7: MAD TEMP[5].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].zzzz 8: MAD TEMP[3].y, TEMP[5].xxxx, TEMP[3].zzzz, CONST[4].yyyy 9: MOV TEMP[2].y, TEMP[3].yyyy 10: MAD TEMP[5].z, IN[1].wwww, CONST[6].xxxx, CONST[7].xxxx 11: MOV TEMP[2].z, TEMP[5].zzzz 12: ADD TEMP[6].xyz, -TEMP[2], CONST[12] 13: MOV TEMP[6].xyz, TEMP[6].xyzx 14: MUL TEMP[1], TEMP[3].yyyy, CONST[1] 15: MAD TEMP[1], TEMP[4].xxxx, CONST[0], TEMP[1] 16: MAD TEMP[1], TEMP[5].zzzz, CONST[2], TEMP[1] 17: ADD TEMP[1], TEMP[1], CONST[3] 18: RCP TEMP[7].x, TEMP[1].wwww 19: MOV TEMP[2].w, TEMP[7].xxxx 20: MUL TEMP[7].xy, TEMP[1], TEMP[7].xxxx 21: MOV TEMP[7].xy, TEMP[7].xyxx 22: MOV TEMP[8], TEMP[1] 23: ADD TEMP[9].xy, TEMP[2], CONST[13].zwzw 24: MOV TEMP[1].xy, TEMP[9].xyxx 25: MUL TEMP[9].xy, TEMP[1], CONST[13] 26: MOV TEMP[9].xy, TEMP[9].xyxx 27: MUL TEMP[3].xy, TEMP[3].yyyy, CONST[9] 28: MOV TEMP[1].xy, TEMP[3].xyxx 29: MOV TEMP[2].xyz, TEMP[2].xyzx 30: MAD TEMP[3].xy, TEMP[4].xxxx, CONST[8], TEMP[1] 31: MOV TEMP[1].xy, TEMP[3].xyxx 32: MAD TEMP[3].xy, TEMP[5].zzzz, CONST[10], TEMP[1] 33: MOV TEMP[1].xy, TEMP[3].xyxx 34: ADD TEMP[3].xy, TEMP[1], CONST[11] 35: MOV TEMP[1].xy, TEMP[3].xyxx 36: MAD TEMP[1].xy, TEMP[1], IMM[0].zwyw, IMM[0].wwww 37: MOV TEMP[1].xy, TEMP[1].xyxx 38: MOV TEMP[0].w, IMM[1].xxxx 39: MOV TEMP[2].w, IMM[1].xxxx 40: MOV TEMP[6].w, IMM[1].xxxx 41: MOV TEMP[7].zw, IMM[1].xxyx 42: MOV TEMP[1].zw, IMM[1].xxyx 43: MOV TEMP[9].zw, IMM[1].xxyx 44: MOV OUT[6], TEMP[9] 45: MOV OUT[1], TEMP[0] 46: MOV OUT[2], TEMP[2] 47: MOV OUT[0], TEMP[8] 48: MOV OUT[3], TEMP[6] 49: MOV OUT[4], TEMP[7] 50: MOV OUT[5], TEMP[1] 51: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0 %58 = add i32 %5, %7 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fmul float %60, 2.000000e+00 %65 = fadd float %64, -1.000000e+00 %66 = fmul float %61, 2.000000e+00 %67 = fadd float %66, -1.000000e+00 %68 = fmul float %62, 2.000000e+00 %69 = fadd float %68, -1.000000e+00 %70 = fmul float %54, 5.000000e-01 %71 = fadd float %70, 5.000000e-01 %72 = fsub float -0.000000e+00, %29 %73 = fadd float %72, %31 %74 = fsub float -0.000000e+00, %30 %75 = fadd float %74, %32 %76 = fmul float %71, %73 %77 = fadd float %76, %29 %78 = fmul float %55, 5.000000e-01 %79 = fadd float %78, 5.000000e-01 %80 = fmul float %79, %75 %81 = fadd float %80, %30 %82 = fmul float %63, %33 %83 = fadd float %82, %34 %84 = fsub float -0.000000e+00, %77 %85 = fadd float %84, %43 %86 = fsub float -0.000000e+00, %81 %87 = fadd float %86, %44 %88 = fsub float -0.000000e+00, %83 %89 = fadd float %88, %45 %90 = fmul float %81, %17 %91 = fmul float %81, %18 %92 = fmul float %81, %19 %93 = fmul float %81, %20 %94 = fmul float %77, %13 %95 = fadd float %94, %90 %96 = fmul float %77, %14 %97 = fadd float %96, %91 %98 = fmul float %77, %15 %99 = fadd float %98, %92 %100 = fmul float %77, %16 %101 = fadd float %100, %93 %102 = fmul float %83, %21 %103 = fadd float %102, %95 %104 = fmul float %83, %22 %105 = fadd float %104, %97 %106 = fmul float %83, %23 %107 = fadd float %106, %99 %108 = fmul float %83, %24 %109 = fadd float %108, %101 %110 = fadd float %103, %25 %111 = fadd float %105, %26 %112 = fadd float %107, %27 %113 = fadd float %109, %28 %114 = fdiv float 1.000000e+00, %113 %115 = fmul float %110, %114 %116 = fmul float %111, %114 %117 = fadd float %77, %48 %118 = fadd float %81, %49 %119 = fmul float %117, %46 %120 = fmul float %118, %47 %121 = fmul float %81, %37 %122 = fmul float %81, %38 %123 = fmul float %77, %35 %124 = fadd float %123, %121 %125 = fmul float %77, %36 %126 = fadd float %125, %122 %127 = fmul float %83, %39 %128 = fadd float %127, %124 %129 = fmul float %83, %40 %130 = fadd float %129, %126 %131 = fadd float %128, %41 %132 = fadd float %130, %42 %133 = fmul float %131, 5.000000e-01 %134 = fadd float %133, -5.000000e-01 %135 = fmul float %132, -5.000000e-01 %136 = fadd float %135, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %67, float %69, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %77, float %81, float %83, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %85, float %87, float %89, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %115, float %116, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %134, float %136, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %119, float %120, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %111, float %112, float %113) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 2.0, v3, -1.0 ; D2820005 03CE06F4 v_mad_f32 v6, 2.0, v2, -1.0 ; D2820006 03CE04F4 v_mad_f32 v7, 2.0, v1, -1.0 ; D2820007 03CE02F4 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 exp 15, 32, 0, 0, 0, v7, v6, v5, v8 ; F800020F 08050607 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s8 ; 7E0A0208 v_sub_f32_e32 v5, s9, v5 ; 080A0A09 buffer_load_format_xyzw v[9:12], v0, s[4:7], 0 idxen ; E00C2000 80010900 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 0.5, v10, 0.5 ; D2820000 03C214F0 v_mad_f32 v0, v0, v5, s8 ; D2820000 00220B00 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_sub_f32_e32 v5, s5, v5 ; 080A0A05 v_mad_f32 v6, 0.5, v9, 0.5 ; D2820006 03C212F0 v_mad_f32 v5, v6, v5, s4 ; D2820005 00120B06 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v1, s5, v4, v6 ; D2820001 041A0805 exp 15, 33, 0, 0, 0, v5, v0, v1, v8 ; F800021F 08010005 s_buffer_load_dword s4, s[0:3], 0x31 ; C2020131 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_sub_f32_e32 v2, s4, v0 ; 08040004 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v5 ; 08060A04 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v4, s4, v1 ; 08080204 exp 15, 34, 0, 0, 0, v3, v2, v4, v8 ; F800022F 08040203 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v0 ; 10040004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s4, v2 ; D2820002 04080905 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v2 ; D2820002 04080901 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s4, v3 ; D2820003 040C0905 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s4, v3 ; D2820003 040C0901 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_rcp_f32_e32 v4, v3 ; 7E085503 v_mul_f32_e32 v6, v4, v2 ; 100C0504 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s4, v7 ; D2820007 041C0905 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s4, v7 ; D2820007 041C0901 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 35, 0, 0, 0, v4, v6, v9, v8 ; F800023F 08090604 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 s_buffer_load_dword s4, s[0:3], 0x28 ; C2020128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 v_mad_f32 v4, 0.5, v4, -0.5 ; D2820004 03C608F0 s_buffer_load_dword s4, s[0:3], 0x25 ; C2020125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 v_mad_f32 v6, -0.5, v6, -0.5 ; D2820006 03C60CF1 exp 15, 36, 0, 0, 0, v4, v6, v9, v8 ; F800024F 08090604 s_buffer_load_dword s4, s[0:3], 0x37 ; C2020137 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x35 ; C2020135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v5 ; 060C0A04 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v6 ; 100C0C04 exp 15, 37, 0, 0, 0, v6, v4, v9, v8 ; F800025F 08090406 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v7, v2, v0, v3 ; F80008CF 03000207 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..16] DCL TEMP[0..28], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 2.2000, 1.0000, -1.0000, 2.0000} IMM[2] FLT32 { 0.5000, -16.0000, -1.4427, 0.0000} IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 1.4427} IMM[4] FLT32 { 0.4545, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[4], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[7], 2D 3: ABS TEMP[1].x, TEMP[0] 4: MOV TEMP[2], -TEMP[1].xxxx 5: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 6: UIF TEMP[3].xxxx :0 7: MOV TEMP[3].x, IMM[0].yyyy 8: ELSE :0 9: MOV TEMP[3].x, IMM[0].wwww 10: ENDIF 11: MOV TEMP[3].x, TEMP[3].xxxx 12: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 13: UIF TEMP[4].xxxx :0 14: MOV TEMP[4].x, IMM[0].yyyy 15: ELSE :0 16: MOV TEMP[4].x, IMM[0].wwww 17: ENDIF 18: MOV TEMP[3].y, TEMP[4].xxxx 19: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 20: UIF TEMP[4].xxxx :0 21: MOV TEMP[4].x, IMM[0].yyyy 22: ELSE :0 23: MOV TEMP[4].x, IMM[0].wwww 24: ENDIF 25: MOV TEMP[3].z, TEMP[4].xxxx 26: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 27: UIF TEMP[2].xxxx :0 28: MOV TEMP[2].x, IMM[0].yyyy 29: ELSE :0 30: MOV TEMP[2].x, IMM[0].wwww 31: ENDIF 32: MOV TEMP[3].w, TEMP[2].xxxx 33: MOV TEMP[2].w, TEMP[3] 34: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 35: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 36: OR TEMP[4].x, TEMP[4].xxxx, TEMP[3].yyyy 37: UIF TEMP[4].xxxx :0 38: KILL 39: ENDIF 40: MOV TEMP[3].w, IMM[0].zzzz 41: MOV TEMP[3].x, IN[0].xxxx 42: MOV TEMP[3].y, IN[0].yyyy 43: MOV TEMP[3].z, IN[0].zzzz 44: DP4 TEMP[4].x, TEMP[3], TEMP[3] 45: RSQ TEMP[4].x, TEMP[4].xxxx 46: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 47: MOV TEMP[2].xyz, TEMP[3].xyzx 48: MOV TEMP[4].w, IMM[0].zzzz 49: MOV TEMP[4].x, IN[2].xxxx 50: MOV TEMP[4].y, IN[2].yyyy 51: MOV TEMP[4].z, IN[2].zzzz 52: DP4 TEMP[5].x, TEMP[4], TEMP[4] 53: RSQ TEMP[5].x, TEMP[5].xxxx 54: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 55: ABS TEMP[5], TEMP[2] 56: ABS TEMP[6], TEMP[2] 57: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 58: MUL TEMP[6].xyw, TEMP[5].xyzz, TEMP[5].xyzz 59: ADD TEMP[7].y, TEMP[6].yyyy, TEMP[6].xxxx 60: MAD TEMP[7].y, TEMP[5].zzzz, TEMP[5].zzzz, TEMP[7].yyyy 61: RCP TEMP[7].x, TEMP[7].yyyy 62: MUL TEMP[6].xyz, TEMP[7].xxxx, TEMP[6].xyww 63: MUL TEMP[7].yw, CONST[9].xxzy, IN[1].xyzz 64: MOV TEMP[7].xy, TEMP[7].ywww 65: TEX TEMP[7], TEMP[7], SAMP[3], 2D 66: POW TEMP[8].x, TEMP[7].xxxx, IMM[1].xxxx 67: POW TEMP[8].y, TEMP[7].yyyy, IMM[1].xxxx 68: POW TEMP[8].z, TEMP[7].zzzz, IMM[1].xxxx 69: POW TEMP[8].w, TEMP[7].wwww, IMM[1].yyyy 70: MOV TEMP[7].w, TEMP[8].wwww 71: MUL TEMP[9].yw, CONST[9].xxzy, IN[1].xxzz 72: MOV TEMP[9].xy, TEMP[9].ywww 73: TEX TEMP[9], TEMP[9], SAMP[3], 2D 74: POW TEMP[10].x, TEMP[9].xxxx, IMM[1].xxxx 75: POW TEMP[10].y, TEMP[9].yyyy, IMM[1].xxxx 76: POW TEMP[10].z, TEMP[9].zzzz, IMM[1].xxxx 77: POW TEMP[10].w, TEMP[9].wwww, IMM[1].yyyy 78: MOV TEMP[9].w, TEMP[10].wwww 79: MUL TEMP[10].xyz, TEMP[6].yyyy, TEMP[10] 80: MOV TEMP[9].xyz, TEMP[10].xyzx 81: MAD TEMP[8].xyz, TEMP[8], TEMP[6].xxxx, TEMP[9] 82: MOV TEMP[7].xyz, TEMP[8].xyzx 83: MUL TEMP[8].yw, CONST[4].xxxx, IN[1].xxzy 84: MOV TEMP[8].xy, TEMP[8].ywww 85: TEX TEMP[8], TEMP[8], SAMP[1], 2D 86: POW TEMP[10].x, TEMP[8].xxxx, IMM[1].xxxx 87: POW TEMP[10].y, TEMP[8].yyyy, IMM[1].xxxx 88: POW TEMP[10].z, TEMP[8].zzzz, IMM[1].xxxx 89: POW TEMP[10].w, TEMP[8].wwww, IMM[1].yyyy 90: MAD TEMP[8].xyz, TEMP[10], TEMP[6].zzzz, TEMP[7] 91: MOV TEMP[7].xyz, TEMP[8].xyzx 92: MUL TEMP[10].yw, CONST[10].xxzy, IN[1].xyzz 93: MOV TEMP[2].w, -TEMP[3].zzzz 94: DP3 TEMP[11].x, TEMP[2].wyxx, TEMP[4].xyzz 95: MOV TEMP[9].y, TEMP[11].xxxx 96: MUL TEMP[11].xyz, TEMP[2].xzyw, IMM[1].yyzw 97: DP3 TEMP[12].x, TEMP[11].zxyy, TEMP[4].xyzz 98: MOV TEMP[9].w, TEMP[12].xxxx 99: DP3 TEMP[12].x, TEMP[3].xyzz, TEMP[4].xyzz 100: MOV TEMP[9].z, TEMP[12].xxxx 101: MUL TEMP[12].xyz, TEMP[2].zyxw, IMM[1].zyyw 102: DP3 TEMP[13].x, TEMP[12].xyzz, CONST[1].xyzz 103: MOV TEMP[13].z, TEMP[13].xxxx 104: DP3 TEMP[14].x, TEMP[11].zxyy, CONST[1].xyzz 105: MOV TEMP[13].w, TEMP[14].xxxx 106: DP3 TEMP[13].x, TEMP[3].xyzz, CONST[1].xyzz 107: DP3 TEMP[14].x, TEMP[12].xyzz, CONST[2].xyzz 108: MOV TEMP[12].y, TEMP[14].xxxx 109: DP3 TEMP[14].x, TEMP[11].zxyy, CONST[2].xyzz 110: MOV TEMP[12].w, TEMP[14].xxxx 111: DP3 TEMP[14].x, TEMP[3].xyzz, CONST[2].xyzz 112: MOV TEMP[12].z, TEMP[14].xxxx 113: MOV TEMP[14].xy, TEMP[10].ywww 114: TEX TEMP[14].zw, TEMP[14], SAMP[5], 2D 115: MOV TEMP[15].zw, TEMP[14].wwzw 116: MOV TEMP[16].xy, TEMP[10].ywww 117: TEX TEMP[16], TEMP[16], SAMP[4], 2D 118: MOV TEMP[17].w, TEMP[16].wwww 119: MUL TEMP[18].y, CONST[3].xxxx, CONST[3].xxxx 120: MAD TEMP[19].xy, TEMP[16], IMM[1].wwww, IMM[0].yyyy 121: MOV TEMP[15].xy, TEMP[19].xyxx 122: MUL TEMP[19].xy, TEMP[15], CONST[3].xxxx 123: MOV TEMP[15].xy, TEMP[19].xyxx 124: MOV TEMP[15].z, IMM[0].xxxx 125: MOV TEMP[20].w, IMM[0].zzzz 126: MOV TEMP[20].x, TEMP[19].xxxx 127: MOV TEMP[20].y, TEMP[19].yyyy 128: MOV TEMP[20].z, IMM[0].xxxx 129: DP4 TEMP[19].x, TEMP[20], TEMP[20] 130: RSQ TEMP[19].x, TEMP[19].xxxx 131: MUL TEMP[19].xyz, TEMP[20], TEMP[19].xxxx 132: MAD TEMP[14].z, TEMP[14].wwww, IMM[1].wwww, IMM[0].yyyy 133: MOV TEMP[14].z, TEMP[14].zzzz 134: MOV TEMP[14].xy, TEMP[16].zwzz 135: MUL TEMP[16].xyz, TEMP[15].xyyw, TEMP[15].xyxw 136: MOV TEMP[17].xyz, TEMP[16].xyzx 137: MAD TEMP[16].xyz, TEMP[14], TEMP[18].yyyy, -TEMP[17] 138: DP3 TEMP[20].x, TEMP[13].zwxx, TEMP[19].xyzz 139: MOV_SAT TEMP[20].x, TEMP[20].xxxx 140: DP3 TEMP[21].x, TEMP[12].ywzz, TEMP[19].xyzz 141: MOV_SAT TEMP[21].x, TEMP[21].xxxx 142: MOV TEMP[2].w, TEMP[21].xxxx 143: ADD TEMP[22].xyz, TEMP[9].ywzw, TEMP[12].ywzw 144: MOV TEMP[14].xyz, TEMP[22].xyzx 145: RCP TEMP[22].x, TEMP[22].zzzz 146: MAD TEMP[22].xy, TEMP[14], TEMP[22].xxxx, -TEMP[15] 147: RCP TEMP[23].x, CONST[6].xxxx 148: ADD TEMP[24].zw, TEMP[23].xxxx, TEMP[16].xyxy 149: MUL TEMP[25].w, TEMP[16].zzzz, TEMP[16].zzzz 150: MAD TEMP[25].w, TEMP[24].zzzz, TEMP[24].wwww, -TEMP[25].wwww 151: MUL TEMP[26].w, TEMP[22].xxxx, TEMP[22].xxxx 152: MUL TEMP[27].w, TEMP[22].yyyy, TEMP[24].wwww 153: DP2 TEMP[16].x, TEMP[22].xxxx, -TEMP[16].zzzz 154: ADD TEMP[16].x, TEMP[16].xxxx, TEMP[27].wwww 155: MUL TEMP[16].w, TEMP[22].yyyy, TEMP[16].xxxx 156: MAD TEMP[16].w, TEMP[26].wwww, TEMP[24].wwww, TEMP[16].wwww 157: MUL TEMP[16].w, TEMP[16].wwww, IMM[2].xxxx 158: RCP TEMP[22].x, TEMP[25].wwww 159: MUL TEMP[24].z, TEMP[16].wwww, TEMP[22].xxxx 160: MOV TEMP[26].x, -TEMP[25].wwww 161: FSGE TEMP[26].x, TEMP[26].xxxx, IMM[0].zzzz 162: UIF TEMP[26].xxxx :0 163: MOV TEMP[26].x, IMM[0].xxxx 164: ELSE :0 165: MOV TEMP[26].x, IMM[0].zzzz 166: ENDIF 167: MAD TEMP[16].w, TEMP[16].wwww, TEMP[22].xxxx, IMM[2].yyyy 168: FSGE TEMP[16].x, TEMP[16].wwww, IMM[0].zzzz 169: UIF TEMP[16].xxxx :0 170: MOV TEMP[16].x, IMM[0].xxxx 171: ELSE :0 172: MOV TEMP[16].x, IMM[0].zzzz 173: ENDIF 174: ADD TEMP[16].w, TEMP[16].xxxx, TEMP[26].xxxx 175: MUL TEMP[22].w, TEMP[24].zzzz, IMM[2].zzzz 176: EX2 TEMP[22].x, TEMP[22].wwww 177: MAX TEMP[24].x, TEMP[25].wwww, IMM[2].wwww 178: RSQ TEMP[24].x, TEMP[24].xxxx 179: MUL TEMP[22].w, TEMP[24].xxxx, TEMP[22].xxxx 180: MOV TEMP[16].x, -TEMP[16].wwww 181: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].zzzz 182: UIF TEMP[16].xxxx :0 183: MOV TEMP[16].x, TEMP[22].wwww 184: ELSE :0 185: MOV TEMP[16].x, IMM[0].zzzz 186: ENDIF 187: DP3 TEMP[22].x, TEMP[19].xzyy, TEMP[9].yzww 188: ADD TEMP[22].w, -TEMP[22].xxxx, IMM[0].xxxx 189: MUL TEMP[24].w, TEMP[22].wwww, TEMP[22].wwww 190: MUL TEMP[24].w, TEMP[24].wwww, TEMP[24].wwww 191: MUL TEMP[22].w, TEMP[22].wwww, TEMP[24].wwww 192: MOV TEMP[7].w, TEMP[22].wwww 193: MUL TEMP[24].xyz, TEMP[2].zyxw, IMM[1].zzyw 194: DP3 TEMP[15].x, TEMP[19].xyzz, TEMP[24].xyzz 195: DP3 TEMP[24].x, TEMP[19].yxzz, TEMP[3].xyyy 196: MOV TEMP[15].y, TEMP[24].xxxx 197: DP3 TEMP[19].x, TEMP[19].xyzz, TEMP[3].xzzz 198: MOV TEMP[15].z, TEMP[19].xxxx 199: MOV TEMP[19].xyz, TEMP[15].xyzz 200: TEX TEMP[19], TEMP[19], SAMP[0], CUBE 201: POW TEMP[24].x, TEMP[19].xxxx, IMM[1].xxxx 202: POW TEMP[24].y, TEMP[19].yyyy, IMM[1].xxxx 203: POW TEMP[24].z, TEMP[19].zzzz, IMM[1].xxxx 204: POW TEMP[24].w, TEMP[19].wwww, IMM[1].yyyy 205: MOV TEMP[15].w, TEMP[24].wwww 206: ADD TEMP[19].xyz, TEMP[20].xxxx, TEMP[24] 207: MOV TEMP[15].xyz, TEMP[19].xyzx 208: MUL TEMP[19].xyz, TEMP[7], TEMP[15] 209: MOV TEMP[15].xyz, TEMP[19].xyzx 210: MUL TEMP[16], TEMP[16].xxxx, TEMP[22].wwww 211: MOV_SAT TEMP[16], TEMP[16] 212: MAD TEMP[15].xyz, TEMP[16].wwww, TEMP[21].xxxx, TEMP[15] 213: MUL TEMP[16].xy, CONST[10], IN[1].xzzw 214: MUL TEMP[19].xyz, TEMP[2].zyxw, IMM[1].yyzw 215: DP3 TEMP[9].x, TEMP[19].zxyy, TEMP[4].xyzz 216: DP3 TEMP[20].x, TEMP[19].zxyy, CONST[1].xyzz 217: MOV TEMP[13].y, TEMP[20].xxxx 218: DP3 TEMP[12].x, TEMP[19].zxyy, CONST[2].xyzz 219: MOV TEMP[20].xy, TEMP[16].xyyy 220: TEX TEMP[20].w, TEMP[20], SAMP[5], 2D 221: MOV TEMP[14].w, TEMP[20].wwww 222: MOV TEMP[16].xy, TEMP[16].xyyy 223: TEX TEMP[16], TEMP[16], SAMP[4], 2D 224: MOV TEMP[17].zw, TEMP[16].wwzw 225: MAD TEMP[21].xy, TEMP[16], IMM[1].wwww, IMM[0].yyyy 226: MOV TEMP[17].xy, TEMP[21].xyxx 227: MUL TEMP[21].xy, TEMP[17], CONST[3].xxxx 228: MOV TEMP[14].xy, TEMP[21].xyxx 229: MOV TEMP[22].w, IMM[0].zzzz 230: MOV TEMP[22].x, TEMP[21].xxxx 231: MOV TEMP[22].y, TEMP[21].yyyy 232: MOV TEMP[22].z, IMM[0].xxxx 233: DP4 TEMP[21].x, TEMP[22], TEMP[22] 234: RSQ TEMP[21].x, TEMP[21].xxxx 235: MUL TEMP[21].xyz, TEMP[22], TEMP[21].xxxx 236: MAD TEMP[20].z, TEMP[20].wwww, IMM[1].wwww, IMM[0].yyyy 237: MOV TEMP[20].z, TEMP[20].zzzz 238: MOV TEMP[20].xy, TEMP[16].zwzz 239: MUL TEMP[16].xyz, TEMP[14].xyyw, TEMP[14].xyxw 240: MOV TEMP[17].xyz, TEMP[16].xyzx 241: MAD TEMP[16].xyz, TEMP[20], TEMP[18].yyyy, -TEMP[17] 242: MOV TEMP[17].xyz, TEMP[16].xyzx 243: DP3 TEMP[20].x, TEMP[13].ywxx, TEMP[21].xyzz 244: MOV_SAT TEMP[20].x, TEMP[20].xxxx 245: DP3 TEMP[22].x, TEMP[12].xwzz, TEMP[21].xyzz 246: MOV_SAT TEMP[22].x, TEMP[22].xxxx 247: ADD TEMP[24].yzw, TEMP[9].xxwz, TEMP[12].xxwz 248: MOV TEMP[13].yzw, TEMP[24].zyzw 249: RCP TEMP[24].x, TEMP[24].wwww 250: MAD TEMP[24].yz, TEMP[13], TEMP[24].xxxx, -TEMP[14].xxyw 251: ADD TEMP[25].xy, TEMP[23].xxxx, TEMP[17] 252: MUL TEMP[26].w, TEMP[16].zzzz, TEMP[16].zzzz 253: MAD TEMP[26].w, TEMP[25].xxxx, TEMP[25].yyyy, -TEMP[26].wwww 254: MUL TEMP[27].w, TEMP[24].yyyy, TEMP[24].yyyy 255: MUL TEMP[28].w, TEMP[24].zzzz, TEMP[25].yyyy 256: DP2 TEMP[16].x, TEMP[24].yyyy, -TEMP[16].zzzz 257: ADD TEMP[16].x, TEMP[16].xxxx, TEMP[28].wwww 258: MUL TEMP[16].w, TEMP[24].zzzz, TEMP[16].xxxx 259: MAD TEMP[16].w, TEMP[27].wwww, TEMP[25].yyyy, TEMP[16].wwww 260: MUL TEMP[16].w, TEMP[16].wwww, IMM[2].xxxx 261: RCP TEMP[24].x, TEMP[26].wwww 262: MUL TEMP[25].w, TEMP[16].wwww, TEMP[24].xxxx 263: MOV TEMP[12].w, TEMP[25].wwww 264: MOV TEMP[27].x, -TEMP[26].wwww 265: FSGE TEMP[27].x, TEMP[27].xxxx, IMM[0].zzzz 266: UIF TEMP[27].xxxx :0 267: MOV TEMP[27].x, IMM[0].xxxx 268: ELSE :0 269: MOV TEMP[27].x, IMM[0].zzzz 270: ENDIF 271: MAD TEMP[16].w, TEMP[16].wwww, TEMP[24].xxxx, IMM[2].yyyy 272: FSGE TEMP[16].x, TEMP[16].wwww, IMM[0].zzzz 273: UIF TEMP[16].xxxx :0 274: MOV TEMP[16].x, IMM[0].xxxx 275: ELSE :0 276: MOV TEMP[16].x, IMM[0].zzzz 277: ENDIF 278: ADD TEMP[16].w, TEMP[16].xxxx, TEMP[27].xxxx 279: MUL TEMP[24].w, TEMP[25].wwww, IMM[2].zzzz 280: EX2 TEMP[24].x, TEMP[24].wwww 281: MAX TEMP[25].x, TEMP[26].wwww, IMM[2].wwww 282: RSQ TEMP[25].x, TEMP[25].xxxx 283: MUL TEMP[24].w, TEMP[25].xxxx, TEMP[24].xxxx 284: MOV TEMP[16].x, -TEMP[16].wwww 285: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].zzzz 286: UIF TEMP[16].xxxx :0 287: MOV TEMP[16].x, TEMP[24].wwww 288: ELSE :0 289: MOV TEMP[16].x, IMM[0].zzzz 290: ENDIF 291: DP3 TEMP[24].x, TEMP[21].xzyy, TEMP[9].xzww 292: ADD TEMP[24].w, -TEMP[24].xxxx, IMM[0].xxxx 293: MUL TEMP[25].w, TEMP[24].wwww, TEMP[24].wwww 294: MUL TEMP[25].w, TEMP[25].wwww, TEMP[25].wwww 295: MOV TEMP[9].w, TEMP[25].wwww 296: MUL TEMP[24].w, TEMP[24].wwww, TEMP[25].wwww 297: MUL TEMP[25].yzw, TEMP[3].xxyx, IMM[0].yyyx 298: DP3 TEMP[17].x, TEMP[21].xyzz, TEMP[25].yzww 299: DP3 TEMP[25].x, TEMP[21].yzxx, TEMP[3].xyzz 300: MOV TEMP[17].y, TEMP[25].xxxx 301: DP3 TEMP[21].x, TEMP[21].xyzz, TEMP[3].yzzz 302: MOV TEMP[17].z, TEMP[21].xxxx 303: MOV TEMP[21].xyz, TEMP[17].xyzz 304: TEX TEMP[21], TEMP[21], SAMP[0], CUBE 305: POW TEMP[25].x, TEMP[21].xxxx, IMM[1].xxxx 306: POW TEMP[25].y, TEMP[21].yyyy, IMM[1].xxxx 307: POW TEMP[25].z, TEMP[21].zzzz, IMM[1].xxxx 308: POW TEMP[25].w, TEMP[21].wwww, IMM[1].yyyy 309: MOV TEMP[17].w, TEMP[25].wwzw 310: ADD TEMP[20].yzw, TEMP[20].xxxx, TEMP[25].xxyz 311: MOV TEMP[13].yzw, TEMP[20].zyzw 312: MUL TEMP[20].yzw, TEMP[8].xxyz, TEMP[13] 313: MOV TEMP[13].yzw, TEMP[20].zyzw 314: MUL TEMP[16], TEMP[16].xxxx, TEMP[24].wwww 315: MOV_SAT TEMP[16], TEMP[16] 316: MAD TEMP[16].yzw, TEMP[16].wwww, TEMP[22].xxxx, TEMP[13] 317: MUL TEMP[20].xy, CONST[5].xxxx, IN[1] 318: DP3 TEMP[9].x, TEMP[19].xyzz, TEMP[4].xyzz 319: DP3 TEMP[21].x, TEMP[11].xyzz, TEMP[4].xyzz 320: MOV TEMP[9].y, TEMP[21].xxxx 321: DP3 TEMP[14].x, TEMP[19].xyzz, CONST[1].xyzz 322: DP3 TEMP[21].x, TEMP[11].xyzz, CONST[1].xyzz 323: MOV TEMP[14].y, TEMP[21].xxxx 324: DP3 TEMP[12].x, TEMP[19].xyzz, CONST[2].xyzz 325: DP3 TEMP[19].x, TEMP[11].xyzz, CONST[2].xyzz 326: MOV TEMP[12].y, TEMP[19].xxxx 327: MOV TEMP[19].xy, TEMP[20].xyyy 328: TEX TEMP[19], TEMP[19], SAMP[2], 2D 329: MOV TEMP[10].zw, TEMP[19].wwzw 330: MAD TEMP[20].xy, TEMP[19], IMM[1].wwww, IMM[0].yyyy 331: MOV TEMP[10].xy, TEMP[20].xyxx 332: MUL TEMP[20].xy, TEMP[10], CONST[3].xxxx 333: MOV TEMP[17].xy, TEMP[20].xyxx 334: MOV TEMP[17].z, IMM[0].xxxx 335: MOV TEMP[21].w, IMM[0].zzzz 336: MOV TEMP[21].x, TEMP[20].xxxx 337: MOV TEMP[21].y, TEMP[20].yyyy 338: MOV TEMP[21].z, IMM[0].xxxx 339: DP4 TEMP[20].x, TEMP[21], TEMP[21] 340: RSQ TEMP[20].x, TEMP[20].xxxx 341: MUL TEMP[20].xyz, TEMP[21], TEMP[20].xxxx 342: MAD TEMP[19].xyz, TEMP[19].zwww, IMM[1].yywz, IMM[0].zzyy 343: MOV TEMP[10].xyz, TEMP[19].xyzx 344: MUL TEMP[19].xyz, TEMP[17].xyyw, TEMP[17].xyxw 345: MOV TEMP[11].xyz, TEMP[19].xyzx 346: MAD TEMP[11].xyz, TEMP[10], TEMP[18].yyyy, -TEMP[11] 347: MOV TEMP[10].xyz, TEMP[11].xyzx 348: MOV TEMP[14].z, TEMP[13].xxxx 349: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[20].xyzz 350: MOV_SAT TEMP[14].x, TEMP[14].xxxx 351: DP3 TEMP[18].x, TEMP[12].xyzz, TEMP[20].xyzz 352: MOV_SAT TEMP[18].x, TEMP[18].xxxx 353: ADD TEMP[19].xyz, TEMP[9], TEMP[12] 354: MOV TEMP[12].xyz, TEMP[19].xyzx 355: RCP TEMP[19].x, TEMP[19].zzzz 356: MAD TEMP[12].xy, TEMP[12], TEMP[19].xxxx, -TEMP[17] 357: ADD TEMP[10].xy, TEMP[23].xxxx, TEMP[10] 358: MUL TEMP[17].w, TEMP[11].zzzz, TEMP[11].zzzz 359: MAD TEMP[17].w, TEMP[10].xxxx, TEMP[10].yyyy, -TEMP[17].wwww 360: MUL TEMP[19].w, TEMP[12].xxxx, TEMP[12].xxxx 361: MUL TEMP[21].w, TEMP[12].yyyy, TEMP[10].yyyy 362: DP2 TEMP[11].x, TEMP[12].xxxx, -TEMP[11].zzzz 363: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[21].wwww 364: MUL TEMP[11].w, TEMP[12].yyyy, TEMP[11].xxxx 365: MAD TEMP[10].w, TEMP[19].wwww, TEMP[10].yyyy, TEMP[11].wwww 366: MUL TEMP[10].w, TEMP[10].wwww, IMM[2].xxxx 367: RCP TEMP[11].x, TEMP[17].wwww 368: MUL TEMP[12].w, TEMP[10].wwww, TEMP[11].xxxx 369: MOV TEMP[7].w, TEMP[12].wwww 370: MOV TEMP[19].x, -TEMP[17].wwww 371: FSGE TEMP[19].x, TEMP[19].xxxx, IMM[0].zzzz 372: UIF TEMP[19].xxxx :0 373: MOV TEMP[19].x, IMM[0].xxxx 374: ELSE :0 375: MOV TEMP[19].x, IMM[0].zzzz 376: ENDIF 377: MAD TEMP[10].w, TEMP[10].wwww, TEMP[11].xxxx, IMM[2].yyyy 378: FSGE TEMP[10].x, TEMP[10].wwww, IMM[0].zzzz 379: UIF TEMP[10].xxxx :0 380: MOV TEMP[10].x, IMM[0].xxxx 381: ELSE :0 382: MOV TEMP[10].x, IMM[0].zzzz 383: ENDIF 384: ADD TEMP[10].w, TEMP[10].xxxx, TEMP[19].xxxx 385: MUL TEMP[11].w, TEMP[12].wwww, IMM[2].zzzz 386: EX2 TEMP[11].x, TEMP[11].wwww 387: MAX TEMP[12].x, TEMP[17].wwww, IMM[2].wwww 388: RSQ TEMP[12].x, TEMP[12].xxxx 389: MUL TEMP[11].w, TEMP[12].xxxx, TEMP[11].xxxx 390: MOV TEMP[10].x, -TEMP[10].wwww 391: FSGE TEMP[10].x, TEMP[10].xxxx, IMM[0].zzzz 392: UIF TEMP[10].xxxx :0 393: MOV TEMP[10].x, TEMP[11].wwww 394: ELSE :0 395: MOV TEMP[10].x, IMM[0].zzzz 396: ENDIF 397: MOV TEMP[2].w, TEMP[10].xxxx 398: DP3 TEMP[11].x, TEMP[20].xyzz, TEMP[9].xyzz 399: ADD TEMP[11].w, -TEMP[11].xxxx, IMM[0].xxxx 400: MUL TEMP[12].w, TEMP[11].wwww, TEMP[11].wwww 401: MUL TEMP[12].w, TEMP[12].wwww, TEMP[12].wwww 402: MUL TEMP[11].w, TEMP[11].wwww, TEMP[12].wwww 403: DP3 TEMP[9].x, TEMP[20].yzxx, TEMP[3].xxzz 404: DP3 TEMP[12].x, TEMP[20].xzyy, TEMP[3].yyzz 405: MOV TEMP[9].y, TEMP[12].xxxx 406: MUL TEMP[12].xyz, TEMP[2], IMM[1].zzyw 407: DP3 TEMP[12].x, TEMP[20].xyzz, TEMP[12].xyzz 408: MOV TEMP[9].z, TEMP[12].xxxx 409: MOV TEMP[12].xyz, TEMP[9].xyzz 410: TEX TEMP[12], TEMP[12], SAMP[0], CUBE 411: POW TEMP[17].x, TEMP[12].xxxx, IMM[1].xxxx 412: POW TEMP[17].y, TEMP[12].yyyy, IMM[1].xxxx 413: POW TEMP[17].z, TEMP[12].zzzz, IMM[1].xxxx 414: POW TEMP[17].w, TEMP[12].wwww, IMM[1].yyyy 415: MOV TEMP[9].w, TEMP[17].wwww 416: ADD TEMP[12].xyz, TEMP[14].xxxx, TEMP[17] 417: MOV TEMP[9].xyz, TEMP[12].xyzx 418: MUL TEMP[12].xyz, TEMP[7], TEMP[9] 419: MOV TEMP[9].xyz, TEMP[12].xyzx 420: MUL TEMP[10], TEMP[10].xxxx, TEMP[11].wwww 421: MOV_SAT TEMP[10], TEMP[10] 422: MAD TEMP[10].xyz, TEMP[10].yyyy, TEMP[18].xxxx, TEMP[9] 423: MOV TEMP[9].xyz, TEMP[10].xyzx 424: MUL TEMP[10].xyz, TEMP[6].yyyy, TEMP[16].yzww 425: MAD TEMP[10].xyw, TEMP[15].xyzz, TEMP[6].xxxx, TEMP[10].xyzz 426: MOV TEMP[5].w, TEMP[10].xyxw 427: MAD TEMP[6].xyz, TEMP[9], TEMP[6].zzzz, TEMP[10].xyww 428: MOV TEMP[5].xyz, TEMP[6].xyzx 429: DP3 TEMP[6].x, TEMP[8].xyzz, IMM[3].xyzz 430: ADD TEMP[6].y, -TEMP[6].xxxx, IMM[0].xxxx 431: MUL TEMP[6].xyz, TEMP[6].yyyy, CONST[7] 432: MOV TEMP[7].xyz, TEMP[6].xyzx 433: MOV_SAT TEMP[6].x, TEMP[13].xxxx 434: MOV TEMP[8].xyz, TEMP[3].xyzz 435: TEX TEMP[8], TEMP[8], SAMP[0], CUBE 436: POW TEMP[9].x, TEMP[8].xxxx, IMM[1].xxxx 437: POW TEMP[9].y, TEMP[8].yyyy, IMM[1].xxxx 438: POW TEMP[9].z, TEMP[8].zzzz, IMM[1].xxxx 439: POW TEMP[9].w, TEMP[8].wwww, IMM[1].yyyy 440: MOV TEMP[2].w, TEMP[9].wwww 441: ADD TEMP[6].xyz, TEMP[9], TEMP[6].xxxx 442: MOV TEMP[2].xyz, TEMP[6].xyzx 443: MAD TEMP[6].xyz, TEMP[7], TEMP[2], TEMP[5] 444: MAD TEMP[8].x, IN[3].yyyy, IMM[2].xxxx, IMM[2].xxxx 445: MOV TEMP[5].x, TEMP[8].xxxx 446: MOV TEMP[5].y, CONST[16].wwww 447: MOV TEMP[8].xy, TEMP[5].xyyy 448: TEX TEMP[8].x, TEMP[8], SAMP[6], 2D 449: MOV TEMP[5].x, TEMP[8].xxxx 450: ADD TEMP[9].yzw, -CONST[0].xxyz, IN[1].xxyz 451: DP3 TEMP[10].x, TEMP[9].yzww, TEMP[9].yzww 452: MUL TEMP[10].y, TEMP[10].xxxx, CONST[16].yyyy 453: MUL TEMP[11].w, TEMP[9].wwww, CONST[16].xxxx 454: MUL TEMP[11].w, TEMP[11].wwww, IMM[3].wwww 455: EX2 TEMP[11].x, TEMP[11].wwww 456: ADD TEMP[11].w, -TEMP[11].xxxx, IMM[0].xxxx 457: MUL TEMP[10].y, TEMP[11].wwww, TEMP[10].yyyy 458: RCP TEMP[9].x, TEMP[9].wwww 459: MUL TEMP[9].y, TEMP[9].xxxx, TEMP[10].yyyy 460: MUL TEMP[9].y, TEMP[9].yyyy, IMM[3].wwww 461: EX2 TEMP[9].x, TEMP[9].yyyy 462: MOV_SAT TEMP[9].x, TEMP[9].xxxx 463: ADD TEMP[9].y, -TEMP[9].xxxx, IMM[0].xxxx 464: MUL TEMP[10].w, TEMP[9].yyyy, TEMP[8].xxxx 465: ADD TEMP[11].w, -CONST[8].xxxx, IN[1].zzzz 466: MOV TEMP[1].w, TEMP[11].wwww 467: FSGE TEMP[12].x, TEMP[11].wwww, IMM[0].zzzz 468: UIF TEMP[12].xxxx :0 469: MOV TEMP[12].x, IMM[0].xxxx 470: ELSE :0 471: MOV TEMP[12].x, IMM[0].zzzz 472: ENDIF 473: MOV TEMP[12].w, TEMP[12].xxxx 474: ADD TEMP[13].w, CONST[8].xxxx, -IN[1].zzzz 475: MOV TEMP[7].xyz, CONST[11].xyzx 476: ADD TEMP[14].xyz, -TEMP[7], CONST[12] 477: MUL TEMP[15], TEMP[13].wwww, CONST[13].xxxx 478: MOV_SAT TEMP[15], TEMP[15] 479: MAD TEMP[14].yzw, TEMP[15].yyyy, TEMP[14].xxyz, CONST[11].xxyz 480: MOV TEMP[5].yzw, TEMP[14].zyzw 481: MUL TEMP[14].yzw, TEMP[6].xxyz, TEMP[5] 482: RCP TEMP[7].x, -TEMP[4].zzzz 483: MUL TEMP[7].w, TEMP[13].wwww, TEMP[7].xxxx 484: MUL TEMP[7].xyw, TEMP[7].wwww, TEMP[4].xyzz 485: MOV TEMP[3].w, TEMP[7].xyxw 486: DP3 TEMP[3].x, TEMP[7].xyww, TEMP[7].xyww 487: MAX TEMP[7].x, TEMP[3].xxxx, IMM[2].wwww 488: RSQ TEMP[13].x, TEMP[7].xxxx 489: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[7].xxxx 490: CMP TEMP[3].x, -TEMP[7].xxxx, TEMP[13].xxxx, IMM[0].zzzz 491: MUL TEMP[7].x, -TEMP[3].xxxx, CONST[14].xxxx 492: MUL TEMP[7].x, TEMP[7].xxxx, IMM[3].wwww 493: EX2 TEMP[7].x, TEMP[7].xxxx 494: MUL TEMP[13].y, TEMP[4].zzzz, TEMP[4].zzzz 495: MUL TEMP[13].y, TEMP[13].yyyy, TEMP[13].yyyy 496: MAD TEMP[4].y, TEMP[4].zzzz, -TEMP[13].yyyy, IMM[0].xxxx 497: ADD TEMP[4].y, -TEMP[4].yyyy, IMM[0].xxxx 498: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[4].yyyy 499: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[14].yzww 500: MOV TEMP[3].xyz, TEMP[4].xyzx 501: MAD TEMP[4].y, TEMP[8].xxxx, -TEMP[9].yyyy, IMM[0].xxxx 502: MUL TEMP[4].xyz, TEMP[4].yyyy, TEMP[3] 503: MOV TEMP[3].w, IMM[0].zzzz 504: FSGE TEMP[7].x, TEMP[11].wwww, IMM[0].zzzz 505: UIF TEMP[7].xxxx :0 506: MOV TEMP[7].x, TEMP[6].xxxx 507: ELSE :0 508: MOV TEMP[7].x, TEMP[4].xxxx 509: ENDIF 510: FSGE TEMP[8].x, TEMP[11].wwww, IMM[0].zzzz 511: UIF TEMP[8].xxxx :0 512: MOV TEMP[8].x, TEMP[6].yyyy 513: ELSE :0 514: MOV TEMP[8].x, TEMP[4].yyyy 515: ENDIF 516: FSGE TEMP[9].x, TEMP[11].wwww, IMM[0].zzzz 517: UIF TEMP[9].xxxx :0 518: MOV TEMP[6].x, TEMP[6].zzzz 519: ELSE :0 520: MOV TEMP[6].x, TEMP[4].zzzz 521: ENDIF 522: FSGE TEMP[4].x, TEMP[11].wwww, IMM[0].zzzz 523: UIF TEMP[4].xxxx :0 524: MOV TEMP[4].x, TEMP[10].wwww 525: ELSE :0 526: MOV TEMP[4].x, IMM[0].zzzz 527: ENDIF 528: MOV TEMP[2].w, TEMP[4].xxxx 529: ABS TEMP[7].x, TEMP[7].xxxx 530: LG2 TEMP[3].x, TEMP[7].xxxx 531: ABS TEMP[7].x, TEMP[8].xxxx 532: LG2 TEMP[7].x, TEMP[7].xxxx 533: MOV TEMP[3].y, TEMP[7].xxxx 534: ABS TEMP[6].x, TEMP[6].xxxx 535: LG2 TEMP[6].x, TEMP[6].xxxx 536: MOV TEMP[3].z, TEMP[6].xxxx 537: MUL TEMP[6].xyz, TEMP[3], IMM[4].xxxx 538: EX2 TEMP[3].x, TEMP[6].xxxx 539: EX2 TEMP[7].x, TEMP[6].yyyy 540: MOV TEMP[3].y, TEMP[7].xxxx 541: EX2 TEMP[6].x, TEMP[6].zzzz 542: MOV TEMP[3].z, TEMP[6].xxxx 543: MOV TEMP[6].xy, IN[5].xyyy 544: TEX TEMP[6], TEMP[6], SAMP[8], 2D 545: MOV TEMP[5].w, TEMP[6].wwww 546: LRP TEMP[3].xyz, TEMP[0].xxxx, TEMP[3], TEMP[6] 547: MOV TEMP[2].xyz, TEMP[3].xyzx 548: MOV TEMP[3].xyz, TEMP[3].xyzz 549: TEX TEMP[3], TEMP[3], SAMP[9], 3D 550: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 551: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[2] 552: MOV TEMP[5].xyz, TEMP[0].xyzx 553: ADD TEMP[0].xyz, -TEMP[5], CONST[15] 554: MOV TEMP[1].xyz, TEMP[0].xyzx 555: MAD TEMP[0].xyz, TEMP[4].xxxx, TEMP[1], TEMP[5] 556: MOV TEMP[12].xyz, TEMP[0].xyzx 557: MOV OUT[0], TEMP[12] 558: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %59 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %60 = load <8 x i32> addrspace(2)* %59, !tbaa !0 %61 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %62 = load <4 x i32> addrspace(2)* %61, !tbaa !0 %63 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %64 = load <8 x i32> addrspace(2)* %63, !tbaa !0 %65 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %66 = load <4 x i32> addrspace(2)* %65, !tbaa !0 %67 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %68 = load <8 x i32> addrspace(2)* %67, !tbaa !0 %69 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %70 = load <4 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %80 = load <8 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %82 = load <4 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %84 = load <8 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %86 = load <4 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %88 = load <8 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %90 = load <4 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %92 = load <8 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %94 = load <4 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %96 = load <8 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %98 = load <4 x i32> addrspace(2)* %97, !tbaa !0 %99 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %113 = fmul float %109, 1.000000e+00 %114 = fadd float %113, 0.000000e+00 %115 = fmul float %110, -1.000000e+00 %116 = fadd float %115, 1.000000e+00 %117 = bitcast float %114 to i32 %118 = bitcast float %116 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = bitcast <8 x i32> %88 to <32 x i8> %122 = bitcast <4 x i32> %90 to <16 x i8> %123 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %121, <16 x i8> %122, i32 2) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 2 %126 = call float @fabs(float %124) %127 = fsub float -0.000000e+00, %126 %128 = fsub float -0.000000e+00, %126 %129 = fsub float -0.000000e+00, %126 %130 = fsub float -0.000000e+00, %126 %131 = fcmp oge float %127, 0.000000e+00 %132 = sext i1 %131 to i32 %133 = bitcast i32 %132 to float %134 = bitcast float %133 to i32 %135 = icmp ne i32 %134, 0 %. = select i1 %135, float -1.000000e+00, float -0.000000e+00 %136 = fcmp oge float %128, 0.000000e+00 %137 = sext i1 %136 to i32 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = icmp ne i32 %139, 0 %temp16.0 = select i1 %140, float -1.000000e+00, float -0.000000e+00 %141 = fcmp oge float %129, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %142 to float %144 = bitcast float %143 to i32 %145 = icmp ne i32 %144, 0 %.170 = select i1 %145, float -1.000000e+00, float -0.000000e+00 %146 = fcmp oge float %130, 0.000000e+00 %147 = sext i1 %146 to i32 %148 = bitcast i32 %147 to float %149 = bitcast float %148 to i32 %150 = icmp ne i32 %149, 0 %temp8.0 = select i1 %150, float -1.000000e+00, float -0.000000e+00 %151 = fcmp olt float %., 0.000000e+00 %152 = sext i1 %151 to i32 %153 = fcmp olt float %temp16.0, 0.000000e+00 %154 = sext i1 %153 to i32 %155 = fcmp olt float %.170, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = bitcast i32 %152 to float %158 = bitcast i32 %154 to float %159 = bitcast i32 %156 to float %160 = bitcast float %157 to i32 %161 = bitcast float %159 to i32 %162 = or i32 %160, %161 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = bitcast float %158 to i32 %166 = or i32 %164, %165 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 br i1 %169, label %IF126, label %ENDIF125 IF126: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF125 ENDIF125: ; preds = %main_body, %IF126 %170 = fmul float %99, %99 %171 = fmul float %100, %100 %172 = fadd float %170, %171 %173 = fmul float %101, %101 %174 = fadd float %172, %173 %175 = fmul float 0.000000e+00, 0.000000e+00 %176 = fadd float %174, %175 %177 = call float @llvm.AMDGPU.rsq.clamped.f32(float %176) %178 = fmul float %99, %177 %179 = fmul float %100, %177 %180 = fmul float %101, %177 %181 = fmul float %105, %105 %182 = fmul float %106, %106 %183 = fadd float %181, %182 %184 = fmul float %107, %107 %185 = fadd float %183, %184 %186 = fmul float 0.000000e+00, 0.000000e+00 %187 = fadd float %185, %186 %188 = call float @llvm.AMDGPU.rsq.clamped.f32(float %187) %189 = fmul float %105, %188 %190 = fmul float %106, %188 %191 = fmul float %107, %188 %192 = call float @fabs(float %178) %193 = call float @fabs(float %179) %194 = call float @fabs(float %180) %195 = call float @fabs(float %temp8.0) %196 = call float @fabs(float %178) %197 = call float @fabs(float %179) %198 = call float @fabs(float %180) %199 = call float @fabs(float %temp8.0) %200 = fmul float %192, %196 %201 = fmul float %193, %197 %202 = fmul float %194, %198 %203 = fmul float %200, %200 %204 = fmul float %201, %201 %205 = fmul float %202, %202 %206 = fadd float %204, %203 %207 = fmul float %202, %202 %208 = fadd float %207, %206 %209 = fdiv float 1.000000e+00, %208 %210 = fmul float %209, %203 %211 = fmul float %209, %204 %212 = fmul float %209, %205 %213 = fmul float %41, %103 %214 = fmul float %42, %104 %215 = bitcast float %213 to i32 %216 = bitcast float %214 to i32 %217 = insertelement <2 x i32> undef, i32 %215, i32 0 %218 = insertelement <2 x i32> %217, i32 %216, i32 1 %219 = bitcast <8 x i32> %72 to <32 x i8> %220 = bitcast <4 x i32> %74 to <16 x i8> %221 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %218, <32 x i8> %219, <16 x i8> %220, i32 2) %222 = extractelement <4 x float> %221, i32 0 %223 = extractelement <4 x float> %221, i32 1 %224 = extractelement <4 x float> %221, i32 2 %225 = call float @llvm.pow.f32(float %222, float 0x40019999A0000000) %226 = call float @llvm.pow.f32(float %223, float 0x40019999A0000000) %227 = call float @llvm.pow.f32(float %224, float 0x40019999A0000000) %228 = fmul float %41, %102 %229 = fmul float %42, %104 %230 = bitcast float %228 to i32 %231 = bitcast float %229 to i32 %232 = insertelement <2 x i32> undef, i32 %230, i32 0 %233 = insertelement <2 x i32> %232, i32 %231, i32 1 %234 = bitcast <8 x i32> %72 to <32 x i8> %235 = bitcast <4 x i32> %74 to <16 x i8> %236 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %233, <32 x i8> %234, <16 x i8> %235, i32 2) %237 = extractelement <4 x float> %236, i32 0 %238 = extractelement <4 x float> %236, i32 1 %239 = extractelement <4 x float> %236, i32 2 %240 = call float @llvm.pow.f32(float %237, float 0x40019999A0000000) %241 = call float @llvm.pow.f32(float %238, float 0x40019999A0000000) %242 = call float @llvm.pow.f32(float %239, float 0x40019999A0000000) %243 = fmul float %211, %240 %244 = fmul float %211, %241 %245 = fmul float %211, %242 %246 = fmul float %225, %210 %247 = fadd float %246, %243 %248 = fmul float %226, %210 %249 = fadd float %248, %244 %250 = fmul float %227, %210 %251 = fadd float %250, %245 %252 = fmul float %34, %102 %253 = fmul float %34, %103 %254 = bitcast float %252 to i32 %255 = bitcast float %253 to i32 %256 = insertelement <2 x i32> undef, i32 %254, i32 0 %257 = insertelement <2 x i32> %256, i32 %255, i32 1 %258 = bitcast <8 x i32> %64 to <32 x i8> %259 = bitcast <4 x i32> %66 to <16 x i8> %260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %257, <32 x i8> %258, <16 x i8> %259, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = call float @llvm.pow.f32(float %261, float 0x40019999A0000000) %266 = call float @llvm.pow.f32(float %262, float 0x40019999A0000000) %267 = call float @llvm.pow.f32(float %263, float 0x40019999A0000000) %268 = fmul float %265, %212 %269 = fadd float %268, %247 %270 = fmul float %266, %212 %271 = fadd float %270, %249 %272 = fmul float %267, %212 %273 = fadd float %272, %251 %274 = fmul float %43, %103 %275 = fmul float %44, %104 %276 = fsub float -0.000000e+00, %180 %277 = fmul float %276, %189 %278 = fmul float %179, %190 %279 = fadd float %278, %277 %280 = fmul float %178, %191 %281 = fadd float %279, %280 %282 = fmul float %178, 1.000000e+00 %283 = fmul float %180, 1.000000e+00 %284 = fmul float %179, -1.000000e+00 %285 = fmul float %284, %189 %286 = fmul float %282, %190 %287 = fadd float %286, %285 %288 = fmul float %283, %191 %289 = fadd float %287, %288 %290 = fmul float %178, %189 %291 = fmul float %179, %190 %292 = fadd float %291, %290 %293 = fmul float %180, %191 %294 = fadd float %292, %293 %295 = fmul float %180, -1.000000e+00 %296 = fmul float %179, 1.000000e+00 %297 = fmul float %178, 1.000000e+00 %298 = fmul float %295, %27 %299 = fmul float %296, %28 %300 = fadd float %299, %298 %301 = fmul float %297, %29 %302 = fadd float %300, %301 %303 = fmul float %284, %27 %304 = fmul float %282, %28 %305 = fadd float %304, %303 %306 = fmul float %283, %29 %307 = fadd float %305, %306 %308 = fmul float %178, %27 %309 = fmul float %179, %28 %310 = fadd float %309, %308 %311 = fmul float %180, %29 %312 = fadd float %310, %311 %313 = fmul float %295, %30 %314 = fmul float %296, %31 %315 = fadd float %314, %313 %316 = fmul float %297, %32 %317 = fadd float %315, %316 %318 = fmul float %284, %30 %319 = fmul float %282, %31 %320 = fadd float %319, %318 %321 = fmul float %283, %32 %322 = fadd float %320, %321 %323 = fmul float %178, %30 %324 = fmul float %179, %31 %325 = fadd float %324, %323 %326 = fmul float %180, %32 %327 = fadd float %325, %326 %328 = bitcast float %274 to i32 %329 = bitcast float %275 to i32 %330 = insertelement <2 x i32> undef, i32 %328, i32 0 %331 = insertelement <2 x i32> %330, i32 %329, i32 1 %332 = bitcast <8 x i32> %80 to <32 x i8> %333 = bitcast <4 x i32> %82 to <16 x i8> %334 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %331, <32 x i8> %332, <16 x i8> %333, i32 2) %335 = extractelement <4 x float> %334, i32 3 %336 = bitcast float %274 to i32 %337 = bitcast float %275 to i32 %338 = insertelement <2 x i32> undef, i32 %336, i32 0 %339 = insertelement <2 x i32> %338, i32 %337, i32 1 %340 = bitcast <8 x i32> %76 to <32 x i8> %341 = bitcast <4 x i32> %78 to <16 x i8> %342 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %339, <32 x i8> %340, <16 x i8> %341, i32 2) %343 = extractelement <4 x float> %342, i32 0 %344 = extractelement <4 x float> %342, i32 1 %345 = extractelement <4 x float> %342, i32 2 %346 = extractelement <4 x float> %342, i32 3 %347 = fmul float %33, %33 %348 = fmul float %343, 2.000000e+00 %349 = fadd float %348, -1.000000e+00 %350 = fmul float %344, 2.000000e+00 %351 = fadd float %350, -1.000000e+00 %352 = fmul float %349, %33 %353 = fmul float %351, %33 %354 = fmul float %352, %352 %355 = fmul float %353, %353 %356 = fadd float %354, %355 %357 = fmul float 1.000000e+00, 1.000000e+00 %358 = fadd float %356, %357 %359 = fmul float 0.000000e+00, 0.000000e+00 %360 = fadd float %358, %359 %361 = call float @llvm.AMDGPU.rsq.clamped.f32(float %360) %362 = fmul float %352, %361 %363 = fmul float %353, %361 %364 = fmul float 1.000000e+00, %361 %365 = fmul float %335, 2.000000e+00 %366 = fadd float %365, -1.000000e+00 %367 = fmul float %352, %352 %368 = fmul float %353, %353 %369 = fmul float %353, %352 %370 = fsub float -0.000000e+00, %367 %371 = fmul float %345, %347 %372 = fadd float %371, %370 %373 = fsub float -0.000000e+00, %368 %374 = fmul float %346, %347 %375 = fadd float %374, %373 %376 = fsub float -0.000000e+00, %369 %377 = fmul float %366, %347 %378 = fadd float %377, %376 %379 = fmul float %302, %362 %380 = fmul float %307, %363 %381 = fadd float %380, %379 %382 = fmul float %312, %364 %383 = fadd float %381, %382 %384 = call float @llvm.AMDIL.clamp.(float %383, float 0.000000e+00, float 1.000000e+00) %385 = fmul float %317, %362 %386 = fmul float %322, %363 %387 = fadd float %386, %385 %388 = fmul float %327, %364 %389 = fadd float %387, %388 %390 = call float @llvm.AMDIL.clamp.(float %389, float 0.000000e+00, float 1.000000e+00) %391 = fadd float %281, %317 %392 = fadd float %289, %322 %393 = fadd float %294, %327 %394 = fdiv float 1.000000e+00, %393 %395 = fsub float -0.000000e+00, %352 %396 = fmul float %391, %394 %397 = fadd float %396, %395 %398 = fsub float -0.000000e+00, %353 %399 = fmul float %392, %394 %400 = fadd float %399, %398 %401 = fdiv float 1.000000e+00, %36 %402 = fadd float %401, %372 %403 = fadd float %401, %375 %404 = fmul float %378, %378 %405 = fsub float -0.000000e+00, %404 %406 = fmul float %402, %403 %407 = fadd float %406, %405 %408 = fmul float %397, %397 %409 = fmul float %400, %403 %410 = fsub float -0.000000e+00, %378 %411 = fsub float -0.000000e+00, %378 %412 = fmul float %397, %410 %413 = fmul float %397, %411 %414 = fadd float %412, %413 %415 = fadd float %414, %409 %416 = fmul float %400, %415 %417 = fmul float %408, %403 %418 = fadd float %417, %416 %419 = fmul float %418, 5.000000e-01 %420 = fdiv float 1.000000e+00, %407 %421 = fmul float %419, %420 %422 = fsub float -0.000000e+00, %407 %423 = fcmp oge float %422, 0.000000e+00 %424 = sext i1 %423 to i32 %425 = bitcast i32 %424 to float %426 = bitcast float %425 to i32 %427 = icmp ne i32 %426, 0 %.171 = select i1 %427, float 1.000000e+00, float 0.000000e+00 %428 = fmul float %419, %420 %429 = fadd float %428, -1.600000e+01 %430 = fcmp oge float %429, 0.000000e+00 %431 = sext i1 %430 to i32 %432 = bitcast i32 %431 to float %433 = bitcast float %432 to i32 %434 = icmp ne i32 %433, 0 %temp64.0 = select i1 %434, float 1.000000e+00, float 0.000000e+00 %435 = fadd float %temp64.0, %.171 %436 = fmul float %421, 0xBFF7154CA0000000 %437 = call float @llvm.AMDIL.exp.(float %436) %438 = call float @llvm.maxnum.f32(float %407, float 0x3E7AD7F2A0000000) %439 = call float @llvm.AMDGPU.rsq.clamped.f32(float %438) %440 = fmul float %439, %437 %441 = fsub float -0.000000e+00, %435 %442 = fcmp oge float %441, 0.000000e+00 %443 = sext i1 %442 to i32 %444 = bitcast i32 %443 to float %445 = bitcast float %444 to i32 %446 = icmp ne i32 %445, 0 %.172 = select i1 %446, float %440, float 0.000000e+00 %447 = fmul float %362, %281 %448 = fmul float %364, %294 %449 = fadd float %448, %447 %450 = fmul float %363, %289 %451 = fadd float %449, %450 %452 = fsub float -0.000000e+00, %451 %453 = fadd float %452, 1.000000e+00 %454 = fmul float %453, %453 %455 = fmul float %454, %454 %456 = fmul float %453, %455 %457 = fmul float %180, -1.000000e+00 %458 = fmul float %179, -1.000000e+00 %459 = fmul float %178, 1.000000e+00 %460 = fmul float %362, %457 %461 = fmul float %363, %458 %462 = fadd float %461, %460 %463 = fmul float %364, %459 %464 = fadd float %462, %463 %465 = fmul float %363, %178 %466 = fmul float %362, %179 %467 = fadd float %466, %465 %468 = fmul float %364, %179 %469 = fadd float %467, %468 %470 = fmul float %362, %178 %471 = fmul float %363, %180 %472 = fadd float %471, %470 %473 = fmul float %364, %180 %474 = fadd float %472, %473 %475 = insertelement <4 x float> undef, float %464, i32 0 %476 = insertelement <4 x float> %475, float %469, i32 1 %477 = insertelement <4 x float> %476, float %474, i32 2 %478 = insertelement <4 x float> %477, float 0.000000e+00, i32 3 %479 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %478) %480 = extractelement <4 x float> %479, i32 0 %481 = extractelement <4 x float> %479, i32 1 %482 = extractelement <4 x float> %479, i32 2 %483 = extractelement <4 x float> %479, i32 3 %484 = call float @fabs(float %482) %485 = fdiv float 1.000000e+00, %484 %486 = fmul float %480, %485 %487 = fadd float %486, 1.500000e+00 %488 = fmul float %481, %485 %489 = fadd float %488, 1.500000e+00 %490 = bitcast float %489 to i32 %491 = bitcast float %487 to i32 %492 = bitcast float %483 to i32 %493 = insertelement <4 x i32> undef, i32 %490, i32 0 %494 = insertelement <4 x i32> %493, i32 %491, i32 1 %495 = insertelement <4 x i32> %494, i32 %492, i32 2 %496 = insertelement <4 x i32> %495, i32 undef, i32 3 %497 = bitcast <8 x i32> %60 to <32 x i8> %498 = bitcast <4 x i32> %62 to <16 x i8> %499 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %496, <32 x i8> %497, <16 x i8> %498, i32 4) %500 = extractelement <4 x float> %499, i32 0 %501 = extractelement <4 x float> %499, i32 1 %502 = extractelement <4 x float> %499, i32 2 %503 = call float @llvm.pow.f32(float %500, float 0x40019999A0000000) %504 = call float @llvm.pow.f32(float %501, float 0x40019999A0000000) %505 = call float @llvm.pow.f32(float %502, float 0x40019999A0000000) %506 = fadd float %384, %503 %507 = fadd float %384, %504 %508 = fadd float %384, %505 %509 = fmul float %269, %506 %510 = fmul float %271, %507 %511 = fmul float %273, %508 %512 = fmul float %.172, %456 %513 = fmul float %.172, %456 %514 = fmul float %.172, %456 %515 = fmul float %.172, %456 %516 = call float @llvm.AMDIL.clamp.(float %512, float 0.000000e+00, float 1.000000e+00) %517 = call float @llvm.AMDIL.clamp.(float %513, float 0.000000e+00, float 1.000000e+00) %518 = call float @llvm.AMDIL.clamp.(float %514, float 0.000000e+00, float 1.000000e+00) %519 = call float @llvm.AMDIL.clamp.(float %515, float 0.000000e+00, float 1.000000e+00) %520 = fmul float %519, %390 %521 = fadd float %520, %509 %522 = fmul float %519, %390 %523 = fadd float %522, %510 %524 = fmul float %519, %390 %525 = fadd float %524, %511 %526 = fmul float %43, %102 %527 = fmul float %44, %104 %528 = fmul float %180, 1.000000e+00 %529 = fmul float %179, 1.000000e+00 %530 = fmul float %178, -1.000000e+00 %531 = fmul float %530, %189 %532 = fmul float %528, %190 %533 = fadd float %532, %531 %534 = fmul float %529, %191 %535 = fadd float %533, %534 %536 = fmul float %530, %27 %537 = fmul float %528, %28 %538 = fadd float %537, %536 %539 = fmul float %529, %29 %540 = fadd float %538, %539 %541 = fmul float %530, %30 %542 = fmul float %528, %31 %543 = fadd float %542, %541 %544 = fmul float %529, %32 %545 = fadd float %543, %544 %546 = bitcast float %526 to i32 %547 = bitcast float %527 to i32 %548 = insertelement <2 x i32> undef, i32 %546, i32 0 %549 = insertelement <2 x i32> %548, i32 %547, i32 1 %550 = bitcast <8 x i32> %80 to <32 x i8> %551 = bitcast <4 x i32> %82 to <16 x i8> %552 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %549, <32 x i8> %550, <16 x i8> %551, i32 2) %553 = extractelement <4 x float> %552, i32 3 %554 = bitcast float %526 to i32 %555 = bitcast float %527 to i32 %556 = insertelement <2 x i32> undef, i32 %554, i32 0 %557 = insertelement <2 x i32> %556, i32 %555, i32 1 %558 = bitcast <8 x i32> %76 to <32 x i8> %559 = bitcast <4 x i32> %78 to <16 x i8> %560 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %557, <32 x i8> %558, <16 x i8> %559, i32 2) %561 = extractelement <4 x float> %560, i32 0 %562 = extractelement <4 x float> %560, i32 1 %563 = extractelement <4 x float> %560, i32 2 %564 = extractelement <4 x float> %560, i32 3 %565 = fmul float %561, 2.000000e+00 %566 = fadd float %565, -1.000000e+00 %567 = fmul float %562, 2.000000e+00 %568 = fadd float %567, -1.000000e+00 %569 = fmul float %566, %33 %570 = fmul float %568, %33 %571 = fmul float %569, %569 %572 = fmul float %570, %570 %573 = fadd float %571, %572 %574 = fmul float 1.000000e+00, 1.000000e+00 %575 = fadd float %573, %574 %576 = fmul float 0.000000e+00, 0.000000e+00 %577 = fadd float %575, %576 %578 = call float @llvm.AMDGPU.rsq.clamped.f32(float %577) %579 = fmul float %569, %578 %580 = fmul float %570, %578 %581 = fmul float 1.000000e+00, %578 %582 = fmul float %553, 2.000000e+00 %583 = fadd float %582, -1.000000e+00 %584 = fmul float %569, %569 %585 = fmul float %570, %570 %586 = fmul float %570, %569 %587 = fsub float -0.000000e+00, %584 %588 = fmul float %563, %347 %589 = fadd float %588, %587 %590 = fsub float -0.000000e+00, %585 %591 = fmul float %564, %347 %592 = fadd float %591, %590 %593 = fsub float -0.000000e+00, %586 %594 = fmul float %583, %347 %595 = fadd float %594, %593 %596 = fmul float %540, %579 %597 = fmul float %307, %580 %598 = fadd float %597, %596 %599 = fmul float %312, %581 %600 = fadd float %598, %599 %601 = call float @llvm.AMDIL.clamp.(float %600, float 0.000000e+00, float 1.000000e+00) %602 = fmul float %545, %579 %603 = fmul float %322, %580 %604 = fadd float %603, %602 %605 = fmul float %327, %581 %606 = fadd float %604, %605 %607 = call float @llvm.AMDIL.clamp.(float %606, float 0.000000e+00, float 1.000000e+00) %608 = fadd float %535, %545 %609 = fadd float %289, %322 %610 = fadd float %294, %327 %611 = fdiv float 1.000000e+00, %610 %612 = fsub float -0.000000e+00, %569 %613 = fmul float %608, %611 %614 = fadd float %613, %612 %615 = fsub float -0.000000e+00, %570 %616 = fmul float %609, %611 %617 = fadd float %616, %615 %618 = fadd float %401, %589 %619 = fadd float %401, %592 %620 = fmul float %595, %595 %621 = fsub float -0.000000e+00, %620 %622 = fmul float %618, %619 %623 = fadd float %622, %621 %624 = fmul float %614, %614 %625 = fmul float %617, %619 %626 = fsub float -0.000000e+00, %595 %627 = fsub float -0.000000e+00, %595 %628 = fmul float %614, %626 %629 = fmul float %614, %627 %630 = fadd float %628, %629 %631 = fadd float %630, %625 %632 = fmul float %617, %631 %633 = fmul float %624, %619 %634 = fadd float %633, %632 %635 = fmul float %634, 5.000000e-01 %636 = fdiv float 1.000000e+00, %623 %637 = fmul float %635, %636 %638 = fsub float -0.000000e+00, %623 %639 = fcmp oge float %638, 0.000000e+00 %640 = sext i1 %639 to i32 %641 = bitcast i32 %640 to float %642 = bitcast float %641 to i32 %643 = icmp ne i32 %642, 0 %temp108.0 = select i1 %643, float 1.000000e+00, float 0.000000e+00 %644 = fmul float %635, %636 %645 = fadd float %644, -1.600000e+01 %646 = fcmp oge float %645, 0.000000e+00 %647 = sext i1 %646 to i32 %648 = bitcast i32 %647 to float %649 = bitcast float %648 to i32 %650 = icmp ne i32 %649, 0 %.173 = select i1 %650, float 1.000000e+00, float 0.000000e+00 %651 = fadd float %.173, %temp108.0 %652 = fmul float %637, 0xBFF7154CA0000000 %653 = call float @llvm.AMDIL.exp.(float %652) %654 = call float @llvm.maxnum.f32(float %623, float 0x3E7AD7F2A0000000) %655 = call float @llvm.AMDGPU.rsq.clamped.f32(float %654) %656 = fmul float %655, %653 %657 = fsub float -0.000000e+00, %651 %658 = fcmp oge float %657, 0.000000e+00 %659 = sext i1 %658 to i32 %660 = bitcast i32 %659 to float %661 = bitcast float %660 to i32 %662 = icmp ne i32 %661, 0 %temp64.3 = select i1 %662, float %656, float 0.000000e+00 %663 = fmul float %579, %535 %664 = fmul float %581, %294 %665 = fadd float %664, %663 %666 = fmul float %580, %289 %667 = fadd float %665, %666 %668 = fsub float -0.000000e+00, %667 %669 = fadd float %668, 1.000000e+00 %670 = fmul float %669, %669 %671 = fmul float %670, %670 %672 = fmul float %669, %671 %673 = fmul float %178, -1.000000e+00 %674 = fmul float %179, -1.000000e+00 %675 = fmul float %178, 1.000000e+00 %676 = fmul float %579, %673 %677 = fmul float %580, %674 %678 = fadd float %677, %676 %679 = fmul float %581, %675 %680 = fadd float %678, %679 %681 = fmul float %580, %178 %682 = fmul float %581, %179 %683 = fadd float %682, %681 %684 = fmul float %579, %180 %685 = fadd float %683, %684 %686 = fmul float %579, %179 %687 = fmul float %580, %180 %688 = fadd float %687, %686 %689 = fmul float %581, %180 %690 = fadd float %688, %689 %691 = insertelement <4 x float> undef, float %680, i32 0 %692 = insertelement <4 x float> %691, float %685, i32 1 %693 = insertelement <4 x float> %692, float %690, i32 2 %694 = insertelement <4 x float> %693, float 0.000000e+00, i32 3 %695 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %694) %696 = extractelement <4 x float> %695, i32 0 %697 = extractelement <4 x float> %695, i32 1 %698 = extractelement <4 x float> %695, i32 2 %699 = extractelement <4 x float> %695, i32 3 %700 = call float @fabs(float %698) %701 = fdiv float 1.000000e+00, %700 %702 = fmul float %696, %701 %703 = fadd float %702, 1.500000e+00 %704 = fmul float %697, %701 %705 = fadd float %704, 1.500000e+00 %706 = bitcast float %705 to i32 %707 = bitcast float %703 to i32 %708 = bitcast float %699 to i32 %709 = insertelement <4 x i32> undef, i32 %706, i32 0 %710 = insertelement <4 x i32> %709, i32 %707, i32 1 %711 = insertelement <4 x i32> %710, i32 %708, i32 2 %712 = insertelement <4 x i32> %711, i32 undef, i32 3 %713 = bitcast <8 x i32> %60 to <32 x i8> %714 = bitcast <4 x i32> %62 to <16 x i8> %715 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %712, <32 x i8> %713, <16 x i8> %714, i32 4) %716 = extractelement <4 x float> %715, i32 0 %717 = extractelement <4 x float> %715, i32 1 %718 = extractelement <4 x float> %715, i32 2 %719 = call float @llvm.pow.f32(float %716, float 0x40019999A0000000) %720 = call float @llvm.pow.f32(float %717, float 0x40019999A0000000) %721 = call float @llvm.pow.f32(float %718, float 0x40019999A0000000) %722 = fadd float %601, %719 %723 = fadd float %601, %720 %724 = fadd float %601, %721 %725 = fmul float %269, %722 %726 = fmul float %271, %723 %727 = fmul float %273, %724 %728 = fmul float %temp64.3, %672 %729 = fmul float %temp64.3, %672 %730 = fmul float %temp64.3, %672 %731 = fmul float %temp64.3, %672 %732 = call float @llvm.AMDIL.clamp.(float %728, float 0.000000e+00, float 1.000000e+00) %733 = call float @llvm.AMDIL.clamp.(float %729, float 0.000000e+00, float 1.000000e+00) %734 = call float @llvm.AMDIL.clamp.(float %730, float 0.000000e+00, float 1.000000e+00) %735 = call float @llvm.AMDIL.clamp.(float %731, float 0.000000e+00, float 1.000000e+00) %736 = fmul float %735, %607 %737 = fadd float %736, %725 %738 = fmul float %735, %607 %739 = fadd float %738, %726 %740 = fmul float %735, %607 %741 = fadd float %740, %727 %742 = fmul float %35, %102 %743 = fmul float %35, %103 %744 = fmul float %528, %189 %745 = fmul float %529, %190 %746 = fadd float %745, %744 %747 = fmul float %530, %191 %748 = fadd float %746, %747 %749 = fmul float %282, %189 %750 = fmul float %283, %190 %751 = fadd float %750, %749 %752 = fmul float %284, %191 %753 = fadd float %751, %752 %754 = fmul float %528, %27 %755 = fmul float %529, %28 %756 = fadd float %755, %754 %757 = fmul float %530, %29 %758 = fadd float %756, %757 %759 = fmul float %282, %27 %760 = fmul float %283, %28 %761 = fadd float %760, %759 %762 = fmul float %284, %29 %763 = fadd float %761, %762 %764 = fmul float %528, %30 %765 = fmul float %529, %31 %766 = fadd float %765, %764 %767 = fmul float %530, %32 %768 = fadd float %766, %767 %769 = fmul float %282, %30 %770 = fmul float %283, %31 %771 = fadd float %770, %769 %772 = fmul float %284, %32 %773 = fadd float %771, %772 %774 = bitcast float %742 to i32 %775 = bitcast float %743 to i32 %776 = insertelement <2 x i32> undef, i32 %774, i32 0 %777 = insertelement <2 x i32> %776, i32 %775, i32 1 %778 = bitcast <8 x i32> %68 to <32 x i8> %779 = bitcast <4 x i32> %70 to <16 x i8> %780 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %777, <32 x i8> %778, <16 x i8> %779, i32 2) %781 = extractelement <4 x float> %780, i32 0 %782 = extractelement <4 x float> %780, i32 1 %783 = extractelement <4 x float> %780, i32 2 %784 = extractelement <4 x float> %780, i32 3 %785 = fmul float %781, 2.000000e+00 %786 = fadd float %785, -1.000000e+00 %787 = fmul float %782, 2.000000e+00 %788 = fadd float %787, -1.000000e+00 %789 = fmul float %786, %33 %790 = fmul float %788, %33 %791 = fmul float %789, %789 %792 = fmul float %790, %790 %793 = fadd float %791, %792 %794 = fmul float 1.000000e+00, 1.000000e+00 %795 = fadd float %793, %794 %796 = fmul float 0.000000e+00, 0.000000e+00 %797 = fadd float %795, %796 %798 = call float @llvm.AMDGPU.rsq.clamped.f32(float %797) %799 = fmul float %789, %798 %800 = fmul float %790, %798 %801 = fmul float 1.000000e+00, %798 %802 = fmul float %783, 1.000000e+00 %803 = fadd float %802, 0.000000e+00 %804 = fmul float %784, 1.000000e+00 %805 = fadd float %804, 0.000000e+00 %806 = fmul float %784, 2.000000e+00 %807 = fadd float %806, -1.000000e+00 %808 = fmul float %789, %789 %809 = fmul float %790, %790 %810 = fmul float %790, %789 %811 = fsub float -0.000000e+00, %808 %812 = fmul float %803, %347 %813 = fadd float %812, %811 %814 = fsub float -0.000000e+00, %809 %815 = fmul float %805, %347 %816 = fadd float %815, %814 %817 = fsub float -0.000000e+00, %810 %818 = fmul float %807, %347 %819 = fadd float %818, %817 %820 = fmul float %758, %799 %821 = fmul float %763, %800 %822 = fadd float %821, %820 %823 = fmul float %312, %801 %824 = fadd float %822, %823 %825 = call float @llvm.AMDIL.clamp.(float %824, float 0.000000e+00, float 1.000000e+00) %826 = fmul float %768, %799 %827 = fmul float %773, %800 %828 = fadd float %827, %826 %829 = fmul float %327, %801 %830 = fadd float %828, %829 %831 = call float @llvm.AMDIL.clamp.(float %830, float 0.000000e+00, float 1.000000e+00) %832 = fadd float %748, %768 %833 = fadd float %753, %773 %834 = fadd float %294, %327 %835 = fdiv float 1.000000e+00, %834 %836 = fsub float -0.000000e+00, %789 %837 = fmul float %832, %835 %838 = fadd float %837, %836 %839 = fsub float -0.000000e+00, %790 %840 = fmul float %833, %835 %841 = fadd float %840, %839 %842 = fadd float %401, %813 %843 = fadd float %401, %816 %844 = fmul float %819, %819 %845 = fsub float -0.000000e+00, %844 %846 = fmul float %842, %843 %847 = fadd float %846, %845 %848 = fmul float %838, %838 %849 = fmul float %841, %843 %850 = fsub float -0.000000e+00, %819 %851 = fsub float -0.000000e+00, %819 %852 = fmul float %838, %850 %853 = fmul float %838, %851 %854 = fadd float %852, %853 %855 = fadd float %854, %849 %856 = fmul float %841, %855 %857 = fmul float %848, %843 %858 = fadd float %857, %856 %859 = fmul float %858, 5.000000e-01 %860 = fdiv float 1.000000e+00, %847 %861 = fmul float %859, %860 %862 = fsub float -0.000000e+00, %847 %863 = fcmp oge float %862, 0.000000e+00 %864 = sext i1 %863 to i32 %865 = bitcast i32 %864 to float %866 = bitcast float %865 to i32 %867 = icmp ne i32 %866, 0 %.174 = select i1 %867, float 1.000000e+00, float 0.000000e+00 %868 = fmul float %859, %860 %869 = fadd float %868, -1.600000e+01 %870 = fcmp oge float %869, 0.000000e+00 %871 = sext i1 %870 to i32 %872 = bitcast i32 %871 to float %873 = bitcast float %872 to i32 %874 = icmp ne i32 %873, 0 %temp40.0 = select i1 %874, float 1.000000e+00, float 0.000000e+00 %875 = fadd float %temp40.0, %.174 %876 = fmul float %861, 0xBFF7154CA0000000 %877 = call float @llvm.AMDIL.exp.(float %876) %878 = call float @llvm.maxnum.f32(float %847, float 0x3E7AD7F2A0000000) %879 = call float @llvm.AMDGPU.rsq.clamped.f32(float %878) %880 = fmul float %879, %877 %881 = fsub float -0.000000e+00, %875 %882 = fcmp oge float %881, 0.000000e+00 %883 = sext i1 %882 to i32 %884 = bitcast i32 %883 to float %885 = bitcast float %884 to i32 %886 = icmp ne i32 %885, 0 %.175 = select i1 %886, float %880, float 0.000000e+00 %887 = fmul float %799, %748 %888 = fmul float %800, %753 %889 = fadd float %888, %887 %890 = fmul float %801, %294 %891 = fadd float %889, %890 %892 = fsub float -0.000000e+00, %891 %893 = fadd float %892, 1.000000e+00 %894 = fmul float %893, %893 %895 = fmul float %894, %894 %896 = fmul float %893, %895 %897 = fmul float %800, %178 %898 = fmul float %801, %178 %899 = fadd float %898, %897 %900 = fmul float %799, %180 %901 = fadd float %899, %900 %902 = fmul float %799, %179 %903 = fmul float %801, %179 %904 = fadd float %903, %902 %905 = fmul float %800, %180 %906 = fadd float %904, %905 %907 = fmul float %178, -1.000000e+00 %908 = fmul float %179, -1.000000e+00 %909 = fmul float %180, 1.000000e+00 %910 = fmul float %799, %907 %911 = fmul float %800, %908 %912 = fadd float %911, %910 %913 = fmul float %801, %909 %914 = fadd float %912, %913 %915 = insertelement <4 x float> undef, float %901, i32 0 %916 = insertelement <4 x float> %915, float %906, i32 1 %917 = insertelement <4 x float> %916, float %914, i32 2 %918 = insertelement <4 x float> %917, float %895, i32 3 %919 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %918) %920 = extractelement <4 x float> %919, i32 0 %921 = extractelement <4 x float> %919, i32 1 %922 = extractelement <4 x float> %919, i32 2 %923 = extractelement <4 x float> %919, i32 3 %924 = call float @fabs(float %922) %925 = fdiv float 1.000000e+00, %924 %926 = fmul float %920, %925 %927 = fadd float %926, 1.500000e+00 %928 = fmul float %921, %925 %929 = fadd float %928, 1.500000e+00 %930 = bitcast float %929 to i32 %931 = bitcast float %927 to i32 %932 = bitcast float %923 to i32 %933 = insertelement <4 x i32> undef, i32 %930, i32 0 %934 = insertelement <4 x i32> %933, i32 %931, i32 1 %935 = insertelement <4 x i32> %934, i32 %932, i32 2 %936 = insertelement <4 x i32> %935, i32 undef, i32 3 %937 = bitcast <8 x i32> %60 to <32 x i8> %938 = bitcast <4 x i32> %62 to <16 x i8> %939 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %936, <32 x i8> %937, <16 x i8> %938, i32 4) %940 = extractelement <4 x float> %939, i32 0 %941 = extractelement <4 x float> %939, i32 1 %942 = extractelement <4 x float> %939, i32 2 %943 = call float @llvm.pow.f32(float %940, float 0x40019999A0000000) %944 = call float @llvm.pow.f32(float %941, float 0x40019999A0000000) %945 = call float @llvm.pow.f32(float %942, float 0x40019999A0000000) %946 = fadd float %825, %943 %947 = fadd float %825, %944 %948 = fadd float %825, %945 %949 = fmul float %269, %946 %950 = fmul float %271, %947 %951 = fmul float %273, %948 %952 = fmul float %.175, %896 %953 = fmul float %.175, %896 %954 = fmul float %.175, %896 %955 = fmul float %.175, %896 %956 = call float @llvm.AMDIL.clamp.(float %952, float 0.000000e+00, float 1.000000e+00) %957 = call float @llvm.AMDIL.clamp.(float %953, float 0.000000e+00, float 1.000000e+00) %958 = call float @llvm.AMDIL.clamp.(float %954, float 0.000000e+00, float 1.000000e+00) %959 = call float @llvm.AMDIL.clamp.(float %955, float 0.000000e+00, float 1.000000e+00) %960 = fmul float %957, %831 %961 = fadd float %960, %949 %962 = fmul float %957, %831 %963 = fadd float %962, %950 %964 = fmul float %957, %831 %965 = fadd float %964, %951 %966 = fmul float %211, %737 %967 = fmul float %211, %739 %968 = fmul float %211, %741 %969 = fmul float %521, %210 %970 = fadd float %969, %966 %971 = fmul float %523, %210 %972 = fadd float %971, %967 %973 = fmul float %525, %210 %974 = fadd float %973, %968 %975 = fmul float %961, %212 %976 = fadd float %975, %970 %977 = fmul float %963, %212 %978 = fadd float %977, %972 %979 = fmul float %965, %212 %980 = fadd float %979, %974 %981 = fmul float %269, 0x3FD3333340000000 %982 = fmul float %271, 0x3FE2E147A0000000 %983 = fadd float %982, %981 %984 = fmul float %273, 0x3FBC28F5C0000000 %985 = fadd float %983, %984 %986 = fsub float -0.000000e+00, %985 %987 = fadd float %986, 1.000000e+00 %988 = fmul float %987, %37 %989 = fmul float %987, %38 %990 = fmul float %987, %39 %991 = call float @llvm.AMDIL.clamp.(float %312, float 0.000000e+00, float 1.000000e+00) %992 = insertelement <4 x float> undef, float %178, i32 0 %993 = insertelement <4 x float> %992, float %179, i32 1 %994 = insertelement <4 x float> %993, float %180, i32 2 %995 = insertelement <4 x float> %994, float %264, i32 3 %996 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %995) %997 = extractelement <4 x float> %996, i32 0 %998 = extractelement <4 x float> %996, i32 1 %999 = extractelement <4 x float> %996, i32 2 %1000 = extractelement <4 x float> %996, i32 3 %1001 = call float @fabs(float %999) %1002 = fdiv float 1.000000e+00, %1001 %1003 = fmul float %997, %1002 %1004 = fadd float %1003, 1.500000e+00 %1005 = fmul float %998, %1002 %1006 = fadd float %1005, 1.500000e+00 %1007 = bitcast float %1006 to i32 %1008 = bitcast float %1004 to i32 %1009 = bitcast float %1000 to i32 %1010 = insertelement <4 x i32> undef, i32 %1007, i32 0 %1011 = insertelement <4 x i32> %1010, i32 %1008, i32 1 %1012 = insertelement <4 x i32> %1011, i32 %1009, i32 2 %1013 = insertelement <4 x i32> %1012, i32 undef, i32 3 %1014 = bitcast <8 x i32> %60 to <32 x i8> %1015 = bitcast <4 x i32> %62 to <16 x i8> %1016 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1013, <32 x i8> %1014, <16 x i8> %1015, i32 4) %1017 = extractelement <4 x float> %1016, i32 0 %1018 = extractelement <4 x float> %1016, i32 1 %1019 = extractelement <4 x float> %1016, i32 2 %1020 = call float @llvm.pow.f32(float %1017, float 0x40019999A0000000) %1021 = call float @llvm.pow.f32(float %1018, float 0x40019999A0000000) %1022 = call float @llvm.pow.f32(float %1019, float 0x40019999A0000000) %1023 = fadd float %1020, %991 %1024 = fadd float %1021, %991 %1025 = fadd float %1022, %991 %1026 = fmul float %988, %1023 %1027 = fadd float %1026, %976 %1028 = fmul float %989, %1024 %1029 = fadd float %1028, %978 %1030 = fmul float %990, %1025 %1031 = fadd float %1030, %980 %1032 = fmul float %108, 5.000000e-01 %1033 = fadd float %1032, 5.000000e-01 %1034 = bitcast float %1033 to i32 %1035 = bitcast float %58 to i32 %1036 = insertelement <2 x i32> undef, i32 %1034, i32 0 %1037 = insertelement <2 x i32> %1036, i32 %1035, i32 1 %1038 = bitcast <8 x i32> %84 to <32 x i8> %1039 = bitcast <4 x i32> %86 to <16 x i8> %1040 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1037, <32 x i8> %1038, <16 x i8> %1039, i32 2) %1041 = extractelement <4 x float> %1040, i32 0 %1042 = fsub float -0.000000e+00, %24 %1043 = fadd float %1042, %102 %1044 = fsub float -0.000000e+00, %25 %1045 = fadd float %1044, %103 %1046 = fsub float -0.000000e+00, %26 %1047 = fadd float %1046, %104 %1048 = fmul float %1043, %1043 %1049 = fmul float %1045, %1045 %1050 = fadd float %1049, %1048 %1051 = fmul float %1047, %1047 %1052 = fadd float %1050, %1051 %1053 = fmul float %1052, %57 %1054 = fmul float %1047, %56 %1055 = fmul float %1054, 0x3FF7154CA0000000 %1056 = call float @llvm.AMDIL.exp.(float %1055) %1057 = fsub float -0.000000e+00, %1056 %1058 = fadd float %1057, 1.000000e+00 %1059 = fmul float %1058, %1053 %1060 = fdiv float 1.000000e+00, %1047 %1061 = fmul float %1060, %1059 %1062 = fmul float %1061, 0x3FF7154CA0000000 %1063 = call float @llvm.AMDIL.exp.(float %1062) %1064 = call float @llvm.AMDIL.clamp.(float %1063, float 0.000000e+00, float 1.000000e+00) %1065 = fsub float -0.000000e+00, %1064 %1066 = fadd float %1065, 1.000000e+00 %1067 = fmul float %1066, %1041 %1068 = fsub float -0.000000e+00, %40 %1069 = fadd float %1068, %104 %1070 = fcmp oge float %1069, 0.000000e+00 %1071 = sext i1 %1070 to i32 %1072 = bitcast i32 %1071 to float %1073 = bitcast float %1072 to i32 %1074 = icmp ne i32 %1073, 0 %temp48.0 = select i1 %1074, float 1.000000e+00, float 0.000000e+00 %1075 = fsub float -0.000000e+00, %104 %1076 = fadd float %40, %1075 %1077 = fsub float -0.000000e+00, %45 %1078 = fadd float %1077, %48 %1079 = fsub float -0.000000e+00, %46 %1080 = fadd float %1079, %49 %1081 = fsub float -0.000000e+00, %47 %1082 = fadd float %1081, %50 %1083 = fmul float %1076, %51 %1084 = fmul float %1076, %51 %1085 = fmul float %1076, %51 %1086 = fmul float %1076, %51 %1087 = call float @llvm.AMDIL.clamp.(float %1083, float 0.000000e+00, float 1.000000e+00) %1088 = call float @llvm.AMDIL.clamp.(float %1084, float 0.000000e+00, float 1.000000e+00) %1089 = call float @llvm.AMDIL.clamp.(float %1085, float 0.000000e+00, float 1.000000e+00) %1090 = call float @llvm.AMDIL.clamp.(float %1086, float 0.000000e+00, float 1.000000e+00) %1091 = fmul float %1088, %1078 %1092 = fadd float %1091, %45 %1093 = fmul float %1088, %1080 %1094 = fadd float %1093, %46 %1095 = fmul float %1088, %1082 %1096 = fadd float %1095, %47 %1097 = fmul float %1027, %1092 %1098 = fmul float %1029, %1094 %1099 = fmul float %1031, %1096 %1100 = fsub float -0.000000e+00, %191 %1101 = fdiv float 1.000000e+00, %1100 %1102 = fmul float %1076, %1101 %1103 = fmul float %1102, %189 %1104 = fmul float %1102, %190 %1105 = fmul float %1102, %191 %1106 = fmul float %1103, %1103 %1107 = fmul float %1104, %1104 %1108 = fadd float %1107, %1106 %1109 = fmul float %1105, %1105 %1110 = fadd float %1108, %1109 %1111 = call float @llvm.maxnum.f32(float %1110, float 0x3E7AD7F2A0000000) %1112 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1111) %1113 = fmul float %1112, %1111 %1114 = fsub float -0.000000e+00, %1111 %1115 = call float @llvm.AMDGPU.cndlt(float %1114, float %1113, float 0.000000e+00) %1116 = fsub float -0.000000e+00, %1115 %1117 = fmul float %1116, %52 %1118 = fmul float %1117, 0x3FF7154CA0000000 %1119 = call float @llvm.AMDIL.exp.(float %1118) %1120 = fmul float %191, %191 %1121 = fmul float %1120, %1120 %1122 = fsub float -0.000000e+00, %1121 %1123 = fmul float %191, %1122 %1124 = fadd float %1123, 1.000000e+00 %1125 = fsub float -0.000000e+00, %1124 %1126 = fadd float %1125, 1.000000e+00 %1127 = fmul float %1119, %1126 %1128 = fmul float %1127, %1097 %1129 = fmul float %1127, %1098 %1130 = fmul float %1127, %1099 %1131 = fsub float -0.000000e+00, %1066 %1132 = fmul float %1041, %1131 %1133 = fadd float %1132, 1.000000e+00 %1134 = fmul float %1133, %1128 %1135 = fmul float %1133, %1129 %1136 = fmul float %1133, %1130 %1137 = fcmp oge float %1069, 0.000000e+00 %1138 = sext i1 %1137 to i32 %1139 = bitcast i32 %1138 to float %1140 = bitcast float %1139 to i32 %1141 = icmp ne i32 %1140, 0 %.176 = select i1 %1141, float %1027, float %1134 %1142 = fcmp oge float %1069, 0.000000e+00 %1143 = sext i1 %1142 to i32 %1144 = bitcast i32 %1143 to float %1145 = bitcast float %1144 to i32 %1146 = icmp ne i32 %1145, 0 %temp32.0 = select i1 %1146, float %1029, float %1135 %1147 = fcmp oge float %1069, 0.000000e+00 %1148 = sext i1 %1147 to i32 %1149 = bitcast i32 %1148 to float %1150 = bitcast float %1149 to i32 %1151 = icmp ne i32 %1150, 0 %.177 = select i1 %1151, float %1031, float %1136 %1152 = fcmp oge float %1069, 0.000000e+00 %1153 = sext i1 %1152 to i32 %1154 = bitcast i32 %1153 to float %1155 = bitcast float %1154 to i32 %1156 = icmp ne i32 %1155, 0 %temp16.2 = select i1 %1156, float %1067, float 0.000000e+00 %1157 = call float @fabs(float %.176) %1158 = call float @llvm.log2.f32(float %1157) %1159 = call float @fabs(float %temp32.0) %1160 = call float @llvm.log2.f32(float %1159) %1161 = call float @fabs(float %.177) %1162 = call float @llvm.log2.f32(float %1161) %1163 = fmul float %1158, 0x3FDD1743E0000000 %1164 = fmul float %1160, 0x3FDD1743E0000000 %1165 = fmul float %1162, 0x3FDD1743E0000000 %1166 = call float @llvm.AMDIL.exp.(float %1163) %1167 = call float @llvm.AMDIL.exp.(float %1164) %1168 = call float @llvm.AMDIL.exp.(float %1165) %1169 = bitcast float %111 to i32 %1170 = bitcast float %112 to i32 %1171 = insertelement <2 x i32> undef, i32 %1169, i32 0 %1172 = insertelement <2 x i32> %1171, i32 %1170, i32 1 %1173 = bitcast <8 x i32> %92 to <32 x i8> %1174 = bitcast <4 x i32> %94 to <16 x i8> %1175 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1172, <32 x i8> %1173, <16 x i8> %1174, i32 2) %1176 = extractelement <4 x float> %1175, i32 0 %1177 = extractelement <4 x float> %1175, i32 1 %1178 = extractelement <4 x float> %1175, i32 2 %1179 = call float @llvm.AMDGPU.lrp(float %124, float %1166, float %1176) %1180 = call float @llvm.AMDGPU.lrp(float %124, float %1167, float %1177) %1181 = call float @llvm.AMDGPU.lrp(float %124, float %1168, float %1178) %1182 = bitcast float %1179 to i32 %1183 = bitcast float %1180 to i32 %1184 = bitcast float %1181 to i32 %1185 = insertelement <4 x i32> undef, i32 %1182, i32 0 %1186 = insertelement <4 x i32> %1185, i32 %1183, i32 1 %1187 = insertelement <4 x i32> %1186, i32 %1184, i32 2 %1188 = insertelement <4 x i32> %1187, i32 undef, i32 3 %1189 = bitcast <8 x i32> %96 to <32 x i8> %1190 = bitcast <4 x i32> %98 to <16 x i8> %1191 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1188, <32 x i8> %1189, <16 x i8> %1190, i32 3) %1192 = extractelement <4 x float> %1191, i32 0 %1193 = extractelement <4 x float> %1191, i32 1 %1194 = extractelement <4 x float> %1191, i32 2 %1195 = fsub float -0.000000e+00, %124 %1196 = fmul float %125, %1195 %1197 = fadd float %1196, %124 %1198 = call float @llvm.AMDGPU.lrp(float %1197, float %1192, float %1179) %1199 = call float @llvm.AMDGPU.lrp(float %1197, float %1193, float %1180) %1200 = call float @llvm.AMDGPU.lrp(float %1197, float %1194, float %1181) %1201 = fsub float -0.000000e+00, %1198 %1202 = fadd float %1201, %53 %1203 = fsub float -0.000000e+00, %1199 %1204 = fadd float %1203, %54 %1205 = fsub float -0.000000e+00, %1200 %1206 = fadd float %1205, %55 %1207 = fmul float %temp16.2, %1202 %1208 = fadd float %1207, %1198 %1209 = fmul float %temp16.2, %1204 %1210 = fadd float %1209, %1199 %1211 = fmul float %temp16.2, %1206 %1212 = fadd float %1211, %1200 %1213 = call i32 @llvm.SI.packf16(float %1208, float %1210) %1214 = bitcast i32 %1213 to float %1215 = call i32 @llvm.SI.packf16(float %1212, float %temp48.0) %1216 = bitcast i32 %1215 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1214, float %1216, float %1214, float %1216) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[12:13], s[6:7] ; BE8C0406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 4, [m0] ; C8081100 v_interp_p2_f32 v2, [v2], v1, 1, 4, [m0] ; C8091101 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_interp_p1_f32 v4, v0, 0, 4, [m0] ; C8101000 v_interp_p2_f32 v4, [v4], v1, 0, 4, [m0] ; C8111001 v_add_f32_e32 v2, 0, v4 ; 06040880 s_load_dwordx4 s[8:11], s[4:5], 0x1c ; C084051C s_load_dwordx8 s[16:23], s[12:13], 0x38 ; C0C80D38 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[8:11] ; F0800F00 00440202 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v2|, 0 ; D00C0100 20010102 v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; D2000006 00018280 v_cmp_ne_i32_e64 s[0:1], v6, 0 ; D10A0000 00010106 v_mov_b32_e32 v6, 0x80000000 ; 7E0C02FF 80000000 v_cndmask_b32_e64 v6, v6, -1.0, s[0:1] ; D2000006 1001E706 v_cmp_lt_f32_e64 s[6:7], v6, 0 ; D0020006 00010106 v_interp_p1_f32 v7, v0, 1, 5, [m0] ; C81C1500 v_interp_p2_f32 v7, [v7], v1, 1, 5, [m0] ; C81D1501 v_interp_p1_f32 v6, v0, 0, 5, [m0] ; C8181400 v_interp_p2_f32 v6, [v6], v1, 0, 5, [m0] ; C8191401 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v23, v0, 2, 2, [m0] ; C85C0A00 v_interp_p2_f32 v23, [v23], v1, 2, 2, [m0] ; C85D0A01 v_interp_p1_f32 v24, v0, 1, 2, [m0] ; C8600900 v_interp_p2_f32 v24, [v24], v1, 1, 2, [m0] ; C8610901 v_interp_p1_f32 v25, v0, 0, 2, [m0] ; C8640800 v_interp_p2_f32 v25, [v25], v1, 0, 2, [m0] ; C8650801 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 0, 1, [m0] ; C82C0400 v_interp_p2_f32 v11, [v11], v1, 0, 1, [m0] ; C82D0401 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_interp_p1_f32 v15, v0, 1, 0, [m0] ; C83C0100 v_interp_p2_f32 v15, [v15], v1, 1, 0, [m0] ; C83D0101 v_interp_p1_f32 v16, v0, 0, 0, [m0] ; C8400000 v_interp_p2_f32 v16, [v16], v1, 0, 0, [m0] ; C8410001 s_load_dwordx4 s[8:11], s[2:3], 0x0 ; C0840300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s1, s[8:11], 0x43 ; C2008943 s_buffer_load_dword s0, s[8:11], 0x41 ; C2000941 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 11 ; 04B51600 s_buffer_load_dword s0, s[8:11], 0x40 ; C2000940 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 12 ; 04B51800 s_buffer_load_dword s0, s[8:11], 0x3e ; C200093E s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 0 ; 04B50000 s_buffer_load_dword s0, s[8:11], 0x3d ; C200093D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 2 ; 04B50400 s_buffer_load_dword s0, s[8:11], 0x3c ; C200093C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 1 ; 04B50200 s_buffer_load_dword s0, s[8:11], 0x38 ; C2000938 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 13 ; 04B51A00 s_buffer_load_dword s101, s[8:11], 0x34 ; C2328934 s_buffer_load_dword s17, s[8:11], 0x32 ; C2088932 s_buffer_load_dword s0, s[8:11], 0x31 ; C2000931 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 9 ; 04B51200 s_buffer_load_dword s0, s[8:11], 0x30 ; C2000930 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 3 ; 04B50600 s_buffer_load_dword s2, s[8:11], 0x2e ; C201092E s_buffer_load_dword s3, s[8:11], 0x2d ; C201892D s_buffer_load_dword s20, s[8:11], 0x2c ; C20A092C s_buffer_load_dword s0, s[8:11], 0x29 ; C2000929 s_buffer_load_dword s59, s[8:11], 0x28 ; C21D8928 s_buffer_load_dword s100, s[8:11], 0x25 ; C2320925 s_buffer_load_dword s99, s[8:11], 0x24 ; C2318924 s_buffer_load_dword s16, s[8:11], 0x20 ; C2080920 s_buffer_load_dword s40, s[8:11], 0x1e ; C214091E s_buffer_load_dword s14, s[8:11], 0x1d ; C207091D s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s14, 10 ; 04B5140E s_buffer_load_dword s14, s[8:11], 0x1c ; C207091C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s14, 4 ; 04B5080E s_buffer_load_dword s96, s[8:11], 0x18 ; C2300918 s_buffer_load_dword s58, s[8:11], 0x14 ; C21D0914 s_buffer_load_dword s98, s[8:11], 0x10 ; C2310910 s_buffer_load_dword s57, s[8:11], 0xc ; C21C890C s_buffer_load_dword s41, s[8:11], 0xa ; C214890A s_buffer_load_dword s42, s[8:11], 0x9 ; C2150909 s_buffer_load_dword s97, s[8:11], 0x8 ; C2308908 s_buffer_load_dword s43, s[8:11], 0x6 ; C2158906 s_buffer_load_dword s56, s[8:11], 0x5 ; C21C0905 s_buffer_load_dword s18, s[8:11], 0x4 ; C2090904 s_buffer_load_dword s19, s[8:11], 0x2 ; C2098902 s_buffer_load_dword s14, s[8:11], 0x1 ; C2070901 s_buffer_load_dword s15, s[8:11], 0x0 ; C2078900 v_mov_b32_e32 v13, s1 ; 7E1A0201 v_mov_b32_e32 v20, s2 ; 7E280202 v_mov_b32_e32 v1, s3 ; 7E020203 v_mov_b32_e32 v0, s20 ; 7E000214 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[2:3], s[6:7] ; BE822406 s_xor_b64 s[2:3], exec, s[2:3] ; 8982027E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[2:3] ; 88FE027E v_mul_f32_e32 v27, s0, v9 ; 10361200 v_mul_f32_e32 v26, s59, v10 ; 1034143B s_load_dwordx4 s[44:47], s[4:5], 0x0 ; C0960500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[68:71], s[4:5], 0x8 ; C0A20508 s_load_dwordx4 s[24:27], s[4:5], 0xc ; C08C050C s_load_dwordx4 s[84:87], s[4:5], 0x10 ; C0AA0510 s_load_dwordx4 s[72:75], s[4:5], 0x14 ; C0A40514 s_load_dwordx4 s[36:39], s[4:5], 0x18 ; C0920518 s_load_dwordx4 s[20:23], s[4:5], 0x20 ; C08A0520 s_load_dwordx4 s[0:3], s[4:5], 0x24 ; C0800524 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 5 ; 04B50A00 v_writelane_b32 v90, s1, 6 ; 04B50C01 v_writelane_b32 v90, s2, 7 ; 04B50E02 v_writelane_b32 v90, s3, 8 ; 04B51003 s_load_dwordx8 s[48:55], s[12:13], 0x0 ; C0D80D00 s_load_dwordx8 s[28:35], s[12:13], 0x8 ; C0CE0D08 s_load_dwordx8 s[0:7], s[12:13], 0x10 ; C0C00D10 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s0, 22 ; 04B52C00 v_writelane_b32 v90, s1, 23 ; 04B52E01 v_writelane_b32 v90, s2, 24 ; 04B53002 v_writelane_b32 v90, s3, 25 ; 04B53203 v_writelane_b32 v90, s4, 26 ; 04B53404 v_writelane_b32 v90, s5, 27 ; 04B53605 v_writelane_b32 v90, s6, 28 ; 04B53806 v_writelane_b32 v90, s7, 29 ; 04B53A07 s_load_dwordx8 s[0:7], s[12:13], 0x18 ; C0C00D18 s_load_dwordx8 s[88:95], s[12:13], 0x20 ; C0EC0D20 s_load_dwordx8 s[76:83], s[12:13], 0x28 ; C0E60D28 s_load_dwordx8 s[60:67], s[12:13], 0x30 ; C0DE0D30 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v90, s60, 14 ; 04B51C3C v_writelane_b32 v90, s61, 15 ; 04B51E3D v_writelane_b32 v90, s62, 16 ; 04B5203E v_writelane_b32 v90, s63, 17 ; 04B5223F v_writelane_b32 v90, s64, 18 ; 04B52440 v_writelane_b32 v90, s65, 19 ; 04B52641 v_writelane_b32 v90, s66, 20 ; 04B52842 v_writelane_b32 v90, s67, 21 ; 04B52A43 image_sample v[28:31], 15, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[88:95], s[84:87] ; F0800F00 02B61C1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v17, 2.0, v28, -1.0 ; D2820011 03CE38F4 v_mul_f32_e32 v32, s57, v17 ; 10402239 v_mad_f32 v17, 2.0, v29, -1.0 ; D2820011 03CE3AF4 v_mul_f32_e32 v33, s57, v17 ; 10422239 v_mul_f32_e32 v34, v33, v33 ; 10444321 v_mad_f32 v17, v32, v32, v34 ; D2820011 048A4120 v_add_f32_e32 v17, 1.0, v17 ; 062222F2 v_mov_b32_e32 v38, 0 ; 7E4C0280 v_add_f32_e32 v17, 0, v17 ; 06222280 v_rsq_clamp_f32_e32 v39, v17 ; 7E4E5911 v_mul_f32_e32 v40, v39, v33 ; 10504327 v_mul_f32_e32 v41, v39, v32 ; 10524127 v_mul_f32_e32 v17, v15, v15 ; 10221F0F v_mad_f32 v17, v16, v16, v17 ; D2820011 04462110 v_mad_f32 v17, v14, v14, v17 ; D2820011 04461D0E v_add_f32_e32 v17, 0, v17 ; 06222280 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v42, v17, v16 ; 10542111 v_mul_f32_e32 v16, v42, v41 ; 1020532A v_mul_f32_e32 v44, v17, v14 ; 10581D11 v_mad_f32 v14, v40, v44, v16 ; D282000E 04425928 v_mad_f32 v37, v39, v44, v14 ; D2820025 043A5927 v_mul_f32_e32 v14, v42, v40 ; 101C512A v_mul_f32_e32 v43, v17, v15 ; 10561F11 v_mad_f32 v14, v41, v43, v14 ; D282000E 043A5729 v_mad_f32 v36, v39, v43, v14 ; D2820024 043A5727 v_mul_f32_e32 v14, v44, v41 ; 101C532C v_mad_f32 v14, v40, -v43, -v14 ; D282000E C43A5728 v_mad_f32 v35, v39, v42, v14 ; D2820023 043A5527 v_cubeid_f32 v49, v35, v36, v37 ; D2880031 04964923 v_cubema_f32 v48, v35, v36, v37 ; D28E0030 04964923 v_cubesc_f32 v47, v35, v36, v37 ; D28A002F 04964923 v_cubetc_f32 v46, v35, v36, v37 ; D28C002E 04964923 v_rcp_f32_e64 v14, |v48| ; D354010E 00000130 v_mov_b32_e32 v54, 0x3fc00000 ; 7E6C02FF 3FC00000 v_mad_f32 v48, v46, v14, v54 ; D2820030 04DA1D2E v_mad_f32 v47, v47, v14, v54 ; D282002F 04DA1D2F image_sample v[15:17], 7, 0, 0, 0, 0, 0, 0, 0, v[47:50], s[48:55], s[44:47] ; F0800700 016C0F2F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v18, v17 ; 7E244F11 v_mov_b32_e32 v14, 0x400ccccd ; 7E1C02FF 400CCCCD v_mul_legacy_f32_e32 v18, v14, v18 ; 0E24250E v_exp_f32_e32 v19, v18 ; 7E264B12 v_mul_f32_e32 v46, s18, v44 ; 105C5812 v_mad_f32 v18, v43, s56, -v46 ; D2820012 84B8712B v_mad_f32 v18, v42, s43, v18 ; D2820012 0448572A v_mul_f32_e32 v18, v41, v18 ; 10242529 v_mul_f32_e32 v21, s18, v43 ; 102A5612 v_mad_f32 v21, v42, s56, -v21 ; D2820015 8454712A v_mad_f32 v47, v44, s43, v21 ; D282002F 0454572C v_mad_f32 v18, v47, v40, v18 ; D2820012 044A512F v_mul_f32_e32 v48, s18, v42 ; 10605412 v_mad_f32 v21, v43, s56, v48 ; D2820015 04C0712B v_mad_f32 v49, v44, s43, v21 ; D2820031 0454572C v_mad_f32 v18, v49, v39, v18 ; D2820012 044A4F31 v_add_f32_e64 v18, 0, v18 clamp ; D2060812 00022480 v_add_f32_e32 v50, v19, v18 ; 06642513 v_mul_f32_e64 v19, |v43|, |v43| ; D2100313 0002572B v_mul_f32_e64 v21, |v42|, |v42| ; D2100315 0002552A v_mul_f32_e32 v21, v21, v21 ; 102A2B15 v_mad_f32 v22, v19, v19, v21 ; D2820016 04562713 v_mul_f32_e64 v51, |v44|, |v44| ; D2100333 0002592C v_mad_f32 v22, v51, v51, v22 ; D2820016 045A6733 v_rcp_f32_e32 v22, v22 ; 7E2C5516 v_mul_f32_e32 v19, v19, v19 ; 10262713 v_mul_f32_e32 v19, v19, v22 ; 10262D13 v_mul_f32_e32 v53, s100, v9 ; 106A1264 v_mul_f32_e32 v52, s99, v11 ; 10681663 image_sample v[55:57], 7, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[0:7], s[24:27] ; F0800700 00C03734 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v58, v57 ; 7E744F39 v_mul_legacy_f32_e32 v58, v14, v58 ; 0E74750E v_exp_f32_e32 v58, v58 ; 7E744B3A v_mul_f32_e32 v58, v58, v19 ; 1074273A v_mul_f32_e32 v21, v21, v22 ; 102A2D15 v_mul_f32_e32 v52, s99, v10 ; 10681463 image_sample v[59:61], 7, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[0:7], s[24:27] ; F0800700 00C03B34 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v52, v61 ; 7E684F3D v_mul_legacy_f32_e32 v52, v14, v52 ; 0E68690E v_exp_f32_e32 v52, v52 ; 7E684B34 v_mad_f32 v52, v52, v21, v58 ; D2820034 04EA2B34 v_mul_f32_e32 v51, v51, v51 ; 10666733 v_mul_f32_e32 v22, v51, v22 ; 102C2D33 v_mul_f32_e32 v63, s98, v10 ; 107E1462 v_mul_f32_e32 v62, s98, v11 ; 107C1662 image_sample v[62:65], 15, 0, 0, 0, 0, 0, 0, 0, v[62:63], s[28:35], s[8:11] ; F0800F00 00473E3E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v51, v64 ; 7E664F40 v_mul_legacy_f32_e32 v51, v14, v51 ; 0E66670E v_exp_f32_e32 v51, v51 ; 7E664B33 v_mad_f32 v51, v51, v22, v52 ; D2820033 04D22D33 v_mul_f32_e32 v50, v50, v51 ; 10646732 v_mul_f32_e32 v52, s97, v43 ; 10685661 v_mad_f32 v52, v42, s42, -v52 ; D2820034 84D0552A v_mad_f32 v52, v44, s41, v52 ; D2820034 04D0532C v_mul_f32_e32 v53, v24, v24 ; 106A3118 v_mad_f32 v53, v25, v25, v53 ; D2820035 04D63319 v_mad_f32 v53, v23, v23, v53 ; D2820035 04D62F17 v_add_f32_e32 v53, 0, v53 ; 066A6A80 v_rsq_clamp_f32_e32 v53, v53 ; 7E6A5935 v_mul_f32_e32 v25, v53, v25 ; 10323335 v_mul_f32_e32 v58, v25, v43 ; 10745719 v_mul_f32_e32 v24, v53, v24 ; 10303135 v_mad_f32 v58, v42, v24, -v58 ; D282003A 84EA312A v_mul_f32_e32 v23, v53, v23 ; 102E2F35 v_mad_f32 v53, v44, v23, v58 ; D2820035 04EA2F2C v_add_f32_e32 v58, v52, v53 ; 06746B34 v_mul_f32_e32 v66, s97, v42 ; 10845461 v_mad_f32 v67, v43, s42, v66 ; D2820043 0508552B v_mad_f32 v67, v44, s41, v67 ; D2820043 050C532C v_mul_f32_e32 v68, v25, v42 ; 10885519 v_mad_f32 v69, v43, v24, v68 ; D2820045 0512312B v_mad_f32 v69, v44, v23, v69 ; D2820045 05162F2C v_add_f32_e32 v70, v67, v69 ; 068C8B43 v_rcp_f32_e32 v70, v70 ; 7E8C5546 v_mad_f32 v71, v58, v70, -v33 ; D2820047 84868D3A v_mul_f32_e64 v72, s57, s57 ; D2100048 00007239 v_mad_f32 v34, v31, v72, -v34 ; D2820022 848A911F v_rcp_f32_e32 v73, s96 ; 7E925460 v_add_f32_e32 v34, v34, v73 ; 06449322 v_mul_f32_e32 v74, v34, v71 ; 10948F22 v_mul_f32_e32 v75, s97, v44 ; 10965861 v_mad_f32 v76, v43, s42, -v75 ; D282004C 852C552B v_mad_f32 v76, v42, s41, v76 ; D282004C 0530532A v_mul_f32_e32 v77, v25, v44 ; 109A5919 v_mad_f32 v78, v43, v24, -v77 ; D282004E 8536312B v_mad_f32 v78, v42, v23, v78 ; D282004E 053A2F2A v_add_f32_e32 v79, v76, v78 ; 069E9D4C v_mad_f32 v79, v79, v70, -v32 ; D282004F 84828D4F v_mul_f32_e32 v33, v32, v33 ; 10424320 image_sample v80, 8, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[76:83], s[72:75] ; F0800800 0253501A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v80, 2.0, v80, -1.0 ; D2820050 03CEA0F4 v_mad_f32 v33, v80, v72, -v33 ; D2820021 84869150 v_mul_f32_e64 v80, v79, -v33 ; D2100050 4002434F v_mad_f32 v74, 2.0, v80, v74 ; D282004A 052AA0F4 v_mul_f32_e32 v71, v74, v71 ; 108E8F4A v_mul_f32_e32 v74, v79, v79 ; 10949F4F v_mad_f32 v71, v74, v34, v71 ; D2820047 051E454A v_mul_f32_e32 v71, 0.5, v71 ; 108E8EF0 v_mul_f32_e32 v32, v32, v32 ; 10404120 v_mad_f32 v28, v30, v72, -v32 ; D282001C 8482911E v_add_f32_e32 v28, v28, v73 ; 0638931C v_mul_f32_e32 v29, v33, v33 ; 103A4321 v_mad_f32 v28, v28, v34, -v29 ; D282001C 8476451C v_rcp_f32_e32 v29, v28 ; 7E3A551C v_mul_f32_e32 v30, v29, v71 ; 103C8F1D v_mov_b32_e32 v31, 0xbfb8aa65 ; 7E3E02FF BFB8AA65 v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_exp_f32_e32 v30, v30 ; 7E3C4B1E v_max_f32_e32 v32, 0x33d6bf95, v28 ; 204038FF 33D6BF95 v_rsq_clamp_f32_e32 v32, v32 ; 7E405920 v_mul_f32_e32 v30, v30, v32 ; 103C411E v_mov_b32_e32 v32, 0xc1800000 ; 7E4002FF C1800000 v_mad_f32 v29, v71, v29, v32 ; D282001D 04823B47 v_cmp_ge_f32_e64 s[0:1], v29, 0 ; D00C0000 0001011D v_cndmask_b32_e64 v29, 0, -1, s[0:1] ; D200081D 00018280 v_cmp_ne_i32_e64 s[0:1], v29, 0 ; D10A0000 0001011D v_cndmask_b32_e64 v29, 0, 1.0, s[0:1] ; D200081D 0001E480 v_cmp_ge_f32_e64 s[0:1], -v28, 0 ; D00C0000 2001011C v_cndmask_b32_e64 v28, 0, -1, s[0:1] ; D200001C 00018280 v_cmp_ne_i32_e64 s[0:1], v28, 0 ; D10A0000 0001011C v_cndmask_b32_e64 v28, 0, 1.0, s[0:1] ; D200001C 0001E480 v_add_f32_e32 v28, v28, v29 ; 06383B1C v_cmp_ge_f32_e64 s[0:1], -v28, 0 ; D00C0000 2001011C v_cndmask_b32_e64 v28, 0, -1, s[0:1] ; D200001C 00018280 v_cmp_ne_i32_e64 s[0:1], v28, 0 ; D10A0000 0001011C v_cndmask_b32_e64 v28, 0, v30, s[0:1] ; D200001C 00023C80 v_mul_f32_e32 v29, v78, v41 ; 103A534E v_mad_f32 v29, v39, v69, v29 ; D282001D 04768B27 v_mad_f32 v29, v40, v53, v29 ; D282001D 04766B28 v_sub_f32_e32 v29, 1.0, v29 ; 083A3AF2 v_mul_f32_e32 v30, v29, v29 ; 103C3B1D v_mul_f32_e32 v30, v30, v30 ; 103C3D1E v_mul_f32_e32 v29, v30, v29 ; 103A3B1E v_mul_f32_e32 v28, v29, v28 ; 1038391D v_add_f32_e64 v28, 0, v28 clamp ; D206081C 00023880 v_mul_f32_e32 v29, v41, v76 ; 103A9929 v_mad_f32 v29, v52, v40, v29 ; D282001D 04765134 v_mad_f32 v29, v67, v39, v29 ; D282001D 04764F43 v_add_f32_e64 v29, 0, v29 clamp ; D206081D 00023A80 v_mad_f32 v30, v28, v29, v50 ; D282001E 04CA3B1C v_mul_f32_e32 v26, s59, v11 ; 1034163B image_sample v[78:81], 15, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[88:95], s[84:87] ; F0800F00 02B64E1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, 2.0, v78, -1.0 ; D2820021 03CE9CF4 v_mul_f32_e32 v33, s57, v33 ; 10424239 v_mad_f32 v34, 2.0, v79, -1.0 ; D2820022 03CE9EF4 v_mul_f32_e32 v34, s57, v34 ; 10444439 v_mul_f32_e32 v39, v34, v34 ; 104E4522 v_mad_f32 v40, v33, v33, v39 ; D2820028 049E4321 v_add_f32_e32 v40, 1.0, v40 ; 065050F2 v_add_f32_e32 v40, 0, v40 ; 06505080 v_rsq_clamp_f32_e32 v40, v40 ; 7E505928 v_mul_f32_e32 v41, v40, v34 ; 10524528 v_mul_f32_e32 v50, v40, v33 ; 10644328 v_mul_f32_e32 v71, v43, v50 ; 108E652B v_mad_f32 v71, v41, v44, v71 ; D2820047 051E5929 v_mad_f32 v37, v40, v44, v71 ; D2820025 051E5928 v_mul_f32_e32 v71, v42, v41 ; 108E532A v_mad_f32 v71, v40, v43, v71 ; D2820047 051E5728 v_mad_f32 v36, v50, v44, v71 ; D2820024 051E5932 v_mul_f32_e32 v71, v42, v50 ; 108E652A v_mad_f32 v71, v41, -v43, -v71 ; D2820047 C51E5729 v_mad_f32 v35, v40, v42, v71 ; D2820023 051E5528 v_cubeid_f32 v85, v35, v36, v37 ; D2880055 04964923 v_cubema_f32 v84, v35, v36, v37 ; D28E0054 04964923 v_cubesc_f32 v83, v35, v36, v37 ; D28A0053 04964923 v_cubetc_f32 v82, v35, v36, v37 ; D28C0052 04964923 v_rcp_f32_e64 v35, |v84| ; D3540123 00000154 v_mad_f32 v84, v82, v35, v54 ; D2820054 04DA4752 v_mad_f32 v83, v83, v35, v54 ; D2820053 04DA4753 image_sample v[35:37], 7, 0, 0, 0, 0, 0, 0, 0, v[83:86], s[48:55], s[44:47] ; F0800700 016C2353 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v38, v37 ; 7E4C4F25 v_mul_legacy_f32_e32 v38, v14, v38 ; 0E4C4D0E v_exp_f32_e32 v38, v38 ; 7E4C4B26 v_mad_f32 v71, v44, s56, -v48 ; D2820047 84C0712C v_mad_f32 v71, v43, s43, v71 ; D2820047 051C572B v_mul_f32_e32 v71, v50, v71 ; 108E8F32 v_mad_f32 v47, v47, v41, v71 ; D282002F 051E532F v_mad_f32 v47, v49, v40, v47 ; D282002F 04BE5131 v_add_f32_e64 v47, 0, v47 clamp ; D206082F 00025E80 v_add_f32_e32 v38, v38, v47 ; 064C5F26 v_mul_f32_e32 v38, v38, v51 ; 104C6726 v_mad_f32 v58, v58, v70, -v34 ; D282003A 848A8D3A v_mad_f32 v39, v81, v72, -v39 ; D2820027 849E9151 v_add_f32_e32 v39, v39, v73 ; 064E9327 v_mul_f32_e32 v71, v39, v58 ; 108E7527 v_mad_f32 v74, v44, s42, -v66 ; D282004A 8508552C v_mad_f32 v74, v43, s41, v74 ; D282004A 0528532B v_mad_f32 v76, v44, v24, -v68 ; D282004C 8512312C v_mad_f32 v76, v43, v23, v76 ; D282004C 05322F2B v_add_f32_e32 v82, v74, v76 ; 06A4994A v_mad_f32 v82, v82, v70, -v33 ; D2820052 84868D52 v_mul_f32_e32 v34, v33, v34 ; 10444521 image_sample v26, 8, 0, 0, 0, 0, 0, 0, 0, v[26:27], s[76:83], s[72:75] ; F0800800 02531A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, 2.0, v26, -1.0 ; D282001A 03CE34F4 v_mad_f32 v26, v26, v72, -v34 ; D282001A 848A911A v_mul_f32_e64 v27, v82, -v26 ; D210001B 40023552 v_mad_f32 v27, 2.0, v27, v71 ; D282001B 051E36F4 v_mul_f32_e32 v27, v27, v58 ; 1036751B v_mul_f32_e32 v34, v82, v82 ; 1044A552 v_mad_f32 v27, v34, v39, v27 ; D282001B 046E4F22 v_mul_f32_e32 v27, 0.5, v27 ; 103636F0 v_mul_f32_e32 v33, v33, v33 ; 10424321 v_mad_f32 v33, v80, v72, -v33 ; D2820021 84869150 v_add_f32_e32 v33, v33, v73 ; 06429321 v_mul_f32_e32 v26, v26, v26 ; 1034351A v_mad_f32 v26, v33, v39, -v26 ; D282001A 846A4F21 v_rcp_f32_e32 v33, v26 ; 7E42551A v_mul_f32_e32 v34, v33, v27 ; 10443721 v_mul_f32_e32 v34, v31, v34 ; 1044451F v_exp_f32_e32 v34, v34 ; 7E444B22 v_max_f32_e32 v39, 0x33d6bf95, v26 ; 204E34FF 33D6BF95 v_rsq_clamp_f32_e32 v39, v39 ; 7E4E5927 v_mul_f32_e32 v34, v34, v39 ; 10444F22 v_mad_f32 v27, v27, v33, v32 ; D282001B 0482431B v_cmp_ge_f32_e64 s[0:1], v27, 0 ; D00C0000 0001011B v_cndmask_b32_e64 v27, 0, -1, s[0:1] ; D200081B 00018280 v_cmp_ne_i32_e64 s[0:1], v27, 0 ; D10A0000 0001011B v_cndmask_b32_e64 v27, 0, 1.0, s[0:1] ; D200081B 0001E480 v_cmp_ge_f32_e64 s[0:1], -v26, 0 ; D00C0000 2001011A v_cndmask_b32_e64 v26, 0, -1, s[0:1] ; D200001A 00018280 v_cmp_ne_i32_e64 s[0:1], v26, 0 ; D10A0000 0001011A v_cndmask_b32_e64 v26, 0, 1.0, s[0:1] ; D200001A 0001E480 v_add_f32_e32 v26, v26, v27 ; 0634371A v_cmp_ge_f32_e64 s[0:1], -v26, 0 ; D00C0000 2001011A v_cndmask_b32_e64 v26, 0, -1, s[0:1] ; D200001A 00018280 v_cmp_ne_i32_e64 s[0:1], v26, 0 ; D10A0000 0001011A v_cndmask_b32_e64 v26, 0, v34, s[0:1] ; D200001A 00024480 v_mul_f32_e32 v27, v76, v50 ; 1036654C v_mad_f32 v27, v40, v69, v27 ; D282001B 046E8B28 v_mad_f32 v27, v41, v53, v27 ; D282001B 046E6B29 v_sub_f32_e32 v27, 1.0, v27 ; 083636F2 v_mul_f32_e32 v33, v27, v27 ; 1042371B v_mul_f32_e32 v33, v33, v33 ; 10424321 v_mul_f32_e32 v27, v33, v27 ; 10363721 v_mul_f32_e32 v26, v27, v26 ; 1034351B v_add_f32_e64 v26, 0, v26 clamp ; D206081A 00023480 v_mul_f32_e32 v27, v50, v74 ; 10369532 v_mad_f32 v27, v52, v41, v27 ; D282001B 046E5334 v_mad_f32 v27, v67, v40, v27 ; D282001B 046E5143 v_add_f32_e64 v27, 0, v27 clamp ; D206081B 00023680 v_mad_f32 v33, v26, v27, v38 ; D2820021 049A371A v_mul_f32_e32 v33, v33, v19 ; 10422721 v_mad_f32 v30, v30, v21, v33 ; D282001E 04862B1E v_mul_f32_e32 v34, s58, v10 ; 1044143A v_mul_f32_e32 v33, s58, v11 ; 1042163A v_readlane_b32 s0, v90, 22 ; 02012D5A v_readlane_b32 s1, v90, 23 ; 02032F5A v_readlane_b32 s2, v90, 24 ; 0205315A v_readlane_b32 s3, v90, 25 ; 0207335A v_readlane_b32 s4, v90, 26 ; 0209355A v_readlane_b32 s5, v90, 27 ; 020B375A v_readlane_b32 s6, v90, 28 ; 020D395A v_readlane_b32 s7, v90, 29 ; 020F3B5A s_nop 2 ; BF800002 image_sample v[38:41], 15, 0, 0, 0, 0, 0, 0, 0, v[33:34], s[0:7], s[68:71] ; F0800F00 02202621 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, 2.0, v38, -1.0 ; D2820021 03CE4CF4 v_mul_f32_e32 v33, s57, v33 ; 10424239 v_mad_f32 v34, 2.0, v39, -1.0 ; D2820022 03CE4EF4 v_mul_f32_e32 v34, s57, v34 ; 10444439 v_mul_f32_e32 v50, v34, v34 ; 10644522 v_mad_f32 v52, v33, v33, v50 ; D2820034 04CA4321 v_add_f32_e32 v52, 1.0, v52 ; 066868F2 v_add_f32_e32 v52, 0, v52 ; 06686880 v_rsq_clamp_f32_e32 v52, v52 ; 7E685934 v_mul_f32_e32 v53, v52, v34 ; 106A4534 v_mul_f32_e32 v58, v52, v33 ; 10744334 v_mul_f32_e32 v71, v42, v58 ; 108E752A v_mad_f32 v71, v53, -v43, -v71 ; D2820047 C51E5735 v_mad_f32 v80, v52, v44, v71 ; D2820050 051E5934 v_mul_f32_e32 v71, v43, v58 ; 108E752B v_mad_f32 v71, v52, v43, v71 ; D2820047 051E5734 v_mad_f32 v79, v53, v44, v71 ; D282004F 051E5935 v_mul_f32_e32 v71, v42, v53 ; 108E6B2A v_mad_f32 v71, v52, v42, v71 ; D2820047 051E5534 v_mad_f32 v78, v58, v44, v71 ; D282004E 051E593A v_mad_f32 v71, v43, v24, v77 ; D2820047 0536312B v_mad_f32 v71, -v42, v23, v71 ; D2820047 251E2F2A v_mul_f32_e32 v74, v71, v58 ; 10947547 v_mad_f32 v68, v44, v24, v68 ; D2820044 0512312C v_mad_f32 v68, -v43, v23, v68 ; D2820044 25122F2B v_mad_f32 v74, v53, v68, v74 ; D282004A 052A8935 v_mad_f32 v69, v52, v69, v74 ; D2820045 052A8B34 v_sub_f32_e32 v69, 1.0, v69 ; 088A8AF2 v_mul_f32_e32 v74, v69, v69 ; 10948B45 v_mul_f32_e32 v81, v74, v74 ; 10A2954A v_cubeid_f32 v85, v78, v79, v80 ; D2880055 05429F4E v_cubema_f32 v84, v78, v79, v80 ; D28E0054 05429F4E v_cubesc_f32 v83, v78, v79, v80 ; D28A0053 05429F4E v_cubetc_f32 v82, v78, v79, v80 ; D28C0052 05429F4E v_rcp_f32_e64 v74, |v84| ; D354014A 00000154 v_mad_f32 v84, v82, v74, v54 ; D2820054 04DA9552 v_mad_f32 v83, v83, v74, v54 ; D2820053 04DA9553 image_sample v[82:84], 7, 0, 0, 0, 0, 0, 0, 0, v[83:86], s[48:55], s[44:47] ; F0800700 016C5253 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v74, v84 ; 7E944F54 v_mul_legacy_f32_e32 v74, v14, v74 ; 0E94950E v_exp_f32_e32 v74, v74 ; 7E944B4A v_mad_f32 v46, v43, s56, v46 ; D282002E 04B8712B v_mad_f32 v46, -v42, s43, v46 ; D282002E 24B8572A v_mul_f32_e32 v46, v58, v46 ; 105C5D3A v_mad_f32 v48, v44, s56, v48 ; D2820030 04C0712C v_mad_f32 v48, -v43, s43, v48 ; D2820030 24C0572B v_mad_f32 v46, v48, v53, v46 ; D282002E 04BA6B30 v_mad_f32 v46, v49, v52, v46 ; D282002E 04BA6931 v_add_f32_e64 v46, 0, v46 clamp ; D206082E 00025C80 v_add_f32_e32 v48, v74, v46 ; 06605D4A v_mul_f32_e32 v48, v48, v51 ; 10606730 v_mad_f32 v66, v44, s42, v66 ; D2820042 0508552C v_mad_f32 v66, -v43, s41, v66 ; D2820042 2508532B v_add_f32_e32 v68, v66, v68 ; 06888942 v_mad_f32 v68, v68, v70, -v34 ; D2820044 848A8D44 v_add_f32_e32 v74, 0, v41 ; 06945280 v_mad_f32 v50, v74, v72, -v50 ; D2820032 84CA914A v_add_f32_e32 v50, v50, v73 ; 06649332 v_mul_f32_e32 v74, v50, v68 ; 10948932 v_mad_f32 v75, v43, s42, v75 ; D282004B 052C552B v_mad_f32 v75, -v42, s41, v75 ; D282004B 252C532A v_add_f32_e32 v71, v75, v71 ; 068E8F4B v_mad_f32 v70, v71, v70, -v33 ; D2820046 84868D47 v_mul_f32_e32 v34, v33, v34 ; 10444521 v_mad_f32 v71, 2.0, v41, -1.0 ; D2820047 03CE52F4 v_mad_f32 v34, v71, v72, -v34 ; D2820022 848A9147 v_mul_f32_e64 v71, v70, -v34 ; D2100047 40024546 v_mad_f32 v71, 2.0, v71, v74 ; D2820047 052A8EF4 v_mul_f32_e32 v68, v71, v68 ; 10888947 v_mul_f32_e32 v70, v70, v70 ; 108C8D46 v_mad_f32 v68, v70, v50, v68 ; D2820044 05126546 v_mul_f32_e32 v68, 0.5, v68 ; 108888F0 v_add_f32_e32 v38, 0, v40 ; 064C5080 v_mul_f32_e32 v33, v33, v33 ; 10424321 v_mad_f32 v33, v38, v72, -v33 ; D2820021 84869126 v_add_f32_e32 v33, v33, v73 ; 06429321 v_mul_f32_e32 v34, v34, v34 ; 10444522 v_mad_f32 v33, v33, v50, -v34 ; D2820021 848A6521 v_rcp_f32_e32 v34, v33 ; 7E445521 v_mul_f32_e32 v38, v34, v68 ; 104C8922 v_mul_f32_e32 v31, v31, v38 ; 103E4D1F v_exp_f32_e32 v31, v31 ; 7E3E4B1F v_max_f32_e32 v38, 0x33d6bf95, v33 ; 204C42FF 33D6BF95 v_rsq_clamp_f32_e32 v38, v38 ; 7E4C5926 v_mul_f32_e32 v31, v31, v38 ; 103E4D1F v_mad_f32 v32, v68, v34, v32 ; D2820020 04824544 v_cmp_ge_f32_e64 s[0:1], v32, 0 ; D00C0000 00010120 v_cndmask_b32_e64 v32, 0, -1, s[0:1] ; D2000020 00018280 v_cmp_ne_i32_e64 s[0:1], v32, 0 ; D10A0000 00010120 v_cndmask_b32_e64 v32, 0, 1.0, s[0:1] ; D2000020 0001E480 v_cmp_ge_f32_e64 s[0:1], -v33, 0 ; D00C0000 20010121 v_cndmask_b32_e64 v33, 0, -1, s[0:1] ; D2000821 00018280 v_cmp_ne_i32_e64 s[0:1], v33, 0 ; D10A0000 00010121 v_cndmask_b32_e64 v33, 0, 1.0, s[0:1] ; D2000821 0001E480 v_add_f32_e32 v32, v33, v32 ; 06404121 v_cmp_ge_f32_e64 s[0:1], -v32, 0 ; D00C0000 20010120 v_cndmask_b32_e64 v32, 0, -1, s[0:1] ; D2000020 00018280 v_cmp_ne_i32_e64 s[0:1], v32, 0 ; D10A0000 00010120 v_cndmask_b32_e64 v31, 0, v31, s[0:1] ; D200081F 00023E80 v_mul_f32_e32 v32, v81, v69 ; 10408B51 v_mul_f32_e32 v31, v32, v31 ; 103E3F20 v_add_f32_e64 v31, 0, v31 clamp ; D206081F 00023E80 v_mul_f32_e32 v32, v58, v75 ; 1040973A v_mad_f32 v32, v66, v53, v32 ; D2820020 04826B42 v_mad_f32 v32, v67, v52, v32 ; D2820020 04826943 v_add_f32_e64 v32, 0, v32 clamp ; D2060820 00024080 v_mad_f32 v33, v31, v32, v48 ; D2820021 04C2411F v_mad_f32 v30, v33, v22, v30 ; D282001E 047A2D21 v_mov_b32_e32 v45, v65 ; 7E5A0341 v_cubeid_f32 v69, v42, v43, v44 ; D2880045 04B2572A v_cubema_f32 v68, v42, v43, v44 ; D28E0044 04B2572A v_cubesc_f32 v67, v42, v43, v44 ; D28A0043 04B2572A v_cubetc_f32 v66, v42, v43, v44 ; D28C0042 04B2572A v_rcp_f32_e64 v33, |v68| ; D3540121 00000144 v_mad_f32 v68, v66, v33, v54 ; D2820044 04DA4342 v_mad_f32 v67, v67, v33, v54 ; D2820043 04DA4343 image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[67:70], s[48:55], s[44:47] ; F0800700 016C2643 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v33, v40 ; 7E424F28 v_mul_legacy_f32_e32 v33, v14, v33 ; 0E42430E v_exp_f32_e32 v33, v33 ; 7E424B21 v_add_f32_e64 v34, 0, v49 clamp ; D2060822 00026280 v_add_f32_e32 v33, v34, v33 ; 06424322 v_log_f32_e32 v41, v56 ; 7E524F38 v_mul_legacy_f32_e32 v41, v14, v41 ; 0E52530E v_exp_f32_e32 v41, v41 ; 7E524B29 v_mul_f32_e32 v41, v41, v19 ; 10522729 v_log_f32_e32 v42, v60 ; 7E544F3C v_mul_legacy_f32_e32 v42, v14, v42 ; 0E54550E v_exp_f32_e32 v42, v42 ; 7E544B2A v_mad_f32 v41, v42, v21, v41 ; D2820029 04A62B2A v_log_f32_e32 v42, v63 ; 7E544F3F v_mul_legacy_f32_e32 v42, v14, v42 ; 0E54550E v_exp_f32_e32 v42, v42 ; 7E544B2A v_mad_f32 v41, v42, v22, v41 ; D2820029 04A62D2A v_log_f32_e32 v42, v55 ; 7E544F37 v_mul_legacy_f32_e32 v42, v14, v42 ; 0E54550E v_exp_f32_e32 v42, v42 ; 7E544B2A v_mul_f32_e32 v42, v42, v19 ; 1054272A v_log_f32_e32 v43, v59 ; 7E564F3B v_mul_legacy_f32_e32 v43, v14, v43 ; 0E56570E v_exp_f32_e32 v43, v43 ; 7E564B2B v_mad_f32 v42, v43, v21, v42 ; D282002A 04AA2B2B v_log_f32_e32 v43, v62 ; 7E564F3E v_mul_legacy_f32_e32 v43, v14, v43 ; 0E56570E v_exp_f32_e32 v43, v43 ; 7E564B2B v_mad_f32 v42, v43, v22, v42 ; D282002A 04AA2D2B v_mul_f32_e32 v43, 0x3e99999a, v42 ; 105654FF 3E99999A v_mov_b32_e32 v44, 0x3f170a3d ; 7E5802FF 3F170A3D v_mad_f32 v43, v44, v41, v43 ; D282002B 04AE532C v_mov_b32_e32 v44, 0x3de147ae ; 7E5802FF 3DE147AE v_mad_f32 v43, v44, v51, v43 ; D282002B 04AE672C v_sub_f32_e32 v43, 1.0, v43 ; 085656F2 v_mul_f32_e32 v44, s40, v43 ; 10585628 v_mad_f32 v30, v44, v33, v30 ; D282001E 047A432C v_sub_f32_e32 v33, s17, v20 ; 08422811 v_sub_f32_e32 v44, s16, v9 ; 08581210 v_mul_f32_e32 v45, s101, v44 ; 105A5865 v_add_f32_e64 v45, 0, v45 clamp ; D206082D 00025A80 v_mad_f32 v20, v45, v33, v20 ; D2820014 0452432D v_mul_f32_e32 v20, v20, v30 ; 10283D14 v_rcp_f32_e64 v33, -v23 ; D3540021 20000117 v_mul_f32_e32 v33, v33, v44 ; 10425921 v_mul_f32_e32 v24, v24, v33 ; 10304318 v_mul_f32_e32 v25, v25, v33 ; 10324319 v_mul_f32_e32 v25, v25, v25 ; 10323319 v_mad_f32 v24, v24, v24, v25 ; D2820018 04663118 v_mul_f32_e32 v25, v23, v33 ; 10324317 v_mad_f32 v24, v25, v25, v24 ; D2820018 04623319 v_max_f32_e32 v24, 0x33d6bf95, v24 ; 203030FF 33D6BF95 v_rsq_clamp_f32_e32 v25, v24 ; 7E325918 v_mul_f32_e32 v25, v24, v25 ; 10323318 v_xor_b32_e32 v24, 0x80000000, v24 ; 3A3030FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v24 ; 7C083080 v_cndmask_b32_e64 v24, 0, v25, vcc ; D2000018 01AA3280 v_readlane_b32 s0, v90, 13 ; 02011B5A s_nop 2 ; BF800002 v_mul_f32_e64 v24, -v24, s0 ; D2100018 20000118 v_mul_f32_e32 v24, 0x3fb8aa65, v24 ; 103030FF 3FB8AA65 v_exp_f32_e32 v24, v24 ; 7E304B18 v_mul_f32_e32 v25, v23, v23 ; 10322F17 v_mul_f32_e32 v25, v25, v25 ; 10323319 v_mad_f32 v23, -v23, v25, 1.0 ; D2820017 23CA3317 v_sub_f32_e32 v23, 1.0, v23 ; 082E2EF2 v_mul_f32_e32 v23, v23, v24 ; 102E3117 v_mul_f32_e32 v20, v20, v23 ; 10282F14 v_subrev_f32_e32 v10, s14, v10 ; 0A14140E v_subrev_f32_e32 v11, s15, v11 ; 0A16160F v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mad_f32 v10, v10, v10, v11 ; D282000A 042E150A v_subrev_f32_e32 v11, s19, v9 ; 0A161213 v_mad_f32 v10, v11, v11, v10 ; D282000A 042A170B v_readlane_b32 s0, v90, 11 ; 0201175A s_nop 2 ; BF800002 v_mul_f32_e32 v10, s0, v10 ; 10141400 v_readlane_b32 s0, v90, 12 ; 0201195A s_nop 2 ; BF800002 v_mul_f32_e32 v24, s0, v11 ; 10301600 v_mul_f32_e32 v24, 0x3fb8aa65, v24 ; 103030FF 3FB8AA65 v_exp_f32_e32 v24, v24 ; 7E304B18 v_sub_f32_e32 v24, 1.0, v24 ; 083030F2 v_mul_f32_e32 v10, v10, v24 ; 1014310A v_rcp_f32_e32 v11, v11 ; 7E16550B v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v10, 0x3fb8aa65, v10 ; 101414FF 3FB8AA65 v_exp_f32_e32 v10, v10 ; 7E144B0A v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_mad_f32 v12, 0.5, v8, 0.5 ; D282000C 03C210F0 v_readlane_b32 s0, v90, 14 ; 02011D5A v_readlane_b32 s1, v90, 15 ; 02031F5A v_readlane_b32 s2, v90, 16 ; 0205215A v_readlane_b32 s3, v90, 17 ; 0207235A v_readlane_b32 s4, v90, 18 ; 0209255A v_readlane_b32 s5, v90, 19 ; 020B275A v_readlane_b32 s6, v90, 20 ; 020D295A v_readlane_b32 s7, v90, 21 ; 020F2B5A s_nop 2 ; BF800002 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[0:7], s[36:39] ; F0800100 0120080C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, -v10, v8, 1.0 ; D282000B 23CA110A v_mul_f32_e32 v12, v20, v11 ; 10181714 v_subrev_f32_e32 v9, s16, v9 ; 0A121210 v_cmp_ge_f32_e64 s[0:1], v9, 0 ; D00C0000 00010109 v_cndmask_b32_e64 v9, 0, -1, s[0:1] ; D2000809 00018280 v_cmp_ne_i32_e64 s[0:1], v9, 0 ; D10A0000 00010109 v_cndmask_b32_e64 v9, v12, v30, s[0:1] ; D2000809 00023D0C v_log_f32_e64 v9, |v9| ; D34E0109 00000109 v_mul_f32_e32 v9, 0x3ee8ba1f, v9 ; 101212FF 3EE8BA1F v_exp_f32_e32 v9, v9 ; 7E124B09 v_sub_f32_e32 v12, 1.0, v2 ; 081804F2 s_load_dwordx8 s[4:11], s[12:13], 0x40 ; C0C20D40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[48:50], 7, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[4:11], s[20:23] ; F0800700 00A13006 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, v50, v12 ; 100C1932 v_mad_f32 v53, v2, v9, v6 ; D2820035 041A1302 v_log_f32_e32 v6, v16 ; 7E0C4F10 v_mul_legacy_f32_e32 v6, v14, v6 ; 0E0C0D0E v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_add_f32_e32 v6, v6, v18 ; 060C2506 v_mul_f32_e32 v6, v6, v41 ; 100C5306 v_mad_f32 v6, v28, v29, v6 ; D2820006 041A3B1C v_log_f32_e32 v7, v36 ; 7E0E4F24 v_mul_legacy_f32_e32 v7, v14, v7 ; 0E0E0F0E v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_add_f32_e32 v7, v7, v47 ; 060E5F07 v_mul_f32_e32 v7, v7, v41 ; 100E5307 v_mad_f32 v7, v26, v27, v7 ; D2820007 041E371A v_mul_f32_e32 v7, v7, v19 ; 100E2707 v_mad_f32 v6, v6, v21, v7 ; D2820006 041E2B06 v_log_f32_e32 v7, v83 ; 7E0E4F53 v_mul_legacy_f32_e32 v7, v14, v7 ; 0E0E0F0E v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_add_f32_e32 v7, v7, v46 ; 060E5D07 v_mul_f32_e32 v7, v7, v41 ; 100E5307 v_mad_f32 v7, v31, v32, v7 ; D2820007 041E411F v_mad_f32 v6, v7, v22, v6 ; D2820006 041A2D07 v_log_f32_e32 v7, v39 ; 7E0E4F27 v_mul_legacy_f32_e32 v7, v14, v7 ; 0E0E0F0E v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_add_f32_e32 v7, v34, v7 ; 060E0F22 v_readlane_b32 s2, v90, 10 ; 0205155A s_nop 2 ; BF800002 v_mul_f32_e32 v9, s2, v43 ; 10125602 v_mad_f32 v6, v9, v7, v6 ; D2820006 041A0F09 v_readlane_b32 s2, v90, 9 ; 0205135A s_nop 2 ; BF800002 v_sub_f32_e32 v7, s2, v1 ; 080E0202 v_mad_f32 v1, v45, v7, v1 ; D2820001 04060F2D v_mul_f32_e32 v1, v1, v6 ; 10020D01 v_mul_f32_e32 v1, v1, v23 ; 10022F01 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_cndmask_b32_e64 v1, v1, v6, s[0:1] ; D2000801 08020D01 v_log_f32_e64 v1, |v1| ; D34E0101 00000101 v_mul_f32_e32 v1, 0x3ee8ba1f, v1 ; 100202FF 3EE8BA1F v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v6, v49, v12 ; 100C1931 v_mad_f32 v52, v2, v1, v6 ; D2820034 041A0302 v_log_f32_e32 v1, v15 ; 7E024F0F v_mul_legacy_f32_e32 v1, v14, v1 ; 0E02030E v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v1, v18 ; 06022501 v_mul_f32_e32 v1, v1, v42 ; 10025501 v_mad_f32 v1, v28, v29, v1 ; D2820001 04063B1C v_log_f32_e32 v6, v35 ; 7E0C4F23 v_mul_legacy_f32_e32 v6, v14, v6 ; 0E0C0D0E v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_add_f32_e32 v6, v6, v47 ; 060C5F06 v_mul_f32_e32 v6, v6, v42 ; 100C5506 v_mad_f32 v6, v26, v27, v6 ; D2820006 041A371A v_mul_f32_e32 v6, v6, v19 ; 100C2706 v_mad_f32 v1, v1, v21, v6 ; D2820001 041A2B01 v_log_f32_e32 v6, v82 ; 7E0C4F52 v_mul_legacy_f32_e32 v6, v14, v6 ; 0E0C0D0E v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_add_f32_e32 v6, v6, v46 ; 060C5D06 v_mul_f32_e32 v6, v6, v42 ; 100C5506 v_mad_f32 v6, v31, v32, v6 ; D2820006 041A411F v_mad_f32 v1, v6, v22, v1 ; D2820001 04062D06 v_log_f32_e32 v6, v38 ; 7E0C4F26 v_mul_legacy_f32_e32 v6, v14, v6 ; 0E0C0D0E v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_add_f32_e32 v6, v34, v6 ; 060C0D22 v_readlane_b32 s2, v90, 4 ; 0205095A s_nop 2 ; BF800002 v_mul_f32_e32 v7, s2, v43 ; 100E5602 v_mad_f32 v1, v7, v6, v1 ; D2820001 04060D07 v_readlane_b32 s2, v90, 3 ; 0205075A s_nop 2 ; BF800002 v_sub_f32_e32 v6, s2, v0 ; 080C0002 v_mad_f32 v0, v45, v6, v0 ; D2820000 04020D2D v_mul_f32_e32 v0, v0, v1 ; 10000300 v_mul_f32_e32 v0, v0, v23 ; 10002F00 v_mul_f32_e32 v0, v0, v11 ; 10001700 v_cndmask_b32_e64 v0, v0, v1, s[0:1] ; D2000000 00020300 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v1, v48, v12 ; 10021930 v_mad_f32 v51, v2, v0, v1 ; D2820033 04060102 s_load_dwordx8 s[4:11], s[12:13], 0x48 ; C0C20D48 v_readlane_b32 s12, v90, 5 ; 02190B5A v_readlane_b32 s13, v90, 6 ; 021B0D5A v_readlane_b32 s14, v90, 7 ; 021D0F5A v_readlane_b32 s15, v90, 8 ; 021F115A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[11:13], 7, 0, 0, 0, 0, 0, 0, 0, v[51:54], s[4:11], s[12:15] ; F0800700 00610B33 v_mad_f32 v0, -v4, v2, v2 ; D2820000 240A0504 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, v52, v1 ; 10040334 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v0, v12, v2 ; D2820002 040A1900 v_readlane_b32 s2, v90, 2 ; 0205055A s_nop 2 ; BF800002 v_sub_f32_e32 v3, s2, v2 ; 08060402 v_mul_f32_e32 v4, v8, v10 ; 10081508 v_cndmask_b32_e64 v4, 0, v4, s[0:1] ; D2000004 00020880 v_mad_f32 v2, v4, v3, v2 ; D2820002 040A0704 v_mul_f32_e32 v3, v51, v1 ; 10060333 v_mad_f32 v3, v0, v11, v3 ; D2820003 040E1700 v_readlane_b32 s2, v90, 1 ; 0205035A s_nop 2 ; BF800002 v_sub_f32_e32 v5, s2, v3 ; 080A0602 v_mad_f32 v3, v4, v5, v3 ; D2820003 040E0B04 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_mul_f32_e32 v1, v53, v1 ; 10020335 v_mad_f32 v0, v0, v13, v1 ; D2820000 04061B00 v_readlane_b32 s2, v90, 0 ; 0205015A s_nop 2 ; BF800002 v_sub_f32_e32 v1, s2, v0 ; 08020002 v_mad_f32 v0, v4, v1, v0 ; D2820000 04020304 v_cndmask_b32_e64 v1, 0, 1.0, s[0:1] ; D2000801 0001E480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL SAMP[0] DCL CONST[0..15] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 255.0000, -0.5000} IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[15], IN[0] 1: MOV TEMP[1].xy, TEMP[0].xyxx 2: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[15] 3: MOV TEMP[1].zw, TEMP[0].wwzw 4: MUL TEMP[0].xy, TEMP[1], CONST[9] 5: MOV TEMP[1].xy, TEMP[0].xyxx 6: ADD TEMP[2].zw, TEMP[1], IMM[0].xxxx 7: MUL TEMP[2].xy, TEMP[2].zwzw, CONST[10] 8: MOV TEMP[3].xy, TEMP[2].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 11: MUL TEMP[3].z, TEMP[3].wwww, CONST[9].zzzz 12: MUL TEMP[3].z, TEMP[3].zzzz, IMM[0].zzzz 13: MUL TEMP[2], TEMP[0].yyyy, CONST[12] 14: MAD TEMP[2], TEMP[0].xxxx, CONST[11], TEMP[2] 15: MAD TEMP[1], TEMP[3].zzzz, CONST[13], TEMP[2] 16: ADD TEMP[1], TEMP[1], CONST[14] 17: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 18: MAD TEMP[2], TEMP[1].xxxx, CONST[0], TEMP[2] 19: MAD TEMP[2], TEMP[1].zzzz, CONST[2], TEMP[2] 20: MAD TEMP[2], TEMP[1].wwww, CONST[3], TEMP[2] 21: MOV TEMP[0], TEMP[2] 22: MUL TEMP[2], TEMP[1].yyyy, CONST[5] 23: MAD TEMP[2], TEMP[1].xxxx, CONST[4], TEMP[2] 24: MAD TEMP[2], TEMP[1].zzzz, CONST[6], TEMP[2] 25: MAD TEMP[2], TEMP[1].wwww, CONST[7], TEMP[2] 26: MAD TEMP[1], TEMP[2], IMM[1].xyzz, IMM[0].xxyy 27: MOV OUT[1], TEMP[1] 28: MOV OUT[0], TEMP[0] 29: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0 %76 = add i32 %5, %7 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = fadd float %66, %78 %81 = fadd float %67, %79 %82 = fadd float %80, %68 %83 = fadd float %81, %69 %84 = fmul float %80, %45 %85 = fmul float %81, %46 %86 = fadd float %82, 5.000000e-01 %87 = fadd float %83, 5.000000e-01 %88 = fmul float %86, %48 %89 = fmul float %87, %49 %90 = bitcast float %88 to i32 %91 = bitcast float %89 to i32 %92 = bitcast float 0.000000e+00 to i32 %93 = insertelement <4 x i32> undef, i32 %90, i32 0 %94 = insertelement <4 x i32> %93, i32 %91, i32 1 %95 = insertelement <4 x i32> %94, i32 %92, i32 2 %96 = insertelement <4 x i32> %95, i32 undef, i32 3 %97 = bitcast <8 x i32> %71 to <32 x i8> %98 = bitcast <4 x i32> %73 to <16 x i8> %99 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 3 %101 = fmul float %100, %47 %102 = fmul float %101, 2.550000e+02 %103 = fmul float %85, %54 %104 = fmul float %85, %55 %105 = fmul float %85, %56 %106 = fmul float %85, %57 %107 = fmul float %84, %50 %108 = fadd float %107, %103 %109 = fmul float %84, %51 %110 = fadd float %109, %104 %111 = fmul float %84, %52 %112 = fadd float %111, %105 %113 = fmul float %84, %53 %114 = fadd float %113, %106 %115 = fmul float %102, %58 %116 = fadd float %115, %108 %117 = fmul float %102, %59 %118 = fadd float %117, %110 %119 = fmul float %102, %60 %120 = fadd float %119, %112 %121 = fmul float %102, %61 %122 = fadd float %121, %114 %123 = fadd float %116, %62 %124 = fadd float %118, %63 %125 = fadd float %120, %64 %126 = fadd float %122, %65 %127 = fmul float %124, %17 %128 = fmul float %124, %18 %129 = fmul float %124, %19 %130 = fmul float %124, %20 %131 = fmul float %123, %13 %132 = fadd float %131, %127 %133 = fmul float %123, %14 %134 = fadd float %133, %128 %135 = fmul float %123, %15 %136 = fadd float %135, %129 %137 = fmul float %123, %16 %138 = fadd float %137, %130 %139 = fmul float %125, %21 %140 = fadd float %139, %132 %141 = fmul float %125, %22 %142 = fadd float %141, %134 %143 = fmul float %125, %23 %144 = fadd float %143, %136 %145 = fmul float %125, %24 %146 = fadd float %145, %138 %147 = fmul float %126, %25 %148 = fadd float %147, %140 %149 = fmul float %126, %26 %150 = fadd float %149, %142 %151 = fmul float %126, %27 %152 = fadd float %151, %144 %153 = fmul float %126, %28 %154 = fadd float %153, %146 %155 = fmul float %124, %33 %156 = fmul float %124, %34 %157 = fmul float %124, %35 %158 = fmul float %124, %36 %159 = fmul float %123, %29 %160 = fadd float %159, %155 %161 = fmul float %123, %30 %162 = fadd float %161, %156 %163 = fmul float %123, %31 %164 = fadd float %163, %157 %165 = fmul float %123, %32 %166 = fadd float %165, %158 %167 = fmul float %125, %37 %168 = fadd float %167, %160 %169 = fmul float %125, %38 %170 = fadd float %169, %162 %171 = fmul float %125, %39 %172 = fadd float %171, %164 %173 = fmul float %125, %40 %174 = fadd float %173, %166 %175 = fmul float %126, %41 %176 = fadd float %175, %168 %177 = fmul float %126, %42 %178 = fadd float %177, %170 %179 = fmul float %126, %43 %180 = fadd float %179, %172 %181 = fmul float %126, %44 %182 = fadd float %181, %174 %183 = fmul float %176, 5.000000e-01 %184 = fadd float %183, 5.000000e-01 %185 = fmul float %178, -5.000000e-01 %186 = fadd float %185, 5.000000e-01 %187 = fmul float %180, 1.000000e+00 %188 = fadd float %187, 0.000000e+00 %189 = fmul float %182, 1.000000e+00 %190 = fadd float %189, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %184, float %186, float %188, float %190) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %148, float %150, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x0 ; C0840900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s8, v0 ; 06080008 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 s_buffer_load_dword s8, s[0:3], 0x3d ; C204013D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s8, v1 ; 06000208 s_buffer_load_dword s8, s[0:3], 0x25 ; C2040125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v0 ; 10020008 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s8, v1 ; 10040208 s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s8, v2 ; D2820002 04081105 s_buffer_load_dword s8, s[0:3], 0x3f ; C204013F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s8, v0 ; 06000008 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 s_buffer_load_dword s8, s[0:3], 0x29 ; C2040129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v0 ; 100E0008 s_buffer_load_dword s8, s[0:3], 0x3e ; C204013E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s8, v4 ; 06000808 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 s_buffer_load_dword s8, s[0:3], 0x28 ; C2040128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v0 ; 100C0008 v_mov_b32_e32 v8, 0 ; 7E100280 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[12:19], s[8:11] ; F0900800 00430006 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_mul_f32_e32 v0, 0x437f0000, v0 ; 100000FF 437F0000 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x38 ; C2020138 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x31 ; C2020131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v1 ; 10060204 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s4, v3 ; D2820003 040C0905 s_buffer_load_dword s4, s[0:3], 0x35 ; C2020135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x3a ; C202013A s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x2f ; C202012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v5, s4, v1 ; D2820001 04040905 s_buffer_load_dword s4, s[0:3], 0x37 ; C2020137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s4, s[0:3], 0x3b ; C202013B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v4 ; D2820001 04100900 v_mad_f32 v1, -0.5, v1, 0.5 ; D2820001 03C202F1 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 v_add_f32_e32 v4, 0, v4 ; 06080880 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 v_add_f32_e32 v5, 0, v5 ; 060A0A80 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v3 ; 100E0604 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v6, s4, v7 ; D2820007 041C0906 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 v_mad_f32 v7, 0.5, v7, 0.5 ; D2820007 03C20EF0 exp 15, 32, 0, 0, 0, v7, v1, v5, v4 ; F800020F 04050107 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v3 ; 10020604 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v2, s4, v1 ; D2820001 04040902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v6, s4, v1 ; D2820001 04040906 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v1 ; D2820001 04040900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v2, s4, v3 ; D2820002 040C0902 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s4, v2 ; D2820002 04080906 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v2 ; D2820000 04080100 exp 15, 12, 0, 1, 0, v0, v5, v4, v1 ; F80008CF 01040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = fmul float %26, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %27, -1.000000e+00 %31 = fadd float %30, 1.000000e+00 %32 = bitcast float %29 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_add_f32_e32 v2, 0, v4 ; 06040880 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..254] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} 0: F2I TEMP[0].x, IN[2].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: MOV TEMP[0].xyz, CONST[ADDR[0].x+7].xyzx 3: MAD TEMP[1].xy, CONST[6], IN[1].zwzw, TEMP[0] 4: MOV TEMP[1].xy, TEMP[1].xyxx 5: ADD TEMP[2].xy, TEMP[0].zzzz, -CONST[5].xzzw 6: MOV TEMP[0].xy, TEMP[2].xyxx 7: MUL TEMP[2].z, TEMP[0].zzzz, CONST[4].zzzz 8: MOV TEMP[0].z, TEMP[2].zzzz 9: LRP TEMP[2].x, TEMP[2].zzzz, CONST[4].yyyy, CONST[4].xxxx 10: MUL TEMP[3], TEMP[0], CONST[5].ywzw 11: MOV_SAT TEMP[3], TEMP[3] 12: ADD TEMP[4].y, -TEMP[3].yyyy, IMM[0].xxxx 13: MUL TEMP[3].xy, TEMP[4].yyyy, TEMP[3].xxxx 14: MOV TEMP[3].xy, TEMP[3].xyxx 15: MAD TEMP[4].xy, IN[1], IMM[0].yyyy, IMM[0].zzzz 16: MOV TEMP[0].xy, TEMP[4].xyxx 17: MAD TEMP[4].xy, TEMP[0], -TEMP[2].xxxx, IN[0] 18: MUL TEMP[2], TEMP[4].yyyy, CONST[1] 19: MAD TEMP[0], TEMP[4].xxxx, CONST[0], TEMP[2] 20: MAD TEMP[0], CONST[4].wwww, CONST[2], TEMP[0] 21: ADD TEMP[0], TEMP[0], CONST[3] 22: MOV TEMP[1].zw, IMM[0].xxwx 23: MOV TEMP[3].zw, IMM[0].xxwx 24: MOV OUT[1], TEMP[1] 25: MOV OUT[0], TEMP[0] 26: MOV OUT[2], TEMP[3] 27: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = fptosi float %57 to i32 %59 = bitcast i32 %58 to float %60 = bitcast float %59 to i32 %61 = shl i32 %60, 4 %62 = add i32 %61, 112 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %60, 4 %65 = add i32 %64, 116 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %60, 4 %68 = add i32 %67, 120 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = fmul float %37, %51 %71 = fadd float %70, %63 %72 = fmul float %38, %52 %73 = fadd float %72, %66 %74 = fsub float -0.000000e+00, %33 %75 = fadd float %69, %74 %76 = fsub float -0.000000e+00, %35 %77 = fadd float %69, %76 %78 = fmul float %69, %31 %79 = call float @llvm.AMDGPU.lrp(float %78, float %30, float %29) %80 = fmul float %75, %34 %81 = fmul float %77, %36 %82 = fmul float %78, %35 %83 = fmul float 0.000000e+00, %36 %84 = call float @llvm.AMDIL.clamp.(float %80, float 0.000000e+00, float 1.000000e+00) %85 = call float @llvm.AMDIL.clamp.(float %81, float 0.000000e+00, float 1.000000e+00) %86 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) %87 = call float @llvm.AMDIL.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) %88 = fsub float -0.000000e+00, %85 %89 = fadd float %88, 1.000000e+00 %90 = fmul float %89, %84 %91 = fmul float %89, %84 %92 = fmul float %49, 2.000000e+00 %93 = fadd float %92, -1.000000e+00 %94 = fmul float %50, 2.000000e+00 %95 = fadd float %94, -1.000000e+00 %96 = fsub float -0.000000e+00, %79 %97 = fmul float %93, %96 %98 = fadd float %97, %43 %99 = fsub float -0.000000e+00, %79 %100 = fmul float %95, %99 %101 = fadd float %100, %44 %102 = fmul float %101, %17 %103 = fmul float %101, %18 %104 = fmul float %101, %19 %105 = fmul float %101, %20 %106 = fmul float %98, %13 %107 = fadd float %106, %102 %108 = fmul float %98, %14 %109 = fadd float %108, %103 %110 = fmul float %98, %15 %111 = fadd float %110, %104 %112 = fmul float %98, %16 %113 = fadd float %112, %105 %114 = fmul float %32, %21 %115 = fadd float %114, %107 %116 = fmul float %32, %22 %117 = fadd float %116, %109 %118 = fmul float %32, %23 %119 = fadd float %118, %111 %120 = fmul float %32, %24 %121 = fadd float %120, %113 %122 = fadd float %115, %25 %123 = fadd float %117, %26 %124 = fadd float %119, %27 %125 = fadd float %121, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %71, float %73, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %90, float %91, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %122, float %123, float %124, float %125) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_add_i32_e32 v2, 0x74, v1 ; 4A0402FF 00000074 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_format_xyzw v[3:6], v0, s[12:15], 0 idxen ; E00C2000 80030300 s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, s8, v6, v2 ; D2820002 040A0C08 v_add_i32_e32 v7, 0x70, v1 ; 4A0E02FF 00000070 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v7, s8, v5, v7 ; D2820007 041E0A08 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 32, 0, 0, 0, v7, v2, v9, v8 ; F800020F 08090207 v_add_i32_e32 v1, 0x78, v1 ; 4A0202FF 00000078 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 v_subrev_f32_e32 v2, s8, v1 ; 0A040208 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s8, v2 ; 10040408 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v7, s8, v1 ; 0A0E0208 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v7 ; 100E0E08 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v2, v7, v2 ; 10040507 exp 15, 33, 0, 0, 0, v2, v2, v9, v8 ; F800021F 08090202 s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s8, v1 ; 10040208 v_mad_f32 v1, -v1, s8, 1.0 ; D2820001 23C81101 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v2, s8, v1 ; D2820001 04041102 v_mad_f32 v2, 2.0, v3, -1.0 ; D2820002 03CE06F4 buffer_load_format_xyzw v[7:10], v0, s[4:7], 0 idxen ; E00C2000 80010700 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v2, v1, v7 ; D2820000 241E0302 v_mad_f32 v2, 2.0, v4, -1.0 ; D2820002 03CE08F4 v_mad_f32 v1, -v2, v1, v8 ; D2820001 24220302 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v1 ; 10040204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v2, v3, s5, v2 ; D2820002 04080B03 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v1 ; 10060204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v3, v4, s5, v3 ; D2820003 040C0B04 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v4, v5, s5, v4 ; D2820004 04100B05 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_mad_f32 v0, v1, s5, v0 ; D2820000 04000B01 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1].wwww 7: MUL TEMP[1].xyz, TEMP[1], IN[1].xxxx 8: MOV TEMP[0].xyz, TEMP[1].xyzx 9: MUL TEMP[0].xyz, TEMP[0], CONST[0] 10: ABS TEMP[1].x, TEMP[0].xxxx 11: LG2 TEMP[1].x, TEMP[1].xxxx 12: ABS TEMP[2].x, TEMP[0].yyyy 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[1].y, TEMP[2].xxxx 15: ABS TEMP[0].x, TEMP[0].zzzz 16: LG2 TEMP[0].x, TEMP[0].xxxx 17: MOV TEMP[1].z, TEMP[0].xxxx 18: MUL TEMP[0].xyz, TEMP[1], IMM[0].zzzz 19: EX2 TEMP[1].x, TEMP[0].xxxx 20: EX2 TEMP[2].x, TEMP[0].yyyy 21: MOV TEMP[1].y, TEMP[2].xxxx 22: EX2 TEMP[0].x, TEMP[0].zzzz 23: MOV TEMP[1].z, TEMP[0].xxxx 24: MOV TEMP[1].w, IMM[0].yyyy 25: MOV OUT[0], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %28 = load <8 x i32> addrspace(2)* %27, !tbaa !0 %29 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %30 = load <4 x i32> addrspace(2)* %29, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %34 = bitcast float %31 to i32 %35 = bitcast float %32 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %28 to <32 x i8> %39 = bitcast <4 x i32> %30 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %42, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = fmul float %44, %33 %48 = fmul float %45, %33 %49 = fmul float %46, %33 %50 = fmul float %47, %24 %51 = fmul float %48, %25 %52 = fmul float %49, %26 %53 = call float @fabs(float %50) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %51) %56 = call float @llvm.log2.f32(float %55) %57 = call float @fabs(float %52) %58 = call float @llvm.log2.f32(float %57) %59 = fmul float %54, 0x3FDD1743E0000000 %60 = fmul float %56, 0x3FDD1743E0000000 %61 = fmul float %58, 0x3FDD1743E0000000 %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call float @llvm.AMDIL.exp.(float %60) %64 = call float @llvm.AMDIL.exp.(float %61) %65 = call i32 @llvm.SI.packf16(float %62, float %63) %66 = bitcast i32 %65 to float %67 = call i32 @llvm.SI.packf16(float %64, float 1.000000e+00) %68 = bitcast i32 %67 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v5, v3 ; 7E0A4F03 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_mul_f32_e32 v0, v7, v5 ; 10000B07 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e32 v1, v2 ; 7E024F02 v_mul_legacy_f32_e32 v1, v6, v1 ; 0E020306 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, v7, v1 ; 10020307 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 v_log_f32_e64 v1, |v1| ; D34E0101 00000101 v_mul_f32_e32 v1, 0x3ee8ba1f, v1 ; 100202FF 3EE8BA1F v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_log_f32_e32 v1, v4 ; 7E024F04 v_mul_legacy_f32_e32 v1, v6, v1 ; 0E020306 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, v7, v1 ; 10020307 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s0, v1 ; 10020200 v_log_f32_e64 v1, |v1| ; D34E0101 00000101 v_mul_f32_e32 v1, 0x3ee8ba1f, v1 ; 100202FF 3EE8BA1F v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..12] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 0.0000} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[10], IN[2].yyyy 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].xyz, IN[2].xxxx, CONST[9], TEMP[0] 3: MOV TEMP[0].xyz, TEMP[1].xyzx 4: MAD TEMP[1].xyz, IN[2].zzzz, CONST[11], TEMP[0] 5: MOV TEMP[0].xyz, TEMP[1].xyzx 6: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 7: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MOV TEMP[0].w, TEMP[1].xxxx 10: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0] 11: MOV TEMP[1].xyz, TEMP[1].xyzx 12: MUL TEMP[0], CONST[10], IN[0].yyyy 13: MAD TEMP[0], IN[0].xxxx, CONST[9], TEMP[0] 14: MAD TEMP[0], IN[0].zzzz, CONST[11], TEMP[0] 15: ADD TEMP[0], TEMP[0], CONST[12] 16: MUL TEMP[2], TEMP[0].yyyy, CONST[1] 17: MAD TEMP[2], TEMP[0].xxxx, CONST[0], TEMP[2] 18: MAD TEMP[2], TEMP[0].zzzz, CONST[2], TEMP[2] 19: MAD TEMP[2], TEMP[0].wwww, CONST[3], TEMP[2] 20: RCP TEMP[3].x, TEMP[2].wwww 21: MOV TEMP[0].w, TEMP[3].xxxx 22: MUL TEMP[3].xy, TEMP[3].xxxx, TEMP[2] 23: MOV TEMP[3].xy, TEMP[3].xyxx 24: MOV TEMP[4], TEMP[2] 25: ADD TEMP[5].xy, TEMP[0], CONST[8].zwzw 26: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 27: MOV TEMP[5].zw, TEMP[5].wwzw 28: MUL TEMP[6].xy, TEMP[0].yyyy, CONST[5] 29: MOV TEMP[2].xy, TEMP[6].xyxx 30: MAD TEMP[6].xy, TEMP[0].xxxx, CONST[4], TEMP[2] 31: MOV TEMP[2].xy, TEMP[6].xyxx 32: MAD TEMP[6].xy, TEMP[0].zzzz, CONST[6], TEMP[2] 33: MOV TEMP[2].xy, TEMP[6].xyxx 34: MOV TEMP[6].xyz, TEMP[0].xyzx 35: ADD TEMP[2].xy, TEMP[2], CONST[7] 36: MOV TEMP[0].xy, TEMP[2].xyxx 37: MAD TEMP[0].xy, TEMP[0], IMM[0].yzww, IMM[0].zzzz 38: MOV TEMP[0].xy, TEMP[0].xyxx 39: MOV TEMP[5].xy, IN[1].xyxx 40: MOV TEMP[6].w, IMM[1].xxxx 41: MOV TEMP[3].zw, IMM[1].xxyx 42: MOV TEMP[0].zw, IMM[1].xxyx 43: MOV OUT[2], TEMP[5] 44: MOV OUT[3], TEMP[6] 45: MOV OUT[0], TEMP[4] 46: MOV OUT[1], TEMP[1] 47: MOV OUT[4], TEMP[3] 48: MOV OUT[5], TEMP[0] 49: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = add i32 %5, %7 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %45, %75 %78 = fmul float %46, %75 %79 = fmul float %47, %75 %80 = fmul float %74, %41 %81 = fadd float %80, %77 %82 = fmul float %74, %42 %83 = fadd float %82, %78 %84 = fmul float %74, %43 %85 = fadd float %84, %79 %86 = fmul float %76, %49 %87 = fadd float %86, %81 %88 = fmul float %76, %50 %89 = fadd float %88, %83 %90 = fmul float %76, %51 %91 = fadd float %90, %85 %92 = fmul float %87, %87 %93 = fmul float %89, %89 %94 = fadd float %93, %92 %95 = fmul float %91, %91 %96 = fadd float %94, %95 %97 = call float @llvm.maxnum.f32(float %96, float 0x3E7AD7F2A0000000) %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) %99 = fmul float %98, %87 %100 = fmul float %98, %89 %101 = fmul float %98, %91 %102 = fmul float %45, %62 %103 = fmul float %46, %62 %104 = fmul float %47, %62 %105 = fmul float %48, %62 %106 = fmul float %61, %41 %107 = fadd float %106, %102 %108 = fmul float %61, %42 %109 = fadd float %108, %103 %110 = fmul float %61, %43 %111 = fadd float %110, %104 %112 = fmul float %61, %44 %113 = fadd float %112, %105 %114 = fmul float %63, %49 %115 = fadd float %114, %107 %116 = fmul float %63, %50 %117 = fadd float %116, %109 %118 = fmul float %63, %51 %119 = fadd float %118, %111 %120 = fmul float %63, %52 %121 = fadd float %120, %113 %122 = fadd float %115, %53 %123 = fadd float %117, %54 %124 = fadd float %119, %55 %125 = fadd float %121, %56 %126 = fmul float %123, %17 %127 = fmul float %123, %18 %128 = fmul float %123, %19 %129 = fmul float %123, %20 %130 = fmul float %122, %13 %131 = fadd float %130, %126 %132 = fmul float %122, %14 %133 = fadd float %132, %127 %134 = fmul float %122, %15 %135 = fadd float %134, %128 %136 = fmul float %122, %16 %137 = fadd float %136, %129 %138 = fmul float %124, %21 %139 = fadd float %138, %131 %140 = fmul float %124, %22 %141 = fadd float %140, %133 %142 = fmul float %124, %23 %143 = fadd float %142, %135 %144 = fmul float %124, %24 %145 = fadd float %144, %137 %146 = fmul float %125, %25 %147 = fadd float %146, %139 %148 = fmul float %125, %26 %149 = fadd float %148, %141 %150 = fmul float %125, %27 %151 = fadd float %150, %143 %152 = fmul float %125, %28 %153 = fadd float %152, %145 %154 = fdiv float 1.000000e+00, %153 %155 = fmul float %154, %147 %156 = fmul float %154, %149 %157 = fadd float %122, %39 %158 = fadd float %123, %40 %159 = fmul float %157, %37 %160 = fmul float %158, %38 %161 = fmul float %123, %31 %162 = fmul float %123, %32 %163 = fmul float %122, %29 %164 = fadd float %163, %161 %165 = fmul float %122, %30 %166 = fadd float %165, %162 %167 = fmul float %124, %33 %168 = fadd float %167, %164 %169 = fmul float %124, %34 %170 = fadd float %169, %166 %171 = fadd float %168, %35 %172 = fadd float %170, %36 %173 = fmul float %171, 5.000000e-01 %174 = fadd float %173, -5.000000e-01 %175 = fmul float %172, -5.000000e-01 %176 = fadd float %175, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %99, float %100, float %101, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %68, float %69, float %159, float %160) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %122, float %123, float %124, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %155, float %156, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %174, float %176, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %147, float %149, float %151, float %153) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x29 ; C2040129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v2 ; 100A0408 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s9, v5 ; D2820005 04141301 s_buffer_load_dword s10, s[0:3], 0x2d ; C205012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s10, v5 ; D2820005 04141503 s_buffer_load_dword s11, s[0:3], 0x28 ; C2058128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s11, v2 ; 100C040B s_buffer_load_dword s16, s[0:3], 0x24 ; C2080124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s16, v6 ; D2820006 04182101 s_buffer_load_dword s17, s[0:3], 0x2c ; C208812C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s17, v6 ; D2820006 04182303 v_mul_f32_e32 v7, v6, v6 ; 100E0D06 v_mad_f32 v7, v5, v5, v7 ; D2820007 041E0B05 s_buffer_load_dword s18, s[0:3], 0x2a ; C209012A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s18, v2 ; 10100412 s_buffer_load_dword s19, s[0:3], 0x26 ; C2098126 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v1, s19, v8 ; D2820008 04202701 s_buffer_load_dword s20, s[0:3], 0x2e ; C20A012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v3, s20, v8 ; D2820001 04202903 v_mad_f32 v2, v1, v1, v7 ; D2820002 041E0301 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v3, v5, v2 ; 10060505 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mov_b32_e32 v4, 0 ; 7E080280 exp 15, 32, 0, 0, 0, v2, v3, v1, v4 ; F800020F 04010302 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v1, s8, v6 ; 10020C08 v_mad_f32 v1, v5, s9, v1 ; D2820001 04041305 v_mad_f32 v1, v7, s10, v1 ; D2820001 04041507 s_buffer_load_dword s8, s[0:3], 0x31 ; C2040131 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s8, v1 ; 06020208 s_buffer_load_dword s8, s[0:3], 0x23 ; C2040123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s8, v1 ; 06040208 s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mul_f32_e32 v3, s11, v6 ; 10060C0B v_mad_f32 v3, v5, s16, v3 ; D2820003 040C2105 v_mad_f32 v3, v7, s17, v3 ; D2820003 040C2307 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s8, v3 ; 06060608 s_buffer_load_dword s8, s[0:3], 0x22 ; C2040122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s8, v3 ; 06120608 s_buffer_load_dword s8, s[0:3], 0x20 ; C2040120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s8, v9 ; 10121208 buffer_load_format_xyzw v[10:13], v0, s[4:7], 0 idxen ; E00C2000 80010A00 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v10, v11, v9, v2 ; F800021F 02090B0A v_mul_f32_e32 v0, s18, v6 ; 10000C12 v_mad_f32 v0, v5, s19, v0 ; D2820000 04002705 v_mad_f32 v0, v7, s20, v0 ; D2820000 04002907 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v0, s4, v0 ; 06000004 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 34, 0, 0, 0, v3, v1, v0, v2 ; F800022F 02000103 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v9, s4, v1 ; 10120204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v3, s4, v9 ; D2820009 04240903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v0, s4, v9 ; D2820009 04240900 s_buffer_load_dword s4, s[0:3], 0x2b ; C202012B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v6 ; 10140C04 s_buffer_load_dword s4, s[0:3], 0x27 ; C2020127 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v5, s4, v10 ; D282000A 04280905 s_buffer_load_dword s4, s[0:3], 0x2f ; C202012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v7, s4, v10 ; D2820005 04280907 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v9 ; D2820006 04240905 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v3, s4, v7 ; D2820007 041C0903 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s4, v7 ; D2820007 041C0905 v_rcp_f32_e32 v8, v7 ; 7E105507 v_mul_f32_e32 v9, v6, v8 ; 10121106 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v1 ; 10140204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v3, s4, v10 ; D282000A 04280903 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v0, s4, v10 ; D282000A 04280900 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v5, s4, v10 ; D282000A 04280905 v_mul_f32_e32 v8, v10, v8 ; 1010110A exp 15, 35, 0, 0, 0, v8, v9, v4, v2 ; F800023F 02040908 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v8, s4, v1 ; 10100204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v3, s4, v8 ; D2820008 04200903 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v0, s4, v8 ; D2820008 04200900 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s4, v8 ; 06101004 v_mad_f32 v8, 0.5, v8, -0.5 ; D2820008 03C610F0 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s4, v1 ; 10120204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v3, s4, v9 ; D2820009 04240903 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v0, s4, v9 ; D2820009 04240900 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s4, v9 ; 06121204 v_mad_f32 v9, -0.5, v9, -0.5 ; D2820009 03C612F1 exp 15, 36, 0, 0, 0, v8, v9, v4, v2 ; F800024F 02040908 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v3, s4, v1 ; D2820001 04040903 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s0, v0 ; D2820000 04000105 exp 15, 12, 0, 1, 0, v10, v6, v0, v7 ; F80008CF 0700060A s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 14: UIF TEMP[6].xxxx :0 15: KILL 16: ENDIF 17: MOV TEMP[5].xyz, IN[0].xyzz 18: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 19: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 20: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 21: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 22: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 23: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 24: MOV TEMP[0].xyz, TEMP[1].xyzx 25: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 26: MOV TEMP[0].xyz, TEMP[1].xyzx 27: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 28: MOV TEMP[3].yzw, TEMP[1].zyzw 29: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 30: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 31: RSQ TEMP[5].x, TEMP[5].xxxx 32: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 33: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 34: MOV TEMP[5].w, IMM[0].wwww 35: MOV TEMP[5].x, TEMP[3].yyyy 36: MOV TEMP[5].y, TEMP[3].zzzz 37: MOV TEMP[5].z, TEMP[3].wwww 38: DP4 TEMP[3].x, TEMP[5], TEMP[5] 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 41: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 42: MOV_SAT TEMP[3].x, TEMP[3].xxxx 43: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 44: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 45: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 46: MOV TEMP[6].xyz, TEMP[5].yzww 47: TEX TEMP[6], TEMP[6], SAMP[4], CUBE 48: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 49: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 50: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 51: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 52: MOV TEMP[4].w, TEMP[7].wwww 53: MOV TEMP[6].xy, IN[1].xyyy 54: TEX TEMP[6], TEMP[6], SAMP[1], 2D 55: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 56: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 57: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 58: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 59: ABS TEMP[6].x, TEMP[8].wwww 60: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 61: MOV TEMP[5].xyz, TEMP[5].yzww 62: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 63: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 64: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 65: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 66: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 67: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 68: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 69: ABS TEMP[3].x, TEMP[3].xxxx 70: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 71: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 72: MOV TEMP[0].w, TEMP[6].wwww 73: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 74: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 75: ABS TEMP[5].x, TEMP[3].xxxx 76: LG2 TEMP[4].x, TEMP[5].xxxx 77: ABS TEMP[5].x, TEMP[3].yyyy 78: LG2 TEMP[5].x, TEMP[5].xxxx 79: MOV TEMP[4].y, TEMP[5].xxxx 80: ABS TEMP[3].x, TEMP[3].zzzz 81: LG2 TEMP[3].x, TEMP[3].xxxx 82: MOV TEMP[4].z, TEMP[3].xxxx 83: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 84: EX2 TEMP[4].x, TEMP[3].xxxx 85: EX2 TEMP[5].x, TEMP[3].yyyy 86: MOV TEMP[4].y, TEMP[5].xxxx 87: EX2 TEMP[3].x, TEMP[3].zzzz 88: MOV TEMP[4].z, TEMP[3].xxxx 89: MOV TEMP[3].xyz, TEMP[4].xyzz 90: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 91: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 92: MOV TEMP[5].xy, TEMP[5].xyyy 93: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 94: MOV TEMP[1].w, TEMP[5].wwww 95: MOV TEMP[6].xy, IN[1].zwww 96: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 97: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 98: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 99: MOV TEMP[6].xyz, TEMP[3].yzww 100: TEX TEMP[6], TEMP[6], SAMP[9], 3D 101: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 102: MOV TEMP[1].xyz, TEMP[3].xyzx 103: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 104: MOV TEMP[0].xyz, TEMP[3].xyzx 105: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 106: MOV TEMP[4].z, TEMP[3].zzzz 107: MOV TEMP[4].xy, IN[2].xyxx 108: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 109: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 110: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 111: EX2 TEMP[5].x, TEMP[5].wwww 112: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 113: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 114: RCP TEMP[3].x, TEMP[3].wwww 115: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 116: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 117: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 118: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 119: EX2 TEMP[3].x, TEMP[3].wwww 120: MOV_SAT TEMP[3].x, TEMP[3].xxxx 121: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 122: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 123: MOV TEMP[4].x, TEMP[5].xxxx 124: MOV TEMP[4].y, CONST[4].wwww 125: MOV TEMP[4].xy, TEMP[4].xyyy 126: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 127: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 128: MOV TEMP[0].w, TEMP[3].wwww 129: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 130: MOV TEMP[0].xyz, TEMP[0].xyzx 131: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 132: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 133: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 134: UIF TEMP[3].xxxx :0 135: MOV TEMP[2].x, TEMP[2].xxxx 136: ELSE :0 137: MOV TEMP[2].x, TEMP[1].xxxx 138: ENDIF 139: MOV TEMP[0].w, TEMP[2].xxxx 140: MOV OUT[0], TEMP[0] 141: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = icmp ne i32 %128, 0 br i1 %129, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %130 = insertelement <4 x float> undef, float %78, i32 0 %131 = insertelement <4 x float> %130, float %79, i32 1 %132 = insertelement <4 x float> %131, float %80, i32 2 %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 3 %134 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %133) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = call float @fabs(float %137) %140 = fdiv float 1.000000e+00, %139 %141 = fmul float %135, %140 %142 = fadd float %141, 1.500000e+00 %143 = fmul float %136, %140 %144 = fadd float %143, 1.500000e+00 %145 = bitcast float %144 to i32 %146 = bitcast float %142 to i32 %147 = bitcast float %138 to i32 %148 = insertelement <4 x i32> undef, i32 %145, i32 0 %149 = insertelement <4 x i32> %148, i32 %146, i32 1 %150 = insertelement <4 x i32> %149, i32 %147, i32 2 %151 = insertelement <4 x i32> %150, i32 undef, i32 3 %152 = bitcast <8 x i32> %47 to <32 x i8> %153 = bitcast <4 x i32> %49 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 4) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = extractelement <4 x float> %154, i32 3 %159 = call float @llvm.pow.f32(float %155, float 0x40019999A0000000) %160 = call float @llvm.pow.f32(float %156, float 0x40019999A0000000) %161 = call float @llvm.pow.f32(float %157, float 0x40019999A0000000) %162 = call float @llvm.pow.f32(float %158, float 1.000000e+00) %163 = fmul float %102, %159 %164 = fmul float %103, %160 %165 = fmul float %104, %161 %166 = fadd float %163, %163 %167 = fadd float %164, %164 %168 = fadd float %165, %165 %169 = fsub float -0.000000e+00, %85 %170 = fadd float %24, %169 %171 = fsub float -0.000000e+00, %86 %172 = fadd float %25, %171 %173 = fsub float -0.000000e+00, %87 %174 = fadd float %26, %173 %175 = fmul float %170, %170 %176 = fmul float %172, %172 %177 = fadd float %176, %175 %178 = fmul float %174, %174 %179 = fadd float %177, %178 %180 = call float @llvm.maxnum.f32(float %179, float 0x3E7AD7F2A0000000) %181 = call float @llvm.AMDGPU.rsq.clamped.f32(float %180) %182 = fmul float %181, %170 %183 = fmul float %181, %172 %184 = fmul float %181, %174 %185 = fmul float %170, %181 %186 = fadd float %185, 0x3FAB15B580000000 %187 = fmul float %172, %181 %188 = fadd float %187, 0x3FEB126EA0000000 %189 = fmul float %174, %181 %190 = fadd float %189, 0x3FE0ED9160000000 %191 = fmul float %186, %186 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = fmul float %190, %190 %195 = fadd float %193, %194 %196 = fmul float 0.000000e+00, 0.000000e+00 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %186, %198 %200 = fmul float %188, %198 %201 = fmul float %190, %198 %202 = fmul float %199, %78 %203 = fmul float %200, %79 %204 = fadd float %203, %202 %205 = fmul float %201, %80 %206 = fadd float %204, %205 %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00) %208 = fmul float %78, %182 %209 = fmul float %79, %183 %210 = fadd float %209, %208 %211 = fmul float %80, %184 %212 = fadd float %210, %211 %213 = fadd float %212, %212 %214 = fsub float -0.000000e+00, %182 %215 = fmul float %213, %78 %216 = fadd float %215, %214 %217 = fsub float -0.000000e+00, %183 %218 = fmul float %213, %79 %219 = fadd float %218, %217 %220 = fsub float -0.000000e+00, %184 %221 = fmul float %213, %80 %222 = fadd float %221, %220 %223 = insertelement <4 x float> undef, float %216, i32 0 %224 = insertelement <4 x float> %223, float %219, i32 1 %225 = insertelement <4 x float> %224, float %222, i32 2 %226 = insertelement <4 x float> %225, float %162, i32 3 %227 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %226) %228 = extractelement <4 x float> %227, i32 0 %229 = extractelement <4 x float> %227, i32 1 %230 = extractelement <4 x float> %227, i32 2 %231 = extractelement <4 x float> %227, i32 3 %232 = call float @fabs(float %230) %233 = fdiv float 1.000000e+00, %232 %234 = fmul float %228, %233 %235 = fadd float %234, 1.500000e+00 %236 = fmul float %229, %233 %237 = fadd float %236, 1.500000e+00 %238 = bitcast float %237 to i32 %239 = bitcast float %235 to i32 %240 = bitcast float %231 to i32 %241 = insertelement <4 x i32> undef, i32 %238, i32 0 %242 = insertelement <4 x i32> %241, i32 %239, i32 1 %243 = insertelement <4 x i32> %242, i32 %240, i32 2 %244 = insertelement <4 x i32> %243, i32 undef, i32 3 %245 = bitcast <8 x i32> %55 to <32 x i8> %246 = bitcast <4 x i32> %57 to <16 x i8> %247 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %244, <32 x i8> %245, <16 x i8> %246, i32 4) %248 = extractelement <4 x float> %247, i32 0 %249 = extractelement <4 x float> %247, i32 1 %250 = extractelement <4 x float> %247, i32 2 %251 = call float @llvm.pow.f32(float %248, float 0x40019999A0000000) %252 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %254 = bitcast float %81 to i32 %255 = bitcast float %82 to i32 %256 = insertelement <2 x i32> undef, i32 %254, i32 0 %257 = insertelement <2 x i32> %256, i32 %255, i32 1 %258 = bitcast <8 x i32> %43 to <32 x i8> %259 = bitcast <4 x i32> %45 to <16 x i8> %260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %257, <32 x i8> %258, <16 x i8> %259, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = call float @llvm.pow.f32(float %261, float 0x40019999A0000000) %266 = call float @llvm.pow.f32(float %262, float 0x40019999A0000000) %267 = call float @llvm.pow.f32(float %263, float 0x40019999A0000000) %268 = call float @llvm.pow.f32(float %264, float 1.000000e+00) %269 = call float @fabs(float %268) %270 = call float @llvm.pow.f32(float %269, float 0x40019999A0000000) %271 = insertelement <4 x float> undef, float %216, i32 0 %272 = insertelement <4 x float> %271, float %219, i32 1 %273 = insertelement <4 x float> %272, float %222, i32 2 %274 = insertelement <4 x float> %273, float %222, i32 3 %275 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %274) %276 = extractelement <4 x float> %275, i32 0 %277 = extractelement <4 x float> %275, i32 1 %278 = extractelement <4 x float> %275, i32 2 %279 = extractelement <4 x float> %275, i32 3 %280 = call float @fabs(float %278) %281 = fdiv float 1.000000e+00, %280 %282 = fmul float %276, %281 %283 = fadd float %282, 1.500000e+00 %284 = fmul float %277, %281 %285 = fadd float %284, 1.500000e+00 %286 = bitcast float %285 to i32 %287 = bitcast float %283 to i32 %288 = bitcast float %279 to i32 %289 = insertelement <4 x i32> undef, i32 %286, i32 0 %290 = insertelement <4 x i32> %289, i32 %287, i32 1 %291 = insertelement <4 x i32> %290, i32 %288, i32 2 %292 = insertelement <4 x i32> %291, i32 undef, i32 3 %293 = bitcast <8 x i32> %51 to <32 x i8> %294 = bitcast <4 x i32> %53 to <16 x i8> %295 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %292, <32 x i8> %293, <16 x i8> %294, i32 4) %296 = extractelement <4 x float> %295, i32 0 %297 = extractelement <4 x float> %295, i32 1 %298 = extractelement <4 x float> %295, i32 2 %299 = call float @llvm.pow.f32(float %296, float 0x40019999A0000000) %300 = call float @llvm.pow.f32(float %297, float 0x40019999A0000000) %301 = call float @llvm.pow.f32(float %298, float 0x40019999A0000000) %302 = call float @llvm.AMDGPU.lrp(float %270, float %299, float %251) %303 = call float @llvm.AMDGPU.lrp(float %270, float %300, float %252) %304 = call float @llvm.AMDGPU.lrp(float %270, float %301, float %253) %305 = fmul float %270, 1.990000e+02 %306 = fadd float %305, 1.000000e+00 %307 = call float @fabs(float %207) %308 = call float @llvm.pow.f32(float %307, float %306) %309 = fmul float %306, 0x3FB99999A0000000 %310 = fmul float %309, %308 %311 = fadd float %310, %302 %312 = fmul float %309, %308 %313 = fadd float %312, %303 %314 = fmul float %309, %308 %315 = fadd float %314, %304 %316 = fmul float %311, %265 %317 = fadd float %316, %166 %318 = fmul float %313, %266 %319 = fadd float %318, %167 %320 = fmul float %315, %267 %321 = fadd float %320, %168 %322 = call float @fabs(float %317) %323 = call float @llvm.log2.f32(float %322) %324 = call float @fabs(float %319) %325 = call float @llvm.log2.f32(float %324) %326 = call float @fabs(float %321) %327 = call float @llvm.log2.f32(float %326) %328 = fmul float %323, 0x3FDD1743E0000000 %329 = fmul float %325, 0x3FDD1743E0000000 %330 = fmul float %327, 0x3FDD1743E0000000 %331 = call float @llvm.AMDIL.exp.(float %328) %332 = call float @llvm.AMDIL.exp.(float %329) %333 = call float @llvm.AMDIL.exp.(float %330) %334 = bitcast float %331 to i32 %335 = bitcast float %332 to i32 %336 = bitcast float %333 to i32 %337 = insertelement <4 x i32> undef, i32 %334, i32 0 %338 = insertelement <4 x i32> %337, i32 %335, i32 1 %339 = insertelement <4 x i32> %338, i32 %336, i32 2 %340 = insertelement <4 x i32> %339, i32 undef, i32 3 %341 = bitcast <8 x i32> %63 to <32 x i8> %342 = bitcast <4 x i32> %65 to <16 x i8> %343 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %340, <32 x i8> %341, <16 x i8> %342, i32 3) %344 = extractelement <4 x float> %343, i32 0 %345 = extractelement <4 x float> %343, i32 1 %346 = extractelement <4 x float> %343, i32 2 %347 = fmul float %89, 1.000000e+00 %348 = fadd float %347, 0.000000e+00 %349 = fmul float %90, -1.000000e+00 %350 = fadd float %349, 1.000000e+00 %351 = bitcast float %348 to i32 %352 = bitcast float %350 to i32 %353 = insertelement <2 x i32> undef, i32 %351, i32 0 %354 = insertelement <2 x i32> %353, i32 %352, i32 1 %355 = bitcast <8 x i32> %67 to <32 x i8> %356 = bitcast <4 x i32> %69 to <16 x i8> %357 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %354, <32 x i8> %355, <16 x i8> %356, i32 2) %358 = extractelement <4 x float> %357, i32 0 %359 = extractelement <4 x float> %357, i32 2 %360 = bitcast float %83 to i32 %361 = bitcast float %84 to i32 %362 = insertelement <2 x i32> undef, i32 %360, i32 0 %363 = insertelement <2 x i32> %362, i32 %361, i32 1 %364 = bitcast <8 x i32> %71 to <32 x i8> %365 = bitcast <4 x i32> %73 to <16 x i8> %366 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %363, <32 x i8> %364, <16 x i8> %365, i32 2) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 1 %369 = extractelement <4 x float> %366, i32 2 %370 = call float @llvm.AMDGPU.lrp(float %358, float %344, float %367) %371 = call float @llvm.AMDGPU.lrp(float %358, float %345, float %368) %372 = call float @llvm.AMDGPU.lrp(float %358, float %346, float %369) %373 = fsub float -0.000000e+00, %358 %374 = fmul float %359, %373 %375 = fadd float %374, %358 %376 = bitcast float %370 to i32 %377 = bitcast float %371 to i32 %378 = bitcast float %372 to i32 %379 = insertelement <4 x i32> undef, i32 %376, i32 0 %380 = insertelement <4 x i32> %379, i32 %377, i32 1 %381 = insertelement <4 x i32> %380, i32 %378, i32 2 %382 = insertelement <4 x i32> %381, i32 undef, i32 3 %383 = bitcast <8 x i32> %75 to <32 x i8> %384 = bitcast <4 x i32> %77 to <16 x i8> %385 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %382, <32 x i8> %383, <16 x i8> %384, i32 3) %386 = extractelement <4 x float> %385, i32 0 %387 = extractelement <4 x float> %385, i32 1 %388 = extractelement <4 x float> %385, i32 2 %389 = call float @llvm.AMDGPU.lrp(float %375, float %386, float %370) %390 = call float @llvm.AMDGPU.lrp(float %375, float %387, float %371) %391 = call float @llvm.AMDGPU.lrp(float %375, float %388, float %372) %392 = fsub float -0.000000e+00, %389 %393 = fadd float %392, %30 %394 = fsub float -0.000000e+00, %390 %395 = fadd float %394, %31 %396 = fsub float -0.000000e+00, %391 %397 = fadd float %396, %32 %398 = fmul float %37, %87 %399 = fsub float -0.000000e+00, %33 %400 = fadd float %85, %399 %401 = fsub float -0.000000e+00, %34 %402 = fadd float %86, %401 %403 = fsub float -0.000000e+00, %35 %404 = fadd float %398, %403 %405 = fmul float %404, %27 %406 = fmul float %405, 0x3FF7154CA0000000 %407 = call float @llvm.AMDIL.exp.(float %406) %408 = fsub float -0.000000e+00, %407 %409 = fadd float %408, 1.000000e+00 %410 = fmul float %400, %400 %411 = fmul float %402, %402 %412 = fadd float %411, %410 %413 = fmul float %404, %404 %414 = fadd float %412, %413 %415 = fdiv float 1.000000e+00, %404 %416 = fmul float %414, %28 %417 = fmul float %409, %416 %418 = fmul float %415, %417 %419 = fmul float %418, 0x3FF7154CA0000000 %420 = call float @llvm.AMDIL.exp.(float %419) %421 = call float @llvm.AMDIL.clamp.(float %420, float 0.000000e+00, float 1.000000e+00) %422 = fsub float -0.000000e+00, %421 %423 = fadd float %422, 1.000000e+00 %424 = fmul float %88, 5.000000e-01 %425 = fadd float %424, 5.000000e-01 %426 = bitcast float %425 to i32 %427 = bitcast float %29 to i32 %428 = insertelement <2 x i32> undef, i32 %426, i32 0 %429 = insertelement <2 x i32> %428, i32 %427, i32 1 %430 = bitcast <8 x i32> %59 to <32 x i8> %431 = bitcast <4 x i32> %61 to <16 x i8> %432 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %429, <32 x i8> %430, <16 x i8> %431, i32 2) %433 = extractelement <4 x float> %432, i32 0 %434 = fmul float %423, %433 %435 = fmul float %434, %393 %436 = fadd float %435, %389 %437 = fmul float %434, %395 %438 = fadd float %437, %390 %439 = fmul float %434, %397 %440 = fadd float %439, %391 %441 = fmul float %107, 5.000000e-01 %442 = fsub float -0.000000e+00, %36 %443 = fadd float %442, %87 %444 = fcmp oge float %443, 0.000000e+00 %445 = sext i1 %444 to i32 %446 = bitcast i32 %445 to float %447 = bitcast float %446 to i32 %448 = icmp ne i32 %447, 0 %. = select i1 %448, float %107, float %441 %449 = call i32 @llvm.SI.packf16(float %436, float %438) %450 = bitcast i32 %449 to float %451 = call i32 @llvm.SI.packf16(float %440, float %.) %452 = bitcast i32 %451 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %450, float %452, float %450, float %452) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 1, 1, [m0] ; C8400500 v_interp_p2_f32 v16, [v16], v1, 1, 1, [m0] ; C8410501 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], vcc, 0x0 ; C0C66B00 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800F00 0043020F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v6, v4 ; 7E0C4F04 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v14, v6 ; 7E1C4B06 v_log_f32_e32 v6, v3 ; 7E0C4F03 v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v10, v6 ; 7E144B06 v_log_f32_e32 v6, v2 ; 7E0C4F02 v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v7, v6 ; 7E0E4B06 v_log_f32_e32 v2, v5 ; 7E044F05 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_and_b32_e32 v2, 0x7fffffff, v2 ; 360404FF 7FFFFFFF v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v8, v2 ; 0E040508 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mov_b32_e32 v3, 0xbdcccccd ; 7E0602FF BDCCCCCD v_add_f32_e32 v3, v2, v3 ; 06060702 v_cmp_lt_f32_e64 s[0:1], v3, 0 ; D0020000 00010103 v_interp_p1_f32 v8, v0, 1, 4, [m0] ; C8201100 v_interp_p2_f32 v8, [v8], v1, 1, 4, [m0] ; C8211101 v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000 v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001 v_interp_p1_f32 v4, v0, 1, 3, [m0] ; C8100D00 v_interp_p2_f32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700 v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v19, v0, 2, 0, [m0] ; C84C0200 v_interp_p2_f32 v19, [v19], v1, 2, 0, [m0] ; C84D0201 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[16:19], 0x20 ; C2011120 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s2, 3 ; 04690602 s_buffer_load_dword s2, s[16:19], 0x1c ; C201111C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s2, 0 ; 04690002 s_buffer_load_dword s2, s[16:19], 0x1a ; C201111A s_buffer_load_dword s100, s[16:19], 0x19 ; C2321119 s_buffer_load_dword s101, s[16:19], 0x18 ; C2329118 s_buffer_load_dword s3, s[16:19], 0x16 ; C2019116 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s3, 1 ; 04690203 s_buffer_load_dword s12, s[16:19], 0x15 ; C2061115 s_buffer_load_dword s3, s[16:19], 0x14 ; C2019114 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s3, 2 ; 04690403 s_buffer_load_dword s3, s[16:19], 0x13 ; C2019113 s_buffer_load_dword s6, s[16:19], 0x11 ; C2031111 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s6, 4 ; 04690806 s_buffer_load_dword s6, s[16:19], 0x10 ; C2031110 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s6, 5 ; 04690A06 s_buffer_load_dword s13, s[16:19], 0xe ; C206910E s_buffer_load_dword s14, s[16:19], 0xd ; C207110D s_buffer_load_dword s15, s[16:19], 0xc ; C207910C v_mov_b32_e32 v13, s2 ; 7E1A0202 v_mov_b32_e32 v1, s3 ; 7E020203 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[0:1], s[0:1] ; BE802400 s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mov_b32_e32 v20, 0 ; 7E280280 v_cubeid_f32 v24, v17, v18, v19 ; D2880018 044E2511 v_cubema_f32 v23, v17, v18, v19 ; D28E0017 044E2511 v_cubesc_f32 v22, v17, v18, v19 ; D28A0016 044E2511 v_cubetc_f32 v21, v17, v18, v19 ; D28C0015 044E2511 v_rcp_f32_e64 v29, |v23| ; D354011D 00000117 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v23, v21, v29, v30 ; D2820017 047A3B15 v_mad_f32 v22, v22, v29, v30 ; D2820016 047A3B16 s_load_dwordx4 s[80:83], s[4:5], 0x4 ; C0A80504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[68:71], s[4:5], 0xc ; C0A2050C s_load_dwordx4 s[84:87], s[4:5], 0x10 ; C0AA0510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s0, 6 ; 04690C00 v_writelane_b32 v52, s1, 7 ; 04690E01 v_writelane_b32 v52, s2, 8 ; 04691002 v_writelane_b32 v52, s3, 9 ; 04691203 s_load_dwordx4 s[40:43], s[4:5], 0x18 ; C0940518 s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C s_load_dwordx4 s[16:19], s[4:5], 0x20 ; C0880520 s_load_dwordx4 s[96:99], s[4:5], 0x24 ; C0B00524 s_load_dwordx8 s[88:95], vcc, 0x8 ; C0EC6B08 s_load_dwordx8 s[0:7], vcc, 0x10 ; C0C06B10 s_load_dwordx8 s[72:79], vcc, 0x18 ; C0E46B18 s_load_dwordx8 s[28:35], vcc, 0x20 ; C0CE6B20 s_load_dwordx8 s[20:27], vcc, 0x28 ; C0CA6B28 s_load_dwordx8 s[60:67], vcc, 0x30 ; C0DE6B30 s_load_dwordx8 s[52:59], vcc, 0x38 ; C0DA6B38 s_load_dwordx8 s[44:51], vcc, 0x40 ; C0D66B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[0:7], s[8:11] ; F0800F00 00401516 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v25, v24 ; 7E324F18 v_mul_legacy_f32_e32 v25, 1.0, v25 ; 0E3232F2 v_exp_f32_e32 v28, v25 ; 7E384B19 v_sub_f32_e32 v29, s14, v5 ; 083A0A0E v_sub_f32_e32 v31, s15, v6 ; 083E0C0F v_mul_f32_e32 v32, v31, v31 ; 10403F1F v_mad_f32 v32, v29, v29, v32 ; D2820020 04823B1D v_sub_f32_e32 v33, s13, v3 ; 0842060D v_mad_f32 v32, v33, v33, v32 ; D2820020 04824321 v_max_f32_e32 v32, 0x33d6bf95, v32 ; 204040FF 33D6BF95 v_rsq_clamp_f32_e32 v32, v32 ; 7E405920 v_mul_f32_e32 v34, v29, v32 ; 1044411D v_mul_f32_e32 v35, v31, v32 ; 1046411F v_mul_f32_e32 v36, v35, v17 ; 10482323 v_mad_f32 v36, v18, v34, v36 ; D2820024 04924512 v_mul_f32_e32 v37, v33, v32 ; 104A4121 v_mad_f32 v36, v19, v37, v36 ; D2820024 04924B13 v_add_f32_e32 v36, v36, v36 ; 06484924 v_mad_f32 v27, v36, v19, -v37 ; D282001B 84962724 v_mad_f32 v26, v36, v18, -v34 ; D282001A 848A2524 v_mad_f32 v25, v36, v17, -v35 ; D2820019 848E2324 v_cubeid_f32 v37, v25, v26, v27 ; D2880025 046E3519 v_cubema_f32 v36, v25, v26, v27 ; D28E0024 046E3519 v_cubesc_f32 v35, v25, v26, v27 ; D28A0023 046E3519 v_cubetc_f32 v34, v25, v26, v27 ; D28C0022 046E3519 v_rcp_f32_e64 v42, |v36| ; D354012A 00000124 v_mad_f32 v36, v34, v42, v30 ; D2820024 047A5522 v_mad_f32 v35, v35, v42, v30 ; D2820023 047A5523 image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[28:35], s[84:87] ; F0800700 02A72223 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v37, v36 ; 7E4A4F24 v_mov_b32_e32 v38, 0x400ccccd ; 7E4C02FF 400CCCCD v_mul_legacy_f32_e32 v37, v38, v37 ; 0E4A4B26 v_exp_f32_e32 v37, v37 ; 7E4A4B25 image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[88:95], s[80:83] ; F0800F00 0296270F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v15, v42 ; 7E1E4F2A v_mul_legacy_f32_e32 v15, 1.0, v15 ; 0E1E1EF2 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF v_and_b32_e32 v15, v15, v16 ; 361E210F v_log_f32_e32 v15, v15 ; 7E1E4F0F v_mul_legacy_f32_e32 v15, v38, v15 ; 0E1E1F26 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_sub_f32_e32 v43, 1.0, v15 ; 08561EF2 v_mul_f32_e32 v37, v37, v43 ; 104A5725 v_mov_b32_e32 v28, v27 ; 7E38031B v_cubeid_f32 v47, v25, v26, v27 ; D288002F 046E3519 v_cubema_f32 v46, v25, v26, v27 ; D28E002E 046E3519 v_cubesc_f32 v45, v25, v26, v27 ; D28A002D 046E3519 v_cubetc_f32 v44, v25, v26, v27 ; D28C002C 046E3519 v_rcp_f32_e64 v25, |v46| ; D3540119 0000012E v_mad_f32 v46, v44, v25, v30 ; D282002E 047A332C v_mad_f32 v45, v45, v25, v30 ; D282002D 047A332D image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[72:79], s[68:71] ; F0800700 0232192D s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v28, v27 ; 7E384F1B v_mul_legacy_f32_e32 v28, v38, v28 ; 0E383926 v_exp_f32_e32 v28, v28 ; 7E384B1C v_mad_f32 v28, v15, v28, v37 ; D282001C 0496390F v_mov_b32_e32 v30, 0x3d58adac ; 7E3C02FF 3D58ADAC v_mad_f32 v30, v31, v32, v30 ; D282001E 047A411F v_mov_b32_e32 v31, 0x3f589375 ; 7E3E02FF 3F589375 v_mad_f32 v29, v29, v32, v31 ; D282001D 047E411D v_mul_f32_e32 v31, v29, v29 ; 103E3B1D v_mad_f32 v31, v30, v30, v31 ; D282001F 047E3D1E v_mov_b32_e32 v37, 0x3f076c8b ; 7E4A02FF 3F076C8B v_mad_f32 v32, v33, v32, v37 ; D2820020 04964121 v_mad_f32 v31, v32, v32, v31 ; D282001F 047E4120 v_add_f32_e32 v31, 0, v31 ; 063E3E80 v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F v_mul_f32_e32 v29, v31, v29 ; 103A3B1F v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_mul_f32_e32 v30, v17, v30 ; 103C3D11 v_mad_f32 v29, v29, v18, v30 ; D282001D 047A251D v_mul_f32_e32 v30, v31, v32 ; 103C411F v_mad_f32 v17, v30, v19, v29 ; D2820011 0476271E v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_and_b32_e32 v16, v17, v16 ; 36202111 v_log_f32_e32 v16, v16 ; 7E204F10 v_mov_b32_e32 v17, 0x43470000 ; 7E2202FF 43470000 v_mad_f32 v17, v17, v15, 1.0 ; D2820011 03CA1F11 v_mul_legacy_f32_e32 v16, v17, v16 ; 0E202111 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v17, 0x3dcccccd, v17 ; 102222FF 3DCCCCCD v_mad_f32 v18, v17, v16, v28 ; D2820012 04722111 v_log_f32_e32 v19, v41 ; 7E264F29 v_mul_legacy_f32_e32 v19, v38, v19 ; 0E262726 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v18, v19, v18 ; 10242513 v_log_f32_e32 v19, v23 ; 7E264F17 v_mul_legacy_f32_e32 v19, v38, v19 ; 0E262726 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_mad_f32 v14, 2.0, v14, v18 ; D282000E 044A1CF4 v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_mul_f32_e32 v14, 0x3ee8ba1f, v14 ; 101C1CFF 3EE8BA1F v_exp_f32_e32 v30, v14 ; 7E3C4B0E v_log_f32_e32 v14, v35 ; 7E1C4F23 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v14, v14, v43 ; 101C570E v_log_f32_e32 v18, v26 ; 7E244F1A v_mul_legacy_f32_e32 v18, v38, v18 ; 0E242526 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mad_f32 v14, v15, v18, v14 ; D282000E 043A250F v_mad_f32 v14, v17, v16, v14 ; D282000E 043A2111 v_log_f32_e32 v18, v40 ; 7E244F28 v_mul_legacy_f32_e32 v18, v38, v18 ; 0E242526 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v14, v18, v14 ; 101C1D12 v_log_f32_e32 v18, v22 ; 7E244F16 v_mul_legacy_f32_e32 v18, v38, v18 ; 0E242526 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v10, v18, v10 ; 10141512 v_mad_f32 v10, 2.0, v10, v14 ; D282000A 043A14F4 v_log_f32_e64 v10, |v10| ; D34E010A 0000010A v_mul_f32_e32 v10, 0x3ee8ba1f, v10 ; 101414FF 3EE8BA1F v_exp_f32_e32 v29, v10 ; 7E3A4B0A v_log_f32_e32 v10, v34 ; 7E144F22 v_mul_legacy_f32_e32 v10, v38, v10 ; 0E141526 v_exp_f32_e32 v10, v10 ; 7E144B0A v_mul_f32_e32 v10, v10, v43 ; 1014570A v_log_f32_e32 v14, v25 ; 7E1C4F19 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mad_f32 v10, v15, v14, v10 ; D282000A 042A1D0F v_mad_f32 v10, v17, v16, v10 ; D282000A 042A2111 v_log_f32_e32 v14, v39 ; 7E1C4F27 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v10, v14, v10 ; 1014150E v_log_f32_e32 v14, v21 ; 7E1C4F15 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_mad_f32 v7, 2.0, v7, v10 ; D2820007 042A0EF4 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v28, v7 ; 7E384B07 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[60:67], s[40:43] ; F0800700 014F0E1C v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_add_f32_e32 v7, 0, v9 ; 060E1280 image_sample v[7:8], 5, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[52:59], s[36:39] ; F0800500 012D0707 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v9, 1.0, v7 ; 08120EF2 image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[44:51], s[16:19] ; F0800700 008B0A0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v12, v9 ; 1022130C v_mad_f32 v19, v7, v16, v17 ; D2820013 04462107 v_mul_f32_e32 v21, v11, v9 ; 102A130B v_mad_f32 v18, v7, v15, v21 ; D2820012 04561F07 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mad_f32 v17, v7, v14, v9 ; D2820011 04261D07 s_load_dwordx8 s[0:7], vcc, 0x48 ; C0C06B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[0:7], s[96:99] ; F0800700 03000911 v_mad_f32 v7, -v8, v7, v7 ; D2820007 241E0F08 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v12, v18, v8 ; 10181112 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v7, v10, v12 ; D282000C 04321507 v_sub_f32_e32 v14, s12, v12 ; 081C180C v_readlane_b32 s0, v52, 3 ; 02010734 s_nop 2 ; BF800002 v_mad_f32 v13, v3, s0, -v13 ; D282000D 84340103 v_subrev_f32_e32 v5, s100, v5 ; 0A0A0A64 v_subrev_f32_e32 v6, s101, v6 ; 0A0C0C65 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mad_f32 v5, v5, v5, v6 ; D2820005 041A0B05 v_mad_f32 v5, v13, v13, v5 ; D2820005 04161B0D v_readlane_b32 s0, v52, 4 ; 02010934 s_nop 2 ; BF800002 v_mul_f32_e32 v5, s0, v5 ; 100A0A00 v_readlane_b32 s0, v52, 5 ; 02010B34 s_nop 2 ; BF800002 v_mul_f32_e32 v6, s0, v13 ; 100C1A00 v_mul_f32_e32 v6, 0x3fb8aa65, v6 ; 100C0CFF 3FB8AA65 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_rcp_f32_e32 v6, v13 ; 7E0C550D v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, 0x3fb8aa65, v5 ; 100A0AFF 3FB8AA65 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_mad_f32 v0, 0.5, v4, 0.5 ; D2820000 03C208F0 v_readlane_b32 s0, v52, 6 ; 02010D34 v_readlane_b32 s1, v52, 7 ; 02030F34 v_readlane_b32 s2, v52, 8 ; 02051134 v_readlane_b32 s3, v52, 9 ; 02071334 s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[0:3] ; F0800100 00050000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mad_f32 v1, v0, v14, v12 ; D2820001 04321D00 v_mul_f32_e32 v4, v17, v8 ; 10081111 v_mad_f32 v4, v7, v9, v4 ; D2820004 04121307 v_readlane_b32 s0, v52, 2 ; 02010534 s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 v_mul_f32_e32 v4, v19, v8 ; 10081113 v_mad_f32 v4, v7, v11, v4 ; D2820004 04121707 v_readlane_b32 s0, v52, 1 ; 02010334 s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v0, v0, v5, v4 ; D2820000 04120B00 v_mul_f32_e32 v4, 0.5, v2 ; 100804F0 v_readlane_b32 s0, v52, 0 ; 02010134 s_nop 2 ; BF800002 v_subrev_f32_e32 v3, s0, v3 ; 0A060600 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v2, v4, v2, s[0:1] ; D2000002 00020504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..98] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: F2I TEMP[0].x, IN[2].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+9] 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xyz, IN[0], CONST[ADDR[0].x+9].wwww, TEMP[1] 6: MOV TEMP[2].xyz, TEMP[1].xyzx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[0].xyz, CONST[ADDR[0].x+9].xyzx 9: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 10: MAD TEMP[3], TEMP[1].xxxx, CONST[0], TEMP[3] 11: MAD TEMP[3], TEMP[1].zzzz, CONST[2], TEMP[3] 12: ADD TEMP[3], TEMP[3], CONST[3] 13: RCP TEMP[4].x, TEMP[3].wwww 14: MOV TEMP[2].w, TEMP[4].xxxx 15: MUL TEMP[4].xy, TEMP[4].xxxx, TEMP[3] 16: MOV TEMP[4].xy, TEMP[4].xyxx 17: MOV TEMP[3], TEMP[3] 18: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 19: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 20: MOV TEMP[5].zw, TEMP[5].wwzw 21: MUL TEMP[6].yw, TEMP[1].yyyy, CONST[5].xxzy 22: MOV TEMP[2].yw, TEMP[6].wyww 23: MAD TEMP[6].xy, TEMP[1].xxxx, CONST[4], TEMP[2].ywzw 24: MOV TEMP[2].xy, TEMP[6].xyxx 25: MAD TEMP[1].xy, TEMP[1].zzzz, CONST[6], TEMP[2] 26: MOV TEMP[2].xy, TEMP[1].xyxx 27: ADD TEMP[1].xy, TEMP[2], CONST[7] 28: MOV TEMP[2].xy, TEMP[1].xyxx 29: MAD TEMP[1].xy, TEMP[2], IMM[0].xyzz, IMM[0].yyyy 30: MOV TEMP[1].xy, TEMP[1].xyxx 31: MOV TEMP[5].xy, IN[1].xyxx 32: MOV TEMP[0].w, IMM[0].wwww 33: MOV TEMP[1].zw, IMM[0].wwzw 34: MOV TEMP[4].zw, IMM[0].wwzw 35: MOV OUT[1], TEMP[5] 36: MOV OUT[0], TEMP[3] 37: MOV OUT[2], TEMP[0] 38: MOV OUT[3], TEMP[1] 39: MOV OUT[4], TEMP[4] 40: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = fptosi float %58 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 144 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 148 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %61, 4 %69 = add i32 %68, 152 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = bitcast float %60 to i32 %72 = shl i32 %71, 4 %73 = add i32 %72, 156 %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %73) %75 = fmul float %45, %74 %76 = fadd float %75, %64 %77 = shl i32 %71, 4 %78 = add i32 %77, 156 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %46, %79 %81 = fadd float %80, %67 %82 = shl i32 %71, 4 %83 = add i32 %82, 156 %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %83) %85 = fmul float %47, %84 %86 = fadd float %85, %70 %87 = bitcast float %60 to i32 %88 = shl i32 %87, 4 %89 = add i32 %88, 144 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = shl i32 %87, 4 %92 = add i32 %91, 148 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %87, 4 %95 = add i32 %94, 152 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = fmul float %81, %17 %98 = fmul float %81, %18 %99 = fmul float %81, %19 %100 = fmul float %81, %20 %101 = fmul float %76, %13 %102 = fadd float %101, %97 %103 = fmul float %76, %14 %104 = fadd float %103, %98 %105 = fmul float %76, %15 %106 = fadd float %105, %99 %107 = fmul float %76, %16 %108 = fadd float %107, %100 %109 = fmul float %86, %21 %110 = fadd float %109, %102 %111 = fmul float %86, %22 %112 = fadd float %111, %104 %113 = fmul float %86, %23 %114 = fadd float %113, %106 %115 = fmul float %86, %24 %116 = fadd float %115, %108 %117 = fadd float %110, %25 %118 = fadd float %112, %26 %119 = fadd float %114, %27 %120 = fadd float %116, %28 %121 = fdiv float 1.000000e+00, %120 %122 = fmul float %121, %117 %123 = fmul float %121, %118 %124 = fadd float %76, %39 %125 = fadd float %81, %40 %126 = fmul float %124, %37 %127 = fmul float %125, %38 %128 = fmul float %81, %31 %129 = fmul float %81, %32 %130 = fmul float %76, %29 %131 = fadd float %130, %128 %132 = fmul float %76, %30 %133 = fadd float %132, %129 %134 = fmul float %86, %33 %135 = fadd float %134, %131 %136 = fmul float %86, %34 %137 = fadd float %136, %133 %138 = fadd float %135, %35 %139 = fadd float %137, %36 %140 = fmul float %138, 5.000000e-01 %141 = fadd float %140, -5.000000e-01 %142 = fmul float %139, -5.000000e-01 %143 = fadd float %142, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float %126, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %90, float %93, float %96, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %141, float %143, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %122, float %123, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %118, float %119, float %120) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_add_i32_e32 v2, 0x94, v1 ; 4A0402FF 00000094 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_add_i32_e32 v3, 0x9c, v1 ; 4A0602FF 0000009C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v5, v3, v2 ; D2820008 040A0705 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s4, v8 ; 06121004 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s4, v9 ; 10121204 v_add_i32_e32 v10, 0x90, v1 ; 4A1402FF 00000090 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, v4, v3, v10 ; D282000B 042A0704 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v12, s4, v11 ; 06181604 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v12 ; 10181804 buffer_load_format_xyzw v[13:16], v0, s[12:15], 0 idxen ; E00C2000 80030D00 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v13, v14, v12, v9 ; F800020F 090C0E0D v_add_i32_e32 v0, 0x98, v1 ; 4A0002FF 00000098 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 exp 15, 33, 0, 0, 0, v10, v2, v0, v1 ; F800021F 0100020A s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v8 ; 10041004 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v11, s4, v2 ; D2820002 0408090B v_mad_f32 v0, v6, v3, v0 ; D2820000 04020706 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 v_mad_f32 v2, 0.5, v2, -0.5 ; D2820002 03C604F0 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v8 ; 10061004 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v11, s4, v3 ; D2820003 040C090B s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mad_f32 v3, -0.5, v3, -0.5 ; D2820003 03C606F1 v_mov_b32_e32 v4, 0 ; 7E080280 exp 15, 34, 0, 0, 0, v2, v3, v4, v1 ; F800022F 01040302 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v8 ; 10041004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v11, s4, v2 ; D2820002 0408090B s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v8 ; 10061004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v11, s4, v3 ; D2820003 040C090B s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_rcp_f32_e32 v5, v3 ; 7E0A5503 v_mul_f32_e32 v6, v2, v5 ; 100C0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v8 ; 100E1004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v11, s4, v7 ; D2820007 041C090B s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 exp 15, 35, 0, 0, 0, v5, v6, v4, v1 ; F800023F 01040605 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v8 ; 10021004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v11, s4, v1 ; D2820001 0404090B s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v7, v2, v0, v3 ; F80008CF 03000207 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL IN[3], GENERIC[11], PERSPECTIVE DCL IN[4], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[14] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..10], LOCAL IMM[0] FLT32 { -0.1000, 0.0000, -1.0000, 1.0000} IMM[1] FLT32 { 0.0010, -0.1471, -0.2889, 0.4360} IMM[2] FLT32 { 0.6150, -0.5150, -0.1000, 0.5000} IMM[3] FLT32 { 1.0000, 1.1398, -0.3947, -0.5806} IMM[4] FLT32 { 1.0000, 2.0321, 1.4427, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy 2: MAD TEMP[1].xyz, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xy, IN[1].xyyy 4: TEX TEMP[2], TEMP[2], SAMP[0], 2D 5: MOV TEMP[3].xw, TEMP[2] 6: ADD TEMP[4].x, TEMP[2].wwww, IMM[0].xxxx 7: FSGE TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy 8: UIF TEMP[5].xxxx :2 9: MOV TEMP[5].x, IMM[0].yyyy 10: ELSE :2 11: MOV TEMP[5].x, IMM[0].zzzz 12: ENDIF 13: MOV TEMP[5].x, TEMP[5].xxxx 14: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 15: UIF TEMP[6].xxxx :2 16: MOV TEMP[6].x, IMM[0].yyyy 17: ELSE :2 18: MOV TEMP[6].x, IMM[0].zzzz 19: ENDIF 20: MOV TEMP[5].y, TEMP[6].xxxx 21: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 22: UIF TEMP[6].xxxx :2 23: MOV TEMP[6].x, IMM[0].yyyy 24: ELSE :2 25: MOV TEMP[6].x, IMM[0].zzzz 26: ENDIF 27: MOV TEMP[5].z, TEMP[6].xxxx 28: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 29: UIF TEMP[6].xxxx :2 30: ELSE :2 31: ENDIF 32: FSLT TEMP[5].xyz, TEMP[5].xyzz, IMM[0].yyyy 33: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 34: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 35: UIF TEMP[6].xxxx :2 36: KILL 37: ENDIF 38: MAD TEMP[5].xy, IN[3], IMM[0].wzww, IMM[0].ywyy 39: MOV TEMP[5].xy, TEMP[5].xyyy 40: TEX TEMP[5], TEMP[5], SAMP[5], 2D 41: MOV TEMP[4].z, TEMP[5] 42: ABS TEMP[6].x, TEMP[5] 43: MOV TEMP[6], -TEMP[6].xxxx 44: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].yyyy 45: UIF TEMP[7].xxxx :2 46: MOV TEMP[7].x, IMM[0].zzzz 47: ELSE :2 48: MOV TEMP[7].x, IMM[0].yyyy 49: ENDIF 50: MOV TEMP[7].x, TEMP[7].xxxx 51: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].yyyy 52: UIF TEMP[8].xxxx :2 53: MOV TEMP[8].x, IMM[0].zzzz 54: ELSE :2 55: MOV TEMP[8].x, IMM[0].yyyy 56: ENDIF 57: MOV TEMP[7].y, TEMP[8].xxxx 58: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].yyyy 59: UIF TEMP[8].xxxx :2 60: MOV TEMP[8].x, IMM[0].zzzz 61: ELSE :2 62: MOV TEMP[8].x, IMM[0].yyyy 63: ENDIF 64: MOV TEMP[7].z, TEMP[8].xxxx 65: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].yyyy 66: UIF TEMP[6].xxxx :2 67: MOV TEMP[6].x, IMM[0].zzzz 68: ELSE :2 69: MOV TEMP[6].x, IMM[0].yyyy 70: ENDIF 71: MOV TEMP[7].w, TEMP[6].xxxx 72: MOV TEMP[6].z, TEMP[7] 73: FSLT TEMP[7].xyz, TEMP[7].xyzz, IMM[0].yyyy 74: OR TEMP[8].x, TEMP[7].xxxx, TEMP[7].zzzz 75: OR TEMP[8].x, TEMP[8].xxxx, TEMP[7].yyyy 76: UIF TEMP[8].xxxx :2 77: KILL 78: ENDIF 79: RCP TEMP[6].x, CONST[0].xxxx 80: RCP TEMP[7].x, CONST[0].yyyy 81: MOV TEMP[6].y, TEMP[7].xxxx 82: MUL TEMP[1].yw, TEMP[6].xxzy, TEMP[1].xxzy 83: MOV TEMP[4].yw, TEMP[1].wyww 84: MUL TEMP[1].xy, IMM[1].xxxx, IN[2] 85: MOV TEMP[1].xy, TEMP[1].xyyy 86: TEX TEMP[1], TEMP[1], SAMP[1], 2D 87: MOV TEMP[6].xw, TEMP[1].xxxw 88: MAD TEMP[7].xy, TEMP[4].ywzw, IMM[0].wzww, IMM[0].ywyy 89: MOV TEMP[7].xy, TEMP[7].xyyy 90: TEX TEMP[7], TEMP[7], SAMP[2], 2D 91: MOV TEMP[8].w, TEMP[7].xyxw 92: DP3 TEMP[9].x, IMM[1].yzww, TEMP[1].xyzz 93: MOV TEMP[9].y, TEMP[9].xxxx 94: DP3 TEMP[10].x, IMM[2].xyzz, TEMP[1].xyzz 95: MOV TEMP[9].z, TEMP[10].xxxx 96: DP3 TEMP[10].x, IMM[1].yzww, TEMP[2].xyzz 97: MOV TEMP[6].y, TEMP[10].xxxx 98: DP3 TEMP[10].x, IMM[2].xyzz, TEMP[2].xyzz 99: MOV TEMP[6].z, TEMP[10].xxxx 100: LRP TEMP[1].yz, TEMP[1].wwww, TEMP[9], TEMP[6] 101: MOV TEMP[3].yz, TEMP[1].zyzz 102: DP2 TEMP[6].x, IMM[3].xyyy, TEMP[3].xzzz 103: DP3 TEMP[1].x, IMM[3].xzww, TEMP[3].xyzz 104: MOV TEMP[6].y, TEMP[1].xxxx 105: DP2 TEMP[1].x, IMM[4].xyyy, TEMP[3].xyyy 106: MOV TEMP[6].z, TEMP[1].xxxx 107: MUL TEMP[1].xyz, TEMP[7], CONST[1].xxxx 108: MOV TEMP[3].xyz, TEMP[1].xyzx 109: MAD TEMP[1].y, TEMP[7].wwww, -CONST[1].xxxx, -IMM[0].zzzz 110: MOV TEMP[7].xyz, TEMP[6].xyzz 111: TEX TEMP[7], TEMP[7], SAMP[4], 3D 112: MAD TEMP[1].xyz, TEMP[7], TEMP[1].yyyy, TEMP[3] 113: MOV TEMP[3].xyz, TEMP[1].xyzx 114: MOV TEMP[1].xy, IN[1].zwww 115: TEX TEMP[1], TEMP[1], SAMP[6], 2D 116: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[3], TEMP[1] 117: MOV TEMP[8].xyz, TEMP[1].xyzx 118: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 119: MOV TEMP[1].xyz, TEMP[1].xyzz 120: TEX TEMP[1], TEMP[1], SAMP[7], 3D 121: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[1], TEMP[8] 122: MOV TEMP[4].xyz, TEMP[1].xyzx 123: MAD TEMP[1].x, IN[4].yyyy, IMM[2].wwww, IMM[2].wwww 124: MOV TEMP[3].x, TEMP[1].xxxx 125: MOV TEMP[3].y, CONST[4].wwww 126: MOV TEMP[1].xy, TEMP[3].xyyy 127: TEX TEMP[1].xw, TEMP[1], SAMP[3], 2D 128: MOV TEMP[6].w, TEMP[1].wwww 129: ADD TEMP[5].xyz, -CONST[2], IN[2] 130: MOV TEMP[3].z, TEMP[5].xyzx 131: DP3 TEMP[3].x, TEMP[5].xyzz, TEMP[5].xyzz 132: MUL TEMP[7].xy, TEMP[3].xzzw, CONST[4].yxzw 133: MUL TEMP[8].y, TEMP[7].yyyy, IMM[4].zzzz 134: EX2 TEMP[8].x, TEMP[8].yyyy 135: ADD TEMP[8].y, -TEMP[8].xxxx, IMM[0].wwww 136: MUL TEMP[7].x, TEMP[8].yyyy, TEMP[7].xxxx 137: RCP TEMP[5].x, TEMP[5].zzzz 138: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 139: MUL TEMP[5].x, TEMP[5].xxxx, IMM[4].zzzz 140: MOV TEMP[3].x, TEMP[5].xxxx 141: EX2 TEMP[5].x, TEMP[5].xxxx 142: MOV_SAT TEMP[3].x, TEMP[5].xxxx 143: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].wwww 144: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 145: ADD TEMP[3].xyz, -TEMP[4], CONST[3] 146: MOV TEMP[6].xyz, TEMP[3].xyzx 147: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[6], TEMP[4] 148: MOV TEMP[1].xyz, TEMP[1].xyzx 149: MOV TEMP[1].w, TEMP[2].wwww 150: MOV OUT[0], TEMP[1] 151: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %84 = fmul float %15, %40 %85 = fadd float %84, %41 %86 = fmul float %14, %36 %87 = fadd float %86, %38 %88 = fmul float %85, %37 %89 = fadd float %88, %39 %90 = bitcast float %74 to i32 %91 = bitcast float %75 to i32 %92 = insertelement <2 x i32> undef, i32 %90, i32 0 %93 = insertelement <2 x i32> %92, i32 %91, i32 1 %94 = bitcast <8 x i32> %43 to <32 x i8> %95 = bitcast <4 x i32> %45 to <16 x i8> %96 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %94, <16 x i8> %95, i32 2) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = extractelement <4 x float> %96, i32 3 %101 = fadd float %100, 0xBFB99999A0000000 %102 = fcmp oge float %101, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float 0.000000e+00, float -1.000000e+00 %107 = fcmp oge float %101, 0.000000e+00 %108 = sext i1 %107 to i32 %109 = bitcast i32 %108 to float %110 = bitcast float %109 to i32 %111 = icmp ne i32 %110, 0 %temp24.0 = select i1 %111, float 0.000000e+00, float -1.000000e+00 %112 = fcmp oge float %101, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = bitcast i32 %113 to float %115 = bitcast float %114 to i32 %116 = icmp ne i32 %115, 0 %.71 = select i1 %116, float 0.000000e+00, float -1.000000e+00 %117 = fcmp oge float %101, 0.000000e+00 %118 = sext i1 %117 to i32 %119 = bitcast i32 %118 to float %120 = bitcast float %119 to i32 %121 = icmp ne i32 %120, 0 %122 = fcmp olt float %., 0.000000e+00 %123 = sext i1 %122 to i32 %124 = fcmp olt float %temp24.0, 0.000000e+00 %125 = sext i1 %124 to i32 %126 = fcmp olt float %.71, 0.000000e+00 %127 = sext i1 %126 to i32 %128 = bitcast i32 %123 to float %129 = bitcast i32 %125 to float %130 = bitcast i32 %127 to float %131 = bitcast float %128 to i32 %132 = bitcast float %130 to i32 %133 = or i32 %131, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = bitcast float %129 to i32 %137 = or i32 %135, %136 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = icmp ne i32 %139, 0 br i1 %140, label %IF54, label %ENDIF53 IF54: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF53 ENDIF53: ; preds = %main_body, %IF54 %141 = fmul float %81, 1.000000e+00 %142 = fadd float %141, 0.000000e+00 %143 = fmul float %82, -1.000000e+00 %144 = fadd float %143, 1.000000e+00 %145 = bitcast float %142 to i32 %146 = bitcast float %144 to i32 %147 = insertelement <2 x i32> undef, i32 %145, i32 0 %148 = insertelement <2 x i32> %147, i32 %146, i32 1 %149 = bitcast <8 x i32> %63 to <32 x i8> %150 = bitcast <4 x i32> %65 to <16 x i8> %151 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %148, <32 x i8> %149, <16 x i8> %150, i32 2) %152 = extractelement <4 x float> %151, i32 0 %153 = extractelement <4 x float> %151, i32 2 %154 = call float @fabs(float %152) %155 = fsub float -0.000000e+00, %154 %156 = fsub float -0.000000e+00, %154 %157 = fsub float -0.000000e+00, %154 %158 = fsub float -0.000000e+00, %154 %159 = fcmp oge float %155, 0.000000e+00 %160 = sext i1 %159 to i32 %161 = bitcast i32 %160 to float %162 = bitcast float %161 to i32 %163 = icmp ne i32 %162, 0 %.72 = select i1 %163, float -1.000000e+00, float 0.000000e+00 %164 = fcmp oge float %156, 0.000000e+00 %165 = sext i1 %164 to i32 %166 = bitcast i32 %165 to float %167 = bitcast float %166 to i32 %168 = icmp ne i32 %167, 0 %temp32.0 = select i1 %168, float -1.000000e+00, float 0.000000e+00 %169 = fcmp oge float %157, 0.000000e+00 %170 = sext i1 %169 to i32 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = icmp ne i32 %172, 0 %.73 = select i1 %173, float -1.000000e+00, float 0.000000e+00 %174 = fcmp oge float %158, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = icmp ne i32 %177, 0 %179 = fcmp olt float %.72, 0.000000e+00 %180 = sext i1 %179 to i32 %181 = fcmp olt float %temp32.0, 0.000000e+00 %182 = sext i1 %181 to i32 %183 = fcmp olt float %.73, 0.000000e+00 %184 = sext i1 %183 to i32 %185 = bitcast i32 %180 to float %186 = bitcast i32 %182 to float %187 = bitcast i32 %184 to float %188 = bitcast float %185 to i32 %189 = bitcast float %187 to i32 %190 = or i32 %188, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = bitcast float %186 to i32 %194 = or i32 %192, %193 %195 = bitcast i32 %194 to float %196 = bitcast float %195 to i32 %197 = icmp ne i32 %196, 0 br i1 %197, label %IF69, label %ENDIF68 IF69: ; preds = %ENDIF53 call void @llvm.AMDGPU.kilp() br label %ENDIF68 ENDIF68: ; preds = %ENDIF53, %IF69 %198 = fdiv float 1.000000e+00, %24 %199 = fdiv float 1.000000e+00, %25 %200 = fmul float %198, %87 %201 = fmul float %199, %89 %202 = fmul float 0x3F50624DE0000000, %78 %203 = fmul float 0x3F50624DE0000000, %79 %204 = bitcast float %202 to i32 %205 = bitcast float %203 to i32 %206 = insertelement <2 x i32> undef, i32 %204, i32 0 %207 = insertelement <2 x i32> %206, i32 %205, i32 1 %208 = bitcast <8 x i32> %47 to <32 x i8> %209 = bitcast <4 x i32> %49 to <16 x i8> %210 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %207, <32 x i8> %208, <16 x i8> %209, i32 2) %211 = extractelement <4 x float> %210, i32 0 %212 = extractelement <4 x float> %210, i32 1 %213 = extractelement <4 x float> %210, i32 2 %214 = extractelement <4 x float> %210, i32 3 %215 = fmul float %200, 1.000000e+00 %216 = fadd float %215, 0.000000e+00 %217 = fmul float %201, -1.000000e+00 %218 = fadd float %217, 1.000000e+00 %219 = bitcast float %216 to i32 %220 = bitcast float %218 to i32 %221 = insertelement <2 x i32> undef, i32 %219, i32 0 %222 = insertelement <2 x i32> %221, i32 %220, i32 1 %223 = bitcast <8 x i32> %51 to <32 x i8> %224 = bitcast <4 x i32> %53 to <16 x i8> %225 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %222, <32 x i8> %223, <16 x i8> %224, i32 2) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 1 %228 = extractelement <4 x float> %225, i32 2 %229 = extractelement <4 x float> %225, i32 3 %230 = fmul float 0xBFC2D527E0000000, %211 %231 = fmul float 0xBFD27CAEA0000000, %212 %232 = fadd float %231, %230 %233 = fmul float 0x3FDBE76C80000000, %213 %234 = fadd float %232, %233 %235 = fmul float 0x3FE3AE1480000000, %211 %236 = fmul float 0xBFE07ACC40000000, %212 %237 = fadd float %236, %235 %238 = fmul float 0xBFB99A4160000000, %213 %239 = fadd float %237, %238 %240 = fmul float 0xBFC2D527E0000000, %97 %241 = fmul float 0xBFD27CAEA0000000, %98 %242 = fadd float %241, %240 %243 = fmul float 0x3FDBE76C80000000, %99 %244 = fadd float %242, %243 %245 = fmul float 0x3FE3AE1480000000, %97 %246 = fmul float 0xBFE07ACC40000000, %98 %247 = fadd float %246, %245 %248 = fmul float 0xBFB99A4160000000, %99 %249 = fadd float %247, %248 %250 = call float @llvm.AMDGPU.lrp(float %214, float %234, float %244) %251 = call float @llvm.AMDGPU.lrp(float %214, float %239, float %249) %252 = fmul float 1.000000e+00, %97 %253 = fmul float 0x3FF23CBE60000000, %251 %254 = fadd float %252, %253 %255 = fmul float 1.000000e+00, %97 %256 = fmul float 0xBFD941F220000000, %250 %257 = fadd float %256, %255 %258 = fmul float 0xBFE2944680000000, %251 %259 = fadd float %257, %258 %260 = fmul float 1.000000e+00, %97 %261 = fmul float 0x400041C2E0000000, %250 %262 = fadd float %260, %261 %263 = fmul float %226, %26 %264 = fmul float %227, %26 %265 = fmul float %228, %26 %266 = fsub float -0.000000e+00, %26 %267 = fmul float %229, %266 %268 = fadd float %267, 1.000000e+00 %269 = bitcast float %254 to i32 %270 = bitcast float %259 to i32 %271 = bitcast float %262 to i32 %272 = insertelement <4 x i32> undef, i32 %269, i32 0 %273 = insertelement <4 x i32> %272, i32 %270, i32 1 %274 = insertelement <4 x i32> %273, i32 %271, i32 2 %275 = insertelement <4 x i32> %274, i32 undef, i32 3 %276 = bitcast <8 x i32> %59 to <32 x i8> %277 = bitcast <4 x i32> %61 to <16 x i8> %278 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %275, <32 x i8> %276, <16 x i8> %277, i32 3) %279 = extractelement <4 x float> %278, i32 0 %280 = extractelement <4 x float> %278, i32 1 %281 = extractelement <4 x float> %278, i32 2 %282 = fmul float %279, %268 %283 = fadd float %282, %263 %284 = fmul float %280, %268 %285 = fadd float %284, %264 %286 = fmul float %281, %268 %287 = fadd float %286, %265 %288 = bitcast float %76 to i32 %289 = bitcast float %77 to i32 %290 = insertelement <2 x i32> undef, i32 %288, i32 0 %291 = insertelement <2 x i32> %290, i32 %289, i32 1 %292 = bitcast <8 x i32> %67 to <32 x i8> %293 = bitcast <4 x i32> %69 to <16 x i8> %294 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %291, <32 x i8> %292, <16 x i8> %293, i32 2) %295 = extractelement <4 x float> %294, i32 0 %296 = extractelement <4 x float> %294, i32 1 %297 = extractelement <4 x float> %294, i32 2 %298 = call float @llvm.AMDGPU.lrp(float %152, float %283, float %295) %299 = call float @llvm.AMDGPU.lrp(float %152, float %285, float %296) %300 = call float @llvm.AMDGPU.lrp(float %152, float %287, float %297) %301 = fsub float -0.000000e+00, %152 %302 = fmul float %153, %301 %303 = fadd float %302, %152 %304 = bitcast float %298 to i32 %305 = bitcast float %299 to i32 %306 = bitcast float %300 to i32 %307 = insertelement <4 x i32> undef, i32 %304, i32 0 %308 = insertelement <4 x i32> %307, i32 %305, i32 1 %309 = insertelement <4 x i32> %308, i32 %306, i32 2 %310 = insertelement <4 x i32> %309, i32 undef, i32 3 %311 = bitcast <8 x i32> %71 to <32 x i8> %312 = bitcast <4 x i32> %73 to <16 x i8> %313 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %310, <32 x i8> %311, <16 x i8> %312, i32 3) %314 = extractelement <4 x float> %313, i32 0 %315 = extractelement <4 x float> %313, i32 1 %316 = extractelement <4 x float> %313, i32 2 %317 = call float @llvm.AMDGPU.lrp(float %303, float %314, float %298) %318 = call float @llvm.AMDGPU.lrp(float %303, float %315, float %299) %319 = call float @llvm.AMDGPU.lrp(float %303, float %316, float %300) %320 = fmul float %83, 5.000000e-01 %321 = fadd float %320, 5.000000e-01 %322 = bitcast float %321 to i32 %323 = bitcast float %35 to i32 %324 = insertelement <2 x i32> undef, i32 %322, i32 0 %325 = insertelement <2 x i32> %324, i32 %323, i32 1 %326 = bitcast <8 x i32> %55 to <32 x i8> %327 = bitcast <4 x i32> %57 to <16 x i8> %328 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %325, <32 x i8> %326, <16 x i8> %327, i32 2) %329 = extractelement <4 x float> %328, i32 0 %330 = fsub float -0.000000e+00, %27 %331 = fadd float %330, %78 %332 = fsub float -0.000000e+00, %28 %333 = fadd float %332, %79 %334 = fsub float -0.000000e+00, %29 %335 = fadd float %334, %80 %336 = fmul float %331, %331 %337 = fmul float %333, %333 %338 = fadd float %337, %336 %339 = fmul float %335, %335 %340 = fadd float %338, %339 %341 = fmul float %340, %34 %342 = fmul float %335, %33 %343 = fmul float %342, 0x3FF7154CA0000000 %344 = call float @llvm.AMDIL.exp.(float %343) %345 = fsub float -0.000000e+00, %344 %346 = fadd float %345, 1.000000e+00 %347 = fmul float %346, %341 %348 = fdiv float 1.000000e+00, %335 %349 = fmul float %348, %347 %350 = fmul float %349, 0x3FF7154CA0000000 %351 = call float @llvm.AMDIL.exp.(float %350) %352 = call float @llvm.AMDIL.clamp.(float %351, float 0.000000e+00, float 1.000000e+00) %353 = fsub float -0.000000e+00, %352 %354 = fadd float %353, 1.000000e+00 %355 = fmul float %354, %329 %356 = fsub float -0.000000e+00, %317 %357 = fadd float %356, %30 %358 = fsub float -0.000000e+00, %318 %359 = fadd float %358, %31 %360 = fsub float -0.000000e+00, %319 %361 = fadd float %360, %32 %362 = fmul float %355, %357 %363 = fadd float %362, %317 %364 = fmul float %355, %359 %365 = fadd float %364, %318 %366 = fmul float %355, %361 %367 = fadd float %366, %319 %368 = call i32 @llvm.SI.packf16(float %363, float %365) %369 = bitcast i32 %368 to float %370 = call i32 @llvm.SI.packf16(float %367, float %100) %371 = bitcast i32 %370 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %369, float %371, float %369, float %371) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[12:19], s[8:11] ; F0800F00 00430404 v_mov_b32_e32 v8, 0xbdcccccd ; 7E1002FF BDCCCCCD s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v8, v7, v8 ; 06101107 v_cmp_ge_f32_e64 s[0:1], v8, 0 ; D00C0000 00010108 v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; D2000008 00018280 v_cmp_ne_i32_e64 s[0:1], v8, 0 ; D10A0000 00010108 v_cndmask_b32_e64 v8, -1.0, 0, s[0:1] ; D2000008 180100F3 v_cmp_lt_f32_e64 s[0:1], v8, 0 ; D0020000 00010108 v_interp_p1_f32 v8, v0, 1, 3, [m0] ; C8200D00 v_interp_p2_f32 v8, [v8], v1, 1, 3, [m0] ; C8210D01 v_interp_p1_f32 v14, v0, 1, 2, [m0] ; C8380900 v_interp_p2_f32 v14, [v14], v1, 1, 2, [m0] ; C8390901 v_interp_p1_f32 v15, v0, 0, 2, [m0] ; C83C0800 v_interp_p2_f32 v15, [v15], v1, 0, 2, [m0] ; C83D0801 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 v_interp_p1_f32 v10, v0, 1, 1, [m0] ; C8280500 v_interp_p2_f32 v10, [v10], v1, 1, 1, [m0] ; C8290501 v_interp_p1_f32 v11, v0, 0, 1, [m0] ; C82C0400 v_interp_p2_f32 v11, [v11], v1, 0, 1, [m0] ; C82D0401 v_interp_p1_f32 v13, v0, 3, 0, [m0] ; C8340300 v_interp_p2_f32 v13, [v13], v1, 3, 0, [m0] ; C8350301 v_interp_p1_f32 v12, v0, 2, 0, [m0] ; C8300200 v_interp_p2_f32 v12, [v12], v1, 2, 0, [m0] ; C8310201 s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s14, s[20:23], 0x38 ; C2071538 s_buffer_load_dword s15, s[20:23], 0x15 ; C2079515 s_buffer_load_dword s24, s[20:23], 0x14 ; C20C1514 s_buffer_load_dword s25, s[20:23], 0x13 ; C20C9513 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[0:1] ; BE8C2400 s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s17, s[20:23], 0x39 ; C2089539 s_buffer_load_dword s18, s[20:23], 0x17 ; C2091517 s_buffer_load_dword s19, s[20:23], 0x16 ; C2099516 s_buffer_load_dword s2, s[20:23], 0x11 ; C2011511 s_buffer_load_dword s3, s[20:23], 0x10 ; C2019510 s_buffer_load_dword s0, s[20:23], 0xe ; C200150E s_buffer_load_dword s9, s[20:23], 0xd ; C204950D s_buffer_load_dword s1, s[20:23], 0xc ; C200950C s_buffer_load_dword s8, s[20:23], 0xa ; C204150A s_buffer_load_dword s10, s[20:23], 0x9 ; C2051509 s_buffer_load_dword s11, s[20:23], 0x8 ; C2059508 s_buffer_load_dword s16, s[20:23], 0x4 ; C2081504 s_buffer_load_dword s28, s[20:23], 0x1 ; C20E1501 s_buffer_load_dword s29, s[20:23], 0x0 ; C20E9500 v_mov_b32_e32 v19, s14 ; 7E26020E v_mov_b32_e32 v18, s15 ; 7E24020F v_mov_b32_e32 v20, s24 ; 7E280218 v_mov_b32_e32 v1, s25 ; 7E020219 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v17, 1.0, v14 ; 08221CF2 v_add_f32_e32 v16, 0, v15 ; 06201E80 s_load_dwordx4 s[12:15], s[4:5], 0x14 ; C0860514 s_load_dwordx8 s[20:27], s[6:7], 0x28 ; C0CA0728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:17], 15, 0, 0, 0, 0, 0, 0, 0, v[16:17], s[20:27], s[12:15] ; F0800F00 00650E10 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[12:13], -|v14|, 0 ; D00C010C 2001010E v_cndmask_b32_e64 v21, 0, -1, s[12:13] ; D2000815 00318280 v_cmp_ne_i32_e64 s[12:13], v21, 0 ; D10A000C 00010115 v_cndmask_b32_e64 v21, 0, -1.0, s[12:13] ; D2000815 0031E680 v_cmp_lt_f32_e64 s[12:13], v21, 0 ; D002000C 00010115 s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E v_mul_f32_e32 v22, 0x3a83126f, v10 ; 102C14FF 3A83126F v_mul_f32_e32 v21, 0x3a83126f, v11 ; 102A16FF 3A83126F s_load_dwordx4 s[68:71], s[4:5], 0x4 ; C0A20504 s_load_dwordx4 s[48:51], s[4:5], 0x8 ; C0980508 s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C s_load_dwordx4 s[72:75], s[4:5], 0x10 ; C0A40510 s_load_dwordx4 s[44:47], s[4:5], 0x18 ; C0960518 s_load_dwordx4 s[32:35], s[4:5], 0x1c ; C090051C s_load_dwordx8 s[76:83], s[6:7], 0x8 ; C0E60708 s_load_dwordx8 s[60:67], s[6:7], 0x10 ; C0DE0710 s_load_dwordx8 s[20:27], s[6:7], 0x18 ; C0CA0718 s_load_dwordx8 s[84:91], s[6:7], 0x20 ; C0EA0720 s_load_dwordx8 s[52:59], s[6:7], 0x30 ; C0DA0730 s_load_dwordx8 s[36:43], s[6:7], 0x38 ; C0D20738 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[21:22], s[76:83], s[68:71] ; F0800F00 02331515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v25, 0x3f1d70a4, v21 ; 10322AFF 3F1D70A4 v_mov_b32_e32 v26, 0xbf03d662 ; 7E3402FF BF03D662 v_mad_f32 v25, v22, v26, v25 ; D2820019 04663516 v_mov_b32_e32 v27, 0xbdccd20b ; 7E3602FF BDCCD20B v_mad_f32 v25, v23, v27, v25 ; D2820019 04663717 v_sub_f32_e32 v28, 1.0, v24 ; 083830F2 v_mul_f32_e32 v29, 0x3f1d70a4, v4 ; 103A08FF 3F1D70A4 v_mad_f32 v26, v5, v26, v29 ; D282001A 04763505 v_mad_f32 v26, v6, v27, v26 ; D282001A 046A3706 v_mul_f32_e32 v26, v26, v28 ; 1034391A v_mad_f32 v25, v24, v25, v26 ; D2820019 046A3318 v_mov_b32_e32 v26, 0xbe16a93f ; 7E3402FF BE16A93F v_mul_f32_e32 v27, v26, v21 ; 10362B1A v_mov_b32_e32 v29, 0xbe93e575 ; 7E3A02FF BE93E575 v_mad_f32 v27, v22, v29, v27 ; D282001B 046E3B16 v_mov_b32_e32 v30, 0x3edf3b64 ; 7E3C02FF 3EDF3B64 v_mad_f32 v27, v23, v30, v27 ; D282001B 046E3D17 v_mul_f32_e32 v26, v26, v4 ; 1034091A v_mad_f32 v26, v5, v29, v26 ; D282001A 046A3B05 v_mad_f32 v26, v6, v30, v26 ; D282001A 046A3D06 v_mul_f32_e32 v26, v26, v28 ; 1034391A v_mad_f32 v21, v24, v27, v26 ; D2820015 046A3718 v_mov_b32_e32 v22, 0xbeca0f91 ; 7E2C02FF BECA0F91 v_mad_f32 v22, v22, v21, v4 ; D2820016 04122B16 v_mov_b32_e32 v23, 0xbf14a234 ; 7E2E02FF BF14A234 v_mad_f32 v27, v23, v25, v22 ; D282001B 045A3317 v_mov_b32_e32 v22, 0x40020e17 ; 7E2C02FF 40020E17 v_mad_f32 v28, v22, v21, v4 ; D282001C 04122B16 v_mov_b32_e32 v21, 0x3f91e5f3 ; 7E2A02FF 3F91E5F3 v_mad_f32 v26, v21, v25, v4 ; D282001A 04123315 image_sample v[21:23], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[84:91], s[72:75] ; F0800700 0255151A v_mad_f32 v2, v20, v2, s19 ; D2820002 004E0514 v_rcp_f32_e32 v20, s29 ; 7E28541D v_mad_f32 v24, v20, v2, 0 ; D2820018 02020514 v_mad_f32 v2, v19, v3, s17 ; D2820002 00460713 v_mad_f32 v2, v18, v2, s18 ; D2820002 004A0512 v_rcp_f32_e32 v3, s28 ; 7E06541C v_mad_f32 v25, -v3, v2, 1.0 ; D2820019 23CA0503 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[60:67], s[48:51] ; F0800F00 018F1818 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, -v27, s16, 1.0 ; D2820002 23C8211B v_mul_f32_e32 v3, s16, v26 ; 10063410 v_mad_f32 v3, v23, v2, v3 ; D2820003 040E0517 v_sub_f32_e32 v18, 1.0, v14 ; 08241CF2 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[52:59], s[44:47] ; F0800700 016D1C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v30, v18 ; 1018251E v_mad_f32 v33, v14, v3, v12 ; D2820021 0432070E v_mul_f32_e32 v3, s16, v25 ; 10063210 v_mad_f32 v3, v22, v2, v3 ; D2820003 040E0516 v_mul_f32_e32 v12, v29, v18 ; 1018251D v_mad_f32 v32, v14, v3, v12 ; D2820020 0432070E v_mul_f32_e32 v3, s16, v24 ; 10063010 v_mad_f32 v2, v21, v2, v3 ; D2820002 040E0515 v_mul_f32_e32 v3, v28, v18 ; 1006251C v_mad_f32 v31, v14, v2, v3 ; D282001F 040E050E image_sample v[18:20], 7, 0, 0, 0, 0, 0, 0, 0, v[31:34], s[36:43], s[32:35] ; F0800700 0109121F v_mad_f32 v2, -v16, v14, v14 ; D2820002 243A1D10 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v12, v32, v3 ; 10180720 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v2, v19, v12 ; D282000C 04322702 v_sub_f32_e32 v13, s9, v12 ; 081A1809 v_subrev_f32_e32 v10, s10, v10 ; 0A14140A v_subrev_f32_e32 v11, s11, v11 ; 0A16160B v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mad_f32 v10, v10, v10, v11 ; D282000A 042E150A v_subrev_f32_e32 v9, s8, v9 ; 0A121208 v_mad_f32 v10, v9, v9, v10 ; D282000A 042A1309 v_mul_f32_e32 v10, s2, v10 ; 10141402 v_mul_f32_e32 v11, s3, v9 ; 10161203 v_mul_f32_e32 v11, 0x3fb8aa65, v11 ; 101616FF 3FB8AA65 v_exp_f32_e32 v11, v11 ; 7E164B0B v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_mul_f32_e32 v10, v10, v11 ; 1014170A v_rcp_f32_e32 v9, v9 ; 7E125509 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mul_f32_e32 v9, 0x3fb8aa65, v9 ; 101212FF 3FB8AA65 v_exp_f32_e32 v9, v9 ; 7E124B09 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_mad_f32 v0, 0.5, v8, 0.5 ; D2820000 03C210F0 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[12:15] ; F0800100 00650000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v1, v0, v13, v12 ; D2820001 04321B00 v_mul_f32_e32 v8, v31, v3 ; 1010071F v_mad_f32 v8, v2, v18, v8 ; D2820008 04222502 v_sub_f32_e32 v9, s1, v8 ; 08121001 v_mad_f32 v8, v0, v9, v8 ; D2820008 04221300 v_cvt_pkrtz_f16_f32_e32 v1, v8, v1 ; 5E020308 v_mul_f32_e32 v3, v33, v3 ; 10060721 v_mad_f32 v2, v2, v20, v3 ; D2820002 040E2902 v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..104] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} IMM[2] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+9] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[3] 112: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 113: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 114: RSQ TEMP[0].x, TEMP[0].xxxx 115: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3] 116: MOV TEMP[0].xyz, TEMP[0].xyzx 117: RCP TEMP[4].x, TEMP[1].wwww 118: MOV TEMP[2].w, TEMP[4].xxxx 119: MUL TEMP[4].xy, TEMP[1], TEMP[4].xxxx 120: MOV TEMP[4].xy, TEMP[4].xyxx 121: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 122: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 123: MOV TEMP[5].zw, TEMP[5].wwzw 124: MUL TEMP[6].xy, TEMP[2].yyyy, CONST[5] 125: MOV TEMP[3].xy, TEMP[6].xyxx 126: MAD TEMP[6].xy, TEMP[2].xxxx, CONST[4], TEMP[3] 127: MOV TEMP[3].xy, TEMP[6].xyxx 128: MAD TEMP[6].xy, TEMP[2].zzzz, CONST[6], TEMP[3] 129: MOV TEMP[3].xy, TEMP[6].xyxx 130: ADD TEMP[6].xy, TEMP[3], CONST[7] 131: MOV TEMP[3].xy, TEMP[6].xyxx 132: MAD TEMP[3].xy, TEMP[3], IMM[2].xyzz, IMM[2].yyyy 133: MOV TEMP[3].xy, TEMP[3].xyxx 134: MOV TEMP[5].xy, IN[4].xyxx 135: MOV TEMP[2].xyz, TEMP[2].xyzx 136: MOV TEMP[2].w, IMM[0].yyyy 137: MOV TEMP[4].zw, IMM[0].yyzy 138: MOV TEMP[3].zw, IMM[0].yyzy 139: MOV OUT[2], TEMP[5] 140: MOV OUT[3], TEMP[2] 141: MOV OUT[0], TEMP[1] 142: MOV OUT[1], TEMP[0] 143: MOV OUT[4], TEMP[4] 144: MOV OUT[5], TEMP[3] 145: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0 %57 = add i32 %5, %7 %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %57) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = fmul float 3.000000e+00, %59 %76 = fmul float %45, 1.000000e+00 %77 = fadd float %76, 0.000000e+00 %78 = fmul float %46, 1.000000e+00 %79 = fadd float %78, 0.000000e+00 %80 = fmul float %47, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %45, 0.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = fptosi float %75 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 144 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %86, 4 %91 = add i32 %90, 148 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %86, 4 %94 = add i32 %93, 152 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %86, 4 %97 = add i32 %96, 156 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %77, %89 %100 = fmul float %79, %92 %101 = fadd float %99, %100 %102 = fmul float %81, %95 %103 = fadd float %101, %102 %104 = fmul float %83, %98 %105 = fadd float %103, %104 %106 = fptosi float %75 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = add i32 1, %108 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = shl i32 %111, 4 %113 = add i32 %112, 144 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = shl i32 %111, 4 %116 = add i32 %115, 148 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = shl i32 %111, 4 %119 = add i32 %118, 152 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %111, 4 %122 = add i32 %121, 156 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %77, %114 %125 = fmul float %79, %117 %126 = fadd float %124, %125 %127 = fmul float %81, %120 %128 = fadd float %126, %127 %129 = fmul float %83, %123 %130 = fadd float %128, %129 %131 = fptosi float %75 to i32 %132 = bitcast i32 %131 to float %133 = bitcast float %132 to i32 %134 = add i32 2, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = shl i32 %136, 4 %138 = add i32 %137, 144 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = shl i32 %136, 4 %141 = add i32 %140, 148 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = shl i32 %136, 4 %144 = add i32 %143, 152 %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %144) %146 = shl i32 %136, 4 %147 = add i32 %146, 156 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %77, %139 %150 = fmul float %79, %142 %151 = fadd float %149, %150 %152 = fmul float %81, %145 %153 = fadd float %151, %152 %154 = fmul float %83, %148 %155 = fadd float %153, %154 %156 = fmul float %105, %52 %157 = fmul float %130, %52 %158 = fmul float %155, %52 %159 = fptosi float %75 to i32 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 144 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = shl i32 %161, 4 %166 = add i32 %165, 148 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = shl i32 %161, 4 %169 = add i32 %168, 152 %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %169) %171 = fmul float %66, %164 %172 = fmul float %67, %167 %173 = fadd float %172, %171 %174 = fmul float %68, %170 %175 = fadd float %173, %174 %176 = fptosi float %75 to i32 %177 = bitcast i32 %176 to float %178 = bitcast float %177 to i32 %179 = add i32 1, %178 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = shl i32 %181, 4 %183 = add i32 %182, 144 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = shl i32 %181, 4 %186 = add i32 %185, 148 %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %186) %188 = shl i32 %181, 4 %189 = add i32 %188, 152 %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %189) %191 = fmul float %66, %184 %192 = fmul float %67, %187 %193 = fadd float %192, %191 %194 = fmul float %68, %190 %195 = fadd float %193, %194 %196 = fptosi float %75 to i32 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = add i32 2, %198 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = shl i32 %201, 4 %203 = add i32 %202, 144 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %201, 4 %206 = add i32 %205, 148 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %201, 4 %209 = add i32 %208, 152 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %66, %204 %212 = fmul float %67, %207 %213 = fadd float %212, %211 %214 = fmul float %68, %210 %215 = fadd float %213, %214 %216 = fmul float %175, %52 %217 = fmul float %195, %52 %218 = fmul float %215, %52 %219 = fcmp olt float 0.000000e+00, %53 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 br i1 %223, label %IF, label %ENDIF IF: ; preds = %main_body %224 = fmul float 3.000000e+00, %60 %225 = fptosi float %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = shl i32 %227, 4 %229 = add i32 %228, 144 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = shl i32 %227, 4 %232 = add i32 %231, 148 %233 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %232) %234 = shl i32 %227, 4 %235 = add i32 %234, 152 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = shl i32 %227, 4 %238 = add i32 %237, 156 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %77, %230 %241 = fmul float %79, %233 %242 = fadd float %240, %241 %243 = fmul float %81, %236 %244 = fadd float %242, %243 %245 = fmul float %83, %239 %246 = fadd float %244, %245 %247 = fptosi float %224 to i32 %248 = bitcast i32 %247 to float %249 = bitcast float %248 to i32 %250 = add i32 1, %249 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 144 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = shl i32 %252, 4 %257 = add i32 %256, 148 %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %257) %259 = shl i32 %252, 4 %260 = add i32 %259, 152 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = shl i32 %252, 4 %263 = add i32 %262, 156 %264 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %263) %265 = fmul float %77, %255 %266 = fmul float %79, %258 %267 = fadd float %265, %266 %268 = fmul float %81, %261 %269 = fadd float %267, %268 %270 = fmul float %83, %264 %271 = fadd float %269, %270 %272 = fptosi float %224 to i32 %273 = bitcast i32 %272 to float %274 = bitcast float %273 to i32 %275 = add i32 2, %274 %276 = bitcast i32 %275 to float %277 = bitcast float %276 to i32 %278 = shl i32 %277, 4 %279 = add i32 %278, 144 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = shl i32 %277, 4 %282 = add i32 %281, 148 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = shl i32 %277, 4 %285 = add i32 %284, 152 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = shl i32 %277, 4 %288 = add i32 %287, 156 %289 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %288) %290 = fmul float %77, %280 %291 = fmul float %79, %283 %292 = fadd float %290, %291 %293 = fmul float %81, %286 %294 = fadd float %292, %293 %295 = fmul float %83, %289 %296 = fadd float %294, %295 %297 = fmul float %53, %246 %298 = fadd float %297, %156 %299 = fmul float %53, %271 %300 = fadd float %299, %157 %301 = fmul float %53, %296 %302 = fadd float %301, %158 %303 = fptosi float %224 to i32 %304 = bitcast i32 %303 to float %305 = bitcast float %304 to i32 %306 = shl i32 %305, 4 %307 = add i32 %306, 144 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = shl i32 %305, 4 %310 = add i32 %309, 148 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = shl i32 %305, 4 %313 = add i32 %312, 152 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %66, %308 %316 = fmul float %67, %311 %317 = fadd float %316, %315 %318 = fmul float %68, %314 %319 = fadd float %317, %318 %320 = fptosi float %224 to i32 %321 = bitcast i32 %320 to float %322 = bitcast float %321 to i32 %323 = add i32 1, %322 %324 = bitcast i32 %323 to float %325 = bitcast float %324 to i32 %326 = shl i32 %325, 4 %327 = add i32 %326, 144 %328 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %327) %329 = shl i32 %325, 4 %330 = add i32 %329, 148 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = shl i32 %325, 4 %333 = add i32 %332, 152 %334 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %333) %335 = fmul float %66, %328 %336 = fmul float %67, %331 %337 = fadd float %336, %335 %338 = fmul float %68, %334 %339 = fadd float %337, %338 %340 = fptosi float %224 to i32 %341 = bitcast i32 %340 to float %342 = bitcast float %341 to i32 %343 = add i32 2, %342 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 144 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = shl i32 %345, 4 %350 = add i32 %349, 148 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = shl i32 %345, 4 %353 = add i32 %352, 152 %354 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %353) %355 = fmul float %66, %348 %356 = fmul float %67, %351 %357 = fadd float %356, %355 %358 = fmul float %68, %354 %359 = fadd float %357, %358 %360 = fmul float %53, %319 %361 = fadd float %360, %216 %362 = fmul float %53, %339 %363 = fadd float %362, %217 %364 = fmul float %53, %359 %365 = fadd float %364, %218 %366 = fcmp olt float 0.000000e+00, %54 %367 = sext i1 %366 to i32 %368 = bitcast i32 %367 to float %369 = bitcast float %368 to i32 %370 = icmp ne i32 %369, 0 br i1 %370, label %IF70, label %ENDIF ENDIF: ; preds = %IF70, %IF, %main_body %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %428, %IF70 ], [ %224, %IF ] %temp8.0 = phi float [ %156, %main_body ], [ %502, %IF70 ], [ %298, %IF ] %temp9.0 = phi float [ %157, %main_body ], [ %504, %IF70 ], [ %300, %IF ] %temp10.0 = phi float [ %158, %main_body ], [ %506, %IF70 ], [ %302, %IF ] %temp12.0 = phi float [ %216, %main_body ], [ %565, %IF70 ], [ %361, %IF ] %temp13.0 = phi float [ %217, %main_body ], [ %567, %IF70 ], [ %363, %IF ] %temp14.0 = phi float [ %218, %main_body ], [ %569, %IF70 ], [ %365, %IF ] %371 = fmul float %temp9.0, %17 %372 = fmul float %temp9.0, %18 %373 = fmul float %temp9.0, %19 %374 = fmul float %temp9.0, %20 %375 = fmul float %temp8.0, %13 %376 = fadd float %375, %371 %377 = fmul float %temp8.0, %14 %378 = fadd float %377, %372 %379 = fmul float %temp8.0, %15 %380 = fadd float %379, %373 %381 = fmul float %temp8.0, %16 %382 = fadd float %381, %374 %383 = fmul float %temp10.0, %21 %384 = fadd float %383, %376 %385 = fmul float %temp10.0, %22 %386 = fadd float %385, %378 %387 = fmul float %temp10.0, %23 %388 = fadd float %387, %380 %389 = fmul float %temp10.0, %24 %390 = fadd float %389, %382 %391 = fadd float %384, %25 %392 = fadd float %386, %26 %393 = fadd float %388, %27 %394 = fadd float %390, %28 %395 = fmul float %temp12.0, %temp12.0 %396 = fmul float %temp13.0, %temp13.0 %397 = fadd float %396, %395 %398 = fmul float %temp14.0, %temp14.0 %399 = fadd float %397, %398 %400 = call float @llvm.maxnum.f32(float %399, float 0x3E7AD7F2A0000000) %401 = call float @llvm.AMDGPU.rsq.clamped.f32(float %400) %402 = fmul float %401, %temp12.0 %403 = fmul float %401, %temp13.0 %404 = fmul float %401, %temp14.0 %405 = fdiv float 1.000000e+00, %394 %406 = fmul float %391, %405 %407 = fmul float %392, %405 %408 = fadd float %temp8.0, %39 %409 = fadd float %temp9.0, %40 %410 = fmul float %408, %37 %411 = fmul float %409, %38 %412 = fmul float %temp9.0, %31 %413 = fmul float %temp9.0, %32 %414 = fmul float %temp8.0, %29 %415 = fadd float %414, %412 %416 = fmul float %temp8.0, %30 %417 = fadd float %416, %413 %418 = fmul float %temp10.0, %33 %419 = fadd float %418, %415 %420 = fmul float %temp10.0, %34 %421 = fadd float %420, %417 %422 = fadd float %419, %35 %423 = fadd float %421, %36 %424 = fmul float %422, 5.000000e-01 %425 = fadd float %424, -5.000000e-01 %426 = fmul float %423, -5.000000e-01 %427 = fadd float %426, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %402, float %403, float %404, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %410, float %411) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %406, float %407, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %425, float %427, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %391, float %392, float %393, float %394) ret void IF70: ; preds = %IF %428 = fmul float 3.000000e+00, %61 %429 = fptosi float %428 to i32 %430 = bitcast i32 %429 to float %431 = bitcast float %430 to i32 %432 = shl i32 %431, 4 %433 = add i32 %432, 144 %434 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %433) %435 = shl i32 %431, 4 %436 = add i32 %435, 148 %437 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %436) %438 = shl i32 %431, 4 %439 = add i32 %438, 152 %440 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %439) %441 = shl i32 %431, 4 %442 = add i32 %441, 156 %443 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %442) %444 = fmul float %77, %434 %445 = fmul float %79, %437 %446 = fadd float %444, %445 %447 = fmul float %81, %440 %448 = fadd float %446, %447 %449 = fmul float %83, %443 %450 = fadd float %448, %449 %451 = fptosi float %428 to i32 %452 = bitcast i32 %451 to float %453 = bitcast float %452 to i32 %454 = add i32 1, %453 %455 = bitcast i32 %454 to float %456 = bitcast float %455 to i32 %457 = shl i32 %456, 4 %458 = add i32 %457, 144 %459 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %458) %460 = shl i32 %456, 4 %461 = add i32 %460, 148 %462 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %461) %463 = shl i32 %456, 4 %464 = add i32 %463, 152 %465 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %464) %466 = shl i32 %456, 4 %467 = add i32 %466, 156 %468 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %467) %469 = fmul float %77, %459 %470 = fmul float %79, %462 %471 = fadd float %469, %470 %472 = fmul float %81, %465 %473 = fadd float %471, %472 %474 = fmul float %83, %468 %475 = fadd float %473, %474 %476 = fptosi float %428 to i32 %477 = bitcast i32 %476 to float %478 = bitcast float %477 to i32 %479 = add i32 2, %478 %480 = bitcast i32 %479 to float %481 = bitcast float %480 to i32 %482 = shl i32 %481, 4 %483 = add i32 %482, 144 %484 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %483) %485 = shl i32 %481, 4 %486 = add i32 %485, 148 %487 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %486) %488 = shl i32 %481, 4 %489 = add i32 %488, 152 %490 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %489) %491 = shl i32 %481, 4 %492 = add i32 %491, 156 %493 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %492) %494 = fmul float %77, %484 %495 = fmul float %79, %487 %496 = fadd float %494, %495 %497 = fmul float %81, %490 %498 = fadd float %496, %497 %499 = fmul float %83, %493 %500 = fadd float %498, %499 %501 = fmul float %54, %450 %502 = fadd float %501, %298 %503 = fmul float %54, %475 %504 = fadd float %503, %300 %505 = fmul float %54, %500 %506 = fadd float %505, %302 %507 = fptosi float %428 to i32 %508 = bitcast i32 %507 to float %509 = bitcast float %508 to i32 %510 = shl i32 %509, 4 %511 = add i32 %510, 144 %512 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %511) %513 = shl i32 %509, 4 %514 = add i32 %513, 148 %515 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %514) %516 = shl i32 %509, 4 %517 = add i32 %516, 152 %518 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %517) %519 = fmul float %66, %512 %520 = fmul float %67, %515 %521 = fadd float %520, %519 %522 = fmul float %68, %518 %523 = fadd float %521, %522 %524 = fptosi float %428 to i32 %525 = bitcast i32 %524 to float %526 = bitcast float %525 to i32 %527 = add i32 1, %526 %528 = bitcast i32 %527 to float %529 = bitcast float %528 to i32 %530 = shl i32 %529, 4 %531 = add i32 %530, 144 %532 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %531) %533 = shl i32 %529, 4 %534 = add i32 %533, 148 %535 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %534) %536 = shl i32 %529, 4 %537 = add i32 %536, 152 %538 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %537) %539 = fmul float %66, %532 %540 = fmul float %67, %535 %541 = fadd float %540, %539 %542 = fmul float %68, %538 %543 = fadd float %541, %542 %544 = fptosi float %428 to i32 %545 = bitcast i32 %544 to float %546 = bitcast float %545 to i32 %547 = add i32 2, %546 %548 = bitcast i32 %547 to float %549 = bitcast float %548 to i32 %550 = shl i32 %549, 4 %551 = add i32 %550, 144 %552 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %551) %553 = shl i32 %549, 4 %554 = add i32 %553, 148 %555 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %554) %556 = shl i32 %549, 4 %557 = add i32 %556, 152 %558 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %557) %559 = fmul float %66, %552 %560 = fmul float %67, %555 %561 = fadd float %560, %559 %562 = fmul float %68, %558 %563 = fadd float %561, %562 %564 = fmul float %54, %523 %565 = fadd float %564, %361 %566 = fmul float %54, %543 %567 = fadd float %566, %363 %568 = fmul float %54, %563 %569 = fadd float %568, %365 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v11, s[20:23], 0 idxen ; E00C2000 8005140B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v20 ; 100028FF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0xb0, v3 ; 4A0006FF 000000B0 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_format_xyzw v[15:18], v11, s[24:27], 0 idxen ; E00C2000 80060F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v15 ; 10021F00 v_add_i32_e32 v2, 0xb4, v3 ; 4A0406FF 000000B4 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v16, v2, v1 ; D2820001 04060510 v_add_i32_e32 v4, 0xb8, v3 ; 4A0806FF 000000B8 buffer_load_dword v12, v4, s[0:3], 0 offen ; E0301000 80000C04 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v17, v12, v1 ; D2820001 04061911 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v4, 0xa0, v3 ; 4A0806FF 000000A0 buffer_load_dword v13, v4, s[0:3], 0 offen ; E0301000 80000D04 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v13, v15 ; 10081F0D v_add_i32_e32 v5, 0xa4, v3 ; 4A0A06FF 000000A4 buffer_load_dword v14, v5, s[0:3], 0 offen ; E0301000 80000E05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v16, v14, v4 ; D2820004 04121D10 v_add_i32_e32 v5, 0xa8, v3 ; 4A0A06FF 000000A8 buffer_load_dword v27, v5, s[0:3], 0 offen ; E0301000 80001B05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v17, v27, v4 ; D2820004 04123711 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_add_i32_e32 v5, 0x90, v3 ; 4A0A06FF 00000090 buffer_load_dword v28, v5, s[0:3], 0 offen ; E0301000 80001C05 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v28, v15 ; 100A1F1C v_add_i32_e32 v6, 0x94, v3 ; 4A0C06FF 00000094 buffer_load_dword v29, v6, s[0:3], 0 offen ; E0301000 80001D06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v16, v29, v5 ; D2820005 04163B10 v_add_i32_e32 v6, 0x98, v3 ; 4A0C06FF 00000098 buffer_load_dword v30, v6, s[0:3], 0 offen ; E0301000 80001E06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v17, v30, v5 ; D2820005 04163D11 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 buffer_load_format_xyzw v[31:34], v11, s[12:15], 0 idxen ; E00C2000 80031F0B v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v19, 0, v32 ; 06264080 v_mul_f32_e32 v2, v2, v19 ; 10042702 v_add_f32_e32 v24, 0, v31 ; 06303E80 v_mad_f32 v0, v24, v0, v2 ; D2820000 040A0118 v_add_f32_e32 v26, 0, v33 ; 06344280 v_mad_f32 v0, v26, v12, v0 ; D2820000 0402191A v_mad_f32 v25, 0, v31, 1.0 ; D2820019 03CA3E80 v_add_i32_e32 v2, 0xbc, v3 ; 4A0406FF 000000BC buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v25, v2, v0 ; D2820000 04020519 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v2, v14, v19 ; 1004270E v_mad_f32 v2, v24, v13, v2 ; D2820002 040A1B18 v_mad_f32 v2, v26, v27, v2 ; D2820002 040A371A v_add_i32_e32 v12, 0xac, v3 ; 4A1806FF 000000AC buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v25, v12, v2 ; D2820002 040A1919 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v12, v29, v19 ; 1018271D v_mad_f32 v12, v24, v28, v12 ; D282000C 04323918 v_mad_f32 v12, v26, v30, v12 ; D282000C 04323D1A v_add_i32_e32 v3, 0x9c, v3 ; 4A0606FF 0000009C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v25, v3, v12 ; D2820003 04320719 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v27, 0, -1, s[4:5] ; D200081B 00118280 v_cmp_ne_i32_e64 s[32:33], v27, 0 ; D10A0020 0001011B s_buffer_load_dword s20, s[0:3], 0x23 ; C20A0123 s_buffer_load_dword s19, s[0:3], 0x22 ; C2098122 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s24, s[0:3], 0xd ; C20C010D s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s23, s[0:3], 0xb ; C20B810B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s26, s[0:3], 0x9 ; C20D0109 s_buffer_load_dword s18, s[0:3], 0x8 ; C2090108 s_buffer_load_dword s27, s[0:3], 0x7 ; C20D8107 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s25, s[0:3], 0x4 ; C20C8104 s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s31, s[0:3], 0x1 ; C20F8101 s_buffer_load_dword s28, s[0:3], 0x0 ; C20E0100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v21 ; 100C2AFF 40400000 v_cvt_i32_f32_e32 v27, v6 ; 7E361106 v_lshlrev_b32_e32 v27, 4, v27 ; 34363684 v_add_i32_e32 v28, 0xb4, v27 ; 4A3836FF 000000B4 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v29, 0xb0, v27 ; 4A3A36FF 000000B0 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, v29, v15 ; 103C1F1D v_mad_f32 v30, v16, v28, v30 ; D282001E 047A3910 v_add_i32_e32 v31, 0xb8, v27 ; 4A3E36FF 000000B8 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v17, v31, v30 ; D282001E 047A3F11 v_mad_f32 v1, v8, v30, v1 ; D2820001 04063D08 v_add_i32_e32 v30, 0xa4, v27 ; 4A3C36FF 000000A4 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E v_add_i32_e32 v32, 0xa0, v27 ; 4A4036FF 000000A0 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v32, v15 ; 10421F20 v_mad_f32 v33, v16, v30, v33 ; D2820021 04863D10 v_add_i32_e32 v34, 0xa8, v27 ; 4A4436FF 000000A8 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, v17, v34, v33 ; D2820021 04864511 v_mad_f32 v4, v8, v33, v4 ; D2820004 04124308 v_add_i32_e32 v33, 0x94, v27 ; 4A4236FF 00000094 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 v_add_i32_e32 v35, 0x90, v27 ; 4A4636FF 00000090 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0301000 80002323 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v35, v15 ; 10481F23 v_mad_f32 v36, v16, v33, v36 ; D2820024 04924310 v_add_i32_e32 v37, 0x98, v27 ; 4A4A36FF 00000098 buffer_load_dword v37, v37, s[0:3], 0 offen ; E0301000 80002525 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, v17, v37, v36 ; D2820024 04924B11 v_mad_f32 v5, v8, v36, v5 ; D2820005 04164908 v_mul_f32_e32 v28, v28, v19 ; 1038271C v_mad_f32 v28, v24, v29, v28 ; D282001C 04723B18 v_mad_f32 v28, v26, v31, v28 ; D282001C 04723F1A v_add_i32_e32 v29, 0xbc, v27 ; 4A3A36FF 000000BC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v0, v8, v28, v0 ; D2820000 04023908 v_mul_f32_e32 v28, v30, v19 ; 1038271E v_mad_f32 v28, v24, v32, v28 ; D282001C 04724118 v_mad_f32 v28, v26, v34, v28 ; D282001C 0472451A v_add_i32_e32 v29, 0xac, v27 ; 4A3A36FF 000000AC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v2, v8, v28, v2 ; D2820002 040A3908 v_mul_f32_e32 v28, v33, v19 ; 10382721 v_mad_f32 v28, v24, v35, v28 ; D282001C 04724718 v_mad_f32 v28, v26, v37, v28 ; D282001C 04724B1A v_add_i32_e32 v27, 0x9c, v27 ; 4A3636FF 0000009C buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v25, v27, v28 ; D282001B 04723719 v_mad_f32 v3, v8, v27, v3 ; D2820003 040E3708 v_cmp_gt_f32_e64 s[34:35], v9, 0 ; D0080022 00010109 v_cndmask_b32_e64 v27, 0, -1, s[34:35] ; D200081B 00898280 v_cmp_ne_i32_e64 s[34:35], v27, 0 ; D10A0022 0001011B s_and_saveexec_b64 s[34:35], s[34:35] ; BEA22422 s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v22 ; 100C2CFF 40400000 v_cvt_i32_f32_e32 v20, v6 ; 7E281106 v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v21, 0xb4, v20 ; 4A2A28FF 000000B4 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_add_i32_e32 v22, 0xb0, v20 ; 4A2C28FF 000000B0 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v23, v22, v15 ; 102E1F16 v_mad_f32 v23, v16, v21, v23 ; D2820017 045E2B10 v_add_i32_e32 v27, 0xb8, v20 ; 4A3628FF 000000B8 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v17, v27, v23 ; D2820017 045E3711 v_mad_f32 v1, v9, v23, v1 ; D2820001 04062F09 v_add_i32_e32 v23, 0xa4, v20 ; 4A2E28FF 000000A4 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v28, 0xa0, v20 ; 4A3828FF 000000A0 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v15 ; 103A1F1C v_mad_f32 v29, v16, v23, v29 ; D282001D 04762F10 v_add_i32_e32 v30, 0xa8, v20 ; 4A3C28FF 000000A8 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v17, v30, v29 ; D282001D 04763D11 v_mad_f32 v4, v9, v29, v4 ; D2820004 04123B09 v_add_i32_e32 v29, 0x94, v20 ; 4A3A28FF 00000094 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v31, 0x90, v20 ; 4A3E28FF 00000090 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v15 ; 10401F1F v_mad_f32 v32, v16, v29, v32 ; D2820020 04823B10 v_add_i32_e32 v33, 0x98, v20 ; 4A4228FF 00000098 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v17, v33, v32 ; D282000F 04824311 v_mad_f32 v5, v9, v15, v5 ; D2820005 04161F09 v_mul_f32_e32 v15, v21, v19 ; 101E2715 v_mad_f32 v15, v24, v22, v15 ; D282000F 043E2D18 v_mad_f32 v15, v26, v27, v15 ; D282000F 043E371A v_add_i32_e32 v16, 0xbc, v20 ; 4A2028FF 000000BC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v0, v9, v15, v0 ; D2820000 04021F09 v_mul_f32_e32 v15, v23, v19 ; 101E2717 v_mad_f32 v15, v24, v28, v15 ; D282000F 043E3918 v_mad_f32 v15, v26, v30, v15 ; D282000F 043E3D1A v_add_i32_e32 v16, 0xac, v20 ; 4A2028FF 000000AC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v2, v9, v15, v2 ; D2820002 040A1F09 v_mul_f32_e32 v15, v29, v19 ; 101E271D v_mad_f32 v15, v24, v31, v15 ; D282000F 043E3F18 v_mad_f32 v15, v26, v33, v15 ; D282000F 043E431A v_add_i32_e32 v16, 0x9c, v20 ; 4A2028FF 0000009C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v3, v9, v15, v3 ; D2820003 040E1F09 s_or_b64 exec, exec, s[34:35] ; 88FE227E s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mad_f32 v7, v4, v4, v7 ; D2820007 041E0904 v_mad_f32 v7, v1, v1, v7 ; D2820007 041E0301 v_max_f32_e32 v7, 0x33d6bf95, v7 ; 200E0EFF 33D6BF95 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 exp 15, 32, 0, 0, 0, v5, v4, v1, v6 ; F800020F 06010405 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s20, v2 ; 06020414 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_add_f32_e32 v4, s19, v3 ; 06080613 v_mul_f32_e32 v4, s21, v4 ; 10080815 exp 15, 33, 0, 0, 0, v11, v12, v4, v1 ; F800021F 01040C0B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 34, 0, 0, 0, v3, v2, v0, v1 ; F800022F 01000203 v_mul_f32_e32 v4, s30, v2 ; 1008041E v_mad_f32 v4, v3, s31, v4 ; D2820004 04103F03 v_mad_f32 v4, v0, s26, v4 ; D2820004 04103500 v_add_f32_e32 v4, s24, v4 ; 06080818 v_mul_f32_e32 v5, s27, v2 ; 100A041B v_mad_f32 v5, v3, s29, v5 ; D2820005 04143B03 v_mad_f32 v5, v0, s23, v5 ; D2820005 04142F00 v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v6, v5 ; 7E0C5505 v_mul_f32_e32 v7, v6, v4 ; 100E0906 v_mul_f32_e32 v8, s25, v2 ; 10100419 v_mad_f32 v8, v3, s28, v8 ; D2820008 04203903 v_mad_f32 v8, v0, s18, v8 ; D2820008 04202500 v_add_f32_e32 v8, s16, v8 ; 06101010 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 35, 0, 0, 0, v6, v7, v9, v1 ; F800023F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s12, v2 ; 100C040C v_mad_f32 v6, v3, s13, v6 ; D2820006 04181B03 v_mad_f32 v6, v0, s11, v6 ; D2820006 04181700 v_add_f32_e32 v6, s7, v6 ; 060C0C07 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 v_mul_f32_e32 v7, s8, v2 ; 100E0408 v_mad_f32 v7, v3, s9, v7 ; D2820007 041C1303 v_mad_f32 v7, v0, s5, v7 ; D2820007 041C0B00 v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mad_f32 v7, -0.5, v7, -0.5 ; D2820007 03C60EF1 exp 15, 36, 0, 0, 0, v6, v7, v9, v1 ; F800024F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s14, v2 ; 1002040E v_mad_f32 v1, v3, s15, v1 ; D2820001 04041F03 v_mad_f32 v0, v0, s10, v1 ; D2820000 04041500 v_add_f32_e32 v0, s6, v0 ; 06000006 exp 15, 12, 0, 1, 0, v8, v4, v0, v5 ; F80008CF 05000408 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 14: UIF TEMP[6].xxxx :0 15: KILL 16: ENDIF 17: MOV TEMP[5].xyz, IN[0].xyzz 18: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 19: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 20: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 21: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 22: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 23: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 24: MOV TEMP[0].xyz, TEMP[1].xyzx 25: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 26: MOV TEMP[0].xyz, TEMP[1].xyzx 27: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 28: MOV TEMP[3].yzw, TEMP[1].zyzw 29: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 30: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 31: RSQ TEMP[5].x, TEMP[5].xxxx 32: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 33: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 34: MOV TEMP[5].w, IMM[0].wwww 35: MOV TEMP[5].x, TEMP[3].yyyy 36: MOV TEMP[5].y, TEMP[3].zzzz 37: MOV TEMP[5].z, TEMP[3].wwww 38: DP4 TEMP[3].x, TEMP[5], TEMP[5] 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 41: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 42: MOV_SAT TEMP[3].x, TEMP[3].xxxx 43: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 44: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 45: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 46: MOV TEMP[6].xyz, TEMP[5].yzww 47: TEX TEMP[6], TEMP[6], SAMP[4], CUBE 48: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 49: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 50: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 51: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 52: MOV TEMP[4].w, TEMP[7].wwww 53: MOV TEMP[6].xy, IN[1].xyyy 54: TEX TEMP[6], TEMP[6], SAMP[1], 2D 55: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 56: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 57: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 58: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 59: ABS TEMP[6].x, TEMP[8].wwww 60: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 61: MOV TEMP[5].xyz, TEMP[5].yzww 62: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 63: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 64: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 65: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 66: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 67: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 68: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 69: ABS TEMP[3].x, TEMP[3].xxxx 70: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 71: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 72: MOV TEMP[0].w, TEMP[6].wwww 73: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 74: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 75: ABS TEMP[5].x, TEMP[3].xxxx 76: LG2 TEMP[4].x, TEMP[5].xxxx 77: ABS TEMP[5].x, TEMP[3].yyyy 78: LG2 TEMP[5].x, TEMP[5].xxxx 79: MOV TEMP[4].y, TEMP[5].xxxx 80: ABS TEMP[3].x, TEMP[3].zzzz 81: LG2 TEMP[3].x, TEMP[3].xxxx 82: MOV TEMP[4].z, TEMP[3].xxxx 83: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 84: EX2 TEMP[4].x, TEMP[3].xxxx 85: EX2 TEMP[5].x, TEMP[3].yyyy 86: MOV TEMP[4].y, TEMP[5].xxxx 87: EX2 TEMP[3].x, TEMP[3].zzzz 88: MOV TEMP[4].z, TEMP[3].xxxx 89: MOV TEMP[3].xyz, TEMP[4].xyzz 90: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 91: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 92: MOV TEMP[5].xy, TEMP[5].xyyy 93: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 94: MOV TEMP[1].w, TEMP[5].wwww 95: MOV TEMP[6].xy, IN[1].zwww 96: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 97: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 98: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 99: MOV TEMP[6].xyz, TEMP[3].yzww 100: TEX TEMP[6], TEMP[6], SAMP[9], 3D 101: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 102: MOV TEMP[1].xyz, TEMP[3].xyzx 103: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 104: MOV TEMP[0].xyz, TEMP[3].xyzx 105: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 106: MOV TEMP[4].z, TEMP[3].zzzz 107: MOV TEMP[4].xy, IN[2].xyxx 108: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 109: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 110: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 111: EX2 TEMP[5].x, TEMP[5].wwww 112: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 113: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 114: RCP TEMP[3].x, TEMP[3].wwww 115: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 116: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 117: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 118: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 119: EX2 TEMP[3].x, TEMP[3].wwww 120: MOV_SAT TEMP[3].x, TEMP[3].xxxx 121: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 122: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 123: MOV TEMP[4].x, TEMP[5].xxxx 124: MOV TEMP[4].y, CONST[4].wwww 125: MOV TEMP[4].xy, TEMP[4].xyyy 126: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 127: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 128: MOV TEMP[0].w, TEMP[3].wwww 129: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 130: MOV TEMP[0].xyz, TEMP[0].xyzx 131: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 132: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 133: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 134: UIF TEMP[3].xxxx :0 135: MOV TEMP[2].x, TEMP[2].xxxx 136: ELSE :0 137: MOV TEMP[2].x, TEMP[1].xxxx 138: ENDIF 139: MOV TEMP[0].w, TEMP[2].xxxx 140: MOV OUT[0], TEMP[0] 141: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = icmp ne i32 %128, 0 br i1 %129, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %130 = insertelement <4 x float> undef, float %78, i32 0 %131 = insertelement <4 x float> %130, float %79, i32 1 %132 = insertelement <4 x float> %131, float %80, i32 2 %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 3 %134 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %133) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = call float @fabs(float %137) %140 = fdiv float 1.000000e+00, %139 %141 = fmul float %135, %140 %142 = fadd float %141, 1.500000e+00 %143 = fmul float %136, %140 %144 = fadd float %143, 1.500000e+00 %145 = bitcast float %144 to i32 %146 = bitcast float %142 to i32 %147 = bitcast float %138 to i32 %148 = insertelement <4 x i32> undef, i32 %145, i32 0 %149 = insertelement <4 x i32> %148, i32 %146, i32 1 %150 = insertelement <4 x i32> %149, i32 %147, i32 2 %151 = insertelement <4 x i32> %150, i32 undef, i32 3 %152 = bitcast <8 x i32> %47 to <32 x i8> %153 = bitcast <4 x i32> %49 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 4) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = extractelement <4 x float> %154, i32 3 %159 = call float @llvm.pow.f32(float %155, float 0x40019999A0000000) %160 = call float @llvm.pow.f32(float %156, float 0x40019999A0000000) %161 = call float @llvm.pow.f32(float %157, float 0x40019999A0000000) %162 = call float @llvm.pow.f32(float %158, float 1.000000e+00) %163 = fmul float %102, %159 %164 = fmul float %103, %160 %165 = fmul float %104, %161 %166 = fadd float %163, %163 %167 = fadd float %164, %164 %168 = fadd float %165, %165 %169 = fsub float -0.000000e+00, %85 %170 = fadd float %24, %169 %171 = fsub float -0.000000e+00, %86 %172 = fadd float %25, %171 %173 = fsub float -0.000000e+00, %87 %174 = fadd float %26, %173 %175 = fmul float %170, %170 %176 = fmul float %172, %172 %177 = fadd float %176, %175 %178 = fmul float %174, %174 %179 = fadd float %177, %178 %180 = call float @llvm.maxnum.f32(float %179, float 0x3E7AD7F2A0000000) %181 = call float @llvm.AMDGPU.rsq.clamped.f32(float %180) %182 = fmul float %181, %170 %183 = fmul float %181, %172 %184 = fmul float %181, %174 %185 = fmul float %170, %181 %186 = fadd float %185, 0x3FAB15B580000000 %187 = fmul float %172, %181 %188 = fadd float %187, 0x3FEB126EA0000000 %189 = fmul float %174, %181 %190 = fadd float %189, 0x3FE0ED9160000000 %191 = fmul float %186, %186 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = fmul float %190, %190 %195 = fadd float %193, %194 %196 = fmul float 0.000000e+00, 0.000000e+00 %197 = fadd float %195, %196 %198 = call float @llvm.AMDGPU.rsq.clamped.f32(float %197) %199 = fmul float %186, %198 %200 = fmul float %188, %198 %201 = fmul float %190, %198 %202 = fmul float %199, %78 %203 = fmul float %200, %79 %204 = fadd float %203, %202 %205 = fmul float %201, %80 %206 = fadd float %204, %205 %207 = call float @llvm.AMDIL.clamp.(float %206, float 0.000000e+00, float 1.000000e+00) %208 = fmul float %78, %182 %209 = fmul float %79, %183 %210 = fadd float %209, %208 %211 = fmul float %80, %184 %212 = fadd float %210, %211 %213 = fadd float %212, %212 %214 = fsub float -0.000000e+00, %182 %215 = fmul float %213, %78 %216 = fadd float %215, %214 %217 = fsub float -0.000000e+00, %183 %218 = fmul float %213, %79 %219 = fadd float %218, %217 %220 = fsub float -0.000000e+00, %184 %221 = fmul float %213, %80 %222 = fadd float %221, %220 %223 = insertelement <4 x float> undef, float %216, i32 0 %224 = insertelement <4 x float> %223, float %219, i32 1 %225 = insertelement <4 x float> %224, float %222, i32 2 %226 = insertelement <4 x float> %225, float %162, i32 3 %227 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %226) %228 = extractelement <4 x float> %227, i32 0 %229 = extractelement <4 x float> %227, i32 1 %230 = extractelement <4 x float> %227, i32 2 %231 = extractelement <4 x float> %227, i32 3 %232 = call float @fabs(float %230) %233 = fdiv float 1.000000e+00, %232 %234 = fmul float %228, %233 %235 = fadd float %234, 1.500000e+00 %236 = fmul float %229, %233 %237 = fadd float %236, 1.500000e+00 %238 = bitcast float %237 to i32 %239 = bitcast float %235 to i32 %240 = bitcast float %231 to i32 %241 = insertelement <4 x i32> undef, i32 %238, i32 0 %242 = insertelement <4 x i32> %241, i32 %239, i32 1 %243 = insertelement <4 x i32> %242, i32 %240, i32 2 %244 = insertelement <4 x i32> %243, i32 undef, i32 3 %245 = bitcast <8 x i32> %55 to <32 x i8> %246 = bitcast <4 x i32> %57 to <16 x i8> %247 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %244, <32 x i8> %245, <16 x i8> %246, i32 4) %248 = extractelement <4 x float> %247, i32 0 %249 = extractelement <4 x float> %247, i32 1 %250 = extractelement <4 x float> %247, i32 2 %251 = call float @llvm.pow.f32(float %248, float 0x40019999A0000000) %252 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %250, float 0x40019999A0000000) %254 = bitcast float %81 to i32 %255 = bitcast float %82 to i32 %256 = insertelement <2 x i32> undef, i32 %254, i32 0 %257 = insertelement <2 x i32> %256, i32 %255, i32 1 %258 = bitcast <8 x i32> %43 to <32 x i8> %259 = bitcast <4 x i32> %45 to <16 x i8> %260 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %257, <32 x i8> %258, <16 x i8> %259, i32 2) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = call float @llvm.pow.f32(float %261, float 0x40019999A0000000) %266 = call float @llvm.pow.f32(float %262, float 0x40019999A0000000) %267 = call float @llvm.pow.f32(float %263, float 0x40019999A0000000) %268 = call float @llvm.pow.f32(float %264, float 1.000000e+00) %269 = call float @fabs(float %268) %270 = call float @llvm.pow.f32(float %269, float 0x40019999A0000000) %271 = insertelement <4 x float> undef, float %216, i32 0 %272 = insertelement <4 x float> %271, float %219, i32 1 %273 = insertelement <4 x float> %272, float %222, i32 2 %274 = insertelement <4 x float> %273, float %222, i32 3 %275 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %274) %276 = extractelement <4 x float> %275, i32 0 %277 = extractelement <4 x float> %275, i32 1 %278 = extractelement <4 x float> %275, i32 2 %279 = extractelement <4 x float> %275, i32 3 %280 = call float @fabs(float %278) %281 = fdiv float 1.000000e+00, %280 %282 = fmul float %276, %281 %283 = fadd float %282, 1.500000e+00 %284 = fmul float %277, %281 %285 = fadd float %284, 1.500000e+00 %286 = bitcast float %285 to i32 %287 = bitcast float %283 to i32 %288 = bitcast float %279 to i32 %289 = insertelement <4 x i32> undef, i32 %286, i32 0 %290 = insertelement <4 x i32> %289, i32 %287, i32 1 %291 = insertelement <4 x i32> %290, i32 %288, i32 2 %292 = insertelement <4 x i32> %291, i32 undef, i32 3 %293 = bitcast <8 x i32> %51 to <32 x i8> %294 = bitcast <4 x i32> %53 to <16 x i8> %295 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %292, <32 x i8> %293, <16 x i8> %294, i32 4) %296 = extractelement <4 x float> %295, i32 0 %297 = extractelement <4 x float> %295, i32 1 %298 = extractelement <4 x float> %295, i32 2 %299 = call float @llvm.pow.f32(float %296, float 0x40019999A0000000) %300 = call float @llvm.pow.f32(float %297, float 0x40019999A0000000) %301 = call float @llvm.pow.f32(float %298, float 0x40019999A0000000) %302 = call float @llvm.AMDGPU.lrp(float %270, float %299, float %251) %303 = call float @llvm.AMDGPU.lrp(float %270, float %300, float %252) %304 = call float @llvm.AMDGPU.lrp(float %270, float %301, float %253) %305 = fmul float %270, 1.990000e+02 %306 = fadd float %305, 1.000000e+00 %307 = call float @fabs(float %207) %308 = call float @llvm.pow.f32(float %307, float %306) %309 = fmul float %306, 0x3FB99999A0000000 %310 = fmul float %309, %308 %311 = fadd float %310, %302 %312 = fmul float %309, %308 %313 = fadd float %312, %303 %314 = fmul float %309, %308 %315 = fadd float %314, %304 %316 = fmul float %311, %265 %317 = fadd float %316, %166 %318 = fmul float %313, %266 %319 = fadd float %318, %167 %320 = fmul float %315, %267 %321 = fadd float %320, %168 %322 = call float @fabs(float %317) %323 = call float @llvm.log2.f32(float %322) %324 = call float @fabs(float %319) %325 = call float @llvm.log2.f32(float %324) %326 = call float @fabs(float %321) %327 = call float @llvm.log2.f32(float %326) %328 = fmul float %323, 0x3FDD1743E0000000 %329 = fmul float %325, 0x3FDD1743E0000000 %330 = fmul float %327, 0x3FDD1743E0000000 %331 = call float @llvm.AMDIL.exp.(float %328) %332 = call float @llvm.AMDIL.exp.(float %329) %333 = call float @llvm.AMDIL.exp.(float %330) %334 = bitcast float %331 to i32 %335 = bitcast float %332 to i32 %336 = bitcast float %333 to i32 %337 = insertelement <4 x i32> undef, i32 %334, i32 0 %338 = insertelement <4 x i32> %337, i32 %335, i32 1 %339 = insertelement <4 x i32> %338, i32 %336, i32 2 %340 = insertelement <4 x i32> %339, i32 undef, i32 3 %341 = bitcast <8 x i32> %63 to <32 x i8> %342 = bitcast <4 x i32> %65 to <16 x i8> %343 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %340, <32 x i8> %341, <16 x i8> %342, i32 3) %344 = extractelement <4 x float> %343, i32 0 %345 = extractelement <4 x float> %343, i32 1 %346 = extractelement <4 x float> %343, i32 2 %347 = fmul float %89, 1.000000e+00 %348 = fadd float %347, 0.000000e+00 %349 = fmul float %90, -1.000000e+00 %350 = fadd float %349, 1.000000e+00 %351 = bitcast float %348 to i32 %352 = bitcast float %350 to i32 %353 = insertelement <2 x i32> undef, i32 %351, i32 0 %354 = insertelement <2 x i32> %353, i32 %352, i32 1 %355 = bitcast <8 x i32> %67 to <32 x i8> %356 = bitcast <4 x i32> %69 to <16 x i8> %357 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %354, <32 x i8> %355, <16 x i8> %356, i32 2) %358 = extractelement <4 x float> %357, i32 0 %359 = extractelement <4 x float> %357, i32 2 %360 = bitcast float %83 to i32 %361 = bitcast float %84 to i32 %362 = insertelement <2 x i32> undef, i32 %360, i32 0 %363 = insertelement <2 x i32> %362, i32 %361, i32 1 %364 = bitcast <8 x i32> %71 to <32 x i8> %365 = bitcast <4 x i32> %73 to <16 x i8> %366 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %363, <32 x i8> %364, <16 x i8> %365, i32 2) %367 = extractelement <4 x float> %366, i32 0 %368 = extractelement <4 x float> %366, i32 1 %369 = extractelement <4 x float> %366, i32 2 %370 = call float @llvm.AMDGPU.lrp(float %358, float %344, float %367) %371 = call float @llvm.AMDGPU.lrp(float %358, float %345, float %368) %372 = call float @llvm.AMDGPU.lrp(float %358, float %346, float %369) %373 = fsub float -0.000000e+00, %358 %374 = fmul float %359, %373 %375 = fadd float %374, %358 %376 = bitcast float %370 to i32 %377 = bitcast float %371 to i32 %378 = bitcast float %372 to i32 %379 = insertelement <4 x i32> undef, i32 %376, i32 0 %380 = insertelement <4 x i32> %379, i32 %377, i32 1 %381 = insertelement <4 x i32> %380, i32 %378, i32 2 %382 = insertelement <4 x i32> %381, i32 undef, i32 3 %383 = bitcast <8 x i32> %75 to <32 x i8> %384 = bitcast <4 x i32> %77 to <16 x i8> %385 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %382, <32 x i8> %383, <16 x i8> %384, i32 3) %386 = extractelement <4 x float> %385, i32 0 %387 = extractelement <4 x float> %385, i32 1 %388 = extractelement <4 x float> %385, i32 2 %389 = call float @llvm.AMDGPU.lrp(float %375, float %386, float %370) %390 = call float @llvm.AMDGPU.lrp(float %375, float %387, float %371) %391 = call float @llvm.AMDGPU.lrp(float %375, float %388, float %372) %392 = fsub float -0.000000e+00, %389 %393 = fadd float %392, %30 %394 = fsub float -0.000000e+00, %390 %395 = fadd float %394, %31 %396 = fsub float -0.000000e+00, %391 %397 = fadd float %396, %32 %398 = fmul float %37, %87 %399 = fsub float -0.000000e+00, %33 %400 = fadd float %85, %399 %401 = fsub float -0.000000e+00, %34 %402 = fadd float %86, %401 %403 = fsub float -0.000000e+00, %35 %404 = fadd float %398, %403 %405 = fmul float %404, %27 %406 = fmul float %405, 0x3FF7154CA0000000 %407 = call float @llvm.AMDIL.exp.(float %406) %408 = fsub float -0.000000e+00, %407 %409 = fadd float %408, 1.000000e+00 %410 = fmul float %400, %400 %411 = fmul float %402, %402 %412 = fadd float %411, %410 %413 = fmul float %404, %404 %414 = fadd float %412, %413 %415 = fdiv float 1.000000e+00, %404 %416 = fmul float %414, %28 %417 = fmul float %409, %416 %418 = fmul float %415, %417 %419 = fmul float %418, 0x3FF7154CA0000000 %420 = call float @llvm.AMDIL.exp.(float %419) %421 = call float @llvm.AMDIL.clamp.(float %420, float 0.000000e+00, float 1.000000e+00) %422 = fsub float -0.000000e+00, %421 %423 = fadd float %422, 1.000000e+00 %424 = fmul float %88, 5.000000e-01 %425 = fadd float %424, 5.000000e-01 %426 = bitcast float %425 to i32 %427 = bitcast float %29 to i32 %428 = insertelement <2 x i32> undef, i32 %426, i32 0 %429 = insertelement <2 x i32> %428, i32 %427, i32 1 %430 = bitcast <8 x i32> %59 to <32 x i8> %431 = bitcast <4 x i32> %61 to <16 x i8> %432 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %429, <32 x i8> %430, <16 x i8> %431, i32 2) %433 = extractelement <4 x float> %432, i32 0 %434 = fmul float %423, %433 %435 = fmul float %434, %393 %436 = fadd float %435, %389 %437 = fmul float %434, %395 %438 = fadd float %437, %390 %439 = fmul float %434, %397 %440 = fadd float %439, %391 %441 = fmul float %107, 5.000000e-01 %442 = fsub float -0.000000e+00, %36 %443 = fadd float %442, %87 %444 = fcmp oge float %443, 0.000000e+00 %445 = sext i1 %444 to i32 %446 = bitcast i32 %445 to float %447 = bitcast float %446 to i32 %448 = icmp ne i32 %447, 0 %. = select i1 %448, float %107, float %441 %449 = call i32 @llvm.SI.packf16(float %436, float %438) %450 = bitcast i32 %449 to float %451 = call i32 @llvm.SI.packf16(float %440, float %.) %452 = bitcast i32 %451 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %450, float %452, float %450, float %452) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 1, 1, [m0] ; C8400500 v_interp_p2_f32 v16, [v16], v1, 1, 1, [m0] ; C8410501 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], vcc, 0x0 ; C0C66B00 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800F00 0043020F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v6, v4 ; 7E0C4F04 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v14, v6 ; 7E1C4B06 v_log_f32_e32 v6, v3 ; 7E0C4F03 v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v10, v6 ; 7E144B06 v_log_f32_e32 v6, v2 ; 7E0C4F02 v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v7, v6 ; 7E0E4B06 v_log_f32_e32 v2, v5 ; 7E044F05 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_and_b32_e32 v2, 0x7fffffff, v2 ; 360404FF 7FFFFFFF v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v8, v2 ; 0E040508 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mov_b32_e32 v3, 0xbdcccccd ; 7E0602FF BDCCCCCD v_add_f32_e32 v3, v2, v3 ; 06060702 v_cmp_lt_f32_e64 s[0:1], v3, 0 ; D0020000 00010103 v_interp_p1_f32 v8, v0, 1, 4, [m0] ; C8201100 v_interp_p2_f32 v8, [v8], v1, 1, 4, [m0] ; C8211101 v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000 v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001 v_interp_p1_f32 v4, v0, 1, 3, [m0] ; C8100D00 v_interp_p2_f32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700 v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v19, v0, 2, 0, [m0] ; C84C0200 v_interp_p2_f32 v19, [v19], v1, 2, 0, [m0] ; C84D0201 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 s_load_dwordx4 s[16:19], s[2:3], 0x0 ; C0880300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s2, s[16:19], 0x20 ; C2011120 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s2, 3 ; 04690602 s_buffer_load_dword s2, s[16:19], 0x1c ; C201111C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s2, 0 ; 04690002 s_buffer_load_dword s2, s[16:19], 0x1a ; C201111A s_buffer_load_dword s100, s[16:19], 0x19 ; C2321119 s_buffer_load_dword s101, s[16:19], 0x18 ; C2329118 s_buffer_load_dword s3, s[16:19], 0x16 ; C2019116 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s3, 1 ; 04690203 s_buffer_load_dword s12, s[16:19], 0x15 ; C2061115 s_buffer_load_dword s3, s[16:19], 0x14 ; C2019114 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s3, 2 ; 04690403 s_buffer_load_dword s3, s[16:19], 0x13 ; C2019113 s_buffer_load_dword s6, s[16:19], 0x11 ; C2031111 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s6, 4 ; 04690806 s_buffer_load_dword s6, s[16:19], 0x10 ; C2031110 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s6, 5 ; 04690A06 s_buffer_load_dword s13, s[16:19], 0xe ; C206910E s_buffer_load_dword s14, s[16:19], 0xd ; C207110D s_buffer_load_dword s15, s[16:19], 0xc ; C207910C v_mov_b32_e32 v13, s2 ; 7E1A0202 v_mov_b32_e32 v1, s3 ; 7E020203 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[0:1], s[0:1] ; BE802400 s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[0:1] ; 88FE007E v_mov_b32_e32 v20, 0 ; 7E280280 v_cubeid_f32 v24, v17, v18, v19 ; D2880018 044E2511 v_cubema_f32 v23, v17, v18, v19 ; D28E0017 044E2511 v_cubesc_f32 v22, v17, v18, v19 ; D28A0016 044E2511 v_cubetc_f32 v21, v17, v18, v19 ; D28C0015 044E2511 v_rcp_f32_e64 v29, |v23| ; D354011D 00000117 v_mov_b32_e32 v30, 0x3fc00000 ; 7E3C02FF 3FC00000 v_mad_f32 v23, v21, v29, v30 ; D2820017 047A3B15 v_mad_f32 v22, v22, v29, v30 ; D2820016 047A3B16 s_load_dwordx4 s[80:83], s[4:5], 0x4 ; C0A80504 s_load_dwordx4 s[8:11], s[4:5], 0x8 ; C0840508 s_load_dwordx4 s[68:71], s[4:5], 0xc ; C0A2050C s_load_dwordx4 s[84:87], s[4:5], 0x10 ; C0AA0510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v52, s0, 6 ; 04690C00 v_writelane_b32 v52, s1, 7 ; 04690E01 v_writelane_b32 v52, s2, 8 ; 04691002 v_writelane_b32 v52, s3, 9 ; 04691203 s_load_dwordx4 s[40:43], s[4:5], 0x18 ; C0940518 s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C s_load_dwordx4 s[16:19], s[4:5], 0x20 ; C0880520 s_load_dwordx4 s[96:99], s[4:5], 0x24 ; C0B00524 s_load_dwordx8 s[88:95], vcc, 0x8 ; C0EC6B08 s_load_dwordx8 s[0:7], vcc, 0x10 ; C0C06B10 s_load_dwordx8 s[72:79], vcc, 0x18 ; C0E46B18 s_load_dwordx8 s[28:35], vcc, 0x20 ; C0CE6B20 s_load_dwordx8 s[20:27], vcc, 0x28 ; C0CA6B28 s_load_dwordx8 s[60:67], vcc, 0x30 ; C0DE6B30 s_load_dwordx8 s[52:59], vcc, 0x38 ; C0DA6B38 s_load_dwordx8 s[44:51], vcc, 0x40 ; C0D66B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[0:7], s[8:11] ; F0800F00 00401516 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v25, v24 ; 7E324F18 v_mul_legacy_f32_e32 v25, 1.0, v25 ; 0E3232F2 v_exp_f32_e32 v28, v25 ; 7E384B19 v_sub_f32_e32 v29, s14, v5 ; 083A0A0E v_sub_f32_e32 v31, s15, v6 ; 083E0C0F v_mul_f32_e32 v32, v31, v31 ; 10403F1F v_mad_f32 v32, v29, v29, v32 ; D2820020 04823B1D v_sub_f32_e32 v33, s13, v3 ; 0842060D v_mad_f32 v32, v33, v33, v32 ; D2820020 04824321 v_max_f32_e32 v32, 0x33d6bf95, v32 ; 204040FF 33D6BF95 v_rsq_clamp_f32_e32 v32, v32 ; 7E405920 v_mul_f32_e32 v34, v29, v32 ; 1044411D v_mul_f32_e32 v35, v31, v32 ; 1046411F v_mul_f32_e32 v36, v35, v17 ; 10482323 v_mad_f32 v36, v18, v34, v36 ; D2820024 04924512 v_mul_f32_e32 v37, v33, v32 ; 104A4121 v_mad_f32 v36, v19, v37, v36 ; D2820024 04924B13 v_add_f32_e32 v36, v36, v36 ; 06484924 v_mad_f32 v27, v36, v19, -v37 ; D282001B 84962724 v_mad_f32 v26, v36, v18, -v34 ; D282001A 848A2524 v_mad_f32 v25, v36, v17, -v35 ; D2820019 848E2324 v_cubeid_f32 v37, v25, v26, v27 ; D2880025 046E3519 v_cubema_f32 v36, v25, v26, v27 ; D28E0024 046E3519 v_cubesc_f32 v35, v25, v26, v27 ; D28A0023 046E3519 v_cubetc_f32 v34, v25, v26, v27 ; D28C0022 046E3519 v_rcp_f32_e64 v42, |v36| ; D354012A 00000124 v_mad_f32 v36, v34, v42, v30 ; D2820024 047A5522 v_mad_f32 v35, v35, v42, v30 ; D2820023 047A5523 image_sample v[34:36], 7, 0, 0, 0, 0, 0, 0, 0, v[35:38], s[28:35], s[84:87] ; F0800700 02A72223 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v37, v36 ; 7E4A4F24 v_mov_b32_e32 v38, 0x400ccccd ; 7E4C02FF 400CCCCD v_mul_legacy_f32_e32 v37, v38, v37 ; 0E4A4B26 v_exp_f32_e32 v37, v37 ; 7E4A4B25 image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[88:95], s[80:83] ; F0800F00 0296270F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v15, v42 ; 7E1E4F2A v_mul_legacy_f32_e32 v15, 1.0, v15 ; 0E1E1EF2 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF v_and_b32_e32 v15, v15, v16 ; 361E210F v_log_f32_e32 v15, v15 ; 7E1E4F0F v_mul_legacy_f32_e32 v15, v38, v15 ; 0E1E1F26 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_sub_f32_e32 v43, 1.0, v15 ; 08561EF2 v_mul_f32_e32 v37, v37, v43 ; 104A5725 v_mov_b32_e32 v28, v27 ; 7E38031B v_cubeid_f32 v47, v25, v26, v27 ; D288002F 046E3519 v_cubema_f32 v46, v25, v26, v27 ; D28E002E 046E3519 v_cubesc_f32 v45, v25, v26, v27 ; D28A002D 046E3519 v_cubetc_f32 v44, v25, v26, v27 ; D28C002C 046E3519 v_rcp_f32_e64 v25, |v46| ; D3540119 0000012E v_mad_f32 v46, v44, v25, v30 ; D282002E 047A332C v_mad_f32 v45, v45, v25, v30 ; D282002D 047A332D image_sample v[25:27], 7, 0, 0, 0, 0, 0, 0, 0, v[45:48], s[72:79], s[68:71] ; F0800700 0232192D s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v28, v27 ; 7E384F1B v_mul_legacy_f32_e32 v28, v38, v28 ; 0E383926 v_exp_f32_e32 v28, v28 ; 7E384B1C v_mad_f32 v28, v15, v28, v37 ; D282001C 0496390F v_mov_b32_e32 v30, 0x3d58adac ; 7E3C02FF 3D58ADAC v_mad_f32 v30, v31, v32, v30 ; D282001E 047A411F v_mov_b32_e32 v31, 0x3f589375 ; 7E3E02FF 3F589375 v_mad_f32 v29, v29, v32, v31 ; D282001D 047E411D v_mul_f32_e32 v31, v29, v29 ; 103E3B1D v_mad_f32 v31, v30, v30, v31 ; D282001F 047E3D1E v_mov_b32_e32 v37, 0x3f076c8b ; 7E4A02FF 3F076C8B v_mad_f32 v32, v33, v32, v37 ; D2820020 04964121 v_mad_f32 v31, v32, v32, v31 ; D282001F 047E4120 v_add_f32_e32 v31, 0, v31 ; 063E3E80 v_rsq_clamp_f32_e32 v31, v31 ; 7E3E591F v_mul_f32_e32 v29, v31, v29 ; 103A3B1F v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_mul_f32_e32 v30, v17, v30 ; 103C3D11 v_mad_f32 v29, v29, v18, v30 ; D282001D 047A251D v_mul_f32_e32 v30, v31, v32 ; 103C411F v_mad_f32 v17, v30, v19, v29 ; D2820011 0476271E v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 v_and_b32_e32 v16, v17, v16 ; 36202111 v_log_f32_e32 v16, v16 ; 7E204F10 v_mov_b32_e32 v17, 0x43470000 ; 7E2202FF 43470000 v_mad_f32 v17, v17, v15, 1.0 ; D2820011 03CA1F11 v_mul_legacy_f32_e32 v16, v17, v16 ; 0E202111 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v17, 0x3dcccccd, v17 ; 102222FF 3DCCCCCD v_mad_f32 v18, v17, v16, v28 ; D2820012 04722111 v_log_f32_e32 v19, v41 ; 7E264F29 v_mul_legacy_f32_e32 v19, v38, v19 ; 0E262726 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v18, v19, v18 ; 10242513 v_log_f32_e32 v19, v23 ; 7E264F17 v_mul_legacy_f32_e32 v19, v38, v19 ; 0E262726 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v14, v19, v14 ; 101C1D13 v_mad_f32 v14, 2.0, v14, v18 ; D282000E 044A1CF4 v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_mul_f32_e32 v14, 0x3ee8ba1f, v14 ; 101C1CFF 3EE8BA1F v_exp_f32_e32 v30, v14 ; 7E3C4B0E v_log_f32_e32 v14, v35 ; 7E1C4F23 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v14, v14, v43 ; 101C570E v_log_f32_e32 v18, v26 ; 7E244F1A v_mul_legacy_f32_e32 v18, v38, v18 ; 0E242526 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mad_f32 v14, v15, v18, v14 ; D282000E 043A250F v_mad_f32 v14, v17, v16, v14 ; D282000E 043A2111 v_log_f32_e32 v18, v40 ; 7E244F28 v_mul_legacy_f32_e32 v18, v38, v18 ; 0E242526 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v14, v18, v14 ; 101C1D12 v_log_f32_e32 v18, v22 ; 7E244F16 v_mul_legacy_f32_e32 v18, v38, v18 ; 0E242526 v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v10, v18, v10 ; 10141512 v_mad_f32 v10, 2.0, v10, v14 ; D282000A 043A14F4 v_log_f32_e64 v10, |v10| ; D34E010A 0000010A v_mul_f32_e32 v10, 0x3ee8ba1f, v10 ; 101414FF 3EE8BA1F v_exp_f32_e32 v29, v10 ; 7E3A4B0A v_log_f32_e32 v10, v34 ; 7E144F22 v_mul_legacy_f32_e32 v10, v38, v10 ; 0E141526 v_exp_f32_e32 v10, v10 ; 7E144B0A v_mul_f32_e32 v10, v10, v43 ; 1014570A v_log_f32_e32 v14, v25 ; 7E1C4F19 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mad_f32 v10, v15, v14, v10 ; D282000A 042A1D0F v_mad_f32 v10, v17, v16, v10 ; D282000A 042A2111 v_log_f32_e32 v14, v39 ; 7E1C4F27 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v10, v14, v10 ; 1014150E v_log_f32_e32 v14, v21 ; 7E1C4F15 v_mul_legacy_f32_e32 v14, v38, v14 ; 0E1C1D26 v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_mad_f32 v7, 2.0, v7, v10 ; D2820007 042A0EF4 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v28, v7 ; 7E384B07 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[28:31], s[60:67], s[40:43] ; F0800700 014F0E1C v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_add_f32_e32 v7, 0, v9 ; 060E1280 image_sample v[7:8], 5, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[52:59], s[36:39] ; F0800500 012D0707 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v9, 1.0, v7 ; 08120EF2 image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[44:51], s[16:19] ; F0800700 008B0A0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v12, v9 ; 1022130C v_mad_f32 v19, v7, v16, v17 ; D2820013 04462107 v_mul_f32_e32 v21, v11, v9 ; 102A130B v_mad_f32 v18, v7, v15, v21 ; D2820012 04561F07 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mad_f32 v17, v7, v14, v9 ; D2820011 04261D07 s_load_dwordx8 s[0:7], vcc, 0x48 ; C0C06B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[0:7], s[96:99] ; F0800700 03000911 v_mad_f32 v7, -v8, v7, v7 ; D2820007 241E0F08 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v12, v18, v8 ; 10181112 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v7, v10, v12 ; D282000C 04321507 v_sub_f32_e32 v14, s12, v12 ; 081C180C v_readlane_b32 s0, v52, 3 ; 02010734 s_nop 2 ; BF800002 v_mad_f32 v13, v3, s0, -v13 ; D282000D 84340103 v_subrev_f32_e32 v5, s100, v5 ; 0A0A0A64 v_subrev_f32_e32 v6, s101, v6 ; 0A0C0C65 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mad_f32 v5, v5, v5, v6 ; D2820005 041A0B05 v_mad_f32 v5, v13, v13, v5 ; D2820005 04161B0D v_readlane_b32 s0, v52, 4 ; 02010934 s_nop 2 ; BF800002 v_mul_f32_e32 v5, s0, v5 ; 100A0A00 v_readlane_b32 s0, v52, 5 ; 02010B34 s_nop 2 ; BF800002 v_mul_f32_e32 v6, s0, v13 ; 100C1A00 v_mul_f32_e32 v6, 0x3fb8aa65, v6 ; 100C0CFF 3FB8AA65 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_rcp_f32_e32 v6, v13 ; 7E0C550D v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, 0x3fb8aa65, v5 ; 100A0AFF 3FB8AA65 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_mad_f32 v0, 0.5, v4, 0.5 ; D2820000 03C208F0 v_readlane_b32 s0, v52, 6 ; 02010D34 v_readlane_b32 s1, v52, 7 ; 02030F34 v_readlane_b32 s2, v52, 8 ; 02051134 v_readlane_b32 s3, v52, 9 ; 02071334 s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[0:3] ; F0800100 00050000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mad_f32 v1, v0, v14, v12 ; D2820001 04321D00 v_mul_f32_e32 v4, v17, v8 ; 10081111 v_mad_f32 v4, v7, v9, v4 ; D2820004 04121307 v_readlane_b32 s0, v52, 2 ; 02010534 s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 v_mul_f32_e32 v4, v19, v8 ; 10081113 v_mad_f32 v4, v7, v11, v4 ; D2820004 04121707 v_readlane_b32 s0, v52, 1 ; 02010334 s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v0, v0, v5, v4 ; D2820000 04120B00 v_mul_f32_e32 v4, 0.5, v2 ; 100804F0 v_readlane_b32 s0, v52, 0 ; 02010134 s_nop 2 ; BF800002 v_subrev_f32_e32 v3, s0, v3 ; 0A060600 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v2, v4, v2, s[0:1] ; D2000002 00020504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..104] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} IMM[2] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+9] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[3] 112: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 113: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 114: RSQ TEMP[0].x, TEMP[0].xxxx 115: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3] 116: MOV TEMP[0].xyz, TEMP[0].xyzx 117: RCP TEMP[4].x, TEMP[1].wwww 118: MOV TEMP[2].w, TEMP[4].xxxx 119: MUL TEMP[4].xy, TEMP[1], TEMP[4].xxxx 120: MOV TEMP[4].xy, TEMP[4].xyxx 121: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 122: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 123: MOV TEMP[5].zw, TEMP[5].wwzw 124: MUL TEMP[6].xy, TEMP[2].yyyy, CONST[5] 125: MOV TEMP[3].xy, TEMP[6].xyxx 126: MAD TEMP[6].xy, TEMP[2].xxxx, CONST[4], TEMP[3] 127: MOV TEMP[3].xy, TEMP[6].xyxx 128: MAD TEMP[6].xy, TEMP[2].zzzz, CONST[6], TEMP[3] 129: MOV TEMP[3].xy, TEMP[6].xyxx 130: ADD TEMP[6].xy, TEMP[3], CONST[7] 131: MOV TEMP[3].xy, TEMP[6].xyxx 132: MAD TEMP[3].xy, TEMP[3], IMM[2].xyzz, IMM[2].yyyy 133: MOV TEMP[3].xy, TEMP[3].xyxx 134: MOV TEMP[5].xy, IN[4].xyxx 135: MOV TEMP[2].xyz, TEMP[2].xyzx 136: MOV TEMP[2].w, IMM[0].yyyy 137: MOV TEMP[4].zw, IMM[0].yyzy 138: MOV TEMP[3].zw, IMM[0].yyzy 139: MOV OUT[2], TEMP[5] 140: MOV OUT[3], TEMP[2] 141: MOV OUT[0], TEMP[1] 142: MOV OUT[1], TEMP[0] 143: MOV OUT[4], TEMP[4] 144: MOV OUT[5], TEMP[3] 145: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0 %57 = add i32 %5, %7 %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %57) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = fmul float 3.000000e+00, %59 %76 = fmul float %45, 1.000000e+00 %77 = fadd float %76, 0.000000e+00 %78 = fmul float %46, 1.000000e+00 %79 = fadd float %78, 0.000000e+00 %80 = fmul float %47, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %45, 0.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = fptosi float %75 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 144 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %86, 4 %91 = add i32 %90, 148 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %86, 4 %94 = add i32 %93, 152 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %86, 4 %97 = add i32 %96, 156 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %77, %89 %100 = fmul float %79, %92 %101 = fadd float %99, %100 %102 = fmul float %81, %95 %103 = fadd float %101, %102 %104 = fmul float %83, %98 %105 = fadd float %103, %104 %106 = fptosi float %75 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = add i32 1, %108 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = shl i32 %111, 4 %113 = add i32 %112, 144 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = shl i32 %111, 4 %116 = add i32 %115, 148 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = shl i32 %111, 4 %119 = add i32 %118, 152 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %111, 4 %122 = add i32 %121, 156 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %77, %114 %125 = fmul float %79, %117 %126 = fadd float %124, %125 %127 = fmul float %81, %120 %128 = fadd float %126, %127 %129 = fmul float %83, %123 %130 = fadd float %128, %129 %131 = fptosi float %75 to i32 %132 = bitcast i32 %131 to float %133 = bitcast float %132 to i32 %134 = add i32 2, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = shl i32 %136, 4 %138 = add i32 %137, 144 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = shl i32 %136, 4 %141 = add i32 %140, 148 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = shl i32 %136, 4 %144 = add i32 %143, 152 %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %144) %146 = shl i32 %136, 4 %147 = add i32 %146, 156 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %77, %139 %150 = fmul float %79, %142 %151 = fadd float %149, %150 %152 = fmul float %81, %145 %153 = fadd float %151, %152 %154 = fmul float %83, %148 %155 = fadd float %153, %154 %156 = fmul float %105, %52 %157 = fmul float %130, %52 %158 = fmul float %155, %52 %159 = fptosi float %75 to i32 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 144 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = shl i32 %161, 4 %166 = add i32 %165, 148 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = shl i32 %161, 4 %169 = add i32 %168, 152 %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %169) %171 = fmul float %66, %164 %172 = fmul float %67, %167 %173 = fadd float %172, %171 %174 = fmul float %68, %170 %175 = fadd float %173, %174 %176 = fptosi float %75 to i32 %177 = bitcast i32 %176 to float %178 = bitcast float %177 to i32 %179 = add i32 1, %178 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = shl i32 %181, 4 %183 = add i32 %182, 144 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = shl i32 %181, 4 %186 = add i32 %185, 148 %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %186) %188 = shl i32 %181, 4 %189 = add i32 %188, 152 %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %189) %191 = fmul float %66, %184 %192 = fmul float %67, %187 %193 = fadd float %192, %191 %194 = fmul float %68, %190 %195 = fadd float %193, %194 %196 = fptosi float %75 to i32 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = add i32 2, %198 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = shl i32 %201, 4 %203 = add i32 %202, 144 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %201, 4 %206 = add i32 %205, 148 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %201, 4 %209 = add i32 %208, 152 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %66, %204 %212 = fmul float %67, %207 %213 = fadd float %212, %211 %214 = fmul float %68, %210 %215 = fadd float %213, %214 %216 = fmul float %175, %52 %217 = fmul float %195, %52 %218 = fmul float %215, %52 %219 = fcmp olt float 0.000000e+00, %53 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 br i1 %223, label %IF, label %ENDIF IF: ; preds = %main_body %224 = fmul float 3.000000e+00, %60 %225 = fptosi float %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = shl i32 %227, 4 %229 = add i32 %228, 144 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = shl i32 %227, 4 %232 = add i32 %231, 148 %233 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %232) %234 = shl i32 %227, 4 %235 = add i32 %234, 152 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = shl i32 %227, 4 %238 = add i32 %237, 156 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %77, %230 %241 = fmul float %79, %233 %242 = fadd float %240, %241 %243 = fmul float %81, %236 %244 = fadd float %242, %243 %245 = fmul float %83, %239 %246 = fadd float %244, %245 %247 = fptosi float %224 to i32 %248 = bitcast i32 %247 to float %249 = bitcast float %248 to i32 %250 = add i32 1, %249 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 144 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = shl i32 %252, 4 %257 = add i32 %256, 148 %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %257) %259 = shl i32 %252, 4 %260 = add i32 %259, 152 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = shl i32 %252, 4 %263 = add i32 %262, 156 %264 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %263) %265 = fmul float %77, %255 %266 = fmul float %79, %258 %267 = fadd float %265, %266 %268 = fmul float %81, %261 %269 = fadd float %267, %268 %270 = fmul float %83, %264 %271 = fadd float %269, %270 %272 = fptosi float %224 to i32 %273 = bitcast i32 %272 to float %274 = bitcast float %273 to i32 %275 = add i32 2, %274 %276 = bitcast i32 %275 to float %277 = bitcast float %276 to i32 %278 = shl i32 %277, 4 %279 = add i32 %278, 144 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = shl i32 %277, 4 %282 = add i32 %281, 148 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = shl i32 %277, 4 %285 = add i32 %284, 152 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = shl i32 %277, 4 %288 = add i32 %287, 156 %289 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %288) %290 = fmul float %77, %280 %291 = fmul float %79, %283 %292 = fadd float %290, %291 %293 = fmul float %81, %286 %294 = fadd float %292, %293 %295 = fmul float %83, %289 %296 = fadd float %294, %295 %297 = fmul float %53, %246 %298 = fadd float %297, %156 %299 = fmul float %53, %271 %300 = fadd float %299, %157 %301 = fmul float %53, %296 %302 = fadd float %301, %158 %303 = fptosi float %224 to i32 %304 = bitcast i32 %303 to float %305 = bitcast float %304 to i32 %306 = shl i32 %305, 4 %307 = add i32 %306, 144 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = shl i32 %305, 4 %310 = add i32 %309, 148 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = shl i32 %305, 4 %313 = add i32 %312, 152 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %66, %308 %316 = fmul float %67, %311 %317 = fadd float %316, %315 %318 = fmul float %68, %314 %319 = fadd float %317, %318 %320 = fptosi float %224 to i32 %321 = bitcast i32 %320 to float %322 = bitcast float %321 to i32 %323 = add i32 1, %322 %324 = bitcast i32 %323 to float %325 = bitcast float %324 to i32 %326 = shl i32 %325, 4 %327 = add i32 %326, 144 %328 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %327) %329 = shl i32 %325, 4 %330 = add i32 %329, 148 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = shl i32 %325, 4 %333 = add i32 %332, 152 %334 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %333) %335 = fmul float %66, %328 %336 = fmul float %67, %331 %337 = fadd float %336, %335 %338 = fmul float %68, %334 %339 = fadd float %337, %338 %340 = fptosi float %224 to i32 %341 = bitcast i32 %340 to float %342 = bitcast float %341 to i32 %343 = add i32 2, %342 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 144 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = shl i32 %345, 4 %350 = add i32 %349, 148 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = shl i32 %345, 4 %353 = add i32 %352, 152 %354 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %353) %355 = fmul float %66, %348 %356 = fmul float %67, %351 %357 = fadd float %356, %355 %358 = fmul float %68, %354 %359 = fadd float %357, %358 %360 = fmul float %53, %319 %361 = fadd float %360, %216 %362 = fmul float %53, %339 %363 = fadd float %362, %217 %364 = fmul float %53, %359 %365 = fadd float %364, %218 %366 = fcmp olt float 0.000000e+00, %54 %367 = sext i1 %366 to i32 %368 = bitcast i32 %367 to float %369 = bitcast float %368 to i32 %370 = icmp ne i32 %369, 0 br i1 %370, label %IF70, label %ENDIF ENDIF: ; preds = %IF70, %IF, %main_body %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %428, %IF70 ], [ %224, %IF ] %temp8.0 = phi float [ %156, %main_body ], [ %502, %IF70 ], [ %298, %IF ] %temp9.0 = phi float [ %157, %main_body ], [ %504, %IF70 ], [ %300, %IF ] %temp10.0 = phi float [ %158, %main_body ], [ %506, %IF70 ], [ %302, %IF ] %temp12.0 = phi float [ %216, %main_body ], [ %565, %IF70 ], [ %361, %IF ] %temp13.0 = phi float [ %217, %main_body ], [ %567, %IF70 ], [ %363, %IF ] %temp14.0 = phi float [ %218, %main_body ], [ %569, %IF70 ], [ %365, %IF ] %371 = fmul float %temp9.0, %17 %372 = fmul float %temp9.0, %18 %373 = fmul float %temp9.0, %19 %374 = fmul float %temp9.0, %20 %375 = fmul float %temp8.0, %13 %376 = fadd float %375, %371 %377 = fmul float %temp8.0, %14 %378 = fadd float %377, %372 %379 = fmul float %temp8.0, %15 %380 = fadd float %379, %373 %381 = fmul float %temp8.0, %16 %382 = fadd float %381, %374 %383 = fmul float %temp10.0, %21 %384 = fadd float %383, %376 %385 = fmul float %temp10.0, %22 %386 = fadd float %385, %378 %387 = fmul float %temp10.0, %23 %388 = fadd float %387, %380 %389 = fmul float %temp10.0, %24 %390 = fadd float %389, %382 %391 = fadd float %384, %25 %392 = fadd float %386, %26 %393 = fadd float %388, %27 %394 = fadd float %390, %28 %395 = fmul float %temp12.0, %temp12.0 %396 = fmul float %temp13.0, %temp13.0 %397 = fadd float %396, %395 %398 = fmul float %temp14.0, %temp14.0 %399 = fadd float %397, %398 %400 = call float @llvm.maxnum.f32(float %399, float 0x3E7AD7F2A0000000) %401 = call float @llvm.AMDGPU.rsq.clamped.f32(float %400) %402 = fmul float %401, %temp12.0 %403 = fmul float %401, %temp13.0 %404 = fmul float %401, %temp14.0 %405 = fdiv float 1.000000e+00, %394 %406 = fmul float %391, %405 %407 = fmul float %392, %405 %408 = fadd float %temp8.0, %39 %409 = fadd float %temp9.0, %40 %410 = fmul float %408, %37 %411 = fmul float %409, %38 %412 = fmul float %temp9.0, %31 %413 = fmul float %temp9.0, %32 %414 = fmul float %temp8.0, %29 %415 = fadd float %414, %412 %416 = fmul float %temp8.0, %30 %417 = fadd float %416, %413 %418 = fmul float %temp10.0, %33 %419 = fadd float %418, %415 %420 = fmul float %temp10.0, %34 %421 = fadd float %420, %417 %422 = fadd float %419, %35 %423 = fadd float %421, %36 %424 = fmul float %422, 5.000000e-01 %425 = fadd float %424, -5.000000e-01 %426 = fmul float %423, -5.000000e-01 %427 = fadd float %426, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %402, float %403, float %404, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %410, float %411) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %406, float %407, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %425, float %427, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %391, float %392, float %393, float %394) ret void IF70: ; preds = %IF %428 = fmul float 3.000000e+00, %61 %429 = fptosi float %428 to i32 %430 = bitcast i32 %429 to float %431 = bitcast float %430 to i32 %432 = shl i32 %431, 4 %433 = add i32 %432, 144 %434 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %433) %435 = shl i32 %431, 4 %436 = add i32 %435, 148 %437 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %436) %438 = shl i32 %431, 4 %439 = add i32 %438, 152 %440 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %439) %441 = shl i32 %431, 4 %442 = add i32 %441, 156 %443 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %442) %444 = fmul float %77, %434 %445 = fmul float %79, %437 %446 = fadd float %444, %445 %447 = fmul float %81, %440 %448 = fadd float %446, %447 %449 = fmul float %83, %443 %450 = fadd float %448, %449 %451 = fptosi float %428 to i32 %452 = bitcast i32 %451 to float %453 = bitcast float %452 to i32 %454 = add i32 1, %453 %455 = bitcast i32 %454 to float %456 = bitcast float %455 to i32 %457 = shl i32 %456, 4 %458 = add i32 %457, 144 %459 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %458) %460 = shl i32 %456, 4 %461 = add i32 %460, 148 %462 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %461) %463 = shl i32 %456, 4 %464 = add i32 %463, 152 %465 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %464) %466 = shl i32 %456, 4 %467 = add i32 %466, 156 %468 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %467) %469 = fmul float %77, %459 %470 = fmul float %79, %462 %471 = fadd float %469, %470 %472 = fmul float %81, %465 %473 = fadd float %471, %472 %474 = fmul float %83, %468 %475 = fadd float %473, %474 %476 = fptosi float %428 to i32 %477 = bitcast i32 %476 to float %478 = bitcast float %477 to i32 %479 = add i32 2, %478 %480 = bitcast i32 %479 to float %481 = bitcast float %480 to i32 %482 = shl i32 %481, 4 %483 = add i32 %482, 144 %484 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %483) %485 = shl i32 %481, 4 %486 = add i32 %485, 148 %487 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %486) %488 = shl i32 %481, 4 %489 = add i32 %488, 152 %490 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %489) %491 = shl i32 %481, 4 %492 = add i32 %491, 156 %493 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %492) %494 = fmul float %77, %484 %495 = fmul float %79, %487 %496 = fadd float %494, %495 %497 = fmul float %81, %490 %498 = fadd float %496, %497 %499 = fmul float %83, %493 %500 = fadd float %498, %499 %501 = fmul float %54, %450 %502 = fadd float %501, %298 %503 = fmul float %54, %475 %504 = fadd float %503, %300 %505 = fmul float %54, %500 %506 = fadd float %505, %302 %507 = fptosi float %428 to i32 %508 = bitcast i32 %507 to float %509 = bitcast float %508 to i32 %510 = shl i32 %509, 4 %511 = add i32 %510, 144 %512 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %511) %513 = shl i32 %509, 4 %514 = add i32 %513, 148 %515 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %514) %516 = shl i32 %509, 4 %517 = add i32 %516, 152 %518 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %517) %519 = fmul float %66, %512 %520 = fmul float %67, %515 %521 = fadd float %520, %519 %522 = fmul float %68, %518 %523 = fadd float %521, %522 %524 = fptosi float %428 to i32 %525 = bitcast i32 %524 to float %526 = bitcast float %525 to i32 %527 = add i32 1, %526 %528 = bitcast i32 %527 to float %529 = bitcast float %528 to i32 %530 = shl i32 %529, 4 %531 = add i32 %530, 144 %532 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %531) %533 = shl i32 %529, 4 %534 = add i32 %533, 148 %535 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %534) %536 = shl i32 %529, 4 %537 = add i32 %536, 152 %538 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %537) %539 = fmul float %66, %532 %540 = fmul float %67, %535 %541 = fadd float %540, %539 %542 = fmul float %68, %538 %543 = fadd float %541, %542 %544 = fptosi float %428 to i32 %545 = bitcast i32 %544 to float %546 = bitcast float %545 to i32 %547 = add i32 2, %546 %548 = bitcast i32 %547 to float %549 = bitcast float %548 to i32 %550 = shl i32 %549, 4 %551 = add i32 %550, 144 %552 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %551) %553 = shl i32 %549, 4 %554 = add i32 %553, 148 %555 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %554) %556 = shl i32 %549, 4 %557 = add i32 %556, 152 %558 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %557) %559 = fmul float %66, %552 %560 = fmul float %67, %555 %561 = fadd float %560, %559 %562 = fmul float %68, %558 %563 = fadd float %561, %562 %564 = fmul float %54, %523 %565 = fadd float %564, %361 %566 = fmul float %54, %543 %567 = fadd float %566, %363 %568 = fmul float %54, %563 %569 = fadd float %568, %365 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v11, s[20:23], 0 idxen ; E00C2000 8005140B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v20 ; 100028FF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0xb0, v3 ; 4A0006FF 000000B0 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_format_xyzw v[15:18], v11, s[24:27], 0 idxen ; E00C2000 80060F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v15 ; 10021F00 v_add_i32_e32 v2, 0xb4, v3 ; 4A0406FF 000000B4 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v16, v2, v1 ; D2820001 04060510 v_add_i32_e32 v4, 0xb8, v3 ; 4A0806FF 000000B8 buffer_load_dword v12, v4, s[0:3], 0 offen ; E0301000 80000C04 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v17, v12, v1 ; D2820001 04061911 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v4, 0xa0, v3 ; 4A0806FF 000000A0 buffer_load_dword v13, v4, s[0:3], 0 offen ; E0301000 80000D04 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v13, v15 ; 10081F0D v_add_i32_e32 v5, 0xa4, v3 ; 4A0A06FF 000000A4 buffer_load_dword v14, v5, s[0:3], 0 offen ; E0301000 80000E05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v16, v14, v4 ; D2820004 04121D10 v_add_i32_e32 v5, 0xa8, v3 ; 4A0A06FF 000000A8 buffer_load_dword v27, v5, s[0:3], 0 offen ; E0301000 80001B05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v17, v27, v4 ; D2820004 04123711 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_add_i32_e32 v5, 0x90, v3 ; 4A0A06FF 00000090 buffer_load_dword v28, v5, s[0:3], 0 offen ; E0301000 80001C05 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v28, v15 ; 100A1F1C v_add_i32_e32 v6, 0x94, v3 ; 4A0C06FF 00000094 buffer_load_dword v29, v6, s[0:3], 0 offen ; E0301000 80001D06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v16, v29, v5 ; D2820005 04163B10 v_add_i32_e32 v6, 0x98, v3 ; 4A0C06FF 00000098 buffer_load_dword v30, v6, s[0:3], 0 offen ; E0301000 80001E06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v17, v30, v5 ; D2820005 04163D11 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 buffer_load_format_xyzw v[31:34], v11, s[12:15], 0 idxen ; E00C2000 80031F0B v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v19, 0, v32 ; 06264080 v_mul_f32_e32 v2, v2, v19 ; 10042702 v_add_f32_e32 v24, 0, v31 ; 06303E80 v_mad_f32 v0, v24, v0, v2 ; D2820000 040A0118 v_add_f32_e32 v26, 0, v33 ; 06344280 v_mad_f32 v0, v26, v12, v0 ; D2820000 0402191A v_mad_f32 v25, 0, v31, 1.0 ; D2820019 03CA3E80 v_add_i32_e32 v2, 0xbc, v3 ; 4A0406FF 000000BC buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v25, v2, v0 ; D2820000 04020519 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v2, v14, v19 ; 1004270E v_mad_f32 v2, v24, v13, v2 ; D2820002 040A1B18 v_mad_f32 v2, v26, v27, v2 ; D2820002 040A371A v_add_i32_e32 v12, 0xac, v3 ; 4A1806FF 000000AC buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v25, v12, v2 ; D2820002 040A1919 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v12, v29, v19 ; 1018271D v_mad_f32 v12, v24, v28, v12 ; D282000C 04323918 v_mad_f32 v12, v26, v30, v12 ; D282000C 04323D1A v_add_i32_e32 v3, 0x9c, v3 ; 4A0606FF 0000009C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v25, v3, v12 ; D2820003 04320719 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v27, 0, -1, s[4:5] ; D200081B 00118280 v_cmp_ne_i32_e64 s[32:33], v27, 0 ; D10A0020 0001011B s_buffer_load_dword s20, s[0:3], 0x23 ; C20A0123 s_buffer_load_dword s19, s[0:3], 0x22 ; C2098122 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s24, s[0:3], 0xd ; C20C010D s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s23, s[0:3], 0xb ; C20B810B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s26, s[0:3], 0x9 ; C20D0109 s_buffer_load_dword s18, s[0:3], 0x8 ; C2090108 s_buffer_load_dword s27, s[0:3], 0x7 ; C20D8107 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s25, s[0:3], 0x4 ; C20C8104 s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s31, s[0:3], 0x1 ; C20F8101 s_buffer_load_dword s28, s[0:3], 0x0 ; C20E0100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v21 ; 100C2AFF 40400000 v_cvt_i32_f32_e32 v27, v6 ; 7E361106 v_lshlrev_b32_e32 v27, 4, v27 ; 34363684 v_add_i32_e32 v28, 0xb4, v27 ; 4A3836FF 000000B4 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v29, 0xb0, v27 ; 4A3A36FF 000000B0 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, v29, v15 ; 103C1F1D v_mad_f32 v30, v16, v28, v30 ; D282001E 047A3910 v_add_i32_e32 v31, 0xb8, v27 ; 4A3E36FF 000000B8 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v17, v31, v30 ; D282001E 047A3F11 v_mad_f32 v1, v8, v30, v1 ; D2820001 04063D08 v_add_i32_e32 v30, 0xa4, v27 ; 4A3C36FF 000000A4 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E v_add_i32_e32 v32, 0xa0, v27 ; 4A4036FF 000000A0 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v32, v15 ; 10421F20 v_mad_f32 v33, v16, v30, v33 ; D2820021 04863D10 v_add_i32_e32 v34, 0xa8, v27 ; 4A4436FF 000000A8 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, v17, v34, v33 ; D2820021 04864511 v_mad_f32 v4, v8, v33, v4 ; D2820004 04124308 v_add_i32_e32 v33, 0x94, v27 ; 4A4236FF 00000094 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 v_add_i32_e32 v35, 0x90, v27 ; 4A4636FF 00000090 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0301000 80002323 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v35, v15 ; 10481F23 v_mad_f32 v36, v16, v33, v36 ; D2820024 04924310 v_add_i32_e32 v37, 0x98, v27 ; 4A4A36FF 00000098 buffer_load_dword v37, v37, s[0:3], 0 offen ; E0301000 80002525 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, v17, v37, v36 ; D2820024 04924B11 v_mad_f32 v5, v8, v36, v5 ; D2820005 04164908 v_mul_f32_e32 v28, v28, v19 ; 1038271C v_mad_f32 v28, v24, v29, v28 ; D282001C 04723B18 v_mad_f32 v28, v26, v31, v28 ; D282001C 04723F1A v_add_i32_e32 v29, 0xbc, v27 ; 4A3A36FF 000000BC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v0, v8, v28, v0 ; D2820000 04023908 v_mul_f32_e32 v28, v30, v19 ; 1038271E v_mad_f32 v28, v24, v32, v28 ; D282001C 04724118 v_mad_f32 v28, v26, v34, v28 ; D282001C 0472451A v_add_i32_e32 v29, 0xac, v27 ; 4A3A36FF 000000AC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v2, v8, v28, v2 ; D2820002 040A3908 v_mul_f32_e32 v28, v33, v19 ; 10382721 v_mad_f32 v28, v24, v35, v28 ; D282001C 04724718 v_mad_f32 v28, v26, v37, v28 ; D282001C 04724B1A v_add_i32_e32 v27, 0x9c, v27 ; 4A3636FF 0000009C buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v25, v27, v28 ; D282001B 04723719 v_mad_f32 v3, v8, v27, v3 ; D2820003 040E3708 v_cmp_gt_f32_e64 s[34:35], v9, 0 ; D0080022 00010109 v_cndmask_b32_e64 v27, 0, -1, s[34:35] ; D200081B 00898280 v_cmp_ne_i32_e64 s[34:35], v27, 0 ; D10A0022 0001011B s_and_saveexec_b64 s[34:35], s[34:35] ; BEA22422 s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v22 ; 100C2CFF 40400000 v_cvt_i32_f32_e32 v20, v6 ; 7E281106 v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v21, 0xb4, v20 ; 4A2A28FF 000000B4 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_add_i32_e32 v22, 0xb0, v20 ; 4A2C28FF 000000B0 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v23, v22, v15 ; 102E1F16 v_mad_f32 v23, v16, v21, v23 ; D2820017 045E2B10 v_add_i32_e32 v27, 0xb8, v20 ; 4A3628FF 000000B8 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v17, v27, v23 ; D2820017 045E3711 v_mad_f32 v1, v9, v23, v1 ; D2820001 04062F09 v_add_i32_e32 v23, 0xa4, v20 ; 4A2E28FF 000000A4 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v28, 0xa0, v20 ; 4A3828FF 000000A0 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v15 ; 103A1F1C v_mad_f32 v29, v16, v23, v29 ; D282001D 04762F10 v_add_i32_e32 v30, 0xa8, v20 ; 4A3C28FF 000000A8 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v17, v30, v29 ; D282001D 04763D11 v_mad_f32 v4, v9, v29, v4 ; D2820004 04123B09 v_add_i32_e32 v29, 0x94, v20 ; 4A3A28FF 00000094 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v31, 0x90, v20 ; 4A3E28FF 00000090 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v15 ; 10401F1F v_mad_f32 v32, v16, v29, v32 ; D2820020 04823B10 v_add_i32_e32 v33, 0x98, v20 ; 4A4228FF 00000098 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v17, v33, v32 ; D282000F 04824311 v_mad_f32 v5, v9, v15, v5 ; D2820005 04161F09 v_mul_f32_e32 v15, v21, v19 ; 101E2715 v_mad_f32 v15, v24, v22, v15 ; D282000F 043E2D18 v_mad_f32 v15, v26, v27, v15 ; D282000F 043E371A v_add_i32_e32 v16, 0xbc, v20 ; 4A2028FF 000000BC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v0, v9, v15, v0 ; D2820000 04021F09 v_mul_f32_e32 v15, v23, v19 ; 101E2717 v_mad_f32 v15, v24, v28, v15 ; D282000F 043E3918 v_mad_f32 v15, v26, v30, v15 ; D282000F 043E3D1A v_add_i32_e32 v16, 0xac, v20 ; 4A2028FF 000000AC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v2, v9, v15, v2 ; D2820002 040A1F09 v_mul_f32_e32 v15, v29, v19 ; 101E271D v_mad_f32 v15, v24, v31, v15 ; D282000F 043E3F18 v_mad_f32 v15, v26, v33, v15 ; D282000F 043E431A v_add_i32_e32 v16, 0x9c, v20 ; 4A2028FF 0000009C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v3, v9, v15, v3 ; D2820003 040E1F09 s_or_b64 exec, exec, s[34:35] ; 88FE227E s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mad_f32 v7, v4, v4, v7 ; D2820007 041E0904 v_mad_f32 v7, v1, v1, v7 ; D2820007 041E0301 v_max_f32_e32 v7, 0x33d6bf95, v7 ; 200E0EFF 33D6BF95 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 exp 15, 32, 0, 0, 0, v5, v4, v1, v6 ; F800020F 06010405 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s20, v2 ; 06020414 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_add_f32_e32 v4, s19, v3 ; 06080613 v_mul_f32_e32 v4, s21, v4 ; 10080815 exp 15, 33, 0, 0, 0, v11, v12, v4, v1 ; F800021F 01040C0B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 34, 0, 0, 0, v3, v2, v0, v1 ; F800022F 01000203 v_mul_f32_e32 v4, s30, v2 ; 1008041E v_mad_f32 v4, v3, s31, v4 ; D2820004 04103F03 v_mad_f32 v4, v0, s26, v4 ; D2820004 04103500 v_add_f32_e32 v4, s24, v4 ; 06080818 v_mul_f32_e32 v5, s27, v2 ; 100A041B v_mad_f32 v5, v3, s29, v5 ; D2820005 04143B03 v_mad_f32 v5, v0, s23, v5 ; D2820005 04142F00 v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v6, v5 ; 7E0C5505 v_mul_f32_e32 v7, v6, v4 ; 100E0906 v_mul_f32_e32 v8, s25, v2 ; 10100419 v_mad_f32 v8, v3, s28, v8 ; D2820008 04203903 v_mad_f32 v8, v0, s18, v8 ; D2820008 04202500 v_add_f32_e32 v8, s16, v8 ; 06101010 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 35, 0, 0, 0, v6, v7, v9, v1 ; F800023F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s12, v2 ; 100C040C v_mad_f32 v6, v3, s13, v6 ; D2820006 04181B03 v_mad_f32 v6, v0, s11, v6 ; D2820006 04181700 v_add_f32_e32 v6, s7, v6 ; 060C0C07 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 v_mul_f32_e32 v7, s8, v2 ; 100E0408 v_mad_f32 v7, v3, s9, v7 ; D2820007 041C1303 v_mad_f32 v7, v0, s5, v7 ; D2820007 041C0B00 v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mad_f32 v7, -0.5, v7, -0.5 ; D2820007 03C60EF1 exp 15, 36, 0, 0, 0, v6, v7, v9, v1 ; F800024F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s14, v2 ; 1002040E v_mad_f32 v1, v3, s15, v1 ; D2820001 04041F03 v_mad_f32 v0, v0, s10, v1 ; D2820000 04041500 v_add_f32_e32 v0, s6, v0 ; 06000006 exp 15, 12, 0, 1, 0, v8, v4, v0, v5 ; F80008CF 05000408 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[6].x, TEMP[6].xxxx, TEMP[5].yyyy 14: UIF TEMP[6].xxxx :0 15: KILL 16: ENDIF 17: MOV TEMP[5].xyz, IN[0].xyzz 18: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 19: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 20: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 21: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 22: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 23: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 24: MOV TEMP[0].xyz, TEMP[1].xyzx 25: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 26: MOV TEMP[0].xyz, TEMP[1].xyzx 27: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 28: MOV TEMP[3].yzw, TEMP[1].zyzw 29: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 30: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 31: RSQ TEMP[5].x, TEMP[5].xxxx 32: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 33: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 34: MOV TEMP[5].w, IMM[0].wwww 35: MOV TEMP[5].x, TEMP[3].yyyy 36: MOV TEMP[5].y, TEMP[3].zzzz 37: MOV TEMP[5].z, TEMP[3].wwww 38: DP4 TEMP[3].x, TEMP[5], TEMP[5] 39: RSQ TEMP[3].x, TEMP[3].xxxx 40: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 41: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 42: MOV_SAT TEMP[3].x, TEMP[3].xxxx 43: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 44: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 45: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 46: MOV TEMP[6].xy, TEMP[5].yzzz 47: TEX TEMP[6], TEMP[6], SAMP[4], 2D 48: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 49: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 50: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 51: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 52: MOV TEMP[4].w, TEMP[7].wwww 53: MOV TEMP[6].xy, IN[1].xyyy 54: TEX TEMP[6], TEMP[6], SAMP[1], 2D 55: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 56: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 57: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 58: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 59: ABS TEMP[6].x, TEMP[8].wwww 60: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 61: MOV TEMP[5].xyz, TEMP[5].yzww 62: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 63: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 64: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 65: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 66: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 67: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 68: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 69: ABS TEMP[3].x, TEMP[3].xxxx 70: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 71: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 72: MOV TEMP[0].w, TEMP[6].wwww 73: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 74: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 75: ABS TEMP[5].x, TEMP[3].xxxx 76: LG2 TEMP[4].x, TEMP[5].xxxx 77: ABS TEMP[5].x, TEMP[3].yyyy 78: LG2 TEMP[5].x, TEMP[5].xxxx 79: MOV TEMP[4].y, TEMP[5].xxxx 80: ABS TEMP[3].x, TEMP[3].zzzz 81: LG2 TEMP[3].x, TEMP[3].xxxx 82: MOV TEMP[4].z, TEMP[3].xxxx 83: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 84: EX2 TEMP[4].x, TEMP[3].xxxx 85: EX2 TEMP[5].x, TEMP[3].yyyy 86: MOV TEMP[4].y, TEMP[5].xxxx 87: EX2 TEMP[3].x, TEMP[3].zzzz 88: MOV TEMP[4].z, TEMP[3].xxxx 89: MOV TEMP[3].xyz, TEMP[4].xyzz 90: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 91: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 92: MOV TEMP[5].xy, TEMP[5].xyyy 93: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 94: MOV TEMP[1].w, TEMP[5].wwww 95: MOV TEMP[6].xy, IN[1].zwww 96: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 97: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 98: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 99: MOV TEMP[6].xyz, TEMP[3].yzww 100: TEX TEMP[6], TEMP[6], SAMP[9], 3D 101: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 102: MOV TEMP[1].xyz, TEMP[3].xyzx 103: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 104: MOV TEMP[0].xyz, TEMP[3].xyzx 105: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 106: MOV TEMP[4].z, TEMP[3].zzzz 107: MOV TEMP[4].xy, IN[2].xyxx 108: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 109: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 110: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 111: EX2 TEMP[5].x, TEMP[5].wwww 112: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 113: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 114: RCP TEMP[3].x, TEMP[3].wwww 115: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 116: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 117: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 118: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 119: EX2 TEMP[3].x, TEMP[3].wwww 120: MOV_SAT TEMP[3].x, TEMP[3].xxxx 121: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 122: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 123: MOV TEMP[4].x, TEMP[5].xxxx 124: MOV TEMP[4].y, CONST[4].wwww 125: MOV TEMP[4].xy, TEMP[4].xyyy 126: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 127: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 128: MOV TEMP[0].w, TEMP[3].wwww 129: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 130: MOV TEMP[0].xyz, TEMP[0].xyzx 131: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 132: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 133: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 134: UIF TEMP[3].xxxx :0 135: MOV TEMP[2].x, TEMP[2].xxxx 136: ELSE :0 137: MOV TEMP[2].x, TEMP[1].xxxx 138: ENDIF 139: MOV TEMP[0].w, TEMP[2].xxxx 140: MOV OUT[0], TEMP[0] 141: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = icmp ne i32 %128, 0 br i1 %129, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %130 = insertelement <4 x float> undef, float %78, i32 0 %131 = insertelement <4 x float> %130, float %79, i32 1 %132 = insertelement <4 x float> %131, float %80, i32 2 %133 = insertelement <4 x float> %132, float 0.000000e+00, i32 3 %134 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %133) %135 = extractelement <4 x float> %134, i32 0 %136 = extractelement <4 x float> %134, i32 1 %137 = extractelement <4 x float> %134, i32 2 %138 = extractelement <4 x float> %134, i32 3 %139 = call float @fabs(float %137) %140 = fdiv float 1.000000e+00, %139 %141 = fmul float %135, %140 %142 = fadd float %141, 1.500000e+00 %143 = fmul float %136, %140 %144 = fadd float %143, 1.500000e+00 %145 = bitcast float %144 to i32 %146 = bitcast float %142 to i32 %147 = bitcast float %138 to i32 %148 = insertelement <4 x i32> undef, i32 %145, i32 0 %149 = insertelement <4 x i32> %148, i32 %146, i32 1 %150 = insertelement <4 x i32> %149, i32 %147, i32 2 %151 = insertelement <4 x i32> %150, i32 undef, i32 3 %152 = bitcast <8 x i32> %47 to <32 x i8> %153 = bitcast <4 x i32> %49 to <16 x i8> %154 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %151, <32 x i8> %152, <16 x i8> %153, i32 4) %155 = extractelement <4 x float> %154, i32 0 %156 = extractelement <4 x float> %154, i32 1 %157 = extractelement <4 x float> %154, i32 2 %158 = call float @llvm.pow.f32(float %155, float 0x40019999A0000000) %159 = call float @llvm.pow.f32(float %156, float 0x40019999A0000000) %160 = call float @llvm.pow.f32(float %157, float 0x40019999A0000000) %161 = fmul float %102, %158 %162 = fmul float %103, %159 %163 = fmul float %104, %160 %164 = fadd float %161, %161 %165 = fadd float %162, %162 %166 = fadd float %163, %163 %167 = fsub float -0.000000e+00, %85 %168 = fadd float %24, %167 %169 = fsub float -0.000000e+00, %86 %170 = fadd float %25, %169 %171 = fsub float -0.000000e+00, %87 %172 = fadd float %26, %171 %173 = fmul float %168, %168 %174 = fmul float %170, %170 %175 = fadd float %174, %173 %176 = fmul float %172, %172 %177 = fadd float %175, %176 %178 = call float @llvm.maxnum.f32(float %177, float 0x3E7AD7F2A0000000) %179 = call float @llvm.AMDGPU.rsq.clamped.f32(float %178) %180 = fmul float %179, %168 %181 = fmul float %179, %170 %182 = fmul float %179, %172 %183 = fmul float %168, %179 %184 = fadd float %183, 0x3FAB15B580000000 %185 = fmul float %170, %179 %186 = fadd float %185, 0x3FEB126EA0000000 %187 = fmul float %172, %179 %188 = fadd float %187, 0x3FE0ED9160000000 %189 = fmul float %184, %184 %190 = fmul float %186, %186 %191 = fadd float %189, %190 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = fmul float 0.000000e+00, 0.000000e+00 %195 = fadd float %193, %194 %196 = call float @llvm.AMDGPU.rsq.clamped.f32(float %195) %197 = fmul float %184, %196 %198 = fmul float %186, %196 %199 = fmul float %188, %196 %200 = fmul float %197, %78 %201 = fmul float %198, %79 %202 = fadd float %201, %200 %203 = fmul float %199, %80 %204 = fadd float %202, %203 %205 = call float @llvm.AMDIL.clamp.(float %204, float 0.000000e+00, float 1.000000e+00) %206 = fmul float %78, %180 %207 = fmul float %79, %181 %208 = fadd float %207, %206 %209 = fmul float %80, %182 %210 = fadd float %208, %209 %211 = fadd float %210, %210 %212 = fsub float -0.000000e+00, %180 %213 = fmul float %211, %78 %214 = fadd float %213, %212 %215 = fsub float -0.000000e+00, %181 %216 = fmul float %211, %79 %217 = fadd float %216, %215 %218 = fsub float -0.000000e+00, %182 %219 = fmul float %211, %80 %220 = fadd float %219, %218 %221 = bitcast float %214 to i32 %222 = bitcast float %217 to i32 %223 = insertelement <2 x i32> undef, i32 %221, i32 0 %224 = insertelement <2 x i32> %223, i32 %222, i32 1 %225 = bitcast <8 x i32> %55 to <32 x i8> %226 = bitcast <4 x i32> %57 to <16 x i8> %227 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %224, <32 x i8> %225, <16 x i8> %226, i32 2) %228 = extractelement <4 x float> %227, i32 0 %229 = extractelement <4 x float> %227, i32 1 %230 = extractelement <4 x float> %227, i32 2 %231 = call float @llvm.pow.f32(float %228, float 0x40019999A0000000) %232 = call float @llvm.pow.f32(float %229, float 0x40019999A0000000) %233 = call float @llvm.pow.f32(float %230, float 0x40019999A0000000) %234 = bitcast float %81 to i32 %235 = bitcast float %82 to i32 %236 = insertelement <2 x i32> undef, i32 %234, i32 0 %237 = insertelement <2 x i32> %236, i32 %235, i32 1 %238 = bitcast <8 x i32> %43 to <32 x i8> %239 = bitcast <4 x i32> %45 to <16 x i8> %240 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %237, <32 x i8> %238, <16 x i8> %239, i32 2) %241 = extractelement <4 x float> %240, i32 0 %242 = extractelement <4 x float> %240, i32 1 %243 = extractelement <4 x float> %240, i32 2 %244 = extractelement <4 x float> %240, i32 3 %245 = call float @llvm.pow.f32(float %241, float 0x40019999A0000000) %246 = call float @llvm.pow.f32(float %242, float 0x40019999A0000000) %247 = call float @llvm.pow.f32(float %243, float 0x40019999A0000000) %248 = call float @llvm.pow.f32(float %244, float 1.000000e+00) %249 = call float @fabs(float %248) %250 = call float @llvm.pow.f32(float %249, float 0x40019999A0000000) %251 = insertelement <4 x float> undef, float %214, i32 0 %252 = insertelement <4 x float> %251, float %217, i32 1 %253 = insertelement <4 x float> %252, float %220, i32 2 %254 = insertelement <4 x float> %253, float %220, i32 3 %255 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %254) %256 = extractelement <4 x float> %255, i32 0 %257 = extractelement <4 x float> %255, i32 1 %258 = extractelement <4 x float> %255, i32 2 %259 = extractelement <4 x float> %255, i32 3 %260 = call float @fabs(float %258) %261 = fdiv float 1.000000e+00, %260 %262 = fmul float %256, %261 %263 = fadd float %262, 1.500000e+00 %264 = fmul float %257, %261 %265 = fadd float %264, 1.500000e+00 %266 = bitcast float %265 to i32 %267 = bitcast float %263 to i32 %268 = bitcast float %259 to i32 %269 = insertelement <4 x i32> undef, i32 %266, i32 0 %270 = insertelement <4 x i32> %269, i32 %267, i32 1 %271 = insertelement <4 x i32> %270, i32 %268, i32 2 %272 = insertelement <4 x i32> %271, i32 undef, i32 3 %273 = bitcast <8 x i32> %51 to <32 x i8> %274 = bitcast <4 x i32> %53 to <16 x i8> %275 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %272, <32 x i8> %273, <16 x i8> %274, i32 4) %276 = extractelement <4 x float> %275, i32 0 %277 = extractelement <4 x float> %275, i32 1 %278 = extractelement <4 x float> %275, i32 2 %279 = call float @llvm.pow.f32(float %276, float 0x40019999A0000000) %280 = call float @llvm.pow.f32(float %277, float 0x40019999A0000000) %281 = call float @llvm.pow.f32(float %278, float 0x40019999A0000000) %282 = call float @llvm.AMDGPU.lrp(float %250, float %279, float %231) %283 = call float @llvm.AMDGPU.lrp(float %250, float %280, float %232) %284 = call float @llvm.AMDGPU.lrp(float %250, float %281, float %233) %285 = fmul float %250, 1.990000e+02 %286 = fadd float %285, 1.000000e+00 %287 = call float @fabs(float %205) %288 = call float @llvm.pow.f32(float %287, float %286) %289 = fmul float %286, 0x3FB99999A0000000 %290 = fmul float %289, %288 %291 = fadd float %290, %282 %292 = fmul float %289, %288 %293 = fadd float %292, %283 %294 = fmul float %289, %288 %295 = fadd float %294, %284 %296 = fmul float %291, %245 %297 = fadd float %296, %164 %298 = fmul float %293, %246 %299 = fadd float %298, %165 %300 = fmul float %295, %247 %301 = fadd float %300, %166 %302 = call float @fabs(float %297) %303 = call float @llvm.log2.f32(float %302) %304 = call float @fabs(float %299) %305 = call float @llvm.log2.f32(float %304) %306 = call float @fabs(float %301) %307 = call float @llvm.log2.f32(float %306) %308 = fmul float %303, 0x3FDD1743E0000000 %309 = fmul float %305, 0x3FDD1743E0000000 %310 = fmul float %307, 0x3FDD1743E0000000 %311 = call float @llvm.AMDIL.exp.(float %308) %312 = call float @llvm.AMDIL.exp.(float %309) %313 = call float @llvm.AMDIL.exp.(float %310) %314 = bitcast float %311 to i32 %315 = bitcast float %312 to i32 %316 = bitcast float %313 to i32 %317 = insertelement <4 x i32> undef, i32 %314, i32 0 %318 = insertelement <4 x i32> %317, i32 %315, i32 1 %319 = insertelement <4 x i32> %318, i32 %316, i32 2 %320 = insertelement <4 x i32> %319, i32 undef, i32 3 %321 = bitcast <8 x i32> %63 to <32 x i8> %322 = bitcast <4 x i32> %65 to <16 x i8> %323 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %320, <32 x i8> %321, <16 x i8> %322, i32 3) %324 = extractelement <4 x float> %323, i32 0 %325 = extractelement <4 x float> %323, i32 1 %326 = extractelement <4 x float> %323, i32 2 %327 = fmul float %89, 1.000000e+00 %328 = fadd float %327, 0.000000e+00 %329 = fmul float %90, -1.000000e+00 %330 = fadd float %329, 1.000000e+00 %331 = bitcast float %328 to i32 %332 = bitcast float %330 to i32 %333 = insertelement <2 x i32> undef, i32 %331, i32 0 %334 = insertelement <2 x i32> %333, i32 %332, i32 1 %335 = bitcast <8 x i32> %67 to <32 x i8> %336 = bitcast <4 x i32> %69 to <16 x i8> %337 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %334, <32 x i8> %335, <16 x i8> %336, i32 2) %338 = extractelement <4 x float> %337, i32 0 %339 = extractelement <4 x float> %337, i32 2 %340 = bitcast float %83 to i32 %341 = bitcast float %84 to i32 %342 = insertelement <2 x i32> undef, i32 %340, i32 0 %343 = insertelement <2 x i32> %342, i32 %341, i32 1 %344 = bitcast <8 x i32> %71 to <32 x i8> %345 = bitcast <4 x i32> %73 to <16 x i8> %346 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %343, <32 x i8> %344, <16 x i8> %345, i32 2) %347 = extractelement <4 x float> %346, i32 0 %348 = extractelement <4 x float> %346, i32 1 %349 = extractelement <4 x float> %346, i32 2 %350 = call float @llvm.AMDGPU.lrp(float %338, float %324, float %347) %351 = call float @llvm.AMDGPU.lrp(float %338, float %325, float %348) %352 = call float @llvm.AMDGPU.lrp(float %338, float %326, float %349) %353 = fsub float -0.000000e+00, %338 %354 = fmul float %339, %353 %355 = fadd float %354, %338 %356 = bitcast float %350 to i32 %357 = bitcast float %351 to i32 %358 = bitcast float %352 to i32 %359 = insertelement <4 x i32> undef, i32 %356, i32 0 %360 = insertelement <4 x i32> %359, i32 %357, i32 1 %361 = insertelement <4 x i32> %360, i32 %358, i32 2 %362 = insertelement <4 x i32> %361, i32 undef, i32 3 %363 = bitcast <8 x i32> %75 to <32 x i8> %364 = bitcast <4 x i32> %77 to <16 x i8> %365 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %362, <32 x i8> %363, <16 x i8> %364, i32 3) %366 = extractelement <4 x float> %365, i32 0 %367 = extractelement <4 x float> %365, i32 1 %368 = extractelement <4 x float> %365, i32 2 %369 = call float @llvm.AMDGPU.lrp(float %355, float %366, float %350) %370 = call float @llvm.AMDGPU.lrp(float %355, float %367, float %351) %371 = call float @llvm.AMDGPU.lrp(float %355, float %368, float %352) %372 = fsub float -0.000000e+00, %369 %373 = fadd float %372, %30 %374 = fsub float -0.000000e+00, %370 %375 = fadd float %374, %31 %376 = fsub float -0.000000e+00, %371 %377 = fadd float %376, %32 %378 = fmul float %37, %87 %379 = fsub float -0.000000e+00, %33 %380 = fadd float %85, %379 %381 = fsub float -0.000000e+00, %34 %382 = fadd float %86, %381 %383 = fsub float -0.000000e+00, %35 %384 = fadd float %378, %383 %385 = fmul float %384, %27 %386 = fmul float %385, 0x3FF7154CA0000000 %387 = call float @llvm.AMDIL.exp.(float %386) %388 = fsub float -0.000000e+00, %387 %389 = fadd float %388, 1.000000e+00 %390 = fmul float %380, %380 %391 = fmul float %382, %382 %392 = fadd float %391, %390 %393 = fmul float %384, %384 %394 = fadd float %392, %393 %395 = fdiv float 1.000000e+00, %384 %396 = fmul float %394, %28 %397 = fmul float %389, %396 %398 = fmul float %395, %397 %399 = fmul float %398, 0x3FF7154CA0000000 %400 = call float @llvm.AMDIL.exp.(float %399) %401 = call float @llvm.AMDIL.clamp.(float %400, float 0.000000e+00, float 1.000000e+00) %402 = fsub float -0.000000e+00, %401 %403 = fadd float %402, 1.000000e+00 %404 = fmul float %88, 5.000000e-01 %405 = fadd float %404, 5.000000e-01 %406 = bitcast float %405 to i32 %407 = bitcast float %29 to i32 %408 = insertelement <2 x i32> undef, i32 %406, i32 0 %409 = insertelement <2 x i32> %408, i32 %407, i32 1 %410 = bitcast <8 x i32> %59 to <32 x i8> %411 = bitcast <4 x i32> %61 to <16 x i8> %412 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %409, <32 x i8> %410, <16 x i8> %411, i32 2) %413 = extractelement <4 x float> %412, i32 0 %414 = fmul float %403, %413 %415 = fmul float %414, %373 %416 = fadd float %415, %369 %417 = fmul float %414, %375 %418 = fadd float %417, %370 %419 = fmul float %414, %377 %420 = fadd float %419, %371 %421 = fmul float %107, 5.000000e-01 %422 = fsub float -0.000000e+00, %36 %423 = fadd float %422, %87 %424 = fcmp oge float %423, 0.000000e+00 %425 = sext i1 %424 to i32 %426 = bitcast i32 %425 to float %427 = bitcast float %426 to i32 %428 = icmp ne i32 %427, 0 %. = select i1 %428, float %107, float %421 %429 = call i32 @llvm.SI.packf16(float %416, float %418) %430 = bitcast i32 %429 to float %431 = call i32 @llvm.SI.packf16(float %420, float %.) %432 = bitcast i32 %431 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %430, float %432, float %430, float %432) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v16, v0, 1, 1, [m0] ; C8400500 v_interp_p2_f32 v16, [v16], v1, 1, 1, [m0] ; C8410501 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], vcc, 0x0 ; C0C66B00 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[12:19], s[8:11] ; F0800F00 0043020F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v6, v4 ; 7E0C4F04 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v14, v6 ; 7E1C4B06 v_log_f32_e32 v6, v3 ; 7E0C4F03 v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v10, v6 ; 7E144B06 v_log_f32_e32 v6, v2 ; 7E0C4F02 v_mul_legacy_f32_e32 v6, v8, v6 ; 0E0C0D08 v_exp_f32_e32 v7, v6 ; 7E0E4B06 v_log_f32_e32 v2, v5 ; 7E044F05 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_and_b32_e32 v2, 0x7fffffff, v2 ; 360404FF 7FFFFFFF v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v8, v2 ; 0E040508 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mov_b32_e32 v3, 0xbdcccccd ; 7E0602FF BDCCCCCD v_add_f32_e32 v3, v2, v3 ; 06060702 v_cmp_lt_f32_e64 s[16:17], v3, 0 ; D0020010 00010103 v_interp_p1_f32 v8, v0, 1, 4, [m0] ; C8201100 v_interp_p2_f32 v8, [v8], v1, 1, 4, [m0] ; C8211101 v_interp_p1_f32 v9, v0, 0, 4, [m0] ; C8241000 v_interp_p2_f32 v9, [v9], v1, 0, 4, [m0] ; C8251001 v_interp_p1_f32 v4, v0, 1, 3, [m0] ; C8100D00 v_interp_p2_f32 v4, [v4], v1, 1, 3, [m0] ; C8110D01 v_interp_p1_f32 v3, v0, 2, 2, [m0] ; C80C0A00 v_interp_p2_f32 v3, [v3], v1, 2, 2, [m0] ; C80D0A01 v_interp_p1_f32 v5, v0, 1, 2, [m0] ; C8140900 v_interp_p2_f32 v5, [v5], v1, 1, 2, [m0] ; C8150901 v_interp_p1_f32 v6, v0, 0, 2, [m0] ; C8180800 v_interp_p2_f32 v6, [v6], v1, 0, 2, [m0] ; C8190801 v_interp_p1_f32 v12, v0, 3, 1, [m0] ; C8300700 v_interp_p2_f32 v12, [v12], v1, 3, 1, [m0] ; C8310701 v_interp_p1_f32 v11, v0, 2, 1, [m0] ; C82C0600 v_interp_p2_f32 v11, [v11], v1, 2, 1, [m0] ; C82D0601 v_interp_p1_f32 v19, v0, 2, 0, [m0] ; C84C0200 v_interp_p2_f32 v19, [v19], v1, 2, 0, [m0] ; C84D0201 v_interp_p1_f32 v18, v0, 1, 0, [m0] ; C8480100 v_interp_p2_f32 v18, [v18], v1, 1, 0, [m0] ; C8490101 v_interp_p1_f32 v17, v0, 0, 0, [m0] ; C8440000 v_interp_p2_f32 v17, [v17], v1, 0, 0, [m0] ; C8450001 s_load_dwordx4 s[20:23], s[2:3], 0x0 ; C08A0300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[20:23], 0x20 ; C2001520 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 3 ; 04610600 s_buffer_load_dword s0, s[20:23], 0x1c ; C200151C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 0 ; 04610000 s_buffer_load_dword s18, s[20:23], 0x1a ; C209151A s_buffer_load_dword s0, s[20:23], 0x19 ; C2001519 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 4 ; 04610800 s_buffer_load_dword s0, s[20:23], 0x18 ; C2001518 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 5 ; 04610A00 s_buffer_load_dword s0, s[20:23], 0x16 ; C2001516 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 1 ; 04610200 s_buffer_load_dword s0, s[20:23], 0x15 ; C2001515 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 6 ; 04610C00 s_buffer_load_dword s0, s[20:23], 0x14 ; C2001514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v48, s0, 2 ; 04610400 s_buffer_load_dword s19, s[20:23], 0x13 ; C2099513 s_buffer_load_dword s100, s[20:23], 0x11 ; C2321511 s_buffer_load_dword s101, s[20:23], 0x10 ; C2329510 s_buffer_load_dword s13, s[20:23], 0xe ; C206950E s_buffer_load_dword s14, s[20:23], 0xd ; C207150D s_buffer_load_dword s15, s[20:23], 0xc ; C207950C v_mov_b32_e32 v13, s18 ; 7E1A0212 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s19 ; 7E020213 s_and_saveexec_b64 s[16:17], s[16:17] ; BE902410 s_xor_b64 s[16:17], exec, s[16:17] ; 8990107E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[16:17] ; 88FE107E v_sub_f32_e32 v21, s14, v5 ; 082A0A0E v_sub_f32_e32 v22, s15, v6 ; 082C0C0F v_mul_f32_e32 v23, v22, v22 ; 102E2D16 v_mad_f32 v23, v21, v21, v23 ; D2820017 045E2B15 v_sub_f32_e32 v24, s13, v3 ; 0830060D v_mad_f32 v23, v24, v24, v23 ; D2820017 045E3118 v_max_f32_e32 v23, 0x33d6bf95, v23 ; 202E2EFF 33D6BF95 v_rsq_clamp_f32_e32 v23, v23 ; 7E2E5917 v_mul_f32_e32 v25, v21, v23 ; 10322F15 v_mul_f32_e32 v26, v22, v23 ; 10342F16 v_mul_f32_e32 v27, v26, v17 ; 1036231A v_mad_f32 v27, v18, v25, v27 ; D282001B 046E3312 v_mul_f32_e32 v28, v24, v23 ; 10382F18 v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_add_f32_e32 v27, v27, v27 ; 0636371B v_mad_f32 v30, v27, v19, -v28 ; D282001E 8472271B v_mad_f32 v29, v27, v18, -v25 ; D282001D 8466251B v_mad_f32 v28, v27, v17, -v26 ; D282001C 846A231B v_mov_b32_e32 v31, v30 ; 7E3E031E v_cubeid_f32 v35, v28, v29, v30 ; D2880023 047A3B1C v_cubema_f32 v34, v28, v29, v30 ; D28E0022 047A3B1C v_cubesc_f32 v33, v28, v29, v30 ; D28A0021 047A3B1C v_cubetc_f32 v32, v28, v29, v30 ; D28C0020 047A3B1C v_rcp_f32_e64 v25, |v34| ; D3540119 00000122 v_mov_b32_e32 v26, 0x3fc00000 ; 7E3402FF 3FC00000 v_mad_f32 v34, v32, v25, v26 ; D2820022 046A3320 v_mad_f32 v33, v33, v25, v26 ; D2820021 046A3321 s_load_dwordx4 s[80:83], s[4:5], 0x4 ; C0A80504 s_load_dwordx4 s[68:71], s[4:5], 0x8 ; C0A20508 s_load_dwordx4 s[84:87], s[4:5], 0xc ; C0AA050C s_load_dwordx4 s[88:91], s[4:5], 0x10 ; C0AC0510 s_load_dwordx4 s[16:19], s[4:5], 0x14 ; C0880514 s_load_dwordx4 s[40:43], s[4:5], 0x18 ; C0940518 s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C s_load_dwordx4 s[32:35], s[4:5], 0x20 ; C0900520 s_load_dwordx4 s[28:31], s[4:5], 0x24 ; C08E0524 s_load_dwordx8 s[92:99], vcc, 0x8 ; C0EE6B08 s_load_dwordx8 s[72:79], vcc, 0x10 ; C0E46B10 s_load_dwordx8 s[8:15], vcc, 0x18 ; C0C46B18 s_load_dwordx8 s[0:7], vcc, 0x20 ; C0C06B20 s_load_dwordx8 s[20:27], vcc, 0x28 ; C0CA6B28 s_load_dwordx8 s[60:67], vcc, 0x30 ; C0DE6B30 s_load_dwordx8 s[52:59], vcc, 0x38 ; C0DA6B38 s_load_dwordx8 s[44:51], vcc, 0x40 ; C0D66B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[32:34], 7, 0, 0, 0, 0, 0, 0, 0, v[33:36], s[8:15], s[84:87] ; F0800700 02A22021 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v25, v34 ; 7E324F22 v_mov_b32_e32 v27, 0x400ccccd ; 7E3602FF 400CCCCD v_mul_legacy_f32_e32 v25, v27, v25 ; 0E32331B v_exp_f32_e32 v25, v25 ; 7E324B19 image_sample v[28:30], 7, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[0:7], s[88:91] ; F0800700 02C01C1C s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v31, v30 ; 7E3E4F1E v_mul_legacy_f32_e32 v31, v27, v31 ; 0E3E3F1B v_exp_f32_e32 v31, v31 ; 7E3E4B1F image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[92:99], s[80:83] ; F0800F00 0297230F s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v15, v38 ; 7E1E4F26 v_mul_legacy_f32_e32 v15, 1.0, v15 ; 0E1E1EF2 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mov_b32_e32 v16, 0x7fffffff ; 7E2002FF 7FFFFFFF v_and_b32_e32 v15, v15, v16 ; 361E210F v_log_f32_e32 v15, v15 ; 7E1E4F0F v_mul_legacy_f32_e32 v15, v27, v15 ; 0E1E1F1B v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_sub_f32_e32 v39, 1.0, v15 ; 084E1EF2 v_mul_f32_e32 v31, v31, v39 ; 103E4F1F v_mad_f32 v25, v15, v25, v31 ; D2820019 047E330F v_mov_b32_e32 v31, 0x3d58adac ; 7E3E02FF 3D58ADAC v_mad_f32 v22, v22, v23, v31 ; D2820016 047E2F16 v_mov_b32_e32 v31, 0x3f589375 ; 7E3E02FF 3F589375 v_mad_f32 v21, v21, v23, v31 ; D2820015 047E2F15 v_mul_f32_e32 v31, v21, v21 ; 103E2B15 v_mad_f32 v31, v22, v22, v31 ; D282001F 047E2D16 v_mov_b32_e32 v40, 0x3f076c8b ; 7E5002FF 3F076C8B v_mad_f32 v23, v24, v23, v40 ; D2820017 04A22F18 v_mad_f32 v24, v23, v23, v31 ; D2820018 047E2F17 v_mov_b32_e32 v20, 0 ; 7E280280 v_add_f32_e32 v24, 0, v24 ; 06303080 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v21, v24, v21 ; 102A2B18 v_mul_f32_e32 v22, v24, v22 ; 102C2D18 v_mul_f32_e32 v22, v17, v22 ; 102C2D11 v_mad_f32 v21, v21, v18, v22 ; D2820015 045A2515 v_mul_f32_e32 v22, v24, v23 ; 102C2F18 v_mad_f32 v21, v22, v19, v21 ; D2820015 04562716 v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_and_b32_e32 v16, v21, v16 ; 36202115 v_log_f32_e32 v16, v16 ; 7E204F10 v_mov_b32_e32 v21, 0x43470000 ; 7E2A02FF 43470000 v_mad_f32 v21, v21, v15, 1.0 ; D2820015 03CA1F15 v_mul_legacy_f32_e32 v16, v21, v16 ; 0E202115 v_exp_f32_e32 v16, v16 ; 7E204B10 v_mul_f32_e32 v21, 0x3dcccccd, v21 ; 102A2AFF 3DCCCCCD v_mad_f32 v22, v21, v16, v25 ; D2820016 04662115 v_log_f32_e32 v23, v37 ; 7E2E4F25 v_mul_legacy_f32_e32 v23, v27, v23 ; 0E2E2F1B v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mul_f32_e32 v22, v23, v22 ; 102C2D17 v_cubeid_f32 v43, v17, v18, v19 ; D288002B 044E2511 v_cubema_f32 v42, v17, v18, v19 ; D28E002A 044E2511 v_cubesc_f32 v41, v17, v18, v19 ; D28A0029 044E2511 v_cubetc_f32 v40, v17, v18, v19 ; D28C0028 044E2511 v_rcp_f32_e64 v17, |v42| ; D3540111 0000012A v_mad_f32 v42, v40, v17, v26 ; D282002A 046A2328 v_mad_f32 v41, v41, v17, v26 ; D2820029 046A2329 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[72:79], s[68:71] ; F0800700 02321129 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v20, v19 ; 7E284F13 v_mul_legacy_f32_e32 v20, v27, v20 ; 0E28291B v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v14, v20, v14 ; 101C1D14 v_mad_f32 v14, 2.0, v14, v22 ; D282000E 045A1CF4 v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_mul_f32_e32 v14, 0x3ee8ba1f, v14 ; 101C1CFF 3EE8BA1F v_exp_f32_e32 v24, v14 ; 7E304B0E v_log_f32_e32 v14, v33 ; 7E1C4F21 v_mul_legacy_f32_e32 v14, v27, v14 ; 0E1C1D1B v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_log_f32_e32 v20, v29 ; 7E284F1D v_mul_legacy_f32_e32 v20, v27, v20 ; 0E28291B v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v20, v20, v39 ; 10284F14 v_mad_f32 v14, v15, v14, v20 ; D282000E 04521D0F v_mad_f32 v14, v21, v16, v14 ; D282000E 043A2115 v_log_f32_e32 v20, v36 ; 7E284F24 v_mul_legacy_f32_e32 v20, v27, v20 ; 0E28291B v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v14, v20, v14 ; 101C1D14 v_log_f32_e32 v20, v18 ; 7E284F12 v_mul_legacy_f32_e32 v20, v27, v20 ; 0E28291B v_exp_f32_e32 v20, v20 ; 7E284B14 v_mul_f32_e32 v10, v20, v10 ; 10141514 v_mad_f32 v10, 2.0, v10, v14 ; D282000A 043A14F4 v_log_f32_e64 v10, |v10| ; D34E010A 0000010A v_mul_f32_e32 v10, 0x3ee8ba1f, v10 ; 101414FF 3EE8BA1F v_exp_f32_e32 v23, v10 ; 7E2E4B0A v_log_f32_e32 v10, v32 ; 7E144F20 v_mul_legacy_f32_e32 v10, v27, v10 ; 0E14151B v_exp_f32_e32 v10, v10 ; 7E144B0A v_log_f32_e32 v14, v28 ; 7E1C4F1C v_mul_legacy_f32_e32 v14, v27, v14 ; 0E1C1D1B v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v14, v14, v39 ; 101C4F0E v_mad_f32 v10, v15, v10, v14 ; D282000A 043A150F v_mad_f32 v10, v21, v16, v10 ; D282000A 042A2115 v_log_f32_e32 v14, v35 ; 7E1C4F23 v_mul_legacy_f32_e32 v14, v27, v14 ; 0E1C1D1B v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v10, v14, v10 ; 1014150E v_log_f32_e32 v14, v17 ; 7E1C4F11 v_mul_legacy_f32_e32 v14, v27, v14 ; 0E1C1D1B v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v7, v14, v7 ; 100E0F0E v_mad_f32 v7, 2.0, v7, v10 ; D2820007 042A0EF4 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v22, v7 ; 7E2C4B07 image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[60:67], s[40:43] ; F0800700 014F0E16 v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_add_f32_e32 v7, 0, v9 ; 060E1280 image_sample v[7:8], 5, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[52:59], s[36:39] ; F0800500 012D0707 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v9, 1.0, v7 ; 08120EF2 image_sample v[10:12], 7, 0, 0, 0, 0, 0, 0, 0, v[11:12], s[44:51], s[32:35] ; F0800700 010B0A0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v12, v9 ; 1022130C v_mad_f32 v19, v7, v16, v17 ; D2820013 04462107 v_mul_f32_e32 v21, v11, v9 ; 102A130B v_mad_f32 v18, v7, v15, v21 ; D2820012 04561F07 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_mad_f32 v17, v7, v14, v9 ; D2820011 04261D07 s_load_dwordx8 s[0:7], vcc, 0x48 ; C0C06B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[0:7], s[28:31] ; F0800700 00E00911 v_mad_f32 v7, -v8, v7, v7 ; D2820007 241E0F08 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_mul_f32_e32 v12, v18, v8 ; 10181112 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v7, v10, v12 ; D282000C 04321507 v_readlane_b32 s0, v48, 6 ; 02010D30 s_nop 2 ; BF800002 v_sub_f32_e32 v14, s0, v12 ; 081C1800 v_readlane_b32 s0, v48, 3 ; 02010730 s_nop 2 ; BF800002 v_mad_f32 v13, v3, s0, -v13 ; D282000D 84340103 v_readlane_b32 s0, v48, 4 ; 02010930 s_nop 2 ; BF800002 v_subrev_f32_e32 v5, s0, v5 ; 0A0A0A00 v_readlane_b32 s0, v48, 5 ; 02010B30 s_nop 2 ; BF800002 v_subrev_f32_e32 v6, s0, v6 ; 0A0C0C00 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mad_f32 v5, v5, v5, v6 ; D2820005 041A0B05 v_mad_f32 v5, v13, v13, v5 ; D2820005 04161B0D v_mul_f32_e32 v5, s100, v5 ; 100A0A64 v_mul_f32_e32 v6, s101, v13 ; 100C1A65 v_mul_f32_e32 v6, 0x3fb8aa65, v6 ; 100C0CFF 3FB8AA65 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2 v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_rcp_f32_e32 v6, v13 ; 7E0C550D v_mul_f32_e32 v5, v5, v6 ; 100A0D05 v_mul_f32_e32 v5, 0x3fb8aa65, v5 ; 100A0AFF 3FB8AA65 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_mad_f32 v0, 0.5, v4, 0.5 ; D2820000 03C208F0 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[20:27], s[16:19] ; F0800100 00850000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v5 ; 10000B00 v_mad_f32 v1, v0, v14, v12 ; D2820001 04321D00 v_mul_f32_e32 v4, v17, v8 ; 10081111 v_mad_f32 v4, v7, v9, v4 ; D2820004 04121307 v_readlane_b32 s0, v48, 2 ; 02010530 s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 v_mul_f32_e32 v4, v19, v8 ; 10081113 v_mad_f32 v4, v7, v11, v4 ; D2820004 04121707 v_readlane_b32 s0, v48, 1 ; 02010330 s_nop 2 ; BF800002 v_sub_f32_e32 v5, s0, v4 ; 080A0800 v_mad_f32 v0, v0, v5, v4 ; D2820000 04120B00 v_mul_f32_e32 v4, 0.5, v2 ; 100804F0 v_readlane_b32 s0, v48, 0 ; 02010130 s_nop 2 ; BF800002 v_subrev_f32_e32 v3, s0, v3 ; 0A060600 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v2, v4, v2, s[0:1] ; D2000002 00020504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..103] DCL TEMP[0..5], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+8] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+8] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+8] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+8] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+8] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[5] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[4], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[6], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[7] 112: ADD TEMP[0].xyz, TEMP[2], -CONST[3] 113: MOV TEMP[0].xyz, TEMP[0].xyzx 114: MOV TEMP[3].w, IN[4].xxxx 115: MOV TEMP[0].w, IN[4].yyyy 116: MOV TEMP[2].xyz, TEMP[2].xyzx 117: MOV TEMP[4].xyz, TEMP[1].xywx 118: MOV TEMP[2].w, IMM[0].yyyy 119: MOV TEMP[4].w, IMM[0].yyyy 120: MOV OUT[1], TEMP[3] 121: MOV OUT[0], TEMP[1] 122: MOV OUT[2], TEMP[0] 123: MOV OUT[3], TEMP[2] 124: MOV OUT[4], TEMP[4] 125: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = fmul float 3.000000e+00, %50 %67 = fmul float %36, 1.000000e+00 %68 = fadd float %67, 0.000000e+00 %69 = fmul float %37, 1.000000e+00 %70 = fadd float %69, 0.000000e+00 %71 = fmul float %38, 1.000000e+00 %72 = fadd float %71, 0.000000e+00 %73 = fmul float %36, 0.000000e+00 %74 = fadd float %73, 1.000000e+00 %75 = fptosi float %66 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = shl i32 %77, 4 %79 = add i32 %78, 128 %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %79) %81 = shl i32 %77, 4 %82 = add i32 %81, 132 %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %82) %84 = shl i32 %77, 4 %85 = add i32 %84, 136 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %77, 4 %88 = add i32 %87, 140 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %68, %80 %91 = fmul float %70, %83 %92 = fadd float %90, %91 %93 = fmul float %72, %86 %94 = fadd float %92, %93 %95 = fmul float %74, %89 %96 = fadd float %94, %95 %97 = fptosi float %66 to i32 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = add i32 1, %99 %101 = bitcast i32 %100 to float %102 = bitcast float %101 to i32 %103 = shl i32 %102, 4 %104 = add i32 %103, 128 %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %104) %106 = shl i32 %102, 4 %107 = add i32 %106, 132 %108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %107) %109 = shl i32 %102, 4 %110 = add i32 %109, 136 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = shl i32 %102, 4 %113 = add i32 %112, 140 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = fmul float %68, %105 %116 = fmul float %70, %108 %117 = fadd float %115, %116 %118 = fmul float %72, %111 %119 = fadd float %117, %118 %120 = fmul float %74, %114 %121 = fadd float %119, %120 %122 = fptosi float %66 to i32 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = add i32 2, %124 %126 = bitcast i32 %125 to float %127 = bitcast float %126 to i32 %128 = shl i32 %127, 4 %129 = add i32 %128, 128 %130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %129) %131 = shl i32 %127, 4 %132 = add i32 %131, 132 %133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %132) %134 = shl i32 %127, 4 %135 = add i32 %134, 136 %136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %135) %137 = shl i32 %127, 4 %138 = add i32 %137, 140 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = fmul float %68, %130 %141 = fmul float %70, %133 %142 = fadd float %140, %141 %143 = fmul float %72, %136 %144 = fadd float %142, %143 %145 = fmul float %74, %139 %146 = fadd float %144, %145 %147 = fmul float %96, %43 %148 = fmul float %121, %43 %149 = fmul float %146, %43 %150 = fptosi float %66 to i32 %151 = bitcast i32 %150 to float %152 = bitcast float %151 to i32 %153 = shl i32 %152, 4 %154 = add i32 %153, 128 %155 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %154) %156 = shl i32 %152, 4 %157 = add i32 %156, 132 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = shl i32 %152, 4 %160 = add i32 %159, 136 %161 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %160) %162 = fmul float %57, %155 %163 = fmul float %58, %158 %164 = fadd float %163, %162 %165 = fmul float %59, %161 %166 = fadd float %164, %165 %167 = fptosi float %66 to i32 %168 = bitcast i32 %167 to float %169 = bitcast float %168 to i32 %170 = add i32 1, %169 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = shl i32 %172, 4 %174 = add i32 %173, 128 %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %174) %176 = shl i32 %172, 4 %177 = add i32 %176, 132 %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %177) %179 = shl i32 %172, 4 %180 = add i32 %179, 136 %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %180) %182 = fmul float %57, %175 %183 = fmul float %58, %178 %184 = fadd float %183, %182 %185 = fmul float %59, %181 %186 = fadd float %184, %185 %187 = fptosi float %66 to i32 %188 = bitcast i32 %187 to float %189 = bitcast float %188 to i32 %190 = add i32 2, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = shl i32 %192, 4 %194 = add i32 %193, 128 %195 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %194) %196 = shl i32 %192, 4 %197 = add i32 %196, 132 %198 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %197) %199 = shl i32 %192, 4 %200 = add i32 %199, 136 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = fmul float %57, %195 %203 = fmul float %58, %198 %204 = fadd float %203, %202 %205 = fmul float %59, %201 %206 = fadd float %204, %205 %207 = fmul float %166, %43 %208 = fmul float %186, %43 %209 = fmul float %206, %43 %210 = fcmp olt float 0.000000e+00, %44 %211 = sext i1 %210 to i32 %212 = bitcast i32 %211 to float %213 = bitcast float %212 to i32 %214 = icmp ne i32 %213, 0 br i1 %214, label %IF, label %ENDIF IF: ; preds = %main_body %215 = fmul float 3.000000e+00, %51 %216 = fptosi float %215 to i32 %217 = bitcast i32 %216 to float %218 = bitcast float %217 to i32 %219 = shl i32 %218, 4 %220 = add i32 %219, 128 %221 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %220) %222 = shl i32 %218, 4 %223 = add i32 %222, 132 %224 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %223) %225 = shl i32 %218, 4 %226 = add i32 %225, 136 %227 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %226) %228 = shl i32 %218, 4 %229 = add i32 %228, 140 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = fmul float %68, %221 %232 = fmul float %70, %224 %233 = fadd float %231, %232 %234 = fmul float %72, %227 %235 = fadd float %233, %234 %236 = fmul float %74, %230 %237 = fadd float %235, %236 %238 = fptosi float %215 to i32 %239 = bitcast i32 %238 to float %240 = bitcast float %239 to i32 %241 = add i32 1, %240 %242 = bitcast i32 %241 to float %243 = bitcast float %242 to i32 %244 = shl i32 %243, 4 %245 = add i32 %244, 128 %246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %245) %247 = shl i32 %243, 4 %248 = add i32 %247, 132 %249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %248) %250 = shl i32 %243, 4 %251 = add i32 %250, 136 %252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %251) %253 = shl i32 %243, 4 %254 = add i32 %253, 140 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = fmul float %68, %246 %257 = fmul float %70, %249 %258 = fadd float %256, %257 %259 = fmul float %72, %252 %260 = fadd float %258, %259 %261 = fmul float %74, %255 %262 = fadd float %260, %261 %263 = fptosi float %215 to i32 %264 = bitcast i32 %263 to float %265 = bitcast float %264 to i32 %266 = add i32 2, %265 %267 = bitcast i32 %266 to float %268 = bitcast float %267 to i32 %269 = shl i32 %268, 4 %270 = add i32 %269, 128 %271 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %270) %272 = shl i32 %268, 4 %273 = add i32 %272, 132 %274 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %273) %275 = shl i32 %268, 4 %276 = add i32 %275, 136 %277 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %276) %278 = shl i32 %268, 4 %279 = add i32 %278, 140 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = fmul float %68, %271 %282 = fmul float %70, %274 %283 = fadd float %281, %282 %284 = fmul float %72, %277 %285 = fadd float %283, %284 %286 = fmul float %74, %280 %287 = fadd float %285, %286 %288 = fmul float %44, %237 %289 = fadd float %288, %147 %290 = fmul float %44, %262 %291 = fadd float %290, %148 %292 = fmul float %44, %287 %293 = fadd float %292, %149 %294 = fptosi float %215 to i32 %295 = bitcast i32 %294 to float %296 = bitcast float %295 to i32 %297 = shl i32 %296, 4 %298 = add i32 %297, 128 %299 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %298) %300 = shl i32 %296, 4 %301 = add i32 %300, 132 %302 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %301) %303 = shl i32 %296, 4 %304 = add i32 %303, 136 %305 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %304) %306 = fmul float %57, %299 %307 = fmul float %58, %302 %308 = fadd float %307, %306 %309 = fmul float %59, %305 %310 = fadd float %308, %309 %311 = fptosi float %215 to i32 %312 = bitcast i32 %311 to float %313 = bitcast float %312 to i32 %314 = add i32 1, %313 %315 = bitcast i32 %314 to float %316 = bitcast float %315 to i32 %317 = shl i32 %316, 4 %318 = add i32 %317, 128 %319 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %318) %320 = shl i32 %316, 4 %321 = add i32 %320, 132 %322 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %321) %323 = shl i32 %316, 4 %324 = add i32 %323, 136 %325 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %324) %326 = fmul float %57, %319 %327 = fmul float %58, %322 %328 = fadd float %327, %326 %329 = fmul float %59, %325 %330 = fadd float %328, %329 %331 = fptosi float %215 to i32 %332 = bitcast i32 %331 to float %333 = bitcast float %332 to i32 %334 = add i32 2, %333 %335 = bitcast i32 %334 to float %336 = bitcast float %335 to i32 %337 = shl i32 %336, 4 %338 = add i32 %337, 128 %339 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %338) %340 = shl i32 %336, 4 %341 = add i32 %340, 132 %342 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %341) %343 = shl i32 %336, 4 %344 = add i32 %343, 136 %345 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %344) %346 = fmul float %57, %339 %347 = fmul float %58, %342 %348 = fadd float %347, %346 %349 = fmul float %59, %345 %350 = fadd float %348, %349 %351 = fmul float %44, %310 %352 = fadd float %351, %207 %353 = fmul float %44, %330 %354 = fadd float %353, %208 %355 = fmul float %44, %350 %356 = fadd float %355, %209 %357 = fcmp olt float 0.000000e+00, %45 %358 = sext i1 %357 to i32 %359 = bitcast i32 %358 to float %360 = bitcast float %359 to i32 %361 = icmp ne i32 %360, 0 br i1 %361, label %IF66, label %ENDIF ENDIF: ; preds = %IF66, %IF, %main_body %temp8.0 = phi float [ %147, %main_body ], [ %466, %IF66 ], [ %289, %IF ] %temp9.0 = phi float [ %148, %main_body ], [ %468, %IF66 ], [ %291, %IF ] %temp10.0 = phi float [ %149, %main_body ], [ %470, %IF66 ], [ %293, %IF ] %temp12.0 = phi float [ %207, %main_body ], [ %529, %IF66 ], [ %352, %IF ] %temp13.0 = phi float [ %208, %main_body ], [ %531, %IF66 ], [ %354, %IF ] %temp14.0 = phi float [ %209, %main_body ], [ %533, %IF66 ], [ %356, %IF ] %362 = fmul float %temp9.0, %20 %363 = fmul float %temp9.0, %21 %364 = fmul float %temp9.0, %22 %365 = fmul float %temp9.0, %23 %366 = fmul float %temp8.0, %16 %367 = fadd float %366, %362 %368 = fmul float %temp8.0, %17 %369 = fadd float %368, %363 %370 = fmul float %temp8.0, %18 %371 = fadd float %370, %364 %372 = fmul float %temp8.0, %19 %373 = fadd float %372, %365 %374 = fmul float %temp10.0, %24 %375 = fadd float %374, %367 %376 = fmul float %temp10.0, %25 %377 = fadd float %376, %369 %378 = fmul float %temp10.0, %26 %379 = fadd float %378, %371 %380 = fmul float %temp10.0, %27 %381 = fadd float %380, %373 %382 = fadd float %375, %28 %383 = fadd float %377, %29 %384 = fadd float %379, %30 %385 = fadd float %381, %31 %386 = fsub float -0.000000e+00, %13 %387 = fadd float %temp8.0, %386 %388 = fsub float -0.000000e+00, %14 %389 = fadd float %temp9.0, %388 %390 = fsub float -0.000000e+00, %15 %391 = fadd float %temp10.0, %390 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %temp12.0, float %temp13.0, float %temp14.0, float %64) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %387, float %389, float %391, float %65) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %382, float %383, float %385, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %382, float %383, float %384, float %385) ret void IF66: ; preds = %IF %392 = fmul float 3.000000e+00, %52 %393 = fptosi float %392 to i32 %394 = bitcast i32 %393 to float %395 = bitcast float %394 to i32 %396 = shl i32 %395, 4 %397 = add i32 %396, 128 %398 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %397) %399 = shl i32 %395, 4 %400 = add i32 %399, 132 %401 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %400) %402 = shl i32 %395, 4 %403 = add i32 %402, 136 %404 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %403) %405 = shl i32 %395, 4 %406 = add i32 %405, 140 %407 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %406) %408 = fmul float %68, %398 %409 = fmul float %70, %401 %410 = fadd float %408, %409 %411 = fmul float %72, %404 %412 = fadd float %410, %411 %413 = fmul float %74, %407 %414 = fadd float %412, %413 %415 = fptosi float %392 to i32 %416 = bitcast i32 %415 to float %417 = bitcast float %416 to i32 %418 = add i32 1, %417 %419 = bitcast i32 %418 to float %420 = bitcast float %419 to i32 %421 = shl i32 %420, 4 %422 = add i32 %421, 128 %423 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %422) %424 = shl i32 %420, 4 %425 = add i32 %424, 132 %426 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %425) %427 = shl i32 %420, 4 %428 = add i32 %427, 136 %429 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %428) %430 = shl i32 %420, 4 %431 = add i32 %430, 140 %432 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %431) %433 = fmul float %68, %423 %434 = fmul float %70, %426 %435 = fadd float %433, %434 %436 = fmul float %72, %429 %437 = fadd float %435, %436 %438 = fmul float %74, %432 %439 = fadd float %437, %438 %440 = fptosi float %392 to i32 %441 = bitcast i32 %440 to float %442 = bitcast float %441 to i32 %443 = add i32 2, %442 %444 = bitcast i32 %443 to float %445 = bitcast float %444 to i32 %446 = shl i32 %445, 4 %447 = add i32 %446, 128 %448 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %447) %449 = shl i32 %445, 4 %450 = add i32 %449, 132 %451 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %450) %452 = shl i32 %445, 4 %453 = add i32 %452, 136 %454 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %453) %455 = shl i32 %445, 4 %456 = add i32 %455, 140 %457 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %456) %458 = fmul float %68, %448 %459 = fmul float %70, %451 %460 = fadd float %458, %459 %461 = fmul float %72, %454 %462 = fadd float %460, %461 %463 = fmul float %74, %457 %464 = fadd float %462, %463 %465 = fmul float %45, %414 %466 = fadd float %465, %289 %467 = fmul float %45, %439 %468 = fadd float %467, %291 %469 = fmul float %45, %464 %470 = fadd float %469, %293 %471 = fptosi float %392 to i32 %472 = bitcast i32 %471 to float %473 = bitcast float %472 to i32 %474 = shl i32 %473, 4 %475 = add i32 %474, 128 %476 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %475) %477 = shl i32 %473, 4 %478 = add i32 %477, 132 %479 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %478) %480 = shl i32 %473, 4 %481 = add i32 %480, 136 %482 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %481) %483 = fmul float %57, %476 %484 = fmul float %58, %479 %485 = fadd float %484, %483 %486 = fmul float %59, %482 %487 = fadd float %485, %486 %488 = fptosi float %392 to i32 %489 = bitcast i32 %488 to float %490 = bitcast float %489 to i32 %491 = add i32 1, %490 %492 = bitcast i32 %491 to float %493 = bitcast float %492 to i32 %494 = shl i32 %493, 4 %495 = add i32 %494, 128 %496 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %495) %497 = shl i32 %493, 4 %498 = add i32 %497, 132 %499 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %498) %500 = shl i32 %493, 4 %501 = add i32 %500, 136 %502 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %501) %503 = fmul float %57, %496 %504 = fmul float %58, %499 %505 = fadd float %504, %503 %506 = fmul float %59, %502 %507 = fadd float %505, %506 %508 = fptosi float %392 to i32 %509 = bitcast i32 %508 to float %510 = bitcast float %509 to i32 %511 = add i32 2, %510 %512 = bitcast i32 %511 to float %513 = bitcast float %512 to i32 %514 = shl i32 %513, 4 %515 = add i32 %514, 128 %516 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %515) %517 = shl i32 %513, 4 %518 = add i32 %517, 132 %519 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %518) %520 = shl i32 %513, 4 %521 = add i32 %520, 136 %522 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %521) %523 = fmul float %57, %516 %524 = fmul float %58, %519 %525 = fadd float %524, %523 %526 = fmul float %59, %522 %527 = fadd float %525, %526 %528 = fmul float %45, %487 %529 = fadd float %528, %352 %530 = fmul float %45, %507 %531 = fadd float %530, %354 %532 = fmul float %45, %527 %533 = fadd float %532, %356 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v14, s10, v0 ; 4A1C000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v14, s[20:23], 0 idxen ; E00C2000 8005140E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v20 ; 100028FF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v5, 4, v0 ; 340A0084 v_add_i32_e32 v0, 0xa0, v5 ; 4A000AFF 000000A0 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v0, s[0:3], 0 offen ; E0301000 80000200 buffer_load_format_xyzw v[10:13], v14, s[24:27], 0 idxen ; E00C2000 80060A0E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v10 ; 10001502 v_add_i32_e32 v1, 0xa4, v5 ; 4A020AFF 000000A4 buffer_load_dword v4, v1, s[0:3], 0 offen ; E0301000 80000401 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v4, v0 ; D2820000 0402090B v_add_i32_e32 v1, 0xa8, v5 ; 4A020AFF 000000A8 buffer_load_dword v15, v1, s[0:3], 0 offen ; E0301000 80000F01 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v12, v15, v0 ; D2820000 04021F0C buffer_load_format_xyzw v[6:9], v14, s[16:19], 0 idxen ; E00C2000 8004060E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_add_i32_e32 v1, 0x90, v5 ; 4A020AFF 00000090 buffer_load_dword v16, v1, s[0:3], 0 offen ; E0301000 80001001 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v16, v10 ; 10021510 v_add_i32_e32 v3, 0x94, v5 ; 4A060AFF 00000094 buffer_load_dword v17, v3, s[0:3], 0 offen ; E0301000 80001103 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v17, v1 ; D2820001 0406230B v_add_i32_e32 v3, 0x98, v5 ; 4A060AFF 00000098 buffer_load_dword v26, v3, s[0:3], 0 offen ; E0301000 80001A03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v12, v26, v1 ; D2820001 0406350C v_mul_f32_e32 v1, v6, v1 ; 10020306 v_add_i32_e32 v3, 0x80, v5 ; 4A060AFF 00000080 buffer_load_dword v27, v3, s[0:3], 0 offen ; E0301000 80001B03 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v27, v10 ; 1006151B v_add_i32_e32 v18, 0x84, v5 ; 4A240AFF 00000084 buffer_load_dword v28, v18, s[0:3], 0 offen ; E0301000 80001C12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v11, v28, v3 ; D2820003 040E390B v_add_i32_e32 v18, 0x88, v5 ; 4A240AFF 00000088 buffer_load_dword v29, v18, s[0:3], 0 offen ; E0301000 80001D12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v12, v29, v3 ; D2820003 040E3B0C v_mul_f32_e32 v3, v6, v3 ; 10060706 buffer_load_format_xyzw v[30:33], v14, s[12:15], 0 idxen ; E00C2000 80031E0E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v18, 0, v31 ; 06243E80 v_mul_f32_e32 v4, v4, v18 ; 10082504 v_add_f32_e32 v19, 0, v30 ; 06263C80 v_mad_f32 v2, v19, v2, v4 ; D2820002 04120513 v_add_f32_e32 v25, 0, v32 ; 06324080 v_mad_f32 v2, v25, v15, v2 ; D2820002 040A1F19 v_mad_f32 v24, 0, v30, 1.0 ; D2820018 03CA3C80 v_add_i32_e32 v4, 0xac, v5 ; 4A080AFF 000000AC buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v24, v4, v2 ; D2820002 040A0918 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v4, v17, v18 ; 10082511 v_mad_f32 v4, v19, v16, v4 ; D2820004 04122113 v_mad_f32 v4, v25, v26, v4 ; D2820004 04123519 v_add_i32_e32 v15, 0x9c, v5 ; 4A1E0AFF 0000009C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v24, v15, v4 ; D2820004 04121F18 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v15, v28, v18 ; 101E251C v_mad_f32 v15, v19, v27, v15 ; D282000F 043E3713 v_mad_f32 v15, v25, v29, v15 ; D282000F 043E3B19 v_add_i32_e32 v5, 0x8c, v5 ; 4A0A0AFF 0000008C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v24, v5, v15 ; D2820005 043E0B18 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 buffer_load_format_xyzw v[14:17], v14, s[4:7], 0 idxen ; E00C2000 80010E0E v_cmp_gt_f32_e64 s[4:5], v7, 0 ; D0080004 00010107 v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_ne_i32_e64 s[24:25], v26, 0 ; D10A0018 0001011A s_buffer_load_dword s9, s[0:3], 0x1f ; C204811F s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s17, s[0:3], 0x17 ; C2088117 s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115 s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114 s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s18, s[0:3], 0x11 ; C2090111 s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xc ; C20B010C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[24:25], s[24:25] ; BE982418 s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v26, 0x40400000, v21 ; 10342AFF 40400000 v_cvt_i32_f32_e32 v26, v26 ; 7E34111A v_lshlrev_b32_e32 v26, 4, v26 ; 34343484 v_add_i32_e32 v27, 0xa4, v26 ; 4A3634FF 000000A4 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B v_add_i32_e32 v28, 0xa0, v26 ; 4A3834FF 000000A0 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v10 ; 103A151C v_mad_f32 v29, v11, v27, v29 ; D282001D 0476370B v_add_i32_e32 v30, 0xa8, v26 ; 4A3C34FF 000000A8 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v12, v30, v29 ; D282001D 04763D0C v_mad_f32 v0, v7, v29, v0 ; D2820000 04023B07 v_add_i32_e32 v29, 0x94, v26 ; 4A3A34FF 00000094 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v31, 0x90, v26 ; 4A3E34FF 00000090 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v10 ; 1040151F v_mad_f32 v32, v11, v29, v32 ; D2820020 04823B0B v_add_i32_e32 v33, 0x98, v26 ; 4A4234FF 00000098 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v32, v12, v33, v32 ; D2820020 0482430C v_mad_f32 v1, v7, v32, v1 ; D2820001 04064107 v_add_i32_e32 v32, 0x84, v26 ; 4A4034FF 00000084 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 v_add_i32_e32 v34, 0x80, v26 ; 4A4434FF 00000080 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v35, v34, v10 ; 10461522 v_mad_f32 v35, v11, v32, v35 ; D2820023 048E410B v_add_i32_e32 v36, 0x88, v26 ; 4A4834FF 00000088 buffer_load_dword v36, v36, s[0:3], 0 offen ; E0301000 80002424 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, v12, v36, v35 ; D2820023 048E490C v_mad_f32 v3, v7, v35, v3 ; D2820003 040E4707 v_mul_f32_e32 v27, v27, v18 ; 1036251B v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_mad_f32 v27, v25, v30, v27 ; D282001B 046E3D19 v_add_i32_e32 v28, 0xac, v26 ; 4A3834FF 000000AC buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v24, v28, v27 ; D282001B 046E3918 v_mad_f32 v2, v7, v27, v2 ; D2820002 040A3707 v_mul_f32_e32 v27, v29, v18 ; 1036251D v_mad_f32 v27, v19, v31, v27 ; D282001B 046E3F13 v_mad_f32 v27, v25, v33, v27 ; D282001B 046E4319 v_add_i32_e32 v28, 0x9c, v26 ; 4A3834FF 0000009C buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v24, v28, v27 ; D282001B 046E3918 v_mad_f32 v4, v7, v27, v4 ; D2820004 04123707 v_mul_f32_e32 v27, v32, v18 ; 10362520 v_mad_f32 v27, v19, v34, v27 ; D282001B 046E4513 v_mad_f32 v27, v25, v36, v27 ; D282001B 046E4919 v_add_i32_e32 v26, 0x8c, v26 ; 4A3434FF 0000008C buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, v24, v26, v27 ; D282001A 046E3518 v_mad_f32 v5, v7, v26, v5 ; D2820005 04163507 v_cmp_gt_f32_e64 s[26:27], v8, 0 ; D008001A 00010108 v_cndmask_b32_e64 v26, 0, -1, s[26:27] ; D200001A 00698280 v_cmp_ne_i32_e64 s[26:27], v26, 0 ; D10A001A 0001011A s_and_saveexec_b64 s[26:27], s[26:27] ; BE9A241A s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v20, 0x40400000, v22 ; 10282CFF 40400000 v_cvt_i32_f32_e32 v20, v20 ; 7E281114 v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v21, 0xa4, v20 ; 4A2A28FF 000000A4 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_add_i32_e32 v22, 0xa0, v20 ; 4A2C28FF 000000A0 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v23, v22, v10 ; 102E1516 v_mad_f32 v23, v11, v21, v23 ; D2820017 045E2B0B v_add_i32_e32 v26, 0xa8, v20 ; 4A3428FF 000000A8 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v12, v26, v23 ; D2820017 045E350C v_mad_f32 v0, v8, v23, v0 ; D2820000 04022F08 v_add_i32_e32 v23, 0x94, v20 ; 4A2E28FF 00000094 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v27, 0x90, v20 ; 4A3628FF 00000090 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v28, v27, v10 ; 1038151B v_mad_f32 v28, v11, v23, v28 ; D282001C 04722F0B v_add_i32_e32 v29, 0x98, v20 ; 4A3A28FF 00000098 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v12, v29, v28 ; D282001C 04723B0C v_mad_f32 v1, v8, v28, v1 ; D2820001 04063908 v_add_i32_e32 v28, 0x84, v20 ; 4A3828FF 00000084 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v30, 0x80, v20 ; 4A3C28FF 00000080 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v31, v30, v10 ; 103E151E v_mad_f32 v31, v11, v28, v31 ; D282001F 047E390B v_add_i32_e32 v32, 0x88, v20 ; 4A4028FF 00000088 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v12, v32, v31 ; D282000A 047E410C v_mad_f32 v3, v8, v10, v3 ; D2820003 040E1508 v_mul_f32_e32 v10, v21, v18 ; 10142515 v_mad_f32 v10, v19, v22, v10 ; D282000A 042A2D13 v_mad_f32 v10, v25, v26, v10 ; D282000A 042A3519 v_add_i32_e32 v11, 0xac, v20 ; 4A1628FF 000000AC buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v24, v11, v10 ; D282000A 042A1718 v_mad_f32 v2, v8, v10, v2 ; D2820002 040A1508 v_mul_f32_e32 v10, v23, v18 ; 10142517 v_mad_f32 v10, v19, v27, v10 ; D282000A 042A3713 v_mad_f32 v10, v25, v29, v10 ; D282000A 042A3B19 v_add_i32_e32 v11, 0x9c, v20 ; 4A1628FF 0000009C buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v24, v11, v10 ; D282000A 042A1718 v_mad_f32 v4, v8, v10, v4 ; D2820004 04121508 v_mul_f32_e32 v10, v28, v18 ; 1014251C v_mad_f32 v10, v19, v30, v10 ; D282000A 042A3D13 v_mad_f32 v10, v25, v32, v10 ; D282000A 042A4119 v_add_i32_e32 v11, 0x8c, v20 ; 4A1628FF 0000008C buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v24, v11, v10 ; D282000A 042A1718 v_mad_f32 v5, v8, v10, v5 ; D2820005 04161508 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E s_or_b64 exec, exec, s[24:25] ; 88FE187E exp 15, 32, 0, 0, 0, v3, v1, v0, v14 ; F800020F 0E000103 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v0, s20, v2 ; 0A000414 v_subrev_f32_e32 v1, s21, v4 ; 0A020815 v_subrev_f32_e32 v3, s22, v5 ; 0A060A16 exp 15, 33, 0, 0, 0, v3, v1, v0, v15 ; F800021F 0F000103 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v5, v4, v2, v0 ; F800022F 00020405 v_mul_f32_e32 v1, s17, v4 ; 10020811 v_mad_f32 v1, v5, s19, v1 ; D2820001 04042705 v_mad_f32 v1, v2, s13, v1 ; D2820001 04041B02 v_add_f32_e32 v1, s9, v1 ; 06020209 v_mul_f32_e32 v3, s15, v4 ; 1006080F v_mad_f32 v3, v5, s18, v3 ; D2820003 040C2505 v_mad_f32 v3, v2, s12, v3 ; D2820003 040C1902 v_add_f32_e32 v3, s7, v3 ; 06060607 v_mul_f32_e32 v6, s14, v4 ; 100C080E v_mad_f32 v6, v5, s16, v6 ; D2820006 04182105 v_mad_f32 v6, v2, s10, v6 ; D2820006 04181502 v_add_f32_e32 v6, s6, v6 ; 060C0C06 exp 15, 35, 0, 0, 0, v6, v3, v1, v0 ; F800023F 00010306 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v4 ; 10000808 v_mad_f32 v0, v5, s11, v0 ; D2820000 04001705 v_mad_f32 v0, v2, s5, v0 ; D2820000 04000B02 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v6, v3, v0, v1 ; F80008CF 01000306 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..5] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.4427, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.0000, 0.3300, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[3], IN[2] 1: MUL TEMP[1].w, TEMP[0].zzzz, CONST[1].xxxx 2: MUL TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 3: EX2 TEMP[1].x, TEMP[1].wwww 4: ADD TEMP[1].w, -TEMP[1].xxxx, IMM[0].yyyy 5: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 6: RCP TEMP[3].x, TEMP[0].zzzz 7: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1].yyyy 8: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 9: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MOV TEMP[0].x, TEMP[1].xxxx 12: EX2 TEMP[1].x, TEMP[1].xxxx 13: MOV_SAT TEMP[0].x, TEMP[1].xxxx 14: ADD TEMP[1].x, -TEMP[0].xxxx, IMM[0].yyyy 15: RCP TEMP[2].x, IN[3].zzzz 16: MUL TEMP[2].y, TEMP[2].xxxx, IN[3].yyyy 17: MAD TEMP[2].x, TEMP[2].yyyy, IMM[0].zzzz, IMM[0].zzzz 18: MOV TEMP[2].x, TEMP[2].xxxx 19: MOV TEMP[2].y, CONST[1].wwww 20: MOV TEMP[3].xy, TEMP[2].xyyy 21: TEX TEMP[3].x, TEMP[3], SAMP[5], 2D 22: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 23: MOV TEMP[0].x, TEMP[1].xxxx 24: MOV TEMP[3].w, IMM[0].wwww 25: MOV TEMP[3].x, IN[1].xxxx 26: MOV TEMP[3].y, IN[1].yyyy 27: MOV TEMP[3].z, IN[1].zzzz 28: DP4 TEMP[4].x, TEMP[3], TEMP[3] 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 31: MOV TEMP[4].w, IMM[0].wwww 32: MOV TEMP[4].x, IN[0].xxxx 33: MOV TEMP[4].y, IN[0].yyyy 34: MOV TEMP[4].z, IN[0].zzzz 35: DP4 TEMP[5].x, TEMP[4], TEMP[4] 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 38: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[4].xyzz 39: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 40: MAD TEMP[3].yzw, TEMP[4].xxyz, -TEMP[5].yyyy, TEMP[3].xxyz 41: MOV TEMP[4].xyz, TEMP[4].xyzz 42: TEX TEMP[4], TEMP[4], SAMP[2], CUBE 43: MUL TEMP[4].xyz, TEMP[4], TEMP[4] 44: MOV TEMP[5].xyz, TEMP[3].yzww 45: TEX TEMP[5], TEMP[5], SAMP[4], CUBE 46: MOV TEMP[6].w, TEMP[5].wwww 47: MOV TEMP[3].xyz, TEMP[3].yzww 48: TEX TEMP[3].xyz, TEMP[3], SAMP[3], CUBE 49: MUL TEMP[7].yzw, TEMP[3].xxyz, TEMP[3].xxyz 50: MOV TEMP[0].yzw, TEMP[7].zyzw 51: MAD TEMP[5].xyz, TEMP[5], TEMP[5], -TEMP[7].yzww 52: MOV TEMP[3].x, IN[0].wwww 53: MOV TEMP[3].y, IN[1].wwww 54: MOV TEMP[7].xy, TEMP[3].xyyy 55: TEX TEMP[7], TEMP[7], SAMP[1], 2D 56: MOV TEMP[8].xy, TEMP[3].xyyy 57: TEX TEMP[8], TEMP[8], SAMP[0], 2D 58: MUL TEMP[3], TEMP[8], TEMP[8] 59: MUL TEMP[7], TEMP[7], TEMP[7] 60: MUL TEMP[8].w, TEMP[7].wwww, TEMP[7].wwww 61: MUL TEMP[8], TEMP[8].wwww, TEMP[8].wwww 62: MOV_SAT TEMP[8], TEMP[8] 63: MOV TEMP[2].w, TEMP[8].wwww 64: MAD TEMP[5].yzw, TEMP[8].wwww, TEMP[5].xxyz, TEMP[0] 65: MOV TEMP[0].yzw, TEMP[5].zyzw 66: MUL TEMP[5].yzw, TEMP[0], TEMP[7].xxyz 67: MOV TEMP[0].yzw, TEMP[5].zyzw 68: MUL TEMP[5].xyz, CONST[4], CONST[4] 69: MOV TEMP[6].xyz, TEMP[5].xyzx 70: MOV TEMP[7].xyz, CONST[4].xyzx 71: MAD TEMP[5].xyz, TEMP[7], -TEMP[7], IMM[0].yyyy 72: MOV TEMP[7].xyz, TEMP[5].xyzx 73: MAD TEMP[5].xyz, TEMP[3].wwww, TEMP[7], TEMP[6] 74: MOV TEMP[6].xyz, TEMP[5].xyzx 75: MUL TEMP[3].xyz, TEMP[3], TEMP[6] 76: MAD TEMP[0].yzw, TEMP[3].xxyz, TEMP[4].xxyz, TEMP[0] 77: MAX TEMP[3].x, TEMP[0].yyyy, IMM[1].xxxx 78: RSQ TEMP[3].x, TEMP[3].xxxx 79: RCP TEMP[2].x, TEMP[3].xxxx 80: MAX TEMP[3].x, TEMP[0].zzzz, IMM[1].xxxx 81: RSQ TEMP[3].x, TEMP[3].xxxx 82: MAX TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 83: RSQ TEMP[4].x, TEMP[0].xxxx 84: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 85: CMP TEMP[4].x, -TEMP[0].xxxx, TEMP[4].xxxx, IMM[0].wwww 86: MOV TEMP[2].z, TEMP[4].xxxx 87: RCP TEMP[0].x, TEMP[3].xxxx 88: MOV TEMP[2].y, TEMP[0].xxxx 89: ADD TEMP[0].yzw, -TEMP[2].xxyz, CONST[2].xxyz 90: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[2] 91: MOV TEMP[0].xyz, TEMP[0].xyzx 92: ADD TEMP[1].x, -CONST[0].xxxx, IN[2].zzzz 93: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 94: UIF TEMP[1].xxxx :0 95: MOV TEMP[1].x, IMM[0].yyyy 96: ELSE :0 97: MOV TEMP[1].x, IMM[1].yyyy 98: ENDIF 99: ADD TEMP[2].y, -CONST[5].xxxx, CONST[5].yyyy 100: MAD TEMP[2].y, CONST[5].zzzz, TEMP[2].yyyy, CONST[5].xxxx 101: MUL TEMP[1].w, TEMP[2].yyyy, TEMP[1].xxxx 102: MOV TEMP[0].w, TEMP[1].wwww 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %40 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %49 = load <8 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %51 = load <4 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %53 = load <8 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %55 = load <4 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %57 = load <8 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %59 = load <4 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %61 = load <8 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %63 = load <4 x i32> addrspace(2)* %62, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %77 = fsub float -0.000000e+00, %31 %78 = fadd float %77, %72 %79 = fsub float -0.000000e+00, %32 %80 = fadd float %79, %73 %81 = fsub float -0.000000e+00, %33 %82 = fadd float %81, %74 %83 = fmul float %82, %25 %84 = fmul float %83, 0x3FF7154CA0000000 %85 = call float @llvm.AMDIL.exp.(float %84) %86 = fsub float -0.000000e+00, %85 %87 = fadd float %86, 1.000000e+00 %88 = fmul float %78, %78 %89 = fmul float %80, %80 %90 = fadd float %89, %88 %91 = fmul float %82, %82 %92 = fadd float %90, %91 %93 = fdiv float 1.000000e+00, %82 %94 = fmul float %92, %26 %95 = fmul float %87, %94 %96 = fmul float %93, %95 %97 = fmul float %96, 0x3FF7154CA0000000 %98 = call float @llvm.AMDIL.exp.(float %97) %99 = call float @llvm.AMDIL.clamp.(float %98, float 0.000000e+00, float 1.000000e+00) %100 = fsub float -0.000000e+00, %99 %101 = fadd float %100, 1.000000e+00 %102 = fdiv float 1.000000e+00, %76 %103 = fmul float %102, %75 %104 = fmul float %103, 5.000000e-01 %105 = fadd float %104, 5.000000e-01 %106 = bitcast float %105 to i32 %107 = bitcast float %27 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = bitcast <8 x i32> %61 to <32 x i8> %111 = bitcast <4 x i32> %63 to <16 x i8> %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %110, <16 x i8> %111, i32 2) %113 = extractelement <4 x float> %112, i32 0 %114 = fmul float %101, %113 %115 = fmul float %68, %68 %116 = fmul float %69, %69 %117 = fadd float %115, %116 %118 = fmul float %70, %70 %119 = fadd float %117, %118 %120 = fmul float 0.000000e+00, 0.000000e+00 %121 = fadd float %119, %120 %122 = call float @llvm.AMDGPU.rsq.clamped.f32(float %121) %123 = fmul float %68, %122 %124 = fmul float %69, %122 %125 = fmul float %70, %122 %126 = fmul float %64, %64 %127 = fmul float %65, %65 %128 = fadd float %126, %127 %129 = fmul float %66, %66 %130 = fadd float %128, %129 %131 = fmul float 0.000000e+00, 0.000000e+00 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %64, %133 %135 = fmul float %65, %133 %136 = fmul float %66, %133 %137 = fmul float %123, %134 %138 = fmul float %124, %135 %139 = fadd float %138, %137 %140 = fmul float %125, %136 %141 = fadd float %139, %140 %142 = fadd float %141, %141 %143 = fsub float -0.000000e+00, %142 %144 = fmul float %134, %143 %145 = fadd float %144, %123 %146 = fsub float -0.000000e+00, %142 %147 = fmul float %135, %146 %148 = fadd float %147, %124 %149 = fsub float -0.000000e+00, %142 %150 = fmul float %136, %149 %151 = fadd float %150, %125 %152 = insertelement <4 x float> undef, float %134, i32 0 %153 = insertelement <4 x float> %152, float %135, i32 1 %154 = insertelement <4 x float> %153, float %136, i32 2 %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 3 %156 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %155) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = call float @fabs(float %159) %162 = fdiv float 1.000000e+00, %161 %163 = fmul float %157, %162 %164 = fadd float %163, 1.500000e+00 %165 = fmul float %158, %162 %166 = fadd float %165, 1.500000e+00 %167 = bitcast float %166 to i32 %168 = bitcast float %164 to i32 %169 = bitcast float %160 to i32 %170 = insertelement <4 x i32> undef, i32 %167, i32 0 %171 = insertelement <4 x i32> %170, i32 %168, i32 1 %172 = insertelement <4 x i32> %171, i32 %169, i32 2 %173 = insertelement <4 x i32> %172, i32 undef, i32 3 %174 = bitcast <8 x i32> %49 to <32 x i8> %175 = bitcast <4 x i32> %51 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 4) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = fmul float %177, %177 %181 = fmul float %178, %178 %182 = fmul float %179, %179 %183 = insertelement <4 x float> undef, float %145, i32 0 %184 = insertelement <4 x float> %183, float %148, i32 1 %185 = insertelement <4 x float> %184, float %151, i32 2 %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3 %187 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %186) %188 = extractelement <4 x float> %187, i32 0 %189 = extractelement <4 x float> %187, i32 1 %190 = extractelement <4 x float> %187, i32 2 %191 = extractelement <4 x float> %187, i32 3 %192 = call float @fabs(float %190) %193 = fdiv float 1.000000e+00, %192 %194 = fmul float %188, %193 %195 = fadd float %194, 1.500000e+00 %196 = fmul float %189, %193 %197 = fadd float %196, 1.500000e+00 %198 = bitcast float %197 to i32 %199 = bitcast float %195 to i32 %200 = bitcast float %191 to i32 %201 = insertelement <4 x i32> undef, i32 %198, i32 0 %202 = insertelement <4 x i32> %201, i32 %199, i32 1 %203 = insertelement <4 x i32> %202, i32 %200, i32 2 %204 = insertelement <4 x i32> %203, i32 undef, i32 3 %205 = bitcast <8 x i32> %57 to <32 x i8> %206 = bitcast <4 x i32> %59 to <16 x i8> %207 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 4) %208 = extractelement <4 x float> %207, i32 0 %209 = extractelement <4 x float> %207, i32 1 %210 = extractelement <4 x float> %207, i32 2 %211 = insertelement <4 x float> undef, float %145, i32 0 %212 = insertelement <4 x float> %211, float %148, i32 1 %213 = insertelement <4 x float> %212, float %151, i32 2 %214 = insertelement <4 x float> %213, float %151, i32 3 %215 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %214) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = extractelement <4 x float> %215, i32 3 %220 = call float @fabs(float %218) %221 = fdiv float 1.000000e+00, %220 %222 = fmul float %216, %221 %223 = fadd float %222, 1.500000e+00 %224 = fmul float %217, %221 %225 = fadd float %224, 1.500000e+00 %226 = bitcast float %225 to i32 %227 = bitcast float %223 to i32 %228 = bitcast float %219 to i32 %229 = insertelement <4 x i32> undef, i32 %226, i32 0 %230 = insertelement <4 x i32> %229, i32 %227, i32 1 %231 = insertelement <4 x i32> %230, i32 %228, i32 2 %232 = insertelement <4 x i32> %231, i32 undef, i32 3 %233 = bitcast <8 x i32> %53 to <32 x i8> %234 = bitcast <4 x i32> %55 to <16 x i8> %235 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 4) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = fmul float %236, %236 %240 = fmul float %237, %237 %241 = fmul float %238, %238 %242 = fsub float -0.000000e+00, %239 %243 = fmul float %208, %208 %244 = fadd float %243, %242 %245 = fsub float -0.000000e+00, %240 %246 = fmul float %209, %209 %247 = fadd float %246, %245 %248 = fsub float -0.000000e+00, %241 %249 = fmul float %210, %210 %250 = fadd float %249, %248 %251 = bitcast float %67 to i32 %252 = bitcast float %71 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = bitcast <8 x i32> %45 to <32 x i8> %256 = bitcast <4 x i32> %47 to <16 x i8> %257 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %255, <16 x i8> %256, i32 2) %258 = extractelement <4 x float> %257, i32 0 %259 = extractelement <4 x float> %257, i32 1 %260 = extractelement <4 x float> %257, i32 2 %261 = extractelement <4 x float> %257, i32 3 %262 = bitcast float %67 to i32 %263 = bitcast float %71 to i32 %264 = insertelement <2 x i32> undef, i32 %262, i32 0 %265 = insertelement <2 x i32> %264, i32 %263, i32 1 %266 = bitcast <8 x i32> %41 to <32 x i8> %267 = bitcast <4 x i32> %43 to <16 x i8> %268 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2) %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = extractelement <4 x float> %268, i32 3 %273 = fmul float %269, %269 %274 = fmul float %270, %270 %275 = fmul float %271, %271 %276 = fmul float %272, %272 %277 = fmul float %258, %258 %278 = fmul float %259, %259 %279 = fmul float %260, %260 %280 = fmul float %261, %261 %281 = fmul float %280, %280 %282 = fmul float %281, %281 %283 = fmul float %281, %281 %284 = fmul float %281, %281 %285 = fmul float %281, %281 %286 = call float @llvm.AMDIL.clamp.(float %282, float 0.000000e+00, float 1.000000e+00) %287 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00) %288 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00) %289 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00) %290 = fmul float %289, %244 %291 = fadd float %290, %239 %292 = fmul float %289, %247 %293 = fadd float %292, %240 %294 = fmul float %289, %250 %295 = fadd float %294, %241 %296 = fmul float %291, %277 %297 = fmul float %293, %278 %298 = fmul float %295, %279 %299 = fmul float %34, %34 %300 = fmul float %35, %35 %301 = fmul float %36, %36 %302 = fsub float -0.000000e+00, %34 %303 = fmul float %34, %302 %304 = fadd float %303, 1.000000e+00 %305 = fsub float -0.000000e+00, %35 %306 = fmul float %35, %305 %307 = fadd float %306, 1.000000e+00 %308 = fsub float -0.000000e+00, %36 %309 = fmul float %36, %308 %310 = fadd float %309, 1.000000e+00 %311 = fmul float %276, %304 %312 = fadd float %311, %299 %313 = fmul float %276, %307 %314 = fadd float %313, %300 %315 = fmul float %276, %310 %316 = fadd float %315, %301 %317 = fmul float %273, %312 %318 = fmul float %274, %314 %319 = fmul float %275, %316 %320 = fmul float %317, %180 %321 = fadd float %320, %296 %322 = fmul float %318, %181 %323 = fadd float %322, %297 %324 = fmul float %319, %182 %325 = fadd float %324, %298 %326 = call float @llvm.maxnum.f32(float %321, float 0x3E7AD7F2A0000000) %327 = call float @llvm.AMDGPU.rsq.clamped.f32(float %326) %328 = fdiv float 1.000000e+00, %327 %329 = call float @llvm.maxnum.f32(float %323, float 0x3E7AD7F2A0000000) %330 = call float @llvm.AMDGPU.rsq.clamped.f32(float %329) %331 = call float @llvm.maxnum.f32(float %325, float 0x3E7AD7F2A0000000) %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fmul float %332, %331 %334 = fsub float -0.000000e+00, %331 %335 = call float @llvm.AMDGPU.cndlt(float %334, float %333, float 0.000000e+00) %336 = fdiv float 1.000000e+00, %330 %337 = fsub float -0.000000e+00, %328 %338 = fadd float %337, %28 %339 = fsub float -0.000000e+00, %336 %340 = fadd float %339, %29 %341 = fsub float -0.000000e+00, %335 %342 = fadd float %341, %30 %343 = fmul float %114, %338 %344 = fadd float %343, %328 %345 = fmul float %114, %340 %346 = fadd float %345, %336 %347 = fmul float %114, %342 %348 = fadd float %347, %335 %349 = fsub float -0.000000e+00, %24 %350 = fadd float %349, %74 %351 = fcmp oge float %350, 0.000000e+00 %352 = sext i1 %351 to i32 %353 = bitcast i32 %352 to float %354 = bitcast float %353 to i32 %355 = icmp ne i32 %354, 0 %. = select i1 %355, float 1.000000e+00, float 0x3FD51EB860000000 %356 = fsub float -0.000000e+00, %37 %357 = fadd float %356, %38 %358 = fmul float %39, %357 %359 = fadd float %358, %37 %360 = fmul float %359, %. %361 = call i32 @llvm.SI.packf16(float %344, float %346) %362 = bitcast i32 %361 to float %363 = call i32 @llvm.SI.packf16(float %348, float %360) %364 = bitcast i32 %363 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %362, float %364, float %362, float %364) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mul_f32_e32 v4, v3, v3 ; 10080703 v_mad_f32 v4, v2, v2, v4 ; D2820004 04120502 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_mad_f32 v4, v5, v5, v4 ; D2820004 04120B05 v_mov_b32_e32 v9, 0 ; 7E120280 v_add_f32_e32 v4, 0, v4 ; 06080880 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v10, v4, v2 ; 10140504 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v14, v0, 1, 1, [m0] ; C8380500 v_interp_p2_f32 v14, [v14], v1, 1, 1, [m0] ; C8390501 v_mul_f32_e32 v15, v14, v14 ; 101E1D0E v_mad_f32 v15, v2, v2, v15 ; D282000F 043E0502 v_interp_p1_f32 v16, v0, 2, 1, [m0] ; C8400600 v_interp_p2_f32 v16, [v16], v1, 2, 1, [m0] ; C8410601 v_mad_f32 v15, v16, v16, v15 ; D282000F 043E2110 v_add_f32_e32 v15, 0, v15 ; 061E1E80 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v17, v15, v2 ; 1022050F v_mul_f32_e32 v17, v10, v17 ; 1022230A v_mul_f32_e32 v11, v4, v3 ; 10160704 v_mul_f32_e32 v3, v15, v14 ; 10061D0F v_mad_f32 v3, v3, v11, v17 ; D2820003 04461703 v_mul_f32_e32 v12, v4, v5 ; 10180B04 v_mul_f32_e32 v4, v15, v16 ; 1008210F v_mad_f32 v3, v4, v12, v3 ; D2820003 040E1904 v_add_f32_e32 v3, v3, v3 ; 06060703 v_mul_f32_e32 v4, v3, v12 ; 10081903 v_mad_f32 v8, v16, v15, -v4 ; D2820008 84121F10 v_mul_f32_e32 v4, v3, v11 ; 10081703 v_mad_f32 v7, v14, v15, -v4 ; D2820007 84121F0E v_mul_f32_e32 v3, v3, v10 ; 10061503 v_mad_f32 v6, v2, v15, -v3 ; D2820006 840E1F02 v_cubeid_f32 v17, v6, v7, v8 ; D2880011 04220F06 v_cubema_f32 v16, v6, v7, v8 ; D28E0010 04220F06 v_cubesc_f32 v15, v6, v7, v8 ; D28A000F 04220F06 v_cubetc_f32 v14, v6, v7, v8 ; D28C000E 04220F06 v_rcp_f32_e64 v2, |v16| ; D3540102 00000110 v_mov_b32_e32 v3, 0x3fc00000 ; 7E0602FF 3FC00000 v_mad_f32 v16, v14, v2, v3 ; D2820010 040E050E v_mad_f32 v15, v15, v2, v3 ; D282000F 040E050F s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[48:51], s[4:5], 0xc ; C098050C s_load_dwordx4 s[52:55], s[4:5], 0x10 ; C09A0510 s_load_dwordx4 s[8:11], s[4:5], 0x14 ; C0840514 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx8 s[56:63], s[6:7], 0x8 ; C0DC0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[64:71], s[6:7], 0x18 ; C0E00718 s_load_dwordx8 s[72:79], s[6:7], 0x20 ; C0E40720 s_load_dwordx8 s[12:19], s[6:7], 0x28 ; C0C60728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[72:79], s[52:55] ; F0800700 01B20E0F v_mov_b32_e32 v17, v6 ; 7E220306 v_mov_b32_e32 v18, v7 ; 7E240307 v_mov_b32_e32 v19, v8 ; 7E260308 v_mov_b32_e32 v20, v9 ; 7E280309 v_mov_b32_e32 v20, v8 ; 7E280308 v_cubeid_f32 v24, v17, v18, v19 ; D2880018 044E2511 v_cubema_f32 v23, v17, v18, v19 ; D28E0017 044E2511 v_cubesc_f32 v22, v17, v18, v19 ; D28A0016 044E2511 v_cubetc_f32 v21, v17, v18, v19 ; D28C0015 044E2511 v_rcp_f32_e64 v2, |v23| ; D3540102 00000117 v_mad_f32 v23, v21, v2, v3 ; D2820017 040E0515 v_mad_f32 v22, v22, v2, v3 ; D2820016 040E0516 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[64:71], s[48:51] ; F0800700 01901116 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v18, v18 ; 10042512 v_mad_f32 v4, v15, v15, -v2 ; D2820004 840A1F0F v_interp_p1_f32 v21, v0, 3, 1, [m0] ; C8540700 v_interp_p2_f32 v21, [v21], v1, 3, 1, [m0] ; C8550701 v_interp_p1_f32 v20, v0, 3, 0, [m0] ; C8500300 v_interp_p2_f32 v20, [v20], v1, 3, 0, [m0] ; C8510301 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[56:63], s[44:47] ; F0800F00 016E1614 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v25, v25 ; 100A3319 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mad_f32 v2, v5, v4, v2 ; D2820002 040A0905 v_mul_f32_e32 v4, v23, v23 ; 10082F17 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mov_b32_e32 v13, v9 ; 7E1A0309 v_cubeid_f32 v29, v10, v11, v12 ; D288001D 0432170A v_cubema_f32 v28, v10, v11, v12 ; D28E001C 0432170A v_cubesc_f32 v27, v10, v11, v12 ; D28A001B 0432170A v_cubetc_f32 v26, v10, v11, v12 ; D28C001A 0432170A v_rcp_f32_e64 v4, |v28| ; D3540104 0000011C v_mad_f32 v28, v26, v4, v3 ; D282001C 040E091A v_mad_f32 v27, v27, v4, v3 ; D282001B 040E091B image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[24:27] ; F0800700 00C9061B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v7, v7 ; 10060F07 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[28:35], s[20:23] ; F0800F00 00A70914 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v12, v12 ; 1008190C s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, -s4, s4, 1.0 ; D282000D 23C80804 v_mul_f32_e64 v20, s4, s4 ; D2100014 00000804 v_mad_f32 v13, v4, v13, v20 ; D282000D 04521B04 v_mul_f32_e32 v20, v10, v10 ; 1028150A v_mul_f32_e32 v13, v13, v20 ; 101A290D v_mad_f32 v2, v13, v3, v2 ; D2820002 040A070D v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_rcp_f32_e32 v2, v2 ; 7E045502 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_interp_p1_f32 v13, v0, 1, 2, [m0] ; C8340900 v_interp_p2_f32 v13, [v13], v1, 1, 2, [m0] ; C8350901 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v13, s4, v13 ; 0A1A1A04 v_interp_p1_f32 v20, v0, 0, 2, [m0] ; C8500800 v_interp_p2_f32 v20, [v20], v1, 0, 2, [m0] ; C8510801 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v20, s4, v20 ; 0A282804 v_mul_f32_e32 v20, v20, v20 ; 10282914 v_mad_f32 v13, v13, v13, v20 ; D282000D 04521B0D v_interp_p1_f32 v20, v0, 2, 2, [m0] ; C8500A00 v_interp_p2_f32 v20, [v20], v1, 2, 2, [m0] ; C8510A01 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v21, s4, v20 ; 0A2A2804 v_mad_f32 v13, v21, v21, v13 ; D282000D 04362B15 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s4, v13 ; 101A1A04 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v26, s4, v21 ; 10342A04 v_mul_f32_e32 v26, 0x3fb8aa65, v26 ; 103434FF 3FB8AA65 v_exp_f32_e32 v26, v26 ; 7E344B1A v_sub_f32_e32 v26, 1.0, v26 ; 083434F2 v_mul_f32_e32 v13, v13, v26 ; 101A350D v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mul_f32_e32 v13, v13, v21 ; 101A2B0D v_mul_f32_e32 v13, 0x3fb8aa65, v13 ; 101A1AFF 3FB8AA65 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_sub_f32_e32 v13, 1.0, v13 ; 081A1AF2 v_interp_p1_f32 v21, v0, 1, 3, [m0] ; C8540D00 v_interp_p2_f32 v21, [v21], v1, 1, 3, [m0] ; C8550D01 v_interp_p1_f32 v26, v0, 2, 3, [m0] ; C8680E00 v_interp_p2_f32 v26, [v26], v1, 2, 3, [m0] ; C8690E01 v_rcp_f32_e32 v0, v26 ; 7E00551A v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[8:11] ; F0800100 00430000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mad_f32 v1, v0, v3, v2 ; D2820001 040A0700 v_mul_f32_e32 v2, v17, v17 ; 10042311 v_mad_f32 v3, v14, v14, -v2 ; D2820003 840A1D0E v_mad_f32 v2, v5, v3, v2 ; D2820002 040A0705 v_mul_f32_e32 v3, v22, v22 ; 10062D16 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, v6, v6 ; 10060D06 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, -s4, s4, 1.0 ; D282000D 23C80804 v_mul_f32_e64 v21, s4, s4 ; D2100015 00000804 v_mad_f32 v13, v4, v13, v21 ; D282000D 04561B04 v_mul_f32_e32 v21, v9, v9 ; 102A1309 v_mul_f32_e32 v13, v13, v21 ; 101A2B0D v_mad_f32 v2, v13, v3, v2 ; D2820002 040A070D v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_rcp_f32_e32 v2, v2 ; 7E045502 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v2, v0, v3, v2 ; D2820002 040A0700 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 v_mul_f32_e32 v2, v19, v19 ; 10042713 v_mad_f32 v3, v16, v16, -v2 ; D2820003 840A2110 v_mad_f32 v2, v5, v3, v2 ; D2820002 040A0705 v_mul_f32_e32 v3, v24, v24 ; 10063118 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, v8, v8 ; 10061108 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, -s4, s4, 1.0 ; D2820005 23C80804 v_mul_f32_e64 v6, s4, s4 ; D2100006 00000804 v_mad_f32 v4, v4, v5, v6 ; D2820004 041A0B04 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mad_f32 v2, v4, v3, v2 ; D2820002 040A0704 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, v3, vcc ; D2000002 01AA0680 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v2, s4, v20 ; 0A042804 v_cmp_ge_f32_e64 s[4:5], v2, 0 ; D00C0004 00010102 v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; D2000002 00118280 v_cmp_ne_i32_e64 s[4:5], v2, 0 ; D10A0004 00010102 v_mov_b32_e32 v2, 0x3ea8f5c3 ; 7E0402FF 3EA8F5C3 v_cndmask_b32_e64 v2, v2, 1.0, s[4:5] ; D2000002 1011E502 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_sub_f32_e32 v3, s5, v3 ; 08060605 s_buffer_load_dword s0, s[0:3], 0x16 ; C2000116 v_mov_b32_e32 v4, s4 ; 7E080204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v3, s0, v4 ; D2820003 04100103 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].zzzz 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].zzzz 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, IN[0].yyyy, CONST[ADDR[0].x].xyww 7: MOV TEMP[0].xyz, TEMP[2].xyzx 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: MAD TEMP[2].xyz, IN[0].xxxx, CONST[ADDR[0].x].xyww, TEMP[0] 11: MOV TEMP[0].xyz, TEMP[2].xyzx 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: MAD TEMP[0].xyz, IN[0].zzzz, CONST[ADDR[0].x].xyww, TEMP[0] 16: F2I TEMP[1].x, TEMP[1].xxxx 17: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: UARL ADDR[0].x, TEMP[1].xxxx 19: ADD TEMP[0].xyw, TEMP[0].xyzz, CONST[ADDR[0].x] 20: MOV TEMP[0].xyw, TEMP[0].xyxw 21: MOV TEMP[0].z, IMM[0].yyyy 22: MOV OUT[0], TEMP[0] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0 %22 = add i32 %5, %7 %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %21, i32 0, i32 %22) %24 = extractelement <4 x float> %23, i32 2 %25 = call float @llvm.AMDIL.fraction.(float %24) %26 = fsub float -0.000000e+00, %25 %27 = fadd float %26, %24 %28 = fmul float %27, 4.000000e+00 %29 = fptosi float %28 to i32 %30 = bitcast i32 %29 to float %31 = bitcast float %30 to i32 %32 = add i32 1, %31 %33 = bitcast i32 %32 to float %34 = bitcast float %33 to i32 %35 = shl i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = fmul float %18, %36 %38 = shl i32 %34, 4 %39 = add i32 %38, 4 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = fmul float %18, %40 %42 = shl i32 %34, 4 %43 = add i32 %42, 12 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = fmul float %18, %44 %46 = fptosi float %28 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = shl i32 %48, 4 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %17, %50 %52 = fadd float %51, %37 %53 = shl i32 %48, 4 %54 = add i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = fmul float %17, %55 %57 = fadd float %56, %41 %58 = shl i32 %48, 4 %59 = add i32 %58, 12 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %45 %63 = fptosi float %28 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = add i32 2, %65 %67 = bitcast i32 %66 to float %68 = bitcast float %67 to i32 %69 = shl i32 %68, 4 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %19, %70 %72 = fadd float %71, %52 %73 = shl i32 %68, 4 %74 = add i32 %73, 4 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %19, %75 %77 = fadd float %76, %57 %78 = shl i32 %68, 4 %79 = add i32 %78, 12 %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %79) %81 = fmul float %19, %80 %82 = fadd float %81, %62 %83 = fptosi float %28 to i32 %84 = bitcast i32 %83 to float %85 = bitcast float %84 to i32 %86 = add i32 3, %85 %87 = bitcast i32 %86 to float %88 = bitcast float %87 to i32 %89 = shl i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fadd float %72, %90 %92 = shl i32 %88, 4 %93 = add i32 %92, 4 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fadd float %77, %94 %96 = shl i32 %88, 4 %97 = add i32 %96, 12 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fadd float %82, %98 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %91, float %95, float 0.000000e+00, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v3 ; 7E0A4103 v_subrev_f32_e32 v1, v5, v3 ; 0A020705 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v9, v0 ; 06000109 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, v10, v9 ; 0612130A v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v2, v1 ; 06020302 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v1, v2, v9 ; F80008CF 09020100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..196] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.5000, -0.5000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FRC TEMP[0].x, IN[1].zzzz 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].zzzz 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[2].x, TEMP[1].xxxx 18: UADD TEMP[2].x, IMM[1].zzzz, TEMP[2].xxxx 19: UARL ADDR[0].x, TEMP[2].xxxx 20: UARL ADDR[0].x, TEMP[2].xxxx 21: ADD TEMP[2], TEMP[0], CONST[ADDR[0].x] 22: F2I TEMP[3].x, TEMP[1].xxxx 23: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: MUL TEMP[3].xyz, IN[0].yyyy, CONST[ADDR[0].x+96] 26: MOV TEMP[0].xyz, TEMP[3].xyzx 27: F2I TEMP[3].x, TEMP[1].xxxx 28: UARL ADDR[0].x, TEMP[3].xxxx 29: MAD TEMP[3].xyz, IN[0].xxxx, CONST[ADDR[0].x+96], TEMP[0] 30: MOV TEMP[0].xyz, TEMP[3].xyzx 31: F2I TEMP[3].x, TEMP[1].xxxx 32: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 33: UARL ADDR[0].x, TEMP[3].xxxx 34: MAD TEMP[3].xyz, IN[0].zzzz, CONST[ADDR[0].x+96], TEMP[0] 35: MOV TEMP[0].xyz, TEMP[3].xyzx 36: F2I TEMP[1].x, TEMP[1].xxxx 37: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 38: UARL ADDR[0].x, TEMP[1].xxxx 39: ADD TEMP[1].xyz, TEMP[0], CONST[ADDR[0].x+96] 40: MOV TEMP[0].xyz, TEMP[1].xyzx 41: ADD TEMP[3].xy, TEMP[0], CONST[196].zwzw 42: MOV TEMP[3].xy, TEMP[3].xyxx 43: MUL TEMP[3].xy, TEMP[3], CONST[196] 44: MOV TEMP[3].xy, TEMP[3].xyxx 45: MUL TEMP[4].yw, TEMP[1].yyyy, CONST[193].xxzy 46: MOV TEMP[0].yw, TEMP[4].wyww 47: MAD TEMP[4].xy, TEMP[1].xxxx, CONST[192], TEMP[0].ywzw 48: MOV TEMP[0].xy, TEMP[4].xyxx 49: MAD TEMP[1].xy, TEMP[1].zzzz, CONST[194], TEMP[0] 50: MOV TEMP[0].xy, TEMP[1].xyxx 51: ADD TEMP[1].xy, TEMP[0], CONST[195] 52: MOV TEMP[0].xy, TEMP[1].xyxx 53: MAD TEMP[0].xy, TEMP[0], IMM[0].yzzw, IMM[0].zzzz 54: MOV TEMP[0].xy, TEMP[0].xyxx 55: MOV TEMP[3].zw, IMM[2].yyxy 56: MOV TEMP[0].zw, IMM[2].yyxy 57: MOV OUT[1], TEMP[3] 58: MOV OUT[2], TEMP[0] 59: MOV OUT[0], TEMP[2] 60: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3072) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3076) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3088) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3092) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3104) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3108) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3120) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3124) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3136) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3140) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3144) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3148) %25 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0 %27 = add i32 %5, %7 %28 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %27) %29 = extractelement <4 x float> %28, i32 0 %30 = extractelement <4 x float> %28, i32 1 %31 = extractelement <4 x float> %28, i32 2 %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 2 %37 = call float @llvm.AMDIL.fraction.(float %36) %38 = fsub float -0.000000e+00, %37 %39 = fadd float %38, %36 %40 = fmul float %39, 4.000000e+00 %41 = fptosi float %40 to i32 %42 = bitcast i32 %41 to float %43 = bitcast float %42 to i32 %44 = add i32 1, %43 %45 = bitcast i32 %44 to float %46 = bitcast float %45 to i32 %47 = shl i32 %46, 4 %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %47) %49 = fmul float %30, %48 %50 = shl i32 %46, 4 %51 = add i32 %50, 4 %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %51) %53 = fmul float %30, %52 %54 = shl i32 %46, 4 %55 = add i32 %54, 8 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = fmul float %30, %56 %58 = shl i32 %46, 4 %59 = add i32 %58, 12 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %30, %60 %62 = fptosi float %40 to i32 %63 = bitcast i32 %62 to float %64 = bitcast float %63 to i32 %65 = shl i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = fmul float %29, %66 %68 = fadd float %67, %49 %69 = shl i32 %64, 4 %70 = add i32 %69, 4 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %29, %71 %73 = fadd float %72, %53 %74 = shl i32 %64, 4 %75 = add i32 %74, 8 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %29, %76 %78 = fadd float %77, %57 %79 = shl i32 %64, 4 %80 = add i32 %79, 12 %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %80) %82 = fmul float %29, %81 %83 = fadd float %82, %61 %84 = fptosi float %40 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = add i32 2, %86 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = shl i32 %89, 4 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) %92 = fmul float %31, %91 %93 = fadd float %92, %68 %94 = shl i32 %89, 4 %95 = add i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = fmul float %31, %96 %98 = fadd float %97, %73 %99 = shl i32 %89, 4 %100 = add i32 %99, 8 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = fmul float %31, %101 %103 = fadd float %102, %78 %104 = shl i32 %89, 4 %105 = add i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %31, %106 %108 = fadd float %107, %83 %109 = fptosi float %40 to i32 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = add i32 3, %111 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = shl i32 %114, 4 %116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %115) %117 = fadd float %93, %116 %118 = shl i32 %114, 4 %119 = add i32 %118, 4 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fadd float %98, %120 %122 = shl i32 %114, 4 %123 = add i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fadd float %103, %124 %126 = shl i32 %114, 4 %127 = add i32 %126, 12 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fadd float %108, %128 %130 = fptosi float %40 to i32 %131 = bitcast i32 %130 to float %132 = bitcast float %131 to i32 %133 = add i32 1, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = shl i32 %135, 4 %137 = add i32 %136, 1536 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) %139 = fmul float %30, %138 %140 = shl i32 %135, 4 %141 = add i32 %140, 1540 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = fmul float %30, %142 %144 = shl i32 %135, 4 %145 = add i32 %144, 1544 %146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %145) %147 = fmul float %30, %146 %148 = fptosi float %40 to i32 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = shl i32 %150, 4 %152 = add i32 %151, 1536 %153 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %152) %154 = fmul float %29, %153 %155 = fadd float %154, %139 %156 = shl i32 %150, 4 %157 = add i32 %156, 1540 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = fmul float %29, %158 %160 = fadd float %159, %143 %161 = shl i32 %150, 4 %162 = add i32 %161, 1544 %163 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %162) %164 = fmul float %29, %163 %165 = fadd float %164, %147 %166 = fptosi float %40 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = add i32 2, %168 %170 = bitcast i32 %169 to float %171 = bitcast float %170 to i32 %172 = shl i32 %171, 4 %173 = add i32 %172, 1536 %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %173) %175 = fmul float %31, %174 %176 = fadd float %175, %155 %177 = shl i32 %171, 4 %178 = add i32 %177, 1540 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = fmul float %31, %179 %181 = fadd float %180, %160 %182 = shl i32 %171, 4 %183 = add i32 %182, 1544 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = fmul float %31, %184 %186 = fadd float %185, %165 %187 = fptosi float %40 to i32 %188 = bitcast i32 %187 to float %189 = bitcast float %188 to i32 %190 = add i32 3, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = shl i32 %192, 4 %194 = add i32 %193, 1536 %195 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %194) %196 = fadd float %176, %195 %197 = shl i32 %192, 4 %198 = add i32 %197, 1540 %199 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %198) %200 = fadd float %181, %199 %201 = shl i32 %192, 4 %202 = add i32 %201, 1544 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) %204 = fadd float %186, %203 %205 = fadd float %196, %23 %206 = fadd float %200, %24 %207 = fmul float %205, %21 %208 = fmul float %206, %22 %209 = fmul float %200, %15 %210 = fmul float %200, %16 %211 = fmul float %196, %13 %212 = fadd float %211, %209 %213 = fmul float %196, %14 %214 = fadd float %213, %210 %215 = fmul float %204, %17 %216 = fadd float %215, %212 %217 = fmul float %204, %18 %218 = fadd float %217, %214 %219 = fadd float %216, %19 %220 = fadd float %218, %20 %221 = fmul float %219, 5.000000e-01 %222 = fadd float %221, -5.000000e-01 %223 = fmul float %220, -5.000000e-01 %224 = fadd float %223, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %207, float %208, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %222, float %224, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %121, float %125, float %129) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v3 ; 7E0A4103 v_subrev_f32_e32 v1, v5, v3 ; 0A020705 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_add_i32_e32 v2, 0x604, v1 ; 4A0402FF 00000604 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_add_i32_e32 v3, 0x614, v1 ; 4A0602FF 00000614 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v3, v5 ; 10000B03 v_mad_f32 v0, v4, v2, v0 ; D2820000 04020504 v_add_i32_e32 v2, 0x624, v1 ; 4A0402FF 00000624 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v2, v0 ; D2820000 04020506 v_add_i32_e32 v2, 0x634, v1 ; 4A0402FF 00000634 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v2, v0 ; 06000102 s_movk_i32 s4, 0xc4c ; B0040C4C s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v0 ; 06040004 s_movk_i32 s4, 0xc44 ; B0040C44 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 v_add_i32_e32 v3, 0x600, v1 ; 4A0602FF 00000600 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 v_add_i32_e32 v8, 0x610, v1 ; 4A1002FF 00000610 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v8, v5 ; 10100B08 v_mad_f32 v3, v4, v3, v8 ; D2820003 04220704 v_add_i32_e32 v8, 0x620, v1 ; 4A1002FF 00000620 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v6, v8, v3 ; D2820003 040E1106 v_add_i32_e32 v8, 0x630, v1 ; 4A1002FF 00000630 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, v8, v3 ; 06060708 s_movk_i32 s4, 0xc48 ; B0040C48 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s4, v3 ; 06100604 s_movk_i32 s4, 0xc40 ; B0040C40 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v8 ; 10101004 v_mov_b32_e32 v9, 1.0 ; 7E1202F2 v_mov_b32_e32 v10, 0 ; 7E140280 exp 15, 32, 0, 0, 0, v8, v2, v10, v9 ; F800020F 090A0208 s_movk_i32 s4, 0xc10 ; B0040C10 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v0 ; 10040004 s_movk_i32 s4, 0xc00 ; B0040C00 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v3, s4, v2 ; D2820002 04080903 v_add_i32_e32 v8, 0x608, v1 ; 4A1002FF 00000608 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_add_i32_e32 v11, 0x618, v1 ; 4A1602FF 00000618 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v5 ; 10160B0B v_mad_f32 v8, v4, v8, v11 ; D2820008 042E1104 v_add_i32_e32 v11, 0x628, v1 ; 4A1602FF 00000628 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v6, v11, v8 ; D2820008 04221706 v_add_i32_e32 v11, 0x638, v1 ; 4A1602FF 00000638 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v8, v11, v8 ; 0610110B s_movk_i32 s4, 0xc20 ; B0040C20 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v8, s4, v2 ; D2820002 04080908 s_movk_i32 s4, 0xc30 ; B0040C30 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 v_mad_f32 v2, 0.5, v2, -0.5 ; D2820002 03C604F0 s_movk_i32 s4, 0xc14 ; B0040C14 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_movk_i32 s4, 0xc04 ; B0040C04 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v0 ; D2820000 04000903 s_movk_i32 s4, 0xc24 ; B0040C24 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v8, s4, v0 ; D2820000 04000908 s_movk_i32 s4, 0xc34 ; B0040C34 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 v_mad_f32 v0, -0.5, v0, -0.5 ; D2820000 03C600F1 exp 15, 33, 0, 0, 0, v2, v0, v10, v9 ; F800021F 090A0002 s_waitcnt expcnt(0) ; BF8C070F buffer_load_dword v0, v1, s[0:3], 0 offen ; E0301000 80000001 v_add_i32_e32 v2, 16, v1 ; 4A040290 buffer_load_dword v3, v2, s[0:3], 0 offen ; E0301000 80000302 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mad_f32 v0, v4, v0, v3 ; D2820000 040E0104 v_add_i32_e32 v3, 32, v1 ; 4A0602A0 buffer_load_dword v8, v3, s[0:3], 0 offen ; E0301000 80000803 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v8, v0 ; D2820000 04021106 v_add_i32_e32 v8, 48, v1 ; 4A1002B0 buffer_load_dword v9, v8, s[0:3], 0 offen ; E0301000 80000908 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v9, v0 ; 06000109 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v5 ; 10140B0A v_mad_f32 v9, v4, v9, v10 ; D2820009 042A1304 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v6, v10, v9 ; D2820009 04261506 v_or_b32_e32 v10, 12, v8 ; 3814108C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, v10, v9 ; 0612130A v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v5 ; 10160B0B v_mad_f32 v10, v4, v10, v11 ; D282000A 042E1504 v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v6, v11, v10 ; D282000A 042A1706 v_or_b32_e32 v11, 8, v8 ; 38161088 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v10, v11, v10 ; 0614150B v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mad_f32 v1, v4, v1, v2 ; D2820001 040A0304 v_or_b32_e32 v2, 4, v3 ; 38040684 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v6, v2, v1 ; D2820001 04060506 v_or_b32_e32 v2, 4, v8 ; 38041084 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v2, v1 ; 06020302 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0100} 0: MAD TEMP[0].xy, IN[1], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].y, TEMP[0], SAMP[1], 2D 3: ADD TEMP[1].x, TEMP[0].yyyy, IMM[0].wwww 4: FSGE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz 5: UIF TEMP[2].xxxx :0 6: MOV TEMP[2].x, IMM[0].zzzz 7: ELSE :0 8: MOV TEMP[2].x, IMM[0].yyyy 9: ENDIF 10: MOV TEMP[2].x, TEMP[2].xxxx 11: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz 12: UIF TEMP[3].xxxx :0 13: MOV TEMP[3].x, IMM[0].zzzz 14: ELSE :0 15: MOV TEMP[3].x, IMM[0].yyyy 16: ENDIF 17: MOV TEMP[2].y, TEMP[3].xxxx 18: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz 19: UIF TEMP[3].xxxx :0 20: MOV TEMP[3].x, IMM[0].zzzz 21: ELSE :0 22: MOV TEMP[3].x, IMM[0].yyyy 23: ENDIF 24: MOV TEMP[2].z, TEMP[3].xxxx 25: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 26: UIF TEMP[1].xxxx :0 27: ELSE :0 28: ENDIF 29: FSLT TEMP[1].xyz, TEMP[2].xyzz, IMM[0].zzzz 30: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 31: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 32: UIF TEMP[2].xxxx :0 33: KILL 34: ENDIF 35: MOV TEMP[1].xy, IN[0].xyyy 36: TEX TEMP[1], TEMP[1], SAMP[0], 2D 37: MUL TEMP[0].w, TEMP[0].yyyy, TEMP[1].wwww 38: MOV TEMP[0].w, TEMP[0].wwww 39: MOV TEMP[0].xyz, TEMP[1].xyzx 40: MOV OUT[0], TEMP[0] 41: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %34 = fmul float %32, 1.000000e+00 %35 = fadd float %34, 0.000000e+00 %36 = fmul float %33, -1.000000e+00 %37 = fadd float %36, 1.000000e+00 %38 = bitcast float %35 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <8 x i32> %27 to <32 x i8> %43 = bitcast <4 x i32> %29 to <16 x i8> %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %42, <16 x i8> %43, i32 2) %45 = extractelement <4 x float> %44, i32 1 %46 = fadd float %45, 0xBF847AE140000000 %47 = fcmp oge float %46, 0.000000e+00 %48 = sext i1 %47 to i32 %49 = bitcast i32 %48 to float %50 = bitcast float %49 to i32 %51 = icmp ne i32 %50, 0 %. = select i1 %51, float 0.000000e+00, float -1.000000e+00 %52 = fcmp oge float %46, 0.000000e+00 %53 = sext i1 %52 to i32 %54 = bitcast i32 %53 to float %55 = bitcast float %54 to i32 %56 = icmp ne i32 %55, 0 %temp12.0 = select i1 %56, float 0.000000e+00, float -1.000000e+00 %57 = fcmp oge float %46, 0.000000e+00 %58 = sext i1 %57 to i32 %59 = bitcast i32 %58 to float %60 = bitcast float %59 to i32 %61 = icmp ne i32 %60, 0 %.28 = select i1 %61, float 0.000000e+00, float -1.000000e+00 %62 = fcmp oge float %46, 0.000000e+00 %63 = sext i1 %62 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = icmp ne i32 %65, 0 %67 = fcmp olt float %., 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %temp12.0, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = fcmp olt float %.28, 0.000000e+00 %72 = sext i1 %71 to i32 %73 = bitcast i32 %68 to float %74 = bitcast i32 %70 to float %75 = bitcast i32 %72 to float %76 = bitcast float %73 to i32 %77 = bitcast float %75 to i32 %78 = or i32 %76, %77 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = bitcast float %74 to i32 %82 = or i32 %80, %81 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = icmp ne i32 %84, 0 br i1 %85, label %IF26, label %ENDIF25 IF26: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF25 ENDIF25: ; preds = %main_body, %IF26 %86 = bitcast float %30 to i32 %87 = bitcast float %31 to i32 %88 = insertelement <2 x i32> undef, i32 %86, i32 0 %89 = insertelement <2 x i32> %88, i32 %87, i32 1 %90 = bitcast <8 x i32> %23 to <32 x i8> %91 = bitcast <4 x i32> %25 to <16 x i8> %92 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %89, <32 x i8> %90, <16 x i8> %91, i32 2) %93 = extractelement <4 x float> %92, i32 0 %94 = extractelement <4 x float> %92, i32 1 %95 = extractelement <4 x float> %92, i32 2 %96 = extractelement <4 x float> %92, i32 3 %97 = fmul float %45, %96 %98 = call i32 @llvm.SI.packf16(float %93, float %94) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %95, float %97) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 1, [m0] ; C8080500 v_interp_p2_f32 v2, [v2], v1, 1, 1, [m0] ; C8090501 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_add_f32_e32 v2, 0, v4 ; 06040880 s_load_dwordx4 s[0:3], s[4:5], 0x4 ; C0800504 s_load_dwordx8 s[8:15], s[6:7], 0x8 ; C0C40708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[8:15], s[0:3] ; F0800200 00020202 v_mov_b32_e32 v3, 0xbc23d70a ; 7E0602FF BC23D70A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, v2, v3 ; 06060702 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v3, -1.0, 0, s[0:1] ; D2000803 180100F3 v_cmp_lt_f32_e64 s[0:1], v3, 0 ; D0020000 00010103 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 s_and_saveexec_b64 s[0:1], s[0:1] ; BE802400 s_xor_b64 s[0:1], exec, s[0:1] ; 8980007E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[0:1] ; 88FE007E s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[4:11], s[0:3] ; F0800F00 00010303 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v3, v4 ; 5E000903 v_mul_f32_e32 v1, v6, v2 ; 10020506 v_cvt_pkrtz_f16_f32_e32 v1, v5, v1 ; 5E020305 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL IN[6] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..7] DCL TEMP[0..10], LOCAL IMM[0] FLT32 { 0.1592, 0.5000, 6.2832, -3.1416} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, IN[4].zzzz, IN[4].yyyy 1: MAD TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx, IMM[0].yyyy 2: FRC TEMP[1].x, TEMP[1].xxxx 3: MAD TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz, IMM[0].wwww 4: COS TEMP[2].x, TEMP[1].xxxx 5: SIN TEMP[1].x, TEMP[1].xxxx 6: MUL TEMP[0], IN[6].xyzy, IN[6].xxxy 7: LRP TEMP[3].xy, TEMP[0].xwzw, IMM[1].xxxx, TEMP[2].xxxx 8: MOV TEMP[4].x, TEMP[3].xyxx 9: ADD TEMP[5].x, -TEMP[2].xxxx, IMM[1].xxxx 10: MUL TEMP[6].yzw, TEMP[1].xxxx, IN[6].xyzx 11: MAD TEMP[7].xy, TEMP[0].zyzw, TEMP[5].xxxx, -TEMP[6].yzzw 12: MOV TEMP[8].x, TEMP[7].xyxx 13: MAD TEMP[9].xy, TEMP[0].yzzw, TEMP[5].xxxx, TEMP[6].zyzw 14: MOV TEMP[10].x, TEMP[9].xyxx 15: MOV TEMP[4].z, TEMP[7].yyyy 16: MOV TEMP[4].w, TEMP[9].yyyy 17: MOV TEMP[10].w, TEMP[3].yyyy 18: DP3 TEMP[3].x, IN[5].xyzz, TEMP[4].xzww 19: MUL TEMP[3].x, TEMP[3].xxxx, IN[4].xxxx 20: MOV TEMP[4].x, TEMP[3].xxxx 21: MUL TEMP[3].yz, IN[6].zzzz, IN[6] 22: LRP TEMP[2].w, TEMP[3].zzzz, IMM[1].xxxx, TEMP[2].xxxx 23: MOV TEMP[8].w, TEMP[2].wwww 24: MAD TEMP[2].z, TEMP[3].yyyy, TEMP[5].xxxx, TEMP[6].wwww 25: MOV TEMP[8].z, TEMP[2].zzzz 26: MAD TEMP[2].z, TEMP[3].yyyy, TEMP[5].xxxx, -TEMP[6].wwww 27: MOV TEMP[10].z, TEMP[2].zzzz 28: DP3 TEMP[2].x, IN[5].xzyy, TEMP[10].xzww 29: MUL TEMP[2].y, TEMP[2].xxxx, IN[4].xxxx 30: MOV TEMP[4].y, TEMP[2].yyyy 31: DP3 TEMP[2].x, IN[5].xyzz, TEMP[8].xzww 32: MUL TEMP[2].z, TEMP[2].xxxx, IN[4].xxxx 33: MOV TEMP[4].z, TEMP[2].zzzz 34: ADD TEMP[2].xyz, TEMP[4], IN[0] 35: MUL TEMP[4], CONST[1].yyyy, CONST[5] 36: MAD TEMP[4], CONST[1].xxxx, CONST[4], TEMP[4] 37: MAD TEMP[4], CONST[1].zzzz, CONST[6], TEMP[4] 38: MAD TEMP[1], CONST[1].wwww, CONST[7], TEMP[4] 39: MUL TEMP[1], TEMP[2].yyyy, TEMP[1] 40: MUL TEMP[8], CONST[0].yyyy, CONST[5] 41: MAD TEMP[8], CONST[0].xxxx, CONST[4], TEMP[8] 42: MAD TEMP[8], CONST[0].zzzz, CONST[6], TEMP[8] 43: MAD TEMP[4], CONST[0].wwww, CONST[7], TEMP[8] 44: MAD TEMP[1], TEMP[2].xxxx, TEMP[4], TEMP[1] 45: MUL TEMP[8], CONST[2].yyyy, CONST[5] 46: MAD TEMP[8], CONST[2].xxxx, CONST[4], TEMP[8] 47: MAD TEMP[8], CONST[2].zzzz, CONST[6], TEMP[8] 48: MAD TEMP[4], CONST[2].wwww, CONST[7], TEMP[8] 49: MAD TEMP[0], TEMP[2].zzzz, TEMP[4], TEMP[1] 50: MUL TEMP[4], CONST[3].yyyy, CONST[5] 51: MAD TEMP[4], CONST[3].xxxx, CONST[4], TEMP[4] 52: MAD TEMP[4], CONST[3].zzzz, CONST[6], TEMP[4] 53: MAD TEMP[1], CONST[3].wwww, CONST[7], TEMP[4] 54: ADD TEMP[0], TEMP[0], TEMP[1] 55: MUL TEMP[1], IN[1], IN[2] 56: MOV TEMP[2].xy, IN[3].xyxx 57: MOV TEMP[2].zw, IMM[1].xxyx 58: MOV OUT[1], TEMP[1] 59: MOV OUT[2], TEMP[2] 60: MOV OUT[0], TEMP[0] 61: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = extractelement <4 x float> %63, i32 2 %67 = extractelement <4 x float> %63, i32 3 %68 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %69 = load <16 x i8> addrspace(2)* %68, !tbaa !0 %70 = add i32 %5, %7 %71 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %69, i32 0, i32 %70) %72 = extractelement <4 x float> %71, i32 0 %73 = extractelement <4 x float> %71, i32 1 %74 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0 %76 = add i32 %5, %7 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = extractelement <4 x float> %77, i32 2 %81 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 5 %82 = load <16 x i8> addrspace(2)* %81, !tbaa !0 %83 = add i32 %5, %7 %84 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %82, i32 0, i32 %83) %85 = extractelement <4 x float> %84, i32 0 %86 = extractelement <4 x float> %84, i32 1 %87 = extractelement <4 x float> %84, i32 2 %88 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 6 %89 = load <16 x i8> addrspace(2)* %88, !tbaa !0 %90 = add i32 %5, %7 %91 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %89, i32 0, i32 %90) %92 = extractelement <4 x float> %91, i32 0 %93 = extractelement <4 x float> %91, i32 1 %94 = extractelement <4 x float> %91, i32 2 %95 = fadd float %80, %79 %96 = fmul float %95, 0x3FC45F30E0000000 %97 = fadd float %96, 5.000000e-01 %98 = call float @llvm.AMDIL.fraction.(float %97) %99 = fmul float %98, 0x401921FC80000000 %100 = fadd float %99, 0xC00921FA00000000 %101 = call float @llvm.cos.f32(float %100) %102 = call float @llvm.sin.f32(float %100) %103 = fmul float %92, %92 %104 = fmul float %93, %92 %105 = fmul float %94, %92 %106 = fmul float %93, %93 %107 = call float @llvm.AMDGPU.lrp(float %103, float 1.000000e+00, float %101) %108 = call float @llvm.AMDGPU.lrp(float %106, float 1.000000e+00, float %101) %109 = fsub float -0.000000e+00, %101 %110 = fadd float %109, 1.000000e+00 %111 = fmul float %102, %93 %112 = fmul float %102, %94 %113 = fmul float %102, %92 %114 = fsub float -0.000000e+00, %111 %115 = fmul float %105, %110 %116 = fadd float %115, %114 %117 = fsub float -0.000000e+00, %112 %118 = fmul float %104, %110 %119 = fadd float %118, %117 %120 = fmul float %104, %110 %121 = fadd float %120, %112 %122 = fmul float %105, %110 %123 = fadd float %122, %111 %124 = fmul float %85, %107 %125 = fmul float %86, %119 %126 = fadd float %125, %124 %127 = fmul float %87, %123 %128 = fadd float %126, %127 %129 = fmul float %128, %78 %130 = fmul float %94, %93 %131 = fmul float %94, %94 %132 = call float @llvm.AMDGPU.lrp(float %131, float 1.000000e+00, float %101) %133 = fmul float %130, %110 %134 = fadd float %133, %113 %135 = fsub float -0.000000e+00, %113 %136 = fmul float %130, %110 %137 = fadd float %136, %135 %138 = fmul float %85, %121 %139 = fmul float %87, %137 %140 = fadd float %139, %138 %141 = fmul float %86, %108 %142 = fadd float %140, %141 %143 = fmul float %142, %78 %144 = fmul float %85, %116 %145 = fmul float %86, %134 %146 = fadd float %145, %144 %147 = fmul float %87, %132 %148 = fadd float %146, %147 %149 = fmul float %148, %78 %150 = fadd float %129, %49 %151 = fadd float %143, %50 %152 = fadd float %149, %51 %153 = fmul float %18, %33 %154 = fmul float %18, %34 %155 = fmul float %18, %35 %156 = fmul float %18, %36 %157 = fmul float %17, %29 %158 = fadd float %157, %153 %159 = fmul float %17, %30 %160 = fadd float %159, %154 %161 = fmul float %17, %31 %162 = fadd float %161, %155 %163 = fmul float %17, %32 %164 = fadd float %163, %156 %165 = fmul float %19, %37 %166 = fadd float %165, %158 %167 = fmul float %19, %38 %168 = fadd float %167, %160 %169 = fmul float %19, %39 %170 = fadd float %169, %162 %171 = fmul float %19, %40 %172 = fadd float %171, %164 %173 = fmul float %20, %41 %174 = fadd float %173, %166 %175 = fmul float %20, %42 %176 = fadd float %175, %168 %177 = fmul float %20, %43 %178 = fadd float %177, %170 %179 = fmul float %20, %44 %180 = fadd float %179, %172 %181 = fmul float %151, %174 %182 = fmul float %151, %176 %183 = fmul float %151, %178 %184 = fmul float %151, %180 %185 = fmul float %14, %33 %186 = fmul float %14, %34 %187 = fmul float %14, %35 %188 = fmul float %14, %36 %189 = fmul float %13, %29 %190 = fadd float %189, %185 %191 = fmul float %13, %30 %192 = fadd float %191, %186 %193 = fmul float %13, %31 %194 = fadd float %193, %187 %195 = fmul float %13, %32 %196 = fadd float %195, %188 %197 = fmul float %15, %37 %198 = fadd float %197, %190 %199 = fmul float %15, %38 %200 = fadd float %199, %192 %201 = fmul float %15, %39 %202 = fadd float %201, %194 %203 = fmul float %15, %40 %204 = fadd float %203, %196 %205 = fmul float %16, %41 %206 = fadd float %205, %198 %207 = fmul float %16, %42 %208 = fadd float %207, %200 %209 = fmul float %16, %43 %210 = fadd float %209, %202 %211 = fmul float %16, %44 %212 = fadd float %211, %204 %213 = fmul float %150, %206 %214 = fadd float %213, %181 %215 = fmul float %150, %208 %216 = fadd float %215, %182 %217 = fmul float %150, %210 %218 = fadd float %217, %183 %219 = fmul float %150, %212 %220 = fadd float %219, %184 %221 = fmul float %22, %33 %222 = fmul float %22, %34 %223 = fmul float %22, %35 %224 = fmul float %22, %36 %225 = fmul float %21, %29 %226 = fadd float %225, %221 %227 = fmul float %21, %30 %228 = fadd float %227, %222 %229 = fmul float %21, %31 %230 = fadd float %229, %223 %231 = fmul float %21, %32 %232 = fadd float %231, %224 %233 = fmul float %23, %37 %234 = fadd float %233, %226 %235 = fmul float %23, %38 %236 = fadd float %235, %228 %237 = fmul float %23, %39 %238 = fadd float %237, %230 %239 = fmul float %23, %40 %240 = fadd float %239, %232 %241 = fmul float %24, %41 %242 = fadd float %241, %234 %243 = fmul float %24, %42 %244 = fadd float %243, %236 %245 = fmul float %24, %43 %246 = fadd float %245, %238 %247 = fmul float %24, %44 %248 = fadd float %247, %240 %249 = fmul float %152, %242 %250 = fadd float %249, %214 %251 = fmul float %152, %244 %252 = fadd float %251, %216 %253 = fmul float %152, %246 %254 = fadd float %253, %218 %255 = fmul float %152, %248 %256 = fadd float %255, %220 %257 = fmul float %26, %33 %258 = fmul float %26, %34 %259 = fmul float %26, %35 %260 = fmul float %26, %36 %261 = fmul float %25, %29 %262 = fadd float %261, %257 %263 = fmul float %25, %30 %264 = fadd float %263, %258 %265 = fmul float %25, %31 %266 = fadd float %265, %259 %267 = fmul float %25, %32 %268 = fadd float %267, %260 %269 = fmul float %27, %37 %270 = fadd float %269, %262 %271 = fmul float %27, %38 %272 = fadd float %271, %264 %273 = fmul float %27, %39 %274 = fadd float %273, %266 %275 = fmul float %27, %40 %276 = fadd float %275, %268 %277 = fmul float %28, %41 %278 = fadd float %277, %270 %279 = fmul float %28, %42 %280 = fadd float %279, %272 %281 = fmul float %28, %43 %282 = fadd float %281, %274 %283 = fmul float %28, %44 %284 = fadd float %283, %276 %285 = fadd float %250, %278 %286 = fadd float %252, %280 %287 = fadd float %254, %282 %288 = fadd float %256, %284 %289 = fmul float %56, %64 %290 = fmul float %57, %65 %291 = fmul float %58, %66 %292 = fmul float %59, %67 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %289, float %290, float %291, float %292) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %72, float %73, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %285, float %286, float %287, float %288) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.cos.f32(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.sin.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[28:31], s[8:9], 0x10 ; C08E0910 s_load_dwordx4 s[12:15], s[8:9], 0x14 ; C0860914 s_load_dwordx4 s[8:11], s[8:9], 0x18 ; C0840918 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[20:23], 0 idxen ; E00C2000 80050100 buffer_load_format_xyzw v[5:8], v0, s[16:19], 0 idxen ; E00C2000 80040500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v4, v8 ; 10121104 v_mul_f32_e32 v10, v3, v7 ; 10140F03 v_mul_f32_e32 v11, v2, v6 ; 10160D02 v_mul_f32_e32 v1, v1, v5 ; 10020B01 exp 15, 32, 0, 0, 0, v1, v11, v10, v9 ; F800020F 090A0B01 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[24:27], 0 idxen ; E00C2000 80060100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v1, v3 ; 100A0701 buffer_load_format_xyzw v[6:9], v0, s[28:31], 0 idxen ; E00C2000 80070600 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v10, v7, v8 ; 06141107 v_mov_b32_e32 v11, 0x3e22f987 ; 7E1602FF 3E22F987 v_mad_f32 v10, v11, v10, 0.5 ; D282000A 03C2150B v_fract_f32_e32 v10, v10 ; 7E14410A v_mov_b32_e32 v11, 0xc0490fd0 ; 7E1602FF C0490FD0 v_mov_b32_e32 v12, 0x40c90fe4 ; 7E1802FF 40C90FE4 v_mad_f32 v10, v10, v12, v11 ; D282000A 042E190A v_mul_f32_e32 v10, 0x3e22f983, v10 ; 101414FF 3E22F983 v_fract_f32_e32 v10, v10 ; 7E14410A v_sin_f32_e32 v11, v10 ; 7E166B0A v_mul_f32_e32 v12, v2, v11 ; 10181702 v_cos_f32_e32 v10, v10 ; 7E146D0A v_sub_f32_e32 v13, 1.0, v10 ; 081A14F2 v_mad_f32 v14, v5, v13, v12 ; D282000E 04321B05 v_mul_f32_e32 v15, v1, v2 ; 101E0501 v_mul_f32_e32 v16, v3, v11 ; 10201703 v_mad_f32 v17, v15, v13, -v16 ; D2820011 84421B0F v_mad_f32 v18, -v1, v1, 1.0 ; D2820012 23CA0301 v_mul_f32_e32 v18, v10, v18 ; 1024250A v_mad_f32 v18, v1, v1, v18 ; D2820012 044A0301 buffer_load_format_xyzw v[19:22], v0, s[12:15], 0 idxen ; E00C2000 80031300 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v18, v18, v19 ; 10242712 v_mad_f32 v17, v20, v17, v18 ; D2820011 044A2314 v_mad_f32 v14, v21, v14, v17 ; D282000E 04461D15 buffer_load_format_xyzw v[23:26], v0, s[4:7], 0 idxen ; E00C2000 80011700 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v14, v6, v23 ; D2820000 045E0D0E v_mul_f32_e32 v14, v2, v3 ; 101C0702 v_mul_f32_e32 v11, v1, v11 ; 10161701 v_mad_f32 v17, v14, v13, -v11 ; D2820011 842E1B0E v_mad_f32 v15, v15, v13, v16 ; D282000F 04421B0F v_mul_f32_e32 v15, v15, v19 ; 101E270F v_mad_f32 v15, v21, v17, v15 ; D282000F 043E2315 v_mad_f32 v16, -v2, v2, 1.0 ; D2820010 23CA0502 v_mul_f32_e32 v16, v10, v16 ; 1020210A v_mad_f32 v16, v2, v2, v16 ; D2820010 04420502 v_mad_f32 v15, v20, v16, v15 ; D282000F 043E2114 v_mad_f32 v15, v15, v6, v24 ; D282000F 04620D0F s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v16, s4 ; 7E200204 v_mul_f32_e32 v16, s5, v16 ; 10202005 s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_buffer_load_dword s7, s[0:3], 0x4 ; C2038104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s6 ; 7E220206 v_mad_f32 v16, v17, s7, v16 ; D2820010 04400F11 s_buffer_load_dword s8, s[0:3], 0x1b ; C204011B s_buffer_load_dword s9, s[0:3], 0x6 ; C2048106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s8 ; 7E220208 v_mad_f32 v16, v17, s9, v16 ; D2820010 04401311 s_buffer_load_dword s10, s[0:3], 0x1f ; C205011F s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v17, s10 ; 7E22020A v_mad_f32 v16, v17, s11, v16 ; D2820010 04401711 v_mul_f32_e32 v16, v16, v15 ; 10201F10 s_buffer_load_dword s12, s[0:3], 0x1 ; C2060101 v_mov_b32_e32 v17, s4 ; 7E220204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s12, v17 ; 1022220C s_buffer_load_dword s13, s[0:3], 0x0 ; C2068100 v_mov_b32_e32 v18, s6 ; 7E240206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v18, s13, v17 ; D2820011 04441B12 s_buffer_load_dword s14, s[0:3], 0x2 ; C2070102 v_mov_b32_e32 v18, s8 ; 7E240208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v18, s14, v17 ; D2820011 04441D12 s_buffer_load_dword s15, s[0:3], 0x3 ; C2078103 v_mov_b32_e32 v18, s10 ; 7E24020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v18, s15, v17 ; D2820011 04441F12 v_mad_f32 v16, v0, v17, v16 ; D2820010 04422300 v_mad_f32 v11, v14, v13, v11 ; D282000B 042E1B0E v_mad_f32 v5, v5, v13, -v12 ; D2820005 84321B05 v_mul_f32_e32 v5, v5, v19 ; 100A2705 v_mad_f32 v5, v20, v11, v5 ; D2820005 04161714 v_mad_f32 v11, -v3, v3, 1.0 ; D282000B 23CA0703 v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mad_f32 v1, v3, v3, v10 ; D2820001 042A0703 v_mad_f32 v1, v21, v1, v5 ; D2820001 04160315 v_mad_f32 v1, v1, v6, v25 ; D2820001 04660D01 s_buffer_load_dword s16, s[0:3], 0x9 ; C2080109 v_mov_b32_e32 v2, s4 ; 7E040204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s16, v2 ; 10040410 s_buffer_load_dword s17, s[0:3], 0x8 ; C2088108 v_mov_b32_e32 v3, s6 ; 7E060206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v3, s17, v2 ; D2820002 04082303 s_buffer_load_dword s18, s[0:3], 0xa ; C209010A v_mov_b32_e32 v3, s8 ; 7E060208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v3, s18, v2 ; D2820002 04082503 s_buffer_load_dword s19, s[0:3], 0xb ; C209810B v_mov_b32_e32 v3, s10 ; 7E06020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v3, s19, v2 ; D2820002 04082703 v_mad_f32 v2, v1, v2, v16 ; D2820002 04420501 s_buffer_load_dword s20, s[0:3], 0xd ; C20A010D v_mov_b32_e32 v3, s4 ; 7E060204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s20, v3 ; 10060614 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C v_mov_b32_e32 v4, s6 ; 7E080206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s6, s[0:3], 0xe ; C203010E v_mov_b32_e32 v4, s8 ; 7E080208 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s6, v3 ; D2820003 040C0D04 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F v_mov_b32_e32 v4, s10 ; 7E08020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s8, v3 ; D2820003 040C1104 v_add_f32_e32 v2, v3, v2 ; 06040503 s_buffer_load_dword s10, s[0:3], 0x16 ; C2050116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s10 ; 7E06020A v_mul_f32_e32 v3, s5, v3 ; 10060605 s_buffer_load_dword s21, s[0:3], 0x12 ; C20A8112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s21 ; 7E080215 v_mad_f32 v3, v4, s7, v3 ; D2820003 040C0F04 s_buffer_load_dword s22, s[0:3], 0x1a ; C20B011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s22 ; 7E080216 v_mad_f32 v3, v4, s9, v3 ; D2820003 040C1304 s_buffer_load_dword s23, s[0:3], 0x1e ; C20B811E s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s23 ; 7E080217 v_mad_f32 v3, v4, s11, v3 ; D2820003 040C1704 v_mul_f32_e32 v3, v3, v15 ; 10061F03 v_mov_b32_e32 v4, s10 ; 7E08020A v_mul_f32_e32 v4, s12, v4 ; 1008080C v_mov_b32_e32 v5, s21 ; 7E0A0215 v_mad_f32 v4, v5, s13, v4 ; D2820004 04101B05 v_mov_b32_e32 v5, s22 ; 7E0A0216 v_mad_f32 v4, v5, s14, v4 ; D2820004 04101D05 v_mov_b32_e32 v5, s23 ; 7E0A0217 v_mad_f32 v4, v5, s15, v4 ; D2820004 04101F05 v_mad_f32 v3, v0, v4, v3 ; D2820003 040E0900 v_mov_b32_e32 v4, s10 ; 7E08020A v_mul_f32_e32 v4, s16, v4 ; 10080810 v_mov_b32_e32 v5, s21 ; 7E0A0215 v_mad_f32 v4, v5, s17, v4 ; D2820004 04102305 v_mov_b32_e32 v5, s22 ; 7E0A0216 v_mad_f32 v4, v5, s18, v4 ; D2820004 04102505 v_mov_b32_e32 v5, s23 ; 7E0A0217 v_mad_f32 v4, v5, s19, v4 ; D2820004 04102705 v_mad_f32 v3, v1, v4, v3 ; D2820003 040E0901 v_mov_b32_e32 v4, s10 ; 7E08020A v_mul_f32_e32 v4, s20, v4 ; 10080814 v_mov_b32_e32 v5, s21 ; 7E0A0215 v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 v_mov_b32_e32 v5, s22 ; 7E0A0216 v_mad_f32 v4, v5, s6, v4 ; D2820004 04100D05 v_mov_b32_e32 v5, s23 ; 7E0A0217 v_mad_f32 v4, v5, s8, v4 ; D2820004 04101105 v_add_f32_e32 v3, v4, v3 ; 06060704 s_buffer_load_dword s10, s[0:3], 0x15 ; C2050115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s10 ; 7E08020A v_mul_f32_e32 v4, s5, v4 ; 10080805 s_buffer_load_dword s21, s[0:3], 0x11 ; C20A8111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s21 ; 7E0A0215 v_mad_f32 v4, v5, s7, v4 ; D2820004 04100F05 s_buffer_load_dword s22, s[0:3], 0x19 ; C20B0119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s22 ; 7E0A0216 v_mad_f32 v4, v5, s9, v4 ; D2820004 04101305 s_buffer_load_dword s23, s[0:3], 0x1d ; C20B811D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s23 ; 7E0A0217 v_mad_f32 v4, v5, s11, v4 ; D2820004 04101705 v_mul_f32_e32 v4, v4, v15 ; 10081F04 v_mov_b32_e32 v5, s10 ; 7E0A020A v_mul_f32_e32 v5, s12, v5 ; 100A0A0C v_mov_b32_e32 v6, s21 ; 7E0C0215 v_mad_f32 v5, v6, s13, v5 ; D2820005 04141B06 v_mov_b32_e32 v6, s22 ; 7E0C0216 v_mad_f32 v5, v6, s14, v5 ; D2820005 04141D06 v_mov_b32_e32 v6, s23 ; 7E0C0217 v_mad_f32 v5, v6, s15, v5 ; D2820005 04141F06 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_mov_b32_e32 v5, s10 ; 7E0A020A v_mul_f32_e32 v5, s16, v5 ; 100A0A10 v_mov_b32_e32 v6, s21 ; 7E0C0215 v_mad_f32 v5, v6, s17, v5 ; D2820005 04142306 v_mov_b32_e32 v6, s22 ; 7E0C0216 v_mad_f32 v5, v6, s18, v5 ; D2820005 04142506 v_mov_b32_e32 v6, s23 ; 7E0C0217 v_mad_f32 v5, v6, s19, v5 ; D2820005 04142706 v_mad_f32 v4, v1, v5, v4 ; D2820004 04120B01 v_mov_b32_e32 v5, s10 ; 7E0A020A v_mul_f32_e32 v5, s20, v5 ; 100A0A14 v_mov_b32_e32 v6, s21 ; 7E0C0215 v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 v_mov_b32_e32 v6, s22 ; 7E0C0216 v_mad_f32 v5, v6, s6, v5 ; D2820005 04140D06 v_mov_b32_e32 v6, s23 ; 7E0C0217 v_mad_f32 v5, v6, s8, v5 ; D2820005 04141106 v_add_f32_e32 v4, v5, v4 ; 06080905 s_buffer_load_dword s10, s[0:3], 0x14 ; C2050114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s10 ; 7E0A020A v_mul_f32_e32 v5, s5, v5 ; 100A0A05 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s5 ; 7E0C0205 v_mad_f32 v5, v6, s7, v5 ; D2820005 04140F06 s_buffer_load_dword s7, s[0:3], 0x18 ; C2038118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s7 ; 7E0C0207 v_mad_f32 v5, v6, s9, v5 ; D2820005 04141306 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v5, v6, s11, v5 ; D2820005 04141706 v_mul_f32_e32 v5, v5, v15 ; 100A1F05 v_mov_b32_e32 v6, s10 ; 7E0C020A v_mul_f32_e32 v6, s12, v6 ; 100C0C0C v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mad_f32 v6, v7, s13, v6 ; D2820006 04181B07 v_mov_b32_e32 v7, s7 ; 7E0E0207 v_mad_f32 v6, v7, s14, v6 ; D2820006 04181D07 v_mov_b32_e32 v7, s0 ; 7E0E0200 v_mad_f32 v6, v7, s15, v6 ; D2820006 04181F07 v_mad_f32 v0, v0, v6, v5 ; D2820000 04160D00 v_mov_b32_e32 v5, s10 ; 7E0A020A v_mul_f32_e32 v5, s16, v5 ; 100A0A10 v_mov_b32_e32 v6, s5 ; 7E0C0205 v_mad_f32 v5, v6, s17, v5 ; D2820005 04142306 v_mov_b32_e32 v6, s7 ; 7E0C0207 v_mad_f32 v5, v6, s18, v5 ; D2820005 04142506 v_mov_b32_e32 v6, s0 ; 7E0C0200 v_mad_f32 v5, v6, s19, v5 ; D2820005 04142706 v_mad_f32 v0, v1, v5, v0 ; D2820000 04020B01 v_mov_b32_e32 v1, s10 ; 7E02020A v_mul_f32_e32 v1, s20, v1 ; 10020214 v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mad_f32 v1, v5, s4, v1 ; D2820001 04040905 v_mov_b32_e32 v5, s7 ; 7E0A0207 v_mad_f32 v1, v5, s6, v1 ; D2820001 04040D05 v_mov_b32_e32 v5, s0 ; 7E0A0200 v_mad_f32 v1, v5, s8, v1 ; D2820001 04041105 v_add_f32_e32 v0, v1, v0 ; 06000101 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MUL TEMP[0], TEMP[0], IN[0] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = fmul float %39, %26 %44 = fmul float %40, %27 %45 = fmul float %41, %28 %46 = fmul float %42, %29 %47 = call i32 @llvm.SI.packf16(float %43, float %44) %48 = bitcast i32 %47 to float %49 = call i32 @llvm.SI.packf16(float %45, float %46) %50 = bitcast i32 %49 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %48, float %50, float %48, float %50) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 v_interp_p1_f32 v6, v0, 3, 0, [m0] ; C8180300 v_interp_p2_f32 v6, [v6], v1, 3, 0, [m0] ; C8190301 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v6, v6, v5 ; 100C0B06 v_interp_p1_f32 v7, v0, 2, 0, [m0] ; C81C0200 v_interp_p2_f32 v7, [v7], v1, 2, 0, [m0] ; C81D0201 v_mul_f32_e32 v7, v7, v4 ; 100E0907 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_mul_f32_e32 v7, v7, v3 ; 100E0707 v_interp_p1_f32 v8, v0, 0, 0, [m0] ; C8200000 v_interp_p2_f32 v8, [v8], v1, 0, 0, [m0] ; C8210001 v_mul_f32_e32 v0, v8, v2 ; 10000508 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v1 ; 06000204 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: DP2 TEMP[5].x, TEMP[3].wxxx, IMM[1].xyyy 24: ADD TEMP[5].x, TEMP[5].xxxx, IMM[0].xxxx 25: FRC TEMP[6].w, TEMP[5].xxxx 26: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].xxxx 27: MOV TEMP[0].z, TEMP[5].zzzz 28: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 29: FRC TEMP[5].z, TEMP[3].wwww 30: MOV TEMP[1].z, TEMP[5].zzzz 31: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 32: MOV TEMP[0].w, TEMP[3].wwww 33: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 34: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 35: UIF TEMP[4].xxxx :0 36: MOV TEMP[4].x, TEMP[3].wwww 37: ELSE :0 38: MOV TEMP[4].x, TEMP[3].zzzz 39: ENDIF 40: MOV TEMP[0].y, TEMP[4].xxxx 41: RCP TEMP[3].x, CONST[0].xxxx 42: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 43: FRC TEMP[5].z, TEMP[3].xxxx 44: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 45: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 46: MOV TEMP[6].x, -TEMP[3].xxxx 47: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 48: UIF TEMP[5].xxxx :0 49: MOV TEMP[5].x, TEMP[3].xxxx 50: ELSE :0 51: MOV TEMP[5].x, TEMP[6].xxxx 52: ENDIF 53: MOV TEMP[0].z, TEMP[5].xxxx 54: RCP TEMP[5].x, TEMP[5].xxxx 55: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 56: FRC TEMP[4].w, TEMP[4].wwww 57: MOV TEMP[0].w, TEMP[4].wwww 58: RCP TEMP[0].x, TEMP[3].xxxx 59: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 60: MOV TEMP[2].xy, TEMP[3].xyxx 61: FRC TEMP[3].xy, TEMP[2] 62: MOV TEMP[0].xy, TEMP[3].xyxx 63: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 64: MOV TEMP[0].xy, TEMP[2].xyxx 65: ADD TEMP[2].xy, TEMP[1], TEMP[0] 66: MOV TEMP[0].xy, TEMP[2].xyxx 67: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: FRC TEMP[2].zw, TEMP[2].xyxy 70: MOV TEMP[0].zw, TEMP[2].wwzw 71: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 74: MOV TEMP[0].xy, TEMP[2].xyxx 75: RCP TEMP[1].x, CONST[3].xxxx 76: RCP TEMP[2].x, CONST[3].yyyy 77: MOV TEMP[1].y, TEMP[2].xxxx 78: MUL TEMP[2].xy, TEMP[0], TEMP[1] 79: MOV TEMP[2].xy, TEMP[2].xyyy 80: MOV TEMP[2].w, IMM[0].zzzz 81: TXL TEMP[2], TEMP[2], SAMP[1], 2D 82: MOV TEMP[0].xyz, TEMP[2] 83: MOV TEMP[1].x, TEMP[2].wwww 84: ELSE :0 85: MOV TEMP[2].xy, IN[1].xyyy 86: TEX TEMP[2], TEMP[2], SAMP[1], 2D 87: MOV TEMP[0].xyz, TEMP[2] 88: MOV TEMP[1].x, TEMP[2].wwww 89: ENDIF 90: MUL TEMP[1].w, TEMP[1].xxxx, IN[0].wwww 91: MOV TEMP[0].w, TEMP[1].wwww 92: MOV OUT[0], TEMP[0] 93: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %41 = fcmp olt float 5.000000e-01, %24 %42 = sext i1 %41 to i32 %43 = bitcast i32 %42 to float %44 = bitcast float %43 to i32 %45 = icmp ne i32 %44, 0 br i1 %45, label %IF, label %ELSE IF: ; preds = %main_body %46 = fmul float %26, %39 %47 = fmul float %27, %40 %48 = call float @llvm.AMDIL.fraction.(float %46) %49 = call float @llvm.AMDIL.fraction.(float %47) %50 = fsub float -0.000000e+00, %48 %51 = fadd float %46, %50 %52 = fsub float -0.000000e+00, %49 %53 = fadd float %47, %52 %54 = fsub float -0.000000e+00, %51 %55 = fmul float %39, %26 %56 = fadd float %55, %54 %57 = fsub float -0.000000e+00, %53 %58 = fmul float %40, %27 %59 = fadd float %58, %57 %60 = fadd float %51, 5.000000e-01 %61 = fadd float %53, 5.000000e-01 %62 = fdiv float 1.000000e+00, %26 %63 = fdiv float 1.000000e+00, %27 %64 = fmul float %60, %62 %65 = fmul float %61, %63 %66 = bitcast float %64 to i32 %67 = bitcast float %65 to i32 %68 = bitcast float 0.000000e+00 to i32 %69 = insertelement <4 x i32> undef, i32 %66, i32 0 %70 = insertelement <4 x i32> %69, i32 %67, i32 1 %71 = insertelement <4 x i32> %70, i32 %68, i32 2 %72 = insertelement <4 x i32> %71, i32 undef, i32 3 %73 = bitcast <8 x i32> %31 to <32 x i8> %74 = bitcast <4 x i32> %33 to <16 x i8> %75 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %72, <32 x i8> %73, <16 x i8> %74, i32 2) %76 = extractelement <4 x float> %75, i32 0 %77 = extractelement <4 x float> %75, i32 3 %78 = fsub float -0.000000e+00, %25 %79 = fadd float 1.024000e+03, %78 %80 = fmul float %77, 6.528000e+04 %81 = fmul float %76, 2.550000e+02 %82 = fadd float %80, %81 %83 = fadd float %82, 5.000000e-01 %84 = call float @llvm.AMDIL.fraction.(float %83) %85 = fsub float -0.000000e+00, %84 %86 = fadd float %85, %83 %87 = fmul float %25, %76 %88 = fadd float %87, 5.000000e-01 %89 = call float @llvm.AMDIL.fraction.(float %88) %90 = fsub float -0.000000e+00, %89 %91 = fadd float %88, %90 %92 = fadd float %86, 5.000000e-01 %93 = fadd float %91, 5.000000e-01 %94 = fcmp oge float %79, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %95 to float %97 = bitcast float %96 to i32 %98 = icmp ne i32 %97, 0 %. = select i1 %98, float %93, float %92 %99 = fdiv float 1.000000e+00, %24 %100 = fmul float %28, %99 %101 = fadd float %100, 5.000000e-01 %102 = call float @llvm.AMDIL.fraction.(float %101) %103 = fsub float -0.000000e+00, %102 %104 = fadd float %103, %101 %105 = fmul float %104, %. %106 = fsub float -0.000000e+00, %104 %107 = fcmp oge float %105, 0.000000e+00 %108 = sext i1 %107 to i32 %109 = bitcast i32 %108 to float %110 = bitcast float %109 to i32 %111 = icmp ne i32 %110, 0 %temp20.0 = select i1 %111, float %104, float %106 %112 = fdiv float 1.000000e+00, %temp20.0 %113 = fmul float %112, %. %114 = call float @llvm.AMDIL.fraction.(float %113) %115 = fdiv float 1.000000e+00, %104 %116 = fmul float %114, %temp20.0 %117 = fmul float %115, %. %118 = call float @llvm.AMDIL.fraction.(float %116) %119 = call float @llvm.AMDIL.fraction.(float %117) %120 = fsub float -0.000000e+00, %118 %121 = fadd float %120, %116 %122 = fsub float -0.000000e+00, %119 %123 = fadd float %122, %117 %124 = fadd float %56, %121 %125 = fadd float %59, %123 %126 = fmul float %124, %24 %127 = fmul float %125, %24 %128 = call float @llvm.AMDIL.fraction.(float %126) %129 = call float @llvm.AMDIL.fraction.(float %127) %130 = fsub float -0.000000e+00, %128 %131 = fadd float %130, %126 %132 = fsub float -0.000000e+00, %129 %133 = fadd float %132, %127 %134 = fadd float %131, 5.000000e-01 %135 = fadd float %133, 5.000000e-01 %136 = fdiv float 1.000000e+00, %28 %137 = fdiv float 1.000000e+00, %29 %138 = fmul float %134, %136 %139 = fmul float %135, %137 %140 = bitcast float %138 to i32 %141 = bitcast float %139 to i32 %142 = bitcast float 0.000000e+00 to i32 %143 = insertelement <4 x i32> undef, i32 %140, i32 0 %144 = insertelement <4 x i32> %143, i32 %141, i32 1 %145 = insertelement <4 x i32> %144, i32 %142, i32 2 %146 = insertelement <4 x i32> %145, i32 undef, i32 3 %147 = bitcast <8 x i32> %35 to <32 x i8> %148 = bitcast <4 x i32> %37 to <16 x i8> %149 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %146, <32 x i8> %147, <16 x i8> %148, i32 2) br label %ENDIF ELSE: ; preds = %main_body %150 = bitcast float %39 to i32 %151 = bitcast float %40 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = bitcast <8 x i32> %35 to <32 x i8> %155 = bitcast <4 x i32> %37 to <16 x i8> %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %154, <16 x i8> %155, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %156, %ELSE ], [ %149, %IF ] %157 = extractelement <4 x float> %.sink, i32 0 %158 = extractelement <4 x float> %.sink, i32 1 %159 = extractelement <4 x float> %.sink, i32 2 %160 = extractelement <4 x float> %.sink, i32 3 %161 = fmul float %160, %38 %162 = call i32 @llvm.SI.packf16(float %157, float %158) %163 = bitcast i32 %162 to float %164 = call i32 @llvm.SI.packf16(float %159, float %161) %165 = bitcast i32 %164 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %163, float %165, float %163, float %165) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800F00 00640503 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v4 ; 10000819 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v4, -v0 ; D2820000 84020819 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v5, s25 ; 7E0A5419 v_mul_f32_e32 v6, v5, v1 ; 100C0305 v_mul_f32_e32 v1, s26, v3 ; 1002061A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v3, -v1 ; D2820001 8406061A v_add_f32_e32 v9, 0.5, v1 ; 061202F0 v_rcp_f32_e32 v10, s26 ; 7E14541A v_mul_f32_e32 v5, v10, v9 ; 100A130A v_mov_b32_e32 v7, 0 ; 7E0E0280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[9:10], 9, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[32:39], s[28:31] ; F0900900 00E80905 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, 0x437f0000, v9 ; 101612FF 437F0000 v_mov_b32_e32 v12, 0x477f0000 ; 7E1802FF 477F0000 v_mad_f32 v11, v12, v10, v11 ; D282000B 042E150C v_add_f32_e32 v11, 0.5, v11 ; 061616F0 v_fract_f32_e32 v12, v11 ; 7E18410B v_subrev_f32_e32 v11, v12, v11 ; 0A16170C v_add_f32_e32 v11, 0.5, v11 ; 061616F0 v_mad_f32 v9, s27, v9, 0.5 ; D2820009 03C2121B v_fract_f32_e32 v10, v9 ; 7E144109 v_subrev_f32_e32 v9, v10, v9 ; 0A12130A v_add_f32_e32 v9, 0.5, v9 ; 061212F0 v_mov_b32_e32 v10, 0x44800000 ; 7E1402FF 44800000 v_subrev_f32_e32 v10, s27, v10 ; 0A14141B v_cmp_ge_f32_e64 s[28:29], v10, 0 ; D00C001C 0001010A v_cndmask_b32_e64 v10, 0, -1, s[28:29] ; D200000A 00718280 v_cmp_ne_i32_e64 s[28:29], v10, 0 ; D10A001C 0001010A v_cndmask_b32_e64 v9, v11, v9, s[28:29] ; D2000809 1872130B v_rcp_f32_e32 v10, s8 ; 7E145408 v_mad_f32 v10, s24, v10, 0.5 ; D282000A 03C21418 v_fract_f32_e32 v11, v10 ; 7E16410A v_subrev_f32_e32 v10, v11, v10 ; 0A14150B v_mul_f32_e32 v11, v9, v10 ; 10161509 v_cmp_ge_f32_e64 s[28:29], v11, 0 ; D00C001C 0001010B v_cndmask_b32_e64 v11, 0, -1, s[28:29] ; D200080B 00718280 v_cmp_ne_i32_e64 s[28:29], v11, 0 ; D10A001C 0001010B v_xor_b32_e32 v11, 0x80000000, v10 ; 3A1614FF 80000000 v_cndmask_b32_e64 v11, v11, v10, s[28:29] ; D200080B 1872150B v_rcp_f32_e32 v12, v11 ; 7E18550B v_mul_f32_e32 v12, v9, v12 ; 10181909 v_fract_f32_e32 v12, v12 ; 7E18410C v_mul_f32_e32 v13, v11, v12 ; 101A190B v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v11, v12, v11, -v13 ; D282000B 8436170C v_mad_f32 v1, s26, v3, -v1 ; D2820001 8406061A v_add_f32_e32 v1, v11, v1 ; 0602030B v_mul_f32_e32 v11, s8, v1 ; 10160208 v_fract_f32_e32 v11, v11 ; 7E16410B v_mad_f32 v1, v1, s8, -v11 ; D2820001 842C1101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v11, s24 ; 7E165418 v_mul_f32_e32 v5, v11, v1 ; 100A030B v_rcp_f32_e32 v1, v10 ; 7E02550A v_mul_f32_e32 v10, v9, v1 ; 10140309 v_fract_f32_e32 v10, v10 ; 7E14410A v_mad_f32 v1, v1, v9, -v10 ; D2820001 842A1301 v_mad_f32 v0, s25, v4, -v0 ; D2820000 84020819 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v6, v1, v0 ; 100C0101 image_sample_l v[5:8], 15, 0, 0, 0, 0, 0, 0, 0, v[5:8], s[16:23], s[12:15] ; F0900F00 00640505 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_cvt_pkrtz_f16_f32_e32 v0, v5, v6 ; 5E000D05 v_mul_f32_e32 v1, v2, v8 ; 10021102 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 1.0000, -1.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[0].xy, TEMP[1].xyxx 84: MOV TEMP[0].zw, IMM[0].zzzz 85: MAD TEMP[1].xyw, TEMP[0], IMM[1].yzyy, IMM[1].wyww 86: MOV TEMP[2].xy, TEMP[1].xyyy 87: MOV TEMP[2].w, TEMP[1].wwww 88: TXL TEMP[2], TEMP[2], SAMP[1], 2D 89: MOV TEMP[0], TEMP[2] 90: ELSE :0 91: MAD TEMP[1].xy, IN[1], IMM[1].yzyy, IMM[1].wyww 92: MOV TEMP[1].xy, TEMP[1].xyyy 93: TEX TEMP[1], TEMP[1], SAMP[1], 2D 94: MOV TEMP[0], TEMP[1] 95: ENDIF 96: MUL TEMP[0], TEMP[0], IN[0] 97: MOV OUT[0], TEMP[0] 98: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = fmul float %146, 1.000000e+00 %149 = fadd float %148, 0.000000e+00 %150 = fmul float %147, -1.000000e+00 %151 = fadd float %150, 1.000000e+00 %152 = fmul float 0.000000e+00, 1.000000e+00 %153 = fadd float %152, 0.000000e+00 %154 = bitcast float %149 to i32 %155 = bitcast float %151 to i32 %156 = bitcast float %153 to i32 %157 = insertelement <4 x i32> undef, i32 %154, i32 0 %158 = insertelement <4 x i32> %157, i32 %155, i32 1 %159 = insertelement <4 x i32> %158, i32 %156, i32 2 %160 = insertelement <4 x i32> %159, i32 undef, i32 3 %161 = bitcast <8 x i32> %35 to <32 x i8> %162 = bitcast <4 x i32> %37 to <16 x i8> %163 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %160, <32 x i8> %161, <16 x i8> %162, i32 2) br label %ENDIF ELSE: ; preds = %main_body %164 = fmul float %42, 1.000000e+00 %165 = fadd float %164, 0.000000e+00 %166 = fmul float %43, -1.000000e+00 %167 = fadd float %166, 1.000000e+00 %168 = bitcast float %165 to i32 %169 = bitcast float %167 to i32 %170 = insertelement <2 x i32> undef, i32 %168, i32 0 %171 = insertelement <2 x i32> %170, i32 %169, i32 1 %172 = bitcast <8 x i32> %35 to <32 x i8> %173 = bitcast <4 x i32> %37 to <16 x i8> %174 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %174, %ELSE ], [ %163, %IF ] %175 = extractelement <4 x float> %.sink, i32 0 %176 = extractelement <4 x float> %.sink, i32 1 %177 = extractelement <4 x float> %.sink, i32 2 %178 = extractelement <4 x float> %.sink, i32 3 %179 = fmul float %175, %38 %180 = fmul float %176, %39 %181 = fmul float %177, %40 %182 = fmul float %178, %41 %183 = call i32 @llvm.SI.packf16(float %179, float %180) %184 = bitcast i32 %183 to float %185 = call i32 @llvm.SI.packf16(float %181, float %182) %186 = bitcast i32 %185 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %184, float %186, float %184, float %186) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v6, v0, 1, 1, [m0] ; C8180500 v_interp_p2_f32 v6, [v6], v1, 1, 1, [m0] ; C8190501 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E v_sub_f32_e32 v1, 1.0, v6 ; 08020CF2 v_add_f32_e32 v0, 0, v7 ; 06000E80 image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[16:23], s[12:15] ; F0800F00 00640800 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v6 ; 10000C19 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v6, -v0 ; D2820000 84020C19 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v8, s25 ; 7E105419 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mul_f32_e32 v1, s26, v7 ; 10020E1A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v7, -v1 ; D2820001 84060E1A v_add_f32_e32 v12, 0.5, v1 ; 061802F0 v_rcp_f32_e32 v13, s26 ; 7E1A541A v_mul_f32_e32 v8, v13, v12 ; 1010190D v_mov_b32_e32 v10, 0 ; 7E140280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[12:13], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[32:39], s[28:31] ; F0900900 00E80C08 v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v13, v14, 0.5 ; D282000E 03C21D0D v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v12 ; 101E18FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v12, s27, v12, 0.5 ; D282000C 03C2181B v_fract_f32_e32 v13, v12 ; 7E1A410C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_mov_b32_e32 v13, 0x44800000 ; 7E1A02FF 44800000 v_subrev_f32_e32 v13, s27, v13 ; 0A1A1A1B v_cmp_ge_f32_e64 s[28:29], v13, 0 ; D00C001C 0001010D v_cndmask_b32_e64 v13, 0, -1, s[28:29] ; D200080D 00718280 v_cmp_ne_i32_e64 s[28:29], v13, 0 ; D10A001C 0001010D v_cndmask_b32_e64 v12, v14, v12, s[28:29] ; D200000C 1072190E v_rcp_f32_e32 v13, s8 ; 7E1A5408 v_mad_f32 v13, s24, v13, 0.5 ; D282000D 03C21A18 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v12, v13 ; 101C1B0C v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[28:29] ; D200000E 10721B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v12, v15 ; 101E1F0C v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v7, -v1 ; D2820001 84060E1A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v7, s8, v1 ; 100E0208 v_fract_f32_e32 v7, v7 ; 7E0E4107 v_mad_f32 v1, v1, s8, -v7 ; D2820001 841C1101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v7, s24 ; 7E0E5418 v_mad_f32 v8, v1, v7, 0 ; D2820008 02020F01 v_rcp_f32_e32 v1, v13 ; 7E02550D v_mul_f32_e32 v7, v12, v1 ; 100E030C v_fract_f32_e32 v7, v7 ; 7E0E4107 v_mad_f32 v1, v1, v12, -v7 ; D2820001 841E1901 v_mad_f32 v0, s25, v6, -v0 ; D2820000 84020C19 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mad_f32 v9, -v0, v1, 1.0 ; D2820009 23CA0300 image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[12:15] ; F0900F00 00640808 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v0, v2, v11 ; 10001702 v_mul_f32_e32 v1, v3, v10 ; 10021503 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mul_f32_e32 v1, v4, v9 ; 10021304 v_mul_f32_e32 v2, v5, v8 ; 10041105 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[8] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[8].xxxx, CONST[8].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].xz, IMM[0].yyzy 33: FSLT TEMP[1].x, IMM[0].yyyy, CONST[1].xxxx 34: UIF TEMP[1].xxxx :2 35: MUL TEMP[1].yw, CONST[3].xxzy, IN[2].xxzy 36: MOV TEMP[3].yw, TEMP[1].wyww 37: FRC TEMP[1].xy, TEMP[3].ywzw 38: MOV TEMP[1].xy, TEMP[1].xyxx 39: ADD TEMP[2].yw, TEMP[3], -TEMP[1].xxzy 40: MOV TEMP[3].yw, TEMP[2].wyww 41: MAD TEMP[2].xy, IN[2], CONST[3], -TEMP[3].ywzw 42: MOV TEMP[1].xy, TEMP[2].xyxx 43: ADD TEMP[2].yw, TEMP[3], IMM[0].yyyy 44: MOV TEMP[3].yw, TEMP[2].wyww 45: RCP TEMP[2].x, CONST[3].xxxx 46: RCP TEMP[4].x, CONST[3].yyyy 47: MOV TEMP[2].y, TEMP[4].xxxx 48: MUL TEMP[4].xy, TEMP[3].ywzw, TEMP[2] 49: MOV TEMP[4].xy, TEMP[4].xyyy 50: MOV TEMP[4].w, IMM[0].xxxx 51: TXL TEMP[4], TEMP[4], SAMP[0], 2D 52: MOV TEMP[2].zw, TEMP[4] 53: ADD TEMP[5].y, IMM[0].zzzz, -CONST[2].xxxx 54: MOV TEMP[3].y, TEMP[5].yyyy 55: MUL TEMP[6].z, TEMP[4].xxxx, IMM[0].wwww 56: MAD TEMP[7].w, TEMP[4].wwww, IMM[0].wwww, IMM[0].yyyy 57: FRC TEMP[8].z, TEMP[7].wwww 58: ADD TEMP[7].w, TEMP[7].wwww, -TEMP[8].zzzz 59: MAD TEMP[6].z, TEMP[7].wwww, IMM[1].xxxx, TEMP[6].zzzz 60: ADD TEMP[6].z, TEMP[6].zzzz, IMM[0].yyyy 61: FRC TEMP[7].w, TEMP[6].zzzz 62: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].zzzz 63: MOV TEMP[3].z, TEMP[6].zzzz 64: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].yyyy 65: FRC TEMP[6].z, TEMP[4].wwww 66: MOV TEMP[1].z, TEMP[6].zzzz 67: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 68: MOV TEMP[3].w, TEMP[4].wwww 69: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 70: FSGE TEMP[5].x, TEMP[5].yyyy, IMM[0].xxxx 71: UIF TEMP[5].xxxx :2 72: MOV TEMP[5].x, TEMP[4].wwww 73: ELSE :2 74: MOV TEMP[5].x, TEMP[4].zzzz 75: ENDIF 76: MOV TEMP[3].y, TEMP[5].xxxx 77: RCP TEMP[4].x, CONST[1].xxxx 78: MAD TEMP[4].x, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].yyyy 79: FRC TEMP[6].z, TEMP[4].xxxx 80: ADD TEMP[4].x, -TEMP[6].zzzz, TEMP[4].xxxx 81: MUL TEMP[6].z, TEMP[4].xxxx, TEMP[5].xxxx 82: MOV TEMP[7].x, -TEMP[4].xxxx 83: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].xxxx 84: UIF TEMP[6].xxxx :2 85: MOV TEMP[6].x, TEMP[4].xxxx 86: ELSE :2 87: MOV TEMP[6].x, TEMP[7].xxxx 88: ENDIF 89: MOV TEMP[3].z, TEMP[6].xxxx 90: RCP TEMP[6].x, TEMP[6].xxxx 91: MUL TEMP[5].w, TEMP[6].xxxx, TEMP[5].xxxx 92: FRC TEMP[5].w, TEMP[5].wwww 93: MOV TEMP[3].w, TEMP[5].wwww 94: RCP TEMP[3].x, TEMP[4].xxxx 95: MUL TEMP[4].xy, TEMP[3].wxzw, TEMP[3].zyzw 96: MOV TEMP[2].xy, TEMP[4].xyxx 97: FRC TEMP[4].xy, TEMP[2] 98: MOV TEMP[3].xy, TEMP[4].xyxx 99: ADD TEMP[2].xy, -TEMP[3], TEMP[2] 100: MOV TEMP[3].xy, TEMP[2].xyxx 101: ADD TEMP[2].xy, TEMP[1], TEMP[3] 102: MOV TEMP[3].xy, TEMP[2].xyxx 103: MUL TEMP[2].xy, TEMP[3], CONST[1].xxxx 104: MOV TEMP[3].xy, TEMP[2].xyxx 105: FRC TEMP[2].zw, TEMP[2].xyxy 106: MOV TEMP[3].zw, TEMP[2].wwzw 107: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[3] 108: MOV TEMP[3].xy, TEMP[2].xyxx 109: ADD TEMP[2].xy, TEMP[3], IMM[0].yyyy 110: MOV TEMP[3].xy, TEMP[2].xyxx 111: RCP TEMP[1].x, CONST[4].xxxx 112: RCP TEMP[2].x, CONST[4].yyyy 113: MOV TEMP[1].y, TEMP[2].xxxx 114: MUL TEMP[1].xy, TEMP[3], TEMP[1] 115: MOV TEMP[1].xy, TEMP[1].xyyy 116: MOV TEMP[1].w, IMM[0].xxxx 117: TXL TEMP[1], TEMP[1], SAMP[1], 2D 118: MOV TEMP[3], TEMP[1] 119: ELSE :2 120: MOV TEMP[1].xy, IN[2].xyyy 121: TEX TEMP[1], TEMP[1], SAMP[1], 2D 122: MOV TEMP[3], TEMP[1] 123: ENDIF 124: MUL TEMP[1], TEMP[3], IN[1] 125: MOV OUT[0], TEMP[1] 126: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = fmul float %15, %38 %55 = fadd float %54, %39 %56 = fmul float %14, %34 %57 = fadd float %56, %36 %58 = fmul float %55, %35 %59 = fadd float %58, %37 %60 = fsub float -0.000000e+00, %24 %61 = fsub float -0.000000e+00, %26 %62 = fadd float %60, %57 %63 = fadd float %60, %57 %64 = fadd float %60, %57 %65 = fcmp olt float %62, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = fcmp olt float %63, 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %64, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = bitcast i32 %66 to float %72 = bitcast i32 %68 to float %73 = bitcast i32 %70 to float %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = bitcast float %72 to i32 %80 = or i32 %78, %79 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 br i1 %83, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %84 = fsub float -0.000000e+00, %57 %85 = fadd float %25, %84 %86 = fsub float -0.000000e+00, %57 %87 = fadd float %25, %86 %88 = fsub float -0.000000e+00, %57 %89 = fadd float %25, %88 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = fcmp olt float %87, 0.000000e+00 %93 = sext i1 %92 to i32 %94 = fcmp olt float %89, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %91 to float %97 = bitcast i32 %93 to float %98 = bitcast i32 %95 to float %99 = bitcast float %96 to i32 %100 = bitcast float %98 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = bitcast float %97 to i32 %105 = or i32 %103, %104 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF36 ENDIF36: ; preds = %ENDIF, %IF37 %109 = fadd float %61, %59 %110 = fadd float %61, %59 %111 = fadd float %61, %59 %112 = fcmp olt float %109, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = fcmp olt float %110, 0.000000e+00 %115 = sext i1 %114 to i32 %116 = fcmp olt float %111, 0.000000e+00 %117 = sext i1 %116 to i32 %118 = bitcast i32 %113 to float %119 = bitcast i32 %115 to float %120 = bitcast i32 %117 to float %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = bitcast float %119 to i32 %127 = or i32 %125, %126 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 br i1 %130, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 call void @llvm.AMDGPU.kilp() br label %ENDIF39 ENDIF39: ; preds = %ENDIF36, %IF40 %131 = fsub float -0.000000e+00, %59 %132 = fadd float %27, %131 %133 = fsub float -0.000000e+00, %59 %134 = fadd float %27, %133 %135 = fsub float -0.000000e+00, %59 %136 = fadd float %27, %135 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = fcmp olt float %134, 0.000000e+00 %140 = sext i1 %139 to i32 %141 = fcmp olt float %136, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %138 to float %144 = bitcast i32 %140 to float %145 = bitcast i32 %142 to float %146 = bitcast float %143 to i32 %147 = bitcast float %145 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = bitcast float %144 to i32 %152 = or i32 %150, %151 %153 = bitcast i32 %152 to float %154 = bitcast float %153 to i32 %155 = icmp ne i32 %154, 0 br i1 %155, label %IF43, label %ENDIF42 IF43: ; preds = %ENDIF39 call void @llvm.AMDGPU.kilp() br label %ENDIF42 ENDIF42: ; preds = %ENDIF39, %IF43 %156 = fcmp olt float 5.000000e-01, %28 %157 = sext i1 %156 to i32 %158 = bitcast i32 %157 to float %159 = bitcast float %158 to i32 %160 = icmp ne i32 %159, 0 br i1 %160, label %IF46, label %ELSE47 IF46: ; preds = %ENDIF42 %161 = fmul float %30, %52 %162 = fmul float %31, %53 %163 = call float @llvm.AMDIL.fraction.(float %161) %164 = call float @llvm.AMDIL.fraction.(float %162) %165 = fsub float -0.000000e+00, %163 %166 = fadd float %161, %165 %167 = fsub float -0.000000e+00, %164 %168 = fadd float %162, %167 %169 = fsub float -0.000000e+00, %166 %170 = fmul float %52, %30 %171 = fadd float %170, %169 %172 = fsub float -0.000000e+00, %168 %173 = fmul float %53, %31 %174 = fadd float %173, %172 %175 = fadd float %166, 5.000000e-01 %176 = fadd float %168, 5.000000e-01 %177 = fdiv float 1.000000e+00, %30 %178 = fdiv float 1.000000e+00, %31 %179 = fmul float %175, %177 %180 = fmul float %176, %178 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = bitcast float 0.000000e+00 to i32 %184 = insertelement <4 x i32> undef, i32 %181, i32 0 %185 = insertelement <4 x i32> %184, i32 %182, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 undef, i32 3 %188 = bitcast <8 x i32> %41 to <32 x i8> %189 = bitcast <4 x i32> %43 to <16 x i8> %190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 3 %193 = fsub float -0.000000e+00, %29 %194 = fadd float 1.024000e+03, %193 %195 = fmul float %191, 2.550000e+02 %196 = fmul float %192, 2.550000e+02 %197 = fadd float %196, 5.000000e-01 %198 = call float @llvm.AMDIL.fraction.(float %197) %199 = fsub float -0.000000e+00, %198 %200 = fadd float %197, %199 %201 = fmul float %200, 2.560000e+02 %202 = fadd float %201, %195 %203 = fadd float %202, 5.000000e-01 %204 = call float @llvm.AMDIL.fraction.(float %203) %205 = fsub float -0.000000e+00, %204 %206 = fadd float %205, %203 %207 = fmul float %29, %191 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %208, %210 %212 = fadd float %206, 5.000000e-01 %213 = fadd float %211, 5.000000e-01 %214 = fcmp oge float %194, 0.000000e+00 %215 = sext i1 %214 to i32 %216 = bitcast i32 %215 to float %217 = bitcast float %216 to i32 %218 = icmp ne i32 %217, 0 %. = select i1 %218, float %213, float %212 %219 = fdiv float 1.000000e+00, %28 %220 = fmul float %32, %219 %221 = fadd float %220, 5.000000e-01 %222 = call float @llvm.AMDIL.fraction.(float %221) %223 = fsub float -0.000000e+00, %222 %224 = fadd float %223, %221 %225 = fmul float %224, %. %226 = fsub float -0.000000e+00, %224 %227 = fcmp oge float %225, 0.000000e+00 %228 = sext i1 %227 to i32 %229 = bitcast i32 %228 to float %230 = bitcast float %229 to i32 %231 = icmp ne i32 %230, 0 %temp24.0 = select i1 %231, float %224, float %226 %232 = fdiv float 1.000000e+00, %temp24.0 %233 = fmul float %232, %. %234 = call float @llvm.AMDIL.fraction.(float %233) %235 = fdiv float 1.000000e+00, %224 %236 = fmul float %234, %temp24.0 %237 = fmul float %235, %. %238 = call float @llvm.AMDIL.fraction.(float %236) %239 = call float @llvm.AMDIL.fraction.(float %237) %240 = fsub float -0.000000e+00, %238 %241 = fadd float %240, %236 %242 = fsub float -0.000000e+00, %239 %243 = fadd float %242, %237 %244 = fadd float %171, %241 %245 = fadd float %174, %243 %246 = fmul float %244, %28 %247 = fmul float %245, %28 %248 = call float @llvm.AMDIL.fraction.(float %246) %249 = call float @llvm.AMDIL.fraction.(float %247) %250 = fsub float -0.000000e+00, %248 %251 = fadd float %250, %246 %252 = fsub float -0.000000e+00, %249 %253 = fadd float %252, %247 %254 = fadd float %251, 5.000000e-01 %255 = fadd float %253, 5.000000e-01 %256 = fdiv float 1.000000e+00, %32 %257 = fdiv float 1.000000e+00, %33 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = bitcast float %258 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %45 to <32 x i8> %268 = bitcast <4 x i32> %47 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) br label %ENDIF45 ELSE47: ; preds = %ENDIF42 %270 = bitcast float %52 to i32 %271 = bitcast float %53 to i32 %272 = insertelement <2 x i32> undef, i32 %270, i32 0 %273 = insertelement <2 x i32> %272, i32 %271, i32 1 %274 = bitcast <8 x i32> %45 to <32 x i8> %275 = bitcast <4 x i32> %47 to <16 x i8> %276 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %273, <32 x i8> %274, <16 x i8> %275, i32 2) br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink = phi <4 x float> [ %276, %ELSE47 ], [ %269, %IF46 ] %277 = extractelement <4 x float> %.sink, i32 0 %278 = extractelement <4 x float> %.sink, i32 1 %279 = extractelement <4 x float> %.sink, i32 2 %280 = extractelement <4 x float> %.sink, i32 3 %281 = fmul float %277, %48 %282 = fmul float %278, %49 %283 = fmul float %279, %50 %284 = fmul float %280, %51 %285 = call i32 @llvm.SI.packf16(float %281, float %282) %286 = bitcast i32 %285 to float %287 = call i32 @llvm.SI.packf16(float %283, float %284) %288 = bitcast i32 %287 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %286, float %288, float %286, float %288) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500 v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_buffer_load_dword s9, s[0:3], 0x14 ; C2048114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v0, s9, v2, v0 ; D2820000 04020409 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s8, v0 ; 0A020008 v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_buffer_load_dword s12, s[0:3], 0x20 ; C2060120 s_buffer_load_dword s13, s[0:3], 0x15 ; C2068115 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[14:15], s[8:9] ; BE8E2408 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 v_mov_b32_e32 v2, s12 ; 7E04020C v_mov_b32_e32 v1, s13 ; 7E02020D s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[12:13], v0, 0 ; D002000C 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mad_f32 v0, v2, v3, s8 ; D2820000 00220702 v_mad_f32 v0, v1, v0, s9 ; D2820000 00260101 v_subrev_f32_e32 v1, s10, v0 ; 0A02000A v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[8:9] ; BE8C2408 s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[10:11], v0, 0 ; D002000A 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[12:15] ; F0800F00 00640008 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_12 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s24, s[0:3], 0x10 ; C20C0110 s_buffer_load_dword s25, s[0:3], 0xd ; C20C810D s_buffer_load_dword s26, s[0:3], 0xc ; C20D010C s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v9 ; 10001219 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v9, -v0 ; D2820000 84021219 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v2, s25 ; 7E045419 v_mul_f32_e32 v11, v2, v1 ; 10160302 v_mul_f32_e32 v1, s26, v8 ; 1002101A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v8, -v1 ; D2820001 8406101A v_add_f32_e32 v2, 0.5, v1 ; 060402F0 v_rcp_f32_e32 v3, s26 ; 7E06541A v_mul_f32_e32 v10, v3, v2 ; 10140503 v_mov_b32_e32 v12, 0 ; 7E180280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[2:3], 9, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0900900 00E8020A v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v3, v14, 0.5 ; D282000E 03C21D03 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v2 ; 101E04FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v2, s27, v2, 0.5 ; D2820002 03C2041B v_fract_f32_e32 v3, v2 ; 7E064102 v_subrev_f32_e32 v2, v3, v2 ; 0A040503 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mov_b32_e32 v3, 0x44800000 ; 7E0602FF 44800000 v_subrev_f32_e32 v3, s27, v3 ; 0A06061B v_cmp_ge_f32_e64 s[28:29], v3, 0 ; D00C001C 00010103 v_cndmask_b32_e64 v3, 0, -1, s[28:29] ; D2000803 00718280 v_cmp_ne_i32_e64 s[28:29], v3, 0 ; D10A001C 00010103 v_cndmask_b32_e64 v2, v14, v2, s[28:29] ; D2000002 1072050E v_rcp_f32_e32 v3, s8 ; 7E065408 v_mad_f32 v3, s24, v3, 0.5 ; D2820003 03C20618 v_fract_f32_e32 v14, v3 ; 7E1C4103 v_subrev_f32_e32 v3, v14, v3 ; 0A06070E v_mul_f32_e32 v14, v2, v3 ; 101C0702 v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v3 ; 3A1C06FF 80000000 v_cndmask_b32_e64 v14, v14, v3, s[28:29] ; D200000E 1072070E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v2, v15 ; 101E1F02 v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v8, -v1 ; D2820001 8406101A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v10, v14, v1 ; 1014030E v_rcp_f32_e32 v1, v3 ; 7E025503 v_mul_f32_e32 v3, v2, v1 ; 10060302 v_fract_f32_e32 v3, v3 ; 7E064103 v_mad_f32 v1, v1, v2, -v3 ; D2820001 840E0501 v_mad_f32 v0, s25, v9, -v0 ; D2820000 84021219 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v11, v1, v0 ; 10160101 image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 0064000A s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mul_f32_e32 v5, v5, v2 ; 100A0505 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_mul_f32_e32 v5, v6, v1 ; 100A0306 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[8] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[8].xxxx, CONST[8].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].xz, IMM[0].yyzy 33: FSLT TEMP[1].x, IMM[0].yyyy, CONST[1].xxxx 34: UIF TEMP[1].xxxx :2 35: MUL TEMP[1].yw, CONST[3].xxzy, IN[2].xxzy 36: MOV TEMP[3].yw, TEMP[1].wyww 37: FRC TEMP[1].xy, TEMP[3].ywzw 38: MOV TEMP[1].xy, TEMP[1].xyxx 39: ADD TEMP[2].yw, TEMP[3], -TEMP[1].xxzy 40: MOV TEMP[3].yw, TEMP[2].wyww 41: MAD TEMP[2].xy, IN[2], CONST[3], -TEMP[3].ywzw 42: MOV TEMP[1].xy, TEMP[2].xyxx 43: ADD TEMP[2].yw, TEMP[3], IMM[0].yyyy 44: MOV TEMP[3].yw, TEMP[2].wyww 45: RCP TEMP[2].x, CONST[3].xxxx 46: RCP TEMP[4].x, CONST[3].yyyy 47: MOV TEMP[2].y, TEMP[4].xxxx 48: MUL TEMP[4].xy, TEMP[3].ywzw, TEMP[2] 49: MOV TEMP[4].xy, TEMP[4].xyyy 50: MOV TEMP[4].w, IMM[0].xxxx 51: TXL TEMP[4], TEMP[4], SAMP[0], 2D 52: MOV TEMP[2].zw, TEMP[4] 53: ADD TEMP[5].y, IMM[0].zzzz, -CONST[2].xxxx 54: MOV TEMP[3].y, TEMP[5].yyyy 55: MUL TEMP[6].z, TEMP[4].xxxx, IMM[0].wwww 56: MAD TEMP[7].w, TEMP[4].wwww, IMM[0].wwww, IMM[0].yyyy 57: FRC TEMP[8].z, TEMP[7].wwww 58: ADD TEMP[7].w, TEMP[7].wwww, -TEMP[8].zzzz 59: MAD TEMP[6].z, TEMP[7].wwww, IMM[1].xxxx, TEMP[6].zzzz 60: ADD TEMP[6].z, TEMP[6].zzzz, IMM[0].yyyy 61: FRC TEMP[7].w, TEMP[6].zzzz 62: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].zzzz 63: MOV TEMP[3].z, TEMP[6].zzzz 64: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].yyyy 65: FRC TEMP[6].z, TEMP[4].wwww 66: MOV TEMP[1].z, TEMP[6].zzzz 67: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 68: MOV TEMP[3].w, TEMP[4].wwww 69: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 70: FSGE TEMP[5].x, TEMP[5].yyyy, IMM[0].xxxx 71: UIF TEMP[5].xxxx :2 72: MOV TEMP[5].x, TEMP[4].wwww 73: ELSE :2 74: MOV TEMP[5].x, TEMP[4].zzzz 75: ENDIF 76: MOV TEMP[3].y, TEMP[5].xxxx 77: RCP TEMP[4].x, CONST[1].xxxx 78: MAD TEMP[4].x, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].yyyy 79: FRC TEMP[6].z, TEMP[4].xxxx 80: ADD TEMP[4].x, -TEMP[6].zzzz, TEMP[4].xxxx 81: MUL TEMP[6].z, TEMP[4].xxxx, TEMP[5].xxxx 82: MOV TEMP[7].x, -TEMP[4].xxxx 83: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].xxxx 84: UIF TEMP[6].xxxx :2 85: MOV TEMP[6].x, TEMP[4].xxxx 86: ELSE :2 87: MOV TEMP[6].x, TEMP[7].xxxx 88: ENDIF 89: MOV TEMP[3].z, TEMP[6].xxxx 90: RCP TEMP[6].x, TEMP[6].xxxx 91: MUL TEMP[5].w, TEMP[6].xxxx, TEMP[5].xxxx 92: FRC TEMP[5].w, TEMP[5].wwww 93: MOV TEMP[3].w, TEMP[5].wwww 94: RCP TEMP[3].x, TEMP[4].xxxx 95: MUL TEMP[4].xy, TEMP[3].wxzw, TEMP[3].zyzw 96: MOV TEMP[2].xy, TEMP[4].xyxx 97: FRC TEMP[4].xy, TEMP[2] 98: MOV TEMP[3].xy, TEMP[4].xyxx 99: ADD TEMP[2].xy, -TEMP[3], TEMP[2] 100: MOV TEMP[3].xy, TEMP[2].xyxx 101: ADD TEMP[2].xy, TEMP[1], TEMP[3] 102: MOV TEMP[3].xy, TEMP[2].xyxx 103: MUL TEMP[2].xy, TEMP[3], CONST[1].xxxx 104: MOV TEMP[3].xy, TEMP[2].xyxx 105: FRC TEMP[2].zw, TEMP[2].xyxy 106: MOV TEMP[3].zw, TEMP[2].wwzw 107: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[3] 108: MOV TEMP[3].xy, TEMP[2].xyxx 109: ADD TEMP[2].xy, TEMP[3], IMM[0].yyyy 110: MOV TEMP[3].xy, TEMP[2].xyxx 111: RCP TEMP[1].x, CONST[4].xxxx 112: RCP TEMP[2].x, CONST[4].yyyy 113: MOV TEMP[1].y, TEMP[2].xxxx 114: MUL TEMP[1].xy, TEMP[3], TEMP[1] 115: MOV TEMP[1].xy, TEMP[1].xyyy 116: MOV TEMP[1].w, IMM[0].xxxx 117: TXL TEMP[1], TEMP[1], SAMP[1], 2D 118: MOV TEMP[3], TEMP[1] 119: ELSE :2 120: MOV TEMP[1].xy, IN[2].xyyy 121: TEX TEMP[1], TEMP[1], SAMP[1], 2D 122: MOV TEMP[3], TEMP[1] 123: ENDIF 124: MUL TEMP[1], TEMP[3], IN[1] 125: MOV OUT[0], TEMP[1] 126: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = fmul float %15, %38 %55 = fadd float %54, %39 %56 = fmul float %14, %34 %57 = fadd float %56, %36 %58 = fmul float %55, %35 %59 = fadd float %58, %37 %60 = fsub float -0.000000e+00, %24 %61 = fsub float -0.000000e+00, %26 %62 = fadd float %60, %57 %63 = fadd float %60, %57 %64 = fadd float %60, %57 %65 = fcmp olt float %62, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = fcmp olt float %63, 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %64, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = bitcast i32 %66 to float %72 = bitcast i32 %68 to float %73 = bitcast i32 %70 to float %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = bitcast float %72 to i32 %80 = or i32 %78, %79 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 br i1 %83, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %84 = fsub float -0.000000e+00, %57 %85 = fadd float %25, %84 %86 = fsub float -0.000000e+00, %57 %87 = fadd float %25, %86 %88 = fsub float -0.000000e+00, %57 %89 = fadd float %25, %88 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = fcmp olt float %87, 0.000000e+00 %93 = sext i1 %92 to i32 %94 = fcmp olt float %89, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %91 to float %97 = bitcast i32 %93 to float %98 = bitcast i32 %95 to float %99 = bitcast float %96 to i32 %100 = bitcast float %98 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = bitcast float %97 to i32 %105 = or i32 %103, %104 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF36 ENDIF36: ; preds = %ENDIF, %IF37 %109 = fadd float %61, %59 %110 = fadd float %61, %59 %111 = fadd float %61, %59 %112 = fcmp olt float %109, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = fcmp olt float %110, 0.000000e+00 %115 = sext i1 %114 to i32 %116 = fcmp olt float %111, 0.000000e+00 %117 = sext i1 %116 to i32 %118 = bitcast i32 %113 to float %119 = bitcast i32 %115 to float %120 = bitcast i32 %117 to float %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = bitcast float %119 to i32 %127 = or i32 %125, %126 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 br i1 %130, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 call void @llvm.AMDGPU.kilp() br label %ENDIF39 ENDIF39: ; preds = %ENDIF36, %IF40 %131 = fsub float -0.000000e+00, %59 %132 = fadd float %27, %131 %133 = fsub float -0.000000e+00, %59 %134 = fadd float %27, %133 %135 = fsub float -0.000000e+00, %59 %136 = fadd float %27, %135 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = fcmp olt float %134, 0.000000e+00 %140 = sext i1 %139 to i32 %141 = fcmp olt float %136, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %138 to float %144 = bitcast i32 %140 to float %145 = bitcast i32 %142 to float %146 = bitcast float %143 to i32 %147 = bitcast float %145 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = bitcast float %144 to i32 %152 = or i32 %150, %151 %153 = bitcast i32 %152 to float %154 = bitcast float %153 to i32 %155 = icmp ne i32 %154, 0 br i1 %155, label %IF43, label %ENDIF42 IF43: ; preds = %ENDIF39 call void @llvm.AMDGPU.kilp() br label %ENDIF42 ENDIF42: ; preds = %ENDIF39, %IF43 %156 = fcmp olt float 5.000000e-01, %28 %157 = sext i1 %156 to i32 %158 = bitcast i32 %157 to float %159 = bitcast float %158 to i32 %160 = icmp ne i32 %159, 0 br i1 %160, label %IF46, label %ELSE47 IF46: ; preds = %ENDIF42 %161 = fmul float %30, %52 %162 = fmul float %31, %53 %163 = call float @llvm.AMDIL.fraction.(float %161) %164 = call float @llvm.AMDIL.fraction.(float %162) %165 = fsub float -0.000000e+00, %163 %166 = fadd float %161, %165 %167 = fsub float -0.000000e+00, %164 %168 = fadd float %162, %167 %169 = fsub float -0.000000e+00, %166 %170 = fmul float %52, %30 %171 = fadd float %170, %169 %172 = fsub float -0.000000e+00, %168 %173 = fmul float %53, %31 %174 = fadd float %173, %172 %175 = fadd float %166, 5.000000e-01 %176 = fadd float %168, 5.000000e-01 %177 = fdiv float 1.000000e+00, %30 %178 = fdiv float 1.000000e+00, %31 %179 = fmul float %175, %177 %180 = fmul float %176, %178 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = bitcast float 0.000000e+00 to i32 %184 = insertelement <4 x i32> undef, i32 %181, i32 0 %185 = insertelement <4 x i32> %184, i32 %182, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 undef, i32 3 %188 = bitcast <8 x i32> %41 to <32 x i8> %189 = bitcast <4 x i32> %43 to <16 x i8> %190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 3 %193 = fsub float -0.000000e+00, %29 %194 = fadd float 1.024000e+03, %193 %195 = fmul float %191, 2.550000e+02 %196 = fmul float %192, 2.550000e+02 %197 = fadd float %196, 5.000000e-01 %198 = call float @llvm.AMDIL.fraction.(float %197) %199 = fsub float -0.000000e+00, %198 %200 = fadd float %197, %199 %201 = fmul float %200, 2.560000e+02 %202 = fadd float %201, %195 %203 = fadd float %202, 5.000000e-01 %204 = call float @llvm.AMDIL.fraction.(float %203) %205 = fsub float -0.000000e+00, %204 %206 = fadd float %205, %203 %207 = fmul float %29, %191 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %208, %210 %212 = fadd float %206, 5.000000e-01 %213 = fadd float %211, 5.000000e-01 %214 = fcmp oge float %194, 0.000000e+00 %215 = sext i1 %214 to i32 %216 = bitcast i32 %215 to float %217 = bitcast float %216 to i32 %218 = icmp ne i32 %217, 0 %. = select i1 %218, float %213, float %212 %219 = fdiv float 1.000000e+00, %28 %220 = fmul float %32, %219 %221 = fadd float %220, 5.000000e-01 %222 = call float @llvm.AMDIL.fraction.(float %221) %223 = fsub float -0.000000e+00, %222 %224 = fadd float %223, %221 %225 = fmul float %224, %. %226 = fsub float -0.000000e+00, %224 %227 = fcmp oge float %225, 0.000000e+00 %228 = sext i1 %227 to i32 %229 = bitcast i32 %228 to float %230 = bitcast float %229 to i32 %231 = icmp ne i32 %230, 0 %temp24.0 = select i1 %231, float %224, float %226 %232 = fdiv float 1.000000e+00, %temp24.0 %233 = fmul float %232, %. %234 = call float @llvm.AMDIL.fraction.(float %233) %235 = fdiv float 1.000000e+00, %224 %236 = fmul float %234, %temp24.0 %237 = fmul float %235, %. %238 = call float @llvm.AMDIL.fraction.(float %236) %239 = call float @llvm.AMDIL.fraction.(float %237) %240 = fsub float -0.000000e+00, %238 %241 = fadd float %240, %236 %242 = fsub float -0.000000e+00, %239 %243 = fadd float %242, %237 %244 = fadd float %171, %241 %245 = fadd float %174, %243 %246 = fmul float %244, %28 %247 = fmul float %245, %28 %248 = call float @llvm.AMDIL.fraction.(float %246) %249 = call float @llvm.AMDIL.fraction.(float %247) %250 = fsub float -0.000000e+00, %248 %251 = fadd float %250, %246 %252 = fsub float -0.000000e+00, %249 %253 = fadd float %252, %247 %254 = fadd float %251, 5.000000e-01 %255 = fadd float %253, 5.000000e-01 %256 = fdiv float 1.000000e+00, %32 %257 = fdiv float 1.000000e+00, %33 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = bitcast float %258 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %45 to <32 x i8> %268 = bitcast <4 x i32> %47 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) br label %ENDIF45 ELSE47: ; preds = %ENDIF42 %270 = bitcast float %52 to i32 %271 = bitcast float %53 to i32 %272 = insertelement <2 x i32> undef, i32 %270, i32 0 %273 = insertelement <2 x i32> %272, i32 %271, i32 1 %274 = bitcast <8 x i32> %45 to <32 x i8> %275 = bitcast <4 x i32> %47 to <16 x i8> %276 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %273, <32 x i8> %274, <16 x i8> %275, i32 2) br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink = phi <4 x float> [ %276, %ELSE47 ], [ %269, %IF46 ] %277 = extractelement <4 x float> %.sink, i32 0 %278 = extractelement <4 x float> %.sink, i32 1 %279 = extractelement <4 x float> %.sink, i32 2 %280 = extractelement <4 x float> %.sink, i32 3 %281 = fmul float %277, %48 %282 = fmul float %278, %49 %283 = fmul float %279, %50 %284 = fmul float %280, %51 %285 = call i32 @llvm.SI.packf16(float %281, float %282) %286 = bitcast i32 %285 to float %287 = call i32 @llvm.SI.packf16(float %283, float %284) %288 = bitcast i32 %287 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %286, float %288, float %286, float %288) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500 v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_buffer_load_dword s9, s[0:3], 0x14 ; C2048114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v0, s9, v2, v0 ; D2820000 04020409 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s8, v0 ; 0A020008 v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_buffer_load_dword s12, s[0:3], 0x20 ; C2060120 s_buffer_load_dword s13, s[0:3], 0x15 ; C2068115 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[14:15], s[8:9] ; BE8E2408 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 v_mov_b32_e32 v2, s12 ; 7E04020C v_mov_b32_e32 v1, s13 ; 7E02020D s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[12:13], v0, 0 ; D002000C 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mad_f32 v0, v2, v3, s8 ; D2820000 00220702 v_mad_f32 v0, v1, v0, s9 ; D2820000 00260101 v_subrev_f32_e32 v1, s10, v0 ; 0A02000A v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[8:9] ; BE8C2408 s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[10:11], v0, 0 ; D002000A 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[12:15] ; F0800F00 00640008 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_12 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s24, s[0:3], 0x10 ; C20C0110 s_buffer_load_dword s25, s[0:3], 0xd ; C20C810D s_buffer_load_dword s26, s[0:3], 0xc ; C20D010C s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v9 ; 10001219 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v9, -v0 ; D2820000 84021219 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v2, s25 ; 7E045419 v_mul_f32_e32 v11, v2, v1 ; 10160302 v_mul_f32_e32 v1, s26, v8 ; 1002101A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v8, -v1 ; D2820001 8406101A v_add_f32_e32 v2, 0.5, v1 ; 060402F0 v_rcp_f32_e32 v3, s26 ; 7E06541A v_mul_f32_e32 v10, v3, v2 ; 10140503 v_mov_b32_e32 v12, 0 ; 7E180280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[2:3], 9, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0900900 00E8020A v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v3, v14, 0.5 ; D282000E 03C21D03 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v2 ; 101E04FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v2, s27, v2, 0.5 ; D2820002 03C2041B v_fract_f32_e32 v3, v2 ; 7E064102 v_subrev_f32_e32 v2, v3, v2 ; 0A040503 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mov_b32_e32 v3, 0x44800000 ; 7E0602FF 44800000 v_subrev_f32_e32 v3, s27, v3 ; 0A06061B v_cmp_ge_f32_e64 s[28:29], v3, 0 ; D00C001C 00010103 v_cndmask_b32_e64 v3, 0, -1, s[28:29] ; D2000803 00718280 v_cmp_ne_i32_e64 s[28:29], v3, 0 ; D10A001C 00010103 v_cndmask_b32_e64 v2, v14, v2, s[28:29] ; D2000002 1072050E v_rcp_f32_e32 v3, s8 ; 7E065408 v_mad_f32 v3, s24, v3, 0.5 ; D2820003 03C20618 v_fract_f32_e32 v14, v3 ; 7E1C4103 v_subrev_f32_e32 v3, v14, v3 ; 0A06070E v_mul_f32_e32 v14, v2, v3 ; 101C0702 v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v3 ; 3A1C06FF 80000000 v_cndmask_b32_e64 v14, v14, v3, s[28:29] ; D200000E 1072070E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v2, v15 ; 101E1F02 v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v8, -v1 ; D2820001 8406101A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v10, v14, v1 ; 1014030E v_rcp_f32_e32 v1, v3 ; 7E025503 v_mul_f32_e32 v3, v2, v1 ; 10060302 v_fract_f32_e32 v3, v3 ; 7E064103 v_mad_f32 v1, v1, v2, -v3 ; D2820001 840E0501 v_mad_f32 v0, s25, v9, -v0 ; D2820000 84021219 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v11, v1, v0 ; 10160101 image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 0064000A s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mul_f32_e32 v5, v5, v2 ; 100A0505 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_mul_f32_e32 v5, v6, v1 ; 100A0306 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[8] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..8], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, 1024.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[8].xxxx, CONST[8].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 8: UIF TEMP[5].xxxx :2 9: KILL 10: ENDIF 11: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 12: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 13: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 14: OR TEMP[5].x, TEMP[5].xxxx, TEMP[4].yyyy 15: UIF TEMP[5].xxxx :2 16: KILL 17: ENDIF 18: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 19: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 20: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 21: OR TEMP[4].x, TEMP[4].xxxx, TEMP[2].yyyy 22: UIF TEMP[4].xxxx :2 23: KILL 24: ENDIF 25: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 26: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 27: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 28: OR TEMP[2].x, TEMP[2].xxxx, TEMP[1].yyyy 29: UIF TEMP[2].xxxx :2 30: KILL 31: ENDIF 32: MOV TEMP[3].xz, IMM[0].yyzy 33: FSLT TEMP[1].x, IMM[0].yyyy, CONST[1].xxxx 34: UIF TEMP[1].xxxx :2 35: MUL TEMP[1].yw, CONST[3].xxzy, IN[2].xxzy 36: MOV TEMP[3].yw, TEMP[1].wyww 37: FRC TEMP[1].xy, TEMP[3].ywzw 38: MOV TEMP[1].xy, TEMP[1].xyxx 39: ADD TEMP[2].yw, TEMP[3], -TEMP[1].xxzy 40: MOV TEMP[3].yw, TEMP[2].wyww 41: MAD TEMP[2].xy, IN[2], CONST[3], -TEMP[3].ywzw 42: MOV TEMP[1].xy, TEMP[2].xyxx 43: ADD TEMP[2].yw, TEMP[3], IMM[0].yyyy 44: MOV TEMP[3].yw, TEMP[2].wyww 45: RCP TEMP[2].x, CONST[3].xxxx 46: RCP TEMP[4].x, CONST[3].yyyy 47: MOV TEMP[2].y, TEMP[4].xxxx 48: MUL TEMP[4].xy, TEMP[3].ywzw, TEMP[2] 49: MOV TEMP[4].xy, TEMP[4].xyyy 50: MOV TEMP[4].w, IMM[0].xxxx 51: TXL TEMP[4], TEMP[4], SAMP[0], 2D 52: MOV TEMP[2].zw, TEMP[4] 53: ADD TEMP[5].y, IMM[0].zzzz, -CONST[2].xxxx 54: MOV TEMP[3].y, TEMP[5].yyyy 55: MUL TEMP[6].z, TEMP[4].xxxx, IMM[0].wwww 56: MAD TEMP[7].w, TEMP[4].wwww, IMM[0].wwww, IMM[0].yyyy 57: FRC TEMP[8].z, TEMP[7].wwww 58: ADD TEMP[7].w, TEMP[7].wwww, -TEMP[8].zzzz 59: MAD TEMP[6].z, TEMP[7].wwww, IMM[1].xxxx, TEMP[6].zzzz 60: ADD TEMP[6].z, TEMP[6].zzzz, IMM[0].yyyy 61: FRC TEMP[7].w, TEMP[6].zzzz 62: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].zzzz 63: MOV TEMP[3].z, TEMP[6].zzzz 64: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].yyyy 65: FRC TEMP[6].z, TEMP[4].wwww 66: MOV TEMP[1].z, TEMP[6].zzzz 67: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 68: MOV TEMP[3].w, TEMP[4].wwww 69: ADD TEMP[4].zw, TEMP[3], IMM[0].yyyy 70: FSGE TEMP[5].x, TEMP[5].yyyy, IMM[0].xxxx 71: UIF TEMP[5].xxxx :2 72: MOV TEMP[5].x, TEMP[4].wwww 73: ELSE :2 74: MOV TEMP[5].x, TEMP[4].zzzz 75: ENDIF 76: MOV TEMP[3].y, TEMP[5].xxxx 77: RCP TEMP[4].x, CONST[1].xxxx 78: MAD TEMP[4].x, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].yyyy 79: FRC TEMP[6].z, TEMP[4].xxxx 80: ADD TEMP[4].x, -TEMP[6].zzzz, TEMP[4].xxxx 81: MUL TEMP[6].z, TEMP[4].xxxx, TEMP[5].xxxx 82: MOV TEMP[7].x, -TEMP[4].xxxx 83: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].xxxx 84: UIF TEMP[6].xxxx :2 85: MOV TEMP[6].x, TEMP[4].xxxx 86: ELSE :2 87: MOV TEMP[6].x, TEMP[7].xxxx 88: ENDIF 89: MOV TEMP[3].z, TEMP[6].xxxx 90: RCP TEMP[6].x, TEMP[6].xxxx 91: MUL TEMP[5].w, TEMP[6].xxxx, TEMP[5].xxxx 92: FRC TEMP[5].w, TEMP[5].wwww 93: MOV TEMP[3].w, TEMP[5].wwww 94: RCP TEMP[3].x, TEMP[4].xxxx 95: MUL TEMP[4].xy, TEMP[3].wxzw, TEMP[3].zyzw 96: MOV TEMP[2].xy, TEMP[4].xyxx 97: FRC TEMP[4].xy, TEMP[2] 98: MOV TEMP[3].xy, TEMP[4].xyxx 99: ADD TEMP[2].xy, -TEMP[3], TEMP[2] 100: MOV TEMP[3].xy, TEMP[2].xyxx 101: ADD TEMP[2].xy, TEMP[1], TEMP[3] 102: MOV TEMP[3].xy, TEMP[2].xyxx 103: MUL TEMP[2].xy, TEMP[3], CONST[1].xxxx 104: MOV TEMP[3].xy, TEMP[2].xyxx 105: FRC TEMP[2].zw, TEMP[2].xyxy 106: MOV TEMP[3].zw, TEMP[2].wwzw 107: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[3] 108: MOV TEMP[3].xy, TEMP[2].xyxx 109: ADD TEMP[2].xy, TEMP[3], IMM[0].yyyy 110: MOV TEMP[3].xy, TEMP[2].xyxx 111: RCP TEMP[1].x, CONST[4].xxxx 112: RCP TEMP[2].x, CONST[4].yyyy 113: MOV TEMP[1].y, TEMP[2].xxxx 114: MUL TEMP[1].xy, TEMP[3], TEMP[1] 115: MOV TEMP[1].xy, TEMP[1].xyyy 116: MOV TEMP[1].w, IMM[0].xxxx 117: TXL TEMP[1], TEMP[1], SAMP[1], 2D 118: MOV TEMP[3], TEMP[1] 119: ELSE :2 120: MOV TEMP[1].xy, IN[2].xyyy 121: TEX TEMP[1], TEMP[1], SAMP[1], 2D 122: MOV TEMP[3], TEMP[1] 123: ENDIF 124: MUL TEMP[1], TEMP[3], IN[1] 125: MOV OUT[0], TEMP[1] 126: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %40 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %54 = fmul float %15, %38 %55 = fadd float %54, %39 %56 = fmul float %14, %34 %57 = fadd float %56, %36 %58 = fmul float %55, %35 %59 = fadd float %58, %37 %60 = fsub float -0.000000e+00, %24 %61 = fsub float -0.000000e+00, %26 %62 = fadd float %60, %57 %63 = fadd float %60, %57 %64 = fadd float %60, %57 %65 = fcmp olt float %62, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = fcmp olt float %63, 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %64, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = bitcast i32 %66 to float %72 = bitcast i32 %68 to float %73 = bitcast i32 %70 to float %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = bitcast float %72 to i32 %80 = or i32 %78, %79 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 br i1 %83, label %IF, label %ENDIF IF: ; preds = %main_body call void @llvm.AMDGPU.kilp() br label %ENDIF ENDIF: ; preds = %main_body, %IF %84 = fsub float -0.000000e+00, %57 %85 = fadd float %25, %84 %86 = fsub float -0.000000e+00, %57 %87 = fadd float %25, %86 %88 = fsub float -0.000000e+00, %57 %89 = fadd float %25, %88 %90 = fcmp olt float %85, 0.000000e+00 %91 = sext i1 %90 to i32 %92 = fcmp olt float %87, 0.000000e+00 %93 = sext i1 %92 to i32 %94 = fcmp olt float %89, 0.000000e+00 %95 = sext i1 %94 to i32 %96 = bitcast i32 %91 to float %97 = bitcast i32 %93 to float %98 = bitcast i32 %95 to float %99 = bitcast float %96 to i32 %100 = bitcast float %98 to i32 %101 = or i32 %99, %100 %102 = bitcast i32 %101 to float %103 = bitcast float %102 to i32 %104 = bitcast float %97 to i32 %105 = or i32 %103, %104 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF37, label %ENDIF36 IF37: ; preds = %ENDIF call void @llvm.AMDGPU.kilp() br label %ENDIF36 ENDIF36: ; preds = %ENDIF, %IF37 %109 = fadd float %61, %59 %110 = fadd float %61, %59 %111 = fadd float %61, %59 %112 = fcmp olt float %109, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = fcmp olt float %110, 0.000000e+00 %115 = sext i1 %114 to i32 %116 = fcmp olt float %111, 0.000000e+00 %117 = sext i1 %116 to i32 %118 = bitcast i32 %113 to float %119 = bitcast i32 %115 to float %120 = bitcast i32 %117 to float %121 = bitcast float %118 to i32 %122 = bitcast float %120 to i32 %123 = or i32 %121, %122 %124 = bitcast i32 %123 to float %125 = bitcast float %124 to i32 %126 = bitcast float %119 to i32 %127 = or i32 %125, %126 %128 = bitcast i32 %127 to float %129 = bitcast float %128 to i32 %130 = icmp ne i32 %129, 0 br i1 %130, label %IF40, label %ENDIF39 IF40: ; preds = %ENDIF36 call void @llvm.AMDGPU.kilp() br label %ENDIF39 ENDIF39: ; preds = %ENDIF36, %IF40 %131 = fsub float -0.000000e+00, %59 %132 = fadd float %27, %131 %133 = fsub float -0.000000e+00, %59 %134 = fadd float %27, %133 %135 = fsub float -0.000000e+00, %59 %136 = fadd float %27, %135 %137 = fcmp olt float %132, 0.000000e+00 %138 = sext i1 %137 to i32 %139 = fcmp olt float %134, 0.000000e+00 %140 = sext i1 %139 to i32 %141 = fcmp olt float %136, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %138 to float %144 = bitcast i32 %140 to float %145 = bitcast i32 %142 to float %146 = bitcast float %143 to i32 %147 = bitcast float %145 to i32 %148 = or i32 %146, %147 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = bitcast float %144 to i32 %152 = or i32 %150, %151 %153 = bitcast i32 %152 to float %154 = bitcast float %153 to i32 %155 = icmp ne i32 %154, 0 br i1 %155, label %IF43, label %ENDIF42 IF43: ; preds = %ENDIF39 call void @llvm.AMDGPU.kilp() br label %ENDIF42 ENDIF42: ; preds = %ENDIF39, %IF43 %156 = fcmp olt float 5.000000e-01, %28 %157 = sext i1 %156 to i32 %158 = bitcast i32 %157 to float %159 = bitcast float %158 to i32 %160 = icmp ne i32 %159, 0 br i1 %160, label %IF46, label %ELSE47 IF46: ; preds = %ENDIF42 %161 = fmul float %30, %52 %162 = fmul float %31, %53 %163 = call float @llvm.AMDIL.fraction.(float %161) %164 = call float @llvm.AMDIL.fraction.(float %162) %165 = fsub float -0.000000e+00, %163 %166 = fadd float %161, %165 %167 = fsub float -0.000000e+00, %164 %168 = fadd float %162, %167 %169 = fsub float -0.000000e+00, %166 %170 = fmul float %52, %30 %171 = fadd float %170, %169 %172 = fsub float -0.000000e+00, %168 %173 = fmul float %53, %31 %174 = fadd float %173, %172 %175 = fadd float %166, 5.000000e-01 %176 = fadd float %168, 5.000000e-01 %177 = fdiv float 1.000000e+00, %30 %178 = fdiv float 1.000000e+00, %31 %179 = fmul float %175, %177 %180 = fmul float %176, %178 %181 = bitcast float %179 to i32 %182 = bitcast float %180 to i32 %183 = bitcast float 0.000000e+00 to i32 %184 = insertelement <4 x i32> undef, i32 %181, i32 0 %185 = insertelement <4 x i32> %184, i32 %182, i32 1 %186 = insertelement <4 x i32> %185, i32 %183, i32 2 %187 = insertelement <4 x i32> %186, i32 undef, i32 3 %188 = bitcast <8 x i32> %41 to <32 x i8> %189 = bitcast <4 x i32> %43 to <16 x i8> %190 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %187, <32 x i8> %188, <16 x i8> %189, i32 2) %191 = extractelement <4 x float> %190, i32 0 %192 = extractelement <4 x float> %190, i32 3 %193 = fsub float -0.000000e+00, %29 %194 = fadd float 1.024000e+03, %193 %195 = fmul float %191, 2.550000e+02 %196 = fmul float %192, 2.550000e+02 %197 = fadd float %196, 5.000000e-01 %198 = call float @llvm.AMDIL.fraction.(float %197) %199 = fsub float -0.000000e+00, %198 %200 = fadd float %197, %199 %201 = fmul float %200, 2.560000e+02 %202 = fadd float %201, %195 %203 = fadd float %202, 5.000000e-01 %204 = call float @llvm.AMDIL.fraction.(float %203) %205 = fsub float -0.000000e+00, %204 %206 = fadd float %205, %203 %207 = fmul float %29, %191 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %208, %210 %212 = fadd float %206, 5.000000e-01 %213 = fadd float %211, 5.000000e-01 %214 = fcmp oge float %194, 0.000000e+00 %215 = sext i1 %214 to i32 %216 = bitcast i32 %215 to float %217 = bitcast float %216 to i32 %218 = icmp ne i32 %217, 0 %. = select i1 %218, float %213, float %212 %219 = fdiv float 1.000000e+00, %28 %220 = fmul float %32, %219 %221 = fadd float %220, 5.000000e-01 %222 = call float @llvm.AMDIL.fraction.(float %221) %223 = fsub float -0.000000e+00, %222 %224 = fadd float %223, %221 %225 = fmul float %224, %. %226 = fsub float -0.000000e+00, %224 %227 = fcmp oge float %225, 0.000000e+00 %228 = sext i1 %227 to i32 %229 = bitcast i32 %228 to float %230 = bitcast float %229 to i32 %231 = icmp ne i32 %230, 0 %temp24.0 = select i1 %231, float %224, float %226 %232 = fdiv float 1.000000e+00, %temp24.0 %233 = fmul float %232, %. %234 = call float @llvm.AMDIL.fraction.(float %233) %235 = fdiv float 1.000000e+00, %224 %236 = fmul float %234, %temp24.0 %237 = fmul float %235, %. %238 = call float @llvm.AMDIL.fraction.(float %236) %239 = call float @llvm.AMDIL.fraction.(float %237) %240 = fsub float -0.000000e+00, %238 %241 = fadd float %240, %236 %242 = fsub float -0.000000e+00, %239 %243 = fadd float %242, %237 %244 = fadd float %171, %241 %245 = fadd float %174, %243 %246 = fmul float %244, %28 %247 = fmul float %245, %28 %248 = call float @llvm.AMDIL.fraction.(float %246) %249 = call float @llvm.AMDIL.fraction.(float %247) %250 = fsub float -0.000000e+00, %248 %251 = fadd float %250, %246 %252 = fsub float -0.000000e+00, %249 %253 = fadd float %252, %247 %254 = fadd float %251, 5.000000e-01 %255 = fadd float %253, 5.000000e-01 %256 = fdiv float 1.000000e+00, %32 %257 = fdiv float 1.000000e+00, %33 %258 = fmul float %254, %256 %259 = fmul float %255, %257 %260 = bitcast float %258 to i32 %261 = bitcast float %259 to i32 %262 = bitcast float 0.000000e+00 to i32 %263 = insertelement <4 x i32> undef, i32 %260, i32 0 %264 = insertelement <4 x i32> %263, i32 %261, i32 1 %265 = insertelement <4 x i32> %264, i32 %262, i32 2 %266 = insertelement <4 x i32> %265, i32 undef, i32 3 %267 = bitcast <8 x i32> %45 to <32 x i8> %268 = bitcast <4 x i32> %47 to <16 x i8> %269 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %266, <32 x i8> %267, <16 x i8> %268, i32 2) br label %ENDIF45 ELSE47: ; preds = %ENDIF42 %270 = bitcast float %52 to i32 %271 = bitcast float %53 to i32 %272 = insertelement <2 x i32> undef, i32 %270, i32 0 %273 = insertelement <2 x i32> %272, i32 %271, i32 1 %274 = bitcast <8 x i32> %45 to <32 x i8> %275 = bitcast <4 x i32> %47 to <16 x i8> %276 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %273, <32 x i8> %274, <16 x i8> %275, i32 2) br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink = phi <4 x float> [ %276, %ELSE47 ], [ %269, %IF46 ] %277 = extractelement <4 x float> %.sink, i32 0 %278 = extractelement <4 x float> %.sink, i32 1 %279 = extractelement <4 x float> %.sink, i32 2 %280 = extractelement <4 x float> %.sink, i32 3 %281 = fmul float %277, %48 %282 = fmul float %278, %49 %283 = fmul float %279, %50 %284 = fmul float %280, %51 %285 = call i32 @llvm.SI.packf16(float %281, float %282) %286 = bitcast i32 %285 to float %287 = call i32 @llvm.SI.packf16(float %283, float %284) %288 = bitcast i32 %287 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %286, float %288, float %286, float %288) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kilp() ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v9, v0, 1, 1, [m0] ; C8240500 v_interp_p2_f32 v9, [v9], v1, 1, 1, [m0] ; C8250501 v_interp_p1_f32 v8, v0, 0, 1, [m0] ; C8200400 v_interp_p2_f32 v8, [v8], v1, 0, 1, [m0] ; C8210401 v_interp_p1_f32 v4, v0, 3, 0, [m0] ; C8100300 v_interp_p2_f32 v4, [v4], v1, 3, 0, [m0] ; C8110301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_buffer_load_dword s9, s[0:3], 0x14 ; C2048114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v0, s9, v2, v0 ; D2820000 04020409 s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v1, s8, v0 ; 0A020008 v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_buffer_load_dword s12, s[0:3], 0x20 ; C2060120 s_buffer_load_dword s13, s[0:3], 0x15 ; C2068115 s_buffer_load_dword s11, s[0:3], 0x1 ; C2058101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[14:15], s[8:9] ; BE8E2408 s_xor_b64 s[14:15], exec, s[14:15] ; 898E0E7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_buffer_load_dword s9, s[0:3], 0x17 ; C2048117 s_buffer_load_dword s10, s[0:3], 0x2 ; C2050102 v_mov_b32_e32 v2, s12 ; 7E04020C v_mov_b32_e32 v1, s13 ; 7E02020D s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[12:13], v0, 0 ; D002000C 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[12:13] ; BE8C240C s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s11, s[0:3], 0x3 ; C2058103 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_mad_f32 v0, v2, v3, s8 ; D2820000 00220702 v_mad_f32 v0, v1, v0, s9 ; D2820000 00260101 v_subrev_f32_e32 v1, s10, v0 ; 0A02000A v_cmp_lt_f32_e64 s[8:9], v1, 0 ; D0020008 00010101 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[12:13], s[8:9] ; BE8C2408 s_xor_b64 s[12:13], exec, s[12:13] ; 898C0C7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[12:13] ; 88FE0C7E s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_sub_f32_e32 v0, s11, v0 ; 0800000B v_cmp_lt_f32_e64 s[10:11], v0, 0 ; D002000A 00010100 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E s_mov_b64 exec, 0 ; BEFE0480 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[12:15] ; F0800F00 00640008 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_12 ; BF880000 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s24, s[0:3], 0x10 ; C20C0110 s_buffer_load_dword s25, s[0:3], 0xd ; C20C810D s_buffer_load_dword s26, s[0:3], 0xc ; C20D010C s_buffer_load_dword s27, s[0:3], 0x8 ; C20D8108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v9 ; 10001219 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v9, -v0 ; D2820000 84021219 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v2, s25 ; 7E045419 v_mul_f32_e32 v11, v2, v1 ; 10160302 v_mul_f32_e32 v1, s26, v8 ; 1002101A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v8, -v1 ; D2820001 8406101A v_add_f32_e32 v2, 0.5, v1 ; 060402F0 v_rcp_f32_e32 v3, s26 ; 7E06541A v_mul_f32_e32 v10, v3, v2 ; 10140503 v_mov_b32_e32 v12, 0 ; 7E180280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[2:3], 9, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[32:39], s[28:31] ; F0900900 00E8020A v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v3, v14, 0.5 ; D282000E 03C21D03 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v2 ; 101E04FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v2, s27, v2, 0.5 ; D2820002 03C2041B v_fract_f32_e32 v3, v2 ; 7E064102 v_subrev_f32_e32 v2, v3, v2 ; 0A040503 v_add_f32_e32 v2, 0.5, v2 ; 060404F0 v_mov_b32_e32 v3, 0x44800000 ; 7E0602FF 44800000 v_subrev_f32_e32 v3, s27, v3 ; 0A06061B v_cmp_ge_f32_e64 s[28:29], v3, 0 ; D00C001C 00010103 v_cndmask_b32_e64 v3, 0, -1, s[28:29] ; D2000803 00718280 v_cmp_ne_i32_e64 s[28:29], v3, 0 ; D10A001C 00010103 v_cndmask_b32_e64 v2, v14, v2, s[28:29] ; D2000002 1072050E v_rcp_f32_e32 v3, s8 ; 7E065408 v_mad_f32 v3, s24, v3, 0.5 ; D2820003 03C20618 v_fract_f32_e32 v14, v3 ; 7E1C4103 v_subrev_f32_e32 v3, v14, v3 ; 0A06070E v_mul_f32_e32 v14, v2, v3 ; 101C0702 v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v3 ; 3A1C06FF 80000000 v_cndmask_b32_e64 v14, v14, v3, s[28:29] ; D200000E 1072070E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v2, v15 ; 101E1F02 v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v8, -v1 ; D2820001 8406101A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v10, v14, v1 ; 1014030E v_rcp_f32_e32 v1, v3 ; 7E025503 v_mul_f32_e32 v3, v2, v1 ; 10060302 v_fract_f32_e32 v3, v3 ; 7E064103 v_mad_f32 v1, v1, v2, -v3 ; D2820001 840E0501 v_mad_f32 v0, s25, v9, -v0 ; D2820000 84021219 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v11, v1, v0 ; 10160101 image_sample_l v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[16:23], s[12:15] ; F0900F00 0064000A s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v4, v4, v3 ; 10080704 v_mul_f32_e32 v5, v5, v2 ; 100A0505 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_mul_f32_e32 v5, v6, v1 ; 100A0306 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v1 ; 7E0A4101 v_subrev_f32_e32 v1, v5, v1 ; 0A020305 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v9, v0 ; D2820000 04021308 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v8, v10, v9 ; D2820009 04261508 v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v6 ; 10160D0B v_mad_f32 v10, v5, v10, v11 ; D282000A 042E1505 v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v7, v11, v10 ; D282000A 042A1707 v_or_b32_e32 v11, 8, v4 ; 38160888 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v8, v11, v10 ; D282000A 042A1708 v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyxx, IMM[0].xxxy 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 0.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 1.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 0.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mad_f32 v0, 0, v2, 1.0 ; D2820000 03CA0480 v_mad_f32 v1, 0, v2, 0 ; D2820001 02020480 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_add_f32_e32 v2, 0, v2 ; 06040480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000