[S_API FAIL] SteamAPI_Init() failed; SteamAPI_IsSteamRunning() failed. [S_API FAIL] SteamAPI_Init() failed; unable to locate a running instance of Steam, or a local steamclient.so. GUID Assets\dlc\dlc_01\mongol.civ5pkg 7a036b7fb9a80e8dea7b73fb58c5a288 GUID Assets\dlc\dlc_02\spaininca.civ5pkg 4f75e72761f4c6019b55a0a7b90444a2 GUID Assets\dlc\dlc_03\polynesia.civ5pkg 99ac9d5f6ca4b5bed0ab89c0fd3b9e6d GUID Assets\dlc\dlc_04\denmark.civ5pkg 0efb155307bd6d14c9290b49b5364a3e GUID Assets\dlc\dlc_05\korea.civ5pkg 9f4df81cf712ae9480737f816bf6f4c8 GUID Assets\dlc\dlc_06\ancientwonders.civ5pkg 92b102db9a3c7dc068030c3ce33bbb48 GUID Assets\dlc\dlc_07\civcomplete.civ5pkg eb01a0be4d8e5312f53b042c8a7c30b5 GUID Assets\dlc\dlc_deluxe\babylon.civ5pkg 712495341921f2b288746c6d44fd6867 GUID Assets\dlc\dlc_sp_maps\dlc_sp_maps.civ5pkg 52b285c37939913e0a5b72933bb06067 GUID Assets\dlc\dlc_sp_maps_2\dlc_sp_maps_2.civ5pkg 16a61e7a2a7bb4bc2d1f677b5bb58ff4 GUID Assets\dlc\dlc_sp_maps_3\dlc_sp_maps_3.civ5pkg 1954db58e0a60b018969c49440fa01ef GUID Assets\dlc\expansion\expansion1.civ5pkg 8bc30c58378345cb0911c5848926f1ff GUID Assets\dlc\expansion2\expansion2.civ5pkg 31dfaa9838c5b051d4c2112ddd9e7eb3 GUID Assets\dlc\shared\upgrade1.civ5pkg e818fa28902977b42ee5e3426f5112e6 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[0] 0: MOV OUT[0], IN[0] 1: MOV OUT[1], IN[1] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = extractelement <4 x float> %14, i32 3 %19 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %20 = load <16 x i8> addrspace(2)* %19, !tbaa !0 %21 = add i32 %5, %7 %22 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %20, i32 0, i32 %21) %23 = extractelement <4 x float> %22, i32 0 %24 = extractelement <4 x float> %22, i32 1 %25 = extractelement <4 x float> %22, i32 2 %26 = extractelement <4 x float> %22, i32 3 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %23, float %24, float %25, float %26) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float %17, float %18) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 12, 0, 1, 0, v0, v1, v2, v3 ; F80008CF 03020100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].zw, IMM[0].yyxy 1: MOV TEMP[0].x, IN[0].xxxx 2: MOV TEMP[0].y, IN[0].yyyy 3: MOV TEMP[1].xy, IN[1].xyxx 4: MOV OUT[1], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %15, float %16, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v5, v5 ; F800020F 05050201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 12, 0, 1, 0, v0, v1, v5, v4 ; F80008CF 04050100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = call i32 @llvm.SI.packf16(float %35, float %36) %40 = bitcast i32 %39 to float %41 = call i32 @llvm.SI.packf16(float %37, float %38) %42 = bitcast i32 %41 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %40, float %42, float %40, float %42) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x0 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[0], LINEAR DCL OUT[0], COLOR DCL SAMP[0] 0: TEX OUT[0], IN[0], SAMP[0], 2D 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 0, float %35, float %36, float %37, float %38) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 0, 0, 1, 1, v0, v1, v2, v3 ; F800180F 03020100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) br label %ENDIF ELSE: ; preds = %main_body %158 = bitcast float %42 to i32 %159 = bitcast float %43 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = bitcast <8 x i32> %35 to <32 x i8> %163 = bitcast <4 x i32> %37 to <16 x i8> %164 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %161, <32 x i8> %162, <16 x i8> %163, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %164, %ELSE ], [ %157, %IF ] %165 = extractelement <4 x float> %.sink, i32 0 %166 = extractelement <4 x float> %.sink, i32 1 %167 = extractelement <4 x float> %.sink, i32 2 %168 = extractelement <4 x float> %.sink, i32 3 %169 = fmul float %165, %38 %170 = fmul float %166, %39 %171 = fmul float %167, %40 %172 = fmul float %168, %41 %173 = call i32 @llvm.SI.packf16(float %169, float %170) %174 = bitcast i32 %173 to float %175 = call i32 @llvm.SI.packf16(float %171, float %172) %176 = bitcast i32 %175 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %174, float %176, float %174, float %176) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640806 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v7 ; 10000E19 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v8, s25 ; 7E105419 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mul_f32_e32 v1, s26, v6 ; 10020C1A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v12, 0.5, v1 ; 061802F0 v_rcp_f32_e32 v13, s26 ; 7E1A541A v_mul_f32_e32 v8, v13, v12 ; 1010190D v_mov_b32_e32 v10, 0 ; 7E140280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[12:13], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[32:39], s[28:31] ; F0900900 00E80C08 v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v13, v14, 0.5 ; D282000E 03C21D0D v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v12 ; 101E18FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v12, s27, v12, 0.5 ; D282000C 03C2181B v_fract_f32_e32 v13, v12 ; 7E1A410C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_mov_b32_e32 v13, 0x44800000 ; 7E1A02FF 44800000 v_subrev_f32_e32 v13, s27, v13 ; 0A1A1A1B v_cmp_ge_f32_e64 s[28:29], v13, 0 ; D00C001C 0001010D v_cndmask_b32_e64 v13, 0, -1, s[28:29] ; D200080D 00718280 v_cmp_ne_i32_e64 s[28:29], v13, 0 ; D10A001C 0001010D v_cndmask_b32_e64 v12, v14, v12, s[28:29] ; D200000C 1072190E v_rcp_f32_e32 v13, s8 ; 7E1A5408 v_mad_f32 v13, s24, v13, 0.5 ; D282000D 03C21A18 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v12, v13 ; 101C1B0C v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[28:29] ; D200000E 10721B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v12, v15 ; 101E1F0C v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v8, v14, v1 ; 1010030E v_rcp_f32_e32 v1, v13 ; 7E02550D v_mul_f32_e32 v13, v12, v1 ; 101A030C v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v1, v1, v12, -v13 ; D2820001 84361901 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[12:15] ; F0900F00 00640808 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v0, v2, v11 ; 10001702 v_mul_f32_e32 v1, v3, v10 ; 10021503 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mul_f32_e32 v1, v4, v9 ; 10021304 v_mul_f32_e32 v2, v5, v8 ; 10041105 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..8] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[6] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[5], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[8] 4: ADD TEMP[1].x, CONST[4].xxxx, CONST[4].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MAD TEMP[2].xy, IN[1], CONST[1], CONST[1].zwzw 7: MOV TEMP[2].xy, TEMP[2].xyxx 8: MUL TEMP[0], CONST[2], IN[2] 9: MUL TEMP[3].w, TEMP[0].wwww, CONST[3].wwww 10: MOV TEMP[3].w, TEMP[3].wwww 11: MOV TEMP[3].xyz, TEMP[0].xyzx 12: MOV TEMP[2].zw, IMM[0].yyxy 13: MOV OUT[1], TEMP[3] 14: MOV OUT[2], TEMP[2] 15: MOV OUT[0], TEMP[1] 16: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %40 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %41 = load <16 x i8> addrspace(2)* %40, !tbaa !0 %42 = add i32 %5, %7 %43 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %41, i32 0, i32 %42) %44 = extractelement <4 x float> %43, i32 0 %45 = extractelement <4 x float> %43, i32 1 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = extractelement <4 x float> %55, i32 3 %60 = fmul float %44, %13 %61 = fadd float %60, %15 %62 = fmul float %45, %14 %63 = fadd float %62, %16 %64 = fmul float %63, %32 %65 = fmul float %63, %33 %66 = fmul float %63, %34 %67 = fmul float %63, %35 %68 = fmul float %61, %28 %69 = fadd float %68, %64 %70 = fmul float %61, %29 %71 = fadd float %70, %65 %72 = fmul float %61, %30 %73 = fadd float %72, %66 %74 = fmul float %61, %31 %75 = fadd float %74, %67 %76 = fadd float %69, %36 %77 = fadd float %71, %37 %78 = fadd float %73, %38 %79 = fadd float %75, %39 %80 = fadd float %26, %27 %81 = fmul float %76, %80 %82 = fmul float %77, %80 %83 = fmul float %78, %80 %84 = fmul float %79, %80 %85 = fmul float %50, %17 %86 = fadd float %85, %19 %87 = fmul float %51, %18 %88 = fadd float %87, %20 %89 = fmul float %21, %56 %90 = fmul float %22, %57 %91 = fmul float %23, %58 %92 = fmul float %24, %59 %93 = fmul float %92, %25 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %89, float %90, float %91, float %93) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %81, float %82, float %83, float %84) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0xa ; C204010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 33, 0, 0, 0, v1, v5, v3, v2 ; F800021F 02030501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x20 ; C2000120 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..3] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 255.0000} IMM[1] FLT32 { 256.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xz, IMM[0].xxyx 1: FSLT TEMP[1].x, IMM[0].xxxx, CONST[0].xxxx 2: UIF TEMP[1].xxxx :0 3: MUL TEMP[1].yw, CONST[2].xxzy, IN[1].xxzy 4: MOV TEMP[0].yw, TEMP[1].wyww 5: FRC TEMP[1].xy, TEMP[0].ywzw 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: ADD TEMP[2].yw, TEMP[0], -TEMP[1].xxzy 8: MOV TEMP[0].yw, TEMP[2].wyww 9: MAD TEMP[2].xy, IN[1], CONST[2], -TEMP[0].ywzw 10: MOV TEMP[1].xy, TEMP[2].xyxx 11: ADD TEMP[2].yw, TEMP[0], IMM[0].xxxx 12: MOV TEMP[0].yw, TEMP[2].wyww 13: RCP TEMP[2].x, CONST[2].xxxx 14: RCP TEMP[3].x, CONST[2].yyyy 15: MOV TEMP[2].y, TEMP[3].xxxx 16: MUL TEMP[3].xy, TEMP[0].ywzw, TEMP[2] 17: MOV TEMP[3].xy, TEMP[3].xyyy 18: MOV TEMP[3].w, IMM[0].zzzz 19: TXL TEMP[3], TEMP[3], SAMP[0], 2D 20: MOV TEMP[2].zw, TEMP[3] 21: ADD TEMP[4].y, IMM[0].yyyy, -CONST[1].xxxx 22: MOV TEMP[0].y, TEMP[4].yyyy 23: MUL TEMP[5].z, TEMP[3].xxxx, IMM[0].wwww 24: MAD TEMP[6].w, TEMP[3].wwww, IMM[0].wwww, IMM[0].xxxx 25: FRC TEMP[7].z, TEMP[6].wwww 26: ADD TEMP[6].w, TEMP[6].wwww, -TEMP[7].zzzz 27: MAD TEMP[5].z, TEMP[6].wwww, IMM[1].xxxx, TEMP[5].zzzz 28: ADD TEMP[5].z, TEMP[5].zzzz, IMM[0].xxxx 29: FRC TEMP[6].w, TEMP[5].zzzz 30: ADD TEMP[5].z, -TEMP[6].wwww, TEMP[5].zzzz 31: MOV TEMP[0].z, TEMP[5].zzzz 32: MAD TEMP[3].w, CONST[1].xxxx, TEMP[3].xxxx, IMM[0].xxxx 33: FRC TEMP[5].z, TEMP[3].wwww 34: MOV TEMP[1].z, TEMP[5].zzzz 35: ADD TEMP[3].w, TEMP[3].wwww, -TEMP[5].zzzz 36: MOV TEMP[0].w, TEMP[3].wwww 37: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 38: FSGE TEMP[4].x, TEMP[4].yyyy, IMM[0].zzzz 39: UIF TEMP[4].xxxx :0 40: MOV TEMP[4].x, TEMP[3].wwww 41: ELSE :0 42: MOV TEMP[4].x, TEMP[3].zzzz 43: ENDIF 44: MOV TEMP[0].y, TEMP[4].xxxx 45: RCP TEMP[3].x, CONST[0].xxxx 46: MAD TEMP[3].x, CONST[3].xxxx, TEMP[3].xxxx, IMM[0].xxxx 47: FRC TEMP[5].z, TEMP[3].xxxx 48: ADD TEMP[3].x, -TEMP[5].zzzz, TEMP[3].xxxx 49: MUL TEMP[5].z, TEMP[3].xxxx, TEMP[4].xxxx 50: MOV TEMP[6].x, -TEMP[3].xxxx 51: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 52: UIF TEMP[5].xxxx :0 53: MOV TEMP[5].x, TEMP[3].xxxx 54: ELSE :0 55: MOV TEMP[5].x, TEMP[6].xxxx 56: ENDIF 57: MOV TEMP[0].z, TEMP[5].xxxx 58: RCP TEMP[5].x, TEMP[5].xxxx 59: MUL TEMP[4].w, TEMP[5].xxxx, TEMP[4].xxxx 60: FRC TEMP[4].w, TEMP[4].wwww 61: MOV TEMP[0].w, TEMP[4].wwww 62: RCP TEMP[0].x, TEMP[3].xxxx 63: MUL TEMP[3].xy, TEMP[0].wxzw, TEMP[0].zyzw 64: MOV TEMP[2].xy, TEMP[3].xyxx 65: FRC TEMP[3].xy, TEMP[2] 66: MOV TEMP[0].xy, TEMP[3].xyxx 67: ADD TEMP[2].xy, -TEMP[0], TEMP[2] 68: MOV TEMP[0].xy, TEMP[2].xyxx 69: ADD TEMP[2].xy, TEMP[1], TEMP[0] 70: MOV TEMP[0].xy, TEMP[2].xyxx 71: MUL TEMP[2].xy, TEMP[0], CONST[0].xxxx 72: MOV TEMP[0].xy, TEMP[2].xyxx 73: FRC TEMP[2].zw, TEMP[2].xyxy 74: MOV TEMP[0].zw, TEMP[2].wwzw 75: ADD TEMP[2].xy, -TEMP[2].zwzw, TEMP[0] 76: MOV TEMP[0].xy, TEMP[2].xyxx 77: ADD TEMP[2].xy, TEMP[0], IMM[0].xxxx 78: MOV TEMP[0].xy, TEMP[2].xyxx 79: RCP TEMP[1].x, CONST[3].xxxx 80: RCP TEMP[2].x, CONST[3].yyyy 81: MOV TEMP[1].y, TEMP[2].xxxx 82: MUL TEMP[1].xy, TEMP[0], TEMP[1] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: MOV TEMP[1].w, IMM[0].zzzz 85: TXL TEMP[1], TEMP[1], SAMP[1], 2D 86: MOV TEMP[0], TEMP[1] 87: ELSE :0 88: MOV TEMP[1].xy, IN[1].xyyy 89: TEX TEMP[1], TEMP[1], SAMP[1], 2D 90: MOV TEMP[0], TEMP[1] 91: ENDIF 92: MUL TEMP[0], TEMP[0], IN[0] 93: MOV OUT[0], TEMP[0] 94: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %39 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %44 = fcmp olt float 5.000000e-01, %24 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 br i1 %48, label %IF, label %ELSE IF: ; preds = %main_body %49 = fmul float %26, %42 %50 = fmul float %27, %43 %51 = call float @llvm.AMDIL.fraction.(float %49) %52 = call float @llvm.AMDIL.fraction.(float %50) %53 = fsub float -0.000000e+00, %51 %54 = fadd float %49, %53 %55 = fsub float -0.000000e+00, %52 %56 = fadd float %50, %55 %57 = fsub float -0.000000e+00, %54 %58 = fmul float %42, %26 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %43, %27 %62 = fadd float %61, %60 %63 = fadd float %54, 5.000000e-01 %64 = fadd float %56, 5.000000e-01 %65 = fdiv float 1.000000e+00, %26 %66 = fdiv float 1.000000e+00, %27 %67 = fmul float %63, %65 %68 = fmul float %64, %66 %69 = bitcast float %67 to i32 %70 = bitcast float %68 to i32 %71 = bitcast float 0.000000e+00 to i32 %72 = insertelement <4 x i32> undef, i32 %69, i32 0 %73 = insertelement <4 x i32> %72, i32 %70, i32 1 %74 = insertelement <4 x i32> %73, i32 %71, i32 2 %75 = insertelement <4 x i32> %74, i32 undef, i32 3 %76 = bitcast <8 x i32> %31 to <32 x i8> %77 = bitcast <4 x i32> %33 to <16 x i8> %78 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %75, <32 x i8> %76, <16 x i8> %77, i32 2) %79 = extractelement <4 x float> %78, i32 0 %80 = extractelement <4 x float> %78, i32 3 %81 = fsub float -0.000000e+00, %25 %82 = fadd float 1.024000e+03, %81 %83 = fmul float %79, 2.550000e+02 %84 = fmul float %80, 2.550000e+02 %85 = fadd float %84, 5.000000e-01 %86 = call float @llvm.AMDIL.fraction.(float %85) %87 = fsub float -0.000000e+00, %86 %88 = fadd float %85, %87 %89 = fmul float %88, 2.560000e+02 %90 = fadd float %89, %83 %91 = fadd float %90, 5.000000e-01 %92 = call float @llvm.AMDIL.fraction.(float %91) %93 = fsub float -0.000000e+00, %92 %94 = fadd float %93, %91 %95 = fmul float %25, %79 %96 = fadd float %95, 5.000000e-01 %97 = call float @llvm.AMDIL.fraction.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %96, %98 %100 = fadd float %94, 5.000000e-01 %101 = fadd float %99, 5.000000e-01 %102 = fcmp oge float %82, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float %101, float %100 %107 = fdiv float 1.000000e+00, %24 %108 = fmul float %28, %107 %109 = fadd float %108, 5.000000e-01 %110 = call float @llvm.AMDIL.fraction.(float %109) %111 = fsub float -0.000000e+00, %110 %112 = fadd float %111, %109 %113 = fmul float %112, %. %114 = fsub float -0.000000e+00, %112 %115 = fcmp oge float %113, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %116 to float %118 = bitcast float %117 to i32 %119 = icmp ne i32 %118, 0 %temp20.0 = select i1 %119, float %112, float %114 %120 = fdiv float 1.000000e+00, %temp20.0 %121 = fmul float %120, %. %122 = call float @llvm.AMDIL.fraction.(float %121) %123 = fdiv float 1.000000e+00, %112 %124 = fmul float %122, %temp20.0 %125 = fmul float %123, %. %126 = call float @llvm.AMDIL.fraction.(float %124) %127 = call float @llvm.AMDIL.fraction.(float %125) %128 = fsub float -0.000000e+00, %126 %129 = fadd float %128, %124 %130 = fsub float -0.000000e+00, %127 %131 = fadd float %130, %125 %132 = fadd float %59, %129 %133 = fadd float %62, %131 %134 = fmul float %132, %24 %135 = fmul float %133, %24 %136 = call float @llvm.AMDIL.fraction.(float %134) %137 = call float @llvm.AMDIL.fraction.(float %135) %138 = fsub float -0.000000e+00, %136 %139 = fadd float %138, %134 %140 = fsub float -0.000000e+00, %137 %141 = fadd float %140, %135 %142 = fadd float %139, 5.000000e-01 %143 = fadd float %141, 5.000000e-01 %144 = fdiv float 1.000000e+00, %28 %145 = fdiv float 1.000000e+00, %29 %146 = fmul float %142, %144 %147 = fmul float %143, %145 %148 = bitcast float %146 to i32 %149 = bitcast float %147 to i32 %150 = bitcast float 0.000000e+00 to i32 %151 = insertelement <4 x i32> undef, i32 %148, i32 0 %152 = insertelement <4 x i32> %151, i32 %149, i32 1 %153 = insertelement <4 x i32> %152, i32 %150, i32 2 %154 = insertelement <4 x i32> %153, i32 undef, i32 3 %155 = bitcast <8 x i32> %35 to <32 x i8> %156 = bitcast <4 x i32> %37 to <16 x i8> %157 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %154, <32 x i8> %155, <16 x i8> %156, i32 2) br label %ENDIF ELSE: ; preds = %main_body %158 = bitcast float %42 to i32 %159 = bitcast float %43 to i32 %160 = insertelement <2 x i32> undef, i32 %158, i32 0 %161 = insertelement <2 x i32> %160, i32 %159, i32 1 %162 = bitcast <8 x i32> %35 to <32 x i8> %163 = bitcast <4 x i32> %37 to <16 x i8> %164 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %161, <32 x i8> %162, <16 x i8> %163, i32 2) br label %ENDIF ENDIF: ; preds = %ELSE, %IF %.sink = phi <4 x float> [ %164, %ELSE ], [ %157, %IF ] %165 = extractelement <4 x float> %.sink, i32 0 %166 = extractelement <4 x float> %.sink, i32 1 %167 = extractelement <4 x float> %.sink, i32 2 %168 = extractelement <4 x float> %.sink, i32 3 %169 = fmul float %165, %38 %170 = fmul float %166, %39 %171 = fmul float %167, %40 %172 = fmul float %168, %41 %173 = call i32 @llvm.SI.packf16(float %169, float %170) %174 = bitcast i32 %173 to float %175 = call i32 @llvm.SI.packf16(float %171, float %172) %176 = bitcast i32 %175 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %174, float %176, float %174, float %176) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v7, v0, 1, 1, [m0] ; C81C0500 v_interp_p2_f32 v7, [v7], v1, 1, 1, [m0] ; C81D0501 v_interp_p1_f32 v6, v0, 0, 1, [m0] ; C8180400 v_interp_p2_f32 v6, [v6], v1, 0, 1, [m0] ; C8190401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[10:11], s8, 0.5 ; D008000A 0001E008 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_eq_i32_e64 s[10:11], v0, 0 ; D104000A 00010100 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x8 ; C0C80708 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[10:11], s[10:11] ; BE8A240A s_xor_b64 s[10:11], exec, s[10:11] ; 898A0A7E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800F00 00640806 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[10:11], s[10:11] ; BE8A250A s_xor_b64 exec, exec, s[10:11] ; 89FE0A7E s_cbranch_execz BB0_4 ; BF880000 s_buffer_load_dword s9, s[0:3], 0xd ; C204810D s_buffer_load_dword s24, s[0:3], 0xc ; C20C010C s_buffer_load_dword s25, s[0:3], 0x9 ; C20C8109 s_buffer_load_dword s26, s[0:3], 0x8 ; C20D0108 s_buffer_load_dword s27, s[0:3], 0x4 ; C20D8104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s25, v7 ; 10000E19 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v1, 0.5, v0 ; 060200F0 v_rcp_f32_e32 v8, s25 ; 7E105419 v_mul_f32_e32 v9, v8, v1 ; 10120308 v_mul_f32_e32 v1, s26, v6 ; 10020C1A v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v12, 0.5, v1 ; 061802F0 v_rcp_f32_e32 v13, s26 ; 7E1A541A v_mul_f32_e32 v8, v13, v12 ; 1010190D v_mov_b32_e32 v10, 0 ; 7E140280 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v[12:13], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[32:39], s[28:31] ; F0900900 00E80C08 v_mov_b32_e32 v14, 0x437f0000 ; 7E1C02FF 437F0000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v13, v14, 0.5 ; D282000E 03C21D0D v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_mul_f32_e32 v15, 0x437f0000, v12 ; 101E18FF 437F0000 v_mov_b32_e32 v16, 0x43800000 ; 7E2002FF 43800000 v_mad_f32 v14, v16, v14, v15 ; D282000E 043E1D10 v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_mad_f32 v12, s27, v12, 0.5 ; D282000C 03C2181B v_fract_f32_e32 v13, v12 ; 7E1A410C v_subrev_f32_e32 v12, v13, v12 ; 0A18190D v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_mov_b32_e32 v13, 0x44800000 ; 7E1A02FF 44800000 v_subrev_f32_e32 v13, s27, v13 ; 0A1A1A1B v_cmp_ge_f32_e64 s[28:29], v13, 0 ; D00C001C 0001010D v_cndmask_b32_e64 v13, 0, -1, s[28:29] ; D200080D 00718280 v_cmp_ne_i32_e64 s[28:29], v13, 0 ; D10A001C 0001010D v_cndmask_b32_e64 v12, v14, v12, s[28:29] ; D200000C 1072190E v_rcp_f32_e32 v13, s8 ; 7E1A5408 v_mad_f32 v13, s24, v13, 0.5 ; D282000D 03C21A18 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v12, v13 ; 101C1B0C v_cmp_ge_f32_e64 s[28:29], v14, 0 ; D00C001C 0001010E v_cndmask_b32_e64 v14, 0, -1, s[28:29] ; D200000E 00718280 v_cmp_ne_i32_e64 s[28:29], v14, 0 ; D10A001C 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[28:29] ; D200000E 10721B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v12, v15 ; 101E1F0C v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v1, s26, v6, -v1 ; D2820001 84060C1A v_add_f32_e32 v1, v14, v1 ; 0602030E v_mul_f32_e32 v14, s8, v1 ; 101C0208 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v1, v1, s8, -v14 ; D2820001 84381101 v_add_f32_e32 v1, 0.5, v1 ; 060202F0 v_rcp_f32_e32 v14, s24 ; 7E1C5418 v_mul_f32_e32 v8, v14, v1 ; 1010030E v_rcp_f32_e32 v1, v13 ; 7E02550D v_mul_f32_e32 v13, v12, v1 ; 101A030C v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v1, v1, v12, -v13 ; D2820001 84361901 v_mad_f32 v0, s25, v7, -v0 ; D2820000 84020E19 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v1, s8, v0 ; 10020008 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s8, -v1 ; D2820000 84041100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s9 ; 7E025409 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[12:15] ; F0900F00 00640808 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[10:11] ; 88FE0A7E v_mul_f32_e32 v0, v2, v11 ; 10001702 v_mul_f32_e32 v1, v3, v10 ; 10021503 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mul_f32_e32 v1, v4, v9 ; 10021304 v_mul_f32_e32 v2, v5, v8 ; 10041105 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table number - table /media/ssd_plain/sliedes/_steam/steam/SteamApps/common/Sid Meier's Civilization V/steamassets/controller.vdf SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..2], LOCAL 0: MAD TEMP[0].xy, IN[0], CONST[0], CONST[0].zwzw 1: MUL TEMP[1], TEMP[0].yyyy, CONST[5] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[4], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[7] 4: ADD TEMP[1].x, CONST[3].xxxx, CONST[3].yyyy 5: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[0], CONST[1], IN[1] 7: MUL TEMP[2].w, TEMP[0].wwww, CONST[2].wwww 8: MOV TEMP[2].w, TEMP[2].wwww 9: MOV TEMP[2].xyz, TEMP[0].xyzx 10: MOV OUT[1], TEMP[2] 11: MOV OUT[0], TEMP[1] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fmul float %40, %13 %51 = fadd float %50, %15 %52 = fmul float %41, %14 %53 = fadd float %52, %16 %54 = fmul float %53, %28 %55 = fmul float %53, %29 %56 = fmul float %53, %30 %57 = fmul float %53, %31 %58 = fmul float %51, %24 %59 = fadd float %58, %54 %60 = fmul float %51, %25 %61 = fadd float %60, %55 %62 = fmul float %51, %26 %63 = fadd float %62, %56 %64 = fmul float %51, %27 %65 = fadd float %64, %57 %66 = fadd float %59, %32 %67 = fadd float %61, %33 %68 = fadd float %63, %34 %69 = fadd float %65, %35 %70 = fadd float %22, %23 %71 = fmul float %66, %70 %72 = fmul float %67, %70 %73 = fmul float %68, %70 %74 = fmul float %69, %70 %75 = fmul float %17, %46 %76 = fmul float %18, %47 %77 = fmul float %19, %48 %78 = fmul float %20, %49 %79 = fmul float %78, %21 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %75, float %76, float %77, float %79) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %71, float %72, float %73, float %74) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v3 ; 100A0608 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v1 ; 100E0208 s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v4 ; 10020808 s_buffer_load_dword s8, s[0:3], 0xb ; C204010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v0, v4 ; D2820004 04120005 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v0, s5, v1, v5 ; D2820000 04160205 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %24 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %25 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_cvt_pkrtz_f16_f32_e32 v0, v4, v3 ; 5E000704 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..24] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[3].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+4].zwzw 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xy, IN[0], CONST[ADDR[0].x+4], TEMP[1] 6: UARL ADDR[0].x, TEMP[0].xxxx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[2], CONST[ADDR[0].x+13].zwzw 9: UARL ADDR[0].x, TEMP[0].xxxx 10: MAD TEMP[2].xy, IN[1], CONST[ADDR[0].x+13], TEMP[2] 11: MOV TEMP[2].xy, TEMP[2].xyxx 12: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 13: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[3] 14: ADD TEMP[0], TEMP[0], CONST[3] 15: ADD TEMP[1].x, CONST[24].xxxx, CONST[24].yyyy 16: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 17: MUL TEMP[0], CONST[22], IN[2] 18: MUL TEMP[3].w, TEMP[0].wwww, CONST[23].wwww 19: MOV TEMP[3].w, TEMP[3].wwww 20: MOV TEMP[3].xyz, TEMP[0].xyzx 21: MOV TEMP[0].xy, IN[3].yzyy 22: MOV TEMP[2].zw, IMM[0].yyxy 23: MOV TEMP[0].zw, IMM[0].yyxy 24: MOV OUT[1], TEMP[3] 25: MOV OUT[2], TEMP[2] 26: MOV OUT[0], TEMP[1] 27: MOV OUT[3], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = fptosi float %56 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 72 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 76 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = bitcast float %60 to i32 %69 = shl i32 %68, 4 %70 = add i32 %69, 64 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %36, %71 %73 = fadd float %72, %64 %74 = shl i32 %68, 4 %75 = add i32 %74, 68 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %37, %76 %78 = fadd float %77, %67 %79 = bitcast float %60 to i32 %80 = shl i32 %79, 4 %81 = add i32 %80, 216 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %79, 4 %84 = add i32 %83, 220 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = bitcast float %60 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 208 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %42, %89 %91 = fadd float %90, %82 %92 = shl i32 %86, 4 %93 = add i32 %92, 212 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fmul float %43, %94 %96 = fadd float %95, %85 %97 = fmul float %78, %17 %98 = fmul float %78, %18 %99 = fmul float %78, %19 %100 = fmul float %78, %20 %101 = fmul float %73, %13 %102 = fadd float %101, %97 %103 = fmul float %73, %14 %104 = fadd float %103, %98 %105 = fmul float %73, %15 %106 = fadd float %105, %99 %107 = fmul float %73, %16 %108 = fadd float %107, %100 %109 = fadd float %102, %21 %110 = fadd float %104, %22 %111 = fadd float %106, %23 %112 = fadd float %108, %24 %113 = fadd float %30, %31 %114 = fmul float %109, %113 %115 = fmul float %110, %113 %116 = fmul float %111, %113 %117 = fmul float %112, %113 %118 = fmul float %25, %48 %119 = fmul float %26, %49 %120 = fmul float %27, %50 %121 = fmul float %28, %51 %122 = fmul float %121, %29 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %118, float %119, float %120, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %91, float %96, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %58, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[16:19], 0 idxen ; E00C2000 80040100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s16, s[0:3], 0x5a ; C208015A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s16, v3 ; 100A0610 s_buffer_load_dword s16, s[0:3], 0x59 ; C2080159 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s16, v2 ; 100C0410 s_buffer_load_dword s16, s[0:3], 0x58 ; C2080158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s16, v1 ; 100E0210 s_buffer_load_dword s16, s[0:3], 0x5b ; C208015B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v4 ; 10020810 s_buffer_load_dword s16, s[0:3], 0x5f ; C208015F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v1 ; 10020210 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v5, v1 ; 7E0A1101 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_add_i32_e32 v6, 0xdc, v5 ; 4A0C0AFF 000000DC buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 v_add_i32_e32 v7, 0xd4, v5 ; 4A0E0AFF 000000D4 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v9, v7, v6 ; D2820006 041A0F09 v_add_i32_e32 v7, 0xd8, v5 ; 4A0E0AFF 000000D8 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_add_i32_e32 v12, 0xd0, v5 ; 4A180AFF 000000D0 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v12, v7 ; D2820007 041E1908 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 33, 0, 0, 0, v7, v6, v9, v8 ; F800021F 08090607 exp 15, 34, 0, 0, 0, v2, v3, v9, v8 ; F800022F 08090302 s_waitcnt expcnt(0) ; BF8C070F v_add_i32_e32 v1, 0x48, v5 ; 4A020AFF 00000048 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 64, v5 ; 4A040AC0 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v2, v1 ; D2820000 04060506 v_add_i32_e32 v1, 0x4c, v5 ; 4A020AFF 0000004C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 0x44, v5 ; 4A040AFF 00000044 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v1 ; 10040204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x61 ; C2020161 s_buffer_load_dword s5, s[0:3], 0x60 ; C2028160 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_add_f32_e32 v3, s5, v3 ; 06060605 v_mul_f32_e32 v2, v3, v2 ; 10040503 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 v_mul_f32_e32 v4, v3, v4 ; 10080903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..1], LOCAL 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], IN[2].yyyy 3: MOV TEMP[1].xy, IN[1].xyyy 4: TEX TEMP[1], TEMP[1], SAMP[0], 2D 5: MAD TEMP[0], IN[2].xxxx, TEMP[1], TEMP[0] 6: MUL TEMP[0], TEMP[0], IN[0] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %35 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %36 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %37 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %38 = bitcast float %34 to i32 %39 = bitcast float %35 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <8 x i32> %27 to <32 x i8> %43 = bitcast <4 x i32> %29 to <16 x i8> %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %42, <16 x i8> %43, i32 2) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = extractelement <4 x float> %44, i32 3 %49 = fmul float %45, %37 %50 = fmul float %46, %37 %51 = fmul float %47, %37 %52 = fmul float %48, %37 %53 = bitcast float %34 to i32 %54 = bitcast float %35 to i32 %55 = insertelement <2 x i32> undef, i32 %53, i32 0 %56 = insertelement <2 x i32> %55, i32 %54, i32 1 %57 = bitcast <8 x i32> %23 to <32 x i8> %58 = bitcast <4 x i32> %25 to <16 x i8> %59 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %56, <32 x i8> %57, <16 x i8> %58, i32 2) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fmul float %36, %60 %65 = fadd float %64, %49 %66 = fmul float %36, %61 %67 = fadd float %66, %50 %68 = fmul float %36, %62 %69 = fadd float %68, %51 %70 = fmul float %36, %63 %71 = fadd float %70, %52 %72 = fmul float %65, %30 %73 = fmul float %67, %31 %74 = fmul float %69, %32 %75 = fmul float %71, %33 %76 = call i32 @llvm.SI.packf16(float %72, float %73) %77 = bitcast i32 %76 to float %78 = call i32 @llvm.SI.packf16(float %74, float %75) %79 = bitcast i32 %78 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %77, float %79, float %77, float %79) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800F00 00450402 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v8, v7 ; 10120F08 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030A02 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v2, v13, v9 ; D2820003 04261B02 v_interp_p1_f32 v9, v0, 3, 0, [m0] ; C8240300 v_interp_p2_f32 v9, [v9], v1, 3, 0, [m0] ; C8250301 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v9, v8, v6 ; 10120D08 v_mad_f32 v9, v2, v12, v9 ; D2820009 04261902 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_cvt_pkrtz_f16_f32_e32 v3, v9, v3 ; 5E060709 v_mul_f32_e32 v9, v8, v5 ; 10120B08 v_mad_f32 v9, v2, v11, v9 ; D2820009 04261702 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mad_f32 v2, v2, v10, v4 ; D2820002 04121502 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mul_f32_e32 v0, v4, v2 ; 10000504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v9 ; 5E001300 exp 15, 0, 1, 1, 1, v0, v3, v0, v3 ; F8001C0F 03000300 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v1 ; 06000204 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[0..4] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.5000, 1024.0000, 0.0000, 65280.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0].y, IMM[0].xxxx 1: ADD TEMP[1].x, IMM[0].yyyy, -CONST[2].xxxx 2: MOV TEMP[0].x, TEMP[1].xxxx 3: FSNE TEMP[2].x, CONST[0].xxxx, -CONST[0].xxxx 4: UIF TEMP[2].xxxx :0 5: FSLT TEMP[2].x, IMM[0].xxxx, CONST[1].xxxx 6: UIF TEMP[2].xxxx :0 7: MUL TEMP[2].zw, CONST[3].xyxy, IN[1].xyxy 8: MOV TEMP[0].zw, TEMP[2].wwzw 9: FRC TEMP[2].xy, TEMP[2].zwzw 10: ADD TEMP[3].zw, TEMP[0], -TEMP[2].xyxy 11: MOV TEMP[0].zw, TEMP[3].wwzw 12: MAD TEMP[3].xy, IN[1], CONST[3], -TEMP[3].zwzw 13: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 14: RCP TEMP[5].x, CONST[3].xxxx 15: RCP TEMP[6].x, CONST[3].yyyy 16: MOV TEMP[5].y, TEMP[6].xxxx 17: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[5] 18: MOV TEMP[4].xy, TEMP[4].xyyy 19: MOV TEMP[4].w, IMM[0].zzzz 20: TXL TEMP[4], TEMP[4], SAMP[0], 2D 21: MOV TEMP[5].zw, TEMP[4] 22: DP2 TEMP[6].x, TEMP[4].wxxx, IMM[1].xyyy 23: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 24: FRC TEMP[7].w, TEMP[6].xxxx 25: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].xxxx 26: MOV TEMP[0].z, TEMP[6].zzzz 27: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 28: FRC TEMP[6].z, TEMP[4].wwww 29: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 30: MOV TEMP[0].w, TEMP[4].wwww 31: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 32: FSGE TEMP[6].x, TEMP[1].xxxx, IMM[0].zzzz 33: UIF TEMP[6].xxxx :0 34: MOV TEMP[6].x, TEMP[4].wwww 35: ELSE :0 36: MOV TEMP[6].x, TEMP[4].zzzz 37: ENDIF 38: RCP TEMP[4].x, CONST[1].xxxx 39: MAD TEMP[4].w, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].xxxx 40: FRC TEMP[7].z, TEMP[4].wwww 41: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[7].zzzz 42: MUL TEMP[7].z, TEMP[4].wwww, TEMP[6].xxxx 43: MOV TEMP[8].x, -TEMP[4].wwww 44: FSGE TEMP[7].x, TEMP[7].zzzz, IMM[0].zzzz 45: UIF TEMP[7].xxxx :0 46: MOV TEMP[7].x, TEMP[4].wwww 47: ELSE :0 48: MOV TEMP[7].x, TEMP[8].xxxx 49: ENDIF 50: MOV TEMP[2].z, TEMP[7].xxxx 51: RCP TEMP[8].x, TEMP[7].xxxx 52: MUL TEMP[8].w, TEMP[6].xxxx, TEMP[8].xxxx 53: FRC TEMP[8].w, TEMP[8].wwww 54: MOV TEMP[2].w, TEMP[8].wwww 55: MUL TEMP[7].x, TEMP[8].wwww, TEMP[7].xxxx 56: MOV TEMP[5].x, TEMP[7].xxxx 57: RCP TEMP[4].x, TEMP[4].wwww 58: MUL TEMP[4].y, TEMP[4].xxxx, TEMP[6].xxxx 59: MOV TEMP[5].y, TEMP[4].yyyy 60: FRC TEMP[4].zw, TEMP[5].xyxy 61: MOV TEMP[0].zw, TEMP[4].wwzw 62: ADD TEMP[4].zw, -TEMP[0], TEMP[5].xyxy 63: MOV TEMP[0].zw, TEMP[4].wwzw 64: ADD TEMP[3].zw, TEMP[3].xyxy, TEMP[0] 65: MOV TEMP[0].zw, TEMP[3].wwzw 66: MUL TEMP[3].zw, TEMP[0], CONST[1].xxxx 67: MOV TEMP[0].zw, TEMP[3].wwzw 68: FRC TEMP[3].xy, TEMP[3].zwzw 69: ADD TEMP[3].zw, TEMP[0], -TEMP[3].xyxy 70: MOV TEMP[0].zw, TEMP[3].wwzw 71: ADD TEMP[3].zw, TEMP[0], IMM[0].xxxx 72: MOV TEMP[0].w, TEMP[3].wwzw 73: RCP TEMP[2].x, CONST[4].xxxx 74: RCP TEMP[4].x, CONST[4].yyyy 75: MOV TEMP[2].y, TEMP[4].xxxx 76: MUL TEMP[3].xy, TEMP[3].zwzw, TEMP[2] 77: MOV TEMP[3].xy, TEMP[3].xyyy 78: MOV TEMP[3].w, IMM[0].zzzz 79: TXL TEMP[3], TEMP[3], SAMP[1], 2D 80: MOV TEMP[2], TEMP[3] 81: MOV TEMP[0].z, TEMP[3].wwww 82: ELSE :0 83: MOV TEMP[3].xy, IN[1].xyyy 84: TEX TEMP[3], TEMP[3], SAMP[1], 2D 85: MOV TEMP[2], TEMP[3] 86: MOV TEMP[0].z, TEMP[3].wwww 87: ENDIF 88: MUL TEMP[3].w, TEMP[0].zzzz, IN[0].wwww 89: MOV TEMP[3].w, TEMP[3].wwww 90: MOV TEMP[3].xyz, IN[0].xyzx 91: ELSE :0 92: FSLT TEMP[4].x, IMM[0].xxxx, CONST[1].xxxx 93: UIF TEMP[4].xxxx :0 94: MUL TEMP[4].zw, CONST[3].xyxy, IN[1].xyxy 95: MOV TEMP[0].zw, TEMP[4].wwzw 96: FRC TEMP[4].xy, TEMP[4].zwzw 97: MOV TEMP[2].xy, TEMP[4].xyxx 98: ADD TEMP[4].zw, TEMP[0], -TEMP[4].xyxy 99: MOV TEMP[0].zw, TEMP[4].wwzw 100: MAD TEMP[4].xy, IN[1], CONST[3], -TEMP[4].zwzw 101: MOV TEMP[2].xy, TEMP[4].xyxx 102: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 103: RCP TEMP[5].x, CONST[3].xxxx 104: RCP TEMP[6].x, CONST[3].yyyy 105: MOV TEMP[5].y, TEMP[6].xxxx 106: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[5] 107: MOV TEMP[4].xy, TEMP[4].xyyy 108: MOV TEMP[4].w, IMM[0].zzzz 109: TXL TEMP[4], TEMP[4], SAMP[0], 2D 110: MOV TEMP[5].zw, TEMP[4] 111: DP2 TEMP[6].x, TEMP[4].wxxx, IMM[1].xyyy 112: ADD TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 113: FRC TEMP[7].w, TEMP[6].xxxx 114: ADD TEMP[6].z, -TEMP[7].wwww, TEMP[6].xxxx 115: MOV TEMP[0].z, TEMP[6].zzzz 116: MAD TEMP[4].w, CONST[2].xxxx, TEMP[4].xxxx, IMM[0].xxxx 117: FRC TEMP[6].z, TEMP[4].wwww 118: MOV TEMP[2].z, TEMP[6].zzzz 119: ADD TEMP[4].w, TEMP[4].wwww, -TEMP[6].zzzz 120: MOV TEMP[0].w, TEMP[4].wwww 121: ADD TEMP[4].zw, TEMP[0], IMM[0].xxxx 122: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 123: UIF TEMP[1].xxxx :0 124: MOV TEMP[1].x, TEMP[4].wwww 125: ELSE :0 126: MOV TEMP[1].x, TEMP[4].zzzz 127: ENDIF 128: MOV TEMP[0].x, TEMP[1].xxxx 129: RCP TEMP[4].x, CONST[1].xxxx 130: MAD TEMP[4].y, CONST[4].xxxx, TEMP[4].xxxx, IMM[0].xxxx 131: FRC TEMP[6].z, TEMP[4].yyyy 132: ADD TEMP[4].y, -TEMP[6].zzzz, TEMP[4].yyyy 133: MUL TEMP[6].z, TEMP[4].yyyy, TEMP[1].xxxx 134: MOV TEMP[7].x, -TEMP[4].yyyy 135: FSGE TEMP[6].x, TEMP[6].zzzz, IMM[0].zzzz 136: UIF TEMP[6].xxxx :0 137: MOV TEMP[6].x, TEMP[4].yyyy 138: ELSE :0 139: MOV TEMP[6].x, TEMP[7].xxxx 140: ENDIF 141: MOV TEMP[0].z, TEMP[6].xxxx 142: RCP TEMP[6].x, TEMP[6].xxxx 143: MUL TEMP[1].w, TEMP[6].xxxx, TEMP[1].xxxx 144: FRC TEMP[1].w, TEMP[1].wwww 145: MOV TEMP[0].w, TEMP[1].wwww 146: RCP TEMP[1].x, TEMP[4].yyyy 147: MOV TEMP[0].y, TEMP[1].xxxx 148: MUL TEMP[1].xy, TEMP[0].wyzw, TEMP[0].zxzw 149: MOV TEMP[5].xy, TEMP[1].xyxx 150: FRC TEMP[1].xy, TEMP[5] 151: MOV TEMP[0].xy, TEMP[1].xyxx 152: ADD TEMP[1].xy, -TEMP[0], TEMP[5] 153: MOV TEMP[0].xy, TEMP[1].xyxx 154: ADD TEMP[1].xy, TEMP[2], TEMP[0] 155: MOV TEMP[0].xy, TEMP[1].xyxx 156: MUL TEMP[1].xy, TEMP[0], CONST[1].xxxx 157: MOV TEMP[0].xy, TEMP[1].xyxx 158: FRC TEMP[1].zw, TEMP[1].xyxy 159: MOV TEMP[0].zw, TEMP[1].wwzw 160: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[0] 161: MOV TEMP[0].xy, TEMP[1].xyxx 162: ADD TEMP[1].xy, TEMP[0], IMM[0].xxxx 163: MOV TEMP[0].xy, TEMP[1].xyxx 164: RCP TEMP[2].x, CONST[4].xxxx 165: RCP TEMP[1].x, CONST[4].yyyy 166: MOV TEMP[2].y, TEMP[1].xxxx 167: MUL TEMP[1].xy, TEMP[0], TEMP[2] 168: MOV TEMP[1].xy, TEMP[1].xyyy 169: MOV TEMP[1].w, IMM[0].zzzz 170: TXL TEMP[1], TEMP[1], SAMP[1], 2D 171: MOV TEMP[0].xyz, TEMP[1] 172: MOV TEMP[2].x, TEMP[1].wwww 173: ELSE :0 174: MOV TEMP[1].xy, IN[1].xyyy 175: TEX TEMP[1], TEMP[1], SAMP[1], 2D 176: MOV TEMP[0].xyz, TEMP[1] 177: MOV TEMP[2].x, TEMP[1].wwww 178: ENDIF 179: MUL TEMP[1].w, TEMP[2].xxxx, IN[0].wwww 180: MOV TEMP[0].w, TEMP[1].wwww 181: MOV TEMP[3], TEMP[0] 182: ENDIF 183: MOV OUT[0], TEMP[3] 184: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %31 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %32 = load <8 x i32> addrspace(2)* %31, !tbaa !0 %33 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %34 = load <4 x i32> addrspace(2)* %33, !tbaa !0 %35 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %36 = load <8 x i32> addrspace(2)* %35, !tbaa !0 %37 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %38 = load <4 x i32> addrspace(2)* %37, !tbaa !0 %39 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %40 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %42 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %45 = fsub float -0.000000e+00, %26 %46 = fadd float 1.024000e+03, %45 %47 = fsub float -0.000000e+00, %24 %48 = fcmp une float %24, %47 %49 = sext i1 %48 to i32 %50 = bitcast i32 %49 to float %51 = bitcast float %50 to i32 %52 = icmp ne i32 %51, 0 %53 = fcmp olt float 5.000000e-01, %25 %54 = sext i1 %53 to i32 %55 = bitcast i32 %54 to float %56 = bitcast float %55 to i32 %57 = icmp ne i32 %56, 0 br i1 %52, label %IF, label %ELSE IF: ; preds = %main_body br i1 %57, label %IF37, label %ELSE38 ELSE: ; preds = %main_body br i1 %57, label %IF46, label %ELSE47 ENDIF: ; preds = %IF37, %ELSE38, %ENDIF45 %.sink56.sink = phi <4 x float> [ %.sink56, %ENDIF45 ], [ %172, %ELSE38 ], [ %165, %IF37 ] %temp12.0 = phi float [ %282, %ENDIF45 ], [ %39, %ELSE38 ], [ %39, %IF37 ] %temp13.0 = phi float [ %283, %ENDIF45 ], [ %40, %ELSE38 ], [ %40, %IF37 ] %temp14.0 = phi float [ %284, %ENDIF45 ], [ %41, %ELSE38 ], [ %41, %IF37 ] %58 = extractelement <4 x float> %.sink56.sink, i32 3 %59 = fmul float %58, %42 %60 = call i32 @llvm.SI.packf16(float %temp12.0, float %temp13.0) %61 = bitcast i32 %60 to float %62 = call i32 @llvm.SI.packf16(float %temp14.0, float %59) %63 = bitcast i32 %62 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %61, float %63, float %61, float %63) ret void IF37: ; preds = %IF %64 = fmul float %27, %43 %65 = fmul float %28, %44 %66 = call float @llvm.AMDIL.fraction.(float %64) %67 = call float @llvm.AMDIL.fraction.(float %65) %68 = fsub float -0.000000e+00, %66 %69 = fadd float %64, %68 %70 = fsub float -0.000000e+00, %67 %71 = fadd float %65, %70 %72 = fsub float -0.000000e+00, %69 %73 = fmul float %43, %27 %74 = fadd float %73, %72 %75 = fsub float -0.000000e+00, %71 %76 = fmul float %44, %28 %77 = fadd float %76, %75 %78 = fadd float %69, 5.000000e-01 %79 = fadd float %71, 5.000000e-01 %80 = fdiv float 1.000000e+00, %27 %81 = fdiv float 1.000000e+00, %28 %82 = fmul float %78, %80 %83 = fmul float %79, %81 %84 = bitcast float %82 to i32 %85 = bitcast float %83 to i32 %86 = bitcast float 0.000000e+00 to i32 %87 = insertelement <4 x i32> undef, i32 %84, i32 0 %88 = insertelement <4 x i32> %87, i32 %85, i32 1 %89 = insertelement <4 x i32> %88, i32 %86, i32 2 %90 = insertelement <4 x i32> %89, i32 undef, i32 3 %91 = bitcast <8 x i32> %32 to <32 x i8> %92 = bitcast <4 x i32> %34 to <16 x i8> %93 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %90, <32 x i8> %91, <16 x i8> %92, i32 2) %94 = extractelement <4 x float> %93, i32 0 %95 = extractelement <4 x float> %93, i32 3 %96 = fmul float %95, 6.528000e+04 %97 = fmul float %94, 2.550000e+02 %98 = fadd float %96, %97 %99 = fadd float %98, 5.000000e-01 %100 = call float @llvm.AMDIL.fraction.(float %99) %101 = fsub float -0.000000e+00, %100 %102 = fadd float %101, %99 %103 = fmul float %26, %94 %104 = fadd float %103, 5.000000e-01 %105 = call float @llvm.AMDIL.fraction.(float %104) %106 = fsub float -0.000000e+00, %105 %107 = fadd float %104, %106 %108 = fadd float %102, 5.000000e-01 %109 = fadd float %107, 5.000000e-01 %110 = fcmp oge float %46, 0.000000e+00 %111 = sext i1 %110 to i32 %112 = bitcast i32 %111 to float %113 = bitcast float %112 to i32 %114 = icmp ne i32 %113, 0 %. = select i1 %114, float %109, float %108 %115 = fdiv float 1.000000e+00, %25 %116 = fmul float %29, %115 %117 = fadd float %116, 5.000000e-01 %118 = call float @llvm.AMDIL.fraction.(float %117) %119 = fsub float -0.000000e+00, %118 %120 = fadd float %117, %119 %121 = fmul float %120, %. %122 = fsub float -0.000000e+00, %120 %123 = fcmp oge float %121, 0.000000e+00 %124 = sext i1 %123 to i32 %125 = bitcast i32 %124 to float %126 = bitcast float %125 to i32 %127 = icmp ne i32 %126, 0 %temp28.0 = select i1 %127, float %120, float %122 %128 = fdiv float 1.000000e+00, %temp28.0 %129 = fmul float %., %128 %130 = call float @llvm.AMDIL.fraction.(float %129) %131 = fmul float %130, %temp28.0 %132 = fdiv float 1.000000e+00, %120 %133 = fmul float %132, %. %134 = call float @llvm.AMDIL.fraction.(float %131) %135 = call float @llvm.AMDIL.fraction.(float %133) %136 = fsub float -0.000000e+00, %134 %137 = fadd float %136, %131 %138 = fsub float -0.000000e+00, %135 %139 = fadd float %138, %133 %140 = fadd float %74, %137 %141 = fadd float %77, %139 %142 = fmul float %140, %25 %143 = fmul float %141, %25 %144 = call float @llvm.AMDIL.fraction.(float %142) %145 = call float @llvm.AMDIL.fraction.(float %143) %146 = fsub float -0.000000e+00, %144 %147 = fadd float %142, %146 %148 = fsub float -0.000000e+00, %145 %149 = fadd float %143, %148 %150 = fadd float %147, 5.000000e-01 %151 = fadd float %149, 5.000000e-01 %152 = fdiv float 1.000000e+00, %29 %153 = fdiv float 1.000000e+00, %30 %154 = fmul float %150, %152 %155 = fmul float %151, %153 %156 = bitcast float %154 to i32 %157 = bitcast float %155 to i32 %158 = bitcast float 0.000000e+00 to i32 %159 = insertelement <4 x i32> undef, i32 %156, i32 0 %160 = insertelement <4 x i32> %159, i32 %157, i32 1 %161 = insertelement <4 x i32> %160, i32 %158, i32 2 %162 = insertelement <4 x i32> %161, i32 undef, i32 3 %163 = bitcast <8 x i32> %36 to <32 x i8> %164 = bitcast <4 x i32> %38 to <16 x i8> %165 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %162, <32 x i8> %163, <16 x i8> %164, i32 2) br label %ENDIF ELSE38: ; preds = %IF %166 = bitcast float %43 to i32 %167 = bitcast float %44 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %36 to <32 x i8> %171 = bitcast <4 x i32> %38 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) br label %ENDIF IF46: ; preds = %ELSE %173 = fmul float %27, %43 %174 = fmul float %28, %44 %175 = call float @llvm.AMDIL.fraction.(float %173) %176 = call float @llvm.AMDIL.fraction.(float %174) %177 = fsub float -0.000000e+00, %175 %178 = fadd float %173, %177 %179 = fsub float -0.000000e+00, %176 %180 = fadd float %174, %179 %181 = fsub float -0.000000e+00, %178 %182 = fmul float %43, %27 %183 = fadd float %182, %181 %184 = fsub float -0.000000e+00, %180 %185 = fmul float %44, %28 %186 = fadd float %185, %184 %187 = fadd float %178, 5.000000e-01 %188 = fadd float %180, 5.000000e-01 %189 = fdiv float 1.000000e+00, %27 %190 = fdiv float 1.000000e+00, %28 %191 = fmul float %187, %189 %192 = fmul float %188, %190 %193 = bitcast float %191 to i32 %194 = bitcast float %192 to i32 %195 = bitcast float 0.000000e+00 to i32 %196 = insertelement <4 x i32> undef, i32 %193, i32 0 %197 = insertelement <4 x i32> %196, i32 %194, i32 1 %198 = insertelement <4 x i32> %197, i32 %195, i32 2 %199 = insertelement <4 x i32> %198, i32 undef, i32 3 %200 = bitcast <8 x i32> %32 to <32 x i8> %201 = bitcast <4 x i32> %34 to <16 x i8> %202 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %199, <32 x i8> %200, <16 x i8> %201, i32 2) %203 = extractelement <4 x float> %202, i32 0 %204 = extractelement <4 x float> %202, i32 3 %205 = fmul float %204, 6.528000e+04 %206 = fmul float %203, 2.550000e+02 %207 = fadd float %205, %206 %208 = fadd float %207, 5.000000e-01 %209 = call float @llvm.AMDIL.fraction.(float %208) %210 = fsub float -0.000000e+00, %209 %211 = fadd float %210, %208 %212 = fmul float %26, %203 %213 = fadd float %212, 5.000000e-01 %214 = call float @llvm.AMDIL.fraction.(float %213) %215 = fsub float -0.000000e+00, %214 %216 = fadd float %213, %215 %217 = fadd float %211, 5.000000e-01 %218 = fadd float %216, 5.000000e-01 %219 = fcmp oge float %46, 0.000000e+00 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 %.55 = select i1 %223, float %218, float %217 %224 = fdiv float 1.000000e+00, %25 %225 = fmul float %29, %224 %226 = fadd float %225, 5.000000e-01 %227 = call float @llvm.AMDIL.fraction.(float %226) %228 = fsub float -0.000000e+00, %227 %229 = fadd float %228, %226 %230 = fmul float %229, %.55 %231 = fsub float -0.000000e+00, %229 %232 = fcmp oge float %230, 0.000000e+00 %233 = sext i1 %232 to i32 %234 = bitcast i32 %233 to float %235 = bitcast float %234 to i32 %236 = icmp ne i32 %235, 0 %temp24.1 = select i1 %236, float %229, float %231 %237 = fdiv float 1.000000e+00, %temp24.1 %238 = fmul float %237, %.55 %239 = call float @llvm.AMDIL.fraction.(float %238) %240 = fdiv float 1.000000e+00, %229 %241 = fmul float %239, %temp24.1 %242 = fmul float %240, %.55 %243 = call float @llvm.AMDIL.fraction.(float %241) %244 = call float @llvm.AMDIL.fraction.(float %242) %245 = fsub float -0.000000e+00, %243 %246 = fadd float %245, %241 %247 = fsub float -0.000000e+00, %244 %248 = fadd float %247, %242 %249 = fadd float %183, %246 %250 = fadd float %186, %248 %251 = fmul float %249, %25 %252 = fmul float %250, %25 %253 = call float @llvm.AMDIL.fraction.(float %251) %254 = call float @llvm.AMDIL.fraction.(float %252) %255 = fsub float -0.000000e+00, %253 %256 = fadd float %255, %251 %257 = fsub float -0.000000e+00, %254 %258 = fadd float %257, %252 %259 = fadd float %256, 5.000000e-01 %260 = fadd float %258, 5.000000e-01 %261 = fdiv float 1.000000e+00, %29 %262 = fdiv float 1.000000e+00, %30 %263 = fmul float %259, %261 %264 = fmul float %260, %262 %265 = bitcast float %263 to i32 %266 = bitcast float %264 to i32 %267 = bitcast float 0.000000e+00 to i32 %268 = insertelement <4 x i32> undef, i32 %265, i32 0 %269 = insertelement <4 x i32> %268, i32 %266, i32 1 %270 = insertelement <4 x i32> %269, i32 %267, i32 2 %271 = insertelement <4 x i32> %270, i32 undef, i32 3 %272 = bitcast <8 x i32> %36 to <32 x i8> %273 = bitcast <4 x i32> %38 to <16 x i8> %274 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %271, <32 x i8> %272, <16 x i8> %273, i32 2) br label %ENDIF45 ELSE47: ; preds = %ELSE %275 = bitcast float %43 to i32 %276 = bitcast float %44 to i32 %277 = insertelement <2 x i32> undef, i32 %275, i32 0 %278 = insertelement <2 x i32> %277, i32 %276, i32 1 %279 = bitcast <8 x i32> %36 to <32 x i8> %280 = bitcast <4 x i32> %38 to <16 x i8> %281 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %278, <32 x i8> %279, <16 x i8> %280, i32 2) br label %ENDIF45 ENDIF45: ; preds = %ELSE47, %IF46 %.sink56 = phi <4 x float> [ %281, %ELSE47 ], [ %274, %IF46 ] %282 = extractelement <4 x float> %.sink56, i32 0 %283 = extractelement <4 x float> %.sink56, i32 1 %284 = extractelement <4 x float> %.sink56, i32 2 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[32:35], 0x0 ; C2002100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_neq_f32_e64 s[0:1], s0, -s0 ; D01A0000 40000000 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_eq_i32_e64 s[36:37], v0, 0 ; D1040024 00010100 s_buffer_load_dword s0, s[32:35], 0x4 ; C2002104 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[2:3], s0, 0.5 ; D0080002 0001E000 v_cndmask_b32_e64 v1, 0, -1, s[2:3] ; D2000801 00098280 s_buffer_load_dword s1, s[32:35], 0x8 ; C200A108 v_mov_b32_e32 v0, 0x44800000 ; 7E0002FF 44800000 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s1, v0 ; 0A000001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_buffer_load_dword s2, s[32:35], 0x11 ; C2012111 s_buffer_load_dword s3, s[32:35], 0x10 ; C201A110 s_buffer_load_dword s6, s[32:35], 0xd ; C203210D s_buffer_load_dword s7, s[32:35], 0xc ; C203A10C s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[4:5], s[36:37] ; BE842424 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_11 ; BF880000 v_cmp_eq_i32_e64 s[32:33], v1, 0 ; D1040020 00010101 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[12:15] ; F0800F00 00660803 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_xor_b64 exec, exec, s[32:33] ; 89FE207E s_cbranch_execz BB0_12 ; BF880000 v_mul_f32_e32 v8, s6, v4 ; 10100806 v_fract_f32_e32 v8, v8 ; 7E104108 v_mad_f32 v8, s6, v4, -v8 ; D2820008 84220806 v_add_f32_e32 v9, 0.5, v8 ; 061210F0 v_rcp_f32_e32 v10, s6 ; 7E145406 v_mul_f32_e32 v10, v10, v9 ; 1014130A v_mul_f32_e32 v13, s7, v3 ; 101A0607 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v13, s7, v3, -v13 ; D282000D 84360607 v_add_f32_e32 v14, 0.5, v13 ; 061C1AF0 v_rcp_f32_e32 v15, s7 ; 7E1E5407 v_mul_f32_e32 v9, v15, v14 ; 10121D0F v_mov_b32_e32 v11, 0 ; 7E160280 image_sample_l v[14:15], 9, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[16:23], s[8:11] ; F0900900 00440E09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, 0x437f0000, v14 ; 10201CFF 437F0000 v_mov_b32_e32 v17, 0x477f0000 ; 7E2202FF 477F0000 v_mad_f32 v16, v17, v15, v16 ; D2820010 04421F11 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_fract_f32_e32 v17, v16 ; 7E224110 v_subrev_f32_e32 v16, v17, v16 ; 0A202111 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_mad_f32 v14, s1, v14, 0.5 ; D282000E 03C21C01 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_cmp_ge_f32_e64 s[34:35], v0, 0 ; D00C0022 00010100 v_cndmask_b32_e64 v15, 0, -1, s[34:35] ; D200080F 00898280 v_cmp_ne_i32_e64 s[34:35], v15, 0 ; D10A0022 0001010F v_cndmask_b32_e64 v14, v16, v14, s[34:35] ; D200000E 008A1D10 v_rcp_f32_e32 v15, s0 ; 7E1E5400 v_mad_f32 v15, s3, v15, 0.5 ; D282000F 03C21E03 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_cmp_ge_f32_e64 s[34:35], v16, 0 ; D00C0022 00010110 v_cndmask_b32_e64 v16, 0, -1, s[34:35] ; D2000010 00898280 v_cmp_ne_i32_e64 s[34:35], v16, 0 ; D10A0022 00010110 v_xor_b32_e32 v16, 0x80000000, v15 ; 3A201EFF 80000000 v_cndmask_b32_e64 v16, v16, v15, s[34:35] ; D2000010 008A1F10 v_rcp_f32_e32 v17, v16 ; 7E225510 v_mul_f32_e32 v17, v14, v17 ; 1022230E v_fract_f32_e32 v17, v17 ; 7E224111 v_mul_f32_e32 v18, v16, v17 ; 10242310 v_fract_f32_e32 v18, v18 ; 7E244112 v_mad_f32 v16, v17, v16, -v18 ; D2820010 844A2111 v_mad_f32 v13, s7, v3, -v13 ; D282000D 84360607 v_add_f32_e32 v13, v16, v13 ; 061A1B10 v_mul_f32_e32 v16, s0, v13 ; 10201A00 v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v13, v13, s0, -v16 ; D282000D 8440010D v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_rcp_f32_e32 v16, s3 ; 7E205403 v_mul_f32_e32 v9, v16, v13 ; 10121B10 v_rcp_f32_e32 v13, v15 ; 7E1A550F v_mul_f32_e32 v15, v14, v13 ; 101E1B0E v_fract_f32_e32 v15, v15 ; 7E1E410F v_mad_f32 v13, v13, v14, -v15 ; D282000D 843E1D0D v_mad_f32 v8, s6, v4, -v8 ; D2820008 84220806 v_add_f32_e32 v8, v13, v8 ; 0610110D v_mul_f32_e32 v13, s0, v8 ; 101A1000 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v8, v8, s0, -v13 ; D2820008 84340108 v_add_f32_e32 v8, 0.5, v8 ; 061010F0 v_rcp_f32_e32 v13, s2 ; 7E1A5402 v_mul_f32_e32 v10, v13, v8 ; 1014110D image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[24:31], s[12:15] ; F0900F00 00660809 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mov_b32_e32 v12, v8 ; 7E180308 v_mov_b32_e32 v13, v9 ; 7E1A0309 v_mov_b32_e32 v14, v10 ; 7E1C030A v_mov_b32_e32 v15, v11 ; 7E1E030B s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E s_cbranch_execz BB0_4 ; BF880000 v_cmp_eq_i32_e64 s[32:33], v1, 0 ; D1040020 00010101 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[12:15] ; F0800F00 00660C03 v_mov_b32_e32 v10, v5 ; 7E140305 v_mov_b32_e32 v9, v6 ; 7E120306 v_mov_b32_e32 v8, v7 ; 7E100307 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_waitcnt expcnt(0) ; BF8C070F s_xor_b64 exec, exec, s[32:33] ; 89FE207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v1, s6, v4 ; 10020806 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s6, v4, -v1 ; D2820001 84060806 v_add_f32_e32 v8, 0.5, v1 ; 061002F0 v_rcp_f32_e32 v9, s6 ; 7E125406 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mul_f32_e32 v12, s7, v3 ; 10180607 v_fract_f32_e32 v12, v12 ; 7E18410C v_mad_f32 v12, s7, v3, -v12 ; D282000C 84320607 v_add_f32_e32 v13, 0.5, v12 ; 061A18F0 v_rcp_f32_e32 v14, s7 ; 7E1C5407 v_mul_f32_e32 v8, v14, v13 ; 10101B0E v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_l v[13:14], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[8:11] ; F0900900 00440D08 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, 0x437f0000, v13 ; 101E1AFF 437F0000 v_mov_b32_e32 v16, 0x477f0000 ; 7E2002FF 477F0000 v_mad_f32 v15, v16, v14, v15 ; D282000F 043E1D10 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_mad_f32 v13, s1, v13, 0.5 ; D282000D 03C21A01 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_cmp_ge_f32_e64 s[34:35], v0, 0 ; D00C0022 00010100 v_cndmask_b32_e64 v0, 0, -1, s[34:35] ; D2000000 00898280 v_cmp_ne_i32_e64 s[34:35], v0, 0 ; D10A0022 00010100 v_cndmask_b32_e64 v0, v15, v13, s[34:35] ; D2000000 188A1B0F v_rcp_f32_e32 v13, s0 ; 7E1A5400 v_mad_f32 v13, s3, v13, 0.5 ; D282000D 03C21A03 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v0, v13 ; 101C1B00 v_cmp_ge_f32_e64 s[34:35], v14, 0 ; D00C0022 0001010E v_cndmask_b32_e64 v14, 0, -1, s[34:35] ; D200000E 00898280 v_cmp_ne_i32_e64 s[34:35], v14, 0 ; D10A0022 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[34:35] ; D200000E 108A1B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v15, v0 ; 101E010F v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v12, s7, v3, -v12 ; D282000C 84320607 v_add_f32_e32 v12, v14, v12 ; 0618190E v_mul_f32_e32 v14, s0, v12 ; 101C1800 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v12, v12, s0, -v14 ; D282000C 8438010C v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_rcp_f32_e32 v14, s3 ; 7E1C5403 v_mul_f32_e32 v8, v14, v12 ; 1010190E v_rcp_f32_e32 v12, v13 ; 7E18550D v_mul_f32_e32 v13, v0, v12 ; 101A1900 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v0, v12, v0, -v13 ; D2820000 8436010C v_mad_f32 v1, s6, v4, -v1 ; D2820001 84060806 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s0, -v1 ; D2820000 84040100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s2 ; 7E025402 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[24:31], s[12:15] ; F0900F00 00660C08 v_mov_b32_e32 v10, v5 ; 7E140305 v_mov_b32_e32 v9, v6 ; 7E120306 v_mov_b32_e32 v8, v7 ; 7E100307 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[32:33] ; 88FE207E s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 v_mul_f32_e32 v1, v2, v15 ; 10021F02 v_cvt_pkrtz_f16_f32_e32 v1, v10, v1 ; 5E02030A exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL CONST[0..24] DCL TEMP[0..3], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: F2I TEMP[0].x, IN[3].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+4].zwzw 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xy, IN[0], CONST[ADDR[0].x+4], TEMP[1] 6: UARL ADDR[0].x, TEMP[0].xxxx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[2], CONST[ADDR[0].x+13].zwzw 9: UARL ADDR[0].x, TEMP[0].xxxx 10: MAD TEMP[2].xy, IN[1], CONST[ADDR[0].x+13], TEMP[2] 11: MOV TEMP[2].xy, TEMP[2].xyxx 12: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 13: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[3] 14: ADD TEMP[0], TEMP[0], CONST[3] 15: ADD TEMP[1].x, CONST[24].xxxx, CONST[24].yyyy 16: MUL TEMP[1], TEMP[0], TEMP[1].xxxx 17: MUL TEMP[0], CONST[22], IN[2] 18: MUL TEMP[3].w, TEMP[0].wwww, CONST[23].wwww 19: MOV TEMP[3].w, TEMP[3].wwww 20: MOV TEMP[3].xyz, TEMP[0].xyzx 21: MOV TEMP[0].xy, IN[3].yzyy 22: MOV TEMP[2].zw, IMM[0].yyxy 23: MOV TEMP[0].zw, IMM[0].yyxy 24: MOV OUT[1], TEMP[3] 25: MOV OUT[2], TEMP[2] 26: MOV OUT[0], TEMP[1] 27: MOV OUT[3], TEMP[0] 28: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 352) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 356) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 360) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 364) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 380) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 384) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 388) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = extractelement <4 x float> %47, i32 2 %51 = extractelement <4 x float> %47, i32 3 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = extractelement <4 x float> %55, i32 2 %59 = fptosi float %56 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 72 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 76 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = bitcast float %60 to i32 %69 = shl i32 %68, 4 %70 = add i32 %69, 64 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %36, %71 %73 = fadd float %72, %64 %74 = shl i32 %68, 4 %75 = add i32 %74, 68 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %37, %76 %78 = fadd float %77, %67 %79 = bitcast float %60 to i32 %80 = shl i32 %79, 4 %81 = add i32 %80, 216 %82 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %81) %83 = shl i32 %79, 4 %84 = add i32 %83, 220 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = bitcast float %60 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 208 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %42, %89 %91 = fadd float %90, %82 %92 = shl i32 %86, 4 %93 = add i32 %92, 212 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fmul float %43, %94 %96 = fadd float %95, %85 %97 = fmul float %78, %17 %98 = fmul float %78, %18 %99 = fmul float %78, %19 %100 = fmul float %78, %20 %101 = fmul float %73, %13 %102 = fadd float %101, %97 %103 = fmul float %73, %14 %104 = fadd float %103, %98 %105 = fmul float %73, %15 %106 = fadd float %105, %99 %107 = fmul float %73, %16 %108 = fadd float %107, %100 %109 = fadd float %102, %21 %110 = fadd float %104, %22 %111 = fadd float %106, %23 %112 = fadd float %108, %24 %113 = fadd float %30, %31 %114 = fmul float %109, %113 %115 = fmul float %110, %113 %116 = fmul float %111, %113 %117 = fmul float %112, %113 %118 = fmul float %25, %48 %119 = fmul float %26, %49 %120 = fmul float %27, %50 %121 = fmul float %28, %51 %122 = fmul float %121, %29 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %118, float %119, float %120, float %122) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %91, float %96, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %57, float %58, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %114, float %115, float %116, float %117) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[8:11], s[8:9], 0xc ; C084090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[16:19], 0 idxen ; E00C2000 80040100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s16, s[0:3], 0x5a ; C208015A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s16, v3 ; 100A0610 s_buffer_load_dword s16, s[0:3], 0x59 ; C2080159 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s16, v2 ; 100C0410 s_buffer_load_dword s16, s[0:3], 0x58 ; C2080158 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s16, v1 ; 100E0210 s_buffer_load_dword s16, s[0:3], 0x5b ; C208015B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v4 ; 10020810 s_buffer_load_dword s16, s[0:3], 0x5f ; C208015F s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s16, v1 ; 10020210 exp 15, 32, 0, 0, 0, v7, v6, v5, v1 ; F800020F 01050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v5, v1 ; 7E0A1101 v_lshlrev_b32_e32 v5, 4, v5 ; 340A0A84 v_add_i32_e32 v6, 0xdc, v5 ; 4A0C0AFF 000000DC buffer_load_dword v6, v6, s[0:3], 0 offen ; E0301000 80000606 v_add_i32_e32 v7, 0xd4, v5 ; 4A0E0AFF 000000D4 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 buffer_load_format_xyzw v[8:11], v0, s[12:15], 0 idxen ; E00C2000 80030800 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, v9, v7, v6 ; D2820006 041A0F09 v_add_i32_e32 v7, 0xd8, v5 ; 4A0E0AFF 000000D8 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 v_add_i32_e32 v12, 0xd0, v5 ; 4A180AFF 000000D0 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v8, v12, v7 ; D2820007 041E1908 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 33, 0, 0, 0, v7, v6, v9, v8 ; F800021F 08090607 exp 15, 34, 0, 0, 0, v2, v3, v9, v8 ; F800022F 08090302 s_waitcnt expcnt(0) ; BF8C070F v_add_i32_e32 v1, 0x48, v5 ; 4A020AFF 00000048 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 64, v5 ; 4A040AC0 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_format_xyzw v[6:9], v0, s[4:7], 0 idxen ; E00C2000 80010600 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v2, v1 ; D2820000 04060506 v_add_i32_e32 v1, 0x4c, v5 ; 4A020AFF 0000004C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_add_i32_e32 v2, 0x44, v5 ; 4A040AFF 00000044 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v1 ; 10040204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x61 ; C2020161 s_buffer_load_dword s5, s[0:3], 0x60 ; C2028160 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_add_f32_e32 v3, s5, v3 ; 06060605 v_mul_f32_e32 v2, v3, v2 ; 10040503 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 v_mul_f32_e32 v4, v3, v4 ; 10080903 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v3, v0 ; 10000103 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL IN[3], GENERIC[11], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[4] DCL CONST[0..1] DCL TEMP[0] DCL TEMP[1..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[4].xxxx, CONST[4].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[1].xyxx, CONST[1].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[4].x, TEMP[5].xxxx, TEMP[4].yyyy 8: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy 9: KILL_IF -TEMP[4].xxxx 10: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 11: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 12: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 13: OR TEMP[4].x, TEMP[5].xxxx, TEMP[4].yyyy 14: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy 15: KILL_IF -TEMP[4].xxxx 16: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 17: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 18: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 19: OR TEMP[2].x, TEMP[4].xxxx, TEMP[2].yyyy 20: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 21: KILL_IF -TEMP[2].xxxx 22: ADD TEMP[3].xyz, CONST[0].wwww, -TEMP[1].yyyy 23: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 24: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 25: OR TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy 26: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 27: KILL_IF -TEMP[1].xxxx 28: MOV TEMP[1].xy, IN[2].xyyy 29: TEX TEMP[1], TEMP[1], SAMP[1], 2D 30: MUL TEMP[3], TEMP[1], IN[3].yyyy 31: MOV TEMP[1].xy, IN[2].xyyy 32: TEX TEMP[1], TEMP[1], SAMP[0], 2D 33: MAD TEMP[3], IN[3].xxxx, TEMP[1], TEMP[3] 34: MUL TEMP[1], TEMP[3], IN[1] 35: MOV OUT[0], TEMP[1] 36: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %43 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %44 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %45 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %46 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %50 = fmul float %15, %32 %51 = fadd float %50, %33 %52 = fmul float %14, %28 %53 = fadd float %52, %30 %54 = fmul float %51, %29 %55 = fadd float %54, %31 %56 = fsub float -0.000000e+00, %24 %57 = fsub float -0.000000e+00, %26 %58 = fadd float %56, %53 %59 = fadd float %56, %53 %60 = fadd float %56, %53 %61 = fcmp olt float %58, 0.000000e+00 %62 = sext i1 %61 to i32 %63 = fcmp olt float %59, 0.000000e+00 %64 = sext i1 %63 to i32 %65 = fcmp olt float %60, 0.000000e+00 %66 = sext i1 %65 to i32 %67 = bitcast i32 %62 to float %68 = bitcast i32 %64 to float %69 = bitcast i32 %66 to float %70 = bitcast float %67 to i32 %71 = bitcast float %69 to i32 %72 = or i32 %70, %71 %73 = bitcast i32 %72 to float %74 = bitcast float %73 to i32 %75 = bitcast float %68 to i32 %76 = or i32 %74, %75 %77 = bitcast i32 %76 to float %78 = bitcast float %77 to i32 %79 = and i32 %78, 1065353216 %80 = bitcast i32 %79 to float %81 = fsub float -0.000000e+00, %80 %82 = fsub float -0.000000e+00, %80 %83 = fsub float -0.000000e+00, %80 %84 = fsub float -0.000000e+00, %80 call void @llvm.AMDGPU.kill(float %81) call void @llvm.AMDGPU.kill(float %82) call void @llvm.AMDGPU.kill(float %83) call void @llvm.AMDGPU.kill(float %84) %85 = fsub float -0.000000e+00, %53 %86 = fadd float %25, %85 %87 = fsub float -0.000000e+00, %53 %88 = fadd float %25, %87 %89 = fsub float -0.000000e+00, %53 %90 = fadd float %25, %89 %91 = fcmp olt float %86, 0.000000e+00 %92 = sext i1 %91 to i32 %93 = fcmp olt float %88, 0.000000e+00 %94 = sext i1 %93 to i32 %95 = fcmp olt float %90, 0.000000e+00 %96 = sext i1 %95 to i32 %97 = bitcast i32 %92 to float %98 = bitcast i32 %94 to float %99 = bitcast i32 %96 to float %100 = bitcast float %97 to i32 %101 = bitcast float %99 to i32 %102 = or i32 %100, %101 %103 = bitcast i32 %102 to float %104 = bitcast float %103 to i32 %105 = bitcast float %98 to i32 %106 = or i32 %104, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = and i32 %108, 1065353216 %110 = bitcast i32 %109 to float %111 = fsub float -0.000000e+00, %110 %112 = fsub float -0.000000e+00, %110 %113 = fsub float -0.000000e+00, %110 %114 = fsub float -0.000000e+00, %110 call void @llvm.AMDGPU.kill(float %111) call void @llvm.AMDGPU.kill(float %112) call void @llvm.AMDGPU.kill(float %113) call void @llvm.AMDGPU.kill(float %114) %115 = fadd float %57, %55 %116 = fadd float %57, %55 %117 = fadd float %57, %55 %118 = fcmp olt float %115, 0.000000e+00 %119 = sext i1 %118 to i32 %120 = fcmp olt float %116, 0.000000e+00 %121 = sext i1 %120 to i32 %122 = fcmp olt float %117, 0.000000e+00 %123 = sext i1 %122 to i32 %124 = bitcast i32 %119 to float %125 = bitcast i32 %121 to float %126 = bitcast i32 %123 to float %127 = bitcast float %124 to i32 %128 = bitcast float %126 to i32 %129 = or i32 %127, %128 %130 = bitcast i32 %129 to float %131 = bitcast float %130 to i32 %132 = bitcast float %125 to i32 %133 = or i32 %131, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = and i32 %135, 1065353216 %137 = bitcast i32 %136 to float %138 = fsub float -0.000000e+00, %137 %139 = fsub float -0.000000e+00, %137 %140 = fsub float -0.000000e+00, %137 %141 = fsub float -0.000000e+00, %137 call void @llvm.AMDGPU.kill(float %138) call void @llvm.AMDGPU.kill(float %139) call void @llvm.AMDGPU.kill(float %140) call void @llvm.AMDGPU.kill(float %141) %142 = fsub float -0.000000e+00, %55 %143 = fadd float %27, %142 %144 = fsub float -0.000000e+00, %55 %145 = fadd float %27, %144 %146 = fsub float -0.000000e+00, %55 %147 = fadd float %27, %146 %148 = fcmp olt float %143, 0.000000e+00 %149 = sext i1 %148 to i32 %150 = fcmp olt float %145, 0.000000e+00 %151 = sext i1 %150 to i32 %152 = fcmp olt float %147, 0.000000e+00 %153 = sext i1 %152 to i32 %154 = bitcast i32 %149 to float %155 = bitcast i32 %151 to float %156 = bitcast i32 %153 to float %157 = bitcast float %154 to i32 %158 = bitcast float %156 to i32 %159 = or i32 %157, %158 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = bitcast float %155 to i32 %163 = or i32 %161, %162 %164 = bitcast i32 %163 to float %165 = bitcast float %164 to i32 %166 = and i32 %165, 1065353216 %167 = bitcast i32 %166 to float %168 = fsub float -0.000000e+00, %167 %169 = fsub float -0.000000e+00, %167 %170 = fsub float -0.000000e+00, %167 %171 = fsub float -0.000000e+00, %167 call void @llvm.AMDGPU.kill(float %168) call void @llvm.AMDGPU.kill(float %169) call void @llvm.AMDGPU.kill(float %170) call void @llvm.AMDGPU.kill(float %171) %172 = bitcast float %46 to i32 %173 = bitcast float %47 to i32 %174 = insertelement <2 x i32> undef, i32 %172, i32 0 %175 = insertelement <2 x i32> %174, i32 %173, i32 1 %176 = bitcast <8 x i32> %39 to <32 x i8> %177 = bitcast <4 x i32> %41 to <16 x i8> %178 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %175, <32 x i8> %176, <16 x i8> %177, i32 2) %179 = extractelement <4 x float> %178, i32 0 %180 = extractelement <4 x float> %178, i32 1 %181 = extractelement <4 x float> %178, i32 2 %182 = extractelement <4 x float> %178, i32 3 %183 = fmul float %179, %49 %184 = fmul float %180, %49 %185 = fmul float %181, %49 %186 = fmul float %182, %49 %187 = bitcast float %46 to i32 %188 = bitcast float %47 to i32 %189 = insertelement <2 x i32> undef, i32 %187, i32 0 %190 = insertelement <2 x i32> %189, i32 %188, i32 1 %191 = bitcast <8 x i32> %35 to <32 x i8> %192 = bitcast <4 x i32> %37 to <16 x i8> %193 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %190, <32 x i8> %191, <16 x i8> %192, i32 2) %194 = extractelement <4 x float> %193, i32 0 %195 = extractelement <4 x float> %193, i32 1 %196 = extractelement <4 x float> %193, i32 2 %197 = extractelement <4 x float> %193, i32 3 %198 = fmul float %48, %194 %199 = fadd float %198, %183 %200 = fmul float %48, %195 %201 = fadd float %200, %184 %202 = fmul float %48, %196 %203 = fadd float %202, %185 %204 = fmul float %48, %197 %205 = fadd float %204, %186 %206 = fmul float %199, %42 %207 = fmul float %201, %43 %208 = fmul float %203, %44 %209 = fmul float %205, %45 %210 = call i32 @llvm.SI.packf16(float %206, float %207) %211 = bitcast i32 %210 to float %212 = call i32 @llvm.SI.packf16(float %208, float %209) %213 = bitcast i32 %212 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %211, float %213, float %211, float %213) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x6 ; C2040106 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s8 ; 7E080208 v_mad_f32 v2, s10, v2, v4 ; D2820002 0412040A s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v4, s8, v2 ; 0A080408 v_cmp_lt_f32_e64 s[10:11], v4, 0 ; D002000A 00010104 v_cndmask_b32_e64 v4, 0, -1, s[10:11] ; D2000004 00298280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_mov_b32_e32 v5, 0x80000000 ; 7E0A02FF 80000000 v_xor_b32_e32 v4, v4, v5 ; 3A080B04 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s8, v2 ; 08040408 v_cmp_lt_f32_e64 s[10:11], v2, 0 ; D002000A 00010102 v_cndmask_b32_e64 v2, 0, -1, s[10:11] ; D2000002 00298280 v_and_b32_e32 v2, 1.0, v2 ; 360404F2 v_xor_b32_e32 v2, v2, v5 ; 3A040B02 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s10, s[0:3], 0x10 ; C2050110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s8 ; 7E040208 v_mad_f32 v2, s10, v3, v2 ; D2820002 040A060A s_buffer_load_dword s8, s[0:3], 0x7 ; C2040107 s_buffer_load_dword s10, s[0:3], 0x5 ; C2050105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v2, s10, v2, v3 ; D2820002 040E040A s_buffer_load_dword s8, s[0:3], 0x2 ; C2040102 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s8, v2 ; 0A060408 v_cmp_lt_f32_e64 s[10:11], v3, 0 ; D002000A 00010103 v_cndmask_b32_e64 v3, 0, -1, s[10:11] ; D2000803 00298280 v_and_b32_e32 v3, 1.0, v3 ; 360606F2 v_xor_b32_e32 v3, v3, v5 ; 3A060B03 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 s_buffer_load_dword s0, s[0:3], 0x3 ; C2000103 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s0, v2 ; 08040400 v_cmp_lt_f32_e64 s[0:1], v2, 0 ; D0020000 00010102 v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; D2000002 00018280 v_and_b32_e32 v2, 1.0, v2 ; 360404F2 v_xor_b32_e32 v2, v2, v5 ; 3A040B02 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800F00 00450402 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v8, v7 ; 10120F08 image_sample v[10:13], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030A02 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v2, v13, v9 ; D2820003 04261B02 v_interp_p1_f32 v9, v0, 3, 0, [m0] ; C8240300 v_interp_p2_f32 v9, [v9], v1, 3, 0, [m0] ; C8250301 v_mul_f32_e32 v3, v9, v3 ; 10060709 v_mul_f32_e32 v9, v8, v6 ; 10120D08 v_mad_f32 v9, v2, v12, v9 ; D2820009 04261902 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_cvt_pkrtz_f16_f32_e32 v3, v9, v3 ; 5E060709 v_mul_f32_e32 v9, v8, v5 ; 10120B08 v_mad_f32 v9, v2, v11, v9 ; D2820009 04261702 v_interp_p1_f32 v14, v0, 1, 0, [m0] ; C8380100 v_interp_p2_f32 v14, [v14], v1, 1, 0, [m0] ; C8390101 v_mul_f32_e32 v9, v14, v9 ; 1012130E v_mul_f32_e32 v4, v8, v4 ; 10080908 v_mad_f32 v2, v2, v10, v4 ; D2820002 04121502 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mul_f32_e32 v0, v4, v2 ; 10000504 v_cvt_pkrtz_f16_f32_e32 v0, v0, v9 ; 5E001300 exp 15, 0, 1, 1, 1, v0, v3, v0, v3 ; F8001C0F 03000300 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[4].zwzw, IN[0] 1: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 2: MAD TEMP[0], TEMP[0].xxxx, CONST[0], TEMP[1] 3: ADD TEMP[0], TEMP[0], CONST[3] 4: ADD TEMP[1].x, CONST[6].xxxx, CONST[6].yyyy 5: MUL TEMP[0], TEMP[0], TEMP[1].xxxx 6: MUL TEMP[1].w, CONST[5].wwww, IN[2].wwww 7: MOV TEMP[1].w, TEMP[1].wwww 8: MOV TEMP[2].xy, IN[1].xyxx 9: MOV TEMP[1].xyz, IN[2].xyzx 10: MOV TEMP[2].zw, IMM[0].yyxy 11: MOV OUT[1], TEMP[1] 12: MOV OUT[2], TEMP[2] 13: MOV OUT[0], TEMP[0] 14: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %30 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %31 = load <16 x i8> addrspace(2)* %30, !tbaa !0 %32 = add i32 %5, %7 %33 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %31, i32 0, i32 %32) %34 = extractelement <4 x float> %33, i32 0 %35 = extractelement <4 x float> %33, i32 1 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %43 = load <16 x i8> addrspace(2)* %42, !tbaa !0 %44 = add i32 %5, %7 %45 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %43, i32 0, i32 %44) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = extractelement <4 x float> %45, i32 3 %50 = fadd float %25, %34 %51 = fadd float %26, %35 %52 = fmul float %51, %17 %53 = fmul float %51, %18 %54 = fmul float %51, %19 %55 = fmul float %51, %20 %56 = fmul float %50, %13 %57 = fadd float %56, %52 %58 = fmul float %50, %14 %59 = fadd float %58, %53 %60 = fmul float %50, %15 %61 = fadd float %60, %54 %62 = fmul float %50, %16 %63 = fadd float %62, %55 %64 = fadd float %57, %21 %65 = fadd float %59, %22 %66 = fadd float %61, %23 %67 = fadd float %63, %24 %68 = fadd float %28, %29 %69 = fmul float %64, %68 %70 = fmul float %65, %68 %71 = fmul float %66, %68 %72 = fmul float %67, %68 %73 = fmul float %27, %49 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %73) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %40, float %41, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %69, float %70, float %71, float %72) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v1 ; 06000204 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_add_f32_e32 v2, s5, v2 ; 06040405 v_mul_f32_e32 v1, v2, v1 ; 10020302 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mul_f32_e32 v3, v2, v3 ; 10060702 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v0 ; 100A0004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v4, s4, v5 ; D2820005 04140904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_mul_f32_e32 v5, v2, v5 ; 100A0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 v_mul_f32_e32 v0, v2, v0 ; 10000102 exp 15, 12, 0, 1, 0, v0, v5, v3, v1 ; F80008CF 01030500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL CONST[9] DCL CONST[0..6] DCL TEMP[0] DCL TEMP[1..9], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.5000, 1024.0000} IMM[1] FLT32 {65280.0000, 255.0000, 0.0000, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[9].xxxx, CONST[9].yyyy 2: MAD TEMP[1].xy, TEMP[0], CONST[6].xyxx, CONST[6].zwzz 3: MOV TEMP[2].xz, -CONST[0] 4: ADD TEMP[3].xyz, TEMP[2].xxxx, TEMP[1].xxxx 5: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 6: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 7: OR TEMP[4].x, TEMP[5].xxxx, TEMP[4].yyyy 8: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy 9: KILL_IF -TEMP[4].xxxx 10: ADD TEMP[3].xyz, CONST[0].yyyy, -TEMP[1].xxxx 11: FSLT TEMP[4].xyz, TEMP[3].xyzz, IMM[0].xxxx 12: OR TEMP[5].x, TEMP[4].xxxx, TEMP[4].zzzz 13: OR TEMP[4].x, TEMP[5].xxxx, TEMP[4].yyyy 14: AND TEMP[4].x, TEMP[4].xxxx, IMM[0].yyyy 15: KILL_IF -TEMP[4].xxxx 16: ADD TEMP[3].xyz, TEMP[2].zzzz, TEMP[1].yyyy 17: FSLT TEMP[2].xyz, TEMP[3].xyzz, IMM[0].xxxx 18: OR TEMP[4].x, TEMP[2].xxxx, TEMP[2].zzzz 19: OR TEMP[2].x, TEMP[4].xxxx, TEMP[2].yyyy 20: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].yyyy 21: KILL_IF -TEMP[2].xxxx 22: ADD TEMP[3], CONST[0].wwww, -TEMP[1].yyyy 23: FSLT TEMP[1].xyz, TEMP[3].xyzz, IMM[0].xxxx 24: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 25: OR TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy 26: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].yyyy 27: KILL_IF -TEMP[1].xxxx 28: MOV TEMP[3].y, IMM[0].zzzz 29: ADD TEMP[1].x, IMM[0].wwww, -CONST[3].xxxx 30: MOV TEMP[3].x, TEMP[1].xxxx 31: FSNE TEMP[2].x, CONST[1].xxxx, -CONST[1].xxxx 32: UIF TEMP[2].xxxx :2 33: FSLT TEMP[2].x, IMM[0].zzzz, CONST[2].xxxx 34: UIF TEMP[2].xxxx :2 35: MUL TEMP[2].zw, CONST[4].xyxy, IN[2].xyxy 36: MOV TEMP[3].zw, TEMP[2].wwzw 37: FRC TEMP[2].xy, TEMP[2].zwzw 38: ADD TEMP[4].zw, TEMP[3], -TEMP[2].xyxy 39: MOV TEMP[3].zw, TEMP[4].wwzw 40: MAD TEMP[4].xy, IN[2], CONST[4], -TEMP[4].zwzw 41: ADD TEMP[5].zw, TEMP[3], IMM[0].zzzz 42: RCP TEMP[6].x, CONST[4].xxxx 43: RCP TEMP[7].x, CONST[4].yyyy 44: MOV TEMP[6].y, TEMP[7].xxxx 45: MUL TEMP[5].xy, TEMP[5].zwzw, TEMP[6] 46: MOV TEMP[5].xy, TEMP[5].xyyy 47: MOV TEMP[5].w, IMM[0].xxxx 48: TXL TEMP[5], TEMP[5], SAMP[0], 2D 49: MOV TEMP[6].zw, TEMP[5] 50: DP2 TEMP[7].x, TEMP[5].wxxx, IMM[1].xyyy 51: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz 52: FRC TEMP[8].w, TEMP[7].xxxx 53: ADD TEMP[7].z, -TEMP[8].wwww, TEMP[7].xxxx 54: MOV TEMP[3].z, TEMP[7].zzzz 55: MAD TEMP[5].w, CONST[3].xxxx, TEMP[5].xxxx, IMM[0].zzzz 56: FRC TEMP[7].z, TEMP[5].wwww 57: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[7].zzzz 58: MOV TEMP[3].w, TEMP[5].wwww 59: ADD TEMP[5].zw, TEMP[3], IMM[0].zzzz 60: FSGE TEMP[7].x, TEMP[1].xxxx, IMM[0].xxxx 61: UIF TEMP[7].xxxx :2 62: MOV TEMP[7].x, TEMP[5].wwww 63: ELSE :2 64: MOV TEMP[7].x, TEMP[5].zzzz 65: ENDIF 66: RCP TEMP[5].x, CONST[2].xxxx 67: MAD TEMP[5].w, CONST[5].xxxx, TEMP[5].xxxx, IMM[0].zzzz 68: FRC TEMP[8].z, TEMP[5].wwww 69: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[8].zzzz 70: MUL TEMP[8].z, TEMP[5].wwww, TEMP[7].xxxx 71: MOV TEMP[9].x, -TEMP[5].wwww 72: FSGE TEMP[8].x, TEMP[8].zzzz, IMM[0].xxxx 73: UIF TEMP[8].xxxx :2 74: MOV TEMP[8].x, TEMP[5].wwww 75: ELSE :2 76: MOV TEMP[8].x, TEMP[9].xxxx 77: ENDIF 78: MOV TEMP[2].z, TEMP[8].xxxx 79: RCP TEMP[9].x, TEMP[8].xxxx 80: MUL TEMP[9].w, TEMP[7].xxxx, TEMP[9].xxxx 81: FRC TEMP[9].w, TEMP[9].wwww 82: MOV TEMP[2].w, TEMP[9].wwww 83: MUL TEMP[8].x, TEMP[9].wwww, TEMP[8].xxxx 84: MOV TEMP[6].x, TEMP[8].xxxx 85: RCP TEMP[5].x, TEMP[5].wwww 86: MUL TEMP[5].y, TEMP[5].xxxx, TEMP[7].xxxx 87: MOV TEMP[6].y, TEMP[5].yyyy 88: FRC TEMP[5].zw, TEMP[6].xyxy 89: MOV TEMP[3].zw, TEMP[5].wwzw 90: ADD TEMP[5].zw, -TEMP[3], TEMP[6].xyxy 91: MOV TEMP[3].zw, TEMP[5].wwzw 92: ADD TEMP[4].zw, TEMP[4].xyxy, TEMP[3] 93: MOV TEMP[3].zw, TEMP[4].wwzw 94: MUL TEMP[4].zw, TEMP[3], CONST[2].xxxx 95: MOV TEMP[3].zw, TEMP[4].wwzw 96: FRC TEMP[4].xy, TEMP[4].zwzw 97: ADD TEMP[4].zw, TEMP[3], -TEMP[4].xyxy 98: MOV TEMP[3].zw, TEMP[4].wwzw 99: ADD TEMP[4].zw, TEMP[3], IMM[0].zzzz 100: MOV TEMP[3].w, TEMP[4].wwzw 101: RCP TEMP[2].x, CONST[5].xxxx 102: RCP TEMP[5].x, CONST[5].yyyy 103: MOV TEMP[2].y, TEMP[5].xxxx 104: MUL TEMP[4].xy, TEMP[4].zwzw, TEMP[2] 105: MOV TEMP[4].xy, TEMP[4].xyyy 106: MOV TEMP[4].w, IMM[0].xxxx 107: TXL TEMP[4], TEMP[4], SAMP[1], 2D 108: MOV TEMP[2], TEMP[4] 109: MOV TEMP[3].z, TEMP[4].wwww 110: ELSE :2 111: MOV TEMP[4].xy, IN[2].xyyy 112: TEX TEMP[4], TEMP[4], SAMP[1], 2D 113: MOV TEMP[2], TEMP[4] 114: MOV TEMP[3].z, TEMP[4].wwww 115: ENDIF 116: MUL TEMP[4].w, TEMP[3].zzzz, IN[1].wwww 117: MOV TEMP[4].w, TEMP[4].wwww 118: MOV TEMP[4].xyz, IN[1].xyzx 119: ELSE :2 120: FSLT TEMP[5].x, IMM[0].zzzz, CONST[2].xxxx 121: UIF TEMP[5].xxxx :2 122: MUL TEMP[5].zw, CONST[4].xyxy, IN[2].xyxy 123: MOV TEMP[3].zw, TEMP[5].wwzw 124: FRC TEMP[5].xy, TEMP[5].zwzw 125: MOV TEMP[2].xy, TEMP[5].xyxx 126: ADD TEMP[5].zw, TEMP[3], -TEMP[5].xyxy 127: MOV TEMP[3].zw, TEMP[5].wwzw 128: MAD TEMP[5].xy, IN[2], CONST[4], -TEMP[5].zwzw 129: MOV TEMP[2].xy, TEMP[5].xyxx 130: ADD TEMP[5].zw, TEMP[3], IMM[0].zzzz 131: RCP TEMP[6].x, CONST[4].xxxx 132: RCP TEMP[7].x, CONST[4].yyyy 133: MOV TEMP[6].y, TEMP[7].xxxx 134: MUL TEMP[5].xy, TEMP[5].zwzw, TEMP[6] 135: MOV TEMP[5].xy, TEMP[5].xyyy 136: MOV TEMP[5].w, IMM[0].xxxx 137: TXL TEMP[5], TEMP[5], SAMP[0], 2D 138: MOV TEMP[6].zw, TEMP[5] 139: DP2 TEMP[7].x, TEMP[5].wxxx, IMM[1].xyyy 140: ADD TEMP[7].x, TEMP[7].xxxx, IMM[0].zzzz 141: FRC TEMP[8].w, TEMP[7].xxxx 142: ADD TEMP[7].z, -TEMP[8].wwww, TEMP[7].xxxx 143: MOV TEMP[3].z, TEMP[7].zzzz 144: MAD TEMP[5].w, CONST[3].xxxx, TEMP[5].xxxx, IMM[0].zzzz 145: FRC TEMP[7].z, TEMP[5].wwww 146: MOV TEMP[2].z, TEMP[7].zzzz 147: ADD TEMP[5].w, TEMP[5].wwww, -TEMP[7].zzzz 148: MOV TEMP[3].w, TEMP[5].wwww 149: ADD TEMP[5].zw, TEMP[3], IMM[0].zzzz 150: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 151: UIF TEMP[1].xxxx :2 152: MOV TEMP[1].x, TEMP[5].wwww 153: ELSE :2 154: MOV TEMP[1].x, TEMP[5].zzzz 155: ENDIF 156: MOV TEMP[3].x, TEMP[1].xxxx 157: RCP TEMP[5].x, CONST[2].xxxx 158: MAD TEMP[5].y, CONST[5].xxxx, TEMP[5].xxxx, IMM[0].zzzz 159: FRC TEMP[7].z, TEMP[5].yyyy 160: ADD TEMP[5].y, -TEMP[7].zzzz, TEMP[5].yyyy 161: MUL TEMP[7].z, TEMP[5].yyyy, TEMP[1].xxxx 162: MOV TEMP[8].x, -TEMP[5].yyyy 163: FSGE TEMP[7].x, TEMP[7].zzzz, IMM[0].xxxx 164: UIF TEMP[7].xxxx :2 165: MOV TEMP[7].x, TEMP[5].yyyy 166: ELSE :2 167: MOV TEMP[7].x, TEMP[8].xxxx 168: ENDIF 169: MOV TEMP[3].z, TEMP[7].xxxx 170: RCP TEMP[7].x, TEMP[7].xxxx 171: MUL TEMP[1].w, TEMP[7].xxxx, TEMP[1].xxxx 172: FRC TEMP[1].w, TEMP[1].wwww 173: MOV TEMP[3].w, TEMP[1].wwww 174: RCP TEMP[1].x, TEMP[5].yyyy 175: MOV TEMP[3].y, TEMP[1].xxxx 176: MUL TEMP[1].xy, TEMP[3].wyzw, TEMP[3].zxzw 177: MOV TEMP[6].xy, TEMP[1].xyxx 178: FRC TEMP[1].xy, TEMP[6] 179: MOV TEMP[3].xy, TEMP[1].xyxx 180: ADD TEMP[1].xy, -TEMP[3], TEMP[6] 181: MOV TEMP[3].xy, TEMP[1].xyxx 182: ADD TEMP[1].xy, TEMP[2], TEMP[3] 183: MOV TEMP[3].xy, TEMP[1].xyxx 184: MUL TEMP[1].xy, TEMP[3], CONST[2].xxxx 185: MOV TEMP[3].xy, TEMP[1].xyxx 186: FRC TEMP[1].zw, TEMP[1].xyxy 187: MOV TEMP[3].zw, TEMP[1].wwzw 188: ADD TEMP[1].xy, -TEMP[1].zwzw, TEMP[3] 189: MOV TEMP[3].xy, TEMP[1].xyxx 190: ADD TEMP[1].xy, TEMP[3], IMM[0].zzzz 191: MOV TEMP[3].xy, TEMP[1].xyxx 192: RCP TEMP[2].x, CONST[5].xxxx 193: RCP TEMP[1].x, CONST[5].yyyy 194: MOV TEMP[2].y, TEMP[1].xxxx 195: MUL TEMP[1].xy, TEMP[3], TEMP[2] 196: MOV TEMP[1].xy, TEMP[1].xyyy 197: MOV TEMP[1].w, IMM[0].xxxx 198: TXL TEMP[1], TEMP[1], SAMP[1], 2D 199: MOV TEMP[3].xyz, TEMP[1] 200: MOV TEMP[2].x, TEMP[1].wwww 201: ELSE :2 202: MOV TEMP[1].xy, IN[2].xyyy 203: TEX TEMP[1], TEMP[1], SAMP[1], 2D 204: MOV TEMP[3].xyz, TEMP[1] 205: MOV TEMP[2].x, TEMP[1].wwww 206: ENDIF 207: MUL TEMP[1].w, TEMP[2].xxxx, IN[1].wwww 208: MOV TEMP[3].w, TEMP[1].wwww 209: MOV TEMP[4], TEMP[3] 210: ENDIF 211: MOV OUT[0], TEMP[4] 212: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %41 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %42 = load <8 x i32> addrspace(2)* %41, !tbaa !0 %43 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %44 = load <4 x i32> addrspace(2)* %43, !tbaa !0 %45 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %46 = load <8 x i32> addrspace(2)* %45, !tbaa !0 %47 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %48 = load <4 x i32> addrspace(2)* %47, !tbaa !0 %49 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %51 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %52 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %53 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %54 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %55 = fmul float %15, %39 %56 = fadd float %55, %40 %57 = fmul float %14, %35 %58 = fadd float %57, %37 %59 = fmul float %56, %36 %60 = fadd float %59, %38 %61 = fsub float -0.000000e+00, %24 %62 = fsub float -0.000000e+00, %26 %63 = fadd float %61, %58 %64 = fadd float %61, %58 %65 = fadd float %61, %58 %66 = fcmp olt float %63, 0.000000e+00 %67 = sext i1 %66 to i32 %68 = fcmp olt float %64, 0.000000e+00 %69 = sext i1 %68 to i32 %70 = fcmp olt float %65, 0.000000e+00 %71 = sext i1 %70 to i32 %72 = bitcast i32 %67 to float %73 = bitcast i32 %69 to float %74 = bitcast i32 %71 to float %75 = bitcast float %72 to i32 %76 = bitcast float %74 to i32 %77 = or i32 %75, %76 %78 = bitcast i32 %77 to float %79 = bitcast float %78 to i32 %80 = bitcast float %73 to i32 %81 = or i32 %79, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = and i32 %83, 1065353216 %85 = bitcast i32 %84 to float %86 = fsub float -0.000000e+00, %85 %87 = fsub float -0.000000e+00, %85 %88 = fsub float -0.000000e+00, %85 %89 = fsub float -0.000000e+00, %85 call void @llvm.AMDGPU.kill(float %86) call void @llvm.AMDGPU.kill(float %87) call void @llvm.AMDGPU.kill(float %88) call void @llvm.AMDGPU.kill(float %89) %90 = fsub float -0.000000e+00, %58 %91 = fadd float %25, %90 %92 = fsub float -0.000000e+00, %58 %93 = fadd float %25, %92 %94 = fsub float -0.000000e+00, %58 %95 = fadd float %25, %94 %96 = fcmp olt float %91, 0.000000e+00 %97 = sext i1 %96 to i32 %98 = fcmp olt float %93, 0.000000e+00 %99 = sext i1 %98 to i32 %100 = fcmp olt float %95, 0.000000e+00 %101 = sext i1 %100 to i32 %102 = bitcast i32 %97 to float %103 = bitcast i32 %99 to float %104 = bitcast i32 %101 to float %105 = bitcast float %102 to i32 %106 = bitcast float %104 to i32 %107 = or i32 %105, %106 %108 = bitcast i32 %107 to float %109 = bitcast float %108 to i32 %110 = bitcast float %103 to i32 %111 = or i32 %109, %110 %112 = bitcast i32 %111 to float %113 = bitcast float %112 to i32 %114 = and i32 %113, 1065353216 %115 = bitcast i32 %114 to float %116 = fsub float -0.000000e+00, %115 %117 = fsub float -0.000000e+00, %115 %118 = fsub float -0.000000e+00, %115 %119 = fsub float -0.000000e+00, %115 call void @llvm.AMDGPU.kill(float %116) call void @llvm.AMDGPU.kill(float %117) call void @llvm.AMDGPU.kill(float %118) call void @llvm.AMDGPU.kill(float %119) %120 = fadd float %62, %60 %121 = fadd float %62, %60 %122 = fadd float %62, %60 %123 = fcmp olt float %120, 0.000000e+00 %124 = sext i1 %123 to i32 %125 = fcmp olt float %121, 0.000000e+00 %126 = sext i1 %125 to i32 %127 = fcmp olt float %122, 0.000000e+00 %128 = sext i1 %127 to i32 %129 = bitcast i32 %124 to float %130 = bitcast i32 %126 to float %131 = bitcast i32 %128 to float %132 = bitcast float %129 to i32 %133 = bitcast float %131 to i32 %134 = or i32 %132, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = bitcast float %130 to i32 %138 = or i32 %136, %137 %139 = bitcast i32 %138 to float %140 = bitcast float %139 to i32 %141 = and i32 %140, 1065353216 %142 = bitcast i32 %141 to float %143 = fsub float -0.000000e+00, %142 %144 = fsub float -0.000000e+00, %142 %145 = fsub float -0.000000e+00, %142 %146 = fsub float -0.000000e+00, %142 call void @llvm.AMDGPU.kill(float %143) call void @llvm.AMDGPU.kill(float %144) call void @llvm.AMDGPU.kill(float %145) call void @llvm.AMDGPU.kill(float %146) %147 = fsub float -0.000000e+00, %60 %148 = fadd float %27, %147 %149 = fsub float -0.000000e+00, %60 %150 = fadd float %27, %149 %151 = fsub float -0.000000e+00, %60 %152 = fadd float %27, %151 %153 = fcmp olt float %148, 0.000000e+00 %154 = sext i1 %153 to i32 %155 = fcmp olt float %150, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = fcmp olt float %152, 0.000000e+00 %158 = sext i1 %157 to i32 %159 = bitcast i32 %154 to float %160 = bitcast i32 %156 to float %161 = bitcast i32 %158 to float %162 = bitcast float %159 to i32 %163 = bitcast float %161 to i32 %164 = or i32 %162, %163 %165 = bitcast i32 %164 to float %166 = bitcast float %165 to i32 %167 = bitcast float %160 to i32 %168 = or i32 %166, %167 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = and i32 %170, 1065353216 %172 = bitcast i32 %171 to float %173 = fsub float -0.000000e+00, %172 %174 = fsub float -0.000000e+00, %172 %175 = fsub float -0.000000e+00, %172 %176 = fsub float -0.000000e+00, %172 call void @llvm.AMDGPU.kill(float %173) call void @llvm.AMDGPU.kill(float %174) call void @llvm.AMDGPU.kill(float %175) call void @llvm.AMDGPU.kill(float %176) %177 = fsub float -0.000000e+00, %30 %178 = fadd float 1.024000e+03, %177 %179 = fsub float -0.000000e+00, %28 %180 = fcmp une float %28, %179 %181 = sext i1 %180 to i32 %182 = bitcast i32 %181 to float %183 = bitcast float %182 to i32 %184 = icmp ne i32 %183, 0 %185 = fcmp olt float 5.000000e-01, %29 %186 = sext i1 %185 to i32 %187 = bitcast i32 %186 to float %188 = bitcast float %187 to i32 %189 = icmp ne i32 %188, 0 br i1 %184, label %IF, label %ELSE IF: ; preds = %main_body br i1 %189, label %IF41, label %ELSE42 ELSE: ; preds = %main_body br i1 %189, label %IF50, label %ELSE51 ENDIF: ; preds = %IF41, %ELSE42, %ENDIF49 %.sink60.sink = phi <4 x float> [ %.sink60, %ENDIF49 ], [ %304, %ELSE42 ], [ %297, %IF41 ] %temp16.0 = phi float [ %414, %ENDIF49 ], [ %49, %ELSE42 ], [ %49, %IF41 ] %temp17.0 = phi float [ %415, %ENDIF49 ], [ %50, %ELSE42 ], [ %50, %IF41 ] %temp18.0 = phi float [ %416, %ENDIF49 ], [ %51, %ELSE42 ], [ %51, %IF41 ] %190 = extractelement <4 x float> %.sink60.sink, i32 3 %191 = fmul float %190, %52 %192 = call i32 @llvm.SI.packf16(float %temp16.0, float %temp17.0) %193 = bitcast i32 %192 to float %194 = call i32 @llvm.SI.packf16(float %temp18.0, float %191) %195 = bitcast i32 %194 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %193, float %195, float %193, float %195) ret void IF41: ; preds = %IF %196 = fmul float %31, %53 %197 = fmul float %32, %54 %198 = call float @llvm.AMDIL.fraction.(float %196) %199 = call float @llvm.AMDIL.fraction.(float %197) %200 = fsub float -0.000000e+00, %198 %201 = fadd float %196, %200 %202 = fsub float -0.000000e+00, %199 %203 = fadd float %197, %202 %204 = fsub float -0.000000e+00, %201 %205 = fmul float %53, %31 %206 = fadd float %205, %204 %207 = fsub float -0.000000e+00, %203 %208 = fmul float %54, %32 %209 = fadd float %208, %207 %210 = fadd float %201, 5.000000e-01 %211 = fadd float %203, 5.000000e-01 %212 = fdiv float 1.000000e+00, %31 %213 = fdiv float 1.000000e+00, %32 %214 = fmul float %210, %212 %215 = fmul float %211, %213 %216 = bitcast float %214 to i32 %217 = bitcast float %215 to i32 %218 = bitcast float 0.000000e+00 to i32 %219 = insertelement <4 x i32> undef, i32 %216, i32 0 %220 = insertelement <4 x i32> %219, i32 %217, i32 1 %221 = insertelement <4 x i32> %220, i32 %218, i32 2 %222 = insertelement <4 x i32> %221, i32 undef, i32 3 %223 = bitcast <8 x i32> %42 to <32 x i8> %224 = bitcast <4 x i32> %44 to <16 x i8> %225 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %222, <32 x i8> %223, <16 x i8> %224, i32 2) %226 = extractelement <4 x float> %225, i32 0 %227 = extractelement <4 x float> %225, i32 3 %228 = fmul float %227, 6.528000e+04 %229 = fmul float %226, 2.550000e+02 %230 = fadd float %228, %229 %231 = fadd float %230, 5.000000e-01 %232 = call float @llvm.AMDIL.fraction.(float %231) %233 = fsub float -0.000000e+00, %232 %234 = fadd float %233, %231 %235 = fmul float %30, %226 %236 = fadd float %235, 5.000000e-01 %237 = call float @llvm.AMDIL.fraction.(float %236) %238 = fsub float -0.000000e+00, %237 %239 = fadd float %236, %238 %240 = fadd float %234, 5.000000e-01 %241 = fadd float %239, 5.000000e-01 %242 = fcmp oge float %178, 0.000000e+00 %243 = sext i1 %242 to i32 %244 = bitcast i32 %243 to float %245 = bitcast float %244 to i32 %246 = icmp ne i32 %245, 0 %. = select i1 %246, float %241, float %240 %247 = fdiv float 1.000000e+00, %29 %248 = fmul float %33, %247 %249 = fadd float %248, 5.000000e-01 %250 = call float @llvm.AMDIL.fraction.(float %249) %251 = fsub float -0.000000e+00, %250 %252 = fadd float %249, %251 %253 = fmul float %252, %. %254 = fsub float -0.000000e+00, %252 %255 = fcmp oge float %253, 0.000000e+00 %256 = sext i1 %255 to i32 %257 = bitcast i32 %256 to float %258 = bitcast float %257 to i32 %259 = icmp ne i32 %258, 0 %temp32.0 = select i1 %259, float %252, float %254 %260 = fdiv float 1.000000e+00, %temp32.0 %261 = fmul float %., %260 %262 = call float @llvm.AMDIL.fraction.(float %261) %263 = fmul float %262, %temp32.0 %264 = fdiv float 1.000000e+00, %252 %265 = fmul float %264, %. %266 = call float @llvm.AMDIL.fraction.(float %263) %267 = call float @llvm.AMDIL.fraction.(float %265) %268 = fsub float -0.000000e+00, %266 %269 = fadd float %268, %263 %270 = fsub float -0.000000e+00, %267 %271 = fadd float %270, %265 %272 = fadd float %206, %269 %273 = fadd float %209, %271 %274 = fmul float %272, %29 %275 = fmul float %273, %29 %276 = call float @llvm.AMDIL.fraction.(float %274) %277 = call float @llvm.AMDIL.fraction.(float %275) %278 = fsub float -0.000000e+00, %276 %279 = fadd float %274, %278 %280 = fsub float -0.000000e+00, %277 %281 = fadd float %275, %280 %282 = fadd float %279, 5.000000e-01 %283 = fadd float %281, 5.000000e-01 %284 = fdiv float 1.000000e+00, %33 %285 = fdiv float 1.000000e+00, %34 %286 = fmul float %282, %284 %287 = fmul float %283, %285 %288 = bitcast float %286 to i32 %289 = bitcast float %287 to i32 %290 = bitcast float 0.000000e+00 to i32 %291 = insertelement <4 x i32> undef, i32 %288, i32 0 %292 = insertelement <4 x i32> %291, i32 %289, i32 1 %293 = insertelement <4 x i32> %292, i32 %290, i32 2 %294 = insertelement <4 x i32> %293, i32 undef, i32 3 %295 = bitcast <8 x i32> %46 to <32 x i8> %296 = bitcast <4 x i32> %48 to <16 x i8> %297 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %294, <32 x i8> %295, <16 x i8> %296, i32 2) br label %ENDIF ELSE42: ; preds = %IF %298 = bitcast float %53 to i32 %299 = bitcast float %54 to i32 %300 = insertelement <2 x i32> undef, i32 %298, i32 0 %301 = insertelement <2 x i32> %300, i32 %299, i32 1 %302 = bitcast <8 x i32> %46 to <32 x i8> %303 = bitcast <4 x i32> %48 to <16 x i8> %304 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %301, <32 x i8> %302, <16 x i8> %303, i32 2) br label %ENDIF IF50: ; preds = %ELSE %305 = fmul float %31, %53 %306 = fmul float %32, %54 %307 = call float @llvm.AMDIL.fraction.(float %305) %308 = call float @llvm.AMDIL.fraction.(float %306) %309 = fsub float -0.000000e+00, %307 %310 = fadd float %305, %309 %311 = fsub float -0.000000e+00, %308 %312 = fadd float %306, %311 %313 = fsub float -0.000000e+00, %310 %314 = fmul float %53, %31 %315 = fadd float %314, %313 %316 = fsub float -0.000000e+00, %312 %317 = fmul float %54, %32 %318 = fadd float %317, %316 %319 = fadd float %310, 5.000000e-01 %320 = fadd float %312, 5.000000e-01 %321 = fdiv float 1.000000e+00, %31 %322 = fdiv float 1.000000e+00, %32 %323 = fmul float %319, %321 %324 = fmul float %320, %322 %325 = bitcast float %323 to i32 %326 = bitcast float %324 to i32 %327 = bitcast float 0.000000e+00 to i32 %328 = insertelement <4 x i32> undef, i32 %325, i32 0 %329 = insertelement <4 x i32> %328, i32 %326, i32 1 %330 = insertelement <4 x i32> %329, i32 %327, i32 2 %331 = insertelement <4 x i32> %330, i32 undef, i32 3 %332 = bitcast <8 x i32> %42 to <32 x i8> %333 = bitcast <4 x i32> %44 to <16 x i8> %334 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %331, <32 x i8> %332, <16 x i8> %333, i32 2) %335 = extractelement <4 x float> %334, i32 0 %336 = extractelement <4 x float> %334, i32 3 %337 = fmul float %336, 6.528000e+04 %338 = fmul float %335, 2.550000e+02 %339 = fadd float %337, %338 %340 = fadd float %339, 5.000000e-01 %341 = call float @llvm.AMDIL.fraction.(float %340) %342 = fsub float -0.000000e+00, %341 %343 = fadd float %342, %340 %344 = fmul float %30, %335 %345 = fadd float %344, 5.000000e-01 %346 = call float @llvm.AMDIL.fraction.(float %345) %347 = fsub float -0.000000e+00, %346 %348 = fadd float %345, %347 %349 = fadd float %343, 5.000000e-01 %350 = fadd float %348, 5.000000e-01 %351 = fcmp oge float %178, 0.000000e+00 %352 = sext i1 %351 to i32 %353 = bitcast i32 %352 to float %354 = bitcast float %353 to i32 %355 = icmp ne i32 %354, 0 %.59 = select i1 %355, float %350, float %349 %356 = fdiv float 1.000000e+00, %29 %357 = fmul float %33, %356 %358 = fadd float %357, 5.000000e-01 %359 = call float @llvm.AMDIL.fraction.(float %358) %360 = fsub float -0.000000e+00, %359 %361 = fadd float %360, %358 %362 = fmul float %361, %.59 %363 = fsub float -0.000000e+00, %361 %364 = fcmp oge float %362, 0.000000e+00 %365 = sext i1 %364 to i32 %366 = bitcast i32 %365 to float %367 = bitcast float %366 to i32 %368 = icmp ne i32 %367, 0 %temp28.1 = select i1 %368, float %361, float %363 %369 = fdiv float 1.000000e+00, %temp28.1 %370 = fmul float %369, %.59 %371 = call float @llvm.AMDIL.fraction.(float %370) %372 = fdiv float 1.000000e+00, %361 %373 = fmul float %371, %temp28.1 %374 = fmul float %372, %.59 %375 = call float @llvm.AMDIL.fraction.(float %373) %376 = call float @llvm.AMDIL.fraction.(float %374) %377 = fsub float -0.000000e+00, %375 %378 = fadd float %377, %373 %379 = fsub float -0.000000e+00, %376 %380 = fadd float %379, %374 %381 = fadd float %315, %378 %382 = fadd float %318, %380 %383 = fmul float %381, %29 %384 = fmul float %382, %29 %385 = call float @llvm.AMDIL.fraction.(float %383) %386 = call float @llvm.AMDIL.fraction.(float %384) %387 = fsub float -0.000000e+00, %385 %388 = fadd float %387, %383 %389 = fsub float -0.000000e+00, %386 %390 = fadd float %389, %384 %391 = fadd float %388, 5.000000e-01 %392 = fadd float %390, 5.000000e-01 %393 = fdiv float 1.000000e+00, %33 %394 = fdiv float 1.000000e+00, %34 %395 = fmul float %391, %393 %396 = fmul float %392, %394 %397 = bitcast float %395 to i32 %398 = bitcast float %396 to i32 %399 = bitcast float 0.000000e+00 to i32 %400 = insertelement <4 x i32> undef, i32 %397, i32 0 %401 = insertelement <4 x i32> %400, i32 %398, i32 1 %402 = insertelement <4 x i32> %401, i32 %399, i32 2 %403 = insertelement <4 x i32> %402, i32 undef, i32 3 %404 = bitcast <8 x i32> %46 to <32 x i8> %405 = bitcast <4 x i32> %48 to <16 x i8> %406 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %403, <32 x i8> %404, <16 x i8> %405, i32 2) br label %ENDIF49 ELSE51: ; preds = %ELSE %407 = bitcast float %53 to i32 %408 = bitcast float %54 to i32 %409 = insertelement <2 x i32> undef, i32 %407, i32 0 %410 = insertelement <2 x i32> %409, i32 %408, i32 1 %411 = bitcast <8 x i32> %46 to <32 x i8> %412 = bitcast <4 x i32> %48 to <16 x i8> %413 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %410, <32 x i8> %411, <16 x i8> %412, i32 2) br label %ENDIF49 ENDIF49: ; preds = %ELSE51, %IF50 %.sink60 = phi <4 x float> [ %413, %ELSE51 ], [ %406, %IF50 ] %414 = extractelement <4 x float> %.sink60, i32 0 %415 = extractelement <4 x float> %.sink60, i32 1 %416 = extractelement <4 x float> %.sink60, i32 2 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[32:35], s[2:3], 0x0 ; C0900300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[32:35], 0x1a ; C200211A s_buffer_load_dword s1, s[32:35], 0x18 ; C200A118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s0 ; 7E080200 v_mad_f32 v2, s1, v2, v4 ; D2820002 04120401 s_buffer_load_dword s0, s[32:35], 0x0 ; C2002100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v4, s0, v2 ; 0A080400 v_cmp_lt_f32_e64 s[0:1], v4, 0 ; D0020000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_mov_b32_e32 v5, 0x80000000 ; 7E0A02FF 80000000 v_xor_b32_e32 v4, v4, v5 ; 3A080B04 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_buffer_load_dword s0, s[32:35], 0x1 ; C2002101 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s0, v2 ; 08040400 v_cmp_lt_f32_e64 s[0:1], v2, 0 ; D0020000 00010102 v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; D2000002 00018280 v_and_b32_e32 v2, 1.0, v2 ; 360404F2 v_xor_b32_e32 v2, v2, v5 ; 3A040B02 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_buffer_load_dword s0, s[32:35], 0x25 ; C2002125 s_buffer_load_dword s1, s[32:35], 0x24 ; C200A124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s0 ; 7E040200 v_mad_f32 v2, s1, v3, v2 ; D2820002 040A0601 s_buffer_load_dword s0, s[32:35], 0x1b ; C200211B s_buffer_load_dword s1, s[32:35], 0x19 ; C200A119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s0 ; 7E060200 v_mad_f32 v2, s1, v2, v3 ; D2820002 040E0401 s_buffer_load_dword s0, s[32:35], 0x2 ; C2002102 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s0, v2 ; 0A060400 v_cmp_lt_f32_e64 s[0:1], v3, 0 ; D0020000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_and_b32_e32 v3, 1.0, v3 ; 360606F2 v_xor_b32_e32 v3, v3, v5 ; 3A060B03 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_buffer_load_dword s0, s[32:35], 0x3 ; C2002103 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v2, s0, v2 ; 08040400 v_cmp_lt_f32_e64 s[0:1], v2, 0 ; D0020000 00010102 v_cndmask_b32_e64 v2, 0, -1, s[0:1] ; D2000002 00018280 v_and_b32_e32 v2, 1.0, v2 ; 360404F2 v_xor_b32_e32 v2, v2, v5 ; 3A040B02 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 v_cmpx_le_f32_e32 vcc, 0, v2 ; 7C260480 s_cbranch_execnz 3 ; BF890003 exp 0, 9, 0, 1, 1, v0, v0, v0, v0 ; F8001890 00000000 s_endpgm ; BF810000 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v4, v0, 1, 1, [m0] ; C8100500 v_interp_p2_f32 v4, [v4], v1, 1, 1, [m0] ; C8110501 v_interp_p1_f32 v3, v0, 0, 1, [m0] ; C80C0400 v_interp_p2_f32 v3, [v3], v1, 0, 1, [m0] ; C80D0401 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_interp_p1_f32 v7, v0, 0, 0, [m0] ; C81C0000 v_interp_p2_f32 v7, [v7], v1, 0, 0, [m0] ; C81D0001 s_buffer_load_dword s0, s[32:35], 0x4 ; C2002104 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_neq_f32_e64 s[0:1], s0, -s0 ; D01A0000 40000000 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_eq_i32_e64 s[36:37], v0, 0 ; D1040024 00010100 s_buffer_load_dword s0, s[32:35], 0x8 ; C2002108 s_waitcnt lgkmcnt(0) ; BF8C007F v_cmp_gt_f32_e64 s[2:3], s0, 0.5 ; D0080002 0001E000 v_cndmask_b32_e64 v1, 0, -1, s[2:3] ; D2000801 00098280 s_buffer_load_dword s1, s[32:35], 0xc ; C200A10C v_mov_b32_e32 v0, 0x44800000 ; 7E0002FF 44800000 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s1, v0 ; 0A000001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_load_dwordx8 s[24:31], s[6:7], 0x8 ; C0CC0708 s_buffer_load_dword s2, s[32:35], 0x15 ; C2012115 s_buffer_load_dword s3, s[32:35], 0x14 ; C201A114 s_buffer_load_dword s6, s[32:35], 0x11 ; C2032111 s_buffer_load_dword s7, s[32:35], 0x10 ; C203A110 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[4:5], s[36:37] ; BE842424 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E s_cbranch_execz BB0_11 ; BF880000 v_cmp_eq_i32_e64 s[32:33], v1, 0 ; D1040020 00010101 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[12:15] ; F0800F00 00660803 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_xor_b64 exec, exec, s[32:33] ; 89FE207E s_cbranch_execz BB0_12 ; BF880000 v_mul_f32_e32 v8, s6, v4 ; 10100806 v_fract_f32_e32 v8, v8 ; 7E104108 v_mad_f32 v8, s6, v4, -v8 ; D2820008 84220806 v_add_f32_e32 v9, 0.5, v8 ; 061210F0 v_rcp_f32_e32 v10, s6 ; 7E145406 v_mul_f32_e32 v10, v10, v9 ; 1014130A v_mul_f32_e32 v13, s7, v3 ; 101A0607 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v13, s7, v3, -v13 ; D282000D 84360607 v_add_f32_e32 v14, 0.5, v13 ; 061C1AF0 v_rcp_f32_e32 v15, s7 ; 7E1E5407 v_mul_f32_e32 v9, v15, v14 ; 10121D0F v_mov_b32_e32 v11, 0 ; 7E160280 image_sample_l v[14:15], 9, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[16:23], s[8:11] ; F0900900 00440E09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, 0x437f0000, v14 ; 10201CFF 437F0000 v_mov_b32_e32 v17, 0x477f0000 ; 7E2202FF 477F0000 v_mad_f32 v16, v17, v15, v16 ; D2820010 04421F11 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_fract_f32_e32 v17, v16 ; 7E224110 v_subrev_f32_e32 v16, v17, v16 ; 0A202111 v_add_f32_e32 v16, 0.5, v16 ; 062020F0 v_mad_f32 v14, s1, v14, 0.5 ; D282000E 03C21C01 v_fract_f32_e32 v15, v14 ; 7E1E410E v_subrev_f32_e32 v14, v15, v14 ; 0A1C1D0F v_add_f32_e32 v14, 0.5, v14 ; 061C1CF0 v_cmp_ge_f32_e64 s[34:35], v0, 0 ; D00C0022 00010100 v_cndmask_b32_e64 v15, 0, -1, s[34:35] ; D200080F 00898280 v_cmp_ne_i32_e64 s[34:35], v15, 0 ; D10A0022 0001010F v_cndmask_b32_e64 v14, v16, v14, s[34:35] ; D200000E 008A1D10 v_rcp_f32_e32 v15, s0 ; 7E1E5400 v_mad_f32 v15, s3, v15, 0.5 ; D282000F 03C21E03 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_cmp_ge_f32_e64 s[34:35], v16, 0 ; D00C0022 00010110 v_cndmask_b32_e64 v16, 0, -1, s[34:35] ; D2000010 00898280 v_cmp_ne_i32_e64 s[34:35], v16, 0 ; D10A0022 00010110 v_xor_b32_e32 v16, 0x80000000, v15 ; 3A201EFF 80000000 v_cndmask_b32_e64 v16, v16, v15, s[34:35] ; D2000010 008A1F10 v_rcp_f32_e32 v17, v16 ; 7E225510 v_mul_f32_e32 v17, v14, v17 ; 1022230E v_fract_f32_e32 v17, v17 ; 7E224111 v_mul_f32_e32 v18, v16, v17 ; 10242310 v_fract_f32_e32 v18, v18 ; 7E244112 v_mad_f32 v16, v17, v16, -v18 ; D2820010 844A2111 v_mad_f32 v13, s7, v3, -v13 ; D282000D 84360607 v_add_f32_e32 v13, v16, v13 ; 061A1B10 v_mul_f32_e32 v16, s0, v13 ; 10201A00 v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v13, v13, s0, -v16 ; D282000D 8440010D v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_rcp_f32_e32 v16, s3 ; 7E205403 v_mul_f32_e32 v9, v16, v13 ; 10121B10 v_rcp_f32_e32 v13, v15 ; 7E1A550F v_mul_f32_e32 v15, v14, v13 ; 101E1B0E v_fract_f32_e32 v15, v15 ; 7E1E410F v_mad_f32 v13, v13, v14, -v15 ; D282000D 843E1D0D v_mad_f32 v8, s6, v4, -v8 ; D2820008 84220806 v_add_f32_e32 v8, v13, v8 ; 0610110D v_mul_f32_e32 v13, s0, v8 ; 101A1000 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v8, v8, s0, -v13 ; D2820008 84340108 v_add_f32_e32 v8, 0.5, v8 ; 061010F0 v_rcp_f32_e32 v13, s2 ; 7E1A5402 v_mul_f32_e32 v10, v13, v8 ; 1014110D image_sample_l v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[24:31], s[12:15] ; F0900F00 00660809 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mov_b32_e32 v12, v8 ; 7E180308 v_mov_b32_e32 v13, v9 ; 7E1A0309 v_mov_b32_e32 v14, v10 ; 7E1C030A v_mov_b32_e32 v15, v11 ; 7E1E030B s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E s_cbranch_execz BB0_4 ; BF880000 v_cmp_eq_i32_e64 s[32:33], v1, 0 ; D1040020 00010101 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[24:31], s[12:15] ; F0800F00 00660C03 v_mov_b32_e32 v10, v5 ; 7E140305 v_mov_b32_e32 v9, v6 ; 7E120306 v_mov_b32_e32 v8, v7 ; 7E100307 s_waitcnt vmcnt(0) ; BF8C0770 s_or_saveexec_b64 s[32:33], s[32:33] ; BEA02520 s_waitcnt expcnt(0) ; BF8C070F s_xor_b64 exec, exec, s[32:33] ; 89FE207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v1, s6, v4 ; 10020806 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v1, s6, v4, -v1 ; D2820001 84060806 v_add_f32_e32 v8, 0.5, v1 ; 061002F0 v_rcp_f32_e32 v9, s6 ; 7E125406 v_mul_f32_e32 v9, v9, v8 ; 10121109 v_mul_f32_e32 v12, s7, v3 ; 10180607 v_fract_f32_e32 v12, v12 ; 7E18410C v_mad_f32 v12, s7, v3, -v12 ; D282000C 84320607 v_add_f32_e32 v13, 0.5, v12 ; 061A18F0 v_rcp_f32_e32 v14, s7 ; 7E1C5407 v_mul_f32_e32 v8, v14, v13 ; 10101B0E v_mov_b32_e32 v10, 0 ; 7E140280 image_sample_l v[13:14], 9, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[16:23], s[8:11] ; F0900900 00440D08 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, 0x437f0000, v13 ; 101E1AFF 437F0000 v_mov_b32_e32 v16, 0x477f0000 ; 7E2002FF 477F0000 v_mad_f32 v15, v16, v14, v15 ; D282000F 043E1D10 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_fract_f32_e32 v16, v15 ; 7E20410F v_subrev_f32_e32 v15, v16, v15 ; 0A1E1F10 v_add_f32_e32 v15, 0.5, v15 ; 061E1EF0 v_mad_f32 v13, s1, v13, 0.5 ; D282000D 03C21A01 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_add_f32_e32 v13, 0.5, v13 ; 061A1AF0 v_cmp_ge_f32_e64 s[34:35], v0, 0 ; D00C0022 00010100 v_cndmask_b32_e64 v0, 0, -1, s[34:35] ; D2000000 00898280 v_cmp_ne_i32_e64 s[34:35], v0, 0 ; D10A0022 00010100 v_cndmask_b32_e64 v0, v15, v13, s[34:35] ; D2000000 188A1B0F v_rcp_f32_e32 v13, s0 ; 7E1A5400 v_mad_f32 v13, s3, v13, 0.5 ; D282000D 03C21A03 v_fract_f32_e32 v14, v13 ; 7E1C410D v_subrev_f32_e32 v13, v14, v13 ; 0A1A1B0E v_mul_f32_e32 v14, v0, v13 ; 101C1B00 v_cmp_ge_f32_e64 s[34:35], v14, 0 ; D00C0022 0001010E v_cndmask_b32_e64 v14, 0, -1, s[34:35] ; D200000E 00898280 v_cmp_ne_i32_e64 s[34:35], v14, 0 ; D10A0022 0001010E v_xor_b32_e32 v14, 0x80000000, v13 ; 3A1C1AFF 80000000 v_cndmask_b32_e64 v14, v14, v13, s[34:35] ; D200000E 108A1B0E v_rcp_f32_e32 v15, v14 ; 7E1E550E v_mul_f32_e32 v15, v15, v0 ; 101E010F v_fract_f32_e32 v15, v15 ; 7E1E410F v_mul_f32_e32 v16, v14, v15 ; 10201F0E v_fract_f32_e32 v16, v16 ; 7E204110 v_mad_f32 v14, v15, v14, -v16 ; D282000E 84421D0F v_mad_f32 v12, s7, v3, -v12 ; D282000C 84320607 v_add_f32_e32 v12, v14, v12 ; 0618190E v_mul_f32_e32 v14, s0, v12 ; 101C1800 v_fract_f32_e32 v14, v14 ; 7E1C410E v_mad_f32 v12, v12, s0, -v14 ; D282000C 8438010C v_add_f32_e32 v12, 0.5, v12 ; 061818F0 v_rcp_f32_e32 v14, s3 ; 7E1C5403 v_mul_f32_e32 v8, v14, v12 ; 1010190E v_rcp_f32_e32 v12, v13 ; 7E18550D v_mul_f32_e32 v13, v0, v12 ; 101A1900 v_fract_f32_e32 v13, v13 ; 7E1A410D v_mad_f32 v0, v12, v0, -v13 ; D2820000 8436010C v_mad_f32 v1, s6, v4, -v1 ; D2820001 84060806 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v1, s0, v0 ; 10020000 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v0, v0, s0, -v1 ; D2820000 84040100 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 v_rcp_f32_e32 v1, s2 ; 7E025402 v_mul_f32_e32 v9, v1, v0 ; 10120101 image_sample_l v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[8:11], s[24:31], s[12:15] ; F0900F00 00660C08 v_mov_b32_e32 v10, v5 ; 7E140305 v_mov_b32_e32 v9, v6 ; 7E120306 v_mov_b32_e32 v8, v7 ; 7E100307 s_waitcnt vmcnt(0) ; BF8C0770 s_or_b64 exec, exec, s[32:33] ; 88FE207E s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_pkrtz_f16_f32_e32 v0, v8, v9 ; 5E001308 v_mul_f32_e32 v1, v2, v15 ; 10021F02 v_cvt_pkrtz_f16_f32_e32 v1, v10, v1 ; 5E02030A exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV TEMP[1].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxyx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_add_f32_e32 v0, 0, v0 ; 06000080 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..3] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 17.0000, 3721.0000} IMM[1] FLT32 { 13.0000, 930.2500, 1860.5000, -0.5000} IMM[2] FLT32 { -0.0000, -1.0000, -6.0000, 6.0000} IMM[3] FLT32 { -2.0000, 3.0000, 0.5000, -1.0000} IMM[4] FLT32 { -1.0000, 1.0000, 0.0000, 0.5000} IMM[5] FLT32 { 1.0000, -1.0000, -2.0000, 2.0000} IMM[6] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: RCP TEMP[0].x, CONST[2].xxxx 1: MUL TEMP[0], TEMP[0].xxxx, CONST[0].yyxx 2: FRC TEMP[1], TEMP[0].yyww 3: MOV TEMP[2].w, TEMP[1].wwww 4: ADD TEMP[0], TEMP[0], -TEMP[1] 5: RCP TEMP[2].x, TEMP[0].yyyy 6: MUL TEMP[1].yz, TEMP[0].xyww, IN[0].xyxw 7: MOV TEMP[2].yz, TEMP[1].zyzz 8: FRC TEMP[1].xy, TEMP[2].zyzw 9: MOV TEMP[3].xy, TEMP[1].xyxx 10: MAD TEMP[4], IN[0].yyxx, TEMP[0], -TEMP[1].yyxx 11: RCP TEMP[0].x, TEMP[0].wwww 12: ADD TEMP[4], TEMP[4], IMM[0].xyxy 13: MUL TEMP[5], TEMP[4].xyxy, IMM[0].zzzz 14: MUL TEMP[2], TEMP[2].xxxx, TEMP[5] 15: FRC TEMP[6], TEMP[2] 16: MUL TEMP[2], TEMP[6], TEMP[6] 17: MAD TEMP[2], TEMP[2], IMM[0].wwww, TEMP[4].zzww 18: MUL TEMP[2], TEMP[2], IMM[1].xxxx 19: MUL TEMP[0], TEMP[0].xxxx, TEMP[2] 20: FRC TEMP[6], TEMP[0] 21: MUL TEMP[0], TEMP[6], TEMP[6] 22: MUL TEMP[2], TEMP[0], IMM[1].yyyy 23: MUL TEMP[0], TEMP[0], IMM[1].zzzz 24: FRC TEMP[6], TEMP[0] 25: ADD TEMP[0], TEMP[6], IMM[1].wwww 26: FRC TEMP[6], TEMP[2] 27: ADD TEMP[2], TEMP[6], IMM[1].wwww 28: MOV TEMP[6], -TEMP[2] 29: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 30: UIF TEMP[7].xxxx :0 31: MOV TEMP[7].x, IMM[0].xxxx 32: ELSE :0 33: MOV TEMP[7].x, IMM[0].yyyy 34: ENDIF 35: MOV TEMP[7].x, TEMP[7].xxxx 36: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].xxxx 37: UIF TEMP[8].xxxx :0 38: MOV TEMP[8].x, IMM[0].xxxx 39: ELSE :0 40: MOV TEMP[8].x, IMM[0].yyyy 41: ENDIF 42: MOV TEMP[7].y, TEMP[8].xxxx 43: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].xxxx 44: UIF TEMP[8].xxxx :0 45: MOV TEMP[8].x, IMM[0].xxxx 46: ELSE :0 47: MOV TEMP[8].x, IMM[0].yyyy 48: ENDIF 49: MOV TEMP[7].z, TEMP[8].xxxx 50: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].xxxx 51: UIF TEMP[6].xxxx :0 52: MOV TEMP[6].x, IMM[0].xxxx 53: ELSE :0 54: MOV TEMP[6].x, IMM[0].yyyy 55: ENDIF 56: MOV TEMP[7].w, TEMP[6].xxxx 57: FSGE TEMP[6].x, TEMP[2].xxxx, IMM[0].xxxx 58: UIF TEMP[6].xxxx :0 59: MOV TEMP[6].x, IMM[2].xxxx 60: ELSE :0 61: MOV TEMP[6].x, IMM[2].yyyy 62: ENDIF 63: MOV TEMP[6].x, TEMP[6].xxxx 64: FSGE TEMP[8].x, TEMP[2].yyyy, IMM[0].xxxx 65: UIF TEMP[8].xxxx :0 66: MOV TEMP[8].x, IMM[2].xxxx 67: ELSE :0 68: MOV TEMP[8].x, IMM[2].yyyy 69: ENDIF 70: MOV TEMP[6].y, TEMP[8].xxxx 71: FSGE TEMP[8].x, TEMP[2].zzzz, IMM[0].xxxx 72: UIF TEMP[8].xxxx :0 73: MOV TEMP[8].x, IMM[2].xxxx 74: ELSE :0 75: MOV TEMP[8].x, IMM[2].yyyy 76: ENDIF 77: MOV TEMP[6].z, TEMP[8].xxxx 78: FSGE TEMP[8].x, TEMP[2].wwww, IMM[0].xxxx 79: UIF TEMP[8].xxxx :0 80: MOV TEMP[8].x, IMM[2].xxxx 81: ELSE :0 82: MOV TEMP[8].x, IMM[2].yyyy 83: ENDIF 84: MOV TEMP[6].w, TEMP[8].xxxx 85: ADD TEMP[2], TEMP[6], TEMP[7] 86: ADD TEMP[4], TEMP[1].xxyy, IMM[2].xyxy 87: MUL TEMP[5], TEMP[2], TEMP[4].zwzw 88: MOV TEMP[6], -TEMP[0] 89: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 90: UIF TEMP[7].xxxx :0 91: MOV TEMP[7].x, IMM[0].xxxx 92: ELSE :0 93: MOV TEMP[7].x, IMM[0].yyyy 94: ENDIF 95: MOV TEMP[7].x, TEMP[7].xxxx 96: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].xxxx 97: UIF TEMP[8].xxxx :0 98: MOV TEMP[8].x, IMM[0].xxxx 99: ELSE :0 100: MOV TEMP[8].x, IMM[0].yyyy 101: ENDIF 102: MOV TEMP[7].y, TEMP[8].xxxx 103: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].xxxx 104: UIF TEMP[8].xxxx :0 105: MOV TEMP[8].x, IMM[0].xxxx 106: ELSE :0 107: MOV TEMP[8].x, IMM[0].yyyy 108: ENDIF 109: MOV TEMP[7].z, TEMP[8].xxxx 110: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].xxxx 111: UIF TEMP[6].xxxx :0 112: MOV TEMP[6].x, IMM[0].xxxx 113: ELSE :0 114: MOV TEMP[6].x, IMM[0].yyyy 115: ENDIF 116: MOV TEMP[7].w, TEMP[6].xxxx 117: FSGE TEMP[6].x, TEMP[0].xxxx, IMM[0].xxxx 118: UIF TEMP[6].xxxx :0 119: MOV TEMP[6].x, IMM[2].xxxx 120: ELSE :0 121: MOV TEMP[6].x, IMM[2].yyyy 122: ENDIF 123: MOV TEMP[6].x, TEMP[6].xxxx 124: FSGE TEMP[8].x, TEMP[0].yyyy, IMM[0].xxxx 125: UIF TEMP[8].xxxx :0 126: MOV TEMP[8].x, IMM[2].xxxx 127: ELSE :0 128: MOV TEMP[8].x, IMM[2].yyyy 129: ENDIF 130: MOV TEMP[6].y, TEMP[8].xxxx 131: FSGE TEMP[8].x, TEMP[0].zzzz, IMM[0].xxxx 132: UIF TEMP[8].xxxx :0 133: MOV TEMP[8].x, IMM[2].xxxx 134: ELSE :0 135: MOV TEMP[8].x, IMM[2].yyyy 136: ENDIF 137: MOV TEMP[6].z, TEMP[8].xxxx 138: FSGE TEMP[8].x, TEMP[0].wwww, IMM[0].xxxx 139: UIF TEMP[8].xxxx :0 140: MOV TEMP[8].x, IMM[2].xxxx 141: ELSE :0 142: MOV TEMP[8].x, IMM[2].yyyy 143: ENDIF 144: MOV TEMP[6].w, TEMP[8].xxxx 145: ADD TEMP[0], TEMP[6], TEMP[7] 146: MAD TEMP[4], TEMP[4].xxyy, TEMP[0], TEMP[5] 147: ADD TEMP[4], -TEMP[4].xyxz, TEMP[4].zwyw 148: MAD TEMP[6].zw, TEMP[1].xyxy, IMM[2].zzzz, IMM[2].wwww 149: MOV TEMP[3].zw, TEMP[6].wwzw 150: MUL TEMP[5], TEMP[1].xyxy, TEMP[3] 151: MAD TEMP[3], TEMP[1].xyxx, IMM[3].xxxx, IMM[3].yyyy 152: MUL TEMP[3], TEMP[3], TEMP[5].xyxx 153: LRP TEMP[6].xy, TEMP[3].wwww, TEMP[0].zwzw, TEMP[0] 154: MOV TEMP[1].xy, TEMP[6].xyxx 155: LRP TEMP[6].zw, TEMP[3].yyyy, TEMP[2].xyyw, TEMP[2].xyxz 156: MOV TEMP[1].zw, TEMP[6].wwzw 157: MAD TEMP[0], TEMP[4], TEMP[5].zzww, TEMP[1] 158: LRP TEMP[1], TEMP[3], TEMP[0].wyww, TEMP[0].zxzz 159: MOV TEMP[2].z, TEMP[1].zyzz 160: MUL TEMP[0], TEMP[1], CONST[1].xxxx 161: MUL TEMP[0], TEMP[0], IMM[3].zzzz 162: MAD TEMP[0], TEMP[0], IMM[4].xyzz, IMM[4].wwzy 163: MOV TEMP[2].w, IMM[0].xxxx 164: RCP TEMP[2].x, CONST[0].xxxx 165: ADD TEMP[1].xy, TEMP[2].xwzw, IN[0] 166: MOV TEMP[1].xy, TEMP[1].xyyy 167: TEX TEMP[1].xw, TEMP[1], SAMP[0], 2D 168: MOV TEMP[3].x, TEMP[1].xxxw 169: MOV TEMP[2].y, -TEMP[2].xxxx 170: MOV TEMP[4].yw, TEMP[2].xyxx 171: RCP TEMP[4].x, CONST[0].yyyy 172: ADD TEMP[1].xy, TEMP[4].yxzw, IN[0] 173: ADD TEMP[6].yw, TEMP[4].xwzx, IN[0].xxzy 174: MOV TEMP[6].xy, TEMP[6].ywww 175: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 176: MOV TEMP[3].z, TEMP[6].xxxx 177: MOV TEMP[1].xy, TEMP[1].xyyy 178: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 179: MOV TEMP[3].y, TEMP[1].xxxx 180: DP3 TEMP[1].x, TEMP[3].yzxx, IMM[5].xyzz 181: ADD TEMP[6].yw, TEMP[2], IN[0].xxzy 182: MOV TEMP[6].xy, TEMP[6].ywww 183: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 184: MOV TEMP[5].z, TEMP[6].xxxx 185: MOV TEMP[2].z, -TEMP[4].xxxx 186: ADD TEMP[6].yw, TEMP[2].xxzz, IN[0].xxzy 187: MOV TEMP[4].yw, TEMP[6].wyww 188: MOV TEMP[6].xy, TEMP[6].ywww 189: TEX TEMP[6].x, TEMP[6], SAMP[0], 2D 190: MOV TEMP[5].y, TEMP[6].xxxx 191: ADD TEMP[6].xy, TEMP[2].yzzw, IN[0] 192: MOV TEMP[2].xy, TEMP[6].xyxx 193: ADD TEMP[7].zw, TEMP[2].xywz, IN[0].xyxy 194: MOV TEMP[7].xy, TEMP[7].zwww 195: TEX TEMP[7].x, TEMP[7], SAMP[0], 2D 196: MOV TEMP[5].w, TEMP[7].xxxx 197: MOV TEMP[6].xy, TEMP[6].xyyy 198: TEX TEMP[6].xw, TEMP[6], SAMP[0], 2D 199: MOV TEMP[2].w, TEMP[6].wwww 200: MOV TEMP[5].x, TEMP[6].xxxx 201: DP3 TEMP[6].x, TEMP[5].xyww, IMM[5].xxww 202: DP3 TEMP[7].x, TEMP[5].xyzz, IMM[5].xyww 203: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[7].xxxx 204: MOV TEMP[5].x, TEMP[1].xxxx 205: MOV TEMP[4].z, IMM[0].xxxx 206: ADD TEMP[1].xz, TEMP[4].zyxw, IN[0].xyyw 207: MOV TEMP[1].xy, TEMP[1].xzzz 208: TEX TEMP[1].x, TEMP[1], SAMP[0], 2D 209: MOV TEMP[3].w, TEMP[1].xxxx 210: DP3 TEMP[1].x, TEMP[3].yzww, IMM[3].wwxx 211: ADD TEMP[1].y, TEMP[1].xxxx, TEMP[6].xxxx 212: MOV TEMP[5].y, TEMP[1].yyyy 213: MUL TEMP[1].xy, TEMP[5], CONST[3].xxxx 214: MOV TEMP[2].xy, TEMP[1].xyxx 215: MUL TEMP[4].xyz, TEMP[2].yxyw, IMM[3].zzzz 216: DP2 TEMP[2].x, TEMP[1].xyyy, TEMP[1].xyyy 217: MAX TEMP[1].x, TEMP[2].xxxx, IMM[6].xxxx 218: RSQ TEMP[5].x, TEMP[1].xxxx 219: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[1].xxxx 220: CMP TEMP[2].x, -TEMP[1].xxxx, TEMP[5].xxxx, IMM[0].xxxx 221: MAD TEMP[1].yzw, TEMP[4].xxyz, IMM[4].xxyz, IMM[4].xwwz 222: MOV TEMP[2].yzw, TEMP[1].zyzw 223: ADD TEMP[3], -TEMP[0], TEMP[2].yzwx 224: MOV_SAT TEMP[1].x, TEMP[2].xxxx 225: MAD TEMP[0], TEMP[1].xxxx, TEMP[3], TEMP[0] 226: MOV OUT[0], TEMP[0] 227: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %29 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fdiv float 1.000000e+00, %27 %36 = fmul float %35, %25 %37 = fmul float %35, %25 %38 = fmul float %35, %24 %39 = fmul float %35, %24 %40 = call float @llvm.AMDIL.fraction.(float %37) %41 = call float @llvm.AMDIL.fraction.(float %37) %42 = call float @llvm.AMDIL.fraction.(float %39) %43 = call float @llvm.AMDIL.fraction.(float %39) %44 = fsub float -0.000000e+00, %40 %45 = fadd float %36, %44 %46 = fsub float -0.000000e+00, %41 %47 = fadd float %37, %46 %48 = fsub float -0.000000e+00, %42 %49 = fadd float %38, %48 %50 = fsub float -0.000000e+00, %43 %51 = fadd float %39, %50 %52 = fdiv float 1.000000e+00, %47 %53 = fmul float %47, %34 %54 = fmul float %51, %33 %55 = call float @llvm.AMDIL.fraction.(float %54) %56 = call float @llvm.AMDIL.fraction.(float %53) %57 = fsub float -0.000000e+00, %56 %58 = fmul float %34, %45 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %56 %61 = fmul float %34, %47 %62 = fadd float %61, %60 %63 = fsub float -0.000000e+00, %55 %64 = fmul float %33, %49 %65 = fadd float %64, %63 %66 = fsub float -0.000000e+00, %55 %67 = fmul float %33, %51 %68 = fadd float %67, %66 %69 = fdiv float 1.000000e+00, %51 %70 = fadd float %59, 0.000000e+00 %71 = fadd float %62, 1.000000e+00 %72 = fadd float %65, 0.000000e+00 %73 = fadd float %68, 1.000000e+00 %74 = fmul float %70, 1.700000e+01 %75 = fmul float %71, 1.700000e+01 %76 = fmul float %70, 1.700000e+01 %77 = fmul float %71, 1.700000e+01 %78 = fmul float %52, %74 %79 = fmul float %52, %75 %80 = fmul float %52, %76 %81 = fmul float %52, %77 %82 = call float @llvm.AMDIL.fraction.(float %78) %83 = call float @llvm.AMDIL.fraction.(float %79) %84 = call float @llvm.AMDIL.fraction.(float %80) %85 = call float @llvm.AMDIL.fraction.(float %81) %86 = fmul float %82, %82 %87 = fmul float %83, %83 %88 = fmul float %84, %84 %89 = fmul float %85, %85 %90 = fmul float %86, 3.721000e+03 %91 = fadd float %90, %72 %92 = fmul float %87, 3.721000e+03 %93 = fadd float %92, %72 %94 = fmul float %88, 3.721000e+03 %95 = fadd float %94, %73 %96 = fmul float %89, 3.721000e+03 %97 = fadd float %96, %73 %98 = fmul float %91, 1.300000e+01 %99 = fmul float %93, 1.300000e+01 %100 = fmul float %95, 1.300000e+01 %101 = fmul float %97, 1.300000e+01 %102 = fmul float %69, %98 %103 = fmul float %69, %99 %104 = fmul float %69, %100 %105 = fmul float %69, %101 %106 = call float @llvm.AMDIL.fraction.(float %102) %107 = call float @llvm.AMDIL.fraction.(float %103) %108 = call float @llvm.AMDIL.fraction.(float %104) %109 = call float @llvm.AMDIL.fraction.(float %105) %110 = fmul float %106, %106 %111 = fmul float %107, %107 %112 = fmul float %108, %108 %113 = fmul float %109, %109 %114 = fmul float %110, 9.302500e+02 %115 = fmul float %111, 9.302500e+02 %116 = fmul float %112, 9.302500e+02 %117 = fmul float %113, 9.302500e+02 %118 = fmul float %110, 1.860500e+03 %119 = fmul float %111, 1.860500e+03 %120 = fmul float %112, 1.860500e+03 %121 = fmul float %113, 1.860500e+03 %122 = call float @llvm.AMDIL.fraction.(float %118) %123 = call float @llvm.AMDIL.fraction.(float %119) %124 = call float @llvm.AMDIL.fraction.(float %120) %125 = call float @llvm.AMDIL.fraction.(float %121) %126 = fadd float %122, -5.000000e-01 %127 = fadd float %123, -5.000000e-01 %128 = fadd float %124, -5.000000e-01 %129 = fadd float %125, -5.000000e-01 %130 = call float @llvm.AMDIL.fraction.(float %114) %131 = call float @llvm.AMDIL.fraction.(float %115) %132 = call float @llvm.AMDIL.fraction.(float %116) %133 = call float @llvm.AMDIL.fraction.(float %117) %134 = fadd float %130, -5.000000e-01 %135 = fadd float %131, -5.000000e-01 %136 = fadd float %132, -5.000000e-01 %137 = fadd float %133, -5.000000e-01 %138 = fsub float -0.000000e+00, %134 %139 = fsub float -0.000000e+00, %135 %140 = fsub float -0.000000e+00, %136 %141 = fsub float -0.000000e+00, %137 %142 = fcmp oge float %138, 0.000000e+00 %143 = sext i1 %142 to i32 %144 = bitcast i32 %143 to float %145 = bitcast float %144 to i32 %146 = icmp ne i32 %145, 0 %. = select i1 %146, float 0.000000e+00, float 1.000000e+00 %147 = fcmp oge float %139, 0.000000e+00 %148 = sext i1 %147 to i32 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = icmp ne i32 %150, 0 %temp32.0 = select i1 %151, float 0.000000e+00, float 1.000000e+00 %152 = fcmp oge float %140, 0.000000e+00 %153 = sext i1 %152 to i32 %154 = bitcast i32 %153 to float %155 = bitcast float %154 to i32 %156 = icmp ne i32 %155, 0 %.81 = select i1 %156, float 0.000000e+00, float 1.000000e+00 %157 = fcmp oge float %141, 0.000000e+00 %158 = sext i1 %157 to i32 %159 = bitcast i32 %158 to float %160 = bitcast float %159 to i32 %161 = icmp ne i32 %160, 0 %temp24.0 = select i1 %161, float 0.000000e+00, float 1.000000e+00 %162 = fcmp oge float %134, 0.000000e+00 %163 = sext i1 %162 to i32 %164 = bitcast i32 %163 to float %165 = bitcast float %164 to i32 %166 = icmp ne i32 %165, 0 %.82 = select i1 %166, float -0.000000e+00, float -1.000000e+00 %167 = fcmp oge float %135, 0.000000e+00 %168 = sext i1 %167 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = icmp ne i32 %170, 0 %temp32.2 = select i1 %171, float -0.000000e+00, float -1.000000e+00 %172 = fcmp oge float %136, 0.000000e+00 %173 = sext i1 %172 to i32 %174 = bitcast i32 %173 to float %175 = bitcast float %174 to i32 %176 = icmp ne i32 %175, 0 %.83 = select i1 %176, float -0.000000e+00, float -1.000000e+00 %177 = fcmp oge float %137, 0.000000e+00 %178 = sext i1 %177 to i32 %179 = bitcast i32 %178 to float %180 = bitcast float %179 to i32 %181 = icmp ne i32 %180, 0 %temp32.4 = select i1 %181, float -0.000000e+00, float -1.000000e+00 %182 = fadd float %.82, %. %183 = fadd float %temp32.2, %temp32.0 %184 = fadd float %.83, %.81 %185 = fadd float %temp32.4, %temp24.0 %186 = fadd float %55, -0.000000e+00 %187 = fadd float %55, -1.000000e+00 %188 = fadd float %56, -0.000000e+00 %189 = fadd float %56, -1.000000e+00 %190 = fmul float %182, %188 %191 = fmul float %183, %189 %192 = fmul float %184, %188 %193 = fmul float %185, %189 %194 = fsub float -0.000000e+00, %126 %195 = fsub float -0.000000e+00, %127 %196 = fsub float -0.000000e+00, %128 %197 = fsub float -0.000000e+00, %129 %198 = fcmp oge float %194, 0.000000e+00 %199 = sext i1 %198 to i32 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = icmp ne i32 %201, 0 %.84 = select i1 %202, float 0.000000e+00, float 1.000000e+00 %203 = fcmp oge float %195, 0.000000e+00 %204 = sext i1 %203 to i32 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = icmp ne i32 %206, 0 %temp32.5 = select i1 %207, float 0.000000e+00, float 1.000000e+00 %208 = fcmp oge float %196, 0.000000e+00 %209 = sext i1 %208 to i32 %210 = bitcast i32 %209 to float %211 = bitcast float %210 to i32 %212 = icmp ne i32 %211, 0 %.85 = select i1 %212, float 0.000000e+00, float 1.000000e+00 %213 = fcmp oge float %197, 0.000000e+00 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 %temp24.2 = select i1 %217, float 0.000000e+00, float 1.000000e+00 %218 = fcmp oge float %126, 0.000000e+00 %219 = sext i1 %218 to i32 %220 = bitcast i32 %219 to float %221 = bitcast float %220 to i32 %222 = icmp ne i32 %221, 0 %.86 = select i1 %222, float -0.000000e+00, float -1.000000e+00 %223 = fcmp oge float %127, 0.000000e+00 %224 = sext i1 %223 to i32 %225 = bitcast i32 %224 to float %226 = bitcast float %225 to i32 %227 = icmp ne i32 %226, 0 %temp32.7 = select i1 %227, float -0.000000e+00, float -1.000000e+00 %228 = fcmp oge float %128, 0.000000e+00 %229 = sext i1 %228 to i32 %230 = bitcast i32 %229 to float %231 = bitcast float %230 to i32 %232 = icmp ne i32 %231, 0 %.87 = select i1 %232, float -0.000000e+00, float -1.000000e+00 %233 = fcmp oge float %129, 0.000000e+00 %234 = sext i1 %233 to i32 %235 = bitcast i32 %234 to float %236 = bitcast float %235 to i32 %237 = icmp ne i32 %236, 0 %temp32.9 = select i1 %237, float -0.000000e+00, float -1.000000e+00 %238 = fadd float %.86, %.84 %239 = fadd float %temp32.7, %temp32.5 %240 = fadd float %.87, %.85 %241 = fadd float %temp32.9, %temp24.2 %242 = fmul float %186, %238 %243 = fadd float %242, %190 %244 = fmul float %186, %239 %245 = fadd float %244, %191 %246 = fmul float %187, %240 %247 = fadd float %246, %192 %248 = fmul float %187, %241 %249 = fadd float %248, %193 %250 = fsub float -0.000000e+00, %243 %251 = fadd float %250, %247 %252 = fsub float -0.000000e+00, %245 %253 = fadd float %252, %249 %254 = fsub float -0.000000e+00, %243 %255 = fadd float %254, %245 %256 = fsub float -0.000000e+00, %247 %257 = fadd float %256, %249 %258 = fmul float %55, -6.000000e+00 %259 = fadd float %258, 6.000000e+00 %260 = fmul float %56, -6.000000e+00 %261 = fadd float %260, 6.000000e+00 %262 = fmul float %55, %55 %263 = fmul float %56, %56 %264 = fmul float %55, %259 %265 = fmul float %56, %261 %266 = fmul float %55, -2.000000e+00 %267 = fadd float %266, 3.000000e+00 %268 = fmul float %56, -2.000000e+00 %269 = fadd float %268, 3.000000e+00 %270 = fmul float %55, -2.000000e+00 %271 = fadd float %270, 3.000000e+00 %272 = fmul float %55, -2.000000e+00 %273 = fadd float %272, 3.000000e+00 %274 = fmul float %267, %262 %275 = fmul float %269, %263 %276 = fmul float %271, %262 %277 = fmul float %273, %262 %278 = call float @llvm.AMDGPU.lrp(float %277, float %240, float %238) %279 = call float @llvm.AMDGPU.lrp(float %277, float %241, float %239) %280 = call float @llvm.AMDGPU.lrp(float %275, float %183, float %182) %281 = call float @llvm.AMDGPU.lrp(float %275, float %185, float %184) %282 = fmul float %251, %264 %283 = fadd float %282, %278 %284 = fmul float %253, %264 %285 = fadd float %284, %279 %286 = fmul float %255, %265 %287 = fadd float %286, %280 %288 = fmul float %257, %265 %289 = fadd float %288, %281 %290 = call float @llvm.AMDGPU.lrp(float %274, float %289, float %287) %291 = call float @llvm.AMDGPU.lrp(float %275, float %285, float %283) %292 = call float @llvm.AMDGPU.lrp(float %276, float %289, float %287) %293 = call float @llvm.AMDGPU.lrp(float %277, float %289, float %287) %294 = fmul float %290, %26 %295 = fmul float %291, %26 %296 = fmul float %292, %26 %297 = fmul float %293, %26 %298 = fmul float %294, 5.000000e-01 %299 = fmul float %295, 5.000000e-01 %300 = fmul float %296, 5.000000e-01 %301 = fmul float %297, 5.000000e-01 %302 = fmul float %298, -1.000000e+00 %303 = fadd float %302, 5.000000e-01 %304 = fmul float %299, 1.000000e+00 %305 = fadd float %304, 5.000000e-01 %306 = fmul float %300, 0.000000e+00 %307 = fadd float %306, 0.000000e+00 %308 = fmul float %301, 0.000000e+00 %309 = fadd float %308, 1.000000e+00 %310 = fdiv float 1.000000e+00, %24 %311 = fadd float %310, %33 %312 = fadd float 0.000000e+00, %34 %313 = bitcast float %311 to i32 %314 = bitcast float %312 to i32 %315 = insertelement <2 x i32> undef, i32 %313, i32 0 %316 = insertelement <2 x i32> %315, i32 %314, i32 1 %317 = bitcast <8 x i32> %30 to <32 x i8> %318 = bitcast <4 x i32> %32 to <16 x i8> %319 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %316, <32 x i8> %317, <16 x i8> %318, i32 2) %320 = extractelement <4 x float> %319, i32 0 %321 = fsub float -0.000000e+00, %310 %322 = fdiv float 1.000000e+00, %25 %323 = fadd float %321, %33 %324 = fadd float %322, %34 %325 = fadd float %310, %33 %326 = fadd float %322, %34 %327 = bitcast float %325 to i32 %328 = bitcast float %326 to i32 %329 = insertelement <2 x i32> undef, i32 %327, i32 0 %330 = insertelement <2 x i32> %329, i32 %328, i32 1 %331 = bitcast <8 x i32> %30 to <32 x i8> %332 = bitcast <4 x i32> %32 to <16 x i8> %333 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %330, <32 x i8> %331, <16 x i8> %332, i32 2) %334 = extractelement <4 x float> %333, i32 0 %335 = bitcast float %323 to i32 %336 = bitcast float %324 to i32 %337 = insertelement <2 x i32> undef, i32 %335, i32 0 %338 = insertelement <2 x i32> %337, i32 %336, i32 1 %339 = bitcast <8 x i32> %30 to <32 x i8> %340 = bitcast <4 x i32> %32 to <16 x i8> %341 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %338, <32 x i8> %339, <16 x i8> %340, i32 2) %342 = extractelement <4 x float> %341, i32 0 %343 = fmul float %342, 1.000000e+00 %344 = fmul float %334, -1.000000e+00 %345 = fadd float %344, %343 %346 = fmul float %320, -2.000000e+00 %347 = fadd float %345, %346 %348 = fadd float %321, %33 %349 = fadd float 0.000000e+00, %34 %350 = bitcast float %348 to i32 %351 = bitcast float %349 to i32 %352 = insertelement <2 x i32> undef, i32 %350, i32 0 %353 = insertelement <2 x i32> %352, i32 %351, i32 1 %354 = bitcast <8 x i32> %30 to <32 x i8> %355 = bitcast <4 x i32> %32 to <16 x i8> %356 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %353, <32 x i8> %354, <16 x i8> %355, i32 2) %357 = extractelement <4 x float> %356, i32 0 %358 = fsub float -0.000000e+00, %322 %359 = fadd float %310, %33 %360 = fadd float %358, %34 %361 = bitcast float %359 to i32 %362 = bitcast float %360 to i32 %363 = insertelement <2 x i32> undef, i32 %361, i32 0 %364 = insertelement <2 x i32> %363, i32 %362, i32 1 %365 = bitcast <8 x i32> %30 to <32 x i8> %366 = bitcast <4 x i32> %32 to <16 x i8> %367 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %364, <32 x i8> %365, <16 x i8> %366, i32 2) %368 = extractelement <4 x float> %367, i32 0 %369 = fadd float %321, %33 %370 = fadd float %358, %34 %371 = fadd float 0.000000e+00, %33 %372 = fadd float %358, %34 %373 = bitcast float %371 to i32 %374 = bitcast float %372 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %30 to <32 x i8> %378 = bitcast <4 x i32> %32 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 0 %381 = bitcast float %369 to i32 %382 = bitcast float %370 to i32 %383 = insertelement <2 x i32> undef, i32 %381, i32 0 %384 = insertelement <2 x i32> %383, i32 %382, i32 1 %385 = bitcast <8 x i32> %30 to <32 x i8> %386 = bitcast <4 x i32> %32 to <16 x i8> %387 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %384, <32 x i8> %385, <16 x i8> %386, i32 2) %388 = extractelement <4 x float> %387, i32 0 %389 = fmul float %388, 1.000000e+00 %390 = fmul float %368, 1.000000e+00 %391 = fadd float %390, %389 %392 = fmul float %380, 2.000000e+00 %393 = fadd float %391, %392 %394 = fmul float %388, 1.000000e+00 %395 = fmul float %368, -1.000000e+00 %396 = fadd float %395, %394 %397 = fmul float %357, 2.000000e+00 %398 = fadd float %396, %397 %399 = fadd float %347, %398 %400 = fadd float 0.000000e+00, %33 %401 = fadd float %322, %34 %402 = bitcast float %400 to i32 %403 = bitcast float %401 to i32 %404 = insertelement <2 x i32> undef, i32 %402, i32 0 %405 = insertelement <2 x i32> %404, i32 %403, i32 1 %406 = bitcast <8 x i32> %30 to <32 x i8> %407 = bitcast <4 x i32> %32 to <16 x i8> %408 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %405, <32 x i8> %406, <16 x i8> %407, i32 2) %409 = extractelement <4 x float> %408, i32 0 %410 = fmul float %342, -1.000000e+00 %411 = fmul float %334, -1.000000e+00 %412 = fadd float %411, %410 %413 = fmul float %409, -2.000000e+00 %414 = fadd float %412, %413 %415 = fadd float %414, %393 %416 = fmul float %399, %28 %417 = fmul float %415, %28 %418 = fmul float %417, 5.000000e-01 %419 = fmul float %416, 5.000000e-01 %420 = fmul float %417, 5.000000e-01 %421 = fmul float %416, %416 %422 = fmul float %417, %417 %423 = fadd float %421, %422 %424 = call float @llvm.maxnum.f32(float %423, float 0x3E7AD7F2A0000000) %425 = call float @llvm.AMDGPU.rsq.clamped.f32(float %424) %426 = fmul float %425, %424 %427 = fsub float -0.000000e+00, %424 %428 = call float @llvm.AMDGPU.cndlt(float %427, float %426, float 0.000000e+00) %429 = fmul float %418, -1.000000e+00 %430 = fadd float %429, 5.000000e-01 %431 = fmul float %419, 1.000000e+00 %432 = fadd float %431, 5.000000e-01 %433 = fmul float %420, 0.000000e+00 %434 = fadd float %433, 0.000000e+00 %435 = fsub float -0.000000e+00, %303 %436 = fadd float %435, %430 %437 = fsub float -0.000000e+00, %305 %438 = fadd float %437, %432 %439 = fsub float -0.000000e+00, %307 %440 = fadd float %439, %434 %441 = fsub float -0.000000e+00, %309 %442 = fadd float %441, %428 %443 = call float @llvm.AMDIL.clamp.(float %428, float 0.000000e+00, float 1.000000e+00) %444 = fmul float %443, %436 %445 = fadd float %444, %303 %446 = fmul float %443, %438 %447 = fadd float %446, %305 %448 = fmul float %443, %440 %449 = fadd float %448, %307 %450 = fmul float %443, %442 %451 = fadd float %450, %309 %452 = call i32 @llvm.SI.packf16(float %445, float %447) %453 = bitcast i32 %452 to float %454 = call i32 @llvm.SI.packf16(float %449, float %451) %455 = bitcast i32 %454 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %453, float %455, float %453, float %455) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100 s_buffer_load_dword s9, s[0:3], 0x8 ; C2048108 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v3, s9 ; 7E065409 v_mul_f32_e32 v4, s8, v3 ; 10080608 v_fract_f32_e32 v4, v4 ; 7E084104 v_mad_f32 v4, v3, s8, -v4 ; D2820004 84101103 v_mul_f32_e32 v5, v2, v4 ; 100A0902 v_fract_f32_e32 v5, v5 ; 7E0A4105 v_mad_f32 v6, v4, v2, -v5 ; D2820006 84160504 v_add_f32_e32 v7, 0, v6 ; 060E0C80 v_interp_p1_f32 v8, v0, 1, 0, [m0] ; C8200100 v_interp_p2_f32 v8, [v8], v1, 1, 0, [m0] ; C8210101 s_buffer_load_dword s9, s[0:3], 0x1 ; C2048101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s9, v3 ; 10000609 v_fract_f32_e32 v0, v0 ; 7E004100 v_mad_f32 v0, v3, s9, -v0 ; D2820000 84001303 v_mul_f32_e32 v1, v8, v0 ; 10020108 v_fract_f32_e32 v1, v1 ; 7E024101 v_mad_f32 v3, v0, v8, -v1 ; D2820003 84061100 v_add_f32_e32 v9, 1.0, v3 ; 061206F2 v_mul_f32_e32 v9, 0x41880000, v9 ; 101212FF 41880000 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mul_f32_e32 v9, v9, v0 ; 10120109 v_fract_f32_e32 v9, v9 ; 7E124109 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mov_b32_e32 v10, 0x45689000 ; 7E1402FF 45689000 v_mad_f32 v11, v9, v10, v7 ; D282000B 041E1509 v_mul_f32_e32 v11, 0x41500000, v11 ; 101616FF 41500000 v_rcp_f32_e32 v4, v4 ; 7E085504 v_mul_f32_e32 v11, v11, v4 ; 1016090B v_fract_f32_e32 v11, v11 ; 7E16410B v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mul_f32_e32 v12, 0x44e89000, v11 ; 101816FF 44E89000 v_fract_f32_e32 v12, v12 ; 7E18410C v_add_f32_e32 v12, -0.5, v12 ; 061818F1 v_cmp_ge_f32_e64 s[10:11], -v12, 0 ; D00C000A 2001010C v_cndmask_b32_e64 v13, 0, -1, s[10:11] ; D200080D 00298280 v_cmp_ne_i32_e64 s[10:11], v13, 0 ; D10A000A 0001010D v_cndmask_b32_e64 v13, 1.0, 0, s[10:11] ; D200080D 102900F2 v_cmp_ge_f32_e64 s[10:11], v12, 0 ; D00C000A 0001010C v_cndmask_b32_e64 v12, 0, -1, s[10:11] ; D200000C 00298280 v_cmp_ne_i32_e64 s[10:11], v12, 0 ; D10A000A 0001010C v_mov_b32_e32 v12, 0x80000000 ; 7E1802FF 80000000 v_cndmask_b32_e64 v14, -1.0, v12, s[10:11] ; D200000E 182A18F3 v_add_f32_e32 v13, v13, v14 ; 061A1D0D v_mul_f32_e32 v11, 0x44689000, v11 ; 101616FF 44689000 v_fract_f32_e32 v11, v11 ; 7E16410B v_add_f32_e32 v11, -0.5, v11 ; 061616F1 v_cmp_ge_f32_e64 s[10:11], -v11, 0 ; D00C000A 2001010B v_cndmask_b32_e64 v14, 0, -1, s[10:11] ; D200000E 00298280 v_cmp_ne_i32_e64 s[10:11], v14, 0 ; D10A000A 0001010E v_cndmask_b32_e64 v14, 1.0, 0, s[10:11] ; D200000E 102900F2 v_cmp_ge_f32_e64 s[10:11], v11, 0 ; D00C000A 0001010B v_cndmask_b32_e64 v11, 0, -1, s[10:11] ; D200080B 00298280 v_cmp_ne_i32_e64 s[10:11], v11, 0 ; D10A000A 0001010B v_cndmask_b32_e64 v11, -1.0, v12, s[10:11] ; D200080B 182A18F3 v_add_f32_e32 v11, v14, v11 ; 0616170E v_add_f32_e32 v14, -1.0, v1 ; 061C02F3 v_mul_f32_e32 v15, v14, v11 ; 101E170E v_add_f32_e32 v16, v12, v5 ; 06200B0C v_mad_f32 v15, v16, v13, v15 ; D282000F 043E1B10 v_add_f32_e32 v6, 1.0, v6 ; 060C0CF2 v_mad_f32 v9, v9, v10, v6 ; D2820009 041A1509 v_mul_f32_e32 v9, 0x41500000, v9 ; 101212FF 41500000 v_mul_f32_e32 v9, v9, v4 ; 10120909 v_fract_f32_e32 v9, v9 ; 7E124109 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mul_f32_e32 v17, 0x44e89000, v9 ; 102212FF 44E89000 v_fract_f32_e32 v17, v17 ; 7E224111 v_add_f32_e32 v17, -0.5, v17 ; 062222F1 v_cmp_ge_f32_e64 s[10:11], -v17, 0 ; D00C000A 20010111 v_cndmask_b32_e64 v18, 0, -1, s[10:11] ; D2000012 00298280 v_cmp_ne_i32_e64 s[10:11], v18, 0 ; D10A000A 00010112 v_cndmask_b32_e64 v18, 1.0, 0, s[10:11] ; D2000012 102900F2 v_cmp_ge_f32_e64 s[10:11], v17, 0 ; D00C000A 00010111 v_cndmask_b32_e64 v17, 0, -1, s[10:11] ; D2000811 00298280 v_cmp_ne_i32_e64 s[10:11], v17, 0 ; D10A000A 00010111 v_cndmask_b32_e64 v17, -1.0, v12, s[10:11] ; D2000811 182A18F3 v_add_f32_e32 v17, v18, v17 ; 06222312 v_mul_f32_e32 v9, 0x44689000, v9 ; 101212FF 44689000 v_fract_f32_e32 v9, v9 ; 7E124109 v_add_f32_e32 v9, -0.5, v9 ; 061212F1 v_cmp_ge_f32_e64 s[10:11], -v9, 0 ; D00C000A 20010109 v_cndmask_b32_e64 v18, 0, -1, s[10:11] ; D2000012 00298280 v_cmp_ne_i32_e64 s[10:11], v18, 0 ; D10A000A 00010112 v_cndmask_b32_e64 v18, 1.0, 0, s[10:11] ; D2000012 102900F2 v_cmp_ge_f32_e64 s[10:11], v9, 0 ; D00C000A 00010109 v_cndmask_b32_e64 v9, 0, -1, s[10:11] ; D2000809 00298280 v_cmp_ne_i32_e64 s[10:11], v9, 0 ; D10A000A 00010109 v_cndmask_b32_e64 v9, -1.0, v12, s[10:11] ; D2000809 182A18F3 v_add_f32_e32 v9, v18, v9 ; 06121312 v_mul_f32_e32 v14, v14, v9 ; 101C130E v_add_f32_e32 v18, -1.0, v5 ; 06240AF3 v_mad_f32 v14, v18, v17, v14 ; D282000E 043A2312 v_subrev_f32_e32 v19, v15, v14 ; 0A261D0F v_mov_b32_e32 v20, 0x40400000 ; 7E2802FF 40400000 v_mad_f32 v21, -2.0, v5, v20 ; D2820015 04520AF5 v_mul_f32_e32 v22, v5, v5 ; 102C0B05 v_mad_f32 v23, -v21, v22, 1.0 ; D2820017 23CA2D15 v_mul_f32_e32 v13, v13, v23 ; 101A2F0D v_mul_f32_e32 v21, v22, v21 ; 102A2B16 v_mad_f32 v13, v21, v17, v13 ; D282000D 04362315 v_mov_b32_e32 v17, 0x40c00000 ; 7E2202FF 40C00000 v_mov_b32_e32 v22, 0xc0c00000 ; 7E2C02FF C0C00000 v_mad_f32 v24, v5, v22, v17 ; D2820018 04462D05 v_mul_f32_e32 v5, v24, v5 ; 100A0B18 v_mad_f32 v13, v19, v5, v13 ; D282000D 04360B13 v_add_f32_e32 v3, 0, v3 ; 06060680 v_mul_f32_e32 v3, 0x41880000, v3 ; 100606FF 41880000 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_fract_f32_e32 v0, v0 ; 7E004100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mad_f32 v3, v0, v10, v7 ; D2820003 041E1500 v_mul_f32_e32 v3, 0x41500000, v3 ; 100606FF 41500000 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_fract_f32_e32 v3, v3 ; 7E064103 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mul_f32_e32 v7, 0x44e89000, v3 ; 100E06FF 44E89000 v_fract_f32_e32 v7, v7 ; 7E0E4107 v_add_f32_e32 v7, -0.5, v7 ; 060E0EF1 v_cmp_ge_f32_e64 s[10:11], -v7, 0 ; D00C000A 20010107 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[10:11], v19, 0 ; D10A000A 00010113 v_cndmask_b32_e64 v19, 1.0, 0, s[10:11] ; D2000813 102900F2 v_cmp_ge_f32_e64 s[10:11], v7, 0 ; D00C000A 00010107 v_cndmask_b32_e64 v7, 0, -1, s[10:11] ; D2000807 00298280 v_cmp_ne_i32_e64 s[10:11], v7, 0 ; D10A000A 00010107 v_cndmask_b32_e64 v7, -1.0, v12, s[10:11] ; D2000807 182A18F3 v_add_f32_e32 v7, v19, v7 ; 060E0F13 v_mul_f32_e32 v3, 0x44689000, v3 ; 100606FF 44689000 v_fract_f32_e32 v3, v3 ; 7E064103 v_add_f32_e32 v3, -0.5, v3 ; 060606F1 v_cmp_ge_f32_e64 s[10:11], -v3, 0 ; D00C000A 20010103 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[10:11], v19, 0 ; D10A000A 00010113 v_cndmask_b32_e64 v19, 1.0, 0, s[10:11] ; D2000813 102900F2 v_cmp_ge_f32_e64 s[10:11], v3, 0 ; D00C000A 00010103 v_cndmask_b32_e64 v3, 0, -1, s[10:11] ; D2000803 00298280 v_cmp_ne_i32_e64 s[10:11], v3, 0 ; D10A000A 00010103 v_cndmask_b32_e64 v3, -1.0, v12, s[10:11] ; D2000803 182A18F3 v_add_f32_e32 v3, v19, v3 ; 06060713 v_add_f32_e32 v19, v12, v1 ; 0626030C v_mul_f32_e32 v24, v19, v3 ; 10300713 v_mad_f32 v16, v16, v7, v24 ; D2820010 04620F10 v_mad_f32 v0, v0, v10, v6 ; D2820000 041A1500 v_mul_f32_e32 v0, 0x41500000, v0 ; 100000FF 41500000 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_fract_f32_e32 v0, v0 ; 7E004100 v_mul_f32_e32 v0, v0, v0 ; 10000100 v_mul_f32_e32 v4, 0x44e89000, v0 ; 100800FF 44E89000 v_fract_f32_e32 v4, v4 ; 7E084104 v_add_f32_e32 v4, -0.5, v4 ; 060808F1 v_cmp_ge_f32_e64 s[10:11], -v4, 0 ; D00C000A 20010104 v_cndmask_b32_e64 v6, 0, -1, s[10:11] ; D2000006 00298280 v_cmp_ne_i32_e64 s[10:11], v6, 0 ; D10A000A 00010106 v_cndmask_b32_e64 v6, 1.0, 0, s[10:11] ; D2000006 102900F2 v_cmp_ge_f32_e64 s[10:11], v4, 0 ; D00C000A 00010104 v_cndmask_b32_e64 v4, 0, -1, s[10:11] ; D2000004 00298280 v_cmp_ne_i32_e64 s[10:11], v4, 0 ; D10A000A 00010104 v_cndmask_b32_e64 v4, -1.0, v12, s[10:11] ; D2000004 182A18F3 v_add_f32_e32 v4, v6, v4 ; 06080906 v_mul_f32_e32 v0, 0x44689000, v0 ; 100000FF 44689000 v_fract_f32_e32 v0, v0 ; 7E004100 v_add_f32_e32 v0, -0.5, v0 ; 060000F1 v_cmp_ge_f32_e64 s[10:11], -v0, 0 ; D00C000A 20010100 v_cndmask_b32_e64 v6, 0, -1, s[10:11] ; D2000006 00298280 v_cmp_ne_i32_e64 s[10:11], v6, 0 ; D10A000A 00010106 v_cndmask_b32_e64 v6, 1.0, 0, s[10:11] ; D2000006 102900F2 v_cmp_ge_f32_e64 s[10:11], v0, 0 ; D00C000A 00010100 v_cndmask_b32_e64 v0, 0, -1, s[10:11] ; D2000000 00298280 v_cmp_ne_i32_e64 s[10:11], v0, 0 ; D10A000A 00010100 v_cndmask_b32_e64 v0, -1.0, v12, s[10:11] ; D2000000 182A18F3 v_add_f32_e32 v0, v6, v0 ; 06000106 v_mul_f32_e32 v6, v19, v0 ; 100C0113 v_mad_f32 v6, v18, v4, v6 ; D2820006 041A0912 v_subrev_f32_e32 v10, v16, v6 ; 0A140D10 v_mul_f32_e32 v7, v7, v23 ; 100E2F07 v_mad_f32 v4, v21, v4, v7 ; D2820004 041E0915 v_mad_f32 v4, v10, v5, v4 ; D2820004 04120B0A v_mad_f32 v5, -2.0, v1, v20 ; D2820005 045202F5 v_mul_f32_e32 v7, v1, v1 ; 100E0301 v_mad_f32 v10, -v5, v7, 1.0 ; D282000A 23CA0F05 v_mul_f32_e32 v4, v4, v10 ; 10081504 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 v_mad_f32 v4, v5, v13, v4 ; D2820004 04121B05 s_buffer_load_dword s10, s[0:3], 0x4 ; C2050104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s10, v4 ; 1008080A v_mad_f32 v4, 0.5, v4, 0.5 ; D2820004 03C208F0 v_rcp_f32_e32 v7, s9 ; 7E0E5409 v_subrev_f32_e32 v13, v7, v8 ; 0A1A1107 v_rcp_f32_e32 v18, s8 ; 7E245408 v_add_f32_e32 v12, v2, v18 ; 06182502 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800100 0064130C v_subrev_f32_e32 v24, v18, v2 ; 0A300512 v_mov_b32_e32 v25, v13 ; 7E32030D image_sample v18, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800100 00641218 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v20, v19, v18 ; 0A282513 v_add_f32_e32 v26, 0, v8 ; 06341080 v_mov_b32_e32 v25, v26 ; 7E32031A image_sample v27, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800100 00641B18 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, 2.0, v27, v20 ; D2820014 045236F4 v_add_f32_e32 v25, v8, v7 ; 06320F08 v_mov_b32_e32 v7, v12 ; 7E0E030C v_mov_b32_e32 v8, v13 ; 7E10030D v_mov_b32_e32 v8, v25 ; 7E100319 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[16:23], s[12:15] ; F0800100 00640707 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[12:15] ; F0800100 00640818 s_waitcnt vmcnt(0) ; BF8C0770 v_subrev_f32_e32 v27, v7, v8 ; 0A361107 v_mov_b32_e32 v28, v12 ; 7E38030C v_mov_b32_e32 v29, v13 ; 7E3A030D v_mov_b32_e32 v29, v26 ; 7E3A031A image_sample v26, 1, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[16:23], s[12:15] ; F0800100 00641A1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, -2.0, v26, v27 ; D282001A 046E34F5 v_add_f32_e32 v20, v20, v26 ; 06283514 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s0, v20 ; 10282800 v_mad_f32 v26, 0.5, v20, 0.5 ; D282001A 03C228F0 v_subrev_f32_e32 v26, v4, v26 ; 0A343504 v_add_f32_e32 v18, v18, v19 ; 06242712 v_add_f32_e32 v12, 0, v2 ; 06180480 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800100 0064020C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v2, v18 ; D2820002 044A04F4 v_sub_f32_e64 v7, -v7, v8 ; D2080007 20021107 v_mov_b32_e32 v13, v25 ; 7E1A0319 image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[16:23], s[12:15] ; F0800100 0064080C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, -2.0, v8, v7 ; D2820007 041E10F5 v_add_f32_e32 v2, v2, v7 ; 06040F02 v_mul_f32_e32 v2, s0, v2 ; 10040400 v_mul_f32_e32 v7, v2, v2 ; 100E0502 v_mad_f32 v7, v20, v20, v7 ; D2820007 041E2914 v_max_f32_e32 v7, 0x33d6bf95, v7 ; 200E0EFF 33D6BF95 v_rsq_clamp_f32_e32 v8, v7 ; 7E105907 v_mul_f32_e32 v8, v7, v8 ; 10101107 v_xor_b32_e32 v7, 0x80000000, v7 ; 3A0E0EFF 80000000 v_cmp_gt_f32_e32 vcc, 0, v7 ; 7C080E80 v_cndmask_b32_e64 v7, 0, v8, vcc ; D2000807 01AA1080 v_add_f32_e64 v8, 0, v7 clamp ; D2060808 00020E80 v_mad_f32 v4, v8, v26, v4 ; D2820004 04123508 v_subrev_f32_e32 v6, v6, v14 ; 0A0C1D06 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v0, v5, v9, v0 ; D2820000 04021305 v_mad_f32 v9, v1, v22, v17 ; D2820009 04462D01 v_mul_f32_e32 v1, v9, v1 ; 10020309 v_mad_f32 v0, v6, v1, v0 ; D2820000 04020306 v_subrev_f32_e32 v6, v16, v15 ; 0A0C1F10 v_mul_f32_e32 v3, v3, v10 ; 10061503 v_mad_f32 v3, v5, v11, v3 ; D2820003 040E1705 v_mad_f32 v1, v6, v1, v3 ; D2820001 040E0306 v_mul_f32_e32 v1, v1, v23 ; 10022F01 v_mad_f32 v0, v21, v0, v1 ; D2820000 04060115 v_mul_f32_e32 v0, s10, v0 ; 1000000A v_mad_f32 v1, 0.5, -v0, 0.5 ; D2820001 43C200F0 v_mad_f32 v3, 0.5, -v2, 0.5 ; D2820003 43C204F0 v_subrev_f32_e32 v3, v1, v3 ; 0A060701 v_mad_f32 v1, v8, v3, v1 ; D2820001 04060708 v_cvt_pkrtz_f16_f32_e32 v1, v1, v4 ; 5E020901 v_mul_f32_e32 v0, 0.5, v0 ; 100000F0 v_mad_f32 v3, 0, v0, 1.0 ; D2820003 03CA0080 v_subrev_f32_e32 v4, v3, v7 ; 0A080F03 v_mad_f32 v3, v8, v4, v3 ; D2820003 040E0908 v_mad_f32 v0, 0, v0, 0 ; D2820000 02020080 v_mul_f32_e32 v2, 0.5, v2 ; 100404F0 v_mad_f32 v2, 0, v2, 0 ; D2820002 02020480 v_subrev_f32_e32 v2, v0, v2 ; 0A040500 v_mad_f32 v0, v8, v2, v0 ; D2820000 04020508 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 1 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 DCL IN[0], GENERIC[0], CONSTANT DCL OUT[0], COLOR 0: MOV OUT[0], IN[0] 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %5) %23 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %5) %24 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %5) %25 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %5) %26 = call i32 @llvm.SI.packf16(float %22, float %23) %27 = bitcast i32 %26 to float %28 = call i32 @llvm.SI.packf16(float %24, float %25) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %23) %31 = bitcast i32 %30 to float %32 = call i32 @llvm.SI.packf16(float %24, float %25) %33 = bitcast i32 %32 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 1, i32 1, float %31, float %33, float %31, float %33) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %27, float %29, float %27, float %29) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.constant(i32, i32, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_mov_f32 v0, P0, 3, 0, [m0] ; C8020302 v_interp_mov_f32 v1, P0, 2, 0, [m0] ; C8060202 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102 v_interp_mov_f32 v2, P0, 0, 0, [m0] ; C80A0002 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 1, 1, 0, 0, v1, v0, v1, v0 ; F800041F 00010001 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL IN[5] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL OUT[8], GENERIC[16] DCL CONST[0..96] DCL TEMP[0..9], LOCAL DCL ADDR[0] IMM[0] FLT32 { -1.0000, -2.0000, 0.6600, 0.3300} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MOV TEMP[1].xy, IMM[0].xyxx 3: F2I TEMP[2].x, IN[4].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: ADD TEMP[2].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 6: MOV TEMP[1].zw, TEMP[2].wwzw 7: ABS TEMP[2].z, TEMP[1] 8: ABS TEMP[3].z, TEMP[1] 9: FSGE TEMP[2].x, -TEMP[2].zzzz, TEMP[3].zzzz 10: UIF TEMP[2].xxxx :0 11: F2I TEMP[2].x, IN[4].xxxx 12: UARL ADDR[0].x, TEMP[2].xxxx 13: MUL TEMP[2].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 14: MOV TEMP[2].xy, TEMP[2].xyxx 15: MUL TEMP[3].xy, TEMP[2], IMM[0].zzzz 16: MOV TEMP[2].xy, TEMP[3].xyxx 17: ELSE :0 18: F2I TEMP[3].x, IN[4].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: MUL TEMP[3].xy, IN[1], CONST[ADDR[0].x+1].xxxx 21: MOV TEMP[3].xy, TEMP[3].xyxx 22: MUL TEMP[4].xy, TEMP[3], IMM[0].wwww 23: MOV TEMP[2].xy, TEMP[4].xyxx 24: ENDIF 25: ABS TEMP[4].w, TEMP[1] 26: ABS TEMP[5].w, TEMP[1] 27: FSGE TEMP[4].x, -TEMP[4].wwww, TEMP[5].wwww 28: UIF TEMP[4].xxxx :0 29: F2I TEMP[4].x, IN[4].xxxx 30: UARL ADDR[0].x, TEMP[4].xxxx 31: MUL TEMP[4].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 32: MUL TEMP[4].xy, TEMP[4].zwzw, IMM[0].zzzz 33: MOV TEMP[2].xy, TEMP[4].xyxx 34: ENDIF 35: F2I TEMP[4].x, IN[4].yyyy 36: UARL ADDR[0].x, TEMP[4].xxxx 37: ADD TEMP[4].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 38: MOV TEMP[1].zw, TEMP[4].wwzw 39: ABS TEMP[4].z, TEMP[1] 40: ABS TEMP[5].z, TEMP[1] 41: FSGE TEMP[4].x, -TEMP[4].zzzz, TEMP[5].zzzz 42: UIF TEMP[4].xxxx :0 43: F2I TEMP[4].x, IN[4].yyyy 44: UARL ADDR[0].x, TEMP[4].xxxx 45: MUL TEMP[4].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 46: MOV TEMP[3].xy, TEMP[4].xyxx 47: MUL TEMP[4].xy, TEMP[3], IMM[0].zzzz 48: MOV TEMP[3].xy, TEMP[4].xyxx 49: ELSE :0 50: F2I TEMP[4].x, IN[4].yyyy 51: UARL ADDR[0].x, TEMP[4].xxxx 52: MUL TEMP[4].xy, IN[1], CONST[ADDR[0].x+1].xxxx 53: MOV TEMP[4].xy, TEMP[4].xyxx 54: MUL TEMP[5].xy, TEMP[4], IMM[0].wwww 55: MOV TEMP[3].xy, TEMP[5].xyxx 56: ENDIF 57: ABS TEMP[5].w, TEMP[1] 58: ABS TEMP[6].w, TEMP[1] 59: FSGE TEMP[5].x, -TEMP[5].wwww, TEMP[6].wwww 60: UIF TEMP[5].xxxx :0 61: F2I TEMP[5].x, IN[4].yyyy 62: UARL ADDR[0].x, TEMP[5].xxxx 63: MUL TEMP[5].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 64: MUL TEMP[5].xy, TEMP[5].zwzw, IMM[0].zzzz 65: MOV TEMP[3].xy, TEMP[5].xyxx 66: ENDIF 67: F2I TEMP[5].x, IN[4].zzzz 68: UARL ADDR[0].x, TEMP[5].xxxx 69: ADD TEMP[5].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 70: MOV TEMP[1].zw, TEMP[5].wwzw 71: ABS TEMP[5].z, TEMP[1] 72: ABS TEMP[6].z, TEMP[1] 73: FSGE TEMP[5].x, -TEMP[5].zzzz, TEMP[6].zzzz 74: UIF TEMP[5].xxxx :0 75: F2I TEMP[5].x, IN[4].zzzz 76: UARL ADDR[0].x, TEMP[5].xxxx 77: MUL TEMP[5].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 78: MOV TEMP[4].xy, TEMP[5].xyxx 79: MUL TEMP[5].xy, TEMP[4], IMM[0].zzzz 80: MOV TEMP[4].xy, TEMP[5].xyxx 81: ELSE :0 82: F2I TEMP[5].x, IN[4].zzzz 83: UARL ADDR[0].x, TEMP[5].xxxx 84: MUL TEMP[5].xy, IN[1], CONST[ADDR[0].x+1].xxxx 85: MOV TEMP[5].xy, TEMP[5].xyxx 86: MUL TEMP[6].xy, TEMP[5], IMM[0].wwww 87: MOV TEMP[4].xy, TEMP[6].xyxx 88: ENDIF 89: ABS TEMP[6].w, TEMP[1] 90: ABS TEMP[7].w, TEMP[1] 91: FSGE TEMP[6].x, -TEMP[6].wwww, TEMP[7].wwww 92: UIF TEMP[6].xxxx :0 93: F2I TEMP[6].x, IN[4].zzzz 94: UARL ADDR[0].x, TEMP[6].xxxx 95: MUL TEMP[6].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 96: MUL TEMP[6].xy, TEMP[6].zwzw, IMM[0].zzzz 97: MOV TEMP[4].xy, TEMP[6].xyxx 98: ENDIF 99: F2I TEMP[6].x, IN[4].wwww 100: UARL ADDR[0].x, TEMP[6].xxxx 101: ADD TEMP[6].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 102: MOV TEMP[1].zw, TEMP[6].wwzw 103: ABS TEMP[6].z, TEMP[1] 104: ABS TEMP[7].z, TEMP[1] 105: FSGE TEMP[6].x, -TEMP[6].zzzz, TEMP[7].zzzz 106: UIF TEMP[6].xxxx :0 107: F2I TEMP[6].x, IN[4].wwww 108: UARL ADDR[0].x, TEMP[6].xxxx 109: MUL TEMP[6].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 110: MOV TEMP[5].xy, TEMP[6].xyxx 111: MUL TEMP[6].xy, TEMP[5], IMM[0].zzzz 112: MOV TEMP[5].xy, TEMP[6].xyxx 113: ELSE :0 114: F2I TEMP[6].x, IN[4].wwww 115: UARL ADDR[0].x, TEMP[6].xxxx 116: MUL TEMP[6].xy, IN[1], CONST[ADDR[0].x+1].xxxx 117: MOV TEMP[6].xy, TEMP[6].xyxx 118: MUL TEMP[7].xy, TEMP[6], IMM[0].wwww 119: MOV TEMP[5].xy, TEMP[7].xyxx 120: ENDIF 121: ABS TEMP[7].w, TEMP[1] 122: ABS TEMP[8].w, TEMP[1] 123: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[8].wwww 124: UIF TEMP[7].xxxx :0 125: F2I TEMP[7].x, IN[4].wwww 126: UARL ADDR[0].x, TEMP[7].xxxx 127: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 128: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 129: MOV TEMP[5].xy, TEMP[7].xyxx 130: ENDIF 131: F2I TEMP[7].x, IN[5].xxxx 132: UARL ADDR[0].x, TEMP[7].xxxx 133: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 134: MOV TEMP[1].zw, TEMP[7].wwzw 135: ABS TEMP[7].z, TEMP[1] 136: ABS TEMP[8].z, TEMP[1] 137: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[8].zzzz 138: UIF TEMP[7].xxxx :0 139: F2I TEMP[7].x, IN[5].xxxx 140: UARL ADDR[0].x, TEMP[7].xxxx 141: MUL TEMP[7].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 142: MOV TEMP[6].xy, TEMP[7].xyxx 143: MUL TEMP[7].xy, TEMP[6], IMM[0].zzzz 144: MOV TEMP[6].xy, TEMP[7].xyxx 145: ELSE :0 146: F2I TEMP[7].x, IN[5].xxxx 147: UARL ADDR[0].x, TEMP[7].xxxx 148: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 149: MOV TEMP[6].zw, TEMP[7].wwzw 150: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].wwww 151: MOV TEMP[6].xy, TEMP[7].xyxx 152: ENDIF 153: ABS TEMP[7].w, TEMP[1] 154: ABS TEMP[8].w, TEMP[1] 155: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[8].wwww 156: UIF TEMP[7].xxxx :0 157: F2I TEMP[7].x, IN[5].xxxx 158: UARL ADDR[0].x, TEMP[7].xxxx 159: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 160: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 161: MOV TEMP[6].xy, TEMP[7].xyxx 162: ENDIF 163: F2I TEMP[7].x, IN[5].yyyy 164: UARL ADDR[0].x, TEMP[7].xxxx 165: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 166: MOV TEMP[1].zw, TEMP[7].wwzw 167: ABS TEMP[7].z, TEMP[1] 168: ABS TEMP[8].z, TEMP[1] 169: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[8].zzzz 170: UIF TEMP[7].xxxx :0 171: F2I TEMP[7].x, IN[5].yyyy 172: UARL ADDR[0].x, TEMP[7].xxxx 173: MUL TEMP[7].zw, IN[1].xyxz, CONST[ADDR[0].x+1].xxxx 174: MOV TEMP[6].zw, TEMP[7].wwzw 175: MUL TEMP[7].zw, TEMP[6], IMM[0].zzzz 176: MOV TEMP[6].zw, TEMP[7].wwzw 177: ELSE :0 178: F2I TEMP[7].x, IN[5].yyyy 179: UARL ADDR[0].x, TEMP[7].xxxx 180: MUL TEMP[7].xy, IN[1], CONST[ADDR[0].x+1].xxxx 181: MOV TEMP[8].xy, TEMP[7].xyxx 182: MUL TEMP[7].zw, TEMP[7].xyxy, IMM[0].wwww 183: MOV TEMP[6].zw, TEMP[7].wwzw 184: ENDIF 185: ABS TEMP[7].w, TEMP[1] 186: ABS TEMP[9].w, TEMP[1] 187: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[9].wwww 188: UIF TEMP[7].xxxx :0 189: F2I TEMP[7].x, IN[5].yyyy 190: UARL ADDR[0].x, TEMP[7].xxxx 191: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 192: MOV TEMP[1].zw, TEMP[7].wwzw 193: MUL TEMP[7].zw, TEMP[1], IMM[0].zzzz 194: MOV TEMP[6].zw, TEMP[7].wwzw 195: ENDIF 196: F2I TEMP[7].x, IN[5].zzzz 197: UARL ADDR[0].x, TEMP[7].xxxx 198: ADD TEMP[7].zw, IMM[0].xyxy, CONST[ADDR[0].x+1].yyyy 199: MOV TEMP[1].zw, TEMP[7].wwzw 200: ABS TEMP[7].z, TEMP[1] 201: ABS TEMP[9].z, TEMP[1] 202: FSGE TEMP[7].x, -TEMP[7].zzzz, TEMP[9].zzzz 203: UIF TEMP[7].xxxx :0 204: F2I TEMP[7].x, IN[5].zzzz 205: UARL ADDR[0].x, TEMP[7].xxxx 206: MUL TEMP[7].xy, IN[1].xzzw, CONST[ADDR[0].x+1].xxxx 207: MOV TEMP[8].xy, TEMP[7].xyxx 208: MUL TEMP[7].xy, TEMP[8], IMM[0].zzzz 209: MOV TEMP[8].xy, TEMP[7].xyxx 210: ELSE :0 211: F2I TEMP[7].x, IN[5].zzzz 212: UARL ADDR[0].x, TEMP[7].xxxx 213: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 214: MOV TEMP[8].zw, TEMP[7].wwzw 215: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].wwww 216: MOV TEMP[8].xy, TEMP[7].xyxx 217: ENDIF 218: ABS TEMP[7].w, TEMP[1] 219: ABS TEMP[9].w, TEMP[1] 220: FSGE TEMP[7].x, -TEMP[7].wwww, TEMP[9].wwww 221: UIF TEMP[7].xxxx :0 222: F2I TEMP[7].x, IN[5].zzzz 223: UARL ADDR[0].x, TEMP[7].xxxx 224: MUL TEMP[7].zw, IN[1].xyyz, CONST[ADDR[0].x+1].xxxx 225: MOV TEMP[1].zw, TEMP[7].wwzw 226: MUL TEMP[7].xy, TEMP[7].zwzw, IMM[0].zzzz 227: MOV TEMP[8].xy, TEMP[7].xyxx 228: ENDIF 229: F2I TEMP[7].x, IN[5].wwww 230: UARL ADDR[0].x, TEMP[7].xxxx 231: ADD TEMP[7].xy, TEMP[1], CONST[ADDR[0].x+1].yyyy 232: MOV TEMP[1].xy, TEMP[7].xyxx 233: ABS TEMP[7].x, TEMP[1] 234: ABS TEMP[9].x, TEMP[1] 235: FSGE TEMP[7].x, -TEMP[7].xxxx, TEMP[9].xxxx 236: UIF TEMP[7].xxxx :0 237: F2I TEMP[7].x, IN[5].wwww 238: UARL ADDR[0].x, TEMP[7].xxxx 239: MUL TEMP[7].xz, IN[1], CONST[ADDR[0].x+1].xxxx 240: MOV TEMP[1].xz, TEMP[7].xxzx 241: MUL TEMP[7].xz, TEMP[1], IMM[0].zzzz 242: MOV TEMP[1].xz, TEMP[7].xxzx 243: ELSE :0 244: F2I TEMP[7].x, IN[5].wwww 245: UARL ADDR[0].x, TEMP[7].xxxx 246: MUL TEMP[7].zw, IN[1].xyxy, CONST[ADDR[0].x+1].xxxx 247: MOV TEMP[8].zw, TEMP[7].wwzw 248: MUL TEMP[7].xz, TEMP[8].zyww, IMM[0].wwww 249: MOV TEMP[1].xz, TEMP[7].xxzx 250: ENDIF 251: ABS TEMP[7].y, TEMP[1] 252: ABS TEMP[9].y, TEMP[1] 253: FSGE TEMP[7].x, -TEMP[7].yyyy, TEMP[9].yyyy 254: UIF TEMP[7].xxxx :0 255: F2I TEMP[7].x, IN[5].wwww 256: UARL ADDR[0].x, TEMP[7].xxxx 257: MUL TEMP[7].yw, IN[1].xyzz, CONST[ADDR[0].x+1].xxxx 258: MUL TEMP[7].xz, TEMP[7].yyww, IMM[0].zzzz 259: MOV TEMP[1].xz, TEMP[7].xxzx 260: ENDIF 261: MAD TEMP[7].zw, IN[0].zzzz, IMM[1].xyxy, IMM[1].xyyx 262: MOV TEMP[0].zw, TEMP[7].wwzw 263: MOV TEMP[2].zw, IN[2].yyxy 264: MOV TEMP[3].zw, IN[2].wwzw 265: MOV TEMP[4].zw, IN[3].yyxy 266: MOV TEMP[5].zw, IN[3].wwzw 267: MOV TEMP[7].xy, TEMP[6].xyxx 268: MOV TEMP[7].zw, IMM[1].yyyy 269: MOV TEMP[6].xy, TEMP[6].zwzz 270: MOV TEMP[6].zw, IMM[1].yyyy 271: MOV TEMP[8].xy, TEMP[8].xyxx 272: MOV TEMP[8].zw, IMM[1].yyyy 273: MOV TEMP[1].xy, TEMP[1].xzxx 274: MOV TEMP[1].zw, IMM[1].yyyy 275: MOV OUT[6], TEMP[6] 276: MOV OUT[7], TEMP[8] 277: MOV OUT[8], TEMP[1] 278: MOV OUT[1], TEMP[2] 279: MOV OUT[2], TEMP[3] 280: MOV OUT[0], TEMP[0] 281: MOV OUT[3], TEMP[4] 282: MOV OUT[4], TEMP[5] 283: MOV OUT[5], TEMP[7] 284: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = add i32 %5, %7 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %17) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = add i32 %5, %7 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = extractelement <4 x float> %25, i32 2 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %38 = load <16 x i8> addrspace(2)* %37, !tbaa !0 %39 = add i32 %5, %7 %40 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %38, i32 0, i32 %39) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = extractelement <4 x float> %40, i32 3 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 5 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = extractelement <4 x float> %56, i32 3 %61 = fadd float %13, %19 %62 = fadd float %14, %20 %63 = fptosi float %49 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = shl i32 %65, 4 %67 = add i32 %66, 20 %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %67) %69 = fadd float -1.000000e+00, %68 %70 = shl i32 %65, 4 %71 = add i32 %70, 20 %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %71) %73 = fadd float -2.000000e+00, %72 %74 = call float @fabs(float %69) %75 = call float @fabs(float %69) %76 = fsub float -0.000000e+00, %74 %77 = fcmp oge float %76, %75 %78 = sext i1 %77 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = icmp ne i32 %80, 0 %82 = fptosi float %49 to i32 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = shl i32 %84, 4 %86 = add i32 %85, 16 %87 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %86) %88 = fmul float %26, %87 %89 = shl i32 %84, 4 %90 = add i32 %89, 16 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) br i1 %81, label %IF, label %ELSE IF: ; preds = %main_body %92 = fmul float %28, %91 %93 = fmul float %88, 0x3FE51EB860000000 %94 = fmul float %92, 0x3FE51EB860000000 br label %ENDIF ELSE: ; preds = %main_body %95 = fmul float %27, %91 %96 = fmul float %88, 0x3FD51EB860000000 %97 = fmul float %95, 0x3FD51EB860000000 br label %ENDIF ENDIF: ; preds = %ELSE, %IF %temp8.0 = phi float [ %93, %IF ], [ %96, %ELSE ] %temp9.0 = phi float [ %94, %IF ], [ %97, %ELSE ] %98 = call float @fabs(float %73) %99 = call float @fabs(float %73) %100 = fsub float -0.000000e+00, %98 %101 = fcmp oge float %100, %99 %102 = sext i1 %101 to i32 %103 = bitcast i32 %102 to float %104 = bitcast float %103 to i32 %105 = icmp ne i32 %104, 0 br i1 %105, label %IF46, label %ENDIF45 IF46: ; preds = %ENDIF %106 = fptosi float %49 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = add i32 %109, 16 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = fmul float %27, %111 %113 = shl i32 %108, 4 %114 = add i32 %113, 16 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %28, %115 %117 = fmul float %112, 0x3FE51EB860000000 %118 = fmul float %116, 0x3FE51EB860000000 br label %ENDIF45 ENDIF45: ; preds = %ENDIF, %IF46 %temp8.1 = phi float [ %117, %IF46 ], [ %temp8.0, %ENDIF ] %temp9.1 = phi float [ %118, %IF46 ], [ %temp9.0, %ENDIF ] %119 = fptosi float %50 to i32 %120 = bitcast i32 %119 to float %121 = bitcast float %120 to i32 %122 = shl i32 %121, 4 %123 = add i32 %122, 20 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fadd float -1.000000e+00, %124 %126 = shl i32 %121, 4 %127 = add i32 %126, 20 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fadd float -2.000000e+00, %128 %130 = call float @fabs(float %125) %131 = call float @fabs(float %125) %132 = fsub float -0.000000e+00, %130 %133 = fcmp oge float %132, %131 %134 = sext i1 %133 to i32 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = icmp ne i32 %136, 0 %138 = fptosi float %50 to i32 %139 = bitcast i32 %138 to float %140 = bitcast float %139 to i32 %141 = shl i32 %140, 4 %142 = add i32 %141, 16 %143 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %142) %144 = fmul float %26, %143 %145 = shl i32 %140, 4 %146 = add i32 %145, 16 %147 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %146) br i1 %137, label %IF53, label %ELSE54 IF53: ; preds = %ENDIF45 %148 = fmul float %28, %147 %149 = fmul float %144, 0x3FE51EB860000000 %150 = fmul float %148, 0x3FE51EB860000000 br label %ENDIF52 ELSE54: ; preds = %ENDIF45 %151 = fmul float %27, %147 %152 = fmul float %144, 0x3FD51EB860000000 %153 = fmul float %151, 0x3FD51EB860000000 br label %ENDIF52 ENDIF52: ; preds = %ELSE54, %IF53 %temp12.0 = phi float [ %149, %IF53 ], [ %152, %ELSE54 ] %temp13.0 = phi float [ %150, %IF53 ], [ %153, %ELSE54 ] %154 = call float @fabs(float %129) %155 = call float @fabs(float %129) %156 = fsub float -0.000000e+00, %154 %157 = fcmp oge float %156, %155 %158 = sext i1 %157 to i32 %159 = bitcast i32 %158 to float %160 = bitcast float %159 to i32 %161 = icmp ne i32 %160, 0 br i1 %161, label %IF60, label %ENDIF59 IF60: ; preds = %ENDIF52 %162 = fptosi float %50 to i32 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = shl i32 %164, 4 %166 = add i32 %165, 16 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = fmul float %27, %167 %169 = shl i32 %164, 4 %170 = add i32 %169, 16 %171 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %170) %172 = fmul float %28, %171 %173 = fmul float %168, 0x3FE51EB860000000 %174 = fmul float %172, 0x3FE51EB860000000 br label %ENDIF59 ENDIF59: ; preds = %ENDIF52, %IF60 %temp12.1 = phi float [ %173, %IF60 ], [ %temp12.0, %ENDIF52 ] %temp13.1 = phi float [ %174, %IF60 ], [ %temp13.0, %ENDIF52 ] %175 = fptosi float %51 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = shl i32 %177, 4 %179 = add i32 %178, 20 %180 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %179) %181 = fadd float -1.000000e+00, %180 %182 = shl i32 %177, 4 %183 = add i32 %182, 20 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = fadd float -2.000000e+00, %184 %186 = call float @fabs(float %181) %187 = call float @fabs(float %181) %188 = fsub float -0.000000e+00, %186 %189 = fcmp oge float %188, %187 %190 = sext i1 %189 to i32 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = icmp ne i32 %192, 0 %194 = fptosi float %51 to i32 %195 = bitcast i32 %194 to float %196 = bitcast float %195 to i32 %197 = shl i32 %196, 4 %198 = add i32 %197, 16 %199 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %198) %200 = fmul float %26, %199 %201 = shl i32 %196, 4 %202 = add i32 %201, 16 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) br i1 %193, label %IF67, label %ELSE68 IF67: ; preds = %ENDIF59 %204 = fmul float %28, %203 %205 = fmul float %200, 0x3FE51EB860000000 %206 = fmul float %204, 0x3FE51EB860000000 br label %ENDIF66 ELSE68: ; preds = %ENDIF59 %207 = fmul float %27, %203 %208 = fmul float %200, 0x3FD51EB860000000 %209 = fmul float %207, 0x3FD51EB860000000 br label %ENDIF66 ENDIF66: ; preds = %ELSE68, %IF67 %temp16.0 = phi float [ %205, %IF67 ], [ %208, %ELSE68 ] %temp17.0 = phi float [ %206, %IF67 ], [ %209, %ELSE68 ] %210 = call float @fabs(float %185) %211 = call float @fabs(float %185) %212 = fsub float -0.000000e+00, %210 %213 = fcmp oge float %212, %211 %214 = sext i1 %213 to i32 %215 = bitcast i32 %214 to float %216 = bitcast float %215 to i32 %217 = icmp ne i32 %216, 0 br i1 %217, label %IF74, label %ENDIF73 IF74: ; preds = %ENDIF66 %218 = fptosi float %51 to i32 %219 = bitcast i32 %218 to float %220 = bitcast float %219 to i32 %221 = shl i32 %220, 4 %222 = add i32 %221, 16 %223 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %222) %224 = fmul float %27, %223 %225 = shl i32 %220, 4 %226 = add i32 %225, 16 %227 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %226) %228 = fmul float %28, %227 %229 = fmul float %224, 0x3FE51EB860000000 %230 = fmul float %228, 0x3FE51EB860000000 br label %ENDIF73 ENDIF73: ; preds = %ENDIF66, %IF74 %temp16.1 = phi float [ %229, %IF74 ], [ %temp16.0, %ENDIF66 ] %temp17.1 = phi float [ %230, %IF74 ], [ %temp17.0, %ENDIF66 ] %231 = fptosi float %52 to i32 %232 = bitcast i32 %231 to float %233 = bitcast float %232 to i32 %234 = shl i32 %233, 4 %235 = add i32 %234, 20 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = fadd float -1.000000e+00, %236 %238 = shl i32 %233, 4 %239 = add i32 %238, 20 %240 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %239) %241 = fadd float -2.000000e+00, %240 %242 = call float @fabs(float %237) %243 = call float @fabs(float %237) %244 = fsub float -0.000000e+00, %242 %245 = fcmp oge float %244, %243 %246 = sext i1 %245 to i32 %247 = bitcast i32 %246 to float %248 = bitcast float %247 to i32 %249 = icmp ne i32 %248, 0 %250 = fptosi float %52 to i32 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 16 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = fmul float %26, %255 %257 = shl i32 %252, 4 %258 = add i32 %257, 16 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) br i1 %249, label %IF81, label %ELSE82 IF81: ; preds = %ENDIF73 %260 = fmul float %28, %259 %261 = fmul float %256, 0x3FE51EB860000000 %262 = fmul float %260, 0x3FE51EB860000000 br label %ENDIF80 ELSE82: ; preds = %ENDIF73 %263 = fmul float %27, %259 %264 = fmul float %256, 0x3FD51EB860000000 %265 = fmul float %263, 0x3FD51EB860000000 br label %ENDIF80 ENDIF80: ; preds = %ELSE82, %IF81 %temp20.0 = phi float [ %261, %IF81 ], [ %264, %ELSE82 ] %temp21.0 = phi float [ %262, %IF81 ], [ %265, %ELSE82 ] %266 = call float @fabs(float %241) %267 = call float @fabs(float %241) %268 = fsub float -0.000000e+00, %266 %269 = fcmp oge float %268, %267 %270 = sext i1 %269 to i32 %271 = bitcast i32 %270 to float %272 = bitcast float %271 to i32 %273 = icmp ne i32 %272, 0 br i1 %273, label %IF88, label %ENDIF87 IF88: ; preds = %ENDIF80 %274 = fptosi float %52 to i32 %275 = bitcast i32 %274 to float %276 = bitcast float %275 to i32 %277 = shl i32 %276, 4 %278 = add i32 %277, 16 %279 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %278) %280 = fmul float %27, %279 %281 = shl i32 %276, 4 %282 = add i32 %281, 16 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = fmul float %28, %283 %285 = fmul float %280, 0x3FE51EB860000000 %286 = fmul float %284, 0x3FE51EB860000000 br label %ENDIF87 ENDIF87: ; preds = %ENDIF80, %IF88 %temp20.1 = phi float [ %285, %IF88 ], [ %temp20.0, %ENDIF80 ] %temp21.1 = phi float [ %286, %IF88 ], [ %temp21.0, %ENDIF80 ] %287 = fptosi float %57 to i32 %288 = bitcast i32 %287 to float %289 = bitcast float %288 to i32 %290 = shl i32 %289, 4 %291 = add i32 %290, 20 %292 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %291) %293 = fadd float -1.000000e+00, %292 %294 = shl i32 %289, 4 %295 = add i32 %294, 20 %296 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %295) %297 = fadd float -2.000000e+00, %296 %298 = call float @fabs(float %293) %299 = call float @fabs(float %293) %300 = fsub float -0.000000e+00, %298 %301 = fcmp oge float %300, %299 %302 = sext i1 %301 to i32 %303 = bitcast i32 %302 to float %304 = bitcast float %303 to i32 %305 = icmp ne i32 %304, 0 %306 = fptosi float %57 to i32 %307 = bitcast i32 %306 to float %308 = bitcast float %307 to i32 %309 = shl i32 %308, 4 %310 = add i32 %309, 16 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = fmul float %26, %311 %313 = shl i32 %308, 4 %314 = add i32 %313, 16 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) br i1 %305, label %IF95, label %ELSE96 IF95: ; preds = %ENDIF87 %316 = fmul float %28, %315 %317 = fmul float %312, 0x3FE51EB860000000 %318 = fmul float %316, 0x3FE51EB860000000 br label %ENDIF94 ELSE96: ; preds = %ENDIF87 %319 = fmul float %27, %315 %320 = fmul float %312, 0x3FD51EB860000000 %321 = fmul float %319, 0x3FD51EB860000000 br label %ENDIF94 ENDIF94: ; preds = %ELSE96, %IF95 %temp24.0 = phi float [ %317, %IF95 ], [ %320, %ELSE96 ] %temp25.0 = phi float [ %318, %IF95 ], [ %321, %ELSE96 ] %322 = call float @fabs(float %297) %323 = call float @fabs(float %297) %324 = fsub float -0.000000e+00, %322 %325 = fcmp oge float %324, %323 %326 = sext i1 %325 to i32 %327 = bitcast i32 %326 to float %328 = bitcast float %327 to i32 %329 = icmp ne i32 %328, 0 br i1 %329, label %IF102, label %ENDIF101 IF102: ; preds = %ENDIF94 %330 = fptosi float %57 to i32 %331 = bitcast i32 %330 to float %332 = bitcast float %331 to i32 %333 = shl i32 %332, 4 %334 = add i32 %333, 16 %335 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %334) %336 = fmul float %27, %335 %337 = shl i32 %332, 4 %338 = add i32 %337, 16 %339 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %338) %340 = fmul float %28, %339 %341 = fmul float %336, 0x3FE51EB860000000 %342 = fmul float %340, 0x3FE51EB860000000 br label %ENDIF101 ENDIF101: ; preds = %ENDIF94, %IF102 %temp24.1 = phi float [ %341, %IF102 ], [ %temp24.0, %ENDIF94 ] %temp25.1 = phi float [ %342, %IF102 ], [ %temp25.0, %ENDIF94 ] %343 = fptosi float %58 to i32 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 20 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = fadd float -1.000000e+00, %348 %350 = shl i32 %345, 4 %351 = add i32 %350, 20 %352 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %351) %353 = fadd float -2.000000e+00, %352 %354 = call float @fabs(float %349) %355 = call float @fabs(float %349) %356 = fsub float -0.000000e+00, %354 %357 = fcmp oge float %356, %355 %358 = sext i1 %357 to i32 %359 = bitcast i32 %358 to float %360 = bitcast float %359 to i32 %361 = icmp ne i32 %360, 0 %362 = fptosi float %58 to i32 %363 = bitcast i32 %362 to float %364 = bitcast float %363 to i32 %365 = shl i32 %364, 4 %366 = add i32 %365, 16 %367 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %366) %368 = fmul float %26, %367 %369 = shl i32 %364, 4 %370 = add i32 %369, 16 %371 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %370) br i1 %361, label %IF109, label %ELSE110 IF109: ; preds = %ENDIF101 %372 = fmul float %28, %371 %373 = fmul float %368, 0x3FE51EB860000000 %374 = fmul float %372, 0x3FE51EB860000000 br label %ENDIF108 ELSE110: ; preds = %ENDIF101 %375 = fmul float %27, %371 %376 = fmul float %368, 0x3FD51EB860000000 %377 = fmul float %375, 0x3FD51EB860000000 br label %ENDIF108 ENDIF108: ; preds = %ELSE110, %IF109 %temp26.0 = phi float [ %373, %IF109 ], [ %376, %ELSE110 ] %temp27.0 = phi float [ %374, %IF109 ], [ %377, %ELSE110 ] %378 = call float @fabs(float %353) %379 = call float @fabs(float %353) %380 = fsub float -0.000000e+00, %378 %381 = fcmp oge float %380, %379 %382 = sext i1 %381 to i32 %383 = bitcast i32 %382 to float %384 = bitcast float %383 to i32 %385 = icmp ne i32 %384, 0 br i1 %385, label %IF116, label %ENDIF115 IF116: ; preds = %ENDIF108 %386 = fptosi float %58 to i32 %387 = bitcast i32 %386 to float %388 = bitcast float %387 to i32 %389 = shl i32 %388, 4 %390 = add i32 %389, 16 %391 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %390) %392 = fmul float %27, %391 %393 = shl i32 %388, 4 %394 = add i32 %393, 16 %395 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %394) %396 = fmul float %28, %395 %397 = fmul float %392, 0x3FE51EB860000000 %398 = fmul float %396, 0x3FE51EB860000000 br label %ENDIF115 ENDIF115: ; preds = %ENDIF108, %IF116 %temp26.1 = phi float [ %397, %IF116 ], [ %temp26.0, %ENDIF108 ] %temp27.1 = phi float [ %398, %IF116 ], [ %temp27.0, %ENDIF108 ] %399 = fptosi float %59 to i32 %400 = bitcast i32 %399 to float %401 = bitcast float %400 to i32 %402 = shl i32 %401, 4 %403 = add i32 %402, 20 %404 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %403) %405 = fadd float -1.000000e+00, %404 %406 = shl i32 %401, 4 %407 = add i32 %406, 20 %408 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %407) %409 = fadd float -2.000000e+00, %408 %410 = call float @fabs(float %405) %411 = call float @fabs(float %405) %412 = fsub float -0.000000e+00, %410 %413 = fcmp oge float %412, %411 %414 = sext i1 %413 to i32 %415 = bitcast i32 %414 to float %416 = bitcast float %415 to i32 %417 = icmp ne i32 %416, 0 %418 = fptosi float %59 to i32 %419 = bitcast i32 %418 to float %420 = bitcast float %419 to i32 %421 = shl i32 %420, 4 %422 = add i32 %421, 16 %423 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %422) %424 = fmul float %26, %423 %425 = shl i32 %420, 4 %426 = add i32 %425, 16 %427 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %426) br i1 %417, label %IF123, label %ELSE124 IF123: ; preds = %ENDIF115 %428 = fmul float %28, %427 %429 = fmul float %424, 0x3FE51EB860000000 %430 = fmul float %428, 0x3FE51EB860000000 br label %ENDIF122 ELSE124: ; preds = %ENDIF115 %431 = fmul float %27, %427 %432 = fmul float %424, 0x3FD51EB860000000 %433 = fmul float %431, 0x3FD51EB860000000 br label %ENDIF122 ENDIF122: ; preds = %ELSE124, %IF123 %temp32.0 = phi float [ %429, %IF123 ], [ %432, %ELSE124 ] %temp33.0 = phi float [ %430, %IF123 ], [ %433, %ELSE124 ] %434 = call float @fabs(float %409) %435 = call float @fabs(float %409) %436 = fsub float -0.000000e+00, %434 %437 = fcmp oge float %436, %435 %438 = sext i1 %437 to i32 %439 = bitcast i32 %438 to float %440 = bitcast float %439 to i32 %441 = icmp ne i32 %440, 0 br i1 %441, label %IF130, label %ENDIF129 IF130: ; preds = %ENDIF122 %442 = fptosi float %59 to i32 %443 = bitcast i32 %442 to float %444 = bitcast float %443 to i32 %445 = shl i32 %444, 4 %446 = add i32 %445, 16 %447 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %446) %448 = fmul float %27, %447 %449 = shl i32 %444, 4 %450 = add i32 %449, 16 %451 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %450) %452 = fmul float %28, %451 %453 = fmul float %448, 0x3FE51EB860000000 %454 = fmul float %452, 0x3FE51EB860000000 br label %ENDIF129 ENDIF129: ; preds = %ENDIF122, %IF130 %temp32.1 = phi float [ %453, %IF130 ], [ %temp32.0, %ENDIF122 ] %temp33.1 = phi float [ %454, %IF130 ], [ %temp33.0, %ENDIF122 ] %455 = fptosi float %60 to i32 %456 = bitcast i32 %455 to float %457 = bitcast float %456 to i32 %458 = shl i32 %457, 4 %459 = add i32 %458, 20 %460 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %459) %461 = fadd float -1.000000e+00, %460 %462 = shl i32 %457, 4 %463 = add i32 %462, 20 %464 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %463) %465 = fadd float -2.000000e+00, %464 %466 = call float @fabs(float %461) %467 = call float @fabs(float %461) %468 = fsub float -0.000000e+00, %466 %469 = fcmp oge float %468, %467 %470 = sext i1 %469 to i32 %471 = bitcast i32 %470 to float %472 = bitcast float %471 to i32 %473 = icmp ne i32 %472, 0 %474 = fptosi float %60 to i32 %475 = bitcast i32 %474 to float %476 = bitcast float %475 to i32 %477 = shl i32 %476, 4 %478 = add i32 %477, 16 %479 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %478) %480 = fmul float %26, %479 %481 = shl i32 %476, 4 %482 = add i32 %481, 16 %483 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %482) br i1 %473, label %IF137, label %ELSE138 IF137: ; preds = %ENDIF129 %484 = fmul float %28, %483 %485 = fmul float %480, 0x3FE51EB860000000 %486 = fmul float %484, 0x3FE51EB860000000 br label %ENDIF136 ELSE138: ; preds = %ENDIF129 %487 = fmul float %27, %483 %488 = fmul float %480, 0x3FD51EB860000000 %489 = fmul float %487, 0x3FD51EB860000000 br label %ENDIF136 ENDIF136: ; preds = %ELSE138, %IF137 %temp4.0 = phi float [ %485, %IF137 ], [ %488, %ELSE138 ] %temp6.0 = phi float [ %486, %IF137 ], [ %489, %ELSE138 ] %490 = call float @fabs(float %465) %491 = call float @fabs(float %465) %492 = fsub float -0.000000e+00, %490 %493 = fcmp oge float %492, %491 %494 = sext i1 %493 to i32 %495 = bitcast i32 %494 to float %496 = bitcast float %495 to i32 %497 = icmp ne i32 %496, 0 br i1 %497, label %IF144, label %ENDIF143 IF144: ; preds = %ENDIF136 %498 = fptosi float %60 to i32 %499 = bitcast i32 %498 to float %500 = bitcast float %499 to i32 %501 = shl i32 %500, 4 %502 = add i32 %501, 16 %503 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %502) %504 = fmul float %27, %503 %505 = shl i32 %500, 4 %506 = add i32 %505, 16 %507 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %506) %508 = fmul float %28, %507 %509 = fmul float %504, 0x3FE51EB860000000 %510 = fmul float %508, 0x3FE51EB860000000 br label %ENDIF143 ENDIF143: ; preds = %ENDIF136, %IF144 %temp4.1 = phi float [ %509, %IF144 ], [ %temp4.0, %ENDIF136 ] %temp6.1 = phi float [ %510, %IF144 ], [ %temp6.0, %ENDIF136 ] %511 = fmul float %21, 1.000000e+00 %512 = fadd float %511, 0.000000e+00 %513 = fmul float %21, 0.000000e+00 %514 = fadd float %513, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %temp8.1, float %temp9.1, float %33, float %34) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %temp12.1, float %temp13.1, float %35, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp16.1, float %temp17.1, float %41, float %42) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %temp20.1, float %temp21.1, float %43, float %44) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %temp24.1, float %temp25.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %temp26.1, float %temp27.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %temp32.1, float %temp33.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 39, i32 0, float %temp4.1, float %temp6.1, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %61, float %62, float %512, float %514) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[16:19], s[8:9], 0x8 ; C0880908 s_load_dwordx4 s[20:23], s[8:9], 0xc ; C08A090C s_load_dwordx4 s[24:27], s[8:9], 0x10 ; C08C0910 s_load_dwordx4 s[8:11], s[8:9], 0x14 ; C0840914 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[28:31], v0, s[24:27], 0 idxen ; E00C2000 80061C00 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v1, v28 ; 7E02111C v_lshlrev_b32_e32 v20, 4, v1 ; 34280284 v_add_i32_e32 v1, 16, v20 ; 4A022890 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v22, v1, s[0:3], 0 offen ; E0301000 80001601 buffer_load_format_xyzw v[12:15], v0, s[12:15], 0 idxen ; E00C2000 80030C00 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v24, v22, v12 ; 10301916 buffer_load_format_xyzw v[16:19], v0, s[8:11], 0 idxen ; E00C2000 80021000 buffer_load_format_xyzw v[4:7], v0, s[20:23], 0 idxen ; E00C2000 80050400 buffer_load_format_xyzw v[8:11], v0, s[16:19], 0 idxen ; E00C2000 80040800 buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_add_i32_e32 v20, 20, v20 ; 4A282894 buffer_load_dword v23, v20, s[0:3], 0 offen ; E0301000 80001714 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v20, -1.0, v23 ; 06282EF3 v_cmp_ge_f32_e64 s[4:5], -|v20|, |v20| ; D00C0304 20022914 v_cndmask_b32_e64 v20, 0, -1, s[4:5] ; D2000014 00118280 v_cmp_eq_i32_e64 s[4:5], v20, 0 ; D1040004 00010114 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v20, 0x3ea8f5c3, v24 ; 102830FF 3EA8F5C3 v_mul_f32_e32 v21, v22, v13 ; 102A1B16 v_mul_f32_e32 v21, 0x3ea8f5c3, v21 ; 102A2AFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v20, 0x3f28f5c3, v24 ; 102830FF 3F28F5C3 v_mul_f32_e32 v21, v22, v14 ; 102A1D16 v_mul_f32_e32 v21, 0x3f28f5c3, v21 ; 102A2AFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v22, -2.0, v23 ; 062C2EF5 v_cmp_ge_f32_e64 s[4:5], -|v22|, |v22| ; D00C0304 20022D16 v_cndmask_b32_e64 v22, 0, -1, s[4:5] ; D2000016 00118280 v_cmp_ne_i32_e64 s[4:5], v22, 0 ; D10A0004 00010116 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v20, v28 ; 7E28111C v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v20, 16, v20 ; 4A282890 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v20, v14 ; 102A1D14 v_mul_f32_e32 v21, 0x3f28f5c3, v21 ; 102A2AFF 3F28F5C3 v_mul_f32_e32 v20, v20, v13 ; 10281B14 v_mul_f32_e32 v20, 0x3f28f5c3, v20 ; 102828FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v22, v29 ; 7E2C111D v_lshlrev_b32_e32 v22, 4, v22 ; 342C2C84 v_add_i32_e32 v23, 16, v22 ; 4A2E2C90 buffer_load_dword v25, v23, s[0:3], 0 offen ; E0301000 80001917 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v25, v12 ; 10341919 v_add_i32_e32 v22, 20, v22 ; 4A2C2C94 buffer_load_dword v24, v22, s[0:3], 0 offen ; E0301000 80001816 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v22, -1.0, v24 ; 062C30F3 v_cmp_ge_f32_e64 s[4:5], -|v22|, |v22| ; D00C0304 20022D16 v_cndmask_b32_e64 v22, 0, -1, s[4:5] ; D2000016 00118280 v_cmp_eq_i32_e64 s[4:5], v22, 0 ; D1040004 00010116 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v22, 0x3ea8f5c3, v26 ; 102C34FF 3EA8F5C3 v_mul_f32_e32 v23, v25, v13 ; 102E1B19 v_mul_f32_e32 v23, 0x3ea8f5c3, v23 ; 102E2EFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v22, 0x3f28f5c3, v26 ; 102C34FF 3F28F5C3 v_mul_f32_e32 v23, v25, v14 ; 102E1D19 v_mul_f32_e32 v23, 0x3f28f5c3, v23 ; 102E2EFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v24, -2.0, v24 ; 063030F5 v_cmp_ge_f32_e64 s[4:5], -|v24|, |v24| ; D00C0304 20023118 v_cndmask_b32_e64 v24, 0, -1, s[4:5] ; D2000018 00118280 v_cmp_ne_i32_e64 s[4:5], v24, 0 ; D10A0004 00010118 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v22, v29 ; 7E2C111D v_lshlrev_b32_e32 v22, 4, v22 ; 342C2C84 v_add_i32_e32 v22, 16, v22 ; 4A2C2C90 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v23, v22, v14 ; 102E1D16 v_mul_f32_e32 v23, 0x3f28f5c3, v23 ; 102E2EFF 3F28F5C3 v_mul_f32_e32 v22, v22, v13 ; 102C1B16 v_mul_f32_e32 v22, 0x3f28f5c3, v22 ; 102C2CFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v24, v30 ; 7E30111E v_lshlrev_b32_e32 v24, 4, v24 ; 34303084 v_add_i32_e32 v25, 16, v24 ; 4A323090 buffer_load_dword v27, v25, s[0:3], 0 offen ; E0301000 80001B19 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v27, v12 ; 1040191B v_add_i32_e32 v24, 20, v24 ; 4A303094 buffer_load_dword v26, v24, s[0:3], 0 offen ; E0301000 80001A18 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v24, -1.0, v26 ; 063034F3 v_cmp_ge_f32_e64 s[4:5], -|v24|, |v24| ; D00C0304 20023118 v_cndmask_b32_e64 v24, 0, -1, s[4:5] ; D2000018 00118280 v_cmp_eq_i32_e64 s[4:5], v24, 0 ; D1040004 00010118 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v24, 0x3ea8f5c3, v32 ; 103040FF 3EA8F5C3 v_mul_f32_e32 v25, v27, v13 ; 10321B1B v_mul_f32_e32 v25, 0x3ea8f5c3, v25 ; 103232FF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v24, 0x3f28f5c3, v32 ; 103040FF 3F28F5C3 v_mul_f32_e32 v25, v27, v14 ; 10321D1B v_mul_f32_e32 v25, 0x3f28f5c3, v25 ; 103232FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v26, -2.0, v26 ; 063434F5 v_cmp_ge_f32_e64 s[4:5], -|v26|, |v26| ; D00C0304 2002351A v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_ne_i32_e64 s[4:5], v26, 0 ; D10A0004 0001011A s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v24, v30 ; 7E30111E v_lshlrev_b32_e32 v24, 4, v24 ; 34303084 v_add_i32_e32 v24, 16, v24 ; 4A303090 buffer_load_dword v24, v24, s[0:3], 0 offen ; E0301000 80001818 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v25, v24, v14 ; 10321D18 v_mul_f32_e32 v25, 0x3f28f5c3, v25 ; 103232FF 3F28F5C3 v_mul_f32_e32 v24, v24, v13 ; 10301B18 v_mul_f32_e32 v24, 0x3f28f5c3, v24 ; 103030FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v26, v31 ; 7E34111F v_lshlrev_b32_e32 v26, 4, v26 ; 34343484 v_add_i32_e32 v27, 16, v26 ; 4A363490 buffer_load_dword v33, v27, s[0:3], 0 offen ; E0301000 8000211B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v34, v33, v12 ; 10441921 v_add_i32_e32 v26, 20, v26 ; 4A343494 buffer_load_dword v32, v26, s[0:3], 0 offen ; E0301000 8000201A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v26, -1.0, v32 ; 063440F3 v_cmp_ge_f32_e64 s[4:5], -|v26|, |v26| ; D00C0304 2002351A v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_eq_i32_e64 s[4:5], v26, 0 ; D1040004 0001011A s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v26, 0x3ea8f5c3, v34 ; 103444FF 3EA8F5C3 v_mul_f32_e32 v27, v33, v13 ; 10361B21 v_mul_f32_e32 v27, 0x3ea8f5c3, v27 ; 103636FF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v26, 0x3f28f5c3, v34 ; 103444FF 3F28F5C3 v_mul_f32_e32 v27, v33, v14 ; 10361D21 v_mul_f32_e32 v27, 0x3f28f5c3, v27 ; 103636FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v32, -2.0, v32 ; 064040F5 v_cmp_ge_f32_e64 s[4:5], -|v32|, |v32| ; D00C0304 20024120 v_cndmask_b32_e64 v32, 0, -1, s[4:5] ; D2000020 00118280 v_cmp_ne_i32_e64 s[4:5], v32, 0 ; D10A0004 00010120 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v26, v31 ; 7E34111F v_lshlrev_b32_e32 v26, 4, v26 ; 34343484 v_add_i32_e32 v26, 16, v26 ; 4A343490 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v27, v26, v14 ; 10361D1A v_mul_f32_e32 v27, 0x3f28f5c3, v27 ; 103636FF 3F28F5C3 v_mul_f32_e32 v26, v26, v13 ; 10341B1A v_mul_f32_e32 v26, 0x3f28f5c3, v26 ; 103434FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v28, v16 ; 7E381110 v_lshlrev_b32_e32 v28, 4, v28 ; 34383884 v_add_i32_e32 v29, 16, v28 ; 4A3A3890 buffer_load_dword v31, v29, s[0:3], 0 offen ; E0301000 80001F1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v12 ; 1040191F v_add_i32_e32 v28, 20, v28 ; 4A383894 buffer_load_dword v30, v28, s[0:3], 0 offen ; E0301000 80001E1C s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v28, -1.0, v30 ; 06383CF3 v_cmp_ge_f32_e64 s[4:5], -|v28|, |v28| ; D00C0304 2002391C v_cndmask_b32_e64 v28, 0, -1, s[4:5] ; D200001C 00118280 v_cmp_eq_i32_e64 s[4:5], v28, 0 ; D1040004 0001011C s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v28, 0x3ea8f5c3, v32 ; 103840FF 3EA8F5C3 v_mul_f32_e32 v29, v31, v13 ; 103A1B1F v_mul_f32_e32 v29, 0x3ea8f5c3, v29 ; 103A3AFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v28, 0x3f28f5c3, v32 ; 103840FF 3F28F5C3 v_mul_f32_e32 v29, v31, v14 ; 103A1D1F v_mul_f32_e32 v29, 0x3f28f5c3, v29 ; 103A3AFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v30, -2.0, v30 ; 063C3CF5 v_cmp_ge_f32_e64 s[4:5], -|v30|, |v30| ; D00C0304 20023D1E v_cndmask_b32_e64 v30, 0, -1, s[4:5] ; D200001E 00118280 v_cmp_ne_i32_e64 s[4:5], v30, 0 ; D10A0004 0001011E s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v28, v16 ; 7E381110 v_lshlrev_b32_e32 v28, 4, v28 ; 34383884 v_add_i32_e32 v28, 16, v28 ; 4A383890 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v14 ; 103A1D1C v_mul_f32_e32 v29, 0x3f28f5c3, v29 ; 103A3AFF 3F28F5C3 v_mul_f32_e32 v28, v28, v13 ; 10381B1C v_mul_f32_e32 v28, 0x3f28f5c3, v28 ; 103838FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v30, v17 ; 7E3C1111 v_lshlrev_b32_e32 v30, 4, v30 ; 343C3C84 v_add_i32_e32 v31, 16, v30 ; 4A3E3C90 buffer_load_dword v33, v31, s[0:3], 0 offen ; E0301000 8000211F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v34, v33, v12 ; 10441921 v_add_i32_e32 v30, 20, v30 ; 4A3C3C94 buffer_load_dword v32, v30, s[0:3], 0 offen ; E0301000 8000201E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v30, -1.0, v32 ; 063C40F3 v_cmp_ge_f32_e64 s[4:5], -|v30|, |v30| ; D00C0304 20023D1E v_cndmask_b32_e64 v30, 0, -1, s[4:5] ; D200001E 00118280 v_cmp_eq_i32_e64 s[4:5], v30, 0 ; D1040004 0001011E s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v30, 0x3ea8f5c3, v34 ; 103C44FF 3EA8F5C3 v_mul_f32_e32 v31, v33, v13 ; 103E1B21 v_mul_f32_e32 v31, 0x3ea8f5c3, v31 ; 103E3EFF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v30, 0x3f28f5c3, v34 ; 103C44FF 3F28F5C3 v_mul_f32_e32 v31, v33, v14 ; 103E1D21 v_mul_f32_e32 v31, 0x3f28f5c3, v31 ; 103E3EFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v32, -2.0, v32 ; 064040F5 v_cmp_ge_f32_e64 s[4:5], -|v32|, |v32| ; D00C0304 20024120 v_cndmask_b32_e64 v32, 0, -1, s[4:5] ; D2000020 00118280 v_cmp_ne_i32_e64 s[4:5], v32, 0 ; D10A0004 00010120 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v30, v17 ; 7E3C1111 v_lshlrev_b32_e32 v30, 4, v30 ; 343C3C84 v_add_i32_e32 v30, 16, v30 ; 4A3C3C90 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v31, v30, v14 ; 103E1D1E v_mul_f32_e32 v31, 0x3f28f5c3, v31 ; 103E3EFF 3F28F5C3 v_mul_f32_e32 v30, v30, v13 ; 103C1B1E v_mul_f32_e32 v30, 0x3f28f5c3, v30 ; 103C3CFF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v32, v18 ; 7E401112 v_lshlrev_b32_e32 v32, 4, v32 ; 34404084 v_add_i32_e32 v33, 16, v32 ; 4A424090 buffer_load_dword v35, v33, s[0:3], 0 offen ; E0301000 80002321 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v35, v12 ; 10481923 v_add_i32_e32 v32, 20, v32 ; 4A404094 buffer_load_dword v34, v32, s[0:3], 0 offen ; E0301000 80002220 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v32, -1.0, v34 ; 064044F3 v_cmp_ge_f32_e64 s[4:5], -|v32|, |v32| ; D00C0304 20024120 v_cndmask_b32_e64 v32, 0, -1, s[4:5] ; D2000020 00118280 v_cmp_eq_i32_e64 s[4:5], v32, 0 ; D1040004 00010120 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_mul_f32_e32 v32, 0x3ea8f5c3, v36 ; 104048FF 3EA8F5C3 v_mul_f32_e32 v33, v35, v13 ; 10421B23 v_mul_f32_e32 v33, 0x3ea8f5c3, v33 ; 104242FF 3EA8F5C3 s_or_saveexec_b64 s[4:5], s[4:5] ; BE842504 s_xor_b64 exec, exec, s[4:5] ; 89FE047E v_mul_f32_e32 v32, 0x3f28f5c3, v36 ; 104048FF 3F28F5C3 v_mul_f32_e32 v33, v35, v14 ; 10421D23 v_mul_f32_e32 v33, 0x3f28f5c3, v33 ; 104242FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_add_f32_e32 v34, -2.0, v34 ; 064444F5 v_cmp_ge_f32_e64 s[4:5], -|v34|, |v34| ; D00C0304 20024522 v_cndmask_b32_e64 v34, 0, -1, s[4:5] ; D2000022 00118280 v_cmp_ne_i32_e64 s[4:5], v34, 0 ; D10A0004 00010122 s_and_saveexec_b64 s[4:5], s[4:5] ; BE842404 s_xor_b64 s[4:5], exec, s[4:5] ; 8984047E v_cvt_i32_f32_e32 v32, v18 ; 7E401112 v_lshlrev_b32_e32 v32, 4, v32 ; 34404084 v_add_i32_e32 v32, 16, v32 ; 4A404090 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v32, v14 ; 10421D20 v_mul_f32_e32 v33, 0x3f28f5c3, v33 ; 104242FF 3F28F5C3 v_mul_f32_e32 v32, v32, v13 ; 10401B20 v_mul_f32_e32 v32, 0x3f28f5c3, v32 ; 104040FF 3F28F5C3 s_or_b64 exec, exec, s[4:5] ; 88FE047E v_cvt_i32_f32_e32 v34, v19 ; 7E441113 v_lshlrev_b32_e32 v34, 4, v34 ; 34444484 v_add_i32_e32 v35, 16, v34 ; 4A464490 buffer_load_dword v37, v35, s[0:3], 0 offen ; E0301000 80002523 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v38, v37, v12 ; 104C1925 v_add_i32_e32 v34, 20, v34 ; 4A444494 buffer_load_dword v36, v34, s[0:3], 0 offen ; E0301000 80002422 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v34, -1.0, v36 ; 064448F3 v_cmp_ge_f32_e64 s[4:5], -|v34|, |v34| ; D00C0304 20024522 v_cndmask_b32_e64 v34, 0, -1, s[4:5] ; D2000022 00118280 v_cmp_eq_i32_e64 s[4:5], v34, 0 ; D1040004 00010122 s_and_saveexec_b64 s[6:7], s[4:5] ; BE862404 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E v_mul_f32_e32 v34, 0x3ea8f5c3, v38 ; 10444CFF 3EA8F5C3 v_mul_f32_e32 v35, v37, v13 ; 10461B25 v_mul_f32_e32 v35, 0x3ea8f5c3, v35 ; 104646FF 3EA8F5C3 s_or_saveexec_b64 s[6:7], s[6:7] ; BE862506 s_xor_b64 exec, exec, s[6:7] ; 89FE067E v_mul_f32_e32 v34, 0x3f28f5c3, v38 ; 10444CFF 3F28F5C3 v_mul_f32_e32 v35, v37, v14 ; 10461D25 v_mul_f32_e32 v35, 0x3f28f5c3, v35 ; 104646FF 3F28F5C3 s_or_b64 exec, exec, s[6:7] ; 88FE067E s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 v_add_f32_e32 v36, -2.0, v36 ; 064848F5 v_cmp_ge_f32_e64 s[6:7], -|v36|, |v36| ; D00C0306 20024924 v_cndmask_b32_e64 v36, 0, -1, s[6:7] ; D2000024 00198280 v_cmp_ne_i32_e64 s[6:7], v36, 0 ; D10A0006 00010124 s_waitcnt lgkmcnt(0) ; BF8C007F s_and_saveexec_b64 s[6:7], s[6:7] ; BE862406 s_xor_b64 s[6:7], exec, s[6:7] ; 8986067E v_cvt_i32_f32_e32 v16, v19 ; 7E201113 v_lshlrev_b32_e32 v16, 4, v16 ; 34202084 v_add_i32_e32 v16, 16, v16 ; 4A202090 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v16, v14 ; 10221D10 v_mul_f32_e32 v35, 0x3f28f5c3, v17 ; 104622FF 3F28F5C3 v_mul_f32_e32 v12, v16, v13 ; 10181B10 v_mul_f32_e32 v34, 0x3f28f5c3, v12 ; 104418FF 3F28F5C3 s_or_b64 exec, exec, s[6:7] ; 88FE067E exp 15, 32, 0, 0, 0, v20, v21, v8, v9 ; F800020F 09081514 exp 15, 33, 0, 0, 0, v22, v23, v10, v11 ; F800021F 0B0A1716 exp 15, 34, 0, 0, 0, v24, v25, v4, v5 ; F800022F 05041918 exp 15, 35, 0, 0, 0, v26, v27, v6, v7 ; F800023F 07061B1A s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v4, 0 ; 7E080280 exp 15, 36, 0, 0, 0, v28, v29, v4, v4 ; F800024F 04041D1C exp 15, 37, 0, 0, 0, v30, v31, v4, v4 ; F800025F 04041F1E exp 15, 38, 0, 0, 0, v32, v33, v4, v4 ; F800026F 04042120 exp 15, 39, 0, 0, 0, v34, v35, v4, v4 ; F800027F 04042322 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v4, 0, v2, 1.0 ; D2820004 03CA0480 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, s5, v1 ; 060C0205 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL IN[7], GENERIC[16], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL SAMP[12] DCL SAMP[13] DCL SAMP[14] DCL SAMP[15] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[1], 2D 2: MUL TEMP[0], TEMP[0], TEMP[0] 3: MOV TEMP[1].y, TEMP[0].wwww 4: MOV TEMP[2].xy, IN[0].xyyy 5: TEX TEMP[2], TEMP[2], SAMP[0], 2D 6: MUL TEMP[2], TEMP[2], TEMP[2] 7: MOV TEMP[1].x, TEMP[2].wwww 8: MOV TEMP[3].xy, IN[2].xyyy 9: TEX TEMP[3], TEMP[3], SAMP[2], 2D 10: MUL TEMP[3], TEMP[3], TEMP[3] 11: MOV TEMP[1].z, TEMP[3].wwww 12: MOV TEMP[4].xy, IN[3].xyyy 13: TEX TEMP[4], TEMP[4], SAMP[3], 2D 14: MUL TEMP[4], TEMP[4], TEMP[4] 15: MOV TEMP[1].w, TEMP[4].wwww 16: MUL TEMP[1], TEMP[1], TEMP[1] 17: MUL TEMP[1], TEMP[1], TEMP[1] 18: MOV TEMP[5].xy, IN[0].zwzz 19: MOV TEMP[5].zw, IN[1].wwzw 20: MUL TEMP[1], TEMP[1], TEMP[5] 21: MUL TEMP[0], TEMP[0], TEMP[1].yyyy 22: MAD TEMP[0], TEMP[1].xxxx, TEMP[2], TEMP[0] 23: MAD TEMP[0], TEMP[1].zzzz, TEMP[3], TEMP[0] 24: MAD TEMP[0], TEMP[1].wwww, TEMP[4], TEMP[0] 25: MOV TEMP[6].xy, IN[4].xyyy 26: TEX TEMP[6], TEMP[6], SAMP[4], 2D 27: MUL TEMP[2], TEMP[6], TEMP[6] 28: MOV TEMP[3].x, TEMP[2].wwww 29: MOV TEMP[6].xy, IN[5].xyyy 30: TEX TEMP[6], TEMP[6], SAMP[5], 2D 31: MUL TEMP[4], TEMP[6], TEMP[6] 32: MOV TEMP[3].y, TEMP[4].wwww 33: MOV TEMP[6].xy, IN[6].xyyy 34: TEX TEMP[6], TEMP[6], SAMP[6], 2D 35: MUL TEMP[5], TEMP[6], TEMP[6] 36: MOV TEMP[3].z, TEMP[5].wwww 37: MOV TEMP[6].xy, IN[7].xyyy 38: TEX TEMP[6], TEMP[6], SAMP[7], 2D 39: MUL TEMP[6], TEMP[6], TEMP[6] 40: MOV TEMP[3].w, TEMP[6].wwww 41: MUL TEMP[3], TEMP[3], TEMP[3] 42: MUL TEMP[3], TEMP[3], TEMP[3] 43: MOV TEMP[7].xy, IN[2].zwzz 44: MOV TEMP[7].zw, IN[3].wwzw 45: MUL TEMP[3], TEMP[3], TEMP[7] 46: MAD TEMP[0], TEMP[3].xxxx, TEMP[2], TEMP[0] 47: MAD TEMP[0], TEMP[3].yyyy, TEMP[4], TEMP[0] 48: MAD TEMP[0], TEMP[3].zzzz, TEMP[5], TEMP[0] 49: MAD TEMP[0], TEMP[3].wwww, TEMP[6], TEMP[0] 50: DP4 TEMP[4].x, TEMP[3], IMM[0].xxxx 51: DP4 TEMP[5].x, TEMP[1], IMM[0].xxxx 52: ADD TEMP[4].x, TEMP[4].xxxx, TEMP[5].xxxx 53: RCP TEMP[2].x, TEMP[4].xxxx 54: MUL TEMP[0], TEMP[0], TEMP[2].xxxx 55: MOV TEMP[4].xy, IN[1].xyyy 56: TEX TEMP[4].x, TEMP[4], SAMP[9], 2D 57: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[1].yyyy 58: MOV TEMP[5].xy, IN[0].xyyy 59: TEX TEMP[5].x, TEMP[5], SAMP[8], 2D 60: MAD TEMP[4].x, TEMP[1].xxxx, TEMP[5].xxxx, TEMP[4].xxxx 61: MOV TEMP[5].xy, IN[2].xyyy 62: TEX TEMP[5].x, TEMP[5], SAMP[10], 2D 63: MAD TEMP[4].x, TEMP[1].zzzz, TEMP[5].xxxx, TEMP[4].xxxx 64: MOV TEMP[5].xy, IN[3].xyyy 65: TEX TEMP[5].x, TEMP[5], SAMP[11], 2D 66: MAD TEMP[1].x, TEMP[1].wwww, TEMP[5].xxxx, TEMP[4].xxxx 67: MOV TEMP[4].xy, IN[4].xyyy 68: TEX TEMP[4].x, TEMP[4], SAMP[12], 2D 69: MAD TEMP[1].x, TEMP[3].xxxx, TEMP[4].xxxx, TEMP[1].xxxx 70: MOV TEMP[4].xy, IN[5].xyyy 71: TEX TEMP[4].x, TEMP[4], SAMP[13], 2D 72: MAD TEMP[1].x, TEMP[3].yyyy, TEMP[4].xxxx, TEMP[1].xxxx 73: MOV TEMP[4].xy, IN[6].xyyy 74: TEX TEMP[4].x, TEMP[4], SAMP[14], 2D 75: MAD TEMP[1].x, TEMP[3].zzzz, TEMP[4].xxxx, TEMP[1].xxxx 76: MOV TEMP[4].xy, IN[7].xyyy 77: TEX TEMP[4].x, TEMP[4], SAMP[15], 2D 78: MAD TEMP[1].x, TEMP[3].wwww, TEMP[4].xxxx, TEMP[1].xxxx 79: MUL TEMP[1], TEMP[2].xxxx, TEMP[1].xxxx 80: MOV OUT[1], TEMP[1] 81: MOV OUT[0], TEMP[0] 82: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 11 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 11 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 12 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 12 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 13 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 13 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 14 %79 = load <8 x i32> addrspace(2)* %78, !tbaa !0 %80 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 14 %81 = load <4 x i32> addrspace(2)* %80, !tbaa !0 %82 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 15 %83 = load <8 x i32> addrspace(2)* %82, !tbaa !0 %84 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 15 %85 = load <4 x i32> addrspace(2)* %84, !tbaa !0 %86 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %91 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %92 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %93 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %94 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %95 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %96 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %97 = call float @llvm.SI.fs.interp(i32 3, i32 2, i32 %5, <2 x i32> %7) %98 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %99 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 3, i32 3, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 0, i32 7, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 1, i32 7, i32 %5, <2 x i32> %7) %110 = bitcast float %90 to i32 %111 = bitcast float %91 to i32 %112 = insertelement <2 x i32> undef, i32 %110, i32 0 %113 = insertelement <2 x i32> %112, i32 %111, i32 1 %114 = bitcast <8 x i32> %27 to <32 x i8> %115 = bitcast <4 x i32> %29 to <16 x i8> %116 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %113, <32 x i8> %114, <16 x i8> %115, i32 2) %117 = extractelement <4 x float> %116, i32 0 %118 = extractelement <4 x float> %116, i32 1 %119 = extractelement <4 x float> %116, i32 2 %120 = extractelement <4 x float> %116, i32 3 %121 = fmul float %117, %117 %122 = fmul float %118, %118 %123 = fmul float %119, %119 %124 = fmul float %120, %120 %125 = bitcast float %86 to i32 %126 = bitcast float %87 to i32 %127 = insertelement <2 x i32> undef, i32 %125, i32 0 %128 = insertelement <2 x i32> %127, i32 %126, i32 1 %129 = bitcast <8 x i32> %23 to <32 x i8> %130 = bitcast <4 x i32> %25 to <16 x i8> %131 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %128, <32 x i8> %129, <16 x i8> %130, i32 2) %132 = extractelement <4 x float> %131, i32 0 %133 = extractelement <4 x float> %131, i32 1 %134 = extractelement <4 x float> %131, i32 2 %135 = extractelement <4 x float> %131, i32 3 %136 = fmul float %132, %132 %137 = fmul float %133, %133 %138 = fmul float %134, %134 %139 = fmul float %135, %135 %140 = bitcast float %94 to i32 %141 = bitcast float %95 to i32 %142 = insertelement <2 x i32> undef, i32 %140, i32 0 %143 = insertelement <2 x i32> %142, i32 %141, i32 1 %144 = bitcast <8 x i32> %31 to <32 x i8> %145 = bitcast <4 x i32> %33 to <16 x i8> %146 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %143, <32 x i8> %144, <16 x i8> %145, i32 2) %147 = extractelement <4 x float> %146, i32 0 %148 = extractelement <4 x float> %146, i32 1 %149 = extractelement <4 x float> %146, i32 2 %150 = extractelement <4 x float> %146, i32 3 %151 = fmul float %147, %147 %152 = fmul float %148, %148 %153 = fmul float %149, %149 %154 = fmul float %150, %150 %155 = bitcast float %98 to i32 %156 = bitcast float %99 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %35 to <32 x i8> %160 = bitcast <4 x i32> %37 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 1 %164 = extractelement <4 x float> %161, i32 2 %165 = extractelement <4 x float> %161, i32 3 %166 = fmul float %162, %162 %167 = fmul float %163, %163 %168 = fmul float %164, %164 %169 = fmul float %165, %165 %170 = fmul float %139, %139 %171 = fmul float %124, %124 %172 = fmul float %154, %154 %173 = fmul float %169, %169 %174 = fmul float %170, %170 %175 = fmul float %171, %171 %176 = fmul float %172, %172 %177 = fmul float %173, %173 %178 = fmul float %174, %88 %179 = fmul float %175, %89 %180 = fmul float %176, %92 %181 = fmul float %177, %93 %182 = fmul float %121, %179 %183 = fmul float %122, %179 %184 = fmul float %123, %179 %185 = fmul float %124, %179 %186 = fmul float %178, %136 %187 = fadd float %186, %182 %188 = fmul float %178, %137 %189 = fadd float %188, %183 %190 = fmul float %178, %138 %191 = fadd float %190, %184 %192 = fmul float %178, %139 %193 = fadd float %192, %185 %194 = fmul float %180, %151 %195 = fadd float %194, %187 %196 = fmul float %180, %152 %197 = fadd float %196, %189 %198 = fmul float %180, %153 %199 = fadd float %198, %191 %200 = fmul float %180, %154 %201 = fadd float %200, %193 %202 = fmul float %181, %166 %203 = fadd float %202, %195 %204 = fmul float %181, %167 %205 = fadd float %204, %197 %206 = fmul float %181, %168 %207 = fadd float %206, %199 %208 = fmul float %181, %169 %209 = fadd float %208, %201 %210 = bitcast float %102 to i32 %211 = bitcast float %103 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = bitcast <8 x i32> %39 to <32 x i8> %215 = bitcast <4 x i32> %41 to <16 x i8> %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %214, <16 x i8> %215, i32 2) %217 = extractelement <4 x float> %216, i32 0 %218 = extractelement <4 x float> %216, i32 1 %219 = extractelement <4 x float> %216, i32 2 %220 = extractelement <4 x float> %216, i32 3 %221 = fmul float %217, %217 %222 = fmul float %218, %218 %223 = fmul float %219, %219 %224 = fmul float %220, %220 %225 = bitcast float %104 to i32 %226 = bitcast float %105 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = bitcast <8 x i32> %43 to <32 x i8> %230 = bitcast <4 x i32> %45 to <16 x i8> %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %229, <16 x i8> %230, i32 2) %232 = extractelement <4 x float> %231, i32 0 %233 = extractelement <4 x float> %231, i32 1 %234 = extractelement <4 x float> %231, i32 2 %235 = extractelement <4 x float> %231, i32 3 %236 = fmul float %232, %232 %237 = fmul float %233, %233 %238 = fmul float %234, %234 %239 = fmul float %235, %235 %240 = bitcast float %106 to i32 %241 = bitcast float %107 to i32 %242 = insertelement <2 x i32> undef, i32 %240, i32 0 %243 = insertelement <2 x i32> %242, i32 %241, i32 1 %244 = bitcast <8 x i32> %47 to <32 x i8> %245 = bitcast <4 x i32> %49 to <16 x i8> %246 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %243, <32 x i8> %244, <16 x i8> %245, i32 2) %247 = extractelement <4 x float> %246, i32 0 %248 = extractelement <4 x float> %246, i32 1 %249 = extractelement <4 x float> %246, i32 2 %250 = extractelement <4 x float> %246, i32 3 %251 = fmul float %247, %247 %252 = fmul float %248, %248 %253 = fmul float %249, %249 %254 = fmul float %250, %250 %255 = bitcast float %108 to i32 %256 = bitcast float %109 to i32 %257 = insertelement <2 x i32> undef, i32 %255, i32 0 %258 = insertelement <2 x i32> %257, i32 %256, i32 1 %259 = bitcast <8 x i32> %51 to <32 x i8> %260 = bitcast <4 x i32> %53 to <16 x i8> %261 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %258, <32 x i8> %259, <16 x i8> %260, i32 2) %262 = extractelement <4 x float> %261, i32 0 %263 = extractelement <4 x float> %261, i32 1 %264 = extractelement <4 x float> %261, i32 2 %265 = extractelement <4 x float> %261, i32 3 %266 = fmul float %262, %262 %267 = fmul float %263, %263 %268 = fmul float %264, %264 %269 = fmul float %265, %265 %270 = fmul float %224, %224 %271 = fmul float %239, %239 %272 = fmul float %254, %254 %273 = fmul float %269, %269 %274 = fmul float %270, %270 %275 = fmul float %271, %271 %276 = fmul float %272, %272 %277 = fmul float %273, %273 %278 = fmul float %274, %96 %279 = fmul float %275, %97 %280 = fmul float %276, %100 %281 = fmul float %277, %101 %282 = fmul float %278, %221 %283 = fadd float %282, %203 %284 = fmul float %278, %222 %285 = fadd float %284, %205 %286 = fmul float %278, %223 %287 = fadd float %286, %207 %288 = fmul float %278, %224 %289 = fadd float %288, %209 %290 = fmul float %279, %236 %291 = fadd float %290, %283 %292 = fmul float %279, %237 %293 = fadd float %292, %285 %294 = fmul float %279, %238 %295 = fadd float %294, %287 %296 = fmul float %279, %239 %297 = fadd float %296, %289 %298 = fmul float %280, %251 %299 = fadd float %298, %291 %300 = fmul float %280, %252 %301 = fadd float %300, %293 %302 = fmul float %280, %253 %303 = fadd float %302, %295 %304 = fmul float %280, %254 %305 = fadd float %304, %297 %306 = fmul float %281, %266 %307 = fadd float %306, %299 %308 = fmul float %281, %267 %309 = fadd float %308, %301 %310 = fmul float %281, %268 %311 = fadd float %310, %303 %312 = fmul float %281, %269 %313 = fadd float %312, %305 %314 = fmul float %278, 1.000000e+00 %315 = fmul float %279, 1.000000e+00 %316 = fadd float %314, %315 %317 = fmul float %280, 1.000000e+00 %318 = fadd float %316, %317 %319 = fmul float %281, 1.000000e+00 %320 = fadd float %318, %319 %321 = fmul float %178, 1.000000e+00 %322 = fmul float %179, 1.000000e+00 %323 = fadd float %321, %322 %324 = fmul float %180, 1.000000e+00 %325 = fadd float %323, %324 %326 = fmul float %181, 1.000000e+00 %327 = fadd float %325, %326 %328 = fadd float %320, %327 %329 = fdiv float 1.000000e+00, %328 %330 = fmul float %307, %329 %331 = fmul float %309, %329 %332 = fmul float %311, %329 %333 = fmul float %313, %329 %334 = bitcast float %90 to i32 %335 = bitcast float %91 to i32 %336 = insertelement <2 x i32> undef, i32 %334, i32 0 %337 = insertelement <2 x i32> %336, i32 %335, i32 1 %338 = bitcast <8 x i32> %59 to <32 x i8> %339 = bitcast <4 x i32> %61 to <16 x i8> %340 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %337, <32 x i8> %338, <16 x i8> %339, i32 2) %341 = extractelement <4 x float> %340, i32 0 %342 = fmul float %341, %179 %343 = bitcast float %86 to i32 %344 = bitcast float %87 to i32 %345 = insertelement <2 x i32> undef, i32 %343, i32 0 %346 = insertelement <2 x i32> %345, i32 %344, i32 1 %347 = bitcast <8 x i32> %55 to <32 x i8> %348 = bitcast <4 x i32> %57 to <16 x i8> %349 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %346, <32 x i8> %347, <16 x i8> %348, i32 2) %350 = extractelement <4 x float> %349, i32 0 %351 = fmul float %178, %350 %352 = fadd float %351, %342 %353 = bitcast float %94 to i32 %354 = bitcast float %95 to i32 %355 = insertelement <2 x i32> undef, i32 %353, i32 0 %356 = insertelement <2 x i32> %355, i32 %354, i32 1 %357 = bitcast <8 x i32> %63 to <32 x i8> %358 = bitcast <4 x i32> %65 to <16 x i8> %359 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %356, <32 x i8> %357, <16 x i8> %358, i32 2) %360 = extractelement <4 x float> %359, i32 0 %361 = fmul float %180, %360 %362 = fadd float %361, %352 %363 = bitcast float %98 to i32 %364 = bitcast float %99 to i32 %365 = insertelement <2 x i32> undef, i32 %363, i32 0 %366 = insertelement <2 x i32> %365, i32 %364, i32 1 %367 = bitcast <8 x i32> %67 to <32 x i8> %368 = bitcast <4 x i32> %69 to <16 x i8> %369 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %366, <32 x i8> %367, <16 x i8> %368, i32 2) %370 = extractelement <4 x float> %369, i32 0 %371 = fmul float %181, %370 %372 = fadd float %371, %362 %373 = bitcast float %102 to i32 %374 = bitcast float %103 to i32 %375 = insertelement <2 x i32> undef, i32 %373, i32 0 %376 = insertelement <2 x i32> %375, i32 %374, i32 1 %377 = bitcast <8 x i32> %71 to <32 x i8> %378 = bitcast <4 x i32> %73 to <16 x i8> %379 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %376, <32 x i8> %377, <16 x i8> %378, i32 2) %380 = extractelement <4 x float> %379, i32 0 %381 = fmul float %278, %380 %382 = fadd float %381, %372 %383 = bitcast float %104 to i32 %384 = bitcast float %105 to i32 %385 = insertelement <2 x i32> undef, i32 %383, i32 0 %386 = insertelement <2 x i32> %385, i32 %384, i32 1 %387 = bitcast <8 x i32> %75 to <32 x i8> %388 = bitcast <4 x i32> %77 to <16 x i8> %389 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %386, <32 x i8> %387, <16 x i8> %388, i32 2) %390 = extractelement <4 x float> %389, i32 0 %391 = fmul float %279, %390 %392 = fadd float %391, %382 %393 = bitcast float %106 to i32 %394 = bitcast float %107 to i32 %395 = insertelement <2 x i32> undef, i32 %393, i32 0 %396 = insertelement <2 x i32> %395, i32 %394, i32 1 %397 = bitcast <8 x i32> %79 to <32 x i8> %398 = bitcast <4 x i32> %81 to <16 x i8> %399 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %396, <32 x i8> %397, <16 x i8> %398, i32 2) %400 = extractelement <4 x float> %399, i32 0 %401 = fmul float %280, %400 %402 = fadd float %401, %392 %403 = bitcast float %108 to i32 %404 = bitcast float %109 to i32 %405 = insertelement <2 x i32> undef, i32 %403, i32 0 %406 = insertelement <2 x i32> %405, i32 %404, i32 1 %407 = bitcast <8 x i32> %83 to <32 x i8> %408 = bitcast <4 x i32> %85 to <16 x i8> %409 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %406, <32 x i8> %407, <16 x i8> %408, i32 2) %410 = extractelement <4 x float> %409, i32 0 %411 = fmul float %281, %410 %412 = fadd float %411, %402 %413 = fmul float %329, %412 %414 = fmul float %329, %412 %415 = fmul float %329, %412 %416 = fmul float %329, %412 %417 = call i32 @llvm.SI.packf16(float %330, float %331) %418 = bitcast i32 %417 to float %419 = call i32 @llvm.SI.packf16(float %332, float %333) %420 = bitcast i32 %419 to float %421 = call i32 @llvm.SI.packf16(float %413, float %414) %422 = bitcast i32 %421 to float %423 = call i32 @llvm.SI.packf16(float %415, float %416) %424 = bitcast i32 %423 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %418, float %420, float %418, float %420) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %422, float %424, float %422, float %424) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[100:101], s[6:7] ; BEE40406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[24:27], s[4:5], 0x0 ; C08C0500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx4 s[96:99], s[4:5], 0x8 ; C0B00508 s_load_dwordx4 s[84:87], s[4:5], 0xc ; C0AA050C s_load_dwordx4 s[60:63], s[4:5], 0x10 ; C09E0510 s_load_dwordx4 s[72:75], s[4:5], 0x14 ; C0A40514 s_load_dwordx4 s[48:51], s[4:5], 0x18 ; C0980518 s_load_dwordx4 s[36:39], s[4:5], 0x1c ; C092051C s_load_dwordx4 s[12:15], s[4:5], 0x20 ; C0860520 s_load_dwordx4 s[0:3], s[4:5], 0x24 ; C0800524 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 24 ; 04953000 v_writelane_b32 v74, s1, 25 ; 04953201 v_writelane_b32 v74, s2, 26 ; 04953402 v_writelane_b32 v74, s3, 27 ; 04953603 s_load_dwordx4 s[0:3], s[4:5], 0x28 ; C0800528 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 20 ; 04952800 v_writelane_b32 v74, s1, 21 ; 04952A01 v_writelane_b32 v74, s2, 22 ; 04952C02 v_writelane_b32 v74, s3, 23 ; 04952E03 s_load_dwordx4 s[0:3], s[4:5], 0x2c ; C080052C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 16 ; 04952000 v_writelane_b32 v74, s1, 17 ; 04952201 v_writelane_b32 v74, s2, 18 ; 04952402 v_writelane_b32 v74, s3, 19 ; 04952603 s_load_dwordx4 s[0:3], s[4:5], 0x30 ; C0800530 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 12 ; 04951800 v_writelane_b32 v74, s1, 13 ; 04951A01 v_writelane_b32 v74, s2, 14 ; 04951C02 v_writelane_b32 v74, s3, 15 ; 04951E03 s_load_dwordx4 s[0:3], s[4:5], 0x34 ; C0800534 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 8 ; 04951000 v_writelane_b32 v74, s1, 9 ; 04951201 v_writelane_b32 v74, s2, 10 ; 04951402 v_writelane_b32 v74, s3, 11 ; 04951603 s_load_dwordx4 s[0:3], s[4:5], 0x38 ; C0800538 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 4 ; 04950800 v_writelane_b32 v74, s1, 5 ; 04950A01 v_writelane_b32 v74, s2, 6 ; 04950C02 v_writelane_b32 v74, s3, 7 ; 04950E03 s_load_dwordx4 s[0:3], s[4:5], 0x3c ; C080053C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v74, s0, 0 ; 04950000 v_writelane_b32 v74, s1, 1 ; 04950201 v_writelane_b32 v74, s2, 2 ; 04950402 v_writelane_b32 v74, s3, 3 ; 04950603 s_load_dwordx8 s[28:35], s[100:101], 0x0 ; C0CE6500 s_load_dwordx8 s[0:7], s[100:101], 0x8 ; C0C06508 s_load_dwordx8 s[16:23], s[100:101], 0x10 ; C0C86510 s_load_dwordx8 s[88:95], s[100:101], 0x18 ; C0EC6518 s_load_dwordx8 s[64:71], s[100:101], 0x20 ; C0E06520 s_load_dwordx8 s[76:83], s[100:101], 0x28 ; C0E66528 s_load_dwordx8 s[52:59], s[100:101], 0x30 ; C0DA6530 s_load_dwordx8 s[40:47], s[100:101], 0x38 ; C0D46538 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800F00 00400402 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mul_f32_e32 v9, v8, v8 ; 10121108 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_interp_p1_f32 v10, v0, 3, 0, [m0] ; C8280300 v_interp_p2_f32 v10, [v10], v1, 3, 0, [m0] ; C8290301 v_mul_f32_e32 v9, v10, v9 ; 1012130A v_interp_p1_f32 v11, v0, 1, 0, [m0] ; C82C0100 v_interp_p2_f32 v11, [v11], v1, 1, 0, [m0] ; C82D0101 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[28:35], s[24:27] ; F0800F00 00C70C0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, v15, v15 ; 10201F0F v_mul_f32_e32 v17, v16, v16 ; 10222110 v_mul_f32_e32 v17, v17, v17 ; 10222311 v_interp_p1_f32 v18, v0, 2, 0, [m0] ; C8480200 v_interp_p2_f32 v18, [v18], v1, 2, 0, [m0] ; C8490201 v_mad_f32 v19, v17, v18, v9 ; D2820013 04262511 v_interp_p1_f32 v21, v0, 1, 2, [m0] ; C8540900 v_interp_p2_f32 v21, [v21], v1, 1, 2, [m0] ; C8550901 v_interp_p1_f32 v20, v0, 0, 2, [m0] ; C8500800 v_interp_p2_f32 v20, [v20], v1, 0, 2, [m0] ; C8510801 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[16:23], s[96:99] ; F0800F00 03041614 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v25, v25 ; 10343319 v_mul_f32_e32 v27, v26, v26 ; 1036351A v_mul_f32_e32 v27, v27, v27 ; 1036371B v_interp_p1_f32 v28, v0, 2, 1, [m0] ; C8700600 v_interp_p2_f32 v28, [v28], v1, 2, 1, [m0] ; C8710601 v_mad_f32 v19, v27, v28, v19 ; D2820013 044E391B v_interp_p1_f32 v30, v0, 1, 3, [m0] ; C8780D00 v_interp_p2_f32 v30, [v30], v1, 1, 3, [m0] ; C8790D01 v_interp_p1_f32 v29, v0, 0, 3, [m0] ; C8740C00 v_interp_p2_f32 v29, [v29], v1, 0, 3, [m0] ; C8750C01 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[88:95], s[84:87] ; F0800F00 02B61F1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v35, v34, v34 ; 10464522 v_mul_f32_e32 v36, v35, v35 ; 10484723 v_mul_f32_e32 v36, v36, v36 ; 10484924 v_interp_p1_f32 v37, v0, 3, 1, [m0] ; C8940700 v_interp_p2_f32 v37, [v37], v1, 3, 1, [m0] ; C8950701 v_mad_f32 v19, v36, v37, v19 ; D2820013 044E4B24 v_interp_p1_f32 v39, v0, 1, 5, [m0] ; C89C1500 v_interp_p2_f32 v39, [v39], v1, 1, 5, [m0] ; C89D1501 v_interp_p1_f32 v38, v0, 0, 5, [m0] ; C8981400 v_interp_p2_f32 v38, [v38], v1, 0, 5, [m0] ; C8991401 image_sample v[40:43], 15, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[76:83], s[72:75] ; F0800F00 02532826 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v44, v43, v43 ; 1058572B v_mul_f32_e32 v45, v44, v44 ; 105A592C v_mul_f32_e32 v45, v45, v45 ; 105A5B2D v_interp_p1_f32 v46, v0, 3, 2, [m0] ; C8B80B00 v_interp_p2_f32 v46, [v46], v1, 3, 2, [m0] ; C8B90B01 v_mul_f32_e32 v45, v46, v45 ; 105A5B2E v_interp_p1_f32 v47, v0, 1, 4, [m0] ; C8BC1100 v_interp_p2_f32 v47, [v47], v1, 1, 4, [m0] ; C8BD1101 v_interp_p1_f32 v46, v0, 0, 4, [m0] ; C8B81000 v_interp_p2_f32 v46, [v46], v1, 0, 4, [m0] ; C8B91001 image_sample v[48:51], 15, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[64:71], s[60:63] ; F0800F00 01F0302E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v52, v51, v51 ; 10686733 v_mul_f32_e32 v53, v52, v52 ; 106A6934 v_mul_f32_e32 v53, v53, v53 ; 106A6B35 v_interp_p1_f32 v54, v0, 2, 2, [m0] ; C8D80A00 v_interp_p2_f32 v54, [v54], v1, 2, 2, [m0] ; C8D90A01 v_mad_f32 v55, v53, v54, v45 ; D2820037 04B66D35 v_interp_p1_f32 v57, v0, 1, 6, [m0] ; C8E41900 v_interp_p2_f32 v57, [v57], v1, 1, 6, [m0] ; C8E51901 v_interp_p1_f32 v56, v0, 0, 6, [m0] ; C8E01800 v_interp_p2_f32 v56, [v56], v1, 0, 6, [m0] ; C8E11801 image_sample v[58:61], 15, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[52:59], s[48:51] ; F0800F00 018D3A38 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v62, v61, v61 ; 107C7B3D v_mul_f32_e32 v63, v62, v62 ; 107E7D3E v_mul_f32_e32 v63, v63, v63 ; 107E7F3F v_interp_p1_f32 v64, v0, 2, 3, [m0] ; C9000E00 v_interp_p2_f32 v64, [v64], v1, 2, 3, [m0] ; C9010E01 v_mad_f32 v55, v63, v64, v55 ; D2820037 04DE813F v_interp_p1_f32 v66, v0, 1, 7, [m0] ; C9081D00 v_interp_p2_f32 v66, [v66], v1, 1, 7, [m0] ; C9091D01 v_interp_p1_f32 v65, v0, 0, 7, [m0] ; C9041C00 v_interp_p2_f32 v65, [v65], v1, 0, 7, [m0] ; C9051C01 image_sample v[67:70], 15, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[40:47], s[36:39] ; F0800F00 012A4341 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v71, v70, v70 ; 108E8D46 v_mul_f32_e32 v72, v71, v71 ; 10908F47 v_mul_f32_e32 v72, v72, v72 ; 10909148 v_interp_p1_f32 v73, v0, 3, 3, [m0] ; C9240F00 v_interp_p2_f32 v73, [v73], v1, 3, 3, [m0] ; C9250F01 v_mad_f32 v0, v72, v73, v55 ; D2820000 04DE9348 v_add_f32_e32 v0, v19, v0 ; 06000113 v_rcp_f32_e32 v0, v0 ; 7E005500 v_mul_f32_e32 v1, v9, v8 ; 10021109 v_mul_f32_e32 v8, v18, v17 ; 10102312 v_mad_f32 v1, v8, v16, v1 ; D2820001 04062108 v_mul_f32_e32 v16, v28, v27 ; 1020371C v_mad_f32 v1, v16, v26, v1 ; D2820001 04063510 v_mul_f32_e32 v17, v37, v36 ; 10224925 v_mad_f32 v1, v17, v35, v1 ; D2820001 04064711 v_mul_f32_e32 v18, v54, v53 ; 10246B36 v_mad_f32 v1, v18, v52, v1 ; D2820001 04066912 v_mad_f32 v1, v45, v44, v1 ; D2820001 0406592D v_mul_f32_e32 v19, v64, v63 ; 10267F40 v_mad_f32 v1, v19, v62, v1 ; D2820001 04067D13 v_mul_f32_e32 v26, v73, v72 ; 10349149 v_mad_f32 v1, v26, v71, v1 ; D2820001 04068F1A v_mul_f32_e32 v1, v0, v1 ; 10020300 v_mul_f32_e32 v27, v6, v6 ; 10360D06 v_mul_f32_e32 v27, v9, v27 ; 10363709 v_mul_f32_e32 v28, v14, v14 ; 10381D0E v_mad_f32 v27, v8, v28, v27 ; D282001B 046E3908 v_mul_f32_e32 v28, v24, v24 ; 10383118 v_mad_f32 v27, v16, v28, v27 ; D282001B 046E3910 v_mul_f32_e32 v28, v33, v33 ; 10384321 v_mad_f32 v27, v17, v28, v27 ; D282001B 046E3911 v_mul_f32_e32 v28, v50, v50 ; 10386532 v_mad_f32 v27, v18, v28, v27 ; D282001B 046E3912 v_mul_f32_e32 v28, v42, v42 ; 1038552A v_mad_f32 v27, v45, v28, v27 ; D282001B 046E392D v_mul_f32_e32 v28, v60, v60 ; 1038793C v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_mul_f32_e32 v28, v69, v69 ; 10388B45 v_mad_f32 v27, v26, v28, v27 ; D282001B 046E391A v_mul_f32_e32 v27, v0, v27 ; 10363700 v_cvt_pkrtz_f16_f32_e32 v1, v27, v1 ; 5E02031B v_mul_f32_e32 v27, v5, v5 ; 10360B05 v_mul_f32_e32 v27, v9, v27 ; 10363709 v_mul_f32_e32 v28, v13, v13 ; 10381B0D v_mad_f32 v27, v8, v28, v27 ; D282001B 046E3908 v_mul_f32_e32 v28, v23, v23 ; 10382F17 v_mad_f32 v27, v16, v28, v27 ; D282001B 046E3910 v_mul_f32_e32 v28, v32, v32 ; 10384120 v_mad_f32 v27, v17, v28, v27 ; D282001B 046E3911 v_mul_f32_e32 v28, v49, v49 ; 10386331 v_mad_f32 v27, v18, v28, v27 ; D282001B 046E3912 v_mul_f32_e32 v28, v41, v41 ; 10385329 v_mad_f32 v27, v45, v28, v27 ; D282001B 046E392D v_mul_f32_e32 v28, v59, v59 ; 1038773B v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_mul_f32_e32 v28, v68, v68 ; 10388944 v_mad_f32 v27, v26, v28, v27 ; D282001B 046E391A v_mul_f32_e32 v27, v0, v27 ; 10363700 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mul_f32_e32 v4, v9, v4 ; 10080909 v_mul_f32_e32 v5, v12, v12 ; 100A190C v_mad_f32 v4, v8, v5, v4 ; D2820004 04120B08 v_mul_f32_e32 v5, v22, v22 ; 100A2D16 v_mad_f32 v4, v16, v5, v4 ; D2820004 04120B10 v_mul_f32_e32 v5, v31, v31 ; 100A3F1F v_mad_f32 v4, v17, v5, v4 ; D2820004 04120B11 v_mul_f32_e32 v5, v48, v48 ; 100A6130 v_mad_f32 v4, v18, v5, v4 ; D2820004 04120B12 v_mul_f32_e32 v5, v40, v40 ; 100A5128 v_mad_f32 v4, v45, v5, v4 ; D2820004 04120B2D v_mul_f32_e32 v5, v58, v58 ; 100A753A v_mad_f32 v4, v19, v5, v4 ; D2820004 04120B13 v_mul_f32_e32 v5, v67, v67 ; 100A8743 v_mad_f32 v4, v26, v5, v4 ; D2820004 04120B1A v_mul_f32_e32 v4, v0, v4 ; 10080900 v_cvt_pkrtz_f16_f32_e32 v4, v4, v27 ; 5E083704 exp 15, 0, 1, 0, 0, v4, v1, v4, v1 ; F800040F 01040104 s_load_dwordx8 s[0:7], s[100:101], 0x48 ; C0C06548 v_readlane_b32 s8, v74, 24 ; 0211314A v_readlane_b32 s9, v74, 25 ; 0213334A v_readlane_b32 s10, v74, 26 ; 0215354A v_readlane_b32 s11, v74, 27 ; 0217374A s_nop 2 ; BF800002 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800100 00400102 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v9, v1 ; 10020309 s_load_dwordx8 s[0:7], s[100:101], 0x40 ; C0C06540 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[0:7], s[12:15] ; F0800100 0060020A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 s_load_dwordx8 s[0:7], s[100:101], 0x50 ; C0C06550 v_readlane_b32 s8, v74, 20 ; 0211294A v_readlane_b32 s9, v74, 21 ; 02132B4A v_readlane_b32 s10, v74, 22 ; 02152D4A v_readlane_b32 s11, v74, 23 ; 02172F4A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[0:7], s[8:11] ; F0800100 00400214 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v16, v2, v1 ; D2820001 04060510 s_load_dwordx8 s[0:7], s[100:101], 0x58 ; C0C06558 v_readlane_b32 s8, v74, 16 ; 0211214A v_readlane_b32 s9, v74, 17 ; 0213234A v_readlane_b32 s10, v74, 18 ; 0215254A v_readlane_b32 s11, v74, 19 ; 0217274A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[0:7], s[8:11] ; F0800100 0040021D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v17, v2, v1 ; D2820001 04060511 s_load_dwordx8 s[0:7], s[100:101], 0x60 ; C0C06560 v_readlane_b32 s8, v74, 12 ; 0211194A v_readlane_b32 s9, v74, 13 ; 02131B4A v_readlane_b32 s10, v74, 14 ; 02151D4A v_readlane_b32 s11, v74, 15 ; 02171F4A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[46:47], s[0:7], s[8:11] ; F0800100 0040022E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v18, v2, v1 ; D2820001 04060512 s_load_dwordx8 s[0:7], s[100:101], 0x68 ; C0C06568 v_readlane_b32 s8, v74, 8 ; 0211114A v_readlane_b32 s9, v74, 9 ; 0213134A v_readlane_b32 s10, v74, 10 ; 0215154A v_readlane_b32 s11, v74, 11 ; 0217174A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[0:7], s[8:11] ; F0800100 00400226 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v45, v2, v1 ; D2820001 0406052D s_load_dwordx8 s[0:7], s[100:101], 0x70 ; C0C06570 v_readlane_b32 s8, v74, 4 ; 0211094A v_readlane_b32 s9, v74, 5 ; 02130B4A v_readlane_b32 s10, v74, 6 ; 02150D4A v_readlane_b32 s11, v74, 7 ; 02170F4A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[56:57], s[0:7], s[8:11] ; F0800100 00400238 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v19, v2, v1 ; D2820001 04060513 s_load_dwordx8 s[0:7], s[100:101], 0x78 ; C0C06578 v_readlane_b32 s8, v74, 0 ; 0211014A v_readlane_b32 s9, v74, 1 ; 0213034A v_readlane_b32 s10, v74, 2 ; 0215054A v_readlane_b32 s11, v74, 3 ; 0217074A s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[65:66], s[0:7], s[8:11] ; F0800100 00400241 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v26, v2, v1 ; D2820001 0406051A v_mul_f32_e32 v0, v1, v0 ; 10000101 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 1, 1, 1, 1, v0, v0, v0, v0 ; F8001C1F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV OUT[1], IN[1].xyxy 2: MOV OUT[0], TEMP[0] 3: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float %22, float %23) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v1, v2 ; F800020F 02010201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_add_f32_e32 v0, 0, v0 ; 06000080 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.0000} IMM[1] FLT32 { 1.0000, -1.0000, -2.0000, 0.5000} IMM[2] FLT32 { 2.0000, -2.0000, 0.0000, 4.0000} IMM[3] FLT32 { 0.0000, 0.5000, 2.0000, 0.0000} 0: MOV TEMP[0].xy, IMM[0].xyxx 1: MAD TEMP[1].zw, CONST[0].xyxz, IMM[0].xyyx, IN[0].xyxy 2: MAD TEMP[1].xy, TEMP[1].zwzw, IMM[0].xyxx, IMM[0].zxzz 3: MOV TEMP[2].xy, TEMP[1].xyyy 4: TEX TEMP[2].x, TEMP[2], SAMP[0], 2D 5: MOV TEMP[2].z, TEMP[2].xxxx 6: ADD TEMP[3].zw, -CONST[0].xyxy, IN[0].xyxy 7: MAD TEMP[1].xy, TEMP[3].zwzw, IMM[0].xyxx, IMM[0].zxzz 8: MOV TEMP[3].xy, TEMP[1].xyyy 9: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 10: MOV TEMP[2].x, TEMP[3].xxxx 11: MAD TEMP[3].zw, CONST[0].xyxy, IMM[0].xyxy, IN[0].xyxy 12: MOV TEMP[0].w, TEMP[3].wwzw 13: MAD TEMP[1].xy, TEMP[3].zwzw, IMM[0].xyxx, IMM[0].zxzz 14: MOV TEMP[3].xy, TEMP[1].xyyy 15: TEX TEMP[3].xzw, TEMP[3], SAMP[0], 2D 16: MOV TEMP[4].zw, TEMP[3].wwzw 17: MOV TEMP[2].y, TEMP[3].xxxx 18: DP3 TEMP[3].x, TEMP[2].xyzz, IMM[0].xyww 19: MOV TEMP[0].z, TEMP[3].xxxx 20: ADD TEMP[5].xy, CONST[0].xzzw, IN[0] 21: MOV TEMP[4].xy, TEMP[5].xyxx 22: MAD TEMP[1].xy, TEMP[4], IMM[0].xyxx, IMM[0].zxzz 23: MOV TEMP[5].xy, TEMP[1].xyyy 24: TEX TEMP[5].xw, TEMP[5], SAMP[0], 2D 25: MOV TEMP[4].x, TEMP[5].xxxw 26: MAD TEMP[5].xy, CONST[0], TEMP[0].yxzw, IN[0] 27: MOV TEMP[5].xy, TEMP[5].xyxx 28: MAD TEMP[1].xy, TEMP[5], IMM[0].xyxx, IMM[0].zxzz 29: MOV TEMP[6].xy, TEMP[1].xyyy 30: TEX TEMP[6].xzw, TEMP[6], SAMP[0], 2D 31: MOV TEMP[5].zw, TEMP[6].wwzw 32: MOV TEMP[4].y, TEMP[6].xxxx 33: ADD TEMP[6].xy, CONST[0], IN[0] 34: MOV TEMP[5].xy, TEMP[6].xyxx 35: MAD TEMP[1].xy, TEMP[5], IMM[0].xyxx, IMM[0].zxzz 36: MOV TEMP[6].xy, TEMP[1].xyyy 37: TEX TEMP[6].xzw, TEMP[6], SAMP[0], 2D 38: MOV TEMP[5].zw, TEMP[6].wwzw 39: MOV TEMP[4].z, TEMP[6].xxxx 40: DP3 TEMP[6].x, TEMP[4].yzxx, IMM[1].xyzz 41: ADD TEMP[3].z, TEMP[6].xxxx, TEMP[3].xxxx 42: MOV TEMP[0].z, TEMP[3].zzzz 43: MUL TEMP[6].w, IMM[1].wwww, CONST[1].xxxx 44: MOV TEMP[0].w, TEMP[6].wwww 45: MUL TEMP[3].x, TEMP[6].wwww, TEMP[3].zzzz 46: MOV TEMP[5].x, TEMP[3].xxxx 47: MAD TEMP[3].xy, CONST[0].zyzw, TEMP[0], IN[0] 48: MOV TEMP[0].xy, TEMP[3].xyxx 49: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 50: MOV TEMP[3].xy, TEMP[1].xyyy 51: TEX TEMP[3].x, TEMP[3], SAMP[0], 2D 52: MOV TEMP[2].w, TEMP[3].xxxx 53: DP3 TEMP[3].x, TEMP[2].xyww, IMM[0].xxww 54: ADD TEMP[7].yz, CONST[0].xzyw, IN[0].xxyw 55: MOV TEMP[0].yz, TEMP[7].zyzz 56: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 57: MOV TEMP[1].xy, TEMP[1].xyyy 58: TEX TEMP[1].xw, TEMP[1], SAMP[0], 2D 59: MOV TEMP[2].w, TEMP[1].wwww 60: MOV TEMP[4].w, TEMP[1].xxxx 61: DP3 TEMP[1].x, TEMP[4].yzww, IMM[1].yyzz 62: ADD TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 63: MUL TEMP[1].y, TEMP[6].wwww, TEMP[1].xxxx 64: MOV TEMP[5].y, TEMP[1].yyyy 65: MUL TEMP[1].xy, TEMP[5], IMM[2].xyxy 66: MOV TEMP[0].xy, TEMP[1].xyxx 67: MOV TEMP[0].z, IMM[0].zzzz 68: ADD TEMP[1].xyz, -TEMP[0], IMM[2].zzwx 69: MOV TEMP[0].xy, TEMP[1].xyzx 70: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 71: MOV TEMP[0].z, TEMP[1].xxxx 72: MAX TEMP[1].x, TEMP[1].xxxx, IMM[3].xxxx 73: RSQ TEMP[1].x, TEMP[1].xxxx 74: MOV TEMP[2].z, TEMP[1].xxxx 75: MUL TEMP[0].xy, TEMP[0], TEMP[1].xxxx 76: MOV TEMP[2].xy, TEMP[0].xyxx 77: MAD TEMP[0].xyz, TEMP[2], IMM[3].yyzz, IMM[1].wwww 78: MOV TEMP[0].xyz, TEMP[0].xyzx 79: MOV TEMP[0].w, IMM[0].zzzz 80: MOV OUT[0], TEMP[0] 81: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fmul float %24, -1.000000e+00 %35 = fadd float %34, %32 %36 = fmul float %26, 1.000000e+00 %37 = fadd float %36, %33 %38 = fmul float %35, 1.000000e+00 %39 = fadd float %38, 0.000000e+00 %40 = fmul float %37, -1.000000e+00 %41 = fadd float %40, 1.000000e+00 %42 = bitcast float %39 to i32 %43 = bitcast float %41 to i32 %44 = insertelement <2 x i32> undef, i32 %42, i32 0 %45 = insertelement <2 x i32> %44, i32 %43, i32 1 %46 = bitcast <8 x i32> %29 to <32 x i8> %47 = bitcast <4 x i32> %31 to <16 x i8> %48 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %45, <32 x i8> %46, <16 x i8> %47, i32 2) %49 = extractelement <4 x float> %48, i32 0 %50 = fsub float -0.000000e+00, %24 %51 = fadd float %50, %32 %52 = fsub float -0.000000e+00, %25 %53 = fadd float %52, %33 %54 = fmul float %51, 1.000000e+00 %55 = fadd float %54, 0.000000e+00 %56 = fmul float %53, -1.000000e+00 %57 = fadd float %56, 1.000000e+00 %58 = bitcast float %55 to i32 %59 = bitcast float %57 to i32 %60 = insertelement <2 x i32> undef, i32 %58, i32 0 %61 = insertelement <2 x i32> %60, i32 %59, i32 1 %62 = bitcast <8 x i32> %29 to <32 x i8> %63 = bitcast <4 x i32> %31 to <16 x i8> %64 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %61, <32 x i8> %62, <16 x i8> %63, i32 2) %65 = extractelement <4 x float> %64, i32 0 %66 = fmul float %24, 1.000000e+00 %67 = fadd float %66, %32 %68 = fmul float %25, -1.000000e+00 %69 = fadd float %68, %33 %70 = fmul float %67, 1.000000e+00 %71 = fadd float %70, 0.000000e+00 %72 = fmul float %69, -1.000000e+00 %73 = fadd float %72, 1.000000e+00 %74 = bitcast float %71 to i32 %75 = bitcast float %73 to i32 %76 = insertelement <2 x i32> undef, i32 %74, i32 0 %77 = insertelement <2 x i32> %76, i32 %75, i32 1 %78 = bitcast <8 x i32> %29 to <32 x i8> %79 = bitcast <4 x i32> %31 to <16 x i8> %80 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %77, <32 x i8> %78, <16 x i8> %79, i32 2) %81 = extractelement <4 x float> %80, i32 0 %82 = fmul float %65, 1.000000e+00 %83 = fmul float %81, -1.000000e+00 %84 = fadd float %83, %82 %85 = fmul float %49, 2.000000e+00 %86 = fadd float %84, %85 %87 = fadd float %24, %32 %88 = fadd float %26, %33 %89 = fmul float %87, 1.000000e+00 %90 = fadd float %89, 0.000000e+00 %91 = fmul float %88, -1.000000e+00 %92 = fadd float %91, 1.000000e+00 %93 = bitcast float %90 to i32 %94 = bitcast float %92 to i32 %95 = insertelement <2 x i32> undef, i32 %93, i32 0 %96 = insertelement <2 x i32> %95, i32 %94, i32 1 %97 = bitcast <8 x i32> %29 to <32 x i8> %98 = bitcast <4 x i32> %31 to <16 x i8> %99 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 0 %101 = fmul float %24, -1.000000e+00 %102 = fadd float %101, %32 %103 = fmul float %25, 1.000000e+00 %104 = fadd float %103, %33 %105 = fmul float %102, 1.000000e+00 %106 = fadd float %105, 0.000000e+00 %107 = fmul float %104, -1.000000e+00 %108 = fadd float %107, 1.000000e+00 %109 = bitcast float %106 to i32 %110 = bitcast float %108 to i32 %111 = insertelement <2 x i32> undef, i32 %109, i32 0 %112 = insertelement <2 x i32> %111, i32 %110, i32 1 %113 = bitcast <8 x i32> %29 to <32 x i8> %114 = bitcast <4 x i32> %31 to <16 x i8> %115 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %112, <32 x i8> %113, <16 x i8> %114, i32 2) %116 = extractelement <4 x float> %115, i32 0 %117 = fadd float %24, %32 %118 = fadd float %25, %33 %119 = fmul float %117, 1.000000e+00 %120 = fadd float %119, 0.000000e+00 %121 = fmul float %118, -1.000000e+00 %122 = fadd float %121, 1.000000e+00 %123 = bitcast float %120 to i32 %124 = bitcast float %122 to i32 %125 = insertelement <2 x i32> undef, i32 %123, i32 0 %126 = insertelement <2 x i32> %125, i32 %124, i32 1 %127 = bitcast <8 x i32> %29 to <32 x i8> %128 = bitcast <4 x i32> %31 to <16 x i8> %129 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %126, <32 x i8> %127, <16 x i8> %128, i32 2) %130 = extractelement <4 x float> %129, i32 0 %131 = fmul float %116, 1.000000e+00 %132 = fmul float %130, -1.000000e+00 %133 = fadd float %132, %131 %134 = fmul float %100, -2.000000e+00 %135 = fadd float %133, %134 %136 = fadd float %135, %86 %137 = fmul float 5.000000e-01, %27 %138 = fmul float %137, %136 %139 = fmul float %26, 1.000000e+00 %140 = fadd float %139, %32 %141 = fmul float %25, -1.000000e+00 %142 = fadd float %141, %33 %143 = fmul float %140, 1.000000e+00 %144 = fadd float %143, 0.000000e+00 %145 = fmul float %142, -1.000000e+00 %146 = fadd float %145, 1.000000e+00 %147 = bitcast float %144 to i32 %148 = bitcast float %146 to i32 %149 = insertelement <2 x i32> undef, i32 %147, i32 0 %150 = insertelement <2 x i32> %149, i32 %148, i32 1 %151 = bitcast <8 x i32> %29 to <32 x i8> %152 = bitcast <4 x i32> %31 to <16 x i8> %153 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %150, <32 x i8> %151, <16 x i8> %152, i32 2) %154 = extractelement <4 x float> %153, i32 0 %155 = fmul float %65, 1.000000e+00 %156 = fmul float %81, 1.000000e+00 %157 = fadd float %156, %155 %158 = fmul float %154, 2.000000e+00 %159 = fadd float %157, %158 %160 = fadd float %26, %32 %161 = fadd float %25, %33 %162 = fmul float %160, 1.000000e+00 %163 = fadd float %162, 0.000000e+00 %164 = fmul float %161, -1.000000e+00 %165 = fadd float %164, 1.000000e+00 %166 = bitcast float %163 to i32 %167 = bitcast float %165 to i32 %168 = insertelement <2 x i32> undef, i32 %166, i32 0 %169 = insertelement <2 x i32> %168, i32 %167, i32 1 %170 = bitcast <8 x i32> %29 to <32 x i8> %171 = bitcast <4 x i32> %31 to <16 x i8> %172 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %169, <32 x i8> %170, <16 x i8> %171, i32 2) %173 = extractelement <4 x float> %172, i32 0 %174 = fmul float %116, -1.000000e+00 %175 = fmul float %130, -1.000000e+00 %176 = fadd float %175, %174 %177 = fmul float %173, -2.000000e+00 %178 = fadd float %176, %177 %179 = fadd float %178, %159 %180 = fmul float %137, %179 %181 = fmul float %138, 2.000000e+00 %182 = fmul float %180, -2.000000e+00 %183 = fsub float -0.000000e+00, %181 %184 = fadd float %183, 0.000000e+00 %185 = fsub float -0.000000e+00, %182 %186 = fadd float %185, 0.000000e+00 %187 = fsub float -0.000000e+00, 0.000000e+00 %188 = fadd float %187, 4.000000e+00 %189 = fmul float %184, %184 %190 = fmul float %186, %186 %191 = fadd float %190, %189 %192 = fmul float %188, %188 %193 = fadd float %191, %192 %194 = call float @llvm.maxnum.f32(float %193, float 0x3E7AD7F2A0000000) %195 = call float @llvm.AMDGPU.rsq.clamped.f32(float %194) %196 = fmul float %184, %195 %197 = fmul float %186, %195 %198 = fmul float %196, 5.000000e-01 %199 = fadd float %198, 5.000000e-01 %200 = fmul float %197, 5.000000e-01 %201 = fadd float %200, 5.000000e-01 %202 = fmul float %195, 2.000000e+00 %203 = fadd float %202, 5.000000e-01 %204 = call i32 @llvm.SI.packf16(float %199, float %201) %205 = bitcast i32 %204 to float %206 = call i32 @llvm.SI.packf16(float %203, float 0.000000e+00) %207 = bitcast i32 %206 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %205, float %207, float %205, float %207) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s8, v2 ; 0A060408 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s9, v5 ; 0A000A09 v_add_f32_e32 v3, 0, v0 ; 06060080 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640003 v_add_f32_e32 v1, s9, v5 ; 06020A09 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_mov_b32_e32 v7, v4 ; 7E0E0304 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800100 00640106 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v8, v0, v1 ; 06100300 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 v_add_f32_e32 v9, 0, v5 ; 06120A80 v_mov_b32_e32 v10, v4 ; 7E140304 image_sample v5, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[12:15] ; F0800100 00640509 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 2.0, v5, v8 ; D2820005 04220AF4 v_add_f32_e32 v8, s8, v2 ; 06100408 v_sub_f32_e32 v10, 1.0, v8 ; 081410F2 v_mov_b32_e32 v4, v10 ; 7E08030A image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640803 v_mov_b32_e32 v7, v10 ; 7E0E030A image_sample v11, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800100 00640B06 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e64 v12, -v11, v8 ; D208000C 2002110B image_sample v9, 1, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[16:23], s[12:15] ; F0800100 00640909 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, -2.0, v9, v12 ; D2820009 043212F5 v_add_f32_e32 v5, v5, v9 ; 060A1305 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v9, 0.5, s0 ; D2100009 000000F0 v_mul_f32_e32 v5, v5, v9 ; 100A1305 v_mad_f32 v5, -2.0, -v5, 0 ; D2820005 42020AF5 v_subrev_f32_e32 v0, v1, v0 ; 0A000101 v_add_f32_e32 v1, s4, v2 ; 06020404 v_sub_f32_e32 v4, 1.0, v1 ; 080802F2 image_sample v1, 1, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800100 00640103 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 2.0, v1, v0 ; D2820000 040202F4 v_subrev_f32_e32 v1, v11, v8 ; 0A02110B v_mov_b32_e32 v7, v4 ; 7E0E0304 image_sample v2, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800100 00640206 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, -2.0, v2, v1 ; D2820001 040604F5 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v0, -2.0, v0, 0 ; D2820000 020200F5 v_mul_f32_e32 v1, v0, v0 ; 10020100 v_mad_f32 v1, v5, v5, v1 ; D2820001 04060B05 v_add_f32_e32 v1, 0x41800000, v1 ; 060202FF 41800000 v_max_f32_e32 v1, 0x33d6bf95, v1 ; 200202FF 33D6BF95 v_rsq_clamp_f32_e32 v1, v1 ; 7E025901 v_mul_f32_e32 v2, v1, v5 ; 10040B01 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_mul_f32_e32 v0, v1, v0 ; 10000101 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 v_mad_f32 v1, 2.0, v1, 0.5 ; D2820001 03C202F4 v_cvt_pkrtz_f16_f32_e64 v1, v1, 0 ; D25E0001 00010101 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1].zw, IN[0].zzzz, IMM[0].xyxy, IMM[0].xyyx 3: MOV TEMP[0].zw, TEMP[1].wwzw 4: MOV TEMP[1].xy, IN[1].xyxx 5: MOV TEMP[1].zw, IMM[0].xxyx 6: MOV OUT[0], TEMP[0] 7: MOV OUT[1], TEMP[1] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %16 = load <16 x i8> addrspace(2)* %15, !tbaa !0 %17 = add i32 %5, %7 %18 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %17) %19 = extractelement <4 x float> %18, i32 0 %20 = extractelement <4 x float> %18, i32 1 %21 = extractelement <4 x float> %18, i32 2 %22 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = add i32 %5, %7 %25 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %24) %26 = extractelement <4 x float> %25, i32 0 %27 = extractelement <4 x float> %25, i32 1 %28 = fadd float %13, %19 %29 = fadd float %14, %20 %30 = fmul float %21, 1.000000e+00 %31 = fadd float %30, 0.000000e+00 %32 = fmul float %21, 0.000000e+00 %33 = fadd float %32, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %26, float %27, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %28, float %29, float %31, float %33) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v2, 1.0 ; D2820004 03CA0480 v_add_f32_e32 v5, 0, v2 ; 060A0480 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v1 ; 060C0204 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..4] DCL TEMP[0..21], LOCAL IMM[0] FLT32 { 0.0000, 255.0000, 0.0000, 0.3145} IMM[1] FLT32 { 1.0000, -64.0000, 0.0104, 0.0000} IMM[2] INT32 {40, 0, -1, 0} 0: ABS TEMP[0], CONST[1] 1: ADD TEMP[1], -TEMP[0].yyyy, TEMP[0].xxxx 2: MOV TEMP[2].z, TEMP[1].zzzz 3: RCP TEMP[3].x, TEMP[0].yyyy 4: MUL TEMP[3], TEMP[3].xxxx, CONST[1].xxzy 5: MOV TEMP[2].yw, TEMP[3].wyww 6: RCP TEMP[2].x, TEMP[0].xxxx 7: MUL TEMP[0], TEMP[2].xxxx, CONST[1] 8: MOV TEMP[4], TEMP[2].ywzw 9: FSGE TEMP[3].x, TEMP[1].zzzz, IMM[0].xxxx 10: UIF TEMP[3].xxxx :0 11: MOV TEMP[3].x, TEMP[0].xxxx 12: ELSE :0 13: MOV TEMP[3].x, TEMP[2].yyyy 14: ENDIF 15: MOV TEMP[3].x, TEMP[3].xxxx 16: FSGE TEMP[5].x, TEMP[1].zzzz, IMM[0].xxxx 17: UIF TEMP[5].xxxx :0 18: MOV TEMP[5].x, TEMP[0].yyyy 19: ELSE :0 20: MOV TEMP[5].x, TEMP[2].wwww 21: ENDIF 22: MOV TEMP[3].y, TEMP[5].xxxx 23: FSGE TEMP[5].x, TEMP[1].zzzz, IMM[0].xxxx 24: UIF TEMP[5].xxxx :0 25: MOV TEMP[5].x, TEMP[0].zzzz 26: ELSE :0 27: MOV TEMP[5].x, TEMP[2].zzzz 28: ENDIF 29: MOV TEMP[3].z, TEMP[5].xxxx 30: FSGE TEMP[1].x, TEMP[1].zzzz, IMM[0].xxxx 31: UIF TEMP[1].xxxx :0 32: MOV TEMP[1].x, TEMP[0].wwww 33: ELSE :0 34: MOV TEMP[1].x, TEMP[2].wwww 35: ENDIF 36: MOV TEMP[3].w, TEMP[1].xxxx 37: MOV TEMP[2].xy, TEMP[3].xyxx 38: ADD TEMP[1], CONST[3].xyxy, IN[0].xyxy 39: MOV TEMP[2].zw, TEMP[1].wwzw 40: MUL TEMP[1], TEMP[2], CONST[2].xyxy 41: MOV TEMP[2].w, TEMP[1].wwzw 42: MOV TEMP[1].xy, TEMP[1].zwww 43: TEX TEMP[1], TEMP[1], SAMP[0], 2D 44: MUL TEMP[1], TEMP[1].wwww, CONST[0].zzzz 45: MAD TEMP[1], TEMP[1].zzzz, IMM[0].yyyy, CONST[4].zzzz 46: MOV TEMP[2].z, TEMP[1].zzzz 47: ADD TEMP[5], TEMP[2], IN[0] 48: MOV TEMP[0].xy, TEMP[5].xyxx 49: MUL TEMP[3], TEMP[3].xyxy, CONST[0].xyxy 50: MOV TEMP[0].zw, TEMP[3].wwzw 51: DP2 TEMP[3].x, TEMP[3].zwww, TEMP[3].zwww 52: MAX TEMP[3].x, TEMP[3].xxxx, IMM[0].zzzz 53: RSQ TEMP[5].x, TEMP[3].xxxx 54: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[3].xxxx 55: CMP TEMP[5].x, -TEMP[3].xxxx, TEMP[5].xxxx, IMM[0].xxxx 56: MOV TEMP[2].w, TEMP[5].xxxx 57: MAD TEMP[1], TEMP[5].xxxx, IMM[0].wwww, TEMP[1].zzzz 58: MOV TEMP[2].z, TEMP[1].zzzz 59: ADD TEMP[3], TEMP[0], CONST[3] 60: MOV TEMP[0].xy, TEMP[3].xyxx 61: MUL TEMP[3], TEMP[0], CONST[2] 62: MOV TEMP[0].xy, TEMP[3].xyxx 63: MOV TEMP[0].zw, TEMP[3].yyxy 64: MOV TEMP[3].x, IMM[1].xxxx 65: MOV TEMP[3].y, TEMP[1].zzzz 66: MOV TEMP[1].x, IMM[2].xxxx 67: BGNLOOP :0 68: ISGE TEMP[5].x, IMM[2].yyyy, TEMP[1].xxxx 69: UIF TEMP[5].xxxx :0 70: BRK 71: ENDIF 72: MAD TEMP[6], TEMP[2].xyxy, CONST[2].xyxy, TEMP[0] 73: MOV TEMP[0].zw, TEMP[6].wwzw 74: MAD TEMP[7], TEMP[2].wwww, IMM[0].wwww, TEMP[3].yyyy 75: MOV TEMP[3].y, TEMP[7].yyyy 76: MOV TEMP[8].xy, TEMP[6].zwww 77: TEX TEMP[9], TEMP[8], SAMP[0], 2D 78: MOV TEMP[10], TEMP[9] 79: MOV TEMP[11].yzw, TEMP[9].zyzw 80: MUL TEMP[12], TEMP[9].wwww, IMM[0].yyyy 81: MAD TEMP[13], TEMP[9].wwww, IMM[0].yyyy, IMM[1].yyyy 82: MUL TEMP[4], TEMP[13].wwww, IMM[1].zzzz 83: MOV_SAT TEMP[14], TEMP[4] 84: ADD TEMP[15], -TEMP[14].wwww, IMM[1].xxxx 85: MOV TEMP[3].w, TEMP[15].wwww 86: MAD TEMP[16], TEMP[12].zzzz, -CONST[0].zzzz, TEMP[7].yyyy 87: MOV TEMP[3].z, TEMP[16].zzzz 88: MIN TEMP[17], TEMP[15].wwww, TEMP[3].xxxx 89: MOV TEMP[11].x, TEMP[17].xxxx 90: MOV TEMP[18].x, TEMP[3].xxxx 91: MOV TEMP[19].x, TEMP[17].xxxx 92: FSGE TEMP[20].x, TEMP[16].zzzz, IMM[0].xxxx 93: UIF TEMP[20].xxxx :0 94: MOV TEMP[21].x, TEMP[3].xxxx 95: ELSE :0 96: MOV TEMP[21].x, TEMP[17].xxxx 97: ENDIF 98: MOV TEMP[3].x, TEMP[21].xxxx 99: UADD TEMP[1].x, TEMP[1].xxxx, IMM[2].zzzz 100: ENDLOOP :0 101: ADD TEMP[0].x, -TEMP[3].xxxx, IMM[1].xxxx 102: MUL TEMP[0], TEMP[0].xxxx, CONST[4].zzzz 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %36 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %37 = load <8 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %39 = load <4 x i32> addrspace(2)* %38, !tbaa !0 %40 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %41 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %42 = call float @fabs(float %27) %43 = call float @fabs(float %28) %44 = call float @fabs(float %29) %45 = call float @fabs(float %30) %46 = fsub float -0.000000e+00, %43 %47 = fadd float %46, %42 %48 = fdiv float 1.000000e+00, %43 %49 = fmul float %48, %27 %50 = fmul float %48, %28 %51 = fdiv float 1.000000e+00, %42 %52 = fmul float %51, %27 %53 = fmul float %51, %28 %54 = fcmp oge float %47, 0.000000e+00 %55 = sext i1 %54 to i32 %56 = bitcast i32 %55 to float %57 = bitcast float %56 to i32 %58 = icmp ne i32 %57, 0 %. = select i1 %58, float %52, float %49 %59 = fcmp oge float %47, 0.000000e+00 %60 = sext i1 %59 to i32 %61 = bitcast i32 %60 to float %62 = bitcast float %61 to i32 %63 = icmp ne i32 %62, 0 %temp20.0 = select i1 %63, float %53, float %50 %64 = fadd float %33, %40 %65 = fadd float %34, %41 %66 = fmul float %64, %31 %67 = fmul float %65, %32 %68 = bitcast float %66 to i32 %69 = bitcast float %67 to i32 %70 = insertelement <2 x i32> undef, i32 %68, i32 0 %71 = insertelement <2 x i32> %70, i32 %69, i32 1 %72 = bitcast <8 x i32> %37 to <32 x i8> %73 = bitcast <4 x i32> %39 to <16 x i8> %74 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %71, <32 x i8> %72, <16 x i8> %73, i32 2) %75 = extractelement <4 x float> %74, i32 3 %76 = fmul float %75, %26 %77 = fmul float %76, 2.550000e+02 %78 = fadd float %77, %35 %79 = fadd float %., %40 %80 = fadd float %temp20.0, %41 %81 = fmul float %., %24 %82 = fmul float %temp20.0, %25 %83 = fmul float %81, %81 %84 = fmul float %82, %82 %85 = fadd float %83, %84 %86 = call float @llvm.maxnum.f32(float %85, float 0x3E7AD7F2A0000000) %87 = call float @llvm.AMDGPU.rsq.clamped.f32(float %86) %88 = fmul float %87, %86 %89 = fsub float -0.000000e+00, %86 %90 = call float @llvm.AMDGPU.cndlt(float %89, float %88, float 0.000000e+00) %91 = fmul float %90, 0x3FD42085C0000000 %92 = fadd float %91, %78 %93 = fadd float %79, %33 %94 = fadd float %80, %34 %95 = fmul float %93, %31 %96 = fmul float %94, %32 %97 = fmul float %., %31 %98 = fmul float %temp20.0, %32 %99 = fmul float %90, 0x3FD42085C0000000 %100 = bitcast <8 x i32> %37 to <32 x i8> %101 = bitcast <4 x i32> %39 to <16 x i8> %102 = fsub float -0.000000e+00, %26 br label %LOOP LOOP: ; preds = %ENDIF97, %main_body %temp12.1 = phi float [ 1.000000e+00, %main_body ], [ %temp12.1., %ENDIF97 ] %temp13.0 = phi float [ %92, %main_body ], [ %121, %ENDIF97 ] %temp4.1 = phi float [ 0x36F4000000000000, %main_body ], [ %151, %ENDIF97 ] %temp3.0 = phi float [ %96, %main_body ], [ %120, %ENDIF97 ] %temp2.0 = phi float [ %95, %main_body ], [ %119, %ENDIF97 ] %103 = bitcast float %temp4.1 to i32 %104 = icmp sge i32 0, %103 %105 = sext i1 %104 to i32 %106 = bitcast i32 %105 to float %107 = bitcast float %106 to i32 %108 = icmp ne i32 %107, 0 br i1 %108, label %IF98, label %ENDIF97 IF98: ; preds = %LOOP %temp12.1.lcssa = phi float [ %temp12.1, %LOOP ] %109 = fsub float -0.000000e+00, %temp12.1.lcssa %110 = fadd float %109, 1.000000e+00 %111 = fmul float %110, %35 %112 = fmul float %110, %35 %113 = fmul float %110, %35 %114 = fmul float %110, %35 %115 = call i32 @llvm.SI.packf16(float %111, float %112) %116 = bitcast i32 %115 to float %117 = call i32 @llvm.SI.packf16(float %113, float %114) %118 = bitcast i32 %117 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %116, float %118, float %116, float %118) ret void ENDIF97: ; preds = %LOOP %119 = fadd float %97, %temp2.0 %120 = fadd float %98, %temp3.0 %121 = fadd float %99, %temp13.0 %122 = bitcast float %119 to i32 %123 = bitcast float %120 to i32 %124 = insertelement <2 x i32> undef, i32 %122, i32 0 %125 = insertelement <2 x i32> %124, i32 %123, i32 1 %126 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %125, <32 x i8> %100, <16 x i8> %101, i32 2) %127 = extractelement <4 x float> %126, i32 3 %128 = fmul float %127, 2.550000e+02 %129 = fmul float %127, 2.550000e+02 %130 = fadd float %129, -6.400000e+01 %131 = fmul float %130, 0x3F855559C0000000 %132 = fmul float %130, 0x3F855559C0000000 %133 = fmul float %130, 0x3F855559C0000000 %134 = fmul float %130, 0x3F855559C0000000 %135 = call float @llvm.AMDIL.clamp.(float %131, float 0.000000e+00, float 1.000000e+00) %136 = call float @llvm.AMDIL.clamp.(float %132, float 0.000000e+00, float 1.000000e+00) %137 = call float @llvm.AMDIL.clamp.(float %133, float 0.000000e+00, float 1.000000e+00) %138 = call float @llvm.AMDIL.clamp.(float %134, float 0.000000e+00, float 1.000000e+00) %139 = fsub float -0.000000e+00, %138 %140 = fadd float %139, 1.000000e+00 %141 = fmul float %128, %102 %142 = fadd float %141, %121 %143 = call float @llvm.minnum.f32(float %140, float %temp12.1) %144 = fcmp oge float %142, 0.000000e+00 %145 = sext i1 %144 to i32 %146 = bitcast i32 %145 to float %147 = bitcast float %146 to i32 %148 = icmp ne i32 %147, 0 %temp12.1. = select i1 %148, float %temp12.1, float %143 %149 = bitcast float %temp4.1 to i32 %150 = add i32 %149, -1 %151 = bitcast i32 %150 to float br label %LOOP } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare float @llvm.minnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_load_dwordx4 s[12:15], s[2:3], 0x0 ; C0860300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[12:15], 0x4 ; C2000D04 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e64 v2, |s0| ; D3540102 00000000 v_mul_f32_e32 v3, s0, v2 ; 10060400 s_buffer_load_dword s1, s[12:15], 0x5 ; C2008D05 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e64 v4, |s1| ; D3540104 00000001 v_mul_f32_e32 v5, s0, v4 ; 100A0800 v_mov_b32_e32 v6, s1 ; 7E0C0201 v_sub_f32_e64 v6, |s0|, |v6| ; D2080306 00020C00 v_cmp_ge_f32_e64 s[2:3], v6, 0 ; D00C0002 00010106 v_cndmask_b32_e64 v6, 0, -1, s[2:3] ; D2000006 00098280 v_cmp_ne_i32_e64 s[2:3], v6, 0 ; D10A0002 00010106 v_cndmask_b32_e64 v6, v5, v3, s[2:3] ; D2000006 080A0705 s_buffer_load_dword s0, s[12:15], 0x0 ; C2000D00 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s0, v6 ; 10060C00 v_mul_f32_e32 v2, s1, v2 ; 10040401 v_mul_f32_e32 v4, s1, v4 ; 10080801 v_cndmask_b32_e64 v4, v4, v2, s[2:3] ; D2000004 000A0504 s_buffer_load_dword s0, s[12:15], 0x1 ; C2000D01 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s0, v4 ; 10040800 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_mov_b32_e32 v7, 0x80000000 ; 7E0E02FF 80000000 v_xor_b32_e32 v2, v2, v7 ; 3A040F02 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, v3, vcc ; D2000002 01AA0680 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 s_buffer_load_dword s16, s[12:15], 0xd ; C2080D0D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s16, v5 ; 06060A10 s_buffer_load_dword s17, s[12:15], 0x9 ; C2088D09 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s17, v3 ; 10120611 v_interp_p1_f32 v10, v0, 0, 0, [m0] ; C8280000 v_interp_p2_f32 v10, [v10], v1, 0, 0, [m0] ; C8290001 s_buffer_load_dword s18, s[12:15], 0xc ; C2090D0C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s18, v10 ; 06001412 s_buffer_load_dword s19, s[12:15], 0x8 ; C2098D08 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s19, v0 ; 10100013 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[4:11], s[0:3] ; F0800800 00010008 s_buffer_load_dword s20, s[12:15], 0x2 ; C20A0D02 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s20, v0 ; 10000014 s_buffer_load_dword s12, s[12:15], 0x12 ; C2060D12 v_mov_b32_e32 v1, 0x437f0000 ; 7E0202FF 437F0000 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, v0, s12 ; D2820000 00320101 v_mov_b32_e32 v1, 0x3ea1042e ; 7E0202FF 3EA1042E v_mad_f32 v0, v2, v1, v0 ; D2820000 04020302 v_mul_f32_e32 v1, 0x3ea1042e, v2 ; 100204FF 3EA1042E v_mul_f32_e32 v2, s17, v4 ; 10040811 v_mul_f32_e32 v3, s19, v6 ; 10060C13 v_add_f32_e32 v4, v5, v4 ; 06080905 v_add_f32_e32 v4, s16, v4 ; 06080810 v_mul_f32_e32 v5, s17, v4 ; 100A0811 v_add_f32_e32 v6, v10, v6 ; 060C0D0A v_add_f32_e32 v6, s18, v6 ; 060C0C12 v_mul_f32_e32 v4, s19, v6 ; 10080C13 v_xor_b32_e32 v6, s20, v7 ; 3A0C0E14 v_mov_b32_e32 v7, 40 ; 7E0E02A8 v_mov_b32_e32 v9, 1.0 ; 7E1202F2 s_mov_b64 s[14:15], 0 ; BE8E0480 v_mov_b32_e32 v8, v9 ; 7E100309 v_cmp_lt_i32_e64 s[16:17], v7, 1 ; D1020010 00010307 v_cndmask_b32_e64 v9, 0, -1, s[16:17] ; D2000809 00418280 v_cmp_eq_i32_e64 s[16:17], v9, 0 ; D1040010 00010109 s_and_saveexec_b64 s[16:17], s[16:17] ; BE902410 s_xor_b64 s[16:17], exec, s[16:17] ; 8990107E s_cbranch_execz BB0_2 ; BF880000 v_add_f32_e32 v5, v5, v2 ; 060A0505 v_add_f32_e32 v4, v4, v3 ; 06080704 image_sample v9, 8, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[4:11], s[0:3] ; F0800800 00010904 v_mov_b32_e32 v10, 0x437f0000 ; 7E1402FF 437F0000 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v11, 0x437f0000, v9 ; 101612FF 437F0000 v_add_f32_e32 v0, v0, v1 ; 06000300 v_mad_f32 v11, v11, v6, v0 ; D282000B 04020D0B v_cmp_ge_f32_e64 s[18:19], v11, 0 ; D00C0012 0001010B v_cndmask_b32_e64 v11, 0, -1, s[18:19] ; D200080B 00498280 v_cmp_ne_i32_e64 s[18:19], v11, 0 ; D10A0012 0001010B v_mov_b32_e32 v11, 0xc2800000 ; 7E1602FF C2800000 v_mad_f32 v9, v10, v9, v11 ; D2820009 042E130A v_mul_f32_e32 v9, 0x3c2aaace, v9 ; 101212FF 3C2AAACE v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_min_f32_e32 v9, v8, v9 ; 1E121308 v_cndmask_b32_e64 v9, v9, v8, s[18:19] ; D2000809 084A1109 v_add_i32_e32 v7, -1, v7 ; 4A0E0EC1 s_or_b64 exec, exec, s[16:17] ; 88FE107E s_or_b64 s[14:15], s[16:17], s[14:15] ; 888E0E10 s_andn2_b64 exec, exec, s[14:15] ; 8AFE0E7E s_cbranch_execnz BB0_1 ; BF890000 s_or_b64 exec, exec, s[14:15] ; 88FE0E7E v_sub_f32_e32 v0, 1.0, v8 ; 080010F2 v_mul_f32_e32 v0, s12, v0 ; 1000000C v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.0000, -2.0000, 0.0000, 1.0000} IMM[1] FLT32 { -1.0000, 1.0000, 0.5000, 0.0000} 0: MAD TEMP[0], IN[1].xyxx, IMM[0].xyzz, IMM[1].xyzy 1: MOV TEMP[1].xyz, IN[0].xyzx 2: MUL TEMP[2], IMM[0].wwzz, IN[1].xyxx 3: MOV OUT[2], TEMP[2] 4: MOV OUT[1], TEMP[1] 5: MOV OUT[0], TEMP[0] 6: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %22, 2.000000e+00 %25 = fadd float %24, -1.000000e+00 %26 = fmul float %23, -2.000000e+00 %27 = fadd float %26, 1.000000e+00 %28 = fmul float %22, 0.000000e+00 %29 = fadd float %28, 5.000000e-01 %30 = fmul float %22, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 %32 = fmul float 1.000000e+00, %22 %33 = fmul float 1.000000e+00, %23 %34 = fmul float 0.000000e+00, %22 %35 = fmul float 0.000000e+00, %22 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %15, float %16, float %17, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %32, float %33, float %34, float %35) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[0:3], 0 idxen ; E00C2000 80000100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v5 ; F800020F 05030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, 0, v0 ; 10080080 exp 15, 33, 0, 0, 0, v0, v1, v4, v4 ; F800021F 04040100 s_waitcnt expcnt(0) ; BF8C070F v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_mad_f32 v5, 0, v0, 0.5 ; D2820005 03C20080 v_mad_f32 v6, -2.0, v1, 1.0 ; D2820006 03CA02F5 v_mad_f32 v0, 2.0, v0, -1.0 ; D2820000 03CE00F4 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, -0.0000, -1.0000} IMM[1] FLT32 { 0.0000, 2.0000, 2.2000, 1.0000} IMM[2] FLT32 { 1.7000, 0.0010, 0.4545, 0.0000} 0: MUL TEMP[0].xyz, IMM[0].xyxy, IN[0].yzxw 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].xyz, IN[0].yzxw, IMM[0].xxyy, -TEMP[0].yzxw 3: MOV TEMP[2].xyz, TEMP[1].xyzx 4: MAD TEMP[3].xyz, IN[0].zxyw, IMM[0].zzww, TEMP[0] 5: MOV TEMP[0].xyz, TEMP[3].xyzx 6: DP2 TEMP[1].x, TEMP[1].xzzz, TEMP[1].xzzz 7: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[2] 10: MOV TEMP[2].xyz, TEMP[1].xyzx 11: DP2 TEMP[1].x, TEMP[3].yzzz, TEMP[3].yzzz 12: MAX TEMP[1].x, TEMP[1].xxxx, IMM[1].xxxx 13: RSQ TEMP[1].x, TEMP[1].xxxx 14: MOV TEMP[0].w, TEMP[1].xxxx 15: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0] 16: MOV TEMP[0].xyz, TEMP[1].xyzx 17: MUL TEMP[1], IN[1], IMM[0].ywyy 18: ADD TEMP[3].xy, TEMP[1], IMM[0].xyxx 19: MOV TEMP[3].xy, TEMP[3].xyyy 20: TEX TEMP[3], TEMP[3], SAMP[0], 2D 21: MAD TEMP[3].xyz, TEMP[3], IMM[1].yyyy, IMM[0].wwww 22: MUL TEMP[4].xyz, TEMP[0], TEMP[3].yyyy 23: MOV TEMP[0].xyz, TEMP[4].xyzx 24: MAD TEMP[4].xyz, TEMP[3].xxxx, TEMP[2], TEMP[0] 25: MOV TEMP[0].xyz, TEMP[4].xyzx 26: MAD TEMP[3].xyz, TEMP[3].zzzz, IN[0], TEMP[0] 27: MOV TEMP[4].xyz, TEMP[3].xyzz 28: TEX TEMP[4], TEMP[4], SAMP[4], CUBE 29: POW TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz 30: POW TEMP[5].y, TEMP[4].yyyy, IMM[1].zzzz 31: POW TEMP[5].z, TEMP[4].zzzz, IMM[1].zzzz 32: POW TEMP[5].w, TEMP[4].wwww, IMM[1].wwww 33: MOV TEMP[2].w, TEMP[5].wwww 34: MOV TEMP[3].xyz, TEMP[3].xyzz 35: TEX TEMP[3], TEMP[3], SAMP[3], CUBE 36: POW TEMP[4].x, TEMP[3].xxxx, IMM[1].zzzz 37: POW TEMP[4].y, TEMP[3].yyyy, IMM[1].zzzz 38: POW TEMP[4].z, TEMP[3].zzzz, IMM[1].zzzz 39: POW TEMP[4].w, TEMP[3].wwww, IMM[1].wwww 40: MOV TEMP[0].w, TEMP[4].wwww 41: MUL TEMP[3].xyz, TEMP[4], IMM[2].xxxx 42: MOV TEMP[0].xyz, TEMP[3].xyzx 43: MAD TEMP[3].xyz, TEMP[5], IMM[2].xxxx, -TEMP[0] 44: MOV TEMP[2].xyz, TEMP[3].xyzx 45: ADD TEMP[3].xy, TEMP[1], IMM[0].xyxx 46: MOV TEMP[3].xy, TEMP[3].xyyy 47: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 48: MAD TEMP[3].xyz, TEMP[3].xxxx, TEMP[2], TEMP[0] 49: MOV TEMP[0].xyz, TEMP[3].xyzx 50: ADD TEMP[1].xy, TEMP[1], IMM[0].xyxx 51: MOV TEMP[1].xy, TEMP[1].xyyy 52: TEX TEMP[1], TEMP[1], SAMP[1], 2D 53: MOV TEMP[2].w, TEMP[1].wwww 54: MAD TEMP[0].xyz, TEMP[1], TEMP[0], IMM[2].yyyy 55: ABS TEMP[1].x, TEMP[0].xxxx 56: LG2 TEMP[2].x, TEMP[1].xxxx 57: ABS TEMP[1].x, TEMP[0].yyyy 58: LG2 TEMP[1].x, TEMP[1].xxxx 59: MOV TEMP[2].y, TEMP[1].xxxx 60: ABS TEMP[0].x, TEMP[0].zzzz 61: LG2 TEMP[0].x, TEMP[0].xxxx 62: MOV TEMP[2].z, TEMP[0].xxxx 63: MUL TEMP[0].xyz, TEMP[2], IMM[2].zzzz 64: EX2 TEMP[2].x, TEMP[0].xxxx 65: EX2 TEMP[1].x, TEMP[0].yyyy 66: MOV TEMP[2].y, TEMP[1].xxxx 67: EX2 TEMP[0].x, TEMP[0].zzzz 68: MOV TEMP[2].z, TEMP[0].xxxx 69: MOV TEMP[0].xyz, TEMP[2].xyzz 70: TEX TEMP[0].xyz, TEMP[0], SAMP[5], 3D 71: MOV TEMP[0].xyz, TEMP[0].xyzx 72: MOV TEMP[0].w, IMM[0].yyyy 73: MOV OUT[0], TEMP[0] 74: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %31 = load <8 x i32> addrspace(2)* %30, !tbaa !0 %32 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %33 = load <4 x i32> addrspace(2)* %32, !tbaa !0 %34 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %35 = load <8 x i32> addrspace(2)* %34, !tbaa !0 %36 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %37 = load <4 x i32> addrspace(2)* %36, !tbaa !0 %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %47 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %48 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %49 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %50 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %51 = fmul float 0.000000e+00, %47 %52 = fmul float 1.000000e+00, %48 %53 = fmul float 0.000000e+00, %46 %54 = fsub float -0.000000e+00, %52 %55 = fmul float %47, 0.000000e+00 %56 = fadd float %55, %54 %57 = fsub float -0.000000e+00, %53 %58 = fmul float %48, 0.000000e+00 %59 = fadd float %58, %57 %60 = fsub float -0.000000e+00, %51 %61 = fmul float %46, 1.000000e+00 %62 = fadd float %61, %60 %63 = fmul float %48, -0.000000e+00 %64 = fadd float %63, %51 %65 = fmul float %46, -0.000000e+00 %66 = fadd float %65, %52 %67 = fmul float %47, -1.000000e+00 %68 = fadd float %67, %53 %69 = fmul float %56, %56 %70 = fmul float %62, %62 %71 = fadd float %69, %70 %72 = call float @llvm.maxnum.f32(float %71, float 0x3E7AD7F2A0000000) %73 = call float @llvm.AMDGPU.rsq.clamped.f32(float %72) %74 = fmul float %73, %56 %75 = fmul float %73, %59 %76 = fmul float %73, %62 %77 = fmul float %66, %66 %78 = fmul float %68, %68 %79 = fadd float %77, %78 %80 = call float @llvm.maxnum.f32(float %79, float 0x3E7AD7F2A0000000) %81 = call float @llvm.AMDGPU.rsq.clamped.f32(float %80) %82 = fmul float %81, %64 %83 = fmul float %81, %66 %84 = fmul float %81, %68 %85 = fmul float %49, 1.000000e+00 %86 = fmul float %50, -1.000000e+00 %87 = fadd float %85, 0.000000e+00 %88 = fadd float %86, 1.000000e+00 %89 = bitcast float %87 to i32 %90 = bitcast float %88 to i32 %91 = insertelement <2 x i32> undef, i32 %89, i32 0 %92 = insertelement <2 x i32> %91, i32 %90, i32 1 %93 = bitcast <8 x i32> %23 to <32 x i8> %94 = bitcast <4 x i32> %25 to <16 x i8> %95 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %92, <32 x i8> %93, <16 x i8> %94, i32 2) %96 = extractelement <4 x float> %95, i32 0 %97 = extractelement <4 x float> %95, i32 1 %98 = extractelement <4 x float> %95, i32 2 %99 = extractelement <4 x float> %95, i32 3 %100 = fmul float %96, 2.000000e+00 %101 = fadd float %100, -1.000000e+00 %102 = fmul float %97, 2.000000e+00 %103 = fadd float %102, -1.000000e+00 %104 = fmul float %98, 2.000000e+00 %105 = fadd float %104, -1.000000e+00 %106 = fmul float %82, %103 %107 = fmul float %83, %103 %108 = fmul float %84, %103 %109 = fmul float %101, %74 %110 = fadd float %109, %106 %111 = fmul float %101, %75 %112 = fadd float %111, %107 %113 = fmul float %101, %76 %114 = fadd float %113, %108 %115 = fmul float %105, %46 %116 = fadd float %115, %110 %117 = fmul float %105, %47 %118 = fadd float %117, %112 %119 = fmul float %105, %48 %120 = fadd float %119, %114 %121 = insertelement <4 x float> undef, float %116, i32 0 %122 = insertelement <4 x float> %121, float %118, i32 1 %123 = insertelement <4 x float> %122, float %120, i32 2 %124 = insertelement <4 x float> %123, float 0.000000e+00, i32 3 %125 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %124) %126 = extractelement <4 x float> %125, i32 0 %127 = extractelement <4 x float> %125, i32 1 %128 = extractelement <4 x float> %125, i32 2 %129 = extractelement <4 x float> %125, i32 3 %130 = call float @fabs(float %128) %131 = fdiv float 1.000000e+00, %130 %132 = fmul float %126, %131 %133 = fadd float %132, 1.500000e+00 %134 = fmul float %127, %131 %135 = fadd float %134, 1.500000e+00 %136 = bitcast float %135 to i32 %137 = bitcast float %133 to i32 %138 = bitcast float %129 to i32 %139 = insertelement <4 x i32> undef, i32 %136, i32 0 %140 = insertelement <4 x i32> %139, i32 %137, i32 1 %141 = insertelement <4 x i32> %140, i32 %138, i32 2 %142 = insertelement <4 x i32> %141, i32 undef, i32 3 %143 = bitcast <8 x i32> %39 to <32 x i8> %144 = bitcast <4 x i32> %41 to <16 x i8> %145 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %142, <32 x i8> %143, <16 x i8> %144, i32 4) %146 = extractelement <4 x float> %145, i32 0 %147 = extractelement <4 x float> %145, i32 1 %148 = extractelement <4 x float> %145, i32 2 %149 = call float @llvm.pow.f32(float %146, float 0x40019999A0000000) %150 = call float @llvm.pow.f32(float %147, float 0x40019999A0000000) %151 = call float @llvm.pow.f32(float %148, float 0x40019999A0000000) %152 = insertelement <4 x float> undef, float %116, i32 0 %153 = insertelement <4 x float> %152, float %118, i32 1 %154 = insertelement <4 x float> %153, float %120, i32 2 %155 = insertelement <4 x float> %154, float %99, i32 3 %156 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %155) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = call float @fabs(float %159) %162 = fdiv float 1.000000e+00, %161 %163 = fmul float %157, %162 %164 = fadd float %163, 1.500000e+00 %165 = fmul float %158, %162 %166 = fadd float %165, 1.500000e+00 %167 = bitcast float %166 to i32 %168 = bitcast float %164 to i32 %169 = bitcast float %160 to i32 %170 = insertelement <4 x i32> undef, i32 %167, i32 0 %171 = insertelement <4 x i32> %170, i32 %168, i32 1 %172 = insertelement <4 x i32> %171, i32 %169, i32 2 %173 = insertelement <4 x i32> %172, i32 undef, i32 3 %174 = bitcast <8 x i32> %35 to <32 x i8> %175 = bitcast <4 x i32> %37 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 4) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = call float @llvm.pow.f32(float %177, float 0x40019999A0000000) %181 = call float @llvm.pow.f32(float %178, float 0x40019999A0000000) %182 = call float @llvm.pow.f32(float %179, float 0x40019999A0000000) %183 = fmul float %180, 0x3FFB333340000000 %184 = fmul float %181, 0x3FFB333340000000 %185 = fmul float %182, 0x3FFB333340000000 %186 = fsub float -0.000000e+00, %183 %187 = fmul float %149, 0x3FFB333340000000 %188 = fadd float %187, %186 %189 = fsub float -0.000000e+00, %184 %190 = fmul float %150, 0x3FFB333340000000 %191 = fadd float %190, %189 %192 = fsub float -0.000000e+00, %185 %193 = fmul float %151, 0x3FFB333340000000 %194 = fadd float %193, %192 %195 = fadd float %85, 0.000000e+00 %196 = fadd float %86, 1.000000e+00 %197 = bitcast float %195 to i32 %198 = bitcast float %196 to i32 %199 = insertelement <2 x i32> undef, i32 %197, i32 0 %200 = insertelement <2 x i32> %199, i32 %198, i32 1 %201 = bitcast <8 x i32> %31 to <32 x i8> %202 = bitcast <4 x i32> %33 to <16 x i8> %203 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %200, <32 x i8> %201, <16 x i8> %202, i32 2) %204 = extractelement <4 x float> %203, i32 0 %205 = fmul float %204, %188 %206 = fadd float %205, %183 %207 = fmul float %204, %191 %208 = fadd float %207, %184 %209 = fmul float %204, %194 %210 = fadd float %209, %185 %211 = fadd float %85, 0.000000e+00 %212 = fadd float %86, 1.000000e+00 %213 = bitcast float %211 to i32 %214 = bitcast float %212 to i32 %215 = insertelement <2 x i32> undef, i32 %213, i32 0 %216 = insertelement <2 x i32> %215, i32 %214, i32 1 %217 = bitcast <8 x i32> %27 to <32 x i8> %218 = bitcast <4 x i32> %29 to <16 x i8> %219 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %216, <32 x i8> %217, <16 x i8> %218, i32 2) %220 = extractelement <4 x float> %219, i32 0 %221 = extractelement <4 x float> %219, i32 1 %222 = extractelement <4 x float> %219, i32 2 %223 = fmul float %220, %206 %224 = fadd float %223, 0x3F50624DE0000000 %225 = fmul float %221, %208 %226 = fadd float %225, 0x3F50624DE0000000 %227 = fmul float %222, %210 %228 = fadd float %227, 0x3F50624DE0000000 %229 = call float @fabs(float %224) %230 = call float @llvm.log2.f32(float %229) %231 = call float @fabs(float %226) %232 = call float @llvm.log2.f32(float %231) %233 = call float @fabs(float %228) %234 = call float @llvm.log2.f32(float %233) %235 = fmul float %230, 0x3FDD1743E0000000 %236 = fmul float %232, 0x3FDD1743E0000000 %237 = fmul float %234, 0x3FDD1743E0000000 %238 = call float @llvm.AMDIL.exp.(float %235) %239 = call float @llvm.AMDIL.exp.(float %236) %240 = call float @llvm.AMDIL.exp.(float %237) %241 = bitcast float %238 to i32 %242 = bitcast float %239 to i32 %243 = bitcast float %240 to i32 %244 = insertelement <4 x i32> undef, i32 %241, i32 0 %245 = insertelement <4 x i32> %244, i32 %242, i32 1 %246 = insertelement <4 x i32> %245, i32 %243, i32 2 %247 = insertelement <4 x i32> %246, i32 undef, i32 3 %248 = bitcast <8 x i32> %43 to <32 x i8> %249 = bitcast <4 x i32> %45 to <16 x i8> %250 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %247, <32 x i8> %248, <16 x i8> %249, i32 3) %251 = extractelement <4 x float> %250, i32 0 %252 = extractelement <4 x float> %250, i32 1 %253 = extractelement <4 x float> %250, i32 2 %254 = call i32 @llvm.SI.packf16(float %251, float %252) %255 = bitcast i32 %254 to float %256 = call i32 @llvm.SI.packf16(float %253, float 1.000000e+00) %257 = bitcast i32 %256 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %255, float %257, float %255, float %257) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mov_b32_e32 v4, 0 ; 7E080280 v_mad_f32 v5, 0, v3, -v2 ; D2820005 840A0680 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 v_mad_f32 v7, 0, -v3, v6 ; D2820007 441A0680 v_mul_f32_e32 v8, v7, v7 ; 10100F07 v_mad_f32 v8, v5, v5, v8 ; D2820008 04220B05 v_max_f32_e32 v8, 0x33d6bf95, v8 ; 201010FF 33D6BF95 v_rsq_clamp_f32_e32 v8, v8 ; 7E105908 v_mul_f32_e32 v7, v7, v8 ; 100E1107 v_mov_b32_e32 v9, 0x80000000 ; 7E1202FF 80000000 v_mad_f32 v10, v6, v9, v2 ; D282000A 040A1306 v_mad_f32 v11, 0, v6, -v3 ; D282000B 840E0C80 v_mul_f32_e32 v12, v11, v11 ; 1018170B v_mad_f32 v12, v10, v10, v12 ; D282000C 0432150A v_max_f32_e32 v12, 0x33d6bf95, v12 ; 201818FF 33D6BF95 v_rsq_clamp_f32_e32 v12, v12 ; 7E18590C v_mul_f32_e32 v11, v11, v12 ; 1016190B v_interp_p1_f32 v13, v0, 1, 1, [m0] ; C8340500 v_interp_p2_f32 v13, [v13], v1, 1, 1, [m0] ; C8350501 v_sub_f32_e32 v14, 1.0, v13 ; 081C1AF2 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 v_add_f32_e32 v13, 0, v15 ; 061A1E80 s_load_dwordx4 s[60:63], s[4:5], 0x0 ; C09E0500 s_load_dwordx4 s[12:15], s[4:5], 0x4 ; C0860504 s_load_dwordx4 s[16:19], s[4:5], 0x8 ; C0880508 s_load_dwordx4 s[48:51], s[4:5], 0xc ; C098050C s_load_dwordx4 s[36:39], s[4:5], 0x10 ; C0920510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_load_dwordx8 s[64:71], s[6:7], 0x0 ; C0E00700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_load_dwordx8 s[28:35], s[6:7], 0x10 ; C0CE0710 s_load_dwordx8 s[52:59], s[6:7], 0x18 ; C0DA0718 s_load_dwordx8 s[40:47], s[6:7], 0x20 ; C0D40720 s_load_dwordx8 s[4:11], s[6:7], 0x28 ; C0C20728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[64:71], s[60:63] ; F0800F00 01F00F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 2.0, v16, -1.0 ; D2820000 03CE20F4 v_mul_f32_e32 v1, v0, v11 ; 10021700 v_mad_f32 v11, 2.0, v15, -1.0 ; D282000B 03CE1EF4 v_mad_f32 v1, v11, v7, v1 ; D2820001 04060F0B v_mad_f32 v7, 2.0, v17, -1.0 ; D2820007 03CE22F4 v_mad_f32 v17, v7, v2, v1 ; D2820011 04060507 v_mul_f32_e32 v1, v9, v6 ; 10020D09 v_mad_f32 v1, 0, v2, v1 ; D2820001 04060480 v_mul_f32_e32 v1, v1, v8 ; 10021101 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_mul_f32_e32 v10, v0, v10 ; 10141500 v_mad_f32 v1, v11, v1, v10 ; D2820001 042A030B v_mad_f32 v16, v7, v3, v1 ; D2820010 04060707 v_mul_f32_e32 v1, v5, v8 ; 10021105 v_mul_f32_e32 v3, 0, v3 ; 10060680 v_mad_f32 v2, v2, v9, v3 ; D2820002 040E1302 v_mul_f32_e32 v2, v2, v12 ; 10041902 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mad_f32 v0, v11, v1, v0 ; D2820000 0402030B v_mad_f32 v15, v7, v6, v0 ; D282000F 04020D07 v_cubeid_f32 v8, v15, v16, v17 ; D2880008 0446210F v_cubema_f32 v7, v15, v16, v17 ; D28E0007 0446210F v_cubesc_f32 v6, v15, v16, v17 ; D28A0006 0446210F v_cubetc_f32 v5, v15, v16, v17 ; D28C0005 0446210F v_rcp_f32_e64 v0, |v7| ; D3540100 00000107 v_mov_b32_e32 v1, 0x3fc00000 ; 7E0202FF 3FC00000 v_mad_f32 v7, v5, v0, v1 ; D2820007 04060105 v_mad_f32 v6, v6, v0, v1 ; D2820006 04060106 image_sample v[5:7], 7, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[52:59], s[48:51] ; F0800700 018D0506 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v0, v7 ; 7E004F07 v_mov_b32_e32 v2, 0x400ccccd ; 7E0402FF 400CCCCD v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mov_b32_e32 v3, 0xbfd9999a ; 7E0602FF BFD9999A v_mul_f32_e32 v8, v3, v0 ; 10100103 v_mov_b32_e32 v18, v4 ; 7E240304 v_cubeid_f32 v22, v15, v16, v17 ; D2880016 0446210F v_cubema_f32 v21, v15, v16, v17 ; D28E0015 0446210F v_cubesc_f32 v20, v15, v16, v17 ; D28A0014 0446210F v_cubetc_f32 v19, v15, v16, v17 ; D28C0013 0446210F v_rcp_f32_e64 v4, |v21| ; D3540104 00000115 v_mad_f32 v21, v19, v4, v1 ; D2820015 04060913 v_mad_f32 v20, v20, v4, v1 ; D2820014 04060914 image_sample v[9:11], 7, 0, 0, 0, 0, 0, 0, 0, v[20:23], s[40:47], s[36:39] ; F0800700 012A0914 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v1, v11 ; 7E024F0B v_mul_legacy_f32_e32 v1, v2, v1 ; 0E020302 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mov_b32_e32 v4, 0x3fd9999a ; 7E0802FF 3FD9999A v_mad_f32 v1, v1, v4, v8 ; D2820001 04220901 v_mul_f32_e32 v0, 0x3fd9999a, v0 ; 100000FF 3FD9999A image_sample v8, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[28:35], s[16:19] ; F0800100 0087080D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v1, v0 ; D2820000 04020308 image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[20:27], s[12:15] ; F0800700 00650C0D v_mov_b32_e32 v1, 0x3a83126f ; 7E0202FF 3A83126F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v14, v0, v1 ; D2820000 0406010E v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v17, v0 ; 7E224B00 v_log_f32_e32 v0, v6 ; 7E004F06 v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v19, v3, v0 ; 10260103 v_log_f32_e32 v20, v10 ; 7E284F0A v_mul_legacy_f32_e32 v20, v2, v20 ; 0E282902 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mad_f32 v19, v20, v4, v19 ; D2820013 044E0914 v_mul_f32_e32 v0, 0x3fd9999a, v0 ; 100000FF 3FD9999A v_mad_f32 v0, v8, v19, v0 ; D2820000 04022708 v_mad_f32 v0, v13, v0, v1 ; D2820000 0406010D v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v16, v0 ; 7E204B00 v_log_f32_e32 v0, v5 ; 7E004F05 v_mul_legacy_f32_e32 v0, v2, v0 ; 0E000102 v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v3, v3, v0 ; 10060103 v_log_f32_e32 v5, v9 ; 7E0A4F09 v_mul_legacy_f32_e32 v2, v2, v5 ; 0E040B02 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mad_f32 v2, v2, v4, v3 ; D2820002 040E0902 v_mul_f32_e32 v0, 0x3fd9999a, v0 ; 100000FF 3FD9999A v_mad_f32 v0, v8, v2, v0 ; D2820000 04020508 v_mad_f32 v0, v12, v0, v1 ; D2820000 0406010C v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v15, v0 ; 7E1E4B00 image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[0:3] ; F0800700 0001000F s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v3, v0, v1 ; 5E060300 v_cvt_pkrtz_f16_f32_e64 v0, v2, 1.0 ; D25E0000 0001E502 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..6] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: LRP TEMP[0].xy, IN[0], CONST[5].zwzw, CONST[5] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: ADD TEMP[1].xy, TEMP[0], IMM[0].xxxx 3: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 4: MAD TEMP[0], TEMP[1].xxxx, CONST[0], TEMP[2] 5: MAD TEMP[0], CONST[6].xxxx, CONST[2], TEMP[0] 6: ADD TEMP[0], TEMP[0], CONST[3] 7: MAD TEMP[1].xy, IN[1], CONST[4], CONST[4].zwzw 8: MOV TEMP[1].xy, TEMP[1].xyxx 9: MOV TEMP[1].zw, IMM[0].yyyy 10: MOV OUT[1], TEMP[1] 11: MOV OUT[0], TEMP[0] 12: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %39 = load <16 x i8> addrspace(2)* %38, !tbaa !0 %40 = add i32 %5, %7 %41 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %39, i32 0, i32 %40) %42 = extractelement <4 x float> %41, i32 0 %43 = extractelement <4 x float> %41, i32 1 %44 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %45 = load <16 x i8> addrspace(2)* %44, !tbaa !0 %46 = add i32 %5, %7 %47 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %45, i32 0, i32 %46) %48 = extractelement <4 x float> %47, i32 0 %49 = extractelement <4 x float> %47, i32 1 %50 = call float @llvm.AMDGPU.lrp(float %42, float %35, float %33) %51 = call float @llvm.AMDGPU.lrp(float %43, float %36, float %34) %52 = fadd float %50, 1.000000e+00 %53 = fadd float %51, 1.000000e+00 %54 = fmul float %53, %17 %55 = fmul float %53, %18 %56 = fmul float %53, %19 %57 = fmul float %53, %20 %58 = fmul float %52, %13 %59 = fadd float %58, %54 %60 = fmul float %52, %14 %61 = fadd float %60, %55 %62 = fmul float %52, %15 %63 = fadd float %62, %56 %64 = fmul float %52, %16 %65 = fadd float %64, %57 %66 = fmul float %37, %21 %67 = fadd float %66, %59 %68 = fmul float %37, %22 %69 = fadd float %68, %61 %70 = fmul float %37, %23 %71 = fadd float %70, %63 %72 = fmul float %37, %24 %73 = fadd float %72, %65 %74 = fadd float %67, %25 %75 = fadd float %69, %26 %76 = fadd float %71, %27 %77 = fadd float %73, %28 %78 = fmul float %48, %29 %79 = fadd float %78, %31 %80 = fmul float %49, %30 %81 = fadd float %80, %32 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %79, float %81, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %74, float %75, float %76, float %77) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s8 ; 7E0A0208 v_mad_f32 v5, s9, v2, v5 ; D2820005 04160409 s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112 s_buffer_load_dword s9, s[0:3], 0x10 ; C2048110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s8 ; 7E0C0208 v_mad_f32 v1, s9, v1, v6 ; D2820001 041A0209 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 32, 0, 0, 0, v1, v5, v2, v2 ; F800020F 02020501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v4, 1.0, v0 ; 080800F2 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 v_add_f32_e32 v4, 1.0, v4 ; 060808F2 v_sub_f32_e32 v5, 1.0, v1 ; 080A02F2 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v5 ; D2820000 04140901 v_add_f32_e32 v0, 1.0, v0 ; 060000F2 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v0 ; 10020004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_mad_f32 v1, v2, s5, v1 ; D2820001 04040B02 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v0 ; 10040004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v2, v3, s5, v2 ; D2820002 04080B03 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v3, v5, s5, v3 ; D2820003 040C0B05 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v0, v4, s5, v0 ; D2820000 04000B04 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL OUT[1], COLOR[1] DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..5], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 2.0000, -0.9999} IMM[1] FLT32 { 0.0000, -1.0000, -2.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].xy, IN[0].xyyy 7: TEX TEMP[0].xy, TEMP[0], SAMP[1], 2D 8: MAD TEMP[2].z, TEMP[0].yyyy, IMM[0].zzzz, IMM[0].wwww 9: MAD TEMP[3], TEMP[0].xxxy, IMM[0].yyyz, IMM[1].xxxy 10: MAD TEMP[0], TEMP[0].xxxy, IMM[1].yyyz, IMM[0].yyyy 11: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[1].xxxx 12: UIF TEMP[4].xxxx :0 13: MOV TEMP[4].x, TEMP[3].xxxx 14: ELSE :0 15: MOV TEMP[4].x, TEMP[0].xxxx 16: ENDIF 17: MOV TEMP[4].x, TEMP[4].xxxx 18: FSGE TEMP[5].x, TEMP[2].zzzz, IMM[1].xxxx 19: UIF TEMP[5].xxxx :0 20: MOV TEMP[5].x, TEMP[3].yyyy 21: ELSE :0 22: MOV TEMP[5].x, TEMP[0].yyyy 23: ENDIF 24: MOV TEMP[4].y, TEMP[5].xxxx 25: FSGE TEMP[5].x, TEMP[2].zzzz, IMM[1].xxxx 26: UIF TEMP[5].xxxx :0 27: MOV TEMP[5].x, TEMP[3].zzzz 28: ELSE :0 29: MOV TEMP[5].x, TEMP[0].zzzz 30: ENDIF 31: MOV TEMP[4].z, TEMP[5].xxxx 32: FSGE TEMP[2].x, TEMP[2].zzzz, IMM[1].xxxx 33: UIF TEMP[2].xxxx :0 34: MOV TEMP[2].x, TEMP[3].wwww 35: ELSE :0 36: MOV TEMP[2].x, TEMP[0].wwww 37: ENDIF 38: MOV TEMP[4].w, TEMP[2].xxxx 39: MOV OUT[1], TEMP[4] 40: MOV OUT[0], TEMP[1] 41: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %44 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %42, float 1.000000e+00) %47 = bitcast float %30 to i32 %48 = bitcast float %31 to i32 %49 = insertelement <2 x i32> undef, i32 %47, i32 0 %50 = insertelement <2 x i32> %49, i32 %48, i32 1 %51 = bitcast <8 x i32> %27 to <32 x i8> %52 = bitcast <4 x i32> %29 to <16 x i8> %53 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %50, <32 x i8> %51, <16 x i8> %52, i32 2) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float %55, 2.000000e+00 %57 = fadd float %56, 0xBFEFFF2E40000000 %58 = fmul float %54, 1.000000e+00 %59 = fadd float %58, 0.000000e+00 %60 = fmul float %54, 1.000000e+00 %61 = fadd float %60, 0.000000e+00 %62 = fmul float %54, 1.000000e+00 %63 = fadd float %62, 0.000000e+00 %64 = fmul float %55, 2.000000e+00 %65 = fadd float %64, -1.000000e+00 %66 = fmul float %54, -1.000000e+00 %67 = fadd float %66, 1.000000e+00 %68 = fmul float %54, -1.000000e+00 %69 = fadd float %68, 1.000000e+00 %70 = fmul float %54, -1.000000e+00 %71 = fadd float %70, 1.000000e+00 %72 = fmul float %55, -2.000000e+00 %73 = fadd float %72, 1.000000e+00 %74 = fcmp oge float %57, 0.000000e+00 %75 = sext i1 %74 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = icmp ne i32 %77, 0 %. = select i1 %78, float %59, float %67 %79 = fcmp oge float %57, 0.000000e+00 %80 = sext i1 %79 to i32 %81 = bitcast i32 %80 to float %82 = bitcast float %81 to i32 %83 = icmp ne i32 %82, 0 %temp20.0 = select i1 %83, float %61, float %69 %84 = fcmp oge float %57, 0.000000e+00 %85 = sext i1 %84 to i32 %86 = bitcast i32 %85 to float %87 = bitcast float %86 to i32 %88 = icmp ne i32 %87, 0 %.33 = select i1 %88, float %63, float %71 %89 = fcmp oge float %57, 0.000000e+00 %90 = sext i1 %89 to i32 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = icmp ne i32 %92, 0 %temp8.0 = select i1 %93, float %65, float %73 %94 = call i32 @llvm.SI.packf16(float %43, float %44) %95 = bitcast i32 %94 to float %96 = call i32 @llvm.SI.packf16(float %45, float %46) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %., float %temp20.0) %99 = bitcast i32 %98 to float %100 = call i32 @llvm.SI.packf16(float %.33, float %temp8.0) %101 = bitcast i32 %100 to float call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 0, i32 1, float %95, float %97, float %95, float %97) call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 1, i32 1, float %99, float %101, float %99, float %101) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[0:3] ; F0800F00 00030402 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v0, v7 ; 7E004F07 v_mul_legacy_f32_e32 v0, 1.0, v0 ; 0E0000F2 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e32 v1, v6 ; 7E024F06 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v1, v8, v1 ; 0E020308 v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_log_f32_e32 v1, v5 ; 7E024F05 v_mul_legacy_f32_e32 v1, v8, v1 ; 0E020308 v_exp_f32_e32 v1, v1 ; 7E024B01 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v4, v8, v4 ; 0E080908 v_exp_f32_e32 v4, v4 ; 7E084B04 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 exp 15, 0, 1, 0, 0, v1, v0, v1, v0 ; F800040F 00010001 s_waitcnt expcnt(0) ; BF8C070F image_sample v[0:1], 3, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800300 00450002 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, 2.0, v1, -1.0 ; D2820002 03CE02F4 v_mad_f32 v3, -2.0, v1, 1.0 ; D2820003 03CA02F5 v_mov_b32_e32 v4, 0xbf7ff972 ; 7E0802FF BF7FF972 v_mad_f32 v4, 2.0, v1, v4 ; D2820004 041202F4 v_cmp_ge_f32_e64 s[0:1], v4, 0 ; D00C0000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_cmp_ne_i32_e64 s[0:1], v4, 0 ; D10A0000 00010104 v_cndmask_b32_e64 v2, v3, v2, s[0:1] ; D2000002 18020503 v_add_f32_e32 v3, 0, v0 ; 06060080 v_sub_f32_e32 v0, 1.0, v0 ; 080000F2 v_cndmask_b32_e64 v0, v0, v3, s[0:1] ; D2000000 00020700 v_cvt_pkrtz_f16_f32_e32 v1, v0, v2 ; 5E020500 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 1, 1, 1, 1, v0, v1, v0, v1 ; F8001C1F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..3] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[2].xyxx 1: ADD TEMP[1].xy, -TEMP[0], CONST[3] 2: MOV TEMP[0].xy, TEMP[1].xyxx 3: MAD TEMP[0].xy, IN[0], TEMP[0], CONST[2] 4: MAD TEMP[1].zw, TEMP[0].xyxy, CONST[1].xyxy, -CONST[0].xyxy 5: MOV TEMP[0].xy, TEMP[0].xyxx 6: MOV TEMP[1].xy, TEMP[1].zwzz 7: MOV TEMP[1].zw, IMM[0].yyxy 8: MOV TEMP[0].zw, IMM[0].yyxy 9: MOV OUT[1], TEMP[0] 10: MOV OUT[0], TEMP[1] 11: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = extractelement <4 x float> %24, i32 1 %27 = fsub float -0.000000e+00, %17 %28 = fadd float %27, %19 %29 = fsub float -0.000000e+00, %18 %30 = fadd float %29, %20 %31 = fmul float %25, %28 %32 = fadd float %31, %17 %33 = fmul float %26, %30 %34 = fadd float %33, %18 %35 = fsub float -0.000000e+00, %13 %36 = fmul float %32, %15 %37 = fadd float %36, %35 %38 = fsub float -0.000000e+00, %14 %39 = fmul float %34, %16 %40 = fadd float %39, %38 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %32, float %34, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %37, float %40, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x0 ; C0840900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[8:11], 0 idxen ; E00C2000 80020200 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v3, v1, s4 ; D2820000 00120303 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_mad_f32 v1, v2, v1, s4 ; D2820001 00120302 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v1, v0, v3, v2 ; F800020F 02030001 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v0, s5, v0, -v4 ; D2820000 84120005 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v1, s0, v1, -v4 ; D2820001 84120200 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: ABS TEMP[0].x, TEMP[1].xxxx 7: LG2 TEMP[0].x, TEMP[0].xxxx 8: ABS TEMP[2].x, TEMP[1].yyyy 9: LG2 TEMP[2].x, TEMP[2].xxxx 10: MOV TEMP[0].y, TEMP[2].xxxx 11: ABS TEMP[1].x, TEMP[1].zzzz 12: LG2 TEMP[1].x, TEMP[1].xxxx 13: MOV TEMP[0].z, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 15: EX2 TEMP[1].x, TEMP[0].xxxx 16: EX2 TEMP[2].x, TEMP[0].yyyy 17: MOV TEMP[1].y, TEMP[2].xxxx 18: EX2 TEMP[0].x, TEMP[0].zzzz 19: MOV TEMP[1].z, TEMP[0].xxxx 20: MOV TEMP[1].w, IMM[0].yyyy 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %49 = call float @fabs(float %46) %50 = call float @llvm.log2.f32(float %49) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = fmul float %50, 0x3FDD1743E0000000 %56 = fmul float %52, 0x3FDD1743E0000000 %57 = fmul float %54, 0x3FDD1743E0000000 %58 = call float @llvm.AMDIL.exp.(float %55) %59 = call float @llvm.AMDIL.exp.(float %56) %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float 1.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800700 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v3 ; 7E084F03 v_mul_legacy_f32_e32 v4, 1.0, v4 ; 0E0808F2 v_exp_f32_e32 v4, v4 ; 7E084B04 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_log_f32_e32 v5, v1 ; 7E0A4F01 v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v3 ; 7E084F03 v_mul_legacy_f32_e32 v4, 1.0, v4 ; 0E0808F2 v_exp_f32_e32 v4, v4 ; 7E084B04 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_log_f32_e32 v5, v1 ; 7E0A4F01 v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0..1] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[1].xxxx, CONST[1].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: MUL TEMP[0].xyz, TEMP[1], CONST[0] 8: ABS TEMP[1].x, TEMP[0].xxxx 9: LG2 TEMP[1].x, TEMP[1].xxxx 10: ABS TEMP[2].x, TEMP[0].yyyy 11: LG2 TEMP[2].x, TEMP[2].xxxx 12: MOV TEMP[1].y, TEMP[2].xxxx 13: ABS TEMP[0].x, TEMP[0].zzzz 14: LG2 TEMP[0].x, TEMP[0].xxxx 15: MOV TEMP[1].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[1], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV TEMP[1].w, IMM[0].yyyy 23: MOV OUT[0], TEMP[1] 24: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %30 = load <8 x i32> addrspace(2)* %29, !tbaa !0 %31 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %32 = load <4 x i32> addrspace(2)* %31, !tbaa !0 %33 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %34 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %35 = fmul float %33, %27 %36 = fadd float %35, %28 %37 = fmul float %34, %27 %38 = fadd float %37, %28 %39 = bitcast float %36 to i32 %40 = bitcast float %38 to i32 %41 = insertelement <2 x i32> undef, i32 %39, i32 0 %42 = insertelement <2 x i32> %41, i32 %40, i32 1 %43 = bitcast <8 x i32> %30 to <32 x i8> %44 = bitcast <4 x i32> %32 to <16 x i8> %45 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %42, <32 x i8> %43, <16 x i8> %44, i32 2) %46 = extractelement <4 x float> %45, i32 0 %47 = extractelement <4 x float> %45, i32 1 %48 = extractelement <4 x float> %45, i32 2 %49 = call float @llvm.pow.f32(float %46, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %47, float 0x40019999A0000000) %51 = call float @llvm.pow.f32(float %48, float 0x40019999A0000000) %52 = fmul float %49, %24 %53 = fmul float %50, %25 %54 = fmul float %51, %26 %55 = call float @fabs(float %52) %56 = call float @llvm.log2.f32(float %55) %57 = call float @fabs(float %53) %58 = call float @llvm.log2.f32(float %57) %59 = call float @fabs(float %54) %60 = call float @llvm.log2.f32(float %59) %61 = fmul float %56, 0x3FDD1743E0000000 %62 = fmul float %58, 0x3FDD1743E0000000 %63 = fmul float %60, 0x3FDD1743E0000000 %64 = call float @llvm.AMDIL.exp.(float %61) %65 = call float @llvm.AMDIL.exp.(float %62) %66 = call float @llvm.AMDIL.exp.(float %63) %67 = call i32 @llvm.SI.packf16(float %64, float %65) %68 = bitcast i32 %67 to float %69 = call i32 @llvm.SI.packf16(float %66, float 1.000000e+00) %70 = bitcast i32 %69 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %68, float %70, float %68, float %70) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_buffer_load_dword s9, s[0:3], 0x4 ; C2048104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s9, v2, v3 ; D2820003 040E0409 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s9, v4, v0 ; D2820002 04020809 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v0 ; 10000000 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..4] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 1.0000, 0.0000} 0: MAD TEMP[0].x, IN[0].xxxx, IMM[0].xxxx, IMM[0].xxxx 1: MOV TEMP[1].xy, CONST[3].xyxx 2: ADD TEMP[1].yz, -TEMP[1].xxyw, CONST[4].xxyw 3: MAD TEMP[2].x, TEMP[0].xxxx, TEMP[1].yyyy, CONST[3].xxxx 4: MOV TEMP[0].x, TEMP[2].xxxx 5: MAD TEMP[2].w, IN[0].yyyy, IMM[0].xxxx, IMM[0].xxxx 6: MOV TEMP[0].w, TEMP[2].wwww 7: MAD TEMP[1].z, TEMP[2].wwww, TEMP[1].zzzz, CONST[3].yyyy 8: MOV TEMP[0].z, TEMP[1].zzzz 9: ADD TEMP[0].xy, TEMP[0].xzzw, CONST[2] 10: MAD TEMP[1].zw, TEMP[0].xyxy, CONST[1].xyxy, -CONST[0].xyxy 11: MOV TEMP[0].xy, TEMP[0].xyxx 12: MOV TEMP[1].xy, TEMP[1].zwzz 13: MOV TEMP[1].zw, IMM[0].zzyz 14: MOV TEMP[0].zw, IMM[0].zzyz 15: MOV OUT[1], TEMP[0] 16: MOV OUT[0], TEMP[1] 17: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = fmul float %27, 5.000000e-01 %30 = fadd float %29, 5.000000e-01 %31 = fsub float -0.000000e+00, %19 %32 = fadd float %31, %21 %33 = fsub float -0.000000e+00, %20 %34 = fadd float %33, %22 %35 = fmul float %30, %32 %36 = fadd float %35, %19 %37 = fmul float %28, 5.000000e-01 %38 = fadd float %37, 5.000000e-01 %39 = fmul float %38, %34 %40 = fadd float %39, %20 %41 = fadd float %36, %17 %42 = fadd float %40, %18 %43 = fsub float -0.000000e+00, %13 %44 = fmul float %41, %15 %45 = fadd float %44, %43 %46 = fsub float -0.000000e+00, %14 %47 = fmul float %42, %16 %48 = fadd float %47, %46 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %41, float %42, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %45, float %48, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x0 ; C0840900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[2:5], v0, s[8:11], 0 idxen ; E00C2000 80020200 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 0.5, v3, 0.5 ; D2820000 03C206F0 v_mad_f32 v0, v0, v1, s4 ; D2820000 00120300 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 v_mad_f32 v2, 0.5, v2, 0.5 ; D2820002 03C204F0 v_mad_f32 v1, v2, v1, s4 ; D2820001 00120302 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v1, v0, v3, v2 ; F800020F 02030001 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v0, s5, v0, -v4 ; D2820000 84120005 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v1, s0, v1, -v4 ; D2820001 84120200 exp 15, 12, 0, 1, 0, v1, v0, v3, v2 ; F80008CF 02030001 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: ABS TEMP[0].x, TEMP[1].xxxx 7: LG2 TEMP[0].x, TEMP[0].xxxx 8: ABS TEMP[2].x, TEMP[1].yyyy 9: LG2 TEMP[2].x, TEMP[2].xxxx 10: MOV TEMP[0].y, TEMP[2].xxxx 11: ABS TEMP[1].x, TEMP[1].zzzz 12: LG2 TEMP[1].x, TEMP[1].xxxx 13: MOV TEMP[0].z, TEMP[1].xxxx 14: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 15: EX2 TEMP[1].x, TEMP[0].xxxx 16: EX2 TEMP[2].x, TEMP[0].yyyy 17: MOV TEMP[1].y, TEMP[2].xxxx 18: EX2 TEMP[0].x, TEMP[0].zzzz 19: MOV TEMP[1].z, TEMP[0].xxxx 20: MOV TEMP[1].w, IMM[0].yyyy 21: MOV OUT[0], TEMP[1] 22: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %49 = call float @fabs(float %46) %50 = call float @llvm.log2.f32(float %49) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = fmul float %50, 0x3FDD1743E0000000 %56 = fmul float %52, 0x3FDD1743E0000000 %57 = fmul float %54, 0x3FDD1743E0000000 %58 = call float @llvm.AMDIL.exp.(float %55) %59 = call float @llvm.AMDIL.exp.(float %56) %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call i32 @llvm.SI.packf16(float %58, float %59) %62 = bitcast i32 %61 to float %63 = call i32 @llvm.SI.packf16(float %60, float 1.000000e+00) %64 = bitcast i32 %63 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %62, float %64, float %62, float %64) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800700 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].xy, IN[0].xyxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %21, float %22, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 v_mov_b32_e32 v4, 1.0 ; 7E0802F2 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v0, v1, v5, v4 ; F800020F 04050100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v6, s5, v1, -v6 ; D2820006 841A0205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[0].xxxx, CONST[0].yyyy 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 4: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 5: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 6: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 7: ABS TEMP[0].x, TEMP[1].xxxx 8: LG2 TEMP[0].x, TEMP[0].xxxx 9: ABS TEMP[2].x, TEMP[1].yyyy 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: MOV TEMP[0].y, TEMP[2].xxxx 12: ABS TEMP[2].x, TEMP[1].zzzz 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[0].z, TEMP[2].xxxx 15: MOV TEMP[1].w, TEMP[1].wwww 16: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 17: EX2 TEMP[1].x, TEMP[0].xxxx 18: EX2 TEMP[2].x, TEMP[0].yyyy 19: MOV TEMP[1].y, TEMP[2].xxxx 20: EX2 TEMP[0].x, TEMP[0].zzzz 21: MOV TEMP[1].z, TEMP[0].xxxx 22: MOV OUT[0], TEMP[1] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %30, %24 %33 = fadd float %32, %25 %34 = fmul float %31, %24 %35 = fadd float %34, %25 %36 = bitcast float %33 to i32 %37 = bitcast float %35 to i32 %38 = insertelement <2 x i32> undef, i32 %36, i32 0 %39 = insertelement <2 x i32> %38, i32 %37, i32 1 %40 = bitcast <8 x i32> %27 to <32 x i8> %41 = bitcast <4 x i32> %29 to <16 x i8> %42 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %39, <32 x i8> %40, <16 x i8> %41, i32 2) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = extractelement <4 x float> %42, i32 3 %47 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %48 = call float @llvm.pow.f32(float %44, float 0x40019999A0000000) %49 = call float @llvm.pow.f32(float %45, float 0x40019999A0000000) %50 = call float @llvm.pow.f32(float %46, float 1.000000e+00) %51 = call float @fabs(float %47) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %48) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %49) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %50) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x1 ; C2040101 s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s8 ; 7E060208 v_mad_f32 v3, s0, v2, v3 ; D2820003 040E0400 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_mov_b32_e32 v0, s8 ; 7E000208 v_mad_f32 v2, s0, v4, v0 ; D2820002 04020800 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v4, v3 ; 7E084F03 v_mul_legacy_f32_e32 v4, 1.0, v4 ; 0E0808F2 v_exp_f32_e32 v4, v4 ; 7E084B04 v_log_f32_e32 v5, v2 ; 7E0A4F02 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_log_f32_e32 v5, v1 ; 7E0A4F01 v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e32 v0, v0 ; 7E004F00 v_mul_legacy_f32_e32 v0, v6, v0 ; 0E000106 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v5 ; 5E000B00 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 1.0000, 0.0000} 0: ABS TEMP[0].x, IN[2].xxxx 1: LG2 TEMP[0].x, TEMP[0].xxxx 2: ABS TEMP[1].x, IN[2].yyyy 3: LG2 TEMP[1].x, TEMP[1].xxxx 4: MOV TEMP[0].y, TEMP[1].xxxx 5: ABS TEMP[1].x, IN[2].zzzz 6: LG2 TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0], IMM[0].xxxx 9: MOV TEMP[0].z, TEMP[1].xyzx 10: EX2 TEMP[2].x, TEMP[1].xxxx 11: EX2 TEMP[3].x, TEMP[1].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[1].x, TEMP[1].zzzz 14: MOV TEMP[2].z, TEMP[1].xxxx 15: MOV TEMP[0].xy, CONST[1].xyxx 16: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 17: MOV TEMP[0].xy, TEMP[0].xyxx 18: MOV TEMP[1].xy, IN[1].xyxx 19: MOV TEMP[2].w, IN[2].wwww 20: MOV TEMP[0].zw, IMM[0].zzyz 21: MOV TEMP[1].zw, IMM[0].zzyz 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call float @fabs(float %33) %38 = call float @llvm.log2.f32(float %37) %39 = call float @fabs(float %34) %40 = call float @llvm.log2.f32(float %39) %41 = call float @fabs(float %35) %42 = call float @llvm.log2.f32(float %41) %43 = fmul float %38, 0x40019999A0000000 %44 = fmul float %40, 0x40019999A0000000 %45 = fmul float %42, 0x40019999A0000000 %46 = call float @llvm.AMDIL.exp.(float %43) %47 = call float @llvm.AMDIL.exp.(float %44) %48 = call float @llvm.AMDIL.exp.(float %45) %49 = fsub float -0.000000e+00, %13 %50 = fmul float %21, %15 %51 = fadd float %50, %49 %52 = fsub float -0.000000e+00, %14 %53 = fmul float %22, %16 %54 = fadd float %53, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %54, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e64 v5, |v3| ; D34E0105 00000103 v_mul_f32_e32 v5, 0x400ccccd, v5 ; 100A0AFF 400CCCCD v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v6, |v2| ; D34E0106 00000102 v_mul_f32_e32 v6, 0x400ccccd, v6 ; 100C0CFF 400CCCCD v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_log_f32_e64 v7, |v1| ; D34E0107 00000101 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 32, 0, 0, 0, v7, v6, v5, v4 ; F800020F 04050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { -0.5000, 0.0000, 0.0000, 2.0000} IMM[1] FLT32 { -2.0000, 3.0000, 1.0000, 0.4545} 0: ADD TEMP[0].xy, IMM[0].xxxx, IN[1] 1: DP2 TEMP[0].x, TEMP[0].xyyy, TEMP[0].xyyy 2: MAX TEMP[1].x, TEMP[0].xxxx, IMM[0].yyyy 3: RSQ TEMP[2].x, TEMP[1].xxxx 4: MUL TEMP[2].x, TEMP[2].xxxx, TEMP[1].xxxx 5: CMP TEMP[0].x, -TEMP[1].xxxx, TEMP[2].xxxx, IMM[0].zzzz 6: MOV TEMP[1].x, -CONST[0] 7: MAD TEMP[2].x, TEMP[0].xxxx, IMM[0].wwww, TEMP[1].xxxx 8: ADD TEMP[1].y, TEMP[1].xxxx, CONST[0].yyyy 9: RCP TEMP[1].x, TEMP[1].yyyy 10: MUL TEMP[1], TEMP[1].xxxx, TEMP[2].xxxx 11: MOV_SAT TEMP[1], TEMP[1] 12: MAD TEMP[2].y, TEMP[1].xxxx, IMM[1].xxxx, IMM[1].yyyy 13: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[1].xxxx 14: MAD TEMP[1].x, TEMP[2].yyyy, -TEMP[1].xxxx, IMM[1].zzzz 15: MUL TEMP[1].w, TEMP[1].xxxx, IN[0].wwww 16: MOV TEMP[1].w, TEMP[1].wwww 17: ABS TEMP[2].x, IN[0].xxxx 18: LG2 TEMP[0].x, TEMP[2].xxxx 19: ABS TEMP[2].x, IN[0].yyyy 20: LG2 TEMP[2].x, TEMP[2].xxxx 21: MOV TEMP[0].y, TEMP[2].xxxx 22: ABS TEMP[2].x, IN[0].zzzz 23: LG2 TEMP[2].x, TEMP[2].xxxx 24: MOV TEMP[0].z, TEMP[2].xxxx 25: MUL TEMP[0].xyz, TEMP[0], IMM[1].wwww 26: EX2 TEMP[1].x, TEMP[0].xxxx 27: EX2 TEMP[2].x, TEMP[0].yyyy 28: MOV TEMP[1].y, TEMP[2].xxxx 29: EX2 TEMP[0].x, TEMP[0].zzzz 30: MOV TEMP[1].z, TEMP[0].xxxx 31: MOV OUT[0], TEMP[1] 32: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = fadd float -5.000000e-01, %30 %33 = fadd float -5.000000e-01, %31 %34 = fmul float %32, %32 %35 = fmul float %33, %33 %36 = fadd float %34, %35 %37 = call float @llvm.maxnum.f32(float %36, float 0x3E7AD7F2A0000000) %38 = call float @llvm.AMDGPU.rsq.clamped.f32(float %37) %39 = fmul float %38, %37 %40 = fsub float -0.000000e+00, %37 %41 = call float @llvm.AMDGPU.cndlt(float %40, float %39, float 0.000000e+00) %42 = fsub float -0.000000e+00, %24 %43 = fmul float %41, 2.000000e+00 %44 = fadd float %43, %42 %45 = fadd float %42, %25 %46 = fdiv float 1.000000e+00, %45 %47 = fmul float %46, %44 %48 = fmul float %46, %44 %49 = fmul float %46, %44 %50 = fmul float %46, %44 %51 = call float @llvm.AMDIL.clamp.(float %47, float 0.000000e+00, float 1.000000e+00) %52 = call float @llvm.AMDIL.clamp.(float %48, float 0.000000e+00, float 1.000000e+00) %53 = call float @llvm.AMDIL.clamp.(float %49, float 0.000000e+00, float 1.000000e+00) %54 = call float @llvm.AMDIL.clamp.(float %50, float 0.000000e+00, float 1.000000e+00) %55 = fmul float %51, -2.000000e+00 %56 = fadd float %55, 3.000000e+00 %57 = fmul float %51, %51 %58 = fsub float -0.000000e+00, %57 %59 = fmul float %56, %58 %60 = fadd float %59, 1.000000e+00 %61 = fmul float %60, %29 %62 = call float @fabs(float %26) %63 = call float @llvm.log2.f32(float %62) %64 = call float @fabs(float %27) %65 = call float @llvm.log2.f32(float %64) %66 = call float @fabs(float %28) %67 = call float @llvm.log2.f32(float %66) %68 = fmul float %63, 0x3FDD1743E0000000 %69 = fmul float %65, 0x3FDD1743E0000000 %70 = fmul float %67, 0x3FDD1743E0000000 %71 = call float @llvm.AMDIL.exp.(float %68) %72 = call float @llvm.AMDIL.exp.(float %69) %73 = call float @llvm.AMDIL.exp.(float %70) %74 = call i32 @llvm.SI.packf16(float %71, float %72) %75 = bitcast i32 %74 to float %76 = call i32 @llvm.SI.packf16(float %73, float %61) %77 = bitcast i32 %76 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %75, float %77, float %75, float %77) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } attributes #3 = { readonly } attributes #4 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_add_f32_e32 v2, -0.5, v2 ; 060404F1 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_add_f32_e32 v3, -0.5, v3 ; 060606F1 v_mul_f32_e32 v3, v3, v3 ; 10060703 v_mad_f32 v2, v2, v2, v3 ; D2820002 040E0502 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, v3, vcc ; D2000002 01AA0680 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, 2.0, v2, -s4 ; D2820002 801204F4 s_buffer_load_dword s0, s[0:3], 0x1 ; C2000101 v_mov_b32_e32 v3, s4 ; 7E060204 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v3 ; 08060600 v_rcp_f32_e32 v3, v3 ; 7E065503 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_mov_b32_e32 v3, 0x40400000 ; 7E0602FF 40400000 v_mad_f32 v3, -2.0, v2, v3 ; D2820003 040E04F5 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v2, -v3, v2, 1.0 ; D2820002 23CA0503 v_interp_p1_f32 v3, v0, 3, 0, [m0] ; C80C0300 v_interp_p2_f32 v3, [v3], v1, 3, 0, [m0] ; C80D0301 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_interp_p1_f32 v3, v0, 2, 0, [m0] ; C80C0200 v_interp_p2_f32 v3, [v3], v1, 2, 0, [m0] ; C80D0201 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_log_f32_e64 v0, |v4| ; D34E0100 00000104 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v3 ; 5E000700 exp 15, 0, 1, 1, 1, v0, v2, v0, v2 ; F8001C0F 02000200 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 0.0000, 1.0000, 0.0000} 0: ABS TEMP[0].x, IN[2].xxxx 1: LG2 TEMP[0].x, TEMP[0].xxxx 2: ABS TEMP[1].x, IN[2].yyyy 3: LG2 TEMP[1].x, TEMP[1].xxxx 4: MOV TEMP[0].y, TEMP[1].xxxx 5: ABS TEMP[1].x, IN[2].zzzz 6: LG2 TEMP[1].x, TEMP[1].xxxx 7: MOV TEMP[0].z, TEMP[1].xxxx 8: MUL TEMP[1].xyz, TEMP[0], IMM[0].xxxx 9: MOV TEMP[0].z, TEMP[1].xyzx 10: EX2 TEMP[2].x, TEMP[1].xxxx 11: EX2 TEMP[3].x, TEMP[1].yyyy 12: MOV TEMP[2].y, TEMP[3].xxxx 13: EX2 TEMP[1].x, TEMP[1].zzzz 14: MOV TEMP[2].z, TEMP[1].xxxx 15: MOV TEMP[0].xy, CONST[1].xyxx 16: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 17: MOV TEMP[0].xy, TEMP[0].xyxx 18: MOV TEMP[1].xy, IN[1].xyxx 19: MOV TEMP[2].w, IN[2].wwww 20: MOV TEMP[0].zw, IMM[0].zzyz 21: MOV TEMP[1].zw, IMM[0].zzyz 22: MOV OUT[1], TEMP[2] 23: MOV OUT[2], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = extractelement <4 x float> %26, i32 1 %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = extractelement <4 x float> %32, i32 3 %37 = call float @fabs(float %33) %38 = call float @llvm.log2.f32(float %37) %39 = call float @fabs(float %34) %40 = call float @llvm.log2.f32(float %39) %41 = call float @fabs(float %35) %42 = call float @llvm.log2.f32(float %41) %43 = fmul float %38, 0x40019999A0000000 %44 = fmul float %40, 0x40019999A0000000 %45 = fmul float %42, 0x40019999A0000000 %46 = call float @llvm.AMDIL.exp.(float %43) %47 = call float @llvm.AMDIL.exp.(float %44) %48 = call float @llvm.AMDIL.exp.(float %45) %49 = fsub float -0.000000e+00, %13 %50 = fmul float %21, %15 %51 = fadd float %50, %49 %52 = fsub float -0.000000e+00, %14 %53 = fmul float %22, %16 %54 = fadd float %53, %52 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %46, float %47, float %48, float %36) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %27, float %28, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %51, float %54, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e64 v5, |v3| ; D34E0105 00000103 v_mul_f32_e32 v5, 0x400ccccd, v5 ; 100A0AFF 400CCCCD v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v6, |v2| ; D34E0106 00000102 v_mul_f32_e32 v6, 0x400ccccd, v6 ; 100C0CFF 400CCCCD v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_log_f32_e64 v7, |v1| ; D34E0107 00000101 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 exp 15, 32, 0, 0, 0, v7, v6, v5, v4 ; F800020F 04050607 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v1, v2, v6, v5 ; F800021F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MUL TEMP[0].w, TEMP[1].wwww, IN[0].wwww 7: MOV TEMP[0].w, TEMP[0].wwww 8: MUL TEMP[1].xyz, TEMP[1], IN[0] 9: ABS TEMP[2].x, TEMP[1].xxxx 10: LG2 TEMP[2].x, TEMP[2].xxxx 11: ABS TEMP[3].x, TEMP[1].yyyy 12: LG2 TEMP[3].x, TEMP[3].xxxx 13: MOV TEMP[2].y, TEMP[3].xxxx 14: ABS TEMP[1].x, TEMP[1].zzzz 15: LG2 TEMP[1].x, TEMP[1].xxxx 16: MOV TEMP[2].z, TEMP[1].xxxx 17: MUL TEMP[1].xyz, TEMP[2], IMM[0].zzzz 18: EX2 TEMP[0].x, TEMP[1].xxxx 19: EX2 TEMP[2].x, TEMP[1].yyyy 20: MOV TEMP[0].y, TEMP[2].xxxx 21: EX2 TEMP[1].x, TEMP[1].zzzz 22: MOV TEMP[0].z, TEMP[1].xxxx 23: MOV OUT[0], TEMP[0] 24: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %32 = bitcast float %30 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %44 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %42, float 1.000000e+00) %47 = fmul float %46, %29 %48 = fmul float %43, %26 %49 = fmul float %44, %27 %50 = fmul float %45, %28 %51 = call float @fabs(float %48) %52 = call float @llvm.log2.f32(float %51) %53 = call float @fabs(float %49) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %50) %56 = call float @llvm.log2.f32(float %55) %57 = fmul float %52, 0x3FDD1743E0000000 %58 = fmul float %54, 0x3FDD1743E0000000 %59 = fmul float %56, 0x3FDD1743E0000000 %60 = call float @llvm.AMDIL.exp.(float %57) %61 = call float @llvm.AMDIL.exp.(float %58) %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call i32 @llvm.SI.packf16(float %60, float %61) %64 = bitcast i32 %63 to float %65 = call i32 @llvm.SI.packf16(float %62, float %47) %66 = bitcast i32 %65 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %64, float %66, float %64, float %66) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:5], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v6, v5 ; 7E0C4F05 v_mul_legacy_f32_e32 v6, 1.0, v6 ; 0E0C0CF2 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_interp_p1_f32 v7, v0, 3, 0, [m0] ; C81C0300 v_interp_p2_f32 v7, [v7], v1, 3, 0, [m0] ; C81D0301 v_mul_f32_e32 v6, v7, v6 ; 100C0D07 v_log_f32_e32 v7, v4 ; 7E0E4F04 v_mov_b32_e32 v8, 0x400ccccd ; 7E1002FF 400CCCCD v_mul_legacy_f32_e32 v7, v8, v7 ; 0E0E0F08 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_interp_p1_f32 v9, v0, 2, 0, [m0] ; C8240200 v_interp_p2_f32 v9, [v9], v1, 2, 0, [m0] ; C8250201 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_cvt_pkrtz_f16_f32_e32 v6, v7, v6 ; 5E0C0D07 v_log_f32_e32 v7, v3 ; 7E0E4F03 v_mul_legacy_f32_e32 v7, v8, v7 ; 0E0E0F08 v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_interp_p1_f32 v9, v0, 1, 0, [m0] ; C8240100 v_interp_p2_f32 v9, [v9], v1, 1, 0, [m0] ; C8250101 v_mul_f32_e32 v7, v9, v7 ; 100E0F09 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x3ee8ba1f, v7 ; 100E0EFF 3EE8BA1F v_exp_f32_e32 v7, v7 ; 7E0E4B07 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v8, v2 ; 0E040508 v_exp_f32_e32 v2, v2 ; 7E044B02 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 v_mul_f32_e32 v0, v3, v2 ; 10000503 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v0, v6, v0, v6 ; F8001C0F 06000600 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[1].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[1].xyxx 3: MAD TEMP[0].xy, TEMP[0], IMM[0], IMM[0].xxxx 4: MOV TEMP[0].xy, TEMP[0].xyxx 5: MOV TEMP[1].xy, TEMP[1].xyxx 6: MOV TEMP[1].zw, IMM[0].wwzw 7: MOV TEMP[0].zw, IMM[0].wwzw 8: MOV OUT[0], TEMP[1] 9: MOV OUT[1], TEMP[0] 10: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = fsub float -0.000000e+00, %13 %24 = fmul float %21, %15 %25 = fadd float %24, %23 %26 = fsub float -0.000000e+00, %14 %27 = fmul float %22, %16 %28 = fadd float %27, %26 %29 = fmul float %25, 5.000000e-01 %30 = fadd float %29, 5.000000e-01 %31 = fmul float %28, -5.000000e-01 %32 = fadd float %31, 5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %32, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %28, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 v_mad_f32 v5, -0.5, v4, 0.5 ; D2820005 03C208F1 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v0, s0, v0, -v6 ; D2820000 841A0000 v_mad_f32 v1, 0.5, v0, 0.5 ; D2820001 03C200F0 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 v_mov_b32_e32 v3, 0 ; 7E060280 exp 15, 32, 0, 0, 0, v1, v5, v3, v2 ; F800020F 02030501 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0].xyz, TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: ABS TEMP[0].x, TEMP[1].xxxx 6: LG2 TEMP[0].x, TEMP[0].xxxx 7: ABS TEMP[2].x, TEMP[1].yyyy 8: LG2 TEMP[2].x, TEMP[2].xxxx 9: MOV TEMP[0].y, TEMP[2].xxxx 10: ABS TEMP[1].x, TEMP[1].zzzz 11: LG2 TEMP[1].x, TEMP[1].xxxx 12: MOV TEMP[0].z, TEMP[1].xxxx 13: MUL TEMP[0].xyz, TEMP[0], IMM[0].zzzz 14: EX2 TEMP[1].x, TEMP[0].xxxx 15: EX2 TEMP[2].x, TEMP[0].yyyy 16: MOV TEMP[1].y, TEMP[2].xxxx 17: EX2 TEMP[0].x, TEMP[0].zzzz 18: MOV TEMP[1].z, TEMP[0].xxxx 19: MOV TEMP[1].w, CONST[0].xxxx 20: MOV OUT[0], TEMP[1] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = bitcast float %29 to i32 %32 = bitcast float %30 to i32 %33 = insertelement <2 x i32> undef, i32 %31, i32 0 %34 = insertelement <2 x i32> %33, i32 %32, i32 1 %35 = bitcast <8 x i32> %26 to <32 x i8> %36 = bitcast <4 x i32> %28 to <16 x i8> %37 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %34, <32 x i8> %35, <16 x i8> %36, i32 2) %38 = extractelement <4 x float> %37, i32 0 %39 = extractelement <4 x float> %37, i32 1 %40 = extractelement <4 x float> %37, i32 2 %41 = call float @llvm.pow.f32(float %38, float 0x40019999A0000000) %42 = call float @llvm.pow.f32(float %39, float 0x40019999A0000000) %43 = call float @llvm.pow.f32(float %40, float 0x40019999A0000000) %44 = call float @fabs(float %41) %45 = call float @llvm.log2.f32(float %44) %46 = call float @fabs(float %42) %47 = call float @llvm.log2.f32(float %46) %48 = call float @fabs(float %43) %49 = call float @llvm.log2.f32(float %48) %50 = fmul float %45, 0x3FDD1743E0000000 %51 = fmul float %47, 0x3FDD1743E0000000 %52 = fmul float %49, 0x3FDD1743E0000000 %53 = call float @llvm.AMDIL.exp.(float %50) %54 = call float @llvm.AMDIL.exp.(float %51) %55 = call float @llvm.AMDIL.exp.(float %52) %56 = call i32 @llvm.SI.packf16(float %53, float %54) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float %55, float %24) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:2], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v3, v1 ; 7E064F01 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v3, v4, v3 ; 0E060704 v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e64 v3, |v3| ; D34E0103 00000103 v_mul_f32_e32 v3, 0x3ee8ba1f, v3 ; 100606FF 3EE8BA1F v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v5, v0 ; 7E0A4F00 v_mul_legacy_f32_e32 v5, v4, v5 ; 0E0A0B04 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_cvt_pkrtz_f16_f32_e32 v3, v5, v3 ; 5E060705 v_log_f32_e32 v0, v2 ; 7E004F02 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_cvt_pkrtz_f16_f32_e64 v0, v0, s0 ; D25E0000 00000100 exp 15, 0, 1, 1, 1, v3, v0, v3, v0 ; F8001C0F 00030003 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, CONST[1].xyxx 1: MAD TEMP[0].xy, IN[0], TEMP[0], -CONST[0] 2: MOV TEMP[0].xy, TEMP[0].xyxx 3: MOV TEMP[1].x, IN[1].xxxx 4: MOV TEMP[0].zw, IMM[0].yyxy 5: MOV TEMP[1].zw, IMM[0].yyxy 6: MOV OUT[1], TEMP[1] 7: MOV OUT[0], TEMP[0] 8: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %17 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %18 = load <16 x i8> addrspace(2)* %17, !tbaa !0 %19 = add i32 %5, %7 %20 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %18, i32 0, i32 %19) %21 = extractelement <4 x float> %20, i32 0 %22 = extractelement <4 x float> %20, i32 1 %23 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %24 = load <16 x i8> addrspace(2)* %23, !tbaa !0 %25 = add i32 %5, %7 %26 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %24, i32 0, i32 %25) %27 = extractelement <4 x float> %26, i32 0 %28 = fsub float -0.000000e+00, %13 %29 = fmul float %21, %15 %30 = fadd float %29, %28 %31 = fsub float -0.000000e+00, %14 %32 = fmul float %22, %16 %33 = fadd float %32, %31 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %27, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %30, float %33, float 0.000000e+00, float 1.000000e+00) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v6, v6, v5 ; F800020F 05060601 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, -v4 ; D2820004 84120205 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s4 ; 7E0E0204 v_mad_f32 v0, s0, v0, -v7 ; D2820000 841E0000 exp 15, 12, 0, 1, 0, v0, v4, v6, v5 ; F80008CF 05060400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MAX TEMP[0].x, IN[0].xxxx, IMM[0].xxxx 1: RSQ TEMP[1].x, TEMP[0].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[0].xxxx 3: CMP TEMP[1].x, -TEMP[0].xxxx, TEMP[1].xxxx, IMM[0].yyyy 4: MOV TEMP[0].w, TEMP[1].xxxx 5: MOV TEMP[0].xyz, IN[0].xxxx 6: MOV OUT[0], TEMP[0] 7: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = call float @llvm.maxnum.f32(float %22, float 0x3E7AD7F2A0000000) %24 = call float @llvm.AMDGPU.rsq.clamped.f32(float %23) %25 = fmul float %24, %23 %26 = fsub float -0.000000e+00, %23 %27 = call float @llvm.AMDGPU.cndlt(float %26, float %25, float 0.000000e+00) %28 = call i32 @llvm.SI.packf16(float %22, float %22) %29 = bitcast i32 %28 to float %30 = call i32 @llvm.SI.packf16(float %22, float %27) %31 = bitcast i32 %30 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %29, float %31, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_max_f32_e32 v0, 0x33d6bf95, v2 ; 200004FF 33D6BF95 v_rsq_clamp_f32_e32 v1, v0 ; 7E025900 v_mul_f32_e32 v1, v0, v1 ; 10020300 v_xor_b32_e32 v0, 0x80000000, v0 ; 3A0000FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v0 ; 7C080080 v_cndmask_b32_e64 v0, 0, v1, vcc ; D2000000 01AA0280 v_cvt_pkrtz_f16_f32_e32 v0, v2, v0 ; 5E000102 v_cvt_pkrtz_f16_f32_e32 v1, v2, v2 ; 5E020502 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..99] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+4] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[0].x, TEMP[0].xxxx 12: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 13: UARL ADDR[0].x, TEMP[0].xxxx 14: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 15: MOV TEMP[2].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[0].xyzx 18: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 19: UIF TEMP[0].xxxx :0 20: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 21: MOV TEMP[2].w, TEMP[0].wwww 22: F2I TEMP[3].x, TEMP[0].wwww 23: UARL ADDR[0].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 26: F2I TEMP[4].x, TEMP[0].wwww 27: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 28: UARL ADDR[0].x, TEMP[4].xxxx 29: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 30: MOV TEMP[3].y, TEMP[4].xxxx 31: F2I TEMP[0].x, TEMP[0].wwww 32: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 33: UARL ADDR[0].x, TEMP[0].xxxx 34: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 35: MOV TEMP[3].z, TEMP[0].xxxx 36: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[3], TEMP[2] 37: MOV TEMP[2].xyz, TEMP[0].xyzx 38: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 39: UIF TEMP[0].xxxx :0 40: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 41: MOV TEMP[2].w, TEMP[0].wwww 42: F2I TEMP[4].x, TEMP[0].wwww 43: UARL ADDR[0].x, TEMP[4].xxxx 44: UARL ADDR[0].x, TEMP[4].xxxx 45: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 46: F2I TEMP[4].x, TEMP[0].wwww 47: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 48: UARL ADDR[0].x, TEMP[4].xxxx 49: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 50: MOV TEMP[3].y, TEMP[4].xxxx 51: F2I TEMP[0].x, TEMP[0].wwww 52: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 53: UARL ADDR[0].x, TEMP[0].xxxx 54: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 55: MOV TEMP[3].z, TEMP[0].xxxx 56: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[3], TEMP[2] 57: MOV TEMP[2].xyz, TEMP[0].xyzx 58: ENDIF 59: ENDIF 60: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 61: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 62: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 63: ADD TEMP[0], TEMP[1], CONST[3] 64: MOV TEMP[2].xy, IN[3].xyxx 65: MOV TEMP[1].xyz, TEMP[2].xyzx 66: MOV TEMP[1].w, IMM[0].yyyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float 3.000000e+00, %47 %57 = fmul float %33, 1.000000e+00 %58 = fadd float %57, 0.000000e+00 %59 = fmul float %34, 1.000000e+00 %60 = fadd float %59, 0.000000e+00 %61 = fmul float %35, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %33, 0.000000e+00 %64 = fadd float %63, 1.000000e+00 %65 = fptosi float %56 to i32 %66 = bitcast i32 %65 to float %67 = bitcast float %66 to i32 %68 = shl i32 %67, 4 %69 = add i32 %68, 64 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %67, 4 %72 = add i32 %71, 68 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %67, 4 %75 = add i32 %74, 72 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %67, 4 %78 = add i32 %77, 76 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %58, %70 %81 = fmul float %60, %73 %82 = fadd float %80, %81 %83 = fmul float %62, %76 %84 = fadd float %82, %83 %85 = fmul float %64, %79 %86 = fadd float %84, %85 %87 = fptosi float %56 to i32 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = add i32 1, %89 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = shl i32 %92, 4 %94 = add i32 %93, 64 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %92, 4 %97 = add i32 %96, 68 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = shl i32 %92, 4 %100 = add i32 %99, 72 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %92, 4 %103 = add i32 %102, 76 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = fmul float %58, %95 %106 = fmul float %60, %98 %107 = fadd float %105, %106 %108 = fmul float %62, %101 %109 = fadd float %107, %108 %110 = fmul float %64, %104 %111 = fadd float %109, %110 %112 = fptosi float %56 to i32 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = add i32 2, %114 %116 = bitcast i32 %115 to float %117 = bitcast float %116 to i32 %118 = shl i32 %117, 4 %119 = add i32 %118, 64 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %117, 4 %122 = add i32 %121, 68 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = shl i32 %117, 4 %125 = add i32 %124, 72 %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %125) %127 = shl i32 %117, 4 %128 = add i32 %127, 76 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = fmul float %58, %120 %131 = fmul float %60, %123 %132 = fadd float %130, %131 %133 = fmul float %62, %126 %134 = fadd float %132, %133 %135 = fmul float %64, %129 %136 = fadd float %134, %135 %137 = fmul float %86, %40 %138 = fmul float %111, %40 %139 = fmul float %136, %40 %140 = fcmp olt float 0.000000e+00, %41 %141 = sext i1 %140 to i32 %142 = bitcast i32 %141 to float %143 = bitcast float %142 to i32 %144 = icmp ne i32 %143, 0 br i1 %144, label %IF, label %ENDIF IF: ; preds = %main_body %145 = fmul float 3.000000e+00, %48 %146 = fptosi float %145 to i32 %147 = bitcast i32 %146 to float %148 = bitcast float %147 to i32 %149 = shl i32 %148, 4 %150 = add i32 %149, 64 %151 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %150) %152 = shl i32 %148, 4 %153 = add i32 %152, 68 %154 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %153) %155 = shl i32 %148, 4 %156 = add i32 %155, 72 %157 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %156) %158 = shl i32 %148, 4 %159 = add i32 %158, 76 %160 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %159) %161 = fmul float %58, %151 %162 = fmul float %60, %154 %163 = fadd float %161, %162 %164 = fmul float %62, %157 %165 = fadd float %163, %164 %166 = fmul float %64, %160 %167 = fadd float %165, %166 %168 = fptosi float %145 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = add i32 1, %170 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = shl i32 %173, 4 %175 = add i32 %174, 64 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = shl i32 %173, 4 %178 = add i32 %177, 68 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = shl i32 %173, 4 %181 = add i32 %180, 72 %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %181) %183 = shl i32 %173, 4 %184 = add i32 %183, 76 %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %184) %186 = fmul float %58, %176 %187 = fmul float %60, %179 %188 = fadd float %186, %187 %189 = fmul float %62, %182 %190 = fadd float %188, %189 %191 = fmul float %64, %185 %192 = fadd float %190, %191 %193 = fptosi float %145 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = add i32 2, %195 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = shl i32 %198, 4 %200 = add i32 %199, 64 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = shl i32 %198, 4 %203 = add i32 %202, 68 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %198, 4 %206 = add i32 %205, 72 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %198, 4 %209 = add i32 %208, 76 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %58, %201 %212 = fmul float %60, %204 %213 = fadd float %211, %212 %214 = fmul float %62, %207 %215 = fadd float %213, %214 %216 = fmul float %64, %210 %217 = fadd float %215, %216 %218 = fmul float %41, %167 %219 = fadd float %218, %137 %220 = fmul float %41, %192 %221 = fadd float %220, %138 %222 = fmul float %41, %217 %223 = fadd float %222, %139 %224 = fcmp olt float 0.000000e+00, %42 %225 = sext i1 %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = icmp ne i32 %227, 0 br i1 %228, label %IF44, label %ENDIF ENDIF: ; preds = %IF44, %IF, %main_body %temp8.0 = phi float [ %137, %main_body ], [ %327, %IF44 ], [ %219, %IF ] %temp9.0 = phi float [ %138, %main_body ], [ %329, %IF44 ], [ %221, %IF ] %temp10.0 = phi float [ %139, %main_body ], [ %331, %IF44 ], [ %223, %IF ] %229 = fmul float %temp9.0, %17 %230 = fmul float %temp9.0, %18 %231 = fmul float %temp9.0, %19 %232 = fmul float %temp9.0, %20 %233 = fmul float %temp8.0, %13 %234 = fadd float %233, %229 %235 = fmul float %temp8.0, %14 %236 = fadd float %235, %230 %237 = fmul float %temp8.0, %15 %238 = fadd float %237, %231 %239 = fmul float %temp8.0, %16 %240 = fadd float %239, %232 %241 = fmul float %temp10.0, %21 %242 = fadd float %241, %234 %243 = fmul float %temp10.0, %22 %244 = fadd float %243, %236 %245 = fmul float %temp10.0, %23 %246 = fadd float %245, %238 %247 = fmul float %temp10.0, %24 %248 = fadd float %247, %240 %249 = fadd float %242, %25 %250 = fadd float %244, %26 %251 = fadd float %246, %27 %252 = fadd float %248, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %54, float %55, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void IF44: ; preds = %IF %253 = fmul float 3.000000e+00, %49 %254 = fptosi float %253 to i32 %255 = bitcast i32 %254 to float %256 = bitcast float %255 to i32 %257 = shl i32 %256, 4 %258 = add i32 %257, 64 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) %260 = shl i32 %256, 4 %261 = add i32 %260, 68 %262 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %261) %263 = shl i32 %256, 4 %264 = add i32 %263, 72 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = shl i32 %256, 4 %267 = add i32 %266, 76 %268 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %267) %269 = fmul float %58, %259 %270 = fmul float %60, %262 %271 = fadd float %269, %270 %272 = fmul float %62, %265 %273 = fadd float %271, %272 %274 = fmul float %64, %268 %275 = fadd float %273, %274 %276 = fptosi float %253 to i32 %277 = bitcast i32 %276 to float %278 = bitcast float %277 to i32 %279 = add i32 1, %278 %280 = bitcast i32 %279 to float %281 = bitcast float %280 to i32 %282 = shl i32 %281, 4 %283 = add i32 %282, 64 %284 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %283) %285 = shl i32 %281, 4 %286 = add i32 %285, 68 %287 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %286) %288 = shl i32 %281, 4 %289 = add i32 %288, 72 %290 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %289) %291 = shl i32 %281, 4 %292 = add i32 %291, 76 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %58, %284 %295 = fmul float %60, %287 %296 = fadd float %294, %295 %297 = fmul float %62, %290 %298 = fadd float %296, %297 %299 = fmul float %64, %293 %300 = fadd float %298, %299 %301 = fptosi float %253 to i32 %302 = bitcast i32 %301 to float %303 = bitcast float %302 to i32 %304 = add i32 2, %303 %305 = bitcast i32 %304 to float %306 = bitcast float %305 to i32 %307 = shl i32 %306, 4 %308 = add i32 %307, 64 %309 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %308) %310 = shl i32 %306, 4 %311 = add i32 %310, 68 %312 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %311) %313 = shl i32 %306, 4 %314 = add i32 %313, 72 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) %316 = shl i32 %306, 4 %317 = add i32 %316, 76 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %58, %309 %320 = fmul float %60, %312 %321 = fadd float %319, %320 %322 = fmul float %62, %315 %323 = fadd float %321, %322 %324 = fmul float %64, %318 %325 = fadd float %323, %324 %326 = fmul float %42, %275 %327 = fadd float %326, %219 %328 = fmul float %42, %300 %329 = fadd float %328, %221 %330 = fmul float %42, %325 %331 = fadd float %330, %223 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[4:7], s[8:9], 0xc ; C082090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v11, s[12:15], 0 idxen ; E00C2000 8003060B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, 0, v7 ; 06040E80 buffer_load_format_xyzw v[15:18], v11, s[20:23], 0 idxen ; E00C2000 80050F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v15 ; 10001EFF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0x64, v3 ; 4A0006FF 00000064 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_add_f32_e32 v4, 0, v6 ; 06080C80 v_add_i32_e32 v1, 0x60, v3 ; 4A0206FF 00000060 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v4, v1, v0 ; D2820000 04020304 v_add_f32_e32 v5, 0, v8 ; 060A1080 v_add_i32_e32 v1, 0x68, v3 ; 4A0206FF 00000068 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v5, v1, v0 ; D2820000 04020305 v_mad_f32 v6, 0, v6, 1.0 ; D2820006 03CA0C80 v_add_i32_e32 v1, 0x6c, v3 ; 4A0206FF 0000006C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v1, v0 ; D2820000 04020306 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_add_i32_e32 v1, 0x54, v3 ; 4A0206FF 00000054 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_add_i32_e32 v12, 0x50, v3 ; 4A1806FF 00000050 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v12, v1 ; D2820001 04061904 v_add_i32_e32 v12, 0x58, v3 ; 4A1806FF 00000058 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v5, v12, v1 ; D2820001 04061905 v_add_i32_e32 v12, 0x5c, v3 ; 4A1806FF 0000005C buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v6, v12, v1 ; D2820001 04061906 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v12, 0x44, v3 ; 4A1806FF 00000044 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v2 ; 1018050C v_add_i32_e32 v13, 64, v3 ; 4A1A06C0 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v4, v13, v12 ; D282000C 04321B04 v_add_i32_e32 v13, 0x48, v3 ; 4A1A06FF 00000048 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v5, v13, v12 ; D282000C 04321B05 v_add_i32_e32 v3, 0x4c, v3 ; 4A0606FF 0000004C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v6, v3, v12 ; D2820003 04320706 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v19, 0, -1, s[4:5] ; D2000813 00118280 v_cmp_ne_i32_e64 s[20:21], v19, 0 ; D10A0014 00010113 s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_buffer_load_dword s7, s[0:3], 0xe ; C203810E s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s12, s[0:3], 0xb ; C206010B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s17, s[0:3], 0x7 ; C2088107 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s16, s[0:3], 0x1 ; C2080101 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[20:21], s[20:21] ; BE942414 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v19, 0x40400000, v16 ; 102620FF 40400000 v_cvt_i32_f32_e32 v19, v19 ; 7E261113 v_lshlrev_b32_e32 v19, 4, v19 ; 34262684 v_add_i32_e32 v20, 0x60, v19 ; 4A2826FF 00000060 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x64, v19 ; 4A2A26FF 00000064 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x68, v19 ; 4A2A26FF 00000068 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x6c, v19 ; 4A2A26FF 0000006C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v0, v8, v20, v0 ; D2820000 04022908 v_add_i32_e32 v20, 0x50, v19 ; 4A2826FF 00000050 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x54, v19 ; 4A2A26FF 00000054 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x58, v19 ; 4A2A26FF 00000058 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x5c, v19 ; 4A2A26FF 0000005C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v1, v8, v20, v1 ; D2820001 04062908 v_add_i32_e32 v20, 64, v19 ; 4A2826C0 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x44, v19 ; 4A2A26FF 00000044 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x48, v19 ; 4A2A26FF 00000048 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v19, 0x4c, v19 ; 4A2626FF 0000004C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v6, v19, v20 ; D2820013 04522706 v_mad_f32 v3, v8, v19, v3 ; D2820003 040E2708 v_cmp_gt_f32_e64 s[22:23], v9, 0 ; D0080016 00010109 v_cndmask_b32_e64 v19, 0, -1, s[22:23] ; D2000813 00598280 v_cmp_ne_i32_e64 s[22:23], v19, 0 ; D10A0016 00010113 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v15, 0x40400000, v17 ; 101E22FF 40400000 v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_add_i32_e32 v16, 0x60, v15 ; 4A201EFF 00000060 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x64, v15 ; 4A221EFF 00000064 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x68, v15 ; 4A221EFF 00000068 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x6c, v15 ; 4A221EFF 0000006C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v0, v9, v16, v0 ; D2820000 04022109 v_add_i32_e32 v16, 0x50, v15 ; 4A201EFF 00000050 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x54, v15 ; 4A221EFF 00000054 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x58, v15 ; 4A221EFF 00000058 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x5c, v15 ; 4A221EFF 0000005C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v1, v9, v16, v1 ; D2820001 04062109 v_add_i32_e32 v16, 64, v15 ; 4A201EC0 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x44, v15 ; 4A221EFF 00000044 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mad_f32 v2, v4, v16, v2 ; D2820002 040A2104 v_add_i32_e32 v4, 0x48, v15 ; 4A081EFF 00000048 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v5, v4, v2 ; D2820002 040A0905 v_add_i32_e32 v4, 0x4c, v15 ; 4A081EFF 0000004C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v6, v4, v2 ; D2820002 040A0906 v_mad_f32 v3, v9, v2, v3 ; D2820003 040E0509 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_b64 exec, exec, s[20:21] ; 88FE147E v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 32, 0, 0, 0, v11, v12, v0, v2 ; F800020F 02000C0B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s17, v1 ; 10040211 v_mad_f32 v2, v3, s19, v2 ; D2820002 04082703 v_mad_f32 v2, v0, s12, v2 ; D2820002 04081900 v_add_f32_e32 v2, s9, v2 ; 06040409 v_mul_f32_e32 v4, s15, v1 ; 1008020F v_mad_f32 v4, v3, s18, v4 ; D2820004 04102503 v_mad_f32 v4, v0, s10, v4 ; D2820004 04101500 v_add_f32_e32 v4, s7, v4 ; 06080807 v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mad_f32 v5, v3, s16, v5 ; D2820005 04142103 v_mad_f32 v5, v0, s8, v5 ; D2820005 04141100 v_add_f32_e32 v5, s5, v5 ; 060A0A05 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mad_f32 v1, v3, s14, v1 ; D2820001 04041D03 v_mad_f32 v0, v0, s6, v1 ; D2820000 04040D00 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0..1] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 0.0000, 1.0000, 0.3300, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].xxxx 2: UIF TEMP[0].xxxx :0 3: MOV TEMP[0].x, IMM[0].yyyy 4: ELSE :0 5: MOV TEMP[0].x, IMM[0].zzzz 6: ENDIF 7: ADD TEMP[1].y, -CONST[1].xxxx, CONST[1].yyyy 8: MAD TEMP[1].y, CONST[1].zzzz, TEMP[1].yyyy, CONST[1].xxxx 9: MUL TEMP[0].w, TEMP[1].yyyy, TEMP[0].xxxx 10: MOV TEMP[0].w, TEMP[0].wwww 11: MOV TEMP[0].xyz, IMM[0].yyyy 12: MOV OUT[0], TEMP[0] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %29 = fsub float -0.000000e+00, %24 %30 = fadd float %29, %28 %31 = fcmp oge float %30, 0.000000e+00 %32 = sext i1 %31 to i32 %33 = bitcast i32 %32 to float %34 = bitcast float %33 to i32 %35 = icmp ne i32 %34, 0 %. = select i1 %35, float 1.000000e+00, float 0x3FD51EB860000000 %36 = fsub float -0.000000e+00, %25 %37 = fadd float %36, %26 %38 = fmul float %27, %37 %39 = fadd float %38, %25 %40 = fmul float %39, %. %41 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %42 = bitcast i32 %41 to float %43 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %40) %44 = bitcast i32 %43 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %42, float %44, float %42, float %44) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 2, 0, [m0] ; C8080200 v_interp_p2_f32 v2, [v2], v1, 2, 0, [m0] ; C8090201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s4, v2 ; 0A000404 v_cmp_ge_f32_e64 s[4:5], v0, 0 ; D00C0004 00010100 v_cndmask_b32_e64 v0, 0, -1, s[4:5] ; D2000000 00118280 v_cmp_ne_i32_e64 s[4:5], v0, 0 ; D10A0004 00010100 v_mov_b32_e32 v0, 0x3ea8f5c3 ; 7E0002FF 3EA8F5C3 v_cndmask_b32_e64 v0, v0, 1.0, s[4:5] ; D2000000 0011E500 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_sub_f32_e32 v1, s5, v1 ; 08020205 s_buffer_load_dword s0, s[0:3], 0x6 ; C2000106 v_mov_b32_e32 v2, s4 ; 7E040204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v1, s0, v2 ; D2820001 04080101 v_mul_f32_e32 v0, v0, v1 ; 10000300 v_cvt_pkrtz_f16_f32_e32 v0, 1.0, v0 ; 5E0000F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[6], TEMP[0] 3: ADD TEMP[0], TEMP[0], CONST[7] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 6: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 7: MAD TEMP[1], TEMP[0].wwww, CONST[3], TEMP[1] 8: MOV TEMP[0].z, TEMP[0].zzzz 9: MOV TEMP[0].xy, IN[1].xyxx 10: MOV TEMP[0].w, IMM[0].xxxx 11: MOV OUT[1], TEMP[0] 12: MOV OUT[0], TEMP[1] 13: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %53 = load <16 x i8> addrspace(2)* %52, !tbaa !0 %54 = add i32 %5, %7 %55 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %53, i32 0, i32 %54) %56 = extractelement <4 x float> %55, i32 0 %57 = extractelement <4 x float> %55, i32 1 %58 = fmul float %33, %50 %59 = fmul float %34, %50 %60 = fmul float %35, %50 %61 = fmul float %36, %50 %62 = fmul float %49, %29 %63 = fadd float %62, %58 %64 = fmul float %49, %30 %65 = fadd float %64, %59 %66 = fmul float %49, %31 %67 = fadd float %66, %60 %68 = fmul float %49, %32 %69 = fadd float %68, %61 %70 = fmul float %51, %37 %71 = fadd float %70, %63 %72 = fmul float %51, %38 %73 = fadd float %72, %65 %74 = fmul float %51, %39 %75 = fadd float %74, %67 %76 = fmul float %51, %40 %77 = fadd float %76, %69 %78 = fadd float %71, %41 %79 = fadd float %73, %42 %80 = fadd float %75, %43 %81 = fadd float %77, %44 %82 = fmul float %79, %17 %83 = fmul float %79, %18 %84 = fmul float %79, %19 %85 = fmul float %79, %20 %86 = fmul float %78, %13 %87 = fadd float %86, %82 %88 = fmul float %78, %14 %89 = fadd float %88, %83 %90 = fmul float %78, %15 %91 = fadd float %90, %84 %92 = fmul float %78, %16 %93 = fadd float %92, %85 %94 = fmul float %80, %21 %95 = fadd float %94, %87 %96 = fmul float %80, %22 %97 = fadd float %96, %89 %98 = fmul float %80, %23 %99 = fadd float %98, %91 %100 = fmul float %80, %24 %101 = fadd float %100, %93 %102 = fmul float %81, %25 %103 = fadd float %102, %95 %104 = fmul float %81, %26 %105 = fadd float %104, %97 %106 = fmul float %81, %27 %107 = fadd float %106, %99 %108 = fmul float %81, %28 %109 = fadd float %108, %101 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %56, float %57, float %80, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %103, float %105, float %107, float %109) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v2 ; 100A0404 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s4, v5 ; D2820005 04140901 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 buffer_load_format_xyzw v[6:9], v0, s[8:11], 0 idxen ; E00C2000 80020600 v_mov_b32_e32 v0, 1.0 ; 7E0002F2 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v6, v7, v5, v0 ; F800020F 00050706 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v2 ; 10000404 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v0 ; D2820000 04000903 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v2 ; 100C0404 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s4, v6 ; D2820006 04180903 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v6 ; 100E0C04 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s4, v7 ; D2820007 041C0905 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v2 ; 10100404 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v1, s4, v8 ; D2820008 04200901 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v3, s4, v8 ; D2820001 04200903 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v7 ; D2820002 041C0901 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v6 ; 10060C04 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s4, v3 ; D2820003 040C0905 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s4, v3 ; D2820003 040C0901 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v6 ; 10080C04 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v6 ; 100C0C04 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v6 ; D2820000 04180900 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: POW TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy 4: MUL TEMP[2].y, TEMP[1].wwww, IMM[0].zzzz 5: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 6: UIF TEMP[0].xxxx :0 7: MOV TEMP[0].x, TEMP[1].wwww 8: ELSE :0 9: MOV TEMP[0].x, TEMP[2].yyyy 10: ENDIF 11: ADD TEMP[1].y, -TEMP[1].wwww, IMM[1].xxxx 12: FSGE TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww 13: UIF TEMP[1].xxxx :0 14: MOV TEMP[1].x, IMM[0].wwww 15: ELSE :0 16: MOV TEMP[1].x, TEMP[0].xxxx 17: ENDIF 18: MOV TEMP[0].w, TEMP[1].xxxx 19: MOV TEMP[0].xyz, IMM[0].yyyy 20: MOV OUT[0], TEMP[0] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %32 = fsub float -0.000000e+00, %24 %33 = fadd float %32, %31 %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %26 to <32 x i8> %39 = bitcast <4 x i32> %28 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 3 %42 = call float @llvm.pow.f32(float %41, float 1.000000e+00) %43 = fmul float %42, 5.000000e-01 %44 = fcmp oge float %33, 0.000000e+00 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 %. = select i1 %48, float %42, float %43 %49 = fsub float -0.000000e+00, %42 %50 = fadd float %49, 0x3FB99999A0000000 %51 = fcmp oge float %50, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %53 to i32 %55 = icmp ne i32 %54, 0 %temp4.0 = select i1 %55, float 0.000000e+00, float %. %56 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %temp4.0) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800800 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s0, v4 ; 0A000800 v_cmp_ge_f32_e64 s[0:1], v0, 0 ; D00C0000 00010100 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_ne_i32_e64 s[0:1], v0, 0 ; D10A0000 00010100 v_cndmask_b32_e64 v0, v3, v2, s[0:1] ; D2000000 18020503 v_sub_f32_e32 v1, 0x3dcccccd, v2 ; 080204FF 3DCCCCCD v_cmp_ge_f32_e64 s[0:1], v1, 0 ; D00C0000 00010101 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; D2000000 00010100 v_cvt_pkrtz_f16_f32_e32 v0, 1.0, v0 ; 5E0000F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..99] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+4] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[0].x, TEMP[0].xxxx 12: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 13: UARL ADDR[0].x, TEMP[0].xxxx 14: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 15: MOV TEMP[2].z, TEMP[0].xxxx 16: MUL TEMP[0].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[0].xyzx 18: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 19: UIF TEMP[0].xxxx :0 20: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 21: MOV TEMP[2].w, TEMP[0].wwww 22: F2I TEMP[3].x, TEMP[0].wwww 23: UARL ADDR[0].x, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 26: F2I TEMP[4].x, TEMP[0].wwww 27: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 28: UARL ADDR[0].x, TEMP[4].xxxx 29: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 30: MOV TEMP[3].y, TEMP[4].xxxx 31: F2I TEMP[0].x, TEMP[0].wwww 32: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 33: UARL ADDR[0].x, TEMP[0].xxxx 34: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 35: MOV TEMP[3].z, TEMP[0].xxxx 36: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[3], TEMP[2] 37: MOV TEMP[2].xyz, TEMP[0].xyzx 38: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 39: UIF TEMP[0].xxxx :0 40: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 41: MOV TEMP[2].w, TEMP[0].wwww 42: F2I TEMP[4].x, TEMP[0].wwww 43: UARL ADDR[0].x, TEMP[4].xxxx 44: UARL ADDR[0].x, TEMP[4].xxxx 45: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+4] 46: F2I TEMP[4].x, TEMP[0].wwww 47: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 48: UARL ADDR[0].x, TEMP[4].xxxx 49: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+4] 50: MOV TEMP[3].y, TEMP[4].xxxx 51: F2I TEMP[0].x, TEMP[0].wwww 52: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 53: UARL ADDR[0].x, TEMP[0].xxxx 54: DP4 TEMP[0].x, TEMP[1], CONST[ADDR[0].x+4] 55: MOV TEMP[3].z, TEMP[0].xxxx 56: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[3], TEMP[2] 57: MOV TEMP[2].xyz, TEMP[0].xyzx 58: ENDIF 59: ENDIF 60: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 61: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 62: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 63: ADD TEMP[0], TEMP[1], CONST[3] 64: MOV TEMP[2].xy, IN[3].xyxx 65: MOV TEMP[1].xyz, TEMP[2].xyzx 66: MOV TEMP[1].w, IMM[0].yyyy 67: MOV OUT[1], TEMP[1] 68: MOV OUT[0], TEMP[0] 69: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %30 = load <16 x i8> addrspace(2)* %29, !tbaa !0 %31 = add i32 %5, %7 %32 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %30, i32 0, i32 %31) %33 = extractelement <4 x float> %32, i32 0 %34 = extractelement <4 x float> %32, i32 1 %35 = extractelement <4 x float> %32, i32 2 %36 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %37 = load <16 x i8> addrspace(2)* %36, !tbaa !0 %38 = add i32 %5, %7 %39 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %37, i32 0, i32 %38) %40 = extractelement <4 x float> %39, i32 0 %41 = extractelement <4 x float> %39, i32 1 %42 = extractelement <4 x float> %39, i32 2 %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = fmul float 3.000000e+00, %47 %57 = fmul float %33, 1.000000e+00 %58 = fadd float %57, 0.000000e+00 %59 = fmul float %34, 1.000000e+00 %60 = fadd float %59, 0.000000e+00 %61 = fmul float %35, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %33, 0.000000e+00 %64 = fadd float %63, 1.000000e+00 %65 = fptosi float %56 to i32 %66 = bitcast i32 %65 to float %67 = bitcast float %66 to i32 %68 = shl i32 %67, 4 %69 = add i32 %68, 64 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = shl i32 %67, 4 %72 = add i32 %71, 68 %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %72) %74 = shl i32 %67, 4 %75 = add i32 %74, 72 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = shl i32 %67, 4 %78 = add i32 %77, 76 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %58, %70 %81 = fmul float %60, %73 %82 = fadd float %80, %81 %83 = fmul float %62, %76 %84 = fadd float %82, %83 %85 = fmul float %64, %79 %86 = fadd float %84, %85 %87 = fptosi float %56 to i32 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = add i32 1, %89 %91 = bitcast i32 %90 to float %92 = bitcast float %91 to i32 %93 = shl i32 %92, 4 %94 = add i32 %93, 64 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %92, 4 %97 = add i32 %96, 68 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = shl i32 %92, 4 %100 = add i32 %99, 72 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = shl i32 %92, 4 %103 = add i32 %102, 76 %104 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %103) %105 = fmul float %58, %95 %106 = fmul float %60, %98 %107 = fadd float %105, %106 %108 = fmul float %62, %101 %109 = fadd float %107, %108 %110 = fmul float %64, %104 %111 = fadd float %109, %110 %112 = fptosi float %56 to i32 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = add i32 2, %114 %116 = bitcast i32 %115 to float %117 = bitcast float %116 to i32 %118 = shl i32 %117, 4 %119 = add i32 %118, 64 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %117, 4 %122 = add i32 %121, 68 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = shl i32 %117, 4 %125 = add i32 %124, 72 %126 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %125) %127 = shl i32 %117, 4 %128 = add i32 %127, 76 %129 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %128) %130 = fmul float %58, %120 %131 = fmul float %60, %123 %132 = fadd float %130, %131 %133 = fmul float %62, %126 %134 = fadd float %132, %133 %135 = fmul float %64, %129 %136 = fadd float %134, %135 %137 = fmul float %86, %40 %138 = fmul float %111, %40 %139 = fmul float %136, %40 %140 = fcmp olt float 0.000000e+00, %41 %141 = sext i1 %140 to i32 %142 = bitcast i32 %141 to float %143 = bitcast float %142 to i32 %144 = icmp ne i32 %143, 0 br i1 %144, label %IF, label %ENDIF IF: ; preds = %main_body %145 = fmul float 3.000000e+00, %48 %146 = fptosi float %145 to i32 %147 = bitcast i32 %146 to float %148 = bitcast float %147 to i32 %149 = shl i32 %148, 4 %150 = add i32 %149, 64 %151 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %150) %152 = shl i32 %148, 4 %153 = add i32 %152, 68 %154 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %153) %155 = shl i32 %148, 4 %156 = add i32 %155, 72 %157 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %156) %158 = shl i32 %148, 4 %159 = add i32 %158, 76 %160 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %159) %161 = fmul float %58, %151 %162 = fmul float %60, %154 %163 = fadd float %161, %162 %164 = fmul float %62, %157 %165 = fadd float %163, %164 %166 = fmul float %64, %160 %167 = fadd float %165, %166 %168 = fptosi float %145 to i32 %169 = bitcast i32 %168 to float %170 = bitcast float %169 to i32 %171 = add i32 1, %170 %172 = bitcast i32 %171 to float %173 = bitcast float %172 to i32 %174 = shl i32 %173, 4 %175 = add i32 %174, 64 %176 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %175) %177 = shl i32 %173, 4 %178 = add i32 %177, 68 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = shl i32 %173, 4 %181 = add i32 %180, 72 %182 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %181) %183 = shl i32 %173, 4 %184 = add i32 %183, 76 %185 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %184) %186 = fmul float %58, %176 %187 = fmul float %60, %179 %188 = fadd float %186, %187 %189 = fmul float %62, %182 %190 = fadd float %188, %189 %191 = fmul float %64, %185 %192 = fadd float %190, %191 %193 = fptosi float %145 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = add i32 2, %195 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = shl i32 %198, 4 %200 = add i32 %199, 64 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = shl i32 %198, 4 %203 = add i32 %202, 68 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %198, 4 %206 = add i32 %205, 72 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %198, 4 %209 = add i32 %208, 76 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %58, %201 %212 = fmul float %60, %204 %213 = fadd float %211, %212 %214 = fmul float %62, %207 %215 = fadd float %213, %214 %216 = fmul float %64, %210 %217 = fadd float %215, %216 %218 = fmul float %41, %167 %219 = fadd float %218, %137 %220 = fmul float %41, %192 %221 = fadd float %220, %138 %222 = fmul float %41, %217 %223 = fadd float %222, %139 %224 = fcmp olt float 0.000000e+00, %42 %225 = sext i1 %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = icmp ne i32 %227, 0 br i1 %228, label %IF44, label %ENDIF ENDIF: ; preds = %IF44, %IF, %main_body %temp8.0 = phi float [ %137, %main_body ], [ %327, %IF44 ], [ %219, %IF ] %temp9.0 = phi float [ %138, %main_body ], [ %329, %IF44 ], [ %221, %IF ] %temp10.0 = phi float [ %139, %main_body ], [ %331, %IF44 ], [ %223, %IF ] %229 = fmul float %temp9.0, %17 %230 = fmul float %temp9.0, %18 %231 = fmul float %temp9.0, %19 %232 = fmul float %temp9.0, %20 %233 = fmul float %temp8.0, %13 %234 = fadd float %233, %229 %235 = fmul float %temp8.0, %14 %236 = fadd float %235, %230 %237 = fmul float %temp8.0, %15 %238 = fadd float %237, %231 %239 = fmul float %temp8.0, %16 %240 = fadd float %239, %232 %241 = fmul float %temp10.0, %21 %242 = fadd float %241, %234 %243 = fmul float %temp10.0, %22 %244 = fadd float %243, %236 %245 = fmul float %temp10.0, %23 %246 = fadd float %245, %238 %247 = fmul float %temp10.0, %24 %248 = fadd float %247, %240 %249 = fadd float %242, %25 %250 = fadd float %244, %26 %251 = fadd float %246, %27 %252 = fadd float %248, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %54, float %55, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %249, float %250, float %251, float %252) ret void IF44: ; preds = %IF %253 = fmul float 3.000000e+00, %49 %254 = fptosi float %253 to i32 %255 = bitcast i32 %254 to float %256 = bitcast float %255 to i32 %257 = shl i32 %256, 4 %258 = add i32 %257, 64 %259 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %258) %260 = shl i32 %256, 4 %261 = add i32 %260, 68 %262 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %261) %263 = shl i32 %256, 4 %264 = add i32 %263, 72 %265 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %264) %266 = shl i32 %256, 4 %267 = add i32 %266, 76 %268 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %267) %269 = fmul float %58, %259 %270 = fmul float %60, %262 %271 = fadd float %269, %270 %272 = fmul float %62, %265 %273 = fadd float %271, %272 %274 = fmul float %64, %268 %275 = fadd float %273, %274 %276 = fptosi float %253 to i32 %277 = bitcast i32 %276 to float %278 = bitcast float %277 to i32 %279 = add i32 1, %278 %280 = bitcast i32 %279 to float %281 = bitcast float %280 to i32 %282 = shl i32 %281, 4 %283 = add i32 %282, 64 %284 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %283) %285 = shl i32 %281, 4 %286 = add i32 %285, 68 %287 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %286) %288 = shl i32 %281, 4 %289 = add i32 %288, 72 %290 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %289) %291 = shl i32 %281, 4 %292 = add i32 %291, 76 %293 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %292) %294 = fmul float %58, %284 %295 = fmul float %60, %287 %296 = fadd float %294, %295 %297 = fmul float %62, %290 %298 = fadd float %296, %297 %299 = fmul float %64, %293 %300 = fadd float %298, %299 %301 = fptosi float %253 to i32 %302 = bitcast i32 %301 to float %303 = bitcast float %302 to i32 %304 = add i32 2, %303 %305 = bitcast i32 %304 to float %306 = bitcast float %305 to i32 %307 = shl i32 %306, 4 %308 = add i32 %307, 64 %309 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %308) %310 = shl i32 %306, 4 %311 = add i32 %310, 68 %312 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %311) %313 = shl i32 %306, 4 %314 = add i32 %313, 72 %315 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %314) %316 = shl i32 %306, 4 %317 = add i32 %316, 76 %318 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %317) %319 = fmul float %58, %309 %320 = fmul float %60, %312 %321 = fadd float %319, %320 %322 = fmul float %62, %315 %323 = fadd float %321, %322 %324 = fmul float %64, %318 %325 = fadd float %323, %324 %326 = fmul float %42, %275 %327 = fadd float %326, %219 %328 = fmul float %42, %300 %329 = fadd float %328, %221 %330 = fmul float %42, %325 %331 = fadd float %330, %223 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[4:7], s[8:9], 0xc ; C082090C s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[6:9], v11, s[12:15], 0 idxen ; E00C2000 8003060B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v2, 0, v7 ; 06040E80 buffer_load_format_xyzw v[15:18], v11, s[20:23], 0 idxen ; E00C2000 80050F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v15 ; 10001EFF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0x64, v3 ; 4A0006FF 00000064 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_add_f32_e32 v4, 0, v6 ; 06080C80 v_add_i32_e32 v1, 0x60, v3 ; 4A0206FF 00000060 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v4, v1, v0 ; D2820000 04020304 v_add_f32_e32 v5, 0, v8 ; 060A1080 v_add_i32_e32 v1, 0x68, v3 ; 4A0206FF 00000068 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v5, v1, v0 ; D2820000 04020305 v_mad_f32 v6, 0, v6, 1.0 ; D2820006 03CA0C80 v_add_i32_e32 v1, 0x6c, v3 ; 4A0206FF 0000006C buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v1, v0 ; D2820000 04020306 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_add_i32_e32 v1, 0x54, v3 ; 4A0206FF 00000054 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_add_i32_e32 v12, 0x50, v3 ; 4A1806FF 00000050 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v4, v12, v1 ; D2820001 04061904 v_add_i32_e32 v12, 0x58, v3 ; 4A1806FF 00000058 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v5, v12, v1 ; D2820001 04061905 v_add_i32_e32 v12, 0x5c, v3 ; 4A1806FF 0000005C buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v6, v12, v1 ; D2820001 04061906 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v12, 0x44, v3 ; 4A1806FF 00000044 buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v12, v12, v2 ; 1018050C v_add_i32_e32 v13, 64, v3 ; 4A1A06C0 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v4, v13, v12 ; D282000C 04321B04 v_add_i32_e32 v13, 0x48, v3 ; 4A1A06FF 00000048 buffer_load_dword v13, v13, s[0:3], 0 offen ; E0301000 80000D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, v5, v13, v12 ; D282000C 04321B05 v_add_i32_e32 v3, 0x4c, v3 ; 4A0606FF 0000004C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v6, v3, v12 ; D2820003 04320706 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v19, 0, -1, s[4:5] ; D2000813 00118280 v_cmp_ne_i32_e64 s[20:21], v19, 0 ; D10A0014 00010113 s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_buffer_load_dword s7, s[0:3], 0xe ; C203810E s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_buffer_load_dword s12, s[0:3], 0xb ; C206010B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s8, s[0:3], 0x9 ; C2040109 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_buffer_load_dword s17, s[0:3], 0x7 ; C2088107 s_buffer_load_dword s15, s[0:3], 0x6 ; C2078106 s_buffer_load_dword s13, s[0:3], 0x5 ; C2068105 s_buffer_load_dword s11, s[0:3], 0x4 ; C2058104 s_buffer_load_dword s19, s[0:3], 0x3 ; C2098103 s_buffer_load_dword s18, s[0:3], 0x2 ; C2090102 s_buffer_load_dword s16, s[0:3], 0x1 ; C2080101 s_buffer_load_dword s14, s[0:3], 0x0 ; C2070100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[20:21], s[20:21] ; BE942414 s_xor_b64 s[20:21], exec, s[20:21] ; 8994147E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v19, 0x40400000, v16 ; 102620FF 40400000 v_cvt_i32_f32_e32 v19, v19 ; 7E261113 v_lshlrev_b32_e32 v19, 4, v19 ; 34262684 v_add_i32_e32 v20, 0x60, v19 ; 4A2826FF 00000060 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x64, v19 ; 4A2A26FF 00000064 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x68, v19 ; 4A2A26FF 00000068 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x6c, v19 ; 4A2A26FF 0000006C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v0, v8, v20, v0 ; D2820000 04022908 v_add_i32_e32 v20, 0x50, v19 ; 4A2826FF 00000050 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x54, v19 ; 4A2A26FF 00000054 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x58, v19 ; 4A2A26FF 00000058 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v21, 0x5c, v19 ; 4A2A26FF 0000005C buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v6, v21, v20 ; D2820014 04522B06 v_mad_f32 v1, v8, v20, v1 ; D2820001 04062908 v_add_i32_e32 v20, 64, v19 ; 4A2826C0 buffer_load_dword v20, v20, s[0:3], 0 offen ; E0301000 80001414 v_add_i32_e32 v21, 0x44, v19 ; 4A2A26FF 00000044 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v21, v21, v2 ; 102A0515 v_mad_f32 v20, v4, v20, v21 ; D2820014 04562904 v_add_i32_e32 v21, 0x48, v19 ; 4A2A26FF 00000048 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, v5, v21, v20 ; D2820014 04522B05 v_add_i32_e32 v19, 0x4c, v19 ; 4A2626FF 0000004C buffer_load_dword v19, v19, s[0:3], 0 offen ; E0301000 80001313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, v6, v19, v20 ; D2820013 04522706 v_mad_f32 v3, v8, v19, v3 ; D2820003 040E2708 v_cmp_gt_f32_e64 s[22:23], v9, 0 ; D0080016 00010109 v_cndmask_b32_e64 v19, 0, -1, s[22:23] ; D2000813 00598280 v_cmp_ne_i32_e64 s[22:23], v19, 0 ; D10A0016 00010113 s_and_saveexec_b64 s[22:23], s[22:23] ; BE962416 s_xor_b64 s[22:23], exec, s[22:23] ; 8996167E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v15, 0x40400000, v17 ; 101E22FF 40400000 v_cvt_i32_f32_e32 v15, v15 ; 7E1E110F v_lshlrev_b32_e32 v15, 4, v15 ; 341E1E84 v_add_i32_e32 v16, 0x60, v15 ; 4A201EFF 00000060 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x64, v15 ; 4A221EFF 00000064 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x68, v15 ; 4A221EFF 00000068 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x6c, v15 ; 4A221EFF 0000006C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v0, v9, v16, v0 ; D2820000 04022109 v_add_i32_e32 v16, 0x50, v15 ; 4A201EFF 00000050 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x54, v15 ; 4A221EFF 00000054 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v17, v17, v2 ; 10220511 v_mad_f32 v16, v4, v16, v17 ; D2820010 04462104 v_add_i32_e32 v17, 0x58, v15 ; 4A221EFF 00000058 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v5, v17, v16 ; D2820010 04422305 v_add_i32_e32 v17, 0x5c, v15 ; 4A221EFF 0000005C buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v16, v6, v17, v16 ; D2820010 04422306 v_mad_f32 v1, v9, v16, v1 ; D2820001 04062109 v_add_i32_e32 v16, 64, v15 ; 4A201EC0 buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 v_add_i32_e32 v17, 0x44, v15 ; 4A221EFF 00000044 buffer_load_dword v17, v17, s[0:3], 0 offen ; E0301000 80001111 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v17, v2 ; 10040511 v_mad_f32 v2, v4, v16, v2 ; D2820002 040A2104 v_add_i32_e32 v4, 0x48, v15 ; 4A081EFF 00000048 buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v5, v4, v2 ; D2820002 040A0905 v_add_i32_e32 v4, 0x4c, v15 ; 4A081EFF 0000004C buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v6, v4, v2 ; D2820002 040A0906 v_mad_f32 v3, v9, v2, v3 ; D2820003 040E0509 s_or_b64 exec, exec, s[22:23] ; 88FE167E s_or_b64 exec, exec, s[20:21] ; 88FE147E v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 32, 0, 0, 0, v11, v12, v0, v2 ; F800020F 02000C0B s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v2, s17, v1 ; 10040211 v_mad_f32 v2, v3, s19, v2 ; D2820002 04082703 v_mad_f32 v2, v0, s12, v2 ; D2820002 04081900 v_add_f32_e32 v2, s9, v2 ; 06040409 v_mul_f32_e32 v4, s15, v1 ; 1008020F v_mad_f32 v4, v3, s18, v4 ; D2820004 04102503 v_mad_f32 v4, v0, s10, v4 ; D2820004 04101500 v_add_f32_e32 v4, s7, v4 ; 06080807 v_mul_f32_e32 v5, s13, v1 ; 100A020D v_mad_f32 v5, v3, s16, v5 ; D2820005 04142103 v_mad_f32 v5, v0, s8, v5 ; D2820005 04141100 v_add_f32_e32 v5, s5, v5 ; 060A0A05 v_mul_f32_e32 v1, s11, v1 ; 1002020B v_mad_f32 v1, v3, s14, v1 ; D2820001 04041D03 v_mad_f32 v0, v0, s6, v1 ; D2820000 04040D00 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v0, v5, v4, v2 ; F80008CF 02040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.1000, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].x, -CONST[0].xxxx, IN[0].zzzz 1: MOV TEMP[1].xy, IN[0].xyyy 2: TEX TEMP[1].w, TEMP[1], SAMP[0], 2D 3: POW TEMP[1].w, TEMP[1].wwww, IMM[0].yyyy 4: MUL TEMP[2].y, TEMP[1].wwww, IMM[0].zzzz 5: FSGE TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 6: UIF TEMP[0].xxxx :0 7: MOV TEMP[0].x, TEMP[1].wwww 8: ELSE :0 9: MOV TEMP[0].x, TEMP[2].yyyy 10: ENDIF 11: ADD TEMP[1].y, -TEMP[1].wwww, IMM[1].xxxx 12: FSGE TEMP[1].x, TEMP[1].yyyy, IMM[0].wwww 13: UIF TEMP[1].xxxx :0 14: MOV TEMP[1].x, IMM[0].wwww 15: ELSE :0 16: MOV TEMP[1].x, TEMP[0].xxxx 17: ENDIF 18: MOV TEMP[0].w, TEMP[1].xxxx 19: MOV TEMP[0].xyz, IMM[0].yyyy 20: MOV OUT[0], TEMP[0] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %26 = load <8 x i32> addrspace(2)* %25, !tbaa !0 %27 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %28 = load <4 x i32> addrspace(2)* %27, !tbaa !0 %29 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %32 = fsub float -0.000000e+00, %24 %33 = fadd float %32, %31 %34 = bitcast float %29 to i32 %35 = bitcast float %30 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %26 to <32 x i8> %39 = bitcast <4 x i32> %28 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 3 %42 = call float @llvm.pow.f32(float %41, float 1.000000e+00) %43 = fmul float %42, 5.000000e-01 %44 = fcmp oge float %33, 0.000000e+00 %45 = sext i1 %44 to i32 %46 = bitcast i32 %45 to float %47 = bitcast float %46 to i32 %48 = icmp ne i32 %47, 0 %. = select i1 %48, float %42, float %43 %49 = fsub float -0.000000e+00, %42 %50 = fadd float %49, 0x3FB99999A0000000 %51 = fcmp oge float %50, 0.000000e+00 %52 = sext i1 %51 to i32 %53 = bitcast i32 %52 to float %54 = bitcast float %53 to i32 %55 = icmp ne i32 %54, 0 %temp4.0 = select i1 %55, float 0.000000e+00, float %. %56 = call i32 @llvm.SI.packf16(float 1.000000e+00, float 1.000000e+00) %57 = bitcast i32 %56 to float %58 = call i32 @llvm.SI.packf16(float 1.000000e+00, float %temp4.0) %59 = bitcast i32 %58 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %57, float %59, float %57, float %59) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800800 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_mul_f32_e32 v3, 0.5, v2 ; 100604F0 v_interp_p1_f32 v4, v0, 2, 0, [m0] ; C8100200 v_interp_p2_f32 v4, [v4], v1, 2, 0, [m0] ; C8110201 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s0, s[0:3], 0x0 ; C2000100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v0, s0, v4 ; 0A000800 v_cmp_ge_f32_e64 s[0:1], v0, 0 ; D00C0000 00010100 v_cndmask_b32_e64 v0, 0, -1, s[0:1] ; D2000000 00018280 v_cmp_ne_i32_e64 s[0:1], v0, 0 ; D10A0000 00010100 v_cndmask_b32_e64 v0, v3, v2, s[0:1] ; D2000000 18020503 v_sub_f32_e32 v1, 0x3dcccccd, v2 ; 080204FF 3DCCCCCD v_cmp_ge_f32_e64 s[0:1], v1, 0 ; D00C0000 00010101 v_cndmask_b32_e64 v1, 0, -1, s[0:1] ; D2000801 00018280 v_cmp_ne_i32_e64 s[0:1], v1, 0 ; D10A0000 00010101 v_cndmask_b32_e64 v0, v0, 0, s[0:1] ; D2000000 00010100 v_cvt_pkrtz_f16_f32_e32 v0, 1.0, v0 ; 5E0000F2 v_cvt_pkrtz_f16_f32_e64 v1, 1.0, 1.0 ; D25E0001 0001E4F2 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xyzx, IMM[0].xxxy, IMM[0].yyyx 1: MOV TEMP[1].xy, IN[1].xyxx 2: MOV TEMP[1].zw, IMM[0].xxyx 3: MOV OUT[1], TEMP[1] 4: MOV OUT[0], TEMP[0] 5: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = add i32 %5, %7 %14 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %12, i32 0, i32 %13) %15 = extractelement <4 x float> %14, i32 0 %16 = extractelement <4 x float> %14, i32 1 %17 = extractelement <4 x float> %14, i32 2 %18 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %19 = load <16 x i8> addrspace(2)* %18, !tbaa !0 %20 = add i32 %5, %7 %21 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %19, i32 0, i32 %20) %22 = extractelement <4 x float> %21, i32 0 %23 = extractelement <4 x float> %21, i32 1 %24 = fmul float %15, 1.000000e+00 %25 = fadd float %24, 0.000000e+00 %26 = fmul float %16, 1.000000e+00 %27 = fadd float %26, 0.000000e+00 %28 = fmul float %17, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %15, 0.000000e+00 %31 = fadd float %30, 1.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %22, float %23, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %25, float %27, float %29, float %31) ret void } ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[0:3], s[8:9], 0x0 ; C0800900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[4:7], 0 idxen ; E00C2000 80010100 v_mov_b32_e32 v5, 1.0 ; 7E0A02F2 v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v6, v5 ; F800020F 05060201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[0:3], 0 idxen ; E00C2000 80000000 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 0, v0, 1.0 ; D2820004 03CA0080 v_add_f32_e32 v5, 0, v2 ; 060A0480 v_add_f32_e32 v6, 0, v1 ; 060C0280 v_add_f32_e32 v0, 0, v0 ; 06000080 exp 15, 12, 0, 1, 0, v0, v6, v5, v4 ; F80008CF 04050600 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.5000} IMM[1] FLT32 { 0.1250, 0.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[0], IN[0] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 3: MOV TEMP[2].xy, TEMP[1].xyyy 4: TEX TEMP[2].zw, TEMP[2], SAMP[0], 2D 5: MOV TEMP[0].zw, TEMP[2].wwzw 6: ADD TEMP[3].xy, CONST[0].xwzw, IN[0] 7: MOV TEMP[0].xy, TEMP[3].xyxx 8: MAD TEMP[1].xy, TEMP[0], IMM[0].xyxx, IMM[0].zxzz 9: MOV TEMP[3].xy, TEMP[1].xyyy 10: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 11: ADD TEMP[2].x, TEMP[2].wwww, TEMP[3].wwww 12: ADD TEMP[3].yz, CONST[0].xzyw, IN[0].xxyw 13: MOV TEMP[0].yz, TEMP[3].zyzz 14: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 15: MOV TEMP[3].xy, TEMP[1].xyyy 16: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 17: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 18: ADD TEMP[3].yz, CONST[0].xzww, IN[0].xxyw 19: MOV TEMP[0].yz, TEMP[3].zyzz 20: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 21: MOV TEMP[3].xy, TEMP[1].xyyy 22: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 23: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 24: MAD TEMP[3].yz, CONST[0].xxyw, IMM[0].wwww, IN[0].xxyw 25: MOV TEMP[0].yz, TEMP[3].zyzz 26: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 27: MOV TEMP[3].xy, TEMP[1].xyyy 28: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 29: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 30: MAD TEMP[3].yz, CONST[0].xxww, IMM[0].wwww, IN[0].xxyw 31: MOV TEMP[0].yz, TEMP[3].zyzz 32: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 33: MOV TEMP[3].xy, TEMP[1].xyyy 34: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 35: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 36: MAD TEMP[3].yz, CONST[0].xzyw, IMM[0].wwww, IN[0].xxyw 37: MOV TEMP[0].yz, TEMP[3].zyzz 38: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 39: MOV TEMP[3].xy, TEMP[1].xyyy 40: TEX TEMP[3].w, TEMP[3], SAMP[0], 2D 41: ADD TEMP[2].x, TEMP[2].xxxx, TEMP[3].wwww 42: MAD TEMP[3].yz, CONST[0].xzww, IMM[0].wwww, IN[0].xxyw 43: MOV TEMP[0].yz, TEMP[3].zyzz 44: MAD TEMP[1].xy, TEMP[0].yzzw, IMM[0].xyxx, IMM[0].zxzz 45: MOV TEMP[0].xy, TEMP[1].xyyy 46: TEX TEMP[0].w, TEMP[0], SAMP[0], 2D 47: ADD TEMP[0].x, TEMP[2].xxxx, TEMP[0].wwww 48: MUL TEMP[0], TEMP[0].xxxx, IMM[1].xxxx 49: MOV OUT[0], TEMP[0] 50: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %29 = load <8 x i32> addrspace(2)* %28, !tbaa !0 %30 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %31 = load <4 x i32> addrspace(2)* %30, !tbaa !0 %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %34 = fadd float %24, %32 %35 = fadd float %25, %33 %36 = fmul float %34, 1.000000e+00 %37 = fadd float %36, 0.000000e+00 %38 = fmul float %35, -1.000000e+00 %39 = fadd float %38, 1.000000e+00 %40 = bitcast float %37 to i32 %41 = bitcast float %39 to i32 %42 = insertelement <2 x i32> undef, i32 %40, i32 0 %43 = insertelement <2 x i32> %42, i32 %41, i32 1 %44 = bitcast <8 x i32> %29 to <32 x i8> %45 = bitcast <4 x i32> %31 to <16 x i8> %46 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %43, <32 x i8> %44, <16 x i8> %45, i32 2) %47 = extractelement <4 x float> %46, i32 3 %48 = fadd float %24, %32 %49 = fadd float %27, %33 %50 = fmul float %48, 1.000000e+00 %51 = fadd float %50, 0.000000e+00 %52 = fmul float %49, -1.000000e+00 %53 = fadd float %52, 1.000000e+00 %54 = bitcast float %51 to i32 %55 = bitcast float %53 to i32 %56 = insertelement <2 x i32> undef, i32 %54, i32 0 %57 = insertelement <2 x i32> %56, i32 %55, i32 1 %58 = bitcast <8 x i32> %29 to <32 x i8> %59 = bitcast <4 x i32> %31 to <16 x i8> %60 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %57, <32 x i8> %58, <16 x i8> %59, i32 2) %61 = extractelement <4 x float> %60, i32 3 %62 = fadd float %47, %61 %63 = fadd float %26, %32 %64 = fadd float %25, %33 %65 = fmul float %63, 1.000000e+00 %66 = fadd float %65, 0.000000e+00 %67 = fmul float %64, -1.000000e+00 %68 = fadd float %67, 1.000000e+00 %69 = bitcast float %66 to i32 %70 = bitcast float %68 to i32 %71 = insertelement <2 x i32> undef, i32 %69, i32 0 %72 = insertelement <2 x i32> %71, i32 %70, i32 1 %73 = bitcast <8 x i32> %29 to <32 x i8> %74 = bitcast <4 x i32> %31 to <16 x i8> %75 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %72, <32 x i8> %73, <16 x i8> %74, i32 2) %76 = extractelement <4 x float> %75, i32 3 %77 = fadd float %62, %76 %78 = fadd float %26, %32 %79 = fadd float %27, %33 %80 = fmul float %78, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %79, -1.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = bitcast float %81 to i32 %85 = bitcast float %83 to i32 %86 = insertelement <2 x i32> undef, i32 %84, i32 0 %87 = insertelement <2 x i32> %86, i32 %85, i32 1 %88 = bitcast <8 x i32> %29 to <32 x i8> %89 = bitcast <4 x i32> %31 to <16 x i8> %90 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %87, <32 x i8> %88, <16 x i8> %89, i32 2) %91 = extractelement <4 x float> %90, i32 3 %92 = fadd float %77, %91 %93 = fmul float %24, 5.000000e-01 %94 = fadd float %93, %32 %95 = fmul float %25, 5.000000e-01 %96 = fadd float %95, %33 %97 = fmul float %94, 1.000000e+00 %98 = fadd float %97, 0.000000e+00 %99 = fmul float %96, -1.000000e+00 %100 = fadd float %99, 1.000000e+00 %101 = bitcast float %98 to i32 %102 = bitcast float %100 to i32 %103 = insertelement <2 x i32> undef, i32 %101, i32 0 %104 = insertelement <2 x i32> %103, i32 %102, i32 1 %105 = bitcast <8 x i32> %29 to <32 x i8> %106 = bitcast <4 x i32> %31 to <16 x i8> %107 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %104, <32 x i8> %105, <16 x i8> %106, i32 2) %108 = extractelement <4 x float> %107, i32 3 %109 = fadd float %92, %108 %110 = fmul float %24, 5.000000e-01 %111 = fadd float %110, %32 %112 = fmul float %27, 5.000000e-01 %113 = fadd float %112, %33 %114 = fmul float %111, 1.000000e+00 %115 = fadd float %114, 0.000000e+00 %116 = fmul float %113, -1.000000e+00 %117 = fadd float %116, 1.000000e+00 %118 = bitcast float %115 to i32 %119 = bitcast float %117 to i32 %120 = insertelement <2 x i32> undef, i32 %118, i32 0 %121 = insertelement <2 x i32> %120, i32 %119, i32 1 %122 = bitcast <8 x i32> %29 to <32 x i8> %123 = bitcast <4 x i32> %31 to <16 x i8> %124 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %121, <32 x i8> %122, <16 x i8> %123, i32 2) %125 = extractelement <4 x float> %124, i32 3 %126 = fadd float %109, %125 %127 = fmul float %26, 5.000000e-01 %128 = fadd float %127, %32 %129 = fmul float %25, 5.000000e-01 %130 = fadd float %129, %33 %131 = fmul float %128, 1.000000e+00 %132 = fadd float %131, 0.000000e+00 %133 = fmul float %130, -1.000000e+00 %134 = fadd float %133, 1.000000e+00 %135 = bitcast float %132 to i32 %136 = bitcast float %134 to i32 %137 = insertelement <2 x i32> undef, i32 %135, i32 0 %138 = insertelement <2 x i32> %137, i32 %136, i32 1 %139 = bitcast <8 x i32> %29 to <32 x i8> %140 = bitcast <4 x i32> %31 to <16 x i8> %141 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %138, <32 x i8> %139, <16 x i8> %140, i32 2) %142 = extractelement <4 x float> %141, i32 3 %143 = fadd float %126, %142 %144 = fmul float %26, 5.000000e-01 %145 = fadd float %144, %32 %146 = fmul float %27, 5.000000e-01 %147 = fadd float %146, %33 %148 = fmul float %145, 1.000000e+00 %149 = fadd float %148, 0.000000e+00 %150 = fmul float %147, -1.000000e+00 %151 = fadd float %150, 1.000000e+00 %152 = bitcast float %149 to i32 %153 = bitcast float %151 to i32 %154 = insertelement <2 x i32> undef, i32 %152, i32 0 %155 = insertelement <2 x i32> %154, i32 %153, i32 1 %156 = bitcast <8 x i32> %29 to <32 x i8> %157 = bitcast <4 x i32> %31 to <16 x i8> %158 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %155, <32 x i8> %156, <16 x i8> %157, i32 2) %159 = extractelement <4 x float> %158, i32 3 %160 = fadd float %143, %159 %161 = fmul float %160, 1.250000e-01 %162 = fmul float %160, 1.250000e-01 %163 = fmul float %160, 1.250000e-01 %164 = fmul float %160, 1.250000e-01 %165 = call i32 @llvm.SI.packf16(float %161, float %162) %166 = bitcast i32 %165 to float %167 = call i32 @llvm.SI.packf16(float %163, float %164) %168 = bitcast i32 %167 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %166, float %168, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x3 ; C2040103 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s8, v2 ; 06060408 v_sub_f32_e32 v4, 1.0, v3 ; 080806F2 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_buffer_load_dword s9, s[0:3], 0x0 ; C2048100 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s9, v5 ; 06000A09 v_add_f32_e32 v3, 0, v0 ; 06060080 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v0, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640003 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v1, s4, v2 ; 06020404 v_sub_f32_e32 v7, 1.0, v1 ; 080E02F2 v_mov_b32_e32 v8, v3 ; 7E100303 v_mov_b32_e32 v9, v4 ; 7E120304 v_mov_b32_e32 v9, v7 ; 7E120307 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[8:9], s[16:23], s[12:15] ; F0800800 00640108 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v0, v1 ; 06000300 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s0, v5 ; 06020A00 v_add_f32_e32 v6, 0, v1 ; 060C0280 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800800 00640106 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mov_b32_e32 v7, v4 ; 7E0E0304 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800800 00640106 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mad_f32 v1, 0.5, s4, v2 ; D2820001 040808F0 v_sub_f32_e32 v4, 1.0, v1 ; 080802F2 v_mad_f32 v1, 0.5, s9, v5 ; D2820001 041412F0 v_add_f32_e32 v3, 0, v1 ; 06060280 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640103 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mad_f32 v1, 0.5, s8, v2 ; D2820001 040810F0 v_sub_f32_e32 v1, 1.0, v1 ; 080202F2 v_mov_b32_e32 v6, v3 ; 7E0C0303 v_mov_b32_e32 v7, v4 ; 7E0E0304 v_mov_b32_e32 v7, v1 ; 7E0E0301 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[16:23], s[12:15] ; F0800800 00640206 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v2, v0 ; 06000102 v_mad_f32 v2, 0.5, s0, v5 ; D2820002 041400F0 v_add_f32_e32 v3, 0, v2 ; 06060480 image_sample v2, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640203 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v2, v0 ; 06000102 v_mov_b32_e32 v4, v1 ; 7E080301 image_sample v1, 8, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[16:23], s[12:15] ; F0800800 00640103 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v1, v0 ; 06000101 v_mul_f32_e32 v0, 0x3e000000, v0 ; 100000FF 3E000000 v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; 5E000100 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..7] DCL TEMP[0..1], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: ADD TEMP[0], TEMP[0], CONST[7] 3: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 4: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 5: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 6: MAD TEMP[0], TEMP[0].wwww, CONST[3], TEMP[1] 7: MOV OUT[1], IN[1] 8: MOV OUT[0], TEMP[0] 9: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %48 = load <16 x i8> addrspace(2)* %47, !tbaa !0 %49 = add i32 %5, %7 %50 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %48, i32 0, i32 %49) %51 = extractelement <4 x float> %50, i32 0 %52 = extractelement <4 x float> %50, i32 1 %53 = extractelement <4 x float> %50, i32 2 %54 = extractelement <4 x float> %50, i32 3 %55 = fmul float %33, %46 %56 = fmul float %34, %46 %57 = fmul float %35, %46 %58 = fmul float %36, %46 %59 = fmul float %45, %29 %60 = fadd float %59, %55 %61 = fmul float %45, %30 %62 = fadd float %61, %56 %63 = fmul float %45, %31 %64 = fadd float %63, %57 %65 = fmul float %45, %32 %66 = fadd float %65, %58 %67 = fadd float %60, %37 %68 = fadd float %62, %38 %69 = fadd float %64, %39 %70 = fadd float %66, %40 %71 = fmul float %68, %17 %72 = fmul float %68, %18 %73 = fmul float %68, %19 %74 = fmul float %68, %20 %75 = fmul float %67, %13 %76 = fadd float %75, %71 %77 = fmul float %67, %14 %78 = fadd float %77, %72 %79 = fmul float %67, %15 %80 = fadd float %79, %73 %81 = fmul float %67, %16 %82 = fadd float %81, %74 %83 = fmul float %69, %21 %84 = fadd float %83, %76 %85 = fmul float %69, %22 %86 = fadd float %85, %78 %87 = fmul float %69, %23 %88 = fadd float %87, %80 %89 = fmul float %69, %24 %90 = fadd float %89, %82 %91 = fmul float %70, %25 %92 = fadd float %91, %84 %93 = fmul float %70, %26 %94 = fadd float %93, %86 %95 = fmul float %70, %27 %96 = fadd float %95, %88 %97 = fmul float %70, %28 %98 = fadd float %97, %90 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %51, float %52, float %53, float %54) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %92, float %94, float %96, float %98) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v2, v3, v4 ; F800020F 04030201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v7, s4, v6 ; D2820006 04180907 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v1 ; 10100204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v8 ; D2820000 04200900 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v6 ; D2820001 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v5 ; 10040A04 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v7, s4, v2 ; D2820002 04080907 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v5 ; 10060A04 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v7, s4, v3 ; D2820003 040C0907 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v4, s4, v5 ; D2820004 04140904 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v7, s4, v4 ; D2820004 04100907 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v4 ; D2820000 04100100 exp 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL CONST[0] DCL TEMP[0..1], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0], CONST[0], IN[0] 1: MAD TEMP[1], TEMP[0].wwww, IMM[0].xxxy, IMM[0].yyyx 2: MUL TEMP[0], TEMP[0], TEMP[1] 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 12) %28 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %29 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %30 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %32 = fmul float %24, %28 %33 = fmul float %25, %29 %34 = fmul float %26, %30 %35 = fmul float %27, %31 %36 = fmul float %35, 1.000000e+00 %37 = fadd float %36, 0.000000e+00 %38 = fmul float %35, 1.000000e+00 %39 = fadd float %38, 0.000000e+00 %40 = fmul float %35, 1.000000e+00 %41 = fadd float %40, 0.000000e+00 %42 = fmul float %35, 0.000000e+00 %43 = fadd float %42, 1.000000e+00 %44 = fmul float %32, %37 %45 = fmul float %33, %39 %46 = fmul float %34, %41 %47 = fmul float %35, %43 %48 = call i32 @llvm.SI.packf16(float %44, float %45) %49 = bitcast i32 %48 to float %50 = call i32 @llvm.SI.packf16(float %46, float %47) %51 = bitcast i32 %50 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %49, float %51, float %49, float %51) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 3, 0, [m0] ; C8080300 v_interp_p2_f32 v2, [v2], v1, 3, 0, [m0] ; C8090301 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, s4, v2, 0 ; D2820003 02020404 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s5, v4 ; 10080805 v_mul_f32_e32 v4, v3, v4 ; 10080903 v_interp_p1_f32 v5, v0, 0, 0, [m0] ; C8140000 v_interp_p2_f32 v5, [v5], v1, 0, 0, [m0] ; C8150001 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s5, v5 ; 100A0A05 v_mul_f32_e32 v5, v3, v5 ; 100A0B03 v_cvt_pkrtz_f16_f32_e32 v4, v5, v4 ; 5E080905 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s0, v5 ; 10000A00 v_mul_f32_e32 v0, v3, v0 ; 10000103 v_mul_f32_e32 v1, s4, v2 ; 10020404 v_mad_f32 v2, 0, v1, 1.0 ; D2820002 03CA0280 v_mul_f32_e32 v1, v2, v1 ; 10020302 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v4, v0, v4, v0 ; F8001C0F 00040004 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..15] DCL TEMP[0..3], LOCAL 0: MUL TEMP[0], CONST[5], IN[0].yyyy 1: MAD TEMP[0], IN[0].xxxx, CONST[4], TEMP[0] 2: MAD TEMP[0], IN[0].zzzz, CONST[6], TEMP[0] 3: MAD TEMP[0], IN[0].wwww, CONST[7], TEMP[0] 4: MUL TEMP[1], TEMP[0].yyyy, CONST[1] 5: MAD TEMP[1], TEMP[0].xxxx, CONST[0], TEMP[1] 6: MAD TEMP[1], TEMP[0].zzzz, CONST[2], TEMP[1] 7: MAD TEMP[1], TEMP[0].wwww, CONST[3], TEMP[1] 8: MUL TEMP[2].xy, CONST[13], IN[1].yyyy 9: MOV TEMP[0].xy, TEMP[2].xyxx 10: MAD TEMP[2].xy, IN[1].xxxx, CONST[12], TEMP[0] 11: MOV TEMP[0].xy, TEMP[2].xyxx 12: ADD TEMP[2].xy, TEMP[0], CONST[15] 13: MOV TEMP[2].xy, TEMP[2].xyxx 14: MUL TEMP[3].xy, CONST[9], IN[1].yyyy 15: MOV TEMP[0].xy, TEMP[3].xyxx 16: MAD TEMP[0].xy, IN[1].xxxx, CONST[8], TEMP[0] 17: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[11].xyxy 18: MOV TEMP[2].zw, TEMP[0].wwzw 19: MOV OUT[1], TEMP[2] 20: MOV OUT[0], TEMP[1] 21: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = extractelement <4 x float> %60, i32 3 %65 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %66 = load <16 x i8> addrspace(2)* %65, !tbaa !0 %67 = add i32 %5, %7 %68 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %66, i32 0, i32 %67) %69 = extractelement <4 x float> %68, i32 0 %70 = extractelement <4 x float> %68, i32 1 %71 = fmul float %33, %62 %72 = fmul float %34, %62 %73 = fmul float %35, %62 %74 = fmul float %36, %62 %75 = fmul float %61, %29 %76 = fadd float %75, %71 %77 = fmul float %61, %30 %78 = fadd float %77, %72 %79 = fmul float %61, %31 %80 = fadd float %79, %73 %81 = fmul float %61, %32 %82 = fadd float %81, %74 %83 = fmul float %63, %37 %84 = fadd float %83, %76 %85 = fmul float %63, %38 %86 = fadd float %85, %78 %87 = fmul float %63, %39 %88 = fadd float %87, %80 %89 = fmul float %63, %40 %90 = fadd float %89, %82 %91 = fmul float %64, %41 %92 = fadd float %91, %84 %93 = fmul float %64, %42 %94 = fadd float %93, %86 %95 = fmul float %64, %43 %96 = fadd float %95, %88 %97 = fmul float %64, %44 %98 = fadd float %97, %90 %99 = fmul float %94, %17 %100 = fmul float %94, %18 %101 = fmul float %94, %19 %102 = fmul float %94, %20 %103 = fmul float %92, %13 %104 = fadd float %103, %99 %105 = fmul float %92, %14 %106 = fadd float %105, %100 %107 = fmul float %92, %15 %108 = fadd float %107, %101 %109 = fmul float %92, %16 %110 = fadd float %109, %102 %111 = fmul float %96, %21 %112 = fadd float %111, %104 %113 = fmul float %96, %22 %114 = fadd float %113, %106 %115 = fmul float %96, %23 %116 = fadd float %115, %108 %117 = fmul float %96, %24 %118 = fadd float %117, %110 %119 = fmul float %98, %25 %120 = fadd float %119, %112 %121 = fmul float %98, %26 %122 = fadd float %121, %114 %123 = fmul float %98, %27 %124 = fadd float %123, %116 %125 = fmul float %98, %28 %126 = fadd float %125, %118 %127 = fmul float %53, %70 %128 = fmul float %54, %70 %129 = fmul float %69, %51 %130 = fadd float %129, %127 %131 = fmul float %69, %52 %132 = fadd float %131, %128 %133 = fadd float %130, %55 %134 = fadd float %132, %56 %135 = fmul float %47, %70 %136 = fmul float %48, %70 %137 = fmul float %69, %45 %138 = fadd float %137, %135 %139 = fmul float %69, %46 %140 = fadd float %139, %136 %141 = fadd float %138, %49 %142 = fadd float %140, %50 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %133, float %134, float %141, float %142) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %120, float %122, float %124, float %126) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x25 ; C2040125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v2 ; 100A0408 s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s8, v5 ; D2820005 04141101 s_buffer_load_dword s8, s[0:3], 0x2d ; C204012D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s8, v5 ; 060A0A08 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v2 ; 100C0408 s_buffer_load_dword s8, s[0:3], 0x20 ; C2040120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s8, v6 ; D2820006 04181101 s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s8, v6 ; 060C0C08 s_buffer_load_dword s8, s[0:3], 0x35 ; C2040135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v2 ; 100E0408 s_buffer_load_dword s8, s[0:3], 0x31 ; C2040131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s8, v7 ; D2820007 041C1101 s_buffer_load_dword s8, s[0:3], 0x3d ; C204013D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s8, v7 ; 060E0E08 s_buffer_load_dword s8, s[0:3], 0x34 ; C2040134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s8, v2 ; 10100408 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v1, s8, v8 ; D2820001 04201101 s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s8, v1 ; 06020208 exp 15, 32, 0, 0, 0, v1, v7, v6, v5 ; F800020F 05060701 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v3, s4, v4 ; D2820004 04100903 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s4, v5 ; D2820005 04140903 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v3, s4, v7 ; D2820007 041C0903 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v7, s4, v6 ; D2820006 04180907 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v1 ; 10100204 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v0, s4, v8 ; D2820008 04200900 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v2, s4, v8 ; D2820008 04200902 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v8 ; D2820000 04200903 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v6 ; D2820001 04180900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v5 ; 10040A04 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v7, s4, v2 ; D2820002 04080907 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v5 ; 10060A04 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v7, s4, v3 ; D2820003 040C0907 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v4, s4, v5 ; D2820004 04140904 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v7, s4, v4 ; D2820004 04100907 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v4 ; D2820000 04100100 exp 15, 12, 0, 1, 0, v0, v3, v2, v1 ; F80008CF 01020300 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { -0.0010, 0.0000, -1.0000, 1.0000} IMM[1] FLT32 { 1.0000, -0.0000, 0.0000, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: ADD TEMP[1].x, TEMP[0].wwww, IMM[0].xxxx 3: FSGE TEMP[2].x, TEMP[1].xxxx, IMM[0].yyyy 4: UIF TEMP[2].xxxx :0 5: MOV TEMP[2].x, IMM[0].yyyy 6: ELSE :0 7: MOV TEMP[2].x, IMM[0].zzzz 8: ENDIF 9: MOV TEMP[2].x, TEMP[2].xxxx 10: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 11: UIF TEMP[3].xxxx :0 12: MOV TEMP[3].x, IMM[0].yyyy 13: ELSE :0 14: MOV TEMP[3].x, IMM[0].zzzz 15: ENDIF 16: MOV TEMP[2].y, TEMP[3].xxxx 17: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 18: UIF TEMP[3].xxxx :0 19: MOV TEMP[3].x, IMM[0].yyyy 20: ELSE :0 21: MOV TEMP[3].x, IMM[0].zzzz 22: ENDIF 23: MOV TEMP[2].z, TEMP[3].xxxx 24: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].yyyy 25: UIF TEMP[3].xxxx :0 26: ELSE :0 27: ENDIF 28: FSLT TEMP[2].xyz, TEMP[2].xyzz, IMM[0].yyyy 29: OR TEMP[3].x, TEMP[2].xxxx, TEMP[2].zzzz 30: OR TEMP[2].x, TEMP[3].xxxx, TEMP[2].yyyy 31: AND TEMP[2].x, TEMP[2].xxxx, IMM[0].wwww 32: KILL_IF -TEMP[2].xxxx 33: MAD TEMP[1], TEMP[0].wwww, IMM[1].xxxy, IMM[1].yyyx 34: MUL TEMP[0], TEMP[0], TEMP[1] 35: MOV OUT[0], TEMP[0] 36: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = bitcast float %26 to i32 %29 = bitcast float %27 to i32 %30 = insertelement <2 x i32> undef, i32 %28, i32 0 %31 = insertelement <2 x i32> %30, i32 %29, i32 1 %32 = bitcast <8 x i32> %23 to <32 x i8> %33 = bitcast <4 x i32> %25 to <16 x i8> %34 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %32, <16 x i8> %33, i32 2) %35 = extractelement <4 x float> %34, i32 0 %36 = extractelement <4 x float> %34, i32 1 %37 = extractelement <4 x float> %34, i32 2 %38 = extractelement <4 x float> %34, i32 3 %39 = fadd float %38, 0xBF50624DE0000000 %40 = fcmp oge float %39, 0.000000e+00 %41 = sext i1 %40 to i32 %42 = bitcast i32 %41 to float %43 = bitcast float %42 to i32 %44 = icmp ne i32 %43, 0 %. = select i1 %44, float 0.000000e+00, float -1.000000e+00 %45 = fcmp oge float %39, 0.000000e+00 %46 = sext i1 %45 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = icmp ne i32 %48, 0 %temp12.0 = select i1 %49, float 0.000000e+00, float -1.000000e+00 %50 = fcmp oge float %39, 0.000000e+00 %51 = sext i1 %50 to i32 %52 = bitcast i32 %51 to float %53 = bitcast float %52 to i32 %54 = icmp ne i32 %53, 0 %.25 = select i1 %54, float 0.000000e+00, float -1.000000e+00 %55 = fcmp oge float %39, 0.000000e+00 %56 = sext i1 %55 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = icmp ne i32 %58, 0 %60 = fcmp olt float %., 0.000000e+00 %61 = sext i1 %60 to i32 %62 = fcmp olt float %temp12.0, 0.000000e+00 %63 = sext i1 %62 to i32 %64 = fcmp olt float %.25, 0.000000e+00 %65 = sext i1 %64 to i32 %66 = bitcast i32 %61 to float %67 = bitcast i32 %63 to float %68 = bitcast i32 %65 to float %69 = bitcast float %66 to i32 %70 = bitcast float %68 to i32 %71 = or i32 %69, %70 %72 = bitcast i32 %71 to float %73 = bitcast float %72 to i32 %74 = bitcast float %67 to i32 %75 = or i32 %73, %74 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = and i32 %77, 1065353216 %79 = bitcast i32 %78 to float %80 = fsub float -0.000000e+00, %79 %81 = fsub float -0.000000e+00, %79 %82 = fsub float -0.000000e+00, %79 %83 = fsub float -0.000000e+00, %79 call void @llvm.AMDGPU.kill(float %80) call void @llvm.AMDGPU.kill(float %81) call void @llvm.AMDGPU.kill(float %82) call void @llvm.AMDGPU.kill(float %83) %84 = fmul float %38, 1.000000e+00 %85 = fadd float %84, -0.000000e+00 %86 = fmul float %38, 1.000000e+00 %87 = fadd float %86, -0.000000e+00 %88 = fmul float %38, 1.000000e+00 %89 = fadd float %88, -0.000000e+00 %90 = fmul float %38, -0.000000e+00 %91 = fadd float %90, 1.000000e+00 %92 = fmul float %35, %85 %93 = fmul float %36, %87 %94 = fmul float %37, %89 %95 = fmul float %38, %91 %96 = call i32 @llvm.SI.packf16(float %92, float %93) %97 = bitcast i32 %96 to float %98 = call i32 @llvm.SI.packf16(float %94, float %95) %99 = bitcast i32 %98 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %97, float %99, float %97, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 v_mov_b32_e32 v4, 0xba83126f ; 7E0802FF BA83126F s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v4, v3, v4 ; 06080903 v_cmp_ge_f32_e64 s[0:1], v4, 0 ; D00C0000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_cmp_ne_i32_e64 s[0:1], v4, 0 ; D10A0000 00010104 v_cndmask_b32_e64 v4, -1.0, 0, s[0:1] ; D2000004 180100F3 v_cmp_lt_f32_e64 s[0:1], v4, 0 ; D0020000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_xor_b32_e32 v4, 0x80000000, v4 ; 3A0808FF 80000000 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_mad_f32 v5, v3, v4, 1.0 ; D2820005 03CA0903 v_mul_f32_e32 v5, v5, v3 ; 100A0705 v_add_f32_e32 v4, v4, v3 ; 06080704 v_mul_f32_e32 v6, v4, v2 ; 100C0504 v_cvt_pkrtz_f16_f32_e32 v5, v6, v5 ; 5E0A0B06 v_mul_f32_e32 v6, v4, v1 ; 100C0304 v_mul_f32_e32 v0, v4, v0 ; 10000104 v_cvt_pkrtz_f16_f32_e32 v0, v0, v6 ; 5E000D00 exp 15, 0, 1, 1, 1, v0, v5, v0, v5 ; F8001C0F 05000500 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v1 ; 7E0A4101 v_subrev_f32_e32 v1, v5, v1 ; 0A020305 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v9, v0 ; D2820000 04021308 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v8, v10, v9 ; D2820009 04261508 v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v6 ; 10160D0B v_mad_f32 v10, v5, v10, v11 ; D282000A 042E1505 v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v7, v11, v10 ; D282000A 042A1707 v_or_b32_e32 v11, 8, v4 ; 38160888 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v8, v11, v10 ; D282000A 042A1708 v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyyy, IMM[0].yyyx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 1.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 0.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 0.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mad_f32 v0, 0, v2, 1.0 ; D2820000 03CA0480 v_mad_f32 v1, 0, v2, 0 ; D2820001 02020480 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_add_f32_e32 v2, 0, v2 ; 06040480 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].xxxx 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].xxxx 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[1].x, TEMP[1].xxxx 18: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 19: UARL ADDR[0].x, TEMP[1].xxxx 20: UARL ADDR[0].x, TEMP[1].xxxx 21: MAD TEMP[0], IN[0].wwww, CONST[ADDR[0].x], TEMP[0] 22: MOV TEMP[1].xyz, IN[2].xxxx 23: MOV OUT[1], TEMP[1] 24: MOV OUT[0], TEMP[0] 25: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = extractelement <4 x float> %16, i32 3 %21 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %22 = load <16 x i8> addrspace(2)* %21, !tbaa !0 %23 = add i32 %5, %7 %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %22, i32 0, i32 %23) %25 = extractelement <4 x float> %24, i32 0 %26 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %27 = load <16 x i8> addrspace(2)* %26, !tbaa !0 %28 = add i32 %5, %7 %29 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %27, i32 0, i32 %28) %30 = extractelement <4 x float> %29, i32 0 %31 = call float @llvm.AMDIL.fraction.(float %25) %32 = fsub float -0.000000e+00, %31 %33 = fadd float %32, %25 %34 = fmul float %33, 4.000000e+00 %35 = fptosi float %34 to i32 %36 = bitcast i32 %35 to float %37 = bitcast float %36 to i32 %38 = add i32 1, %37 %39 = bitcast i32 %38 to float %40 = bitcast float %39 to i32 %41 = shl i32 %40, 4 %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %41) %43 = fmul float %18, %42 %44 = shl i32 %40, 4 %45 = add i32 %44, 4 %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %45) %47 = fmul float %18, %46 %48 = shl i32 %40, 4 %49 = add i32 %48, 8 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %18, %50 %52 = shl i32 %40, 4 %53 = add i32 %52, 12 %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %53) %55 = fmul float %18, %54 %56 = fptosi float %34 to i32 %57 = bitcast i32 %56 to float %58 = bitcast float %57 to i32 %59 = shl i32 %58, 4 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %43 %63 = shl i32 %58, 4 %64 = add i32 %63, 4 %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %64) %66 = fmul float %17, %65 %67 = fadd float %66, %47 %68 = shl i32 %58, 4 %69 = add i32 %68, 8 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %17, %70 %72 = fadd float %71, %51 %73 = shl i32 %58, 4 %74 = add i32 %73, 12 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %17, %75 %77 = fadd float %76, %55 %78 = fptosi float %34 to i32 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = add i32 2, %80 %82 = bitcast i32 %81 to float %83 = bitcast float %82 to i32 %84 = shl i32 %83, 4 %85 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %84) %86 = fmul float %19, %85 %87 = fadd float %86, %62 %88 = shl i32 %83, 4 %89 = add i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fmul float %19, %90 %92 = fadd float %91, %67 %93 = shl i32 %83, 4 %94 = add i32 %93, 8 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = fmul float %19, %95 %97 = fadd float %96, %72 %98 = shl i32 %83, 4 %99 = add i32 %98, 12 %100 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %99) %101 = fmul float %19, %100 %102 = fadd float %101, %77 %103 = fptosi float %34 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = add i32 3, %105 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = shl i32 %108, 4 %110 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %109) %111 = fmul float %20, %110 %112 = fadd float %111, %87 %113 = shl i32 %108, 4 %114 = add i32 %113, 4 %115 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %114) %116 = fmul float %20, %115 %117 = fadd float %116, %92 %118 = shl i32 %108, 4 %119 = add i32 %118, 8 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fmul float %20, %120 %122 = fadd float %121, %97 %123 = shl i32 %108, 4 %124 = add i32 %123, 12 %125 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %124) %126 = fmul float %20, %125 %127 = fadd float %126, %102 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %30, float %30, float %30, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %112, float %117, float %122, float %127) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 v_mov_b32_e32 v5, 0 ; 7E0A0280 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v1, v1, v1, v5 ; F800020F 05010101 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v1 ; 7E0A4101 v_subrev_f32_e32 v1, v5, v1 ; 0A020305 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v8, v9, v0 ; D2820000 04021308 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v8, v10, v9 ; D2820009 04261508 v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v6 ; 10160D0B v_mad_f32 v10, v5, v10, v11 ; D282000A 042E1505 v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v7, v11, v10 ; D282000A 042A1707 v_or_b32_e32 v11, 8, v4 ; 38160888 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v8, v11, v10 ; D282000A 042A1708 v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v8, v2, v1 ; D2820001 04060508 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0], IN[0].xxxx, IMM[0].xyxy, IMM[0].yyyx 1: MOV OUT[0], TEMP[0] 2: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %23 = fmul float %22, 1.000000e+00 %24 = fadd float %23, 0.000000e+00 %25 = fmul float %22, 0.000000e+00 %26 = fadd float %25, 0.000000e+00 %27 = fmul float %22, 1.000000e+00 %28 = fadd float %27, 0.000000e+00 %29 = fmul float %22, 0.000000e+00 %30 = fadd float %29, 1.000000e+00 %31 = call i32 @llvm.SI.packf16(float %24, float %26) %32 = bitcast i32 %31 to float %33 = call i32 @llvm.SI.packf16(float %28, float %30) %34 = bitcast i32 %33 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %32, float %34, float %32, float %34) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_mad_f32 v0, 0, v2, 1.0 ; D2820000 03CA0480 v_add_f32_e32 v1, 0, v2 ; 06020480 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_mad_f32 v2, 0, v2, 0 ; D2820002 02020480 v_cvt_pkrtz_f16_f32_e32 v1, v1, v2 ; 5E020501 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL SAMP[0] DCL CONST[0..15] DCL TEMP[0..4], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 255.0000, 1.0000} IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[15], IN[0] 1: MOV TEMP[1].xy, TEMP[0].xyxx 2: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[15] 3: MOV TEMP[1].zw, TEMP[0].wwzw 4: MUL TEMP[0].xy, TEMP[1], CONST[9] 5: MOV TEMP[1].xy, TEMP[0].xyxx 6: ADD TEMP[2].zw, TEMP[1], IMM[0].xxxx 7: MUL TEMP[2].xy, TEMP[2].zwzw, CONST[10] 8: MOV TEMP[3].xy, TEMP[2].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 11: MUL TEMP[3].z, TEMP[3].wwww, CONST[9].zzzz 12: MUL TEMP[3].z, TEMP[3].zzzz, IMM[0].zzzz 13: MUL TEMP[2], TEMP[0].yyyy, CONST[12] 14: MAD TEMP[2], TEMP[0].xxxx, CONST[11], TEMP[2] 15: MAD TEMP[1], TEMP[3].zzzz, CONST[13], TEMP[2] 16: ADD TEMP[1], TEMP[1], CONST[14] 17: ADD TEMP[0].xy, TEMP[1], CONST[8].zwzw 18: MUL TEMP[0].zw, TEMP[0].xyxy, CONST[8].xyxy 19: MOV TEMP[0].zw, TEMP[0].wwzw 20: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 21: MAD TEMP[2], TEMP[1].xxxx, CONST[0], TEMP[2] 22: MAD TEMP[2], TEMP[1].zzzz, CONST[2], TEMP[2] 23: MAD TEMP[2], TEMP[1].wwww, CONST[3], TEMP[2] 24: RCP TEMP[3].x, TEMP[2].wwww 25: MUL TEMP[3].xy, TEMP[2], TEMP[3].xxxx 26: MOV TEMP[3].xy, TEMP[3].xyxx 27: MOV TEMP[4], TEMP[2] 28: MOV TEMP[0].xy, IN[1].xyxx 29: MUL TEMP[2], TEMP[1].yyyy, CONST[5] 30: MAD TEMP[2], TEMP[1].xxxx, CONST[4], TEMP[2] 31: MAD TEMP[2], TEMP[1].zzzz, CONST[6], TEMP[2] 32: MAD TEMP[2], TEMP[1].wwww, CONST[7], TEMP[2] 33: MOV TEMP[1].xyz, TEMP[1].xyzx 34: MAD TEMP[2], TEMP[2], IMM[1].xyzz, IMM[0].xxyy 35: MOV TEMP[1].w, IMM[0].wwww 36: MOV TEMP[3].zw, IMM[0].wwyw 37: MOV OUT[1], TEMP[0] 38: MOV OUT[2], TEMP[2] 39: MOV OUT[0], TEMP[4] 40: MOV OUT[3], TEMP[1] 41: MOV OUT[4], TEMP[3] 42: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %72 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %73 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %79 = load <16 x i8> addrspace(2)* %78, !tbaa !0 %80 = add i32 %5, %7 %81 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %79, i32 0, i32 %80) %82 = extractelement <4 x float> %81, i32 0 %83 = extractelement <4 x float> %81, i32 1 %84 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %85 = load <16 x i8> addrspace(2)* %84, !tbaa !0 %86 = add i32 %5, %7 %87 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %85, i32 0, i32 %86) %88 = extractelement <4 x float> %87, i32 0 %89 = extractelement <4 x float> %87, i32 1 %90 = fadd float %70, %82 %91 = fadd float %71, %83 %92 = fadd float %90, %72 %93 = fadd float %91, %73 %94 = fmul float %90, %49 %95 = fmul float %91, %50 %96 = fadd float %92, 5.000000e-01 %97 = fadd float %93, 5.000000e-01 %98 = fmul float %96, %52 %99 = fmul float %97, %53 %100 = bitcast float %98 to i32 %101 = bitcast float %99 to i32 %102 = bitcast float 0.000000e+00 to i32 %103 = insertelement <4 x i32> undef, i32 %100, i32 0 %104 = insertelement <4 x i32> %103, i32 %101, i32 1 %105 = insertelement <4 x i32> %104, i32 %102, i32 2 %106 = insertelement <4 x i32> %105, i32 undef, i32 3 %107 = bitcast <8 x i32> %75 to <32 x i8> %108 = bitcast <4 x i32> %77 to <16 x i8> %109 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %106, <32 x i8> %107, <16 x i8> %108, i32 2) %110 = extractelement <4 x float> %109, i32 3 %111 = fmul float %110, %51 %112 = fmul float %111, 2.550000e+02 %113 = fmul float %95, %58 %114 = fmul float %95, %59 %115 = fmul float %95, %60 %116 = fmul float %95, %61 %117 = fmul float %94, %54 %118 = fadd float %117, %113 %119 = fmul float %94, %55 %120 = fadd float %119, %114 %121 = fmul float %94, %56 %122 = fadd float %121, %115 %123 = fmul float %94, %57 %124 = fadd float %123, %116 %125 = fmul float %112, %62 %126 = fadd float %125, %118 %127 = fmul float %112, %63 %128 = fadd float %127, %120 %129 = fmul float %112, %64 %130 = fadd float %129, %122 %131 = fmul float %112, %65 %132 = fadd float %131, %124 %133 = fadd float %126, %66 %134 = fadd float %128, %67 %135 = fadd float %130, %68 %136 = fadd float %132, %69 %137 = fadd float %133, %47 %138 = fadd float %134, %48 %139 = fmul float %137, %45 %140 = fmul float %138, %46 %141 = fmul float %134, %17 %142 = fmul float %134, %18 %143 = fmul float %134, %19 %144 = fmul float %134, %20 %145 = fmul float %133, %13 %146 = fadd float %145, %141 %147 = fmul float %133, %14 %148 = fadd float %147, %142 %149 = fmul float %133, %15 %150 = fadd float %149, %143 %151 = fmul float %133, %16 %152 = fadd float %151, %144 %153 = fmul float %135, %21 %154 = fadd float %153, %146 %155 = fmul float %135, %22 %156 = fadd float %155, %148 %157 = fmul float %135, %23 %158 = fadd float %157, %150 %159 = fmul float %135, %24 %160 = fadd float %159, %152 %161 = fmul float %136, %25 %162 = fadd float %161, %154 %163 = fmul float %136, %26 %164 = fadd float %163, %156 %165 = fmul float %136, %27 %166 = fadd float %165, %158 %167 = fmul float %136, %28 %168 = fadd float %167, %160 %169 = fdiv float 1.000000e+00, %168 %170 = fmul float %162, %169 %171 = fmul float %164, %169 %172 = fmul float %134, %33 %173 = fmul float %134, %34 %174 = fmul float %134, %35 %175 = fmul float %134, %36 %176 = fmul float %133, %29 %177 = fadd float %176, %172 %178 = fmul float %133, %30 %179 = fadd float %178, %173 %180 = fmul float %133, %31 %181 = fadd float %180, %174 %182 = fmul float %133, %32 %183 = fadd float %182, %175 %184 = fmul float %135, %37 %185 = fadd float %184, %177 %186 = fmul float %135, %38 %187 = fadd float %186, %179 %188 = fmul float %135, %39 %189 = fadd float %188, %181 %190 = fmul float %135, %40 %191 = fadd float %190, %183 %192 = fmul float %136, %41 %193 = fadd float %192, %185 %194 = fmul float %136, %42 %195 = fadd float %194, %187 %196 = fmul float %136, %43 %197 = fadd float %196, %189 %198 = fmul float %136, %44 %199 = fadd float %198, %191 %200 = fmul float %193, 5.000000e-01 %201 = fadd float %200, 5.000000e-01 %202 = fmul float %195, -5.000000e-01 %203 = fadd float %202, 5.000000e-01 %204 = fmul float %197, 1.000000e+00 %205 = fadd float %204, 0.000000e+00 %206 = fmul float %199, 1.000000e+00 %207 = fadd float %206, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %88, float %89, float %139, float %140) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %201, float %203, float %205, float %207) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %133, float %134, float %135, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %170, float %171, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %162, float %164, float %166, float %168) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s12, s[0:3], 0x3c ; C206013C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s12, v1 ; 060A020C s_buffer_load_dword s12, s[0:3], 0x24 ; C2060124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s12, v5 ; 100C0A0C s_buffer_load_dword s12, s[0:3], 0x3d ; C206013D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v2 ; 0602040C s_buffer_load_dword s12, s[0:3], 0x25 ; C2060125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s12, v1 ; 1004020C s_buffer_load_dword s12, s[0:3], 0x31 ; C2060131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s12, v2 ; 1006040C s_buffer_load_dword s12, s[0:3], 0x2d ; C206012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v6, s12, v3 ; D2820003 040C1906 s_buffer_load_dword s12, s[0:3], 0x3f ; C206013F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v1 ; 0602020C v_add_f32_e32 v1, 0.5, v1 ; 060202F0 s_buffer_load_dword s12, s[0:3], 0x29 ; C2060129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s12, v1 ; 1010020C s_buffer_load_dword s12, s[0:3], 0x3e ; C206013E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s12, v5 ; 06020A0C v_add_f32_e32 v1, 0.5, v1 ; 060202F0 s_buffer_load_dword s12, s[0:3], 0x28 ; C2060128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s12, v1 ; 100E020C v_mov_b32_e32 v9, 0 ; 7E120280 s_load_dwordx4 s[12:15], s[4:5], 0x0 ; C0860500 s_load_dwordx8 s[16:23], s[6:7], 0x0 ; C0C80700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v1, 8, 0, 0, 0, 0, 0, 0, 0, v[7:10], s[16:23], s[12:15] ; F0900800 00640107 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v1, s4, v1 ; 10020204 v_mul_f32_e32 v1, 0x437f0000, v1 ; 100202FF 437F0000 s_buffer_load_dword s4, s[0:3], 0x35 ; C2020135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s4, v3 ; D2820003 040C0901 s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v3 ; 06080604 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v2 ; 100A0404 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s4, v5 ; D2820005 04140901 s_buffer_load_dword s4, s[0:3], 0x38 ; C2020138 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v11, s4, v5 ; 06160A04 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v11 ; 10161604 buffer_load_format_xyzw v[12:15], v0, s[8:11], 0 idxen ; E00C2000 80020C00 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v12, v13, v11, v4 ; F800020F 040B0D0C s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v2 ; 10080404 s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0x3a ; C202013A s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 s_buffer_load_dword s4, s[0:3], 0x2f ; C202012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s4, v2 ; D2820002 04080906 s_buffer_load_dword s4, s[0:3], 0x37 ; C2020137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v1, s4, v2 ; D2820001 04080901 s_buffer_load_dword s4, s[0:3], 0x3b ; C202013B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 v_mad_f32 v0, -0.5, v0, 0.5 ; D2820000 03C200F1 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v3 ; 10040604 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s4, v2 ; D2820002 04080905 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v2 ; D2820002 04080901 v_add_f32_e32 v2, 0, v2 ; 06040480 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v3 ; 100C0604 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 v_add_f32_e32 v6, 0, v6 ; 060C0C80 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v3 ; 10160604 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v5, s4, v11 ; D282000B 042C0905 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v4, s4, v11 ; D282000B 042C0904 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v11, v1, s4, v11 ; D282000B 042C0901 v_mad_f32 v11, 0.5, v11, 0.5 ; D282000B 03C216F0 exp 15, 33, 0, 0, 0, v11, v0, v6, v2 ; F800021F 0206000B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v5, v3, v4, v0 ; F800022F 00040305 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v3 ; 10040604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s4, v2 ; D2820002 04080905 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v2 ; D2820002 04080901 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v3 ; 100C0604 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 v_rcp_f32_e32 v11, v6 ; 7E165506 v_mul_f32_e32 v12, v11, v2 ; 1018050B s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s4, v3 ; 101A0604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v5, s4, v13 ; D282000D 04340905 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v4, s4, v13 ; D282000D 04340904 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v1, s4, v13 ; D282000D 04340901 v_mul_f32_e32 v11, v11, v13 ; 10161B0B exp 15, 35, 0, 0, 0, v11, v12, v9, v0 ; F800023F 00090C0B s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v3 ; 10000604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s0, v0 ; D2820000 04000101 exp 15, 12, 0, 1, 0, v13, v2, v0, v6 ; F80008CF 0600020D s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[0..8] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 1.4427, 1.0000, 0.5000, -0.5000} IMM[1] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[0], IN[2] 1: MUL TEMP[1].w, TEMP[0].zzzz, CONST[8].xxxx 2: MUL TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 3: EX2 TEMP[1].x, TEMP[1].wwww 4: ADD TEMP[1].w, -TEMP[1].xxxx, IMM[0].yyyy 5: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 6: RCP TEMP[3].x, TEMP[0].zzzz 7: MUL TEMP[2].x, TEMP[2].xxxx, CONST[8].yyyy 8: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 9: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MOV TEMP[0].x, TEMP[1].xxxx 12: EX2 TEMP[1].x, TEMP[1].xxxx 13: MOV_SAT TEMP[0].x, TEMP[1].xxxx 14: ADD TEMP[1].x, -TEMP[0].xxxx, IMM[0].yyyy 15: MAD TEMP[2].x, IN[3].yyyy, IMM[0].zzzz, IMM[0].zzzz 16: MOV TEMP[2].x, TEMP[2].xxxx 17: MOV TEMP[2].y, CONST[8].wwww 18: MOV TEMP[3].xy, TEMP[2].xyyy 19: TEX TEMP[3].x, TEMP[3], SAMP[2], 2D 20: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 21: MOV TEMP[0].x, TEMP[1].xxxx 22: MUL TEMP[3].yzw, CONST[4].xxyw, IN[2].yyyy 23: MOV TEMP[0].yzw, TEMP[3].zyzw 24: MAD TEMP[3].yzw, IN[2].xxxx, CONST[3].xxyw, TEMP[0] 25: MOV TEMP[0].yzw, TEMP[3].zyzw 26: MAD TEMP[3].yzw, IN[2].zzzz, CONST[5].xxyw, TEMP[0] 27: MOV TEMP[0].yzw, TEMP[3].zyzw 28: ADD TEMP[3].yzw, TEMP[0], CONST[6].xxyw 29: MOV TEMP[0].yz, TEMP[3].zyzw 30: RCP TEMP[3].x, TEMP[3].wwww 31: MOV TEMP[0].w, TEMP[3].xxxx 32: MUL TEMP[3].yz, TEMP[3].xxxx, TEMP[0] 33: MOV TEMP[0].yz, TEMP[3].zyzz 34: MAD TEMP[3].yz, TEMP[0], IMM[0].zzwy, IMM[0].wwww 35: MOV TEMP[0].yz, TEMP[3].zyzz 36: MAD TEMP[0].xy, TEMP[0].yzzw, IMM[1].xyxx, IMM[1].zxzz 37: MOV TEMP[0].xy, TEMP[0].xyyy 38: TEX TEMP[0].x, TEMP[0], SAMP[3], 2D 39: MOV_SAT TEMP[0].x, TEMP[0].xxxx 40: MUL TEMP[0].y, TEMP[0].xxxx, CONST[1].xxxx 41: MAD TEMP[3].xy, IN[0], IMM[1].xyxx, IMM[1].zxzz 42: MOV TEMP[3].xy, TEMP[3].xyyy 43: TEX TEMP[3], TEMP[3], SAMP[0], 2D 44: MOV TEMP[2].w, TEMP[3] 45: MOV TEMP[4], -CONST[2].xxxx 46: FSGE TEMP[5].x, TEMP[4].xxxx, IMM[1].zzzz 47: UIF TEMP[5].xxxx :0 48: MOV TEMP[5].x, IMM[1].zzzz 49: ELSE :0 50: MOV TEMP[5].x, TEMP[3].xxxx 51: ENDIF 52: MOV TEMP[5].x, TEMP[5].xxxx 53: FSGE TEMP[6].x, TEMP[4].yyyy, IMM[1].zzzz 54: UIF TEMP[6].xxxx :0 55: MOV TEMP[6].x, IMM[1].zzzz 56: ELSE :0 57: MOV TEMP[6].x, TEMP[3].yyyy 58: ENDIF 59: MOV TEMP[5].y, TEMP[6].xxxx 60: FSGE TEMP[6].x, TEMP[4].zzzz, IMM[1].zzzz 61: UIF TEMP[6].xxxx :0 62: MOV TEMP[6].x, IMM[1].zzzz 63: ELSE :0 64: MOV TEMP[6].x, TEMP[3].zzzz 65: ENDIF 66: MOV TEMP[5].z, TEMP[6].xxxx 67: FSGE TEMP[3].x, TEMP[4].wwww, IMM[1].zzzz 68: UIF TEMP[3].xxxx :0 69: ELSE :0 70: ENDIF 71: MOV TEMP[2].xyz, TEMP[5].xyzx 72: MOV TEMP[3].xyz, TEMP[5].xyzz 73: TEX TEMP[3], TEMP[3], SAMP[7], 3D 74: LRP TEMP[0].xyz, TEMP[0].yyyy, TEMP[3], TEMP[2] 75: MUL TEMP[2], IN[1], IMM[1].xyxx 76: ADD TEMP[3].xy, TEMP[2], IMM[1].zxzz 77: MOV TEMP[3].xy, TEMP[3].xyyy 78: TEX TEMP[3], TEMP[3], SAMP[1], 2D 79: ADD TEMP[4].y, -TEMP[3].wwww, IMM[0].yyyy 80: MAD TEMP[0].yzw, TEMP[0].xxyz, TEMP[4].yyyy, TEMP[3].xxyz 81: ADD TEMP[2].xy, TEMP[2], IMM[1].zxzz 82: MOV TEMP[2].xy, TEMP[2].xyyy 83: TEX TEMP[2].xzw, TEMP[2], SAMP[4], 2D 84: MOV TEMP[3].w, TEMP[2].wwww 85: MOV TEMP[4].xy, IN[0].zwww 86: TEX TEMP[4], TEMP[4], SAMP[5], 2D 87: LRP TEMP[0].xyz, TEMP[2].xxxx, TEMP[0].yzww, TEMP[4] 88: MOV TEMP[4].xyz, TEMP[0].xyzx 89: MAD TEMP[2].y, TEMP[2].zzzz, -TEMP[2].xxxx, TEMP[2].xxxx 90: MOV TEMP[0].xyz, TEMP[0].xyzz 91: TEX TEMP[0], TEMP[0], SAMP[6], 3D 92: LRP TEMP[0].xyz, TEMP[2].yyyy, TEMP[0], TEMP[4] 93: MOV TEMP[3].xyz, TEMP[0].xyzx 94: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[7].xxyz 95: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[3] 96: MOV TEMP[0].xyz, TEMP[0].xyzx 97: MOV TEMP[0].w, IMM[0].yyyy 98: MOV OUT[0], TEMP[0] 99: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 140) %47 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %48 = load <8 x i32> addrspace(2)* %47, !tbaa !0 %49 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %50 = load <4 x i32> addrspace(2)* %49, !tbaa !0 %51 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %52 = load <8 x i32> addrspace(2)* %51, !tbaa !0 %53 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %54 = load <4 x i32> addrspace(2)* %53, !tbaa !0 %55 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %56 = load <8 x i32> addrspace(2)* %55, !tbaa !0 %57 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %58 = load <4 x i32> addrspace(2)* %57, !tbaa !0 %59 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %60 = load <8 x i32> addrspace(2)* %59, !tbaa !0 %61 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %62 = load <4 x i32> addrspace(2)* %61, !tbaa !0 %63 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %64 = load <8 x i32> addrspace(2)* %63, !tbaa !0 %65 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %66 = load <4 x i32> addrspace(2)* %65, !tbaa !0 %67 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %68 = load <8 x i32> addrspace(2)* %67, !tbaa !0 %69 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %70 = load <4 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = fsub float -0.000000e+00, %24 %90 = fadd float %89, %85 %91 = fsub float -0.000000e+00, %25 %92 = fadd float %91, %86 %93 = fsub float -0.000000e+00, %26 %94 = fadd float %93, %87 %95 = fmul float %94, %44 %96 = fmul float %95, 0x3FF7154CA0000000 %97 = call float @llvm.AMDIL.exp.(float %96) %98 = fsub float -0.000000e+00, %97 %99 = fadd float %98, 1.000000e+00 %100 = fmul float %90, %90 %101 = fmul float %92, %92 %102 = fadd float %101, %100 %103 = fmul float %94, %94 %104 = fadd float %102, %103 %105 = fdiv float 1.000000e+00, %94 %106 = fmul float %104, %45 %107 = fmul float %99, %106 %108 = fmul float %105, %107 %109 = fmul float %108, 0x3FF7154CA0000000 %110 = call float @llvm.AMDIL.exp.(float %109) %111 = call float @llvm.AMDIL.clamp.(float %110, float 0.000000e+00, float 1.000000e+00) %112 = fsub float -0.000000e+00, %111 %113 = fadd float %112, 1.000000e+00 %114 = fmul float %88, 5.000000e-01 %115 = fadd float %114, 5.000000e-01 %116 = bitcast float %115 to i32 %117 = bitcast float %46 to i32 %118 = insertelement <2 x i32> undef, i32 %116, i32 0 %119 = insertelement <2 x i32> %118, i32 %117, i32 1 %120 = bitcast <8 x i32> %56 to <32 x i8> %121 = bitcast <4 x i32> %58 to <16 x i8> %122 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %119, <32 x i8> %120, <16 x i8> %121, i32 2) %123 = extractelement <4 x float> %122, i32 0 %124 = fmul float %113, %123 %125 = fmul float %32, %86 %126 = fmul float %33, %86 %127 = fmul float %34, %86 %128 = fmul float %85, %29 %129 = fadd float %128, %125 %130 = fmul float %85, %30 %131 = fadd float %130, %126 %132 = fmul float %85, %31 %133 = fadd float %132, %127 %134 = fmul float %87, %35 %135 = fadd float %134, %129 %136 = fmul float %87, %36 %137 = fadd float %136, %131 %138 = fmul float %87, %37 %139 = fadd float %138, %133 %140 = fadd float %135, %38 %141 = fadd float %137, %39 %142 = fadd float %139, %40 %143 = fdiv float 1.000000e+00, %142 %144 = fmul float %143, %140 %145 = fmul float %143, %141 %146 = fmul float %144, 5.000000e-01 %147 = fadd float %146, -5.000000e-01 %148 = fmul float %145, -5.000000e-01 %149 = fadd float %148, -5.000000e-01 %150 = fmul float %147, 1.000000e+00 %151 = fadd float %150, 0.000000e+00 %152 = fmul float %149, -1.000000e+00 %153 = fadd float %152, 1.000000e+00 %154 = bitcast float %151 to i32 %155 = bitcast float %153 to i32 %156 = insertelement <2 x i32> undef, i32 %154, i32 0 %157 = insertelement <2 x i32> %156, i32 %155, i32 1 %158 = bitcast <8 x i32> %60 to <32 x i8> %159 = bitcast <4 x i32> %62 to <16 x i8> %160 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %157, <32 x i8> %158, <16 x i8> %159, i32 2) %161 = extractelement <4 x float> %160, i32 0 %162 = call float @llvm.AMDIL.clamp.(float %161, float 0.000000e+00, float 1.000000e+00) %163 = fmul float %162, %27 %164 = fmul float %79, 1.000000e+00 %165 = fadd float %164, 0.000000e+00 %166 = fmul float %80, -1.000000e+00 %167 = fadd float %166, 1.000000e+00 %168 = bitcast float %165 to i32 %169 = bitcast float %167 to i32 %170 = insertelement <2 x i32> undef, i32 %168, i32 0 %171 = insertelement <2 x i32> %170, i32 %169, i32 1 %172 = bitcast <8 x i32> %48 to <32 x i8> %173 = bitcast <4 x i32> %50 to <16 x i8> %174 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %171, <32 x i8> %172, <16 x i8> %173, i32 2) %175 = extractelement <4 x float> %174, i32 0 %176 = extractelement <4 x float> %174, i32 1 %177 = extractelement <4 x float> %174, i32 2 %178 = fsub float -0.000000e+00, %28 %179 = fsub float -0.000000e+00, %28 %180 = fsub float -0.000000e+00, %28 %181 = fsub float -0.000000e+00, %28 %182 = fcmp oge float %178, 0.000000e+00 %183 = sext i1 %182 to i32 %184 = bitcast i32 %183 to float %185 = bitcast float %184 to i32 %186 = icmp ne i32 %185, 0 %. = select i1 %186, float 0.000000e+00, float %175 %187 = fcmp oge float %179, 0.000000e+00 %188 = sext i1 %187 to i32 %189 = bitcast i32 %188 to float %190 = bitcast float %189 to i32 %191 = icmp ne i32 %190, 0 %temp24.0 = select i1 %191, float 0.000000e+00, float %176 %192 = fcmp oge float %180, 0.000000e+00 %193 = sext i1 %192 to i32 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = icmp ne i32 %195, 0 %.37 = select i1 %196, float 0.000000e+00, float %177 %197 = fcmp oge float %181, 0.000000e+00 %198 = sext i1 %197 to i32 %199 = bitcast i32 %198 to float %200 = bitcast float %199 to i32 %201 = icmp ne i32 %200, 0 %202 = bitcast float %. to i32 %203 = bitcast float %temp24.0 to i32 %204 = bitcast float %.37 to i32 %205 = insertelement <4 x i32> undef, i32 %202, i32 0 %206 = insertelement <4 x i32> %205, i32 %203, i32 1 %207 = insertelement <4 x i32> %206, i32 %204, i32 2 %208 = insertelement <4 x i32> %207, i32 undef, i32 3 %209 = bitcast <8 x i32> %76 to <32 x i8> %210 = bitcast <4 x i32> %78 to <16 x i8> %211 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %208, <32 x i8> %209, <16 x i8> %210, i32 3) %212 = extractelement <4 x float> %211, i32 0 %213 = extractelement <4 x float> %211, i32 1 %214 = extractelement <4 x float> %211, i32 2 %215 = call float @llvm.AMDGPU.lrp(float %163, float %212, float %.) %216 = call float @llvm.AMDGPU.lrp(float %163, float %213, float %temp24.0) %217 = call float @llvm.AMDGPU.lrp(float %163, float %214, float %.37) %218 = fmul float %83, 1.000000e+00 %219 = fmul float %84, -1.000000e+00 %220 = fadd float %218, 0.000000e+00 %221 = fadd float %219, 1.000000e+00 %222 = bitcast float %220 to i32 %223 = bitcast float %221 to i32 %224 = insertelement <2 x i32> undef, i32 %222, i32 0 %225 = insertelement <2 x i32> %224, i32 %223, i32 1 %226 = bitcast <8 x i32> %52 to <32 x i8> %227 = bitcast <4 x i32> %54 to <16 x i8> %228 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %225, <32 x i8> %226, <16 x i8> %227, i32 2) %229 = extractelement <4 x float> %228, i32 0 %230 = extractelement <4 x float> %228, i32 1 %231 = extractelement <4 x float> %228, i32 2 %232 = extractelement <4 x float> %228, i32 3 %233 = fsub float -0.000000e+00, %232 %234 = fadd float %233, 1.000000e+00 %235 = fmul float %215, %234 %236 = fadd float %235, %229 %237 = fmul float %216, %234 %238 = fadd float %237, %230 %239 = fmul float %217, %234 %240 = fadd float %239, %231 %241 = fadd float %218, 0.000000e+00 %242 = fadd float %219, 1.000000e+00 %243 = bitcast float %241 to i32 %244 = bitcast float %242 to i32 %245 = insertelement <2 x i32> undef, i32 %243, i32 0 %246 = insertelement <2 x i32> %245, i32 %244, i32 1 %247 = bitcast <8 x i32> %64 to <32 x i8> %248 = bitcast <4 x i32> %66 to <16 x i8> %249 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %246, <32 x i8> %247, <16 x i8> %248, i32 2) %250 = extractelement <4 x float> %249, i32 0 %251 = extractelement <4 x float> %249, i32 2 %252 = bitcast float %81 to i32 %253 = bitcast float %82 to i32 %254 = insertelement <2 x i32> undef, i32 %252, i32 0 %255 = insertelement <2 x i32> %254, i32 %253, i32 1 %256 = bitcast <8 x i32> %68 to <32 x i8> %257 = bitcast <4 x i32> %70 to <16 x i8> %258 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %255, <32 x i8> %256, <16 x i8> %257, i32 2) %259 = extractelement <4 x float> %258, i32 0 %260 = extractelement <4 x float> %258, i32 1 %261 = extractelement <4 x float> %258, i32 2 %262 = call float @llvm.AMDGPU.lrp(float %250, float %236, float %259) %263 = call float @llvm.AMDGPU.lrp(float %250, float %238, float %260) %264 = call float @llvm.AMDGPU.lrp(float %250, float %240, float %261) %265 = fsub float -0.000000e+00, %250 %266 = fmul float %251, %265 %267 = fadd float %266, %250 %268 = bitcast float %262 to i32 %269 = bitcast float %263 to i32 %270 = bitcast float %264 to i32 %271 = insertelement <4 x i32> undef, i32 %268, i32 0 %272 = insertelement <4 x i32> %271, i32 %269, i32 1 %273 = insertelement <4 x i32> %272, i32 %270, i32 2 %274 = insertelement <4 x i32> %273, i32 undef, i32 3 %275 = bitcast <8 x i32> %72 to <32 x i8> %276 = bitcast <4 x i32> %74 to <16 x i8> %277 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %274, <32 x i8> %275, <16 x i8> %276, i32 3) %278 = extractelement <4 x float> %277, i32 0 %279 = extractelement <4 x float> %277, i32 1 %280 = extractelement <4 x float> %277, i32 2 %281 = call float @llvm.AMDGPU.lrp(float %267, float %278, float %262) %282 = call float @llvm.AMDGPU.lrp(float %267, float %279, float %263) %283 = call float @llvm.AMDGPU.lrp(float %267, float %280, float %264) %284 = fsub float -0.000000e+00, %281 %285 = fadd float %284, %41 %286 = fsub float -0.000000e+00, %282 %287 = fadd float %286, %42 %288 = fsub float -0.000000e+00, %283 %289 = fadd float %288, %43 %290 = fmul float %124, %285 %291 = fadd float %290, %281 %292 = fmul float %124, %287 %293 = fadd float %292, %282 %294 = fmul float %124, %289 %295 = fadd float %294, %283 %296 = call i32 @llvm.SI.packf16(float %291, float %293) %297 = bitcast i32 %296 to float %298 = call i32 @llvm.SI.packf16(float %295, float 1.000000e+00) %299 = bitcast i32 %298 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %297, float %299, float %297, float %299) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 2, [m0] ; C8080800 v_interp_p2_f32 v2, [v2], v1, 0, 2, [m0] ; C8090801 v_interp_p1_f32 v3, v0, 1, 2, [m0] ; C80C0900 v_interp_p2_f32 v3, [v3], v1, 1, 2, [m0] ; C80D0901 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s8, v3 ; 10080608 s_buffer_load_dword s8, s[0:3], 0xd ; C204010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s8, v4 ; D2820004 04101102 v_interp_p1_f32 v5, v0, 2, 2, [m0] ; C8140A00 v_interp_p2_f32 v5, [v5], v1, 2, 2, [m0] ; C8150A01 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s8, v4 ; D2820004 04101105 s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s8, v4 ; 06080808 s_buffer_load_dword s8, s[0:3], 0x13 ; C2040113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v3 ; 100C0608 s_buffer_load_dword s8, s[0:3], 0xf ; C204010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v2, s8, v6 ; D2820006 04181102 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s8, v6 ; D2820006 04181105 s_buffer_load_dword s8, s[0:3], 0x1b ; C204011B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s8, v6 ; 060C0C08 v_rcp_f32_e32 v6, v6 ; 7E0C5506 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mad_f32 v4, -0.5, v4, -0.5 ; D2820004 03C608F1 v_sub_f32_e32 v8, 1.0, v4 ; 081008F2 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s8, v3 ; 10080608 s_buffer_load_dword s8, s[0:3], 0xc ; C204010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s8, v4 ; D2820004 04101102 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s8, v4 ; D2820004 04101105 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s8, v4 ; 06080808 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_mad_f32 v4, 0.5, v4, -0.5 ; D2820004 03C608F0 v_add_f32_e32 v7, 0, v4 ; 060E0880 s_load_dwordx4 s[68:71], s[4:5], 0x0 ; C0A20500 s_load_dwordx4 s[48:51], s[4:5], 0x4 ; C0980504 s_load_dwordx4 s[96:99], s[4:5], 0x8 ; C0B00508 s_load_dwordx4 s[72:75], s[4:5], 0xc ; C0A4050C s_load_dwordx4 s[44:47], s[4:5], 0x10 ; C0960510 s_load_dwordx4 s[32:35], s[4:5], 0x14 ; C0900514 s_load_dwordx4 s[20:23], s[4:5], 0x18 ; C08A0518 s_load_dwordx4 s[76:79], s[4:5], 0x1c ; C0A6051C s_load_dwordx8 s[80:87], s[6:7], 0x0 ; C0E80700 s_load_dwordx8 s[60:67], s[6:7], 0x8 ; C0DE0708 s_load_dwordx8 s[12:19], s[6:7], 0x10 ; C0C60710 s_load_dwordx8 s[88:95], s[6:7], 0x18 ; C0EC0718 s_load_dwordx8 s[52:59], s[6:7], 0x20 ; C0DA0720 s_load_dwordx8 s[36:43], s[6:7], 0x28 ; C0D20728 s_load_dwordx8 s[24:31], s[6:7], 0x30 ; C0CC0730 s_load_dwordx8 s[4:11], s[6:7], 0x38 ; C0C20738 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v4, 1, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[88:95], s[72:75] ; F0800100 02560407 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 s_buffer_load_dword s72, s[0:3], 0x4 ; C2240104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s72, v4 ; 100C0848 v_mad_f32 v4, -v4, s72, 1.0 ; D2820004 23C89104 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_sub_f32_e32 v8, 1.0, v7 ; 08100EF2 v_interp_p1_f32 v9, v0, 0, 0, [m0] ; C8240000 v_interp_p2_f32 v9, [v9], v1, 0, 0, [m0] ; C8250001 v_add_f32_e32 v7, 0, v9 ; 060E1280 image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[80:87], s[68:71] ; F0800700 02340707 s_buffer_load_dword s68, s[0:3], 0x8 ; C2220108 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_cmp_ge_f32_e64 s[68:69], -s68, 0 ; D00C0044 20010044 v_cndmask_b32_e64 v10, 0, -1, s[68:69] ; D200000A 01118280 v_cmp_ne_i32_e64 s[68:69], v10, 0 ; D10A0044 0001010A v_cndmask_b32_e64 v12, v9, 0, s[68:69] ; D200000C 09110109 v_mul_f32_e32 v14, v12, v4 ; 101C090C v_cndmask_b32_e64 v11, v8, 0, s[68:69] ; D200080B 01110108 v_cndmask_b32_e64 v10, v7, 0, s[68:69] ; D200000A 19110107 image_sample v[7:9], 7, 0, 0, 0, 0, 0, 0, 0, v[10:13], s[4:11], s[76:79] ; F0800700 0261070A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v6, v9, v14 ; D282000E 043A1306 v_interp_p1_f32 v15, v0, 1, 1, [m0] ; C83C0500 v_interp_p2_f32 v15, [v15], v1, 1, 1, [m0] ; C83D0501 v_sub_f32_e32 v16, 1.0, v15 ; 08201EF2 v_interp_p1_f32 v17, v0, 0, 1, [m0] ; C8440400 v_interp_p2_f32 v17, [v17], v1, 0, 1, [m0] ; C8450401 v_add_f32_e32 v15, 0, v17 ; 061E2280 image_sample v[17:20], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[60:67], s[48:51] ; F0800F00 018F110F s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v21, 1.0, v20 ; 082A28F2 v_mad_f32 v14, v14, v21, v19 ; D282000E 044E2B0E image_sample v[15:16], 5, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[52:59], s[44:47] ; F0800500 016D0F0F s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v22, 1.0, v15 ; 082C1EF2 v_interp_p1_f32 v24, v0, 3, 0, [m0] ; C8600300 v_interp_p2_f32 v24, [v24], v1, 3, 0, [m0] ; C8610301 v_interp_p1_f32 v23, v0, 2, 0, [m0] ; C85C0200 v_interp_p2_f32 v23, [v23], v1, 2, 0, [m0] ; C85D0201 image_sample v[23:25], 7, 0, 0, 0, 0, 0, 0, 0, v[23:24], s[36:43], s[32:35] ; F0800700 01091717 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v26, v25, v22 ; 10342D19 v_mad_f32 v28, v15, v14, v26 ; D282001C 046A1D0F v_mul_f32_e32 v14, v11, v4 ; 101C090B v_mad_f32 v14, v6, v8, v14 ; D282000E 043A1106 v_mad_f32 v14, v14, v21, v18 ; D282000E 044A2B0E v_mul_f32_e32 v30, v24, v22 ; 103C2D18 v_mad_f32 v27, v15, v14, v30 ; D282001B 047A1D0F v_mul_f32_e32 v4, v10, v4 ; 1008090A v_mad_f32 v4, v6, v7, v4 ; D2820004 04120F06 v_mad_f32 v4, v4, v21, v17 ; D2820004 04462B04 v_mul_f32_e32 v6, v23, v22 ; 100C2D17 v_mad_f32 v26, v15, v4, v6 ; D282001A 041A090F image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[26:29], s[24:31], s[20:23] ; F0800700 00A6061A v_mad_f32 v4, -v16, v15, v15 ; D2820004 243E1F10 v_sub_f32_e32 v9, 1.0, v4 ; 081208F2 v_mul_f32_e32 v10, v27, v9 ; 1014131B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v4, v7, v10 ; D282000A 042A0F04 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v11, s4, v10 ; 08161404 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s4, v3 ; 0A060604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v2, s4, v2 ; 0A040404 v_mul_f32_e32 v2, v2, v2 ; 10040502 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s4, v5 ; 0A060A04 v_mad_f32 v2, v3, v3, v2 ; D2820002 040A0703 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 v_mul_f32_e32 v5, 0x3fb8aa65, v5 ; 100A0AFF 3FB8AA65 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_sub_f32_e32 v5, 1.0, v5 ; 080A0AF2 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_rcp_f32_e32 v3, v3 ; 7E065503 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_mul_f32_e32 v2, 0x3fb8aa65, v2 ; 100404FF 3FB8AA65 v_exp_f32_e32 v2, v2 ; 7E044B02 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 v_interp_p1_f32 v3, v0, 1, 3, [m0] ; C80C0D00 v_interp_p2_f32 v3, [v3], v1, 1, 3, [m0] ; C80D0D01 v_mad_f32 v0, 0.5, v3, 0.5 ; D2820000 03C206F0 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[96:99] ; F0800100 03030000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v2 ; 10000500 v_mad_f32 v1, v0, v11, v10 ; D2820001 042A1700 v_mul_f32_e32 v2, v26, v9 ; 1004131A v_mad_f32 v2, v4, v6, v2 ; D2820002 040A0D04 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v2, v0, v3, v2 ; D2820002 040A0700 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 v_mul_f32_e32 v2, v28, v9 ; 1004131C v_mad_f32 v2, v4, v8, v2 ; D2820002 040A1104 s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_cvt_pkrtz_f16_f32_e64 v0, v0, 1.0 ; D25E0000 0001E500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL CONST[0..10] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 1.0000, 0.0000, -1.0000, -0.5000} IMM[1] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], CONST[8].xxxx, CONST[9].xxxx 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: MAD TEMP[1], CONST[8].yyyy, IMM[0].xyyx, IMM[0].yxzy 3: MUL TEMP[2].xy, TEMP[1], IN[0] 4: MOV TEMP[1].xy, TEMP[2].xyxx 5: MAD TEMP[1].xy, TEMP[1], TEMP[1].zwzw, CONST[9].xxxx 6: MOV TEMP[1].xy, TEMP[1].xyxx 7: MAD TEMP[2].xy, IN[0], -CONST[8].zzzz, CONST[9].xxxx 8: MOV TEMP[2].xy, TEMP[2].xyxx 9: MAD TEMP[3], CONST[8].ywwy, IMM[0].yxxy, IMM[0].xyyz 10: MUL TEMP[4].xy, TEMP[3], IN[0] 11: MOV TEMP[3].xy, TEMP[4].xyxx 12: MAD TEMP[4].xy, TEMP[3], TEMP[3].zwzw, CONST[9].xxxx 13: MOV TEMP[4].xy, TEMP[4].xyxx 14: MUL TEMP[5].xy, CONST[5], IN[0].yyyy 15: MOV TEMP[3].xy, TEMP[5].xyxx 16: MAD TEMP[5].xy, IN[0].xxxx, CONST[4], TEMP[3] 17: MOV TEMP[3].xy, TEMP[5].xyxx 18: MAD TEMP[5].xy, CONST[10].xxxx, CONST[6], TEMP[3] 19: MOV TEMP[3].xy, TEMP[5].xyxx 20: ADD TEMP[5].xy, TEMP[3], CONST[7] 21: MOV TEMP[3].xy, TEMP[5].xyxx 22: MAD TEMP[5].xy, TEMP[3], IMM[1].xyzz, IMM[0].wwww 23: MOV TEMP[5].xy, TEMP[5].xyxx 24: MUL TEMP[3], CONST[1], IN[0].yyyy 25: MAD TEMP[3], IN[0].xxxx, CONST[0], TEMP[3] 26: MAD TEMP[3], CONST[10].xxxx, CONST[2], TEMP[3] 27: ADD TEMP[3], TEMP[3], CONST[3] 28: MOV TEMP[6].xyz, TEMP[3].xywx 29: MOV TEMP[7].xy, IN[0].xyxx 30: MOV TEMP[7].z, CONST[10].xxxx 31: MOV TEMP[5].zw, IN[0].wwzw 32: MOV TEMP[0].zw, IMM[0].xxyx 33: MOV TEMP[1].zw, IMM[0].xxyx 34: MOV TEMP[2].zw, IMM[0].xxyx 35: MOV TEMP[4].zw, IMM[0].xxyx 36: MOV TEMP[7].w, IMM[0].xxxx 37: MOV TEMP[6].w, IMM[0].xxxx 38: MOV OUT[6], TEMP[6] 39: MOV OUT[7], TEMP[5] 40: MOV OUT[1], TEMP[0] 41: MOV OUT[0], TEMP[3] 42: MOV OUT[2], TEMP[1] 43: MOV OUT[3], TEMP[2] 44: MOV OUT[4], TEMP[4] 45: MOV OUT[5], TEMP[7] 46: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %43 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %44 = load <16 x i8> addrspace(2)* %43, !tbaa !0 %45 = add i32 %5, %7 %46 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %44, i32 0, i32 %45) %47 = extractelement <4 x float> %46, i32 0 %48 = extractelement <4 x float> %46, i32 1 %49 = extractelement <4 x float> %46, i32 2 %50 = extractelement <4 x float> %46, i32 3 %51 = fmul float %47, %37 %52 = fadd float %51, %41 %53 = fmul float %48, %37 %54 = fadd float %53, %41 %55 = fmul float %38, 1.000000e+00 %56 = fadd float %55, 0.000000e+00 %57 = fmul float %38, 0.000000e+00 %58 = fadd float %57, 1.000000e+00 %59 = fmul float %38, 0.000000e+00 %60 = fadd float %59, -1.000000e+00 %61 = fmul float %38, 1.000000e+00 %62 = fadd float %61, 0.000000e+00 %63 = fmul float %56, %47 %64 = fmul float %58, %48 %65 = fmul float %63, %60 %66 = fadd float %65, %41 %67 = fmul float %64, %62 %68 = fadd float %67, %41 %69 = fsub float -0.000000e+00, %39 %70 = fmul float %47, %69 %71 = fadd float %70, %41 %72 = fsub float -0.000000e+00, %39 %73 = fmul float %48, %72 %74 = fadd float %73, %41 %75 = fmul float %38, 0.000000e+00 %76 = fadd float %75, 1.000000e+00 %77 = fmul float %40, 1.000000e+00 %78 = fadd float %77, 0.000000e+00 %79 = fmul float %40, 1.000000e+00 %80 = fadd float %79, 0.000000e+00 %81 = fmul float %38, 0.000000e+00 %82 = fadd float %81, -1.000000e+00 %83 = fmul float %76, %47 %84 = fmul float %78, %48 %85 = fmul float %83, %80 %86 = fadd float %85, %41 %87 = fmul float %84, %82 %88 = fadd float %87, %41 %89 = fmul float %31, %48 %90 = fmul float %32, %48 %91 = fmul float %47, %29 %92 = fadd float %91, %89 %93 = fmul float %47, %30 %94 = fadd float %93, %90 %95 = fmul float %42, %33 %96 = fadd float %95, %92 %97 = fmul float %42, %34 %98 = fadd float %97, %94 %99 = fadd float %96, %35 %100 = fadd float %98, %36 %101 = fmul float %99, 5.000000e-01 %102 = fadd float %101, -5.000000e-01 %103 = fmul float %100, -5.000000e-01 %104 = fadd float %103, -5.000000e-01 %105 = fmul float %17, %48 %106 = fmul float %18, %48 %107 = fmul float %19, %48 %108 = fmul float %20, %48 %109 = fmul float %47, %13 %110 = fadd float %109, %105 %111 = fmul float %47, %14 %112 = fadd float %111, %106 %113 = fmul float %47, %15 %114 = fadd float %113, %107 %115 = fmul float %47, %16 %116 = fadd float %115, %108 %117 = fmul float %42, %21 %118 = fadd float %117, %110 %119 = fmul float %42, %22 %120 = fadd float %119, %112 %121 = fmul float %42, %23 %122 = fadd float %121, %114 %123 = fmul float %42, %24 %124 = fadd float %123, %116 %125 = fadd float %118, %25 %126 = fadd float %120, %26 %127 = fadd float %122, %27 %128 = fadd float %124, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %54, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %66, float %68, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %71, float %74, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %86, float %88, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %47, float %48, float %42, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %125, float %126, float %128, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %102, float %104, float %49, float %50) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %125, float %126, float %127, float %128) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v4, s5, v1, v4 ; D2820004 04120205 v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v5, s5, v0, v5 ; D2820005 04160005 v_mov_b32_e32 v6, 1.0 ; 7E0C02F2 v_mov_b32_e32 v7, 0 ; 7E0E0280 exp 15, 32, 0, 0, 0, v5, v4, v7, v6 ; F800020F 06070405 s_buffer_load_dword s5, s[0:3], 0x21 ; C2028121 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mad_f32 v4, 0, s5, -1.0 ; D2820004 03CC0A80 v_add_f32_e64 v5, 0, s5 ; D2060005 00000A80 v_mul_f32_e32 v8, v0, v5 ; 10100B00 v_mad_f32 v8, v8, v4, s4 ; D2820008 00120908 v_mad_f32 v9, 0, s5, 1.0 ; D2820009 03C80A80 v_mul_f32_e32 v10, v1, v9 ; 10141301 v_mad_f32 v5, v10, v5, s4 ; D2820005 00120B0A exp 15, 33, 0, 0, 0, v8, v5, v7, v6 ; F800021F 06070508 s_buffer_load_dword s5, s[0:3], 0x22 ; C2028122 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v5, s4 ; 7E0A0204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, -v1, s5, v5 ; D2820005 24140B01 v_mov_b32_e32 v8, s4 ; 7E100204 v_mad_f32 v8, -v0, s5, v8 ; D2820008 24200B00 exp 15, 34, 0, 0, 0, v8, v5, v7, v6 ; F800022F 06070508 s_buffer_load_dword s5, s[0:3], 0x23 ; C2028123 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e64 v5, 0, s5 ; D2060005 00000A80 v_mul_f32_e32 v8, v1, v5 ; 10100B01 v_mad_f32 v4, v8, v4, s4 ; D2820004 00120908 v_mul_f32_e32 v8, v0, v9 ; 10101300 v_mad_f32 v5, v8, v5, s4 ; D2820005 00120B08 exp 15, 35, 0, 0, 0, v5, v4, v7, v6 ; F800023F 06070405 s_buffer_load_dword s4, s[0:3], 0x28 ; C2020128 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mov_b32_e32 v4, s4 ; 7E080204 exp 15, 36, 0, 0, 0, v0, v1, v4, v6 ; F800024F 06040100 s_buffer_load_dword s5, s[0:3], 0x7 ; C2028107 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s5, v1 ; 10080205 s_buffer_load_dword s5, s[0:3], 0x3 ; C2028103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s5, v4 ; D2820004 04100B00 s_buffer_load_dword s5, s[0:3], 0xb ; C202810B s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s5 ; 7E0A0205 v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 s_buffer_load_dword s5, s[0:3], 0xf ; C202810F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s5, v4 ; 06080805 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s5, v1 ; 100A0205 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s5, v5 ; D2820005 04140B00 s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v7, s5 ; 7E0E0205 v_mad_f32 v5, v7, s4, v5 ; D2820005 04140907 s_buffer_load_dword s5, s[0:3], 0xd ; C202810D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s5, v5 ; 060A0A05 s_buffer_load_dword s5, s[0:3], 0x4 ; C2028104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s5, v1 ; 100E0205 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s5, v7 ; D2820007 041C0B00 s_buffer_load_dword s5, s[0:3], 0x8 ; C2028108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v7, v8, s4, v7 ; D2820007 041C0908 s_buffer_load_dword s5, s[0:3], 0xc ; C202810C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s5, v7 ; 060E0E05 exp 15, 37, 0, 0, 0, v7, v5, v4, v6 ; F800025F 06040507 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v6, s5, v1 ; 100C0205 s_buffer_load_dword s5, s[0:3], 0x10 ; C2028110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s5, v6 ; D2820006 04180B00 s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v8, s5 ; 7E100205 v_mad_f32 v6, v8, s4, v6 ; D2820006 04180908 s_buffer_load_dword s5, s[0:3], 0x1c ; C202811C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s5, v6 ; 060C0C05 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s5, v1 ; 10100205 s_buffer_load_dword s5, s[0:3], 0x11 ; C2028111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v0, s5, v8 ; D2820008 04200B00 s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v9, s5 ; 7E120205 v_mad_f32 v8, v9, s4, v8 ; D2820008 04200909 s_buffer_load_dword s5, s[0:3], 0x1d ; C202811D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s5, v8 ; 06101005 v_mad_f32 v8, -0.5, v8, -0.5 ; D2820008 03C610F1 exp 15, 38, 0, 0, 0, v6, v8, v2, v3 ; F800026F 03020806 s_buffer_load_dword s5, s[0:3], 0x6 ; C2028106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v6, s5, v1 ; 100C0205 s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s5, v6 ; D2820000 04180B00 s_buffer_load_dword s5, s[0:3], 0xa ; C202810A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s5 ; 7E020205 v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v7, v5, v0, v4 ; F80008CF 04000507 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL CONST[0..23] DCL TEMP[0..24], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 4.0000, 2.2000, 1.0000, 2.0000} IMM[2] FLT32 { -1.0000, 1.0000, 4.0000, -2.0000} IMM[3] FLT32 { 0.0000, 0.5000, -16.0000, -1.4427} IMM[4] FLT32 { 16.0000, -1.4427, 0.0050, -0.5000} IMM[5] FLT32 { 0.5000, -0.5000, 6.0000, 0.2500} IMM[6] FLT32 { 2.0000, -2.0000, 6.0000, 2.2000} IMM[7] FLT32 { -0.6700, 0.4545, -0.4000, 0.4000} IMM[8] FLT32 { 1.4427, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[8], 2D 3: MOV TEMP[1].xz, TEMP[0] 4: ABS TEMP[2].x, TEMP[0] 5: MOV TEMP[2], -TEMP[2].xxxx 6: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 7: UIF TEMP[3].xxxx :0 8: MOV TEMP[3].x, IMM[0].yyyy 9: ELSE :0 10: MOV TEMP[3].x, IMM[0].wwww 11: ENDIF 12: MOV TEMP[3].x, TEMP[3].xxxx 13: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 14: UIF TEMP[4].xxxx :0 15: MOV TEMP[4].x, IMM[0].yyyy 16: ELSE :0 17: MOV TEMP[4].x, IMM[0].wwww 18: ENDIF 19: MOV TEMP[3].y, TEMP[4].xxxx 20: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 21: UIF TEMP[4].xxxx :0 22: MOV TEMP[4].x, IMM[0].yyyy 23: ELSE :0 24: MOV TEMP[4].x, IMM[0].wwww 25: ENDIF 26: MOV TEMP[3].z, TEMP[4].xxxx 27: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 28: UIF TEMP[2].xxxx :0 29: ELSE :0 30: ENDIF 31: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 32: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 33: OR TEMP[3].x, TEMP[4].xxxx, TEMP[3].yyyy 34: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx 35: KILL_IF -TEMP[3].xxxx 36: MUL TEMP[3].yw, CONST[18].xxzy, IN[4].yyyy 37: MOV TEMP[1].yw, TEMP[3].wyww 38: MAD TEMP[3].yw, IN[4].xxxx, CONST[17].xxzy, TEMP[1] 39: MOV TEMP[1].yw, TEMP[3].wyww 40: MAD TEMP[3].yw, IN[4].zzzz, CONST[19].xxzy, TEMP[1] 41: MOV TEMP[1].yw, TEMP[3].wyww 42: ADD TEMP[3].yw, TEMP[1], CONST[20].xxzy 43: MOV TEMP[1].yw, TEMP[3].wyww 44: MOV TEMP[3].xy, IN[6].zwww 45: TEX TEMP[3].yzw, TEMP[3], SAMP[5], 2D 46: MOV TEMP[2].w, TEMP[3].zyzw 47: MUL TEMP[3], TEMP[3].wwww, IMM[1].xxxx 48: MOV_SAT TEMP[3], TEMP[3] 49: MOV TEMP[2].x, TEMP[3].xxxx 50: ADD TEMP[4].x, -TEMP[3].xxxx, IMM[0].xxxx 51: MOV TEMP[4].x, TEMP[4].xxxx 52: MOV TEMP[4].y, IMM[0].zzzz 53: MOV TEMP[5].xy, TEMP[4].xyyy 54: MOV TEMP[5].w, IMM[0].zzzz 55: TXL TEMP[5], TEMP[5], SAMP[4], 2D 56: POW TEMP[6].x, TEMP[5].xxxx, IMM[1].yyyy 57: POW TEMP[6].y, TEMP[5].yyyy, IMM[1].yyyy 58: POW TEMP[6].z, TEMP[5].zzzz, IMM[1].yyyy 59: POW TEMP[6].w, TEMP[5].wwww, IMM[1].zzzz 60: MOV TEMP[4].w, TEMP[6] 61: MAD TEMP[5].xy, TEMP[1].ywzw, IMM[0].xyxx, IMM[0].zxzz 62: MOV TEMP[5].xy, TEMP[5].xyyy 63: TEX TEMP[5].xyz, TEMP[5], SAMP[3], 2D 64: MAD TEMP[7].yw, TEMP[5].xxzy, IMM[1].wwww, IMM[0].yyyy 65: DP2 TEMP[5].x, IMM[0].xyyy, TEMP[7].ywww 66: DP2 TEMP[7].x, IMM[0].yyyy, TEMP[7].ywww 67: MOV TEMP[5].y, TEMP[7].xxxx 68: MOV TEMP[5].zw, -TEMP[5].yyxy 69: ADD TEMP[5], TEMP[5], IMM[1].wwww 70: MUL TEMP[5], TEMP[5], TEMP[5] 71: MUL TEMP[5], TEMP[5], TEMP[5] 72: DP4 TEMP[7].x, IMM[0].xxxx, TEMP[5] 73: RCP TEMP[7].x, TEMP[7].xxxx 74: MUL TEMP[5], TEMP[7].xxxx, TEMP[5] 75: RCP TEMP[7].x, CONST[0].xxxx 76: MUL TEMP[5], TEMP[7].xxxx, TEMP[5] 77: MUL TEMP[7], TEMP[5], TEMP[5] 78: MOV TEMP[8].xy, IN[0].xyyy 79: TEX TEMP[8], TEMP[8], SAMP[0], 2D 80: MOV TEMP[9].xy, IN[0].xyyy 81: TEX TEMP[9].xw, TEMP[9], SAMP[1], 2D 82: MOV TEMP[10].w, TEMP[9].wwww 83: MAD TEMP[11].yw, TEMP[8].xxzy, IMM[1].wwww, IMM[0].yyyy 84: MOV TEMP[1].yw, TEMP[11].wyww 85: MUL TEMP[11].yz, TEMP[5].xxxx, TEMP[1].xyww 86: MOV TEMP[2].yz, TEMP[11].zyzz 87: MAD TEMP[9].z, TEMP[9].xxxx, IMM[1].wwww, IMM[0].yyyy 88: MOV TEMP[10].z, TEMP[9].zzzz 89: MOV TEMP[10].xy, TEMP[8].zwzz 90: MOV TEMP[8].xy, IN[1].xyyy 91: TEX TEMP[8], TEMP[8], SAMP[0], 2D 92: MOV TEMP[9].zw, TEMP[8].wwzw 93: MOV TEMP[12].xy, IN[1].xyyy 94: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 95: MOV TEMP[13].w, TEMP[12].wwww 96: ADD TEMP[14].xy, TEMP[8], TEMP[8] 97: MOV TEMP[9].xy, TEMP[14].xyxx 98: MAD TEMP[14].xy, TEMP[9], IMM[2].xyzy, IMM[2].yxzy 99: MOV TEMP[9].xy, TEMP[14].xyxx 100: MUL TEMP[14].xy, TEMP[5].yyyy, TEMP[9] 101: MOV TEMP[9].xy, TEMP[14].xyxx 102: MAD TEMP[15].yw, TEMP[1], TEMP[5].xxxx, TEMP[9].xxzy 103: MOV TEMP[1].yw, TEMP[15].wyww 104: MAD TEMP[12].z, TEMP[12].xxxx, IMM[2].wwww, IMM[0].xxxx 105: MOV TEMP[13].z, TEMP[12].zzzz 106: MOV TEMP[13].xy, TEMP[8].zwzz 107: MUL TEMP[8].xyz, TEMP[7].yyyy, TEMP[13] 108: MOV TEMP[13].xyz, TEMP[8].xyzx 109: MAD TEMP[8].xyz, TEMP[10], TEMP[7].xxxx, TEMP[13] 110: MOV TEMP[10].xyz, TEMP[8].xyzx 111: ADD TEMP[8].xy, TEMP[2].yzzw, TEMP[2].yzzw 112: MOV TEMP[5].xy, TEMP[8].xyxx 113: MUL TEMP[8].xy, TEMP[9], TEMP[5] 114: MOV TEMP[13].xy, TEMP[8].xyxx 115: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[14].yxxx 116: MOV TEMP[13].z, TEMP[8].xxxx 117: ADD TEMP[8].xyz, TEMP[10], TEMP[13] 118: MOV TEMP[10].xyz, TEMP[8].xyzx 119: MOV TEMP[8].xy, IN[2].xyyy 120: TEX TEMP[8], TEMP[8], SAMP[0], 2D 121: MOV TEMP[13].w, TEMP[8].wwww 122: MOV TEMP[12].xy, IN[2].xyyy 123: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 124: MOV TEMP[15].w, TEMP[12].wwww 125: MAD TEMP[16].xy, TEMP[8], IMM[2].wwww, IMM[0].xxxx 126: MOV TEMP[7].xy, TEMP[16].xyxx 127: MUL TEMP[16].zw, TEMP[5].zzzz, TEMP[16].xyxy 128: MOV TEMP[9].zw, TEMP[16].wwzw 129: MAD TEMP[17].yw, TEMP[7].xxzy, TEMP[5].zzzz, TEMP[1] 130: MOV TEMP[1].yw, TEMP[17].wyww 131: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].wwww, IMM[0].yyyy 132: MOV TEMP[15].z, TEMP[12].zzzz 133: MOV TEMP[15].xy, TEMP[8].zwzz 134: MAD TEMP[8].xyz, TEMP[15], TEMP[7].zzzz, TEMP[10] 135: MOV TEMP[7].xyz, TEMP[8].xyzx 136: MUL TEMP[8].xy, TEMP[5], TEMP[16].zwzw 137: MOV TEMP[10].xy, TEMP[8].xyxx 138: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[16].wzzz 139: MOV TEMP[10].z, TEMP[8].xxxx 140: ADD TEMP[8].xyz, TEMP[7], TEMP[10] 141: MOV TEMP[7].xyz, TEMP[8].xyzx 142: ADD TEMP[8].xy, TEMP[9], TEMP[9] 143: MOV TEMP[10].xy, TEMP[8].xyxx 144: MUL TEMP[8].xy, TEMP[16].zwzw, TEMP[10] 145: MOV TEMP[13].xy, TEMP[8].xyxx 146: DP2 TEMP[8].x, TEMP[14].xyyy, TEMP[16].wzzz 147: MOV TEMP[13].z, TEMP[8].xxxx 148: ADD TEMP[8].xyz, TEMP[7], TEMP[13] 149: MOV TEMP[7].xyz, TEMP[8].xyzx 150: MOV TEMP[8].xy, IN[3].xyyy 151: TEX TEMP[8], TEMP[8], SAMP[0], 2D 152: MOV TEMP[13].zw, TEMP[8].wwzw 153: MOV TEMP[12].xy, IN[3].xyyy 154: TEX TEMP[12].xw, TEMP[12], SAMP[1], 2D 155: MOV TEMP[15].w, TEMP[12].wwww 156: ADD TEMP[17].zw, TEMP[8].xyyx, TEMP[8].xyyx 157: MOV TEMP[10].zw, TEMP[17].wwzw 158: MAD TEMP[17].zw, TEMP[10], IMM[0].ywyx, IMM[0].ywxy 159: MOV TEMP[10].zw, TEMP[17].wwzw 160: MUL TEMP[18].xy, TEMP[5].wwww, TEMP[17].zwzw 161: MOV TEMP[13].xy, TEMP[18].xyxx 162: MAD TEMP[17].xy, TEMP[17].wzzw, TEMP[5].wwww, TEMP[1].ywzw 163: MOV TEMP[19].xy, TEMP[17].xyxx 164: MAD TEMP[12].z, TEMP[12].xxxx, IMM[2].wwww, IMM[0].xxxx 165: MOV TEMP[15].z, TEMP[12].zzzz 166: MOV TEMP[15].xy, TEMP[8].zwzz 167: MAD TEMP[7].xyz, TEMP[15], TEMP[7].wwww, TEMP[7] 168: MUL TEMP[8].xy, TEMP[5], TEMP[13].yxzw 169: MOV TEMP[5].xy, TEMP[8].xyxx 170: DP2 TEMP[8].x, TEMP[11].yzzz, TEMP[18].xyyy 171: MOV TEMP[5].z, TEMP[8].xxxx 172: ADD TEMP[7].yzw, TEMP[5].xxyz, TEMP[7].xxyz 173: MOV TEMP[2].yzw, TEMP[7].zyzw 174: MUL TEMP[7].xy, TEMP[10], TEMP[13].yxzw 175: MOV TEMP[5].xy, TEMP[7].xyxx 176: DP2 TEMP[7].x, TEMP[14].xyyy, TEMP[18].xyyy 177: MOV TEMP[5].z, TEMP[7].xxxx 178: ADD TEMP[7].yzw, TEMP[2], TEMP[5].xxyz 179: MOV TEMP[2].yzw, TEMP[7].zyzw 180: MUL TEMP[7].yw, TEMP[9].xzzw, TEMP[13].xyzx 181: MOV TEMP[1].yw, TEMP[7].wyww 182: ADD TEMP[7].xy, TEMP[1].ywzw, TEMP[1].ywzw 183: MOV TEMP[5].xy, TEMP[7].xyxx 184: DP2 TEMP[7].x, TEMP[16].zwww, TEMP[18].xyyy 185: MOV TEMP[5].z, TEMP[7].xxxx 186: ADD TEMP[7].yzw, TEMP[2], TEMP[5].xxyz 187: MOV TEMP[2].yzw, TEMP[7].zyzw 188: MAD TEMP[7].yzw, TEMP[17].xxyx, -TEMP[17].xxyy, TEMP[2] 189: MOV TEMP[2].yz, TEMP[7].zyzw 190: MOV TEMP[19].z, IMM[0].xxxx 191: DP3 TEMP[8].x, CONST[1].xyzz, TEMP[19].xyzz 192: MOV_SAT TEMP[8].x, TEMP[8].xxxx 193: ADD TEMP[11].xyz, CONST[8], -IN[4] 194: MOV TEMP[5].xyz, TEMP[11].xyzx 195: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 196: MAX TEMP[11].x, TEMP[11].xxxx, IMM[3].xxxx 197: RSQ TEMP[11].x, TEMP[11].xxxx 198: MUL TEMP[12].xyz, TEMP[11].xxxx, TEMP[5] 199: MAD TEMP[14].xyz, TEMP[5], TEMP[11].xxxx, -CONST[5] 200: MOV TEMP[9].xyz, TEMP[14].xyzx 201: MAD TEMP[14].xyz, CONST[5].wwww, TEMP[9], CONST[5] 202: MOV TEMP[9].xyz, TEMP[14].xyzx 203: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[14].xyzz 204: MAX TEMP[14].x, TEMP[14].xxxx, IMM[3].xxxx 205: RSQ TEMP[14].x, TEMP[14].xxxx 206: MAD TEMP[14].xyz, TEMP[9], TEMP[14].xxxx, CONST[1] 207: MOV TEMP[9].xyz, TEMP[14].xyzx 208: RCP TEMP[14].x, TEMP[14].zzzz 209: MAD TEMP[14].xy, TEMP[9], TEMP[14].xxxx, -TEMP[19] 210: RCP TEMP[15].x, CONST[3].wwww 211: ADD TEMP[16].zw, TEMP[2].xyyz, TEMP[15].xxxx 212: MUL TEMP[17].w, TEMP[7].wwww, TEMP[7].wwww 213: MAD TEMP[18].x, TEMP[16].zzzz, TEMP[16].wwww, -TEMP[17].wwww 214: MUL TEMP[20].y, TEMP[14].xxxx, TEMP[14].xxxx 215: ADD TEMP[7].w, TEMP[7].wwww, TEMP[7].wwww 216: MOV TEMP[2].w, TEMP[7].wwww 217: MUL TEMP[21].x, TEMP[14].xxxx, TEMP[7].wwww 218: MAD TEMP[22].z, TEMP[16].zzzz, TEMP[14].yyyy, -TEMP[21].xxxx 219: MUL TEMP[22].z, TEMP[14].yyyy, TEMP[22].zzzz 220: MAD TEMP[16].z, TEMP[20].yyyy, TEMP[16].wwww, TEMP[22].zzzz 221: MUL TEMP[16].z, TEMP[16].zzzz, IMM[3].yyyy 222: RCP TEMP[22].x, TEMP[18].xxxx 223: MUL TEMP[23].z, TEMP[22].xxxx, TEMP[16].zzzz 224: MOV TEMP[24].x, -TEMP[18].xxxx 225: FSGE TEMP[24].x, TEMP[24].xxxx, IMM[0].zzzz 226: UIF TEMP[24].xxxx :0 227: MOV TEMP[24].x, IMM[0].xxxx 228: ELSE :0 229: MOV TEMP[24].x, IMM[0].zzzz 230: ENDIF 231: MOV TEMP[10].w, TEMP[24].xxxx 232: MAD TEMP[16].z, TEMP[16].zzzz, TEMP[22].xxxx, IMM[3].zzzz 233: FSGE TEMP[16].x, TEMP[16].zzzz, IMM[0].zzzz 234: UIF TEMP[16].xxxx :0 235: MOV TEMP[16].x, IMM[0].xxxx 236: ELSE :0 237: MOV TEMP[16].x, IMM[0].zzzz 238: ENDIF 239: MOV TEMP[9].z, TEMP[16].xxxx 240: MUL TEMP[16].w, TEMP[23].zzzz, IMM[3].wwww 241: EX2 TEMP[16].x, TEMP[16].wwww 242: MAX TEMP[18].x, TEMP[18].xxxx, IMM[3].xxxx 243: RSQ TEMP[18].x, TEMP[18].xxxx 244: MUL TEMP[16].w, TEMP[16].xxxx, TEMP[18].xxxx 245: MOV TEMP[9].w, TEMP[16].wwww 246: MAD TEMP[15].xz, TEMP[15].xxxx, IMM[4].xxxx, TEMP[2].yyzw 247: MAD TEMP[18].w, TEMP[15].xxxx, TEMP[15].zzzz, -TEMP[17].wwww 248: MAD TEMP[21].x, TEMP[15].xxxx, TEMP[14].yyyy, -TEMP[21].xxxx 249: MUL TEMP[14].x, TEMP[14].yyyy, TEMP[21].xxxx 250: MAD TEMP[14].x, TEMP[20].yyyy, TEMP[15].zzzz, TEMP[14].xxxx 251: MUL TEMP[14].x, TEMP[14].xxxx, IMM[3].yyyy 252: RCP TEMP[15].x, TEMP[18].wwww 253: MOV TEMP[9].y, TEMP[15].xxxx 254: MUL TEMP[20].x, TEMP[15].xxxx, TEMP[14].xxxx 255: MOV TEMP[21].x, -TEMP[18].wwww 256: FSGE TEMP[21].x, TEMP[21].xxxx, IMM[0].zzzz 257: UIF TEMP[21].xxxx :0 258: MOV TEMP[21].x, IMM[0].xxxx 259: ELSE :0 260: MOV TEMP[21].x, IMM[0].zzzz 261: ENDIF 262: MOV TEMP[10].y, TEMP[21].xxxx 263: MAD TEMP[14].x, TEMP[14].xxxx, TEMP[15].xxxx, IMM[3].zzzz 264: FSGE TEMP[14].x, TEMP[14].xxxx, IMM[0].zzzz 265: UIF TEMP[14].xxxx :0 266: MOV TEMP[14].x, IMM[0].xxxx 267: ELSE :0 268: MOV TEMP[14].x, IMM[0].zzzz 269: ENDIF 270: MOV TEMP[9].x, TEMP[14].xxxx 271: ADD TEMP[9].xz, TEMP[9], TEMP[10].yyww 272: MUL TEMP[14].y, TEMP[20].xxxx, IMM[3].wwww 273: EX2 TEMP[14].x, TEMP[14].yyyy 274: MAX TEMP[15].x, TEMP[18].wwww, IMM[3].xxxx 275: RSQ TEMP[15].x, TEMP[15].xxxx 276: MUL TEMP[14].w, TEMP[15].xxxx, TEMP[14].xxxx 277: MOV TEMP[5].w, TEMP[14].wwww 278: MAD TEMP[11].xyz, TEMP[5], TEMP[11].xxxx, -CONST[6] 279: MOV TEMP[5].xyz, TEMP[11].xyzx 280: MAD TEMP[11].xyz, CONST[6].wwww, TEMP[5], CONST[6] 281: MOV TEMP[5].xyz, TEMP[11].xyzx 282: DP3 TEMP[11].x, TEMP[11].xyzz, TEMP[11].xyzz 283: MAX TEMP[11].x, TEMP[11].xxxx, IMM[3].xxxx 284: RSQ TEMP[11].x, TEMP[11].xxxx 285: MAD TEMP[11].xyz, TEMP[5], TEMP[11].xxxx, CONST[2] 286: MOV TEMP[5].xyz, TEMP[11].xyzx 287: RCP TEMP[11].x, TEMP[11].zzzz 288: MAD TEMP[11].xy, TEMP[5], TEMP[11].xxxx, -TEMP[19] 289: RCP TEMP[14].x, CONST[4].wwww 290: ADD TEMP[14].xy, TEMP[14].xxxx, TEMP[2].yzzw 291: MAD TEMP[15].w, TEMP[14].xxxx, TEMP[14].yyyy, -TEMP[17].wwww 292: MUL TEMP[17].z, TEMP[11].xxxx, TEMP[11].xxxx 293: MUL TEMP[7].w, TEMP[7].wwww, TEMP[11].xxxx 294: MAD TEMP[7].w, TEMP[14].xxxx, TEMP[11].yyyy, -TEMP[7].wwww 295: MUL TEMP[7].w, TEMP[11].yyyy, TEMP[7].wwww 296: MAD TEMP[7].w, TEMP[17].zzzz, TEMP[14].yyyy, TEMP[7].wwww 297: MUL TEMP[7].w, TEMP[7].wwww, IMM[3].yyyy 298: RCP TEMP[5].x, TEMP[15].wwww 299: MUL TEMP[11].y, TEMP[7].wwww, TEMP[5].xxxx 300: MOV TEMP[5].y, TEMP[11].yyyy 301: MOV TEMP[14].x, -TEMP[15].wwww 302: FSGE TEMP[14].x, TEMP[14].xxxx, IMM[0].zzzz 303: UIF TEMP[14].xxxx :0 304: MOV TEMP[14].x, IMM[0].xxxx 305: ELSE :0 306: MOV TEMP[14].x, IMM[0].zzzz 307: ENDIF 308: MOV TEMP[5].z, TEMP[14].xxxx 309: MAD TEMP[7].w, TEMP[7].wwww, TEMP[5].xxxx, IMM[3].zzzz 310: FSGE TEMP[7].x, TEMP[7].wwww, IMM[0].zzzz 311: UIF TEMP[7].xxxx :0 312: MOV TEMP[7].x, IMM[0].xxxx 313: ELSE :0 314: MOV TEMP[7].x, IMM[0].zzzz 315: ENDIF 316: ADD TEMP[7].w, TEMP[7].xxxx, TEMP[14].xxxx 317: MOV TEMP[2].w, TEMP[7].wwww 318: MUL TEMP[14].xw, TEMP[5].yyzw, IMM[4].yyxz 319: MAX TEMP[17].x, TEMP[15].wwww, IMM[3].xxxx 320: RSQ TEMP[17].x, TEMP[17].xxxx 321: EX2 TEMP[18].x, TEMP[14].xxxx 322: MUL TEMP[17].x, TEMP[17].xxxx, TEMP[18].xxxx 323: MUL TEMP[18].xyz, CONST[10].xyww, IN[4].yyyy 324: MOV TEMP[10].xyz, TEMP[18].xyzx 325: MAD TEMP[18].xyz, IN[4].xxxx, CONST[9].xyww, TEMP[10] 326: MOV TEMP[10].xyz, TEMP[18].xyzx 327: MAD TEMP[18].xyz, IN[4].zzzz, CONST[11].xyww, TEMP[10] 328: MOV TEMP[10].xyz, TEMP[18].xyzx 329: ADD TEMP[18].xyz, TEMP[10], CONST[12].xyww 330: MOV TEMP[10].xyz, TEMP[18].xyzx 331: RCP TEMP[18].x, TEMP[18].zzzz 332: MUL TEMP[18].xy, TEMP[18].xxxx, TEMP[10] 333: MOV TEMP[10].xy, TEMP[18].xyxx 334: MAD TEMP[18].xy, TEMP[10], IMM[5], IMM[4].wwww 335: MOV TEMP[10].xy, TEMP[18].xyxx 336: MAD TEMP[18].yz, TEMP[2], IMM[5].zzzz, TEMP[10].xxyw 337: MOV TEMP[2].yz, TEMP[18].zyzz 338: MAD TEMP[18].xy, TEMP[15].wwww, IMM[1].wwww, TEMP[2].yzzw 339: MAD TEMP[20].xy, TEMP[15].wwww, IMM[6], TEMP[2].yzzw 340: MOV TEMP[20].xy, TEMP[20].xyyy 341: TEX TEMP[20], TEMP[20], SAMP[2], 2D 342: MUL TEMP[13], TEMP[20], IMM[5].wwww 343: MOV TEMP[18].xy, TEMP[18].xyyy 344: TEX TEMP[18], TEMP[18], SAMP[2], 2D 345: MAD TEMP[10], TEMP[18], IMM[5].wwww, TEMP[13] 346: MAD TEMP[18].xy, TEMP[15].wwww, IMM[6].yzzw, TEMP[2].yzzw 347: MOV TEMP[18].xy, TEMP[18].xyyy 348: TEX TEMP[18], TEMP[18], SAMP[2], 2D 349: MAD TEMP[10], TEMP[18], IMM[5].wwww, TEMP[10] 350: MAD TEMP[15].yz, TEMP[15].wwww, IMM[2].wwww, TEMP[2] 351: MOV TEMP[15].xy, TEMP[15].yzzz 352: TEX TEMP[15], TEMP[15], SAMP[2], 2D 353: MOV TEMP[13].w, TEMP[15].wwww 354: MAD TEMP[10], TEMP[15], IMM[5].wwww, TEMP[10] 355: ABS TEMP[15].x, TEMP[10].xxxx 356: LG2 TEMP[13].x, TEMP[15].xxxx 357: ABS TEMP[15].x, TEMP[10].yyyy 358: LG2 TEMP[15].x, TEMP[15].xxxx 359: MOV TEMP[13].y, TEMP[15].xxxx 360: ABS TEMP[15].x, TEMP[10].zzzz 361: LG2 TEMP[15].x, TEMP[15].xxxx 362: MOV TEMP[13].z, TEMP[15].xxxx 363: MUL TEMP[15].xyz, TEMP[13], IMM[1].yyyy 364: EX2 TEMP[13].x, TEMP[15].xxxx 365: EX2 TEMP[18].x, TEMP[15].yyyy 366: MOV TEMP[13].y, TEMP[18].xxxx 367: EX2 TEMP[15].x, TEMP[15].zzzz 368: MOV TEMP[13].z, TEMP[15].xxxx 369: DP3 TEMP[12].x, TEMP[19].xyzz, TEMP[12].xyzz 370: ADD TEMP[12].w, -TEMP[12].xxxx, IMM[0].xxxx 371: MUL TEMP[15].y, TEMP[12].wwww, TEMP[12].wwww 372: MUL TEMP[15].y, TEMP[15].yyyy, TEMP[15].yyyy 373: MUL TEMP[12].w, TEMP[12].wwww, TEMP[15].yyyy 374: MUL TEMP[15].xyz, TEMP[16].wwww, CONST[3] 375: MOV TEMP[16], -TEMP[9].zzzz 376: FSGE TEMP[18].x, TEMP[16].xxxx, IMM[0].zzzz 377: UIF TEMP[18].xxxx :0 378: MOV TEMP[18].x, TEMP[15].xxxx 379: ELSE :0 380: MOV TEMP[18].x, IMM[0].zzzz 381: ENDIF 382: MOV TEMP[18].x, TEMP[18].xxxx 383: FSGE TEMP[19].x, TEMP[16].yyyy, IMM[0].zzzz 384: UIF TEMP[19].xxxx :0 385: MOV TEMP[19].x, TEMP[15].yyyy 386: ELSE :0 387: MOV TEMP[19].x, IMM[0].zzzz 388: ENDIF 389: MOV TEMP[18].y, TEMP[19].xxxx 390: FSGE TEMP[19].x, TEMP[16].zzzz, IMM[0].zzzz 391: UIF TEMP[19].xxxx :0 392: MOV TEMP[15].x, TEMP[15].zzzz 393: ELSE :0 394: MOV TEMP[15].x, IMM[0].zzzz 395: ENDIF 396: MOV TEMP[18].z, TEMP[15].xxxx 397: FSGE TEMP[15].x, TEMP[16].wwww, IMM[0].zzzz 398: UIF TEMP[15].xxxx :0 399: ELSE :0 400: ENDIF 401: MUL TEMP[15].yzw, TEMP[17].xxxx, CONST[4].xxyz 402: MOV TEMP[7], -TEMP[7].wwww 403: FSGE TEMP[16].x, TEMP[7].xxxx, IMM[0].zzzz 404: UIF TEMP[16].xxxx :0 405: ELSE :0 406: ENDIF 407: FSGE TEMP[17].x, TEMP[7].yyyy, IMM[0].zzzz 408: UIF TEMP[17].xxxx :0 409: MOV TEMP[17].x, TEMP[15].yyyy 410: ELSE :0 411: MOV TEMP[17].x, IMM[0].zzzz 412: ENDIF 413: MOV TEMP[16].y, TEMP[17].xxxx 414: FSGE TEMP[17].x, TEMP[7].zzzz, IMM[0].zzzz 415: UIF TEMP[17].xxxx :0 416: MOV TEMP[17].x, TEMP[15].zzzz 417: ELSE :0 418: MOV TEMP[17].x, IMM[0].zzzz 419: ENDIF 420: MOV TEMP[16].z, TEMP[17].xxxx 421: FSGE TEMP[7].x, TEMP[7].wwww, IMM[0].zzzz 422: UIF TEMP[7].xxxx :0 423: MOV TEMP[7].x, TEMP[15].wwww 424: ELSE :0 425: MOV TEMP[7].x, IMM[0].zzzz 426: ENDIF 427: MOV TEMP[16].w, TEMP[7].xxxx 428: MOV TEMP[2].yzw, TEMP[16].zyzw 429: ADD TEMP[7].yzw, TEMP[2], TEMP[18].xxyz 430: MOV TEMP[2].yzw, TEMP[7].zyzw 431: MAD TEMP[7].yzw, TEMP[2], TEMP[12].wwww, TEMP[6].xxyz 432: MOV TEMP[2].yzw, TEMP[7].zyzw 433: MUL TEMP[7].xyz, TEMP[6], CONST[3] 434: MUL TEMP[7].xzw, TEMP[14].wwww, TEMP[7].xyyz 435: MOV TEMP[9], -TEMP[9].xxxx 436: FSGE TEMP[12].x, TEMP[9].xxxx, IMM[0].zzzz 437: UIF TEMP[12].xxxx :0 438: MOV TEMP[12].x, TEMP[7].xxxx 439: ELSE :0 440: MOV TEMP[12].x, IMM[0].zzzz 441: ENDIF 442: MOV TEMP[12].x, TEMP[12].xxxx 443: FSGE TEMP[14].x, TEMP[9].yyyy, IMM[0].zzzz 444: UIF TEMP[14].xxxx :0 445: ELSE :0 446: ENDIF 447: FSGE TEMP[14].x, TEMP[9].zzzz, IMM[0].zzzz 448: UIF TEMP[14].xxxx :0 449: MOV TEMP[14].x, TEMP[7].zzzz 450: ELSE :0 451: MOV TEMP[14].x, IMM[0].zzzz 452: ENDIF 453: MOV TEMP[12].z, TEMP[14].xxxx 454: FSGE TEMP[9].x, TEMP[9].wwww, IMM[0].zzzz 455: UIF TEMP[9].xxxx :0 456: MOV TEMP[7].x, TEMP[7].wwww 457: ELSE :0 458: MOV TEMP[7].x, IMM[0].zzzz 459: ENDIF 460: MOV TEMP[12].w, TEMP[7].xxxx 461: MAD TEMP[7].yzw, TEMP[2], TEMP[8].xxxx, TEMP[12].xxzw 462: MOV TEMP[2].yzw, TEMP[7].zyzw 463: MAD TEMP[7].yzw, TEMP[13].xxyz, CONST[23].xxxx, TEMP[2] 464: MOV TEMP[2].w, TEMP[7].zyzw 465: MUL TEMP[8].y, TEMP[11].yyyy, TEMP[11].yyyy 466: MUL TEMP[8].y, TEMP[11].yyyy, TEMP[8].yyyy 467: MUL TEMP[3].y, TEMP[3].xxxx, TEMP[8].yyyy 468: MAD TEMP[3].xyz, TEMP[3].yyyy, TEMP[6], TEMP[7].yzww 469: MOV TEMP[2].xyz, TEMP[3].xyzx 470: MUL TEMP[3].xyz, CONST[14].xyww, IN[4].yyyy 471: MOV TEMP[4].xyz, TEMP[3].xyzx 472: MAD TEMP[3].xyz, IN[4].xxxx, CONST[13].xyww, TEMP[4] 473: MOV TEMP[4].xyz, TEMP[3].xyzx 474: MAD TEMP[3].xyz, IN[4].zzzz, CONST[15].xyww, TEMP[4] 475: MOV TEMP[4].xyz, TEMP[3].xyzx 476: ADD TEMP[3].xyz, TEMP[4], CONST[16].xyww 477: RCP TEMP[7].x, TEMP[3].zzzz 478: MUL TEMP[3].yw, TEMP[7].xxxx, TEMP[3].xxzy 479: MOV TEMP[1].yw, TEMP[3].wyww 480: MAD TEMP[3].yw, TEMP[1], IMM[5].xxzy, IMM[4].wwww 481: MOV TEMP[1].yw, TEMP[3].wyww 482: MAD TEMP[3].xy, TEMP[1].ywzw, IMM[0].xyxx, IMM[0].zxzz 483: MOV TEMP[3].xy, TEMP[3].xyyy 484: TEX TEMP[3].xw, TEMP[3], SAMP[10], 2D 485: MOV TEMP[5].w, TEMP[3].wwww 486: MOV_SAT TEMP[3].x, TEMP[3].xxxx 487: MUL TEMP[3].y, TEMP[3].xxxx, CONST[21].xxxx 488: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[2] 489: MOV TEMP[4].xyz, TEMP[3].xyzx 490: MAD TEMP[3].xyz, TEMP[4], IMM[7].xxxx, TEMP[2] 491: MAD TEMP[6], TEMP[10].wwww, TEMP[6].wwww, TEMP[6].wwww 492: MOV_SAT TEMP[6], TEMP[6] 493: MOV TEMP[6].w, TEMP[6].wwww 494: ABS TEMP[7].x, TEMP[3].xxxx 495: LG2 TEMP[4].x, TEMP[7].xxxx 496: ABS TEMP[7].x, TEMP[3].yyyy 497: LG2 TEMP[7].x, TEMP[7].xxxx 498: MOV TEMP[4].y, TEMP[7].xxxx 499: ABS TEMP[3].x, TEMP[3].zzzz 500: LG2 TEMP[3].x, TEMP[3].xxxx 501: MOV TEMP[4].z, TEMP[3].xxxx 502: MUL TEMP[3].xyz, TEMP[4], IMM[7].yyyy 503: EX2 TEMP[4].x, TEMP[3].xxxx 504: EX2 TEMP[7].x, TEMP[3].yyyy 505: MOV TEMP[4].y, TEMP[7].xxxx 506: EX2 TEMP[3].x, TEMP[3].zzzz 507: MOV TEMP[4].z, TEMP[3].xxxx 508: MAD TEMP[3].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 509: MOV TEMP[3].xy, TEMP[3].xyyy 510: TEX TEMP[3], TEMP[3], SAMP[7], 2D 511: MOV TEMP[2].w, TEMP[3].wwww 512: ADD TEMP[7].y, -TEMP[3].wwww, IMM[0].xxxx 513: MAD TEMP[3].xyz, TEMP[4], TEMP[7].yyyy, TEMP[3] 514: MOV TEMP[2].xyz, TEMP[3].xyzx 515: ADD TEMP[3].xyz, TEMP[2], IMM[7].zzzz 516: MOV TEMP[2].xyz, TEMP[3].xyzx 517: MAD TEMP[3].xyz, TEMP[0].xxxx, TEMP[2], IMM[7].wwww 518: MOV TEMP[2].xyz, TEMP[3].xyzx 519: MOV TEMP[3].xyz, TEMP[3].xyzz 520: TEX TEMP[3], TEMP[3], SAMP[9], 3D 521: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 522: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[2] 523: MOV TEMP[5].xyz, TEMP[0].xyzx 524: RCP TEMP[1].x, IN[5].zzzz 525: MUL TEMP[2].x, TEMP[1].xxxx, IN[5].yyyy 526: MAD TEMP[2].x, TEMP[2].xxxx, IMM[3].yyyy, IMM[3].yyyy 527: MOV TEMP[1].x, TEMP[2].xxxx 528: MOV TEMP[1].y, CONST[7].wwww 529: MOV TEMP[2].xy, TEMP[1].xyyy 530: TEX TEMP[2].x, TEMP[2], SAMP[6], 2D 531: MOV TEMP[1].x, TEMP[2].xxxx 532: ADD TEMP[3].yzw, -CONST[8].xxyz, IN[4].xxyz 533: MOV TEMP[1].w, TEMP[3].zyzw 534: DP3 TEMP[4].x, TEMP[3].yzww, TEMP[3].yzww 535: MOV TEMP[1].y, TEMP[4].xxxx 536: MUL TEMP[1].yz, TEMP[1].xyww, CONST[7].xyxw 537: MUL TEMP[4].z, TEMP[1].zzzz, IMM[8].xxxx 538: EX2 TEMP[4].x, TEMP[4].zzzz 539: ADD TEMP[4].z, -TEMP[4].xxxx, IMM[0].xxxx 540: MUL TEMP[1].y, TEMP[4].zzzz, TEMP[1].yyyy 541: RCP TEMP[3].x, TEMP[3].wwww 542: MUL TEMP[1].y, TEMP[3].xxxx, TEMP[1].yyyy 543: MUL TEMP[1].y, TEMP[1].yyyy, IMM[8].xxxx 544: EX2 TEMP[1].x, TEMP[1].yyyy 545: MOV_SAT TEMP[1].x, TEMP[1].xxxx 546: ADD TEMP[1].y, -TEMP[1].xxxx, IMM[0].xxxx 547: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[2].xxxx 548: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[22].xxyz 549: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[5] 550: MOV TEMP[6].xyz, TEMP[0].xyzx 551: MOV OUT[0], TEMP[6] 552: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 220) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %69 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %70 = call float @llvm.SI.load.const(<16 x i8> %23, i32 236) %71 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %72 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %73 = call float @llvm.SI.load.const(<16 x i8> %23, i32 252) %74 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %75 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %76 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %77 = call float @llvm.SI.load.const(<16 x i8> %23, i32 272) %78 = call float @llvm.SI.load.const(<16 x i8> %23, i32 276) %79 = call float @llvm.SI.load.const(<16 x i8> %23, i32 288) %80 = call float @llvm.SI.load.const(<16 x i8> %23, i32 292) %81 = call float @llvm.SI.load.const(<16 x i8> %23, i32 304) %82 = call float @llvm.SI.load.const(<16 x i8> %23, i32 308) %83 = call float @llvm.SI.load.const(<16 x i8> %23, i32 320) %84 = call float @llvm.SI.load.const(<16 x i8> %23, i32 324) %85 = call float @llvm.SI.load.const(<16 x i8> %23, i32 336) %86 = call float @llvm.SI.load.const(<16 x i8> %23, i32 352) %87 = call float @llvm.SI.load.const(<16 x i8> %23, i32 356) %88 = call float @llvm.SI.load.const(<16 x i8> %23, i32 360) %89 = call float @llvm.SI.load.const(<16 x i8> %23, i32 368) %90 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %91 = load <8 x i32> addrspace(2)* %90, !tbaa !0 %92 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %93 = load <4 x i32> addrspace(2)* %92, !tbaa !0 %94 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %95 = load <8 x i32> addrspace(2)* %94, !tbaa !0 %96 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %97 = load <4 x i32> addrspace(2)* %96, !tbaa !0 %98 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %99 = load <8 x i32> addrspace(2)* %98, !tbaa !0 %100 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %101 = load <4 x i32> addrspace(2)* %100, !tbaa !0 %102 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %103 = load <8 x i32> addrspace(2)* %102, !tbaa !0 %104 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %105 = load <4 x i32> addrspace(2)* %104, !tbaa !0 %106 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %107 = load <8 x i32> addrspace(2)* %106, !tbaa !0 %108 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %109 = load <4 x i32> addrspace(2)* %108, !tbaa !0 %110 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %111 = load <8 x i32> addrspace(2)* %110, !tbaa !0 %112 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %113 = load <4 x i32> addrspace(2)* %112, !tbaa !0 %114 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %115 = load <8 x i32> addrspace(2)* %114, !tbaa !0 %116 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %117 = load <4 x i32> addrspace(2)* %116, !tbaa !0 %118 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %119 = load <8 x i32> addrspace(2)* %118, !tbaa !0 %120 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %121 = load <4 x i32> addrspace(2)* %120, !tbaa !0 %122 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %123 = load <8 x i32> addrspace(2)* %122, !tbaa !0 %124 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %125 = load <4 x i32> addrspace(2)* %124, !tbaa !0 %126 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %127 = load <8 x i32> addrspace(2)* %126, !tbaa !0 %128 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %129 = load <4 x i32> addrspace(2)* %128, !tbaa !0 %130 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %131 = load <8 x i32> addrspace(2)* %130, !tbaa !0 %132 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %133 = load <4 x i32> addrspace(2)* %132, !tbaa !0 %134 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %135 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %136 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %137 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %138 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %139 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %140 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %141 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %142 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %143 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %144 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %145 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %146 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %147 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %148 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %149 = call float @llvm.SI.fs.interp(i32 2, i32 6, i32 %5, <2 x i32> %7) %150 = call float @llvm.SI.fs.interp(i32 3, i32 6, i32 %5, <2 x i32> %7) %151 = fmul float %147, 1.000000e+00 %152 = fadd float %151, 0.000000e+00 %153 = fmul float %148, -1.000000e+00 %154 = fadd float %153, 1.000000e+00 %155 = bitcast float %152 to i32 %156 = bitcast float %154 to i32 %157 = insertelement <2 x i32> undef, i32 %155, i32 0 %158 = insertelement <2 x i32> %157, i32 %156, i32 1 %159 = bitcast <8 x i32> %123 to <32 x i8> %160 = bitcast <4 x i32> %125 to <16 x i8> %161 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %158, <32 x i8> %159, <16 x i8> %160, i32 2) %162 = extractelement <4 x float> %161, i32 0 %163 = extractelement <4 x float> %161, i32 2 %164 = call float @fabs(float %162) %165 = fsub float -0.000000e+00, %164 %166 = fsub float -0.000000e+00, %164 %167 = fsub float -0.000000e+00, %164 %168 = fsub float -0.000000e+00, %164 %169 = fcmp oge float %165, 0.000000e+00 %170 = sext i1 %169 to i32 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = icmp ne i32 %172, 0 %. = select i1 %173, float -1.000000e+00, float -0.000000e+00 %174 = fcmp oge float %166, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %175 to float %177 = bitcast float %176 to i32 %178 = icmp ne i32 %177, 0 %temp16.0 = select i1 %178, float -1.000000e+00, float -0.000000e+00 %179 = fcmp oge float %167, 0.000000e+00 %180 = sext i1 %179 to i32 %181 = bitcast i32 %180 to float %182 = bitcast float %181 to i32 %183 = icmp ne i32 %182, 0 %.163 = select i1 %183, float -1.000000e+00, float -0.000000e+00 %184 = fcmp oge float %168, 0.000000e+00 %185 = sext i1 %184 to i32 %186 = bitcast i32 %185 to float %187 = bitcast float %186 to i32 %188 = icmp ne i32 %187, 0 %189 = fcmp olt float %., 0.000000e+00 %190 = sext i1 %189 to i32 %191 = fcmp olt float %temp16.0, 0.000000e+00 %192 = sext i1 %191 to i32 %193 = fcmp olt float %.163, 0.000000e+00 %194 = sext i1 %193 to i32 %195 = bitcast i32 %190 to float %196 = bitcast i32 %192 to float %197 = bitcast i32 %194 to float %198 = bitcast float %195 to i32 %199 = bitcast float %197 to i32 %200 = or i32 %198, %199 %201 = bitcast i32 %200 to float %202 = bitcast float %201 to i32 %203 = bitcast float %196 to i32 %204 = or i32 %202, %203 %205 = bitcast i32 %204 to float %206 = bitcast float %205 to i32 %207 = and i32 %206, 1065353216 %208 = bitcast i32 %207 to float %209 = fsub float -0.000000e+00, %208 %210 = fsub float -0.000000e+00, %208 %211 = fsub float -0.000000e+00, %208 %212 = fsub float -0.000000e+00, %208 call void @llvm.AMDGPU.kill(float %209) call void @llvm.AMDGPU.kill(float %210) call void @llvm.AMDGPU.kill(float %211) call void @llvm.AMDGPU.kill(float %212) %213 = fmul float %79, %143 %214 = fmul float %80, %143 %215 = fmul float %142, %77 %216 = fadd float %215, %213 %217 = fmul float %142, %78 %218 = fadd float %217, %214 %219 = fmul float %144, %81 %220 = fadd float %219, %216 %221 = fmul float %144, %82 %222 = fadd float %221, %218 %223 = fadd float %220, %83 %224 = fadd float %222, %84 %225 = bitcast float %149 to i32 %226 = bitcast float %150 to i32 %227 = insertelement <2 x i32> undef, i32 %225, i32 0 %228 = insertelement <2 x i32> %227, i32 %226, i32 1 %229 = bitcast <8 x i32> %111 to <32 x i8> %230 = bitcast <4 x i32> %113 to <16 x i8> %231 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %228, <32 x i8> %229, <16 x i8> %230, i32 2) %232 = extractelement <4 x float> %231, i32 3 %233 = fmul float %232, 4.000000e+00 %234 = fmul float %232, 4.000000e+00 %235 = fmul float %232, 4.000000e+00 %236 = fmul float %232, 4.000000e+00 %237 = call float @llvm.AMDIL.clamp.(float %233, float 0.000000e+00, float 1.000000e+00) %238 = call float @llvm.AMDIL.clamp.(float %234, float 0.000000e+00, float 1.000000e+00) %239 = call float @llvm.AMDIL.clamp.(float %235, float 0.000000e+00, float 1.000000e+00) %240 = call float @llvm.AMDIL.clamp.(float %236, float 0.000000e+00, float 1.000000e+00) %241 = fsub float -0.000000e+00, %237 %242 = fadd float %241, 1.000000e+00 %243 = bitcast float %242 to i32 %244 = bitcast float 0.000000e+00 to i32 %245 = bitcast float 0.000000e+00 to i32 %246 = insertelement <4 x i32> undef, i32 %243, i32 0 %247 = insertelement <4 x i32> %246, i32 %244, i32 1 %248 = insertelement <4 x i32> %247, i32 %245, i32 2 %249 = insertelement <4 x i32> %248, i32 undef, i32 3 %250 = bitcast <8 x i32> %107 to <32 x i8> %251 = bitcast <4 x i32> %109 to <16 x i8> %252 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %249, <32 x i8> %250, <16 x i8> %251, i32 2) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = extractelement <4 x float> %252, i32 3 %257 = call float @llvm.pow.f32(float %253, float 0x40019999A0000000) %258 = call float @llvm.pow.f32(float %254, float 0x40019999A0000000) %259 = call float @llvm.pow.f32(float %255, float 0x40019999A0000000) %260 = call float @llvm.pow.f32(float %256, float 1.000000e+00) %261 = fmul float %223, 1.000000e+00 %262 = fadd float %261, 0.000000e+00 %263 = fmul float %224, -1.000000e+00 %264 = fadd float %263, 1.000000e+00 %265 = bitcast float %262 to i32 %266 = bitcast float %264 to i32 %267 = insertelement <2 x i32> undef, i32 %265, i32 0 %268 = insertelement <2 x i32> %267, i32 %266, i32 1 %269 = bitcast <8 x i32> %103 to <32 x i8> %270 = bitcast <4 x i32> %105 to <16 x i8> %271 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %268, <32 x i8> %269, <16 x i8> %270, i32 2) %272 = extractelement <4 x float> %271, i32 0 %273 = extractelement <4 x float> %271, i32 1 %274 = fmul float %272, 2.000000e+00 %275 = fadd float %274, -1.000000e+00 %276 = fmul float %273, 2.000000e+00 %277 = fadd float %276, -1.000000e+00 %278 = fmul float 1.000000e+00, %275 %279 = fmul float -1.000000e+00, %277 %280 = fadd float %278, %279 %281 = fmul float -1.000000e+00, %275 %282 = fmul float -1.000000e+00, %277 %283 = fadd float %281, %282 %284 = fsub float -0.000000e+00, %280 %285 = fsub float -0.000000e+00, %283 %286 = fadd float %280, 2.000000e+00 %287 = fadd float %283, 2.000000e+00 %288 = fadd float %284, 2.000000e+00 %289 = fadd float %285, 2.000000e+00 %290 = fmul float %286, %286 %291 = fmul float %287, %287 %292 = fmul float %288, %288 %293 = fmul float %289, %289 %294 = fmul float %290, %290 %295 = fmul float %291, %291 %296 = fmul float %292, %292 %297 = fmul float %293, %293 %298 = fmul float 1.000000e+00, %294 %299 = fmul float 1.000000e+00, %295 %300 = fadd float %298, %299 %301 = fmul float 1.000000e+00, %296 %302 = fadd float %300, %301 %303 = fmul float 1.000000e+00, %297 %304 = fadd float %302, %303 %305 = fdiv float 1.000000e+00, %304 %306 = fmul float %305, %294 %307 = fmul float %305, %295 %308 = fmul float %305, %296 %309 = fmul float %305, %297 %310 = fdiv float 1.000000e+00, %24 %311 = fmul float %310, %306 %312 = fmul float %310, %307 %313 = fmul float %310, %308 %314 = fmul float %310, %309 %315 = fmul float %311, %311 %316 = fmul float %312, %312 %317 = fmul float %313, %313 %318 = fmul float %314, %314 %319 = bitcast float %134 to i32 %320 = bitcast float %135 to i32 %321 = insertelement <2 x i32> undef, i32 %319, i32 0 %322 = insertelement <2 x i32> %321, i32 %320, i32 1 %323 = bitcast <8 x i32> %91 to <32 x i8> %324 = bitcast <4 x i32> %93 to <16 x i8> %325 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %322, <32 x i8> %323, <16 x i8> %324, i32 2) %326 = extractelement <4 x float> %325, i32 0 %327 = extractelement <4 x float> %325, i32 1 %328 = extractelement <4 x float> %325, i32 2 %329 = extractelement <4 x float> %325, i32 3 %330 = bitcast float %134 to i32 %331 = bitcast float %135 to i32 %332 = insertelement <2 x i32> undef, i32 %330, i32 0 %333 = insertelement <2 x i32> %332, i32 %331, i32 1 %334 = bitcast <8 x i32> %95 to <32 x i8> %335 = bitcast <4 x i32> %97 to <16 x i8> %336 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %333, <32 x i8> %334, <16 x i8> %335, i32 2) %337 = extractelement <4 x float> %336, i32 0 %338 = fmul float %326, 2.000000e+00 %339 = fadd float %338, -1.000000e+00 %340 = fmul float %327, 2.000000e+00 %341 = fadd float %340, -1.000000e+00 %342 = fmul float %311, %339 %343 = fmul float %311, %341 %344 = fmul float %337, 2.000000e+00 %345 = fadd float %344, -1.000000e+00 %346 = bitcast float %136 to i32 %347 = bitcast float %137 to i32 %348 = insertelement <2 x i32> undef, i32 %346, i32 0 %349 = insertelement <2 x i32> %348, i32 %347, i32 1 %350 = bitcast <8 x i32> %91 to <32 x i8> %351 = bitcast <4 x i32> %93 to <16 x i8> %352 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %349, <32 x i8> %350, <16 x i8> %351, i32 2) %353 = extractelement <4 x float> %352, i32 0 %354 = extractelement <4 x float> %352, i32 1 %355 = extractelement <4 x float> %352, i32 2 %356 = extractelement <4 x float> %352, i32 3 %357 = bitcast float %136 to i32 %358 = bitcast float %137 to i32 %359 = insertelement <2 x i32> undef, i32 %357, i32 0 %360 = insertelement <2 x i32> %359, i32 %358, i32 1 %361 = bitcast <8 x i32> %95 to <32 x i8> %362 = bitcast <4 x i32> %97 to <16 x i8> %363 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %360, <32 x i8> %361, <16 x i8> %362, i32 2) %364 = extractelement <4 x float> %363, i32 0 %365 = fadd float %353, %353 %366 = fadd float %354, %354 %367 = fmul float %365, -1.000000e+00 %368 = fadd float %367, 1.000000e+00 %369 = fmul float %366, 1.000000e+00 %370 = fadd float %369, -1.000000e+00 %371 = fmul float %312, %368 %372 = fmul float %312, %370 %373 = fmul float %339, %311 %374 = fadd float %373, %371 %375 = fmul float %341, %311 %376 = fadd float %375, %372 %377 = fmul float %364, -2.000000e+00 %378 = fadd float %377, 1.000000e+00 %379 = fmul float %316, %355 %380 = fmul float %316, %356 %381 = fmul float %316, %378 %382 = fmul float %328, %315 %383 = fadd float %382, %379 %384 = fmul float %329, %315 %385 = fadd float %384, %380 %386 = fmul float %345, %315 %387 = fadd float %386, %381 %388 = fadd float %342, %342 %389 = fadd float %343, %343 %390 = fmul float %371, %388 %391 = fmul float %372, %389 %392 = fmul float %342, %372 %393 = fmul float %343, %371 %394 = fadd float %392, %393 %395 = fadd float %383, %390 %396 = fadd float %385, %391 %397 = fadd float %387, %394 %398 = bitcast float %138 to i32 %399 = bitcast float %139 to i32 %400 = insertelement <2 x i32> undef, i32 %398, i32 0 %401 = insertelement <2 x i32> %400, i32 %399, i32 1 %402 = bitcast <8 x i32> %91 to <32 x i8> %403 = bitcast <4 x i32> %93 to <16 x i8> %404 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %401, <32 x i8> %402, <16 x i8> %403, i32 2) %405 = extractelement <4 x float> %404, i32 0 %406 = extractelement <4 x float> %404, i32 1 %407 = extractelement <4 x float> %404, i32 2 %408 = extractelement <4 x float> %404, i32 3 %409 = bitcast float %138 to i32 %410 = bitcast float %139 to i32 %411 = insertelement <2 x i32> undef, i32 %409, i32 0 %412 = insertelement <2 x i32> %411, i32 %410, i32 1 %413 = bitcast <8 x i32> %95 to <32 x i8> %414 = bitcast <4 x i32> %97 to <16 x i8> %415 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %412, <32 x i8> %413, <16 x i8> %414, i32 2) %416 = extractelement <4 x float> %415, i32 0 %417 = fmul float %405, -2.000000e+00 %418 = fadd float %417, 1.000000e+00 %419 = fmul float %406, -2.000000e+00 %420 = fadd float %419, 1.000000e+00 %421 = fmul float %313, %418 %422 = fmul float %313, %420 %423 = fmul float %418, %313 %424 = fadd float %423, %374 %425 = fmul float %420, %313 %426 = fadd float %425, %376 %427 = fmul float %416, 2.000000e+00 %428 = fadd float %427, -1.000000e+00 %429 = fmul float %407, %317 %430 = fadd float %429, %395 %431 = fmul float %408, %317 %432 = fadd float %431, %396 %433 = fmul float %428, %317 %434 = fadd float %433, %397 %435 = fmul float %388, %421 %436 = fmul float %389, %422 %437 = fmul float %342, %422 %438 = fmul float %343, %421 %439 = fadd float %437, %438 %440 = fadd float %430, %435 %441 = fadd float %432, %436 %442 = fadd float %434, %439 %443 = fadd float %371, %371 %444 = fadd float %372, %372 %445 = fmul float %421, %443 %446 = fmul float %422, %444 %447 = fmul float %371, %422 %448 = fmul float %372, %421 %449 = fadd float %447, %448 %450 = fadd float %440, %445 %451 = fadd float %441, %446 %452 = fadd float %442, %449 %453 = bitcast float %140 to i32 %454 = bitcast float %141 to i32 %455 = insertelement <2 x i32> undef, i32 %453, i32 0 %456 = insertelement <2 x i32> %455, i32 %454, i32 1 %457 = bitcast <8 x i32> %91 to <32 x i8> %458 = bitcast <4 x i32> %93 to <16 x i8> %459 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %456, <32 x i8> %457, <16 x i8> %458, i32 2) %460 = extractelement <4 x float> %459, i32 0 %461 = extractelement <4 x float> %459, i32 1 %462 = extractelement <4 x float> %459, i32 2 %463 = extractelement <4 x float> %459, i32 3 %464 = bitcast float %140 to i32 %465 = bitcast float %141 to i32 %466 = insertelement <2 x i32> undef, i32 %464, i32 0 %467 = insertelement <2 x i32> %466, i32 %465, i32 1 %468 = bitcast <8 x i32> %95 to <32 x i8> %469 = bitcast <4 x i32> %97 to <16 x i8> %470 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %467, <32 x i8> %468, <16 x i8> %469, i32 2) %471 = extractelement <4 x float> %470, i32 0 %472 = fadd float %461, %461 %473 = fadd float %460, %460 %474 = fmul float %472, -1.000000e+00 %475 = fadd float %474, 1.000000e+00 %476 = fmul float %473, 1.000000e+00 %477 = fadd float %476, -1.000000e+00 %478 = fmul float %314, %475 %479 = fmul float %314, %477 %480 = fmul float %477, %314 %481 = fadd float %480, %424 %482 = fmul float %475, %314 %483 = fadd float %482, %426 %484 = fmul float %471, -2.000000e+00 %485 = fadd float %484, 1.000000e+00 %486 = fmul float %462, %318 %487 = fadd float %486, %450 %488 = fmul float %463, %318 %489 = fadd float %488, %451 %490 = fmul float %485, %318 %491 = fadd float %490, %452 %492 = fmul float %388, %479 %493 = fmul float %389, %478 %494 = fmul float %342, %478 %495 = fmul float %343, %479 %496 = fadd float %494, %495 %497 = fadd float %492, %487 %498 = fadd float %493, %489 %499 = fadd float %496, %491 %500 = fmul float %443, %479 %501 = fmul float %444, %478 %502 = fmul float %371, %478 %503 = fmul float %372, %479 %504 = fadd float %502, %503 %505 = fadd float %497, %500 %506 = fadd float %498, %501 %507 = fadd float %499, %504 %508 = fmul float %421, %479 %509 = fmul float %422, %478 %510 = fadd float %508, %508 %511 = fadd float %509, %509 %512 = fmul float %421, %478 %513 = fmul float %422, %479 %514 = fadd float %512, %513 %515 = fadd float %505, %510 %516 = fadd float %506, %511 %517 = fadd float %507, %514 %518 = fsub float -0.000000e+00, %481 %519 = fmul float %481, %518 %520 = fadd float %519, %515 %521 = fsub float -0.000000e+00, %483 %522 = fmul float %483, %521 %523 = fadd float %522, %516 %524 = fsub float -0.000000e+00, %483 %525 = fmul float %481, %524 %526 = fadd float %525, %517 %527 = fmul float %25, %481 %528 = fmul float %26, %483 %529 = fadd float %528, %527 %530 = fmul float %27, 1.000000e+00 %531 = fadd float %529, %530 %532 = call float @llvm.AMDIL.clamp.(float %531, float 0.000000e+00, float 1.000000e+00) %533 = fsub float -0.000000e+00, %142 %534 = fadd float %50, %533 %535 = fsub float -0.000000e+00, %143 %536 = fadd float %51, %535 %537 = fsub float -0.000000e+00, %144 %538 = fadd float %52, %537 %539 = fmul float %534, %534 %540 = fmul float %536, %536 %541 = fadd float %540, %539 %542 = fmul float %538, %538 %543 = fadd float %541, %542 %544 = call float @llvm.maxnum.f32(float %543, float 0x3E7AD7F2A0000000) %545 = call float @llvm.AMDGPU.rsq.clamped.f32(float %544) %546 = fmul float %545, %534 %547 = fmul float %545, %536 %548 = fmul float %545, %538 %549 = fsub float -0.000000e+00, %39 %550 = fmul float %534, %545 %551 = fadd float %550, %549 %552 = fsub float -0.000000e+00, %40 %553 = fmul float %536, %545 %554 = fadd float %553, %552 %555 = fsub float -0.000000e+00, %41 %556 = fmul float %538, %545 %557 = fadd float %556, %555 %558 = fmul float %42, %551 %559 = fadd float %558, %39 %560 = fmul float %42, %554 %561 = fadd float %560, %40 %562 = fmul float %42, %557 %563 = fadd float %562, %41 %564 = fmul float %559, %559 %565 = fmul float %561, %561 %566 = fadd float %565, %564 %567 = fmul float %563, %563 %568 = fadd float %566, %567 %569 = call float @llvm.maxnum.f32(float %568, float 0x3E7AD7F2A0000000) %570 = call float @llvm.AMDGPU.rsq.clamped.f32(float %569) %571 = fmul float %559, %570 %572 = fadd float %571, %25 %573 = fmul float %561, %570 %574 = fadd float %573, %26 %575 = fmul float %563, %570 %576 = fadd float %575, %27 %577 = fdiv float 1.000000e+00, %576 %578 = fsub float -0.000000e+00, %481 %579 = fmul float %572, %577 %580 = fadd float %579, %578 %581 = fsub float -0.000000e+00, %483 %582 = fmul float %574, %577 %583 = fadd float %582, %581 %584 = fdiv float 1.000000e+00, %34 %585 = fadd float %520, %584 %586 = fadd float %523, %584 %587 = fmul float %526, %526 %588 = fsub float -0.000000e+00, %587 %589 = fmul float %585, %586 %590 = fadd float %589, %588 %591 = fmul float %580, %580 %592 = fadd float %526, %526 %593 = fmul float %580, %592 %594 = fsub float -0.000000e+00, %593 %595 = fmul float %585, %583 %596 = fadd float %595, %594 %597 = fmul float %583, %596 %598 = fmul float %591, %586 %599 = fadd float %598, %597 %600 = fmul float %599, 5.000000e-01 %601 = fdiv float 1.000000e+00, %590 %602 = fmul float %601, %600 %603 = fsub float -0.000000e+00, %590 %604 = fcmp oge float %603, 0.000000e+00 %605 = sext i1 %604 to i32 %606 = bitcast i32 %605 to float %607 = bitcast float %606 to i32 %608 = icmp ne i32 %607, 0 %temp96.0 = select i1 %608, float 1.000000e+00, float 0.000000e+00 %609 = fmul float %600, %601 %610 = fadd float %609, -1.600000e+01 %611 = fcmp oge float %610, 0.000000e+00 %612 = sext i1 %611 to i32 %613 = bitcast i32 %612 to float %614 = bitcast float %613 to i32 %615 = icmp ne i32 %614, 0 %.164 = select i1 %615, float 1.000000e+00, float 0.000000e+00 %616 = fmul float %602, 0xBFF7154CA0000000 %617 = call float @llvm.AMDIL.exp.(float %616) %618 = call float @llvm.maxnum.f32(float %590, float 0x3E7AD7F2A0000000) %619 = call float @llvm.AMDGPU.rsq.clamped.f32(float %618) %620 = fmul float %617, %619 %621 = fmul float %584, 1.600000e+01 %622 = fadd float %621, %520 %623 = fmul float %584, 1.600000e+01 %624 = fadd float %623, %523 %625 = fsub float -0.000000e+00, %587 %626 = fmul float %622, %624 %627 = fadd float %626, %625 %628 = fsub float -0.000000e+00, %593 %629 = fmul float %622, %583 %630 = fadd float %629, %628 %631 = fmul float %583, %630 %632 = fmul float %591, %624 %633 = fadd float %632, %631 %634 = fmul float %633, 5.000000e-01 %635 = fdiv float 1.000000e+00, %627 %636 = fmul float %635, %634 %637 = fsub float -0.000000e+00, %627 %638 = fcmp oge float %637, 0.000000e+00 %639 = sext i1 %638 to i32 %640 = bitcast i32 %639 to float %641 = bitcast float %640 to i32 %642 = icmp ne i32 %641, 0 %temp84.0 = select i1 %642, float 1.000000e+00, float 0.000000e+00 %643 = fmul float %634, %635 %644 = fadd float %643, -1.600000e+01 %645 = fcmp oge float %644, 0.000000e+00 %646 = sext i1 %645 to i32 %647 = bitcast i32 %646 to float %648 = bitcast float %647 to i32 %649 = icmp ne i32 %648, 0 %.165 = select i1 %649, float 1.000000e+00, float 0.000000e+00 %650 = fadd float %.165, %temp84.0 %651 = fadd float %.164, %temp96.0 %652 = fmul float %636, 0xBFF7154CA0000000 %653 = call float @llvm.AMDIL.exp.(float %652) %654 = call float @llvm.maxnum.f32(float %627, float 0x3E7AD7F2A0000000) %655 = call float @llvm.AMDGPU.rsq.clamped.f32(float %654) %656 = fmul float %655, %653 %657 = fsub float -0.000000e+00, %43 %658 = fmul float %534, %545 %659 = fadd float %658, %657 %660 = fsub float -0.000000e+00, %44 %661 = fmul float %536, %545 %662 = fadd float %661, %660 %663 = fsub float -0.000000e+00, %45 %664 = fmul float %538, %545 %665 = fadd float %664, %663 %666 = fmul float %46, %659 %667 = fadd float %666, %43 %668 = fmul float %46, %662 %669 = fadd float %668, %44 %670 = fmul float %46, %665 %671 = fadd float %670, %45 %672 = fmul float %667, %667 %673 = fmul float %669, %669 %674 = fadd float %673, %672 %675 = fmul float %671, %671 %676 = fadd float %674, %675 %677 = call float @llvm.maxnum.f32(float %676, float 0x3E7AD7F2A0000000) %678 = call float @llvm.AMDGPU.rsq.clamped.f32(float %677) %679 = fmul float %667, %678 %680 = fadd float %679, %28 %681 = fmul float %669, %678 %682 = fadd float %681, %29 %683 = fmul float %671, %678 %684 = fadd float %683, %30 %685 = fdiv float 1.000000e+00, %684 %686 = fsub float -0.000000e+00, %481 %687 = fmul float %680, %685 %688 = fadd float %687, %686 %689 = fsub float -0.000000e+00, %483 %690 = fmul float %682, %685 %691 = fadd float %690, %689 %692 = fdiv float 1.000000e+00, %38 %693 = fadd float %692, %520 %694 = fadd float %692, %523 %695 = fsub float -0.000000e+00, %587 %696 = fmul float %693, %694 %697 = fadd float %696, %695 %698 = fmul float %688, %688 %699 = fmul float %592, %688 %700 = fsub float -0.000000e+00, %699 %701 = fmul float %693, %691 %702 = fadd float %701, %700 %703 = fmul float %691, %702 %704 = fmul float %698, %694 %705 = fadd float %704, %703 %706 = fmul float %705, 5.000000e-01 %707 = fdiv float 1.000000e+00, %697 %708 = fmul float %706, %707 %709 = fsub float -0.000000e+00, %697 %710 = fcmp oge float %709, 0.000000e+00 %711 = sext i1 %710 to i32 %712 = bitcast i32 %711 to float %713 = bitcast float %712 to i32 %714 = icmp ne i32 %713, 0 %temp56.1 = select i1 %714, float 1.000000e+00, float 0.000000e+00 %715 = fmul float %706, %707 %716 = fadd float %715, -1.600000e+01 %717 = fcmp oge float %716, 0.000000e+00 %718 = sext i1 %717 to i32 %719 = bitcast i32 %718 to float %720 = bitcast float %719 to i32 %721 = icmp ne i32 %720, 0 %.166 = select i1 %721, float 1.000000e+00, float 0.000000e+00 %722 = fadd float %.166, %temp56.1 %723 = fmul float %708, 0xBFF7154CA0000000 %724 = fmul float %656, 0x3F747AE140000000 %725 = call float @llvm.maxnum.f32(float %697, float 0x3E7AD7F2A0000000) %726 = call float @llvm.AMDGPU.rsq.clamped.f32(float %725) %727 = call float @llvm.AMDIL.exp.(float %723) %728 = fmul float %726, %727 %729 = fmul float %56, %143 %730 = fmul float %57, %143 %731 = fmul float %58, %143 %732 = fmul float %142, %53 %733 = fadd float %732, %729 %734 = fmul float %142, %54 %735 = fadd float %734, %730 %736 = fmul float %142, %55 %737 = fadd float %736, %731 %738 = fmul float %144, %59 %739 = fadd float %738, %733 %740 = fmul float %144, %60 %741 = fadd float %740, %735 %742 = fmul float %144, %61 %743 = fadd float %742, %737 %744 = fadd float %739, %62 %745 = fadd float %741, %63 %746 = fadd float %743, %64 %747 = fdiv float 1.000000e+00, %746 %748 = fmul float %747, %744 %749 = fmul float %747, %745 %750 = fmul float %748, 5.000000e-01 %751 = fadd float %750, -5.000000e-01 %752 = fmul float %749, -5.000000e-01 %753 = fadd float %752, -5.000000e-01 %754 = fmul float %520, 6.000000e+00 %755 = fadd float %754, %751 %756 = fmul float %523, 6.000000e+00 %757 = fadd float %756, %753 %758 = fmul float %697, 2.000000e+00 %759 = fadd float %758, %755 %760 = fmul float %697, 2.000000e+00 %761 = fadd float %760, %757 %762 = fmul float %697, 2.000000e+00 %763 = fadd float %762, %755 %764 = fmul float %697, -2.000000e+00 %765 = fadd float %764, %757 %766 = bitcast float %763 to i32 %767 = bitcast float %765 to i32 %768 = insertelement <2 x i32> undef, i32 %766, i32 0 %769 = insertelement <2 x i32> %768, i32 %767, i32 1 %770 = bitcast <8 x i32> %99 to <32 x i8> %771 = bitcast <4 x i32> %101 to <16 x i8> %772 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %769, <32 x i8> %770, <16 x i8> %771, i32 2) %773 = extractelement <4 x float> %772, i32 0 %774 = extractelement <4 x float> %772, i32 1 %775 = extractelement <4 x float> %772, i32 2 %776 = extractelement <4 x float> %772, i32 3 %777 = fmul float %773, 2.500000e-01 %778 = fmul float %774, 2.500000e-01 %779 = fmul float %775, 2.500000e-01 %780 = fmul float %776, 2.500000e-01 %781 = bitcast float %759 to i32 %782 = bitcast float %761 to i32 %783 = insertelement <2 x i32> undef, i32 %781, i32 0 %784 = insertelement <2 x i32> %783, i32 %782, i32 1 %785 = bitcast <8 x i32> %99 to <32 x i8> %786 = bitcast <4 x i32> %101 to <16 x i8> %787 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %784, <32 x i8> %785, <16 x i8> %786, i32 2) %788 = extractelement <4 x float> %787, i32 0 %789 = extractelement <4 x float> %787, i32 1 %790 = extractelement <4 x float> %787, i32 2 %791 = extractelement <4 x float> %787, i32 3 %792 = fmul float %788, 2.500000e-01 %793 = fadd float %792, %777 %794 = fmul float %789, 2.500000e-01 %795 = fadd float %794, %778 %796 = fmul float %790, 2.500000e-01 %797 = fadd float %796, %779 %798 = fmul float %791, 2.500000e-01 %799 = fadd float %798, %780 %800 = fmul float %697, -2.000000e+00 %801 = fadd float %800, %755 %802 = fmul float %697, 6.000000e+00 %803 = fadd float %802, %757 %804 = bitcast float %801 to i32 %805 = bitcast float %803 to i32 %806 = insertelement <2 x i32> undef, i32 %804, i32 0 %807 = insertelement <2 x i32> %806, i32 %805, i32 1 %808 = bitcast <8 x i32> %99 to <32 x i8> %809 = bitcast <4 x i32> %101 to <16 x i8> %810 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %807, <32 x i8> %808, <16 x i8> %809, i32 2) %811 = extractelement <4 x float> %810, i32 0 %812 = extractelement <4 x float> %810, i32 1 %813 = extractelement <4 x float> %810, i32 2 %814 = extractelement <4 x float> %810, i32 3 %815 = fmul float %811, 2.500000e-01 %816 = fadd float %815, %793 %817 = fmul float %812, 2.500000e-01 %818 = fadd float %817, %795 %819 = fmul float %813, 2.500000e-01 %820 = fadd float %819, %797 %821 = fmul float %814, 2.500000e-01 %822 = fadd float %821, %799 %823 = fmul float %697, -2.000000e+00 %824 = fadd float %823, %755 %825 = fmul float %697, -2.000000e+00 %826 = fadd float %825, %757 %827 = bitcast float %824 to i32 %828 = bitcast float %826 to i32 %829 = insertelement <2 x i32> undef, i32 %827, i32 0 %830 = insertelement <2 x i32> %829, i32 %828, i32 1 %831 = bitcast <8 x i32> %99 to <32 x i8> %832 = bitcast <4 x i32> %101 to <16 x i8> %833 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %830, <32 x i8> %831, <16 x i8> %832, i32 2) %834 = extractelement <4 x float> %833, i32 0 %835 = extractelement <4 x float> %833, i32 1 %836 = extractelement <4 x float> %833, i32 2 %837 = extractelement <4 x float> %833, i32 3 %838 = fmul float %834, 2.500000e-01 %839 = fadd float %838, %816 %840 = fmul float %835, 2.500000e-01 %841 = fadd float %840, %818 %842 = fmul float %836, 2.500000e-01 %843 = fadd float %842, %820 %844 = fmul float %837, 2.500000e-01 %845 = fadd float %844, %822 %846 = call float @fabs(float %839) %847 = call float @llvm.log2.f32(float %846) %848 = call float @fabs(float %841) %849 = call float @llvm.log2.f32(float %848) %850 = call float @fabs(float %843) %851 = call float @llvm.log2.f32(float %850) %852 = fmul float %847, 0x40019999A0000000 %853 = fmul float %849, 0x40019999A0000000 %854 = fmul float %851, 0x40019999A0000000 %855 = call float @llvm.AMDIL.exp.(float %852) %856 = call float @llvm.AMDIL.exp.(float %853) %857 = call float @llvm.AMDIL.exp.(float %854) %858 = fmul float %481, %546 %859 = fmul float %483, %547 %860 = fadd float %859, %858 %861 = fmul float 1.000000e+00, %548 %862 = fadd float %860, %861 %863 = fsub float -0.000000e+00, %862 %864 = fadd float %863, 1.000000e+00 %865 = fmul float %864, %864 %866 = fmul float %865, %865 %867 = fmul float %864, %866 %868 = fmul float %620, %31 %869 = fmul float %620, %32 %870 = fmul float %620, %33 %871 = fsub float -0.000000e+00, %651 %872 = fsub float -0.000000e+00, %651 %873 = fsub float -0.000000e+00, %651 %874 = fcmp oge float %871, 0.000000e+00 %875 = sext i1 %874 to i32 %876 = bitcast i32 %875 to float %877 = bitcast float %876 to i32 %878 = icmp ne i32 %877, 0 %temp72.0 = select i1 %878, float %868, float 0.000000e+00 %879 = fcmp oge float %872, 0.000000e+00 %880 = sext i1 %879 to i32 %881 = bitcast i32 %880 to float %882 = bitcast float %881 to i32 %883 = icmp ne i32 %882, 0 %.167 = select i1 %883, float %869, float 0.000000e+00 %884 = fcmp oge float %873, 0.000000e+00 %885 = sext i1 %884 to i32 %886 = bitcast i32 %885 to float %887 = bitcast float %886 to i32 %888 = icmp ne i32 %887, 0 %temp60.0 = select i1 %888, float %870, float 0.000000e+00 %889 = fmul float %728, %35 %890 = fmul float %728, %36 %891 = fmul float %728, %37 %892 = fsub float -0.000000e+00, %722 %893 = fsub float -0.000000e+00, %722 %894 = fsub float -0.000000e+00, %722 %895 = fcmp oge float %892, 0.000000e+00 %896 = sext i1 %895 to i32 %897 = bitcast i32 %896 to float %898 = bitcast float %897 to i32 %899 = icmp ne i32 %898, 0 %temp68.0 = select i1 %899, float %889, float 0.000000e+00 %900 = fcmp oge float %893, 0.000000e+00 %901 = sext i1 %900 to i32 %902 = bitcast i32 %901 to float %903 = bitcast float %902 to i32 %904 = icmp ne i32 %903, 0 %.168 = select i1 %904, float %890, float 0.000000e+00 %905 = fcmp oge float %894, 0.000000e+00 %906 = sext i1 %905 to i32 %907 = bitcast i32 %906 to float %908 = bitcast float %907 to i32 %909 = icmp ne i32 %908, 0 %temp28.1 = select i1 %909, float %891, float 0.000000e+00 %910 = fadd float %temp68.0, %temp72.0 %911 = fadd float %.168, %.167 %912 = fadd float %temp28.1, %temp60.0 %913 = fmul float %910, %867 %914 = fadd float %913, %257 %915 = fmul float %911, %867 %916 = fadd float %915, %258 %917 = fmul float %912, %867 %918 = fadd float %917, %259 %919 = fmul float %257, %31 %920 = fmul float %258, %32 %921 = fmul float %259, %33 %922 = fmul float %724, %919 %923 = fmul float %724, %920 %924 = fmul float %724, %921 %925 = fsub float -0.000000e+00, %650 %926 = fsub float -0.000000e+00, %650 %927 = fsub float -0.000000e+00, %650 %928 = fsub float -0.000000e+00, %650 %929 = fcmp oge float %925, 0.000000e+00 %930 = sext i1 %929 to i32 %931 = bitcast i32 %930 to float %932 = bitcast float %931 to i32 %933 = icmp ne i32 %932, 0 %.169 = select i1 %933, float %922, float 0.000000e+00 %934 = fcmp oge float %926, 0.000000e+00 %935 = sext i1 %934 to i32 %936 = bitcast i32 %935 to float %937 = bitcast float %936 to i32 %938 = icmp ne i32 %937, 0 %939 = fcmp oge float %927, 0.000000e+00 %940 = sext i1 %939 to i32 %941 = bitcast i32 %940 to float %942 = bitcast float %941 to i32 %943 = icmp ne i32 %942, 0 %temp56.2 = select i1 %943, float %923, float 0.000000e+00 %944 = fcmp oge float %928, 0.000000e+00 %945 = sext i1 %944 to i32 %946 = bitcast i32 %945 to float %947 = bitcast float %946 to i32 %948 = icmp ne i32 %947, 0 %.170 = select i1 %948, float %924, float 0.000000e+00 %949 = fmul float %914, %532 %950 = fadd float %949, %.169 %951 = fmul float %916, %532 %952 = fadd float %951, %temp56.2 %953 = fmul float %918, %532 %954 = fadd float %953, %.170 %955 = fmul float %855, %89 %956 = fadd float %955, %950 %957 = fmul float %856, %89 %958 = fadd float %957, %952 %959 = fmul float %857, %89 %960 = fadd float %959, %954 %961 = fmul float %708, %708 %962 = fmul float %708, %961 %963 = fmul float %237, %962 %964 = fmul float %963, %257 %965 = fadd float %964, %956 %966 = fmul float %963, %258 %967 = fadd float %966, %958 %968 = fmul float %963, %259 %969 = fadd float %968, %960 %970 = fmul float %68, %143 %971 = fmul float %69, %143 %972 = fmul float %70, %143 %973 = fmul float %142, %65 %974 = fadd float %973, %970 %975 = fmul float %142, %66 %976 = fadd float %975, %971 %977 = fmul float %142, %67 %978 = fadd float %977, %972 %979 = fmul float %144, %71 %980 = fadd float %979, %974 %981 = fmul float %144, %72 %982 = fadd float %981, %976 %983 = fmul float %144, %73 %984 = fadd float %983, %978 %985 = fadd float %980, %74 %986 = fadd float %982, %75 %987 = fadd float %984, %76 %988 = fdiv float 1.000000e+00, %987 %989 = fmul float %988, %985 %990 = fmul float %988, %986 %991 = fmul float %989, 5.000000e-01 %992 = fadd float %991, -5.000000e-01 %993 = fmul float %990, -5.000000e-01 %994 = fadd float %993, -5.000000e-01 %995 = fmul float %992, 1.000000e+00 %996 = fadd float %995, 0.000000e+00 %997 = fmul float %994, -1.000000e+00 %998 = fadd float %997, 1.000000e+00 %999 = bitcast float %996 to i32 %1000 = bitcast float %998 to i32 %1001 = insertelement <2 x i32> undef, i32 %999, i32 0 %1002 = insertelement <2 x i32> %1001, i32 %1000, i32 1 %1003 = bitcast <8 x i32> %131 to <32 x i8> %1004 = bitcast <4 x i32> %133 to <16 x i8> %1005 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1002, <32 x i8> %1003, <16 x i8> %1004, i32 2) %1006 = extractelement <4 x float> %1005, i32 0 %1007 = call float @llvm.AMDIL.clamp.(float %1006, float 0.000000e+00, float 1.000000e+00) %1008 = fmul float %1007, %85 %1009 = fmul float %1008, %965 %1010 = fmul float %1008, %967 %1011 = fmul float %1008, %969 %1012 = fmul float %1009, 0xBFE570A3E0000000 %1013 = fadd float %1012, %965 %1014 = fmul float %1010, 0xBFE570A3E0000000 %1015 = fadd float %1014, %967 %1016 = fmul float %1011, 0xBFE570A3E0000000 %1017 = fadd float %1016, %969 %1018 = fmul float %845, %260 %1019 = fadd float %1018, %260 %1020 = fmul float %845, %260 %1021 = fadd float %1020, %260 %1022 = fmul float %845, %260 %1023 = fadd float %1022, %260 %1024 = fmul float %845, %260 %1025 = fadd float %1024, %260 %1026 = call float @llvm.AMDIL.clamp.(float %1019, float 0.000000e+00, float 1.000000e+00) %1027 = call float @llvm.AMDIL.clamp.(float %1021, float 0.000000e+00, float 1.000000e+00) %1028 = call float @llvm.AMDIL.clamp.(float %1023, float 0.000000e+00, float 1.000000e+00) %1029 = call float @llvm.AMDIL.clamp.(float %1025, float 0.000000e+00, float 1.000000e+00) %1030 = call float @fabs(float %1013) %1031 = call float @llvm.log2.f32(float %1030) %1032 = call float @fabs(float %1015) %1033 = call float @llvm.log2.f32(float %1032) %1034 = call float @fabs(float %1017) %1035 = call float @llvm.log2.f32(float %1034) %1036 = fmul float %1031, 0x3FDD1743E0000000 %1037 = fmul float %1033, 0x3FDD1743E0000000 %1038 = fmul float %1035, 0x3FDD1743E0000000 %1039 = call float @llvm.AMDIL.exp.(float %1036) %1040 = call float @llvm.AMDIL.exp.(float %1037) %1041 = call float @llvm.AMDIL.exp.(float %1038) %1042 = fmul float %147, 1.000000e+00 %1043 = fadd float %1042, 0.000000e+00 %1044 = fmul float %148, -1.000000e+00 %1045 = fadd float %1044, 1.000000e+00 %1046 = bitcast float %1043 to i32 %1047 = bitcast float %1045 to i32 %1048 = insertelement <2 x i32> undef, i32 %1046, i32 0 %1049 = insertelement <2 x i32> %1048, i32 %1047, i32 1 %1050 = bitcast <8 x i32> %119 to <32 x i8> %1051 = bitcast <4 x i32> %121 to <16 x i8> %1052 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1049, <32 x i8> %1050, <16 x i8> %1051, i32 2) %1053 = extractelement <4 x float> %1052, i32 0 %1054 = extractelement <4 x float> %1052, i32 1 %1055 = extractelement <4 x float> %1052, i32 2 %1056 = extractelement <4 x float> %1052, i32 3 %1057 = fsub float -0.000000e+00, %1056 %1058 = fadd float %1057, 1.000000e+00 %1059 = fmul float %1039, %1058 %1060 = fadd float %1059, %1053 %1061 = fmul float %1040, %1058 %1062 = fadd float %1061, %1054 %1063 = fmul float %1041, %1058 %1064 = fadd float %1063, %1055 %1065 = fadd float %1060, 0xBFD99999A0000000 %1066 = fadd float %1062, 0xBFD99999A0000000 %1067 = fadd float %1064, 0xBFD99999A0000000 %1068 = fmul float %162, %1065 %1069 = fadd float %1068, 0x3FD99999A0000000 %1070 = fmul float %162, %1066 %1071 = fadd float %1070, 0x3FD99999A0000000 %1072 = fmul float %162, %1067 %1073 = fadd float %1072, 0x3FD99999A0000000 %1074 = bitcast float %1069 to i32 %1075 = bitcast float %1071 to i32 %1076 = bitcast float %1073 to i32 %1077 = insertelement <4 x i32> undef, i32 %1074, i32 0 %1078 = insertelement <4 x i32> %1077, i32 %1075, i32 1 %1079 = insertelement <4 x i32> %1078, i32 %1076, i32 2 %1080 = insertelement <4 x i32> %1079, i32 undef, i32 3 %1081 = bitcast <8 x i32> %127 to <32 x i8> %1082 = bitcast <4 x i32> %129 to <16 x i8> %1083 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1080, <32 x i8> %1081, <16 x i8> %1082, i32 3) %1084 = extractelement <4 x float> %1083, i32 0 %1085 = extractelement <4 x float> %1083, i32 1 %1086 = extractelement <4 x float> %1083, i32 2 %1087 = fsub float -0.000000e+00, %162 %1088 = fmul float %163, %1087 %1089 = fadd float %1088, %162 %1090 = call float @llvm.AMDGPU.lrp(float %1089, float %1084, float %1069) %1091 = call float @llvm.AMDGPU.lrp(float %1089, float %1085, float %1071) %1092 = call float @llvm.AMDGPU.lrp(float %1089, float %1086, float %1073) %1093 = fdiv float 1.000000e+00, %146 %1094 = fmul float %1093, %145 %1095 = fmul float %1094, 5.000000e-01 %1096 = fadd float %1095, 5.000000e-01 %1097 = bitcast float %1096 to i32 %1098 = bitcast float %49 to i32 %1099 = insertelement <2 x i32> undef, i32 %1097, i32 0 %1100 = insertelement <2 x i32> %1099, i32 %1098, i32 1 %1101 = bitcast <8 x i32> %115 to <32 x i8> %1102 = bitcast <4 x i32> %117 to <16 x i8> %1103 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1100, <32 x i8> %1101, <16 x i8> %1102, i32 2) %1104 = extractelement <4 x float> %1103, i32 0 %1105 = fsub float -0.000000e+00, %50 %1106 = fadd float %1105, %142 %1107 = fsub float -0.000000e+00, %51 %1108 = fadd float %1107, %143 %1109 = fsub float -0.000000e+00, %52 %1110 = fadd float %1109, %144 %1111 = fmul float %1106, %1106 %1112 = fmul float %1108, %1108 %1113 = fadd float %1112, %1111 %1114 = fmul float %1110, %1110 %1115 = fadd float %1113, %1114 %1116 = fmul float %1115, %48 %1117 = fmul float %1110, %47 %1118 = fmul float %1117, 0x3FF7154CA0000000 %1119 = call float @llvm.AMDIL.exp.(float %1118) %1120 = fsub float -0.000000e+00, %1119 %1121 = fadd float %1120, 1.000000e+00 %1122 = fmul float %1121, %1116 %1123 = fdiv float 1.000000e+00, %1110 %1124 = fmul float %1123, %1122 %1125 = fmul float %1124, 0x3FF7154CA0000000 %1126 = call float @llvm.AMDIL.exp.(float %1125) %1127 = call float @llvm.AMDIL.clamp.(float %1126, float 0.000000e+00, float 1.000000e+00) %1128 = fsub float -0.000000e+00, %1127 %1129 = fadd float %1128, 1.000000e+00 %1130 = fmul float %1129, %1104 %1131 = fsub float -0.000000e+00, %1090 %1132 = fadd float %1131, %86 %1133 = fsub float -0.000000e+00, %1091 %1134 = fadd float %1133, %87 %1135 = fsub float -0.000000e+00, %1092 %1136 = fadd float %1135, %88 %1137 = fmul float %1130, %1132 %1138 = fadd float %1137, %1090 %1139 = fmul float %1130, %1134 %1140 = fadd float %1139, %1091 %1141 = fmul float %1130, %1136 %1142 = fadd float %1141, %1092 %1143 = call i32 @llvm.SI.packf16(float %1138, float %1140) %1144 = bitcast i32 %1143 to float %1145 = call i32 @llvm.SI.packf16(float %1142, float %1029) %1146 = bitcast i32 %1145 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1144, float %1146, float %1144, float %1146) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #4 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } attributes #4 = { nounwind readnone readonly } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[100:101], s[6:7] ; BEE40406 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 6, [m0] ; C8081900 v_interp_p2_f32 v2, [v2], v1, 1, 6, [m0] ; C8091901 v_sub_f32_e32 v8, 1.0, v2 ; 081004F2 v_interp_p1_f32 v2, v0, 0, 6, [m0] ; C8081800 v_interp_p2_f32 v2, [v2], v1, 0, 6, [m0] ; C8091801 v_add_f32_e32 v7, 0, v2 ; 060E0480 s_load_dwordx4 s[88:91], s[4:5], 0x0 ; C0AC0500 s_load_dwordx4 s[60:63], s[4:5], 0x4 ; C09E0504 s_load_dwordx4 s[32:35], s[4:5], 0x8 ; C0900508 s_load_dwordx4 s[12:15], s[4:5], 0xc ; C086050C s_load_dwordx4 s[52:55], s[4:5], 0x10 ; C09A0510 s_load_dwordx4 s[56:59], s[4:5], 0x14 ; C09C0514 s_load_dwordx4 s[8:11], s[4:5], 0x18 ; C0840518 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v57, s8, 0 ; 04730008 v_writelane_b32 v57, s9, 1 ; 04730209 v_writelane_b32 v57, s10, 2 ; 0473040A v_writelane_b32 v57, s11, 3 ; 0473060B s_load_dwordx4 s[24:27], s[4:5], 0x1c ; C08C051C s_load_dwordx4 s[16:19], s[4:5], 0x20 ; C0880520 s_load_dwordx4 s[8:11], s[4:5], 0x24 ; C0840524 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v57, s8, 4 ; 04730808 v_writelane_b32 v57, s9, 5 ; 04730A09 v_writelane_b32 v57, s10, 6 ; 04730C0A v_writelane_b32 v57, s11, 7 ; 04730E0B s_load_dwordx4 s[28:31], s[4:5], 0x28 ; C08E0528 s_load_dwordx8 s[36:43], s[100:101], 0x40 ; C0D26540 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:3], 5, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[36:43], s[16:19] ; F0800500 00890207 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v2|, 0 ; D00C0100 20010102 v_mov_b32_e32 v10, 0 ; 7E140280 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_cmp_ne_i32_e64 s[0:1], v4, 0 ; D10A0000 00010104 v_mov_b32_e32 v4, 0x80000000 ; 7E0802FF 80000000 v_cndmask_b32_e64 v4, v4, -1.0, s[0:1] ; D2000004 0001E704 v_cmp_lt_f32_e64 s[0:1], v4, 0 ; D0020000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_xor_b32_e32 v4, 0x80000000, v4 ; 3A0808FF 80000000 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_interp_p1_f32 v4, v0, 0, 4, [m0] ; C8101000 v_interp_p2_f32 v4, [v4], v1, 0, 4, [m0] ; C8111001 v_interp_p1_f32 v5, v0, 1, 4, [m0] ; C8141100 v_interp_p2_f32 v5, [v5], v1, 1, 4, [m0] ; C8151101 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x49 ; C2020149 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x45 ; C2020145 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v4, s4, v6 ; D282000D 04180904 v_interp_p1_f32 v6, v0, 2, 4, [m0] ; C8181200 v_interp_p2_f32 v6, [v6], v1, 2, 4, [m0] ; C8191201 s_buffer_load_dword s4, s[0:3], 0x4d ; C202014D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v6, s4, v13 ; D282000D 04340906 s_buffer_load_dword s4, s[0:3], 0x51 ; C2020151 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v13, s4, v13 ; 061A1A04 v_sub_f32_e32 v14, 1.0, v13 ; 081C1AF2 s_buffer_load_dword s4, s[0:3], 0x48 ; C2020148 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s4, v5 ; 101E0A04 s_buffer_load_dword s4, s[0:3], 0x44 ; C2020144 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, v4, s4, v15 ; D282000F 043C0904 s_buffer_load_dword s4, s[0:3], 0x4c ; C202014C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, v6, s4, v15 ; D282000F 043C0906 s_buffer_load_dword s4, s[0:3], 0x50 ; C2020150 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v15, s4, v15 ; 061E1E04 v_add_f32_e32 v13, 0, v15 ; 061A1E80 s_load_dwordx8 s[92:99], s[100:101], 0x0 ; C0EE6500 s_load_dwordx8 s[80:87], s[100:101], 0x8 ; C0E86508 s_load_dwordx8 s[44:51], s[100:101], 0x10 ; C0D66510 s_load_dwordx8 s[4:11], s[100:101], 0x18 ; C0C26518 s_load_dwordx8 s[64:71], s[100:101], 0x20 ; C0E06520 s_load_dwordx8 s[72:79], s[100:101], 0x28 ; C0E46528 s_load_dwordx8 s[16:23], s[100:101], 0x30 ; C0C86530 s_load_dwordx8 s[36:43], s[100:101], 0x38 ; C0D26538 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:14], 3, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[4:11], s[12:15] ; F0800300 00610D0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, 2.0, v14, -1.0 ; D282000F 03CE1CF4 v_mad_f32 v13, 2.0, v13, -1.0 ; D282000D 03CE1AF4 v_subrev_f32_e32 v14, v15, v13 ; 0A1C1B0F v_add_f32_e32 v16, 2.0, v14 ; 06201CF4 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_sub_f32_e64 v13, -v15, v13 ; D208000D 20021B0F v_add_f32_e32 v15, 2.0, v13 ; 061E1AF4 v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_mad_f32 v17, v16, v16, v15 ; D2820011 043E2110 v_sub_f32_e32 v14, 2.0, v14 ; 081C1CF4 v_mul_f32_e32 v14, v14, v14 ; 101C1D0E v_mad_f32 v17, v14, v14, v17 ; D2820011 04461D0E v_sub_f32_e32 v13, 2.0, v13 ; 081A1AF4 v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D v_rcp_f32_e32 v17, v17 ; 7E225511 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_mul_f32_e32 v16, v16, v17 ; 10202310 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v18, s4 ; 7E245404 v_mul_f32_e32 v16, v16, v18 ; 10202510 v_interp_p1_f32 v20, v0, 1, 0, [m0] ; C8500100 v_interp_p2_f32 v20, [v20], v1, 1, 0, [m0] ; C8510101 v_interp_p1_f32 v19, v0, 0, 0, [m0] ; C84C0000 v_interp_p2_f32 v19, [v19], v1, 0, 0, [m0] ; C84D0001 image_sample v[21:24], 15, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[92:99], s[88:91] ; F0800F00 02D71513 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v25, 2.0, v22, -1.0 ; D2820019 03CE2CF4 v_mul_f32_e32 v26, v25, v16 ; 10342119 v_mad_f32 v27, v16, v25, v26 ; D282001B 046A3310 v_mul_f32_e32 v15, v15, v17 ; 101E230F v_mul_f32_e32 v15, v15, v18 ; 101E250F v_mul_f32_e32 v28, v15, v15 ; 10381F0F v_interp_p1_f32 v30, v0, 1, 1, [m0] ; C8780500 v_interp_p2_f32 v30, [v30], v1, 1, 1, [m0] ; C8790501 v_interp_p1_f32 v29, v0, 0, 1, [m0] ; C8740400 v_interp_p2_f32 v29, [v29], v1, 0, 1, [m0] ; C8750401 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[92:99], s[88:91] ; F0800F00 02D71F1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v35, v34, v28 ; 10463922 v_mul_f32_e32 v36, v16, v16 ; 10482110 v_mad_f32 v35, v24, v36, v35 ; D2820023 048E4918 v_mad_f32 v37, 2.0, v32, -1.0 ; D2820025 03CE40F4 v_mul_f32_e32 v38, v37, v15 ; 104C1F25 v_mad_f32 v35, v38, v27, v35 ; D2820023 048E3726 v_mul_f32_e32 v14, v14, v14 ; 101C1D0E v_mul_f32_e32 v14, v14, v17 ; 101C230E v_mul_f32_e32 v14, v14, v18 ; 101C250E v_mul_f32_e32 v39, v14, v14 ; 104E1D0E v_interp_p1_f32 v41, v0, 1, 2, [m0] ; C8A40900 v_interp_p2_f32 v41, [v41], v1, 1, 2, [m0] ; C8A50901 v_interp_p1_f32 v40, v0, 0, 2, [m0] ; C8A00800 v_interp_p2_f32 v40, [v40], v1, 0, 2, [m0] ; C8A10801 image_sample v[42:45], 15, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[92:99], s[88:91] ; F0800F00 02D72A28 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, v45, v39, v35 ; D2820023 048E4F2D v_mad_f32 v46, -2.0, v43, 1.0 ; D282002E 03CA56F5 v_mul_f32_e32 v47, v46, v14 ; 105E1D2E v_mad_f32 v35, v27, v47, v35 ; D2820023 048E5F1B v_mad_f32 v37, v15, v37, v38 ; D2820025 049A4B0F v_mad_f32 v35, v47, v37, v35 ; D2820023 048E4B2F v_mul_f32_e32 v13, v13, v13 ; 101A1B0D v_mul_f32_e32 v13, v13, v17 ; 101A230D v_mul_f32_e32 v13, v13, v18 ; 101A250D v_mul_f32_e32 v17, v13, v13 ; 10221B0D v_interp_p1_f32 v49, v0, 1, 3, [m0] ; C8C40D00 v_interp_p2_f32 v49, [v49], v1, 1, 3, [m0] ; C8C50D01 v_interp_p1_f32 v48, v0, 0, 3, [m0] ; C8C00C00 v_interp_p2_f32 v48, [v48], v1, 0, 3, [m0] ; C8C10C01 image_sample v[50:53], 15, 0, 0, 0, 0, 0, 0, 0, v[48:49], s[92:99], s[88:91] ; F0800F00 02D73230 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v18, v53, v17, v35 ; D2820012 048E2335 v_mad_f32 v35, -2.0, v51, 1.0 ; D2820023 03CA66F5 v_mul_f32_e32 v54, v35, v13 ; 106C1B23 v_mad_f32 v18, v27, v54, v18 ; D2820012 044A6D1B v_mad_f32 v18, v37, v54, v18 ; D2820012 044A6D25 v_mul_f32_e32 v27, v54, v47 ; 10365F36 v_mad_f32 v18, 2.0, v27, v18 ; D2820012 044A36F4 v_mad_f32 v25, v16, v25, v38 ; D2820019 049A3310 v_mad_f32 v25, v14, v46, v25 ; D2820019 04665D0E v_mad_f32 v25, v13, v35, v25 ; D2820019 0466470D v_mad_f32 v18, -v25, v25, v18 ; D2820012 244A3319 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v27, s4 ; 7E365404 v_add_f32_e32 v35, v27, v18 ; 0646251B v_mad_f32 v37, 2.0, v21, -1.0 ; D2820025 03CE2AF4 v_mul_f32_e32 v46, v37, v16 ; 105C2125 v_mad_f32 v55, v16, v37, v46 ; D2820037 04BA4B10 v_mul_f32_e32 v56, v33, v28 ; 10703921 v_mad_f32 v21, v23, v36, v56 ; D2820015 04E24917 v_mad_f32 v22, -2.0, v31, 1.0 ; D2820016 03CA3EF5 v_mul_f32_e32 v23, v22, v15 ; 102E1F16 v_mad_f32 v21, v23, v55, v21 ; D2820015 04566F17 v_mad_f32 v21, v44, v39, v21 ; D2820015 04564F2C v_mad_f32 v24, -2.0, v42, 1.0 ; D2820018 03CA54F5 v_mul_f32_e32 v31, v24, v14 ; 103E1D18 v_mad_f32 v21, v55, v31, v21 ; D2820015 04563F37 v_mad_f32 v15, v15, v22, v23 ; D282000F 045E2D0F v_mad_f32 v21, v31, v15, v21 ; D2820015 04561F1F v_mad_f32 v21, v52, v17, v21 ; D2820015 04562334 v_mad_f32 v22, 2.0, v50, -1.0 ; D2820016 03CE64F4 v_mul_f32_e32 v32, v22, v13 ; 10401B16 v_mad_f32 v21, v55, v32, v21 ; D2820015 04564137 v_mad_f32 v15, v15, v32, v21 ; D282000F 0456410F v_mul_f32_e32 v21, v32, v31 ; 102A3F20 v_mad_f32 v15, 2.0, v21, v15 ; D282000F 043E2AF4 v_mad_f32 v16, v16, v37, v23 ; D2820010 045E4B10 v_mad_f32 v14, v14, v24, v16 ; D282000E 0442310E v_mad_f32 v13, v13, v22, v14 ; D282000D 043A2D0D v_mad_f32 v14, -v13, v13, v15 ; D282000E 243E1B0D v_add_f32_e32 v15, v27, v14 ; 061E1D1B v_mul_f32_e32 v16, v23, v26 ; 10203517 v_mad_f32 v16, v46, v38, v16 ; D2820010 04424D2E image_sample v21, 1, 0, 0, 0, 0, 0, 0, 0, v[29:30], s[80:87], s[60:63] ; F0800100 01F4151D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v21, -2.0, v21, 1.0 ; D2820015 03CA2AF5 v_mul_f32_e32 v21, v21, v28 ; 102A3915 image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[19:20], s[80:87], s[60:63] ; F0800100 01F41313 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, 2.0, v19, -1.0 ; D2820013 03CE26F4 v_mad_f32 v19, v19, v36, v21 ; D2820013 04564913 v_add_f32_e32 v16, v16, v19 ; 06202710 image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[40:41], s[80:87], s[60:63] ; F0800100 01F41328 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, 2.0, v19, -1.0 ; D2820013 03CE26F4 v_mad_f32 v16, v19, v39, v16 ; D2820010 04424F13 v_mul_f32_e32 v19, v31, v26 ; 1026351F v_mad_f32 v19, v46, v47, v19 ; D2820013 044E5F2E v_add_f32_e32 v16, v19, v16 ; 06202113 v_mul_f32_e32 v19, v31, v38 ; 10264D1F v_mad_f32 v19, v23, v47, v19 ; D2820013 044E5F17 v_add_f32_e32 v16, v19, v16 ; 06202113 image_sample v19, 1, 0, 0, 0, 0, 0, 0, 0, v[48:49], s[80:87], s[60:63] ; F0800100 01F41330 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, -2.0, v19, 1.0 ; D2820013 03CA26F5 v_mad_f32 v16, v19, v17, v16 ; D2820010 04422313 v_mul_f32_e32 v17, v32, v26 ; 10223520 v_mad_f32 v17, v46, v54, v17 ; D2820011 04466D2E v_add_f32_e32 v16, v16, v17 ; 06202310 v_mul_f32_e32 v17, v32, v38 ; 10224D20 v_mad_f32 v17, v23, v54, v17 ; D2820011 04466D17 v_add_f32_e32 v16, v17, v16 ; 06202111 v_mul_f32_e32 v17, v32, v47 ; 10225F20 v_mad_f32 v17, v31, v54, v17 ; D2820011 04466D1F v_add_f32_e32 v16, v17, v16 ; 06202111 v_mad_f32 v16, -v13, v25, v16 ; D2820010 2442330D v_mul_f32_e32 v17, v16, v16 ; 10222110 v_mad_f32 v19, v15, v35, -v17 ; D2820013 8446470F v_max_f32_e32 v20, 0x33d6bf95, v19 ; 202826FF 33D6BF95 v_rsq_clamp_f32_e32 v20, v20 ; 7E285914 v_rcp_f32_e32 v21, v19 ; 7E2A5513 v_add_f32_e32 v16, v16, v16 ; 06202110 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v22, s4, v5 ; 082C0A04 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v23, s5, v4 ; 082E0805 v_mul_f32_e32 v24, v23, v23 ; 10302F17 v_mad_f32 v24, v22, v22, v24 ; D2820018 04622D16 s_buffer_load_dword s60, s[0:3], 0x22 ; C21E0122 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v26, s60, v6 ; 08340C3C v_mad_f32 v24, v26, v26, v24 ; D2820018 0462351A v_max_f32_e32 v24, 0x33d6bf95, v24 ; 203030FF 33D6BF95 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v28, v24, v22, -s6 ; D282001C 801A2D18 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 v_mov_b32_e32 v29, s6 ; 7E3A0206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v28, v28, s7, v29 ; D282001C 04740F1C s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v29, v24, v23, -s6 ; D282001D 801A2F18 v_mov_b32_e32 v30, s6 ; 7E3C0206 v_mad_f32 v29, v29, s7, v30 ; D282001D 04780F1D v_mul_f32_e32 v30, v29, v29 ; 103C3B1D v_mad_f32 v30, v28, v28, v30 ; D282001E 047A391C s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v31, v24, v26, -s6 ; D282001F 801A3518 v_mov_b32_e32 v32, s6 ; 7E400206 v_mad_f32 v31, v31, s7, v32 ; D282001F 04800F1F v_mad_f32 v30, v31, v31, v30 ; D282001E 047A3F1F v_max_f32_e32 v30, 0x33d6bf95, v30 ; 203C3CFF 33D6BF95 v_rsq_clamp_f32_e32 v30, v30 ; 7E3C591E s_buffer_load_dword s6, s[0:3], 0x4 ; C2030104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v29, v29, v30, s6 ; D282001D 001A3D1D s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v31, v31, v30, s7 ; D282001F 001E3D1F v_rcp_f32_e32 v31, v31 ; 7E3E551F v_mad_f32 v29, v29, v31, -v13 ; D282001D 84363F1D v_mul_f32_e32 v32, v16, v29 ; 10403B10 s_buffer_load_dword s8, s[0:3], 0x5 ; C2040105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v28, v28, v30, s8 ; D282001C 00223D1C v_mad_f32 v28, v28, v31, -v25 ; D282001C 84663F1C v_mad_f32 v15, v15, v28, -v32 ; D282000F 8482390F v_mul_f32_e32 v15, v15, v28 ; 101E390F v_mul_f32_e32 v29, v29, v29 ; 103A3B1D v_mad_f32 v15, v29, v35, v15 ; D282000F 043E471D v_mul_f32_e32 v15, 0.5, v15 ; 101E1EF0 v_mul_f32_e32 v30, v15, v21 ; 103C2B0F v_mov_b32_e32 v31, 0xbfb8aa65 ; 7E3E02FF BFB8AA65 v_mul_f32_e32 v30, v31, v30 ; 103C3D1F v_exp_f32_e32 v30, v30 ; 7E3C4B1E v_mul_f32_e32 v20, v20, v30 ; 10283D14 s_buffer_load_dword s9, s[0:3], 0xe ; C204810E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v30, s9, v20 ; 103C2809 v_cmp_ge_f32_e64 s[10:11], -v19, 0 ; D00C000A 20010113 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[10:11], v19, 0 ; D10A000A 00010113 v_cndmask_b32_e64 v19, 0, 1.0, s[10:11] ; D2000813 0029E480 v_mov_b32_e32 v33, 0xc1800000 ; 7E4202FF C1800000 v_mad_f32 v15, v21, v15, v33 ; D282000F 04861F15 v_cmp_ge_f32_e64 s[10:11], v15, 0 ; D00C000A 0001010F v_cndmask_b32_e64 v15, 0, -1, s[10:11] ; D200080F 00298280 v_cmp_ne_i32_e64 s[10:11], v15, 0 ; D10A000A 0001010F v_cndmask_b32_e64 v15, 0, 1.0, s[10:11] ; D200080F 0029E480 v_add_f32_e32 v15, v19, v15 ; 061E1F13 v_cmp_ge_f32_e64 s[10:11], -v15, 0 ; D00C000A 2001010F v_cndmask_b32_e64 v15, 0, -1, s[10:11] ; D200080F 00298280 v_cmp_ne_i32_e64 s[62:63], v15, 0 ; D10A003E 0001010F v_cndmask_b32_e64 v15, 0, v30, s[62:63] ; D200080F 00FA3C80 s_buffer_load_dword s10, s[0:3], 0x13 ; C2050113 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v19, s10 ; 7E26540A v_add_f32_e32 v21, v18, v19 ; 062A2712 v_add_f32_e32 v19, v14, v19 ; 0626270E v_mad_f32 v30, v19, v21, -v17 ; D282001E 84462B13 v_max_f32_e32 v34, 0x33d6bf95, v30 ; 20443CFF 33D6BF95 v_rsq_clamp_f32_e32 v34, v34 ; 7E445922 v_rcp_f32_e32 v35, v30 ; 7E46551E s_buffer_load_dword s10, s[0:3], 0x19 ; C2050119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, v24, v22, -s10 ; D2820024 802A2D18 s_buffer_load_dword s11, s[0:3], 0x1b ; C205811B v_mov_b32_e32 v37, s10 ; 7E4A020A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, v36, s11, v37 ; D2820024 04941724 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v37, v24, v23, -s10 ; D2820025 802A2F18 v_mov_b32_e32 v38, s10 ; 7E4C020A v_mad_f32 v37, v37, s11, v38 ; D2820025 04981725 v_mul_f32_e32 v38, v37, v37 ; 104C4B25 v_mad_f32 v38, v36, v36, v38 ; D2820026 049A4924 s_buffer_load_dword s10, s[0:3], 0x1a ; C205011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v39, v24, v26, -s10 ; D2820027 802A3518 v_mov_b32_e32 v40, s10 ; 7E50020A v_mad_f32 v39, v39, s11, v40 ; D2820027 04A01727 v_mad_f32 v38, v39, v39, v38 ; D2820026 049A4F27 v_max_f32_e32 v38, 0x33d6bf95, v38 ; 204C4CFF 33D6BF95 v_rsq_clamp_f32_e32 v38, v38 ; 7E4C5926 s_buffer_load_dword s10, s[0:3], 0x8 ; C2050108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v37, v37, v38, s10 ; D2820025 002A4D25 s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v39, v39, v38, s10 ; D2820027 002A4D27 v_rcp_f32_e32 v39, v39 ; 7E4E5527 v_mad_f32 v37, v37, v39, -v13 ; D2820025 84364F25 v_mul_f32_e32 v16, v37, v16 ; 10202125 s_buffer_load_dword s10, s[0:3], 0x9 ; C2050109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, v36, v38, s10 ; D2820024 002A4D24 v_mad_f32 v36, v36, v39, -v25 ; D2820024 84664F24 v_mad_f32 v16, v19, v36, -v16 ; D2820010 84424913 v_mul_f32_e32 v16, v16, v36 ; 10204910 v_mul_f32_e32 v19, v37, v37 ; 10264B25 v_mad_f32 v16, v19, v21, v16 ; D2820010 04422B13 v_mul_f32_e32 v16, 0.5, v16 ; 102020F0 v_mul_f32_e32 v19, v35, v16 ; 10262123 v_mul_f32_e32 v21, v31, v19 ; 102A271F v_exp_f32_e32 v21, v21 ; 7E2A4B15 v_mul_f32_e32 v21, v21, v34 ; 102A4515 s_buffer_load_dword s10, s[0:3], 0x12 ; C2050112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v34, s10, v21 ; 10442A0A v_cmp_ge_f32_e64 s[10:11], -v30, 0 ; D00C000A 2001011E v_cndmask_b32_e64 v36, 0, -1, s[10:11] ; D2000024 00298280 v_cmp_ne_i32_e64 s[10:11], v36, 0 ; D10A000A 00010124 v_cndmask_b32_e64 v36, 0, 1.0, s[10:11] ; D2000024 0029E480 v_mad_f32 v16, v16, v35, v33 ; D2820010 04864710 v_cmp_ge_f32_e64 s[10:11], v16, 0 ; D00C000A 00010110 v_cndmask_b32_e64 v16, 0, -1, s[10:11] ; D2000010 00298280 v_cmp_ne_i32_e64 s[10:11], v16, 0 ; D10A000A 00010110 v_cndmask_b32_e64 v16, 0, 1.0, s[10:11] ; D2000010 0029E480 v_add_f32_e32 v16, v36, v16 ; 06202124 v_cmp_ge_f32_e64 s[10:11], -v16, 0 ; D00C000A 20010110 v_cndmask_b32_e64 v16, 0, -1, s[10:11] ; D2000010 00298280 v_cmp_ne_i32_e64 s[80:81], v16, 0 ; D10A0050 00010110 v_cndmask_b32_e64 v16, 0, v34, s[80:81] ; D2000010 01424480 v_add_f32_e32 v15, v15, v16 ; 061E210F v_mul_f32_e32 v16, v23, v24 ; 10203117 v_mul_f32_e32 v16, v16, v13 ; 10201B10 v_mul_f32_e32 v22, v22, v24 ; 102C3116 v_mad_f32 v16, v25, v22, v16 ; D2820010 04422D19 v_mad_f32 v16, v24, v26, v16 ; D2820010 04423518 v_sub_f32_e32 v16, 1.0, v16 ; 082020F2 v_mul_f32_e32 v22, v16, v16 ; 102C2110 v_mul_f32_e32 v22, v22, v22 ; 102C2D16 v_mul_f32_e32 v16, v22, v16 ; 10202116 v_interp_p1_f32 v23, v0, 3, 6, [m0] ; C85C1B00 v_interp_p2_f32 v23, [v23], v1, 3, 6, [m0] ; C85D1B01 v_interp_p1_f32 v22, v0, 2, 6, [m0] ; C8581A00 v_interp_p2_f32 v22, [v22], v1, 2, 6, [m0] ; C8591A01 image_sample v22, 8, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[72:79], s[56:59] ; F0800800 01D21616 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v22, 4.0, v22 ; 102C2CF6 v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80 v_sub_f32_e32 v9, 1.0, v22 ; 08122CF2 v_mov_b32_e32 v11, v10 ; 7E16030A image_sample_l v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[9:12], s[64:71], s[52:55] ; F0900F00 01B00909 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v23, v11 ; 7E2E4F0B v_mov_b32_e32 v24, 0x400ccccd ; 7E3002FF 400CCCCD v_mul_legacy_f32_e32 v23, v24, v23 ; 0E2E2F18 v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mad_f32 v15, v15, v16, v23 ; D282000F 045E210F v_mov_b32_e32 v26, 0x41800000 ; 7E3402FF 41800000 v_mad_f32 v34, v27, v26, v18 ; D2820022 044A351B v_mad_f32 v26, v27, v26, v14 ; D282001A 043A351B v_mad_f32 v17, v26, v34, -v17 ; D2820011 8446451A v_max_f32_e32 v27, 0x33d6bf95, v17 ; 203622FF 33D6BF95 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_rcp_f32_e32 v35, v17 ; 7E465511 v_mad_f32 v26, v26, v28, -v32 ; D282001A 8482391A v_mul_f32_e32 v26, v26, v28 ; 1034391A v_mad_f32 v26, v29, v34, v26 ; D282001A 046A451D v_mul_f32_e32 v26, 0.5, v26 ; 103434F0 v_mul_f32_e32 v28, v26, v35 ; 1038471A v_mul_f32_e32 v28, v31, v28 ; 1038391F v_exp_f32_e32 v28, v28 ; 7E384B1C v_mul_f32_e32 v27, v28, v27 ; 1036371C v_mul_f32_e32 v27, 0x3ba3d70a, v27 ; 103636FF 3BA3D70A v_mul_f32_e32 v28, s9, v23 ; 10382E09 v_mul_f32_e32 v28, v28, v27 ; 1038371C v_cmp_ge_f32_e64 s[10:11], -v17, 0 ; D00C000A 20010111 v_cndmask_b32_e64 v17, 0, -1, s[10:11] ; D2000811 00298280 v_cmp_ne_i32_e64 s[10:11], v17, 0 ; D10A000A 00010111 v_cndmask_b32_e64 v17, 0, 1.0, s[10:11] ; D2000811 0029E480 v_mad_f32 v26, v35, v26, v33 ; D282001A 04863523 v_cmp_ge_f32_e64 s[10:11], v26, 0 ; D00C000A 0001011A v_cndmask_b32_e64 v26, 0, -1, s[10:11] ; D200001A 00298280 v_cmp_ne_i32_e64 s[10:11], v26, 0 ; D10A000A 0001011A v_cndmask_b32_e64 v26, 0, 1.0, s[10:11] ; D200001A 0029E480 v_add_f32_e32 v17, v17, v26 ; 06223511 v_cmp_ge_f32_e64 s[10:11], -v17, 0 ; D00C000A 20010111 v_cndmask_b32_e64 v17, 0, -1, s[10:11] ; D2000811 00298280 v_cmp_ne_i32_e64 s[52:53], v17, 0 ; D10A0034 00010111 v_cndmask_b32_e64 v17, 0, v28, s[52:53] ; D2000811 00D23880 v_mul_f32_e32 v13, s6, v13 ; 101A1A06 v_mad_f32 v13, s8, v25, v13 ; D282000D 04363208 v_add_f32_e32 v13, s7, v13 ; 061A1A07 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mad_f32 v15, v15, v13, v17 ; D282000F 04461B0F s_buffer_load_dword s6, s[0:3], 0x29 ; C2030129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s6, v5 ; 10220A06 s_buffer_load_dword s6, s[0:3], 0x25 ; C2030125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v4, s6, v17 ; D2820011 04440D04 s_buffer_load_dword s6, s[0:3], 0x2d ; C203012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v6, s6, v17 ; D2820011 04440D06 s_buffer_load_dword s6, s[0:3], 0x31 ; C2030131 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v17, s6, v17 ; 06222206 s_buffer_load_dword s6, s[0:3], 0x2b ; C203012B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v25, s6, v5 ; 10320A06 s_buffer_load_dword s6, s[0:3], 0x27 ; C2030127 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v25, v4, s6, v25 ; D2820019 04640D04 s_buffer_load_dword s6, s[0:3], 0x2f ; C203012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v25, v6, s6, v25 ; D2820019 04640D06 s_buffer_load_dword s6, s[0:3], 0x33 ; C2030133 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v25, s6, v25 ; 06323206 v_rcp_f32_e32 v25, v25 ; 7E325519 v_mul_f32_e32 v17, v17, v25 ; 10223311 v_mad_f32 v17, -0.5, v17, -0.5 ; D2820011 03C622F1 v_mov_b32_e32 v26, 0x40c00000 ; 7E3402FF 40C00000 v_mad_f32 v17, v18, v26, v17 ; D2820011 04463512 v_mad_f32 v29, -2.0, v30, v17 ; D282001D 04463CF5 s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v18, s6, v5 ; 10240A06 s_buffer_load_dword s6, s[0:3], 0x24 ; C2030124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, v4, s6, v18 ; D2820012 04480D04 s_buffer_load_dword s6, s[0:3], 0x2c ; C203012C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, v6, s6, v18 ; D2820012 04480D06 s_buffer_load_dword s6, s[0:3], 0x30 ; C2030130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v18, s6, v18 ; 06242406 v_mul_f32_e32 v18, v18, v25 ; 10243312 v_mad_f32 v18, 0.5, v18, -0.5 ; D2820012 03C624F0 v_mad_f32 v14, v14, v26, v18 ; D282000E 044A350E v_mad_f32 v28, 2.0, v30, v14 ; D282001C 043A3CF4 image_sample v[31:34], 15, 0, 0, 0, 0, 0, 0, 0, v[28:29], s[44:51], s[32:35] ; F0800F00 010B1F1C v_mov_b32_e32 v18, 0x3e800000 ; 7E2402FF 3E800000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v25, 0x3e800000, v33 ; 103242FF 3E800000 v_mad_f32 v35, 2.0, v30, v17 ; D2820023 04463CF4 v_mov_b32_e32 v36, v28 ; 7E48031C v_mov_b32_e32 v37, v29 ; 7E4A031D v_mov_b32_e32 v37, v35 ; 7E4A0323 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[36:37], s[44:51], s[32:35] ; F0800F00 010B2324 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v25, v37, v18, v25 ; D2820019 04662525 v_mad_f32 v40, v30, v26, v17 ; D2820028 0446351E v_mad_f32 v39, -2.0, v30, v14 ; D2820027 043A3CF5 image_sample v[41:44], 15, 0, 0, 0, 0, 0, 0, 0, v[39:40], s[44:51], s[32:35] ; F0800F00 010B2927 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v43, v18, v25 ; D282000E 0466252B v_mov_b32_e32 v40, v29 ; 7E50031D image_sample v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[39:40], s[44:51], s[32:35] ; F0800F00 010B2D27 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, v47, v18, v14 ; D282000E 043A252F v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_mul_f32_e32 v14, 0x400ccccd, v14 ; 101C1CFF 400CCCCD v_exp_f32_e32 v14, v14 ; 7E1C4B0E s_buffer_load_dword s6, s[0:3], 0x5c ; C203015C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v14, v14, s6, v15 ; D282000E 043C0D0E v_mul_f32_e32 v15, v19, v19 ; 101E2713 v_mul_f32_e32 v15, v15, v19 ; 101E270F v_mul_f32_e32 v15, v15, v22 ; 101E2D0F v_mad_f32 v14, v15, v23, v14 ; D282000E 043A2F0F s_buffer_load_dword s7, s[0:3], 0x39 ; C2038139 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s7, v5 ; 10220A07 s_buffer_load_dword s7, s[0:3], 0x35 ; C2038135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v4, s7, v17 ; D2820011 04440F04 s_buffer_load_dword s7, s[0:3], 0x3d ; C203813D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v6, s7, v17 ; D2820011 04440F06 s_buffer_load_dword s7, s[0:3], 0x41 ; C2038141 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v17, s7, v17 ; 06222207 s_buffer_load_dword s7, s[0:3], 0x3b ; C203813B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v19, s7, v5 ; 10260A07 s_buffer_load_dword s7, s[0:3], 0x37 ; C2038137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v19, v4, s7, v19 ; D2820013 044C0F04 s_buffer_load_dword s7, s[0:3], 0x3f ; C203813F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v19, v6, s7, v19 ; D2820013 044C0F06 s_buffer_load_dword s7, s[0:3], 0x43 ; C2038143 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v19, s7, v19 ; 06262607 v_rcp_f32_e32 v19, v19 ; 7E265513 v_mul_f32_e32 v17, v17, v19 ; 10222711 v_mad_f32 v17, -0.5, v17, -0.5 ; D2820011 03C622F1 v_sub_f32_e32 v23, 1.0, v17 ; 082E22F2 s_buffer_load_dword s7, s[0:3], 0x38 ; C2038138 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s7, v5 ; 10220A07 s_buffer_load_dword s7, s[0:3], 0x34 ; C2038134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v4, s7, v17 ; D2820011 04440F04 s_buffer_load_dword s7, s[0:3], 0x3c ; C203813C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v17, v6, s7, v17 ; D2820011 04440F06 s_buffer_load_dword s7, s[0:3], 0x40 ; C2038140 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v17, s7, v17 ; 06222207 v_mul_f32_e32 v17, v17, v19 ; 10222711 v_mad_f32 v17, 0.5, v17, -0.5 ; D2820011 03C622F0 v_add_f32_e32 v22, 0, v17 ; 062C2280 s_load_dwordx8 s[8:15], s[100:101], 0x50 ; C0C46550 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v17, 1, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[8:15], s[28:31] ; F0800100 00E21116 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e64 v17, 0, v17 clamp ; D2060811 00022280 s_buffer_load_dword s7, s[0:3], 0x54 ; C2038154 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v17, s7, v17 ; 10222207 v_mul_f32_e32 v19, v14, v17 ; 1026230E v_mov_b32_e32 v22, 0xbf2b851f ; 7E2C02FF BF2B851F v_mad_f32 v14, v19, v22, v14 ; D282000E 043A2D13 v_log_f32_e64 v14, |v14| ; D34E010E 0000010E v_mul_f32_e32 v14, 0x3ee8ba1f, v14 ; 101C1CFF 3EE8BA1F v_exp_f32_e32 v14, v14 ; 7E1C4B0E image_sample v[49:52], 15, 0, 0, 0, 0, 0, 0, 0, v[7:8], s[36:43], s[24:27] ; F0800F00 00C93107 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v7, 1.0, v52 ; 080E68F2 v_mad_f32 v8, v14, v7, v51 ; D2820008 04CE0F0E v_mov_b32_e32 v14, 0xbecccccd ; 7E1C02FF BECCCCCD v_add_f32_e32 v8, v14, v8 ; 0610110E v_mov_b32_e32 v19, 0x3ecccccd ; 7E2602FF 3ECCCCCD v_mad_f32 v55, v2, v8, v19 ; D2820037 044E1102 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s7, v20 ; 10102807 v_cndmask_b32_e64 v8, 0, v8, s[62:63] ; D2000008 00FA1080 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v23, s8, v21 ; 102E2A08 v_cndmask_b32_e64 v23, 0, v23, s[80:81] ; D2000817 01422E80 v_add_f32_e32 v8, v8, v23 ; 06102F08 v_log_f32_e32 v23, v10 ; 7E2E4F0A v_mul_legacy_f32_e32 v23, v24, v23 ; 0E2E2F18 v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mad_f32 v8, v8, v16, v23 ; D2820008 045E2108 v_mul_f32_e32 v25, s7, v23 ; 10322E07 v_mul_f32_e32 v25, v25, v27 ; 10323719 v_cndmask_b32_e64 v25, 0, v25, s[52:53] ; D2000819 00D23280 v_mad_f32 v8, v8, v13, v25 ; D2820008 04661B08 v_mul_f32_e32 v25, 0x3e800000, v32 ; 103240FF 3E800000 v_mad_f32 v25, v36, v18, v25 ; D2820019 04662524 v_mad_f32 v25, v42, v18, v25 ; D2820019 0466252A v_mad_f32 v25, v46, v18, v25 ; D2820019 0466252E v_log_f32_e64 v25, |v25| ; D34E0119 00000119 v_mul_f32_e32 v25, 0x400ccccd, v25 ; 103232FF 400CCCCD v_exp_f32_e32 v25, v25 ; 7E324B19 v_mad_f32 v8, v25, s6, v8 ; D2820008 04200D19 v_mad_f32 v8, v15, v23, v8 ; D2820008 04222F0F v_mul_f32_e32 v23, v8, v17 ; 102E2308 v_mad_f32 v8, v23, v22, v8 ; D2820008 04222D17 v_log_f32_e64 v8, |v8| ; D34E0108 00000108 v_mul_f32_e32 v8, 0x3ee8ba1f, v8 ; 101010FF 3EE8BA1F v_exp_f32_e32 v8, v8 ; 7E104B08 v_mad_f32 v8, v8, v7, v50 ; D2820008 04CA0F08 v_add_f32_e32 v8, v14, v8 ; 0610110E v_mad_f32 v54, v2, v8, v19 ; D2820036 044E1102 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s7, v20 ; 10102807 v_cndmask_b32_e64 v8, 0, v8, s[62:63] ; D2000008 00FA1080 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s8, v21 ; 10282A08 v_cndmask_b32_e64 v20, 0, v20, s[80:81] ; D2000014 01422880 v_add_f32_e32 v8, v8, v20 ; 06102908 v_log_f32_e32 v20, v9 ; 7E284F09 v_mul_legacy_f32_e32 v20, v24, v20 ; 0E282918 v_exp_f32_e32 v20, v20 ; 7E284B14 v_mad_f32 v8, v8, v16, v20 ; D2820008 04522108 v_mul_f32_e32 v16, s7, v20 ; 10202807 v_mul_f32_e32 v16, v16, v27 ; 10203710 v_cndmask_b32_e64 v16, 0, v16, s[52:53] ; D2000010 00D22080 v_mad_f32 v8, v8, v13, v16 ; D2820008 04421B08 v_mul_f32_e32 v13, 0x3e800000, v31 ; 101A3EFF 3E800000 v_mad_f32 v13, v35, v18, v13 ; D282000D 04362523 v_mad_f32 v13, v41, v18, v13 ; D282000D 04362529 v_mad_f32 v13, v45, v18, v13 ; D282000D 0436252D v_log_f32_e64 v13, |v13| ; D34E010D 0000010D v_mul_f32_e32 v13, 0x400ccccd, v13 ; 101A1AFF 400CCCCD v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mad_f32 v8, v13, s6, v8 ; D2820008 04200D0D v_mad_f32 v8, v15, v20, v8 ; D2820008 0422290F v_mul_f32_e32 v13, v8, v17 ; 101A2308 v_mad_f32 v8, v13, v22, v8 ; D2820008 04222D0D v_log_f32_e64 v8, |v8| ; D34E0108 00000108 v_mul_f32_e32 v8, 0x3ee8ba1f, v8 ; 101010FF 3EE8BA1F v_exp_f32_e32 v8, v8 ; 7E104B08 v_mad_f32 v7, v8, v7, v49 ; D2820007 04C60F08 v_add_f32_e32 v7, v14, v7 ; 060E0F0E v_mad_f32 v53, v2, v7, v19 ; D2820035 044E0F02 s_load_dwordx8 s[8:15], s[100:101], 0x48 ; C0C46548 v_readlane_b32 s24, v57, 4 ; 02310939 v_readlane_b32 s25, v57, 5 ; 02330B39 v_readlane_b32 s26, v57, 6 ; 02350D39 v_readlane_b32 s27, v57, 7 ; 02370F39 s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[13:15], 7, 0, 0, 0, 0, 0, 0, 0, v[53:56], s[8:15], s[24:27] ; F0800700 00C20D35 v_mad_f32 v2, -v3, v2, v2 ; D2820002 240A0503 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v7, v54, v3 ; 100E0736 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v2, v14, v7 ; D2820007 041E1D02 s_buffer_load_dword s6, s[0:3], 0x59 ; C2030159 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v8, s6, v7 ; 08100E06 v_subrev_f32_e32 v5, s4, v5 ; 0A0A0A04 v_subrev_f32_e32 v4, s5, v4 ; 0A080805 v_mul_f32_e32 v4, v4, v4 ; 10080904 v_mad_f32 v4, v5, v5, v4 ; D2820004 04120B05 v_subrev_f32_e32 v5, s60, v6 ; 0A0A0C3C v_mad_f32 v4, v5, v5, v4 ; D2820004 04120B05 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 v_mul_f32_e32 v6, 0x3fb8aa65, v6 ; 100C0CFF 3FB8AA65 v_exp_f32_e32 v6, v6 ; 7E0C4B06 v_sub_f32_e32 v6, 1.0, v6 ; 080C0CF2 v_mul_f32_e32 v4, v4, v6 ; 10080D04 v_rcp_f32_e32 v5, v5 ; 7E0A5505 v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mul_f32_e32 v4, 0x3fb8aa65, v4 ; 100808FF 3FB8AA65 v_exp_f32_e32 v4, v4 ; 7E084B04 v_add_f32_e64 v4, 0, v4 clamp ; D2060804 00020880 v_sub_f32_e32 v4, 1.0, v4 ; 080808F2 v_interp_p1_f32 v5, v0, 1, 5, [m0] ; C8141500 v_interp_p2_f32 v5, [v5], v1, 1, 5, [m0] ; C8151501 v_interp_p1_f32 v6, v0, 2, 5, [m0] ; C8181600 v_interp_p2_f32 v6, [v6], v1, 2, 5, [m0] ; C8191601 v_rcp_f32_e32 v0, v6 ; 7E005506 v_mul_f32_e32 v0, v5, v0 ; 10000105 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_readlane_b32 s4, v57, 0 ; 02090139 v_readlane_b32 s5, v57, 1 ; 020B0339 v_readlane_b32 s6, v57, 2 ; 020D0539 v_readlane_b32 s7, v57, 3 ; 020F0739 s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[16:23], s[4:7] ; F0800100 00240000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v4 ; 10000900 v_mad_f32 v1, v0, v8, v7 ; D2820001 041E1100 v_mul_f32_e32 v4, v53, v3 ; 10080735 v_mad_f32 v4, v2, v13, v4 ; D2820004 04121B02 s_buffer_load_dword s4, s[0:3], 0x58 ; C2020158 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v5, s4, v4 ; 080A0804 v_mad_f32 v4, v0, v5, v4 ; D2820004 04120B00 v_cvt_pkrtz_f16_f32_e32 v1, v4, v1 ; 5E020304 v_mul_f32_e32 v3, v55, v3 ; 10060737 v_mad_f32 v2, v2, v15, v3 ; D2820002 040E1F02 s_buffer_load_dword s0, s[0:3], 0x5a ; C200015A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_mul_f32_e32 v2, 0x3e800000, v34 ; 100444FF 3E800000 v_mad_f32 v2, v38, v18, v2 ; D2820002 040A2526 v_mad_f32 v2, v44, v18, v2 ; D2820002 040A252C v_mad_f32 v2, v48, v18, v2 ; D2820002 040A2530 v_log_f32_e32 v3, v12 ; 7E064F0C v_mul_legacy_f32_e32 v3, 1.0, v3 ; 0E0606F2 v_exp_f32_e32 v3, v3 ; 7E064B03 v_mad_f32 v2, v2, v3, v3 ; D2820002 040E0702 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL OUT[7], GENERIC[15] DCL CONST[0..13] DCL TEMP[0..7], LOCAL IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: MUL TEMP[0].xy, CONST[13], IN[1] 1: MOV TEMP[0].xy, TEMP[0].xyxx 2: ADD TEMP[1], CONST[12], IN[1].xyxy 3: MUL TEMP[1], TEMP[1], CONST[13].xyxy 4: MUL TEMP[2].xy, TEMP[1], CONST[13].zzzz 5: MOV TEMP[2].xy, TEMP[2].xyxx 6: MUL TEMP[3].xy, TEMP[1].zwzw, CONST[13].wwww 7: MOV TEMP[3].xy, TEMP[3].xyxx 8: MUL TEMP[4].xyz, CONST[1], IN[0].yyyy 9: MOV TEMP[1].xyz, TEMP[4].xyzx 10: MAD TEMP[4].xyz, IN[0].xxxx, CONST[0], TEMP[1] 11: MOV TEMP[1].xyz, TEMP[4].xyzx 12: MAD TEMP[4].xyz, IN[0].zzzz, CONST[2], TEMP[1] 13: MOV TEMP[1].xyz, TEMP[4].xyzx 14: ADD TEMP[4].xyz, TEMP[1], CONST[3] 15: MUL TEMP[5].xy, TEMP[4].yyyy, CONST[9] 16: MOV TEMP[5].xy, TEMP[5].xyxx 17: MAD TEMP[6].xy, TEMP[4].xxxx, CONST[8], TEMP[5] 18: MOV TEMP[5].xy, TEMP[6].xyxx 19: MAD TEMP[6].xy, TEMP[4].zzzz, CONST[10], TEMP[5] 20: MOV TEMP[5].xy, TEMP[6].xyxx 21: ADD TEMP[6].xy, TEMP[5], CONST[11] 22: MOV TEMP[5].xy, TEMP[6].xyxx 23: MAD TEMP[6].xy, TEMP[5], IMM[0].xyzz, IMM[0].yyyy 24: MOV TEMP[6].xy, TEMP[6].xyxx 25: MUL TEMP[5], TEMP[4].yyyy, CONST[5] 26: MAD TEMP[5], TEMP[4].xxxx, CONST[4], TEMP[5] 27: MAD TEMP[5], TEMP[4].zzzz, CONST[6], TEMP[5] 28: MOV TEMP[4].xyz, TEMP[4].xyzx 29: ADD TEMP[1], TEMP[5], CONST[7] 30: MOV TEMP[5].xyz, TEMP[1].xywx 31: MOV TEMP[7].xy, IN[2].xyxx 32: MOV TEMP[6].zw, IMM[0].zzzz 33: MOV TEMP[0].zw, IMM[0].wwzw 34: MOV TEMP[2].zw, IMM[0].wwzw 35: MOV TEMP[3].zw, IMM[0].wwzw 36: MOV TEMP[7].zw, IMM[0].wwzw 37: MOV TEMP[4].w, IMM[0].wwww 38: MOV TEMP[5].w, IMM[0].wwww 39: MOV OUT[6], TEMP[5] 40: MOV OUT[7], TEMP[6] 41: MOV OUT[1], TEMP[0] 42: MOV OUT[0], TEMP[1] 43: MOV OUT[2], TEMP[2] 44: MOV OUT[3], TEMP[3] 45: MOV OUT[4], TEMP[7] 46: MOV OUT[5], TEMP[4] 47: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = add i32 %5, %7 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = fmul float %53, %68 %77 = fmul float %54, %69 %78 = fadd float %49, %68 %79 = fadd float %50, %69 %80 = fadd float %51, %68 %81 = fadd float %52, %69 %82 = fmul float %78, %53 %83 = fmul float %79, %54 %84 = fmul float %80, %53 %85 = fmul float %81, %54 %86 = fmul float %82, %55 %87 = fmul float %83, %55 %88 = fmul float %84, %56 %89 = fmul float %85, %56 %90 = fmul float %16, %62 %91 = fmul float %17, %62 %92 = fmul float %18, %62 %93 = fmul float %61, %13 %94 = fadd float %93, %90 %95 = fmul float %61, %14 %96 = fadd float %95, %91 %97 = fmul float %61, %15 %98 = fadd float %97, %92 %99 = fmul float %63, %19 %100 = fadd float %99, %94 %101 = fmul float %63, %20 %102 = fadd float %101, %96 %103 = fmul float %63, %21 %104 = fadd float %103, %98 %105 = fadd float %100, %22 %106 = fadd float %102, %23 %107 = fadd float %104, %24 %108 = fmul float %106, %43 %109 = fmul float %106, %44 %110 = fmul float %105, %41 %111 = fadd float %110, %108 %112 = fmul float %105, %42 %113 = fadd float %112, %109 %114 = fmul float %107, %45 %115 = fadd float %114, %111 %116 = fmul float %107, %46 %117 = fadd float %116, %113 %118 = fadd float %115, %47 %119 = fadd float %117, %48 %120 = fmul float %118, 5.000000e-01 %121 = fadd float %120, -5.000000e-01 %122 = fmul float %119, -5.000000e-01 %123 = fadd float %122, -5.000000e-01 %124 = fmul float %106, %29 %125 = fmul float %106, %30 %126 = fmul float %106, %31 %127 = fmul float %106, %32 %128 = fmul float %105, %25 %129 = fadd float %128, %124 %130 = fmul float %105, %26 %131 = fadd float %130, %125 %132 = fmul float %105, %27 %133 = fadd float %132, %126 %134 = fmul float %105, %28 %135 = fadd float %134, %127 %136 = fmul float %107, %33 %137 = fadd float %136, %129 %138 = fmul float %107, %34 %139 = fadd float %138, %131 %140 = fmul float %107, %35 %141 = fadd float %140, %133 %142 = fmul float %107, %36 %143 = fadd float %142, %135 %144 = fadd float %137, %37 %145 = fadd float %139, %38 %146 = fadd float %141, %39 %147 = fadd float %143, %40 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %76, float %77, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %86, float %87, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %88, float %89, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %74, float %75, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %105, float %106, float %107, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %144, float %145, float %147, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 38, i32 0, float %121, float %123, float 0.000000e+00, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %144, float %145, float %146, float %147) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[12:15], 0 idxen ; E00C2000 80030100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s12, s[0:3], 0x35 ; C2060135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s12, v2 ; 100A040C s_buffer_load_dword s13, s[0:3], 0x34 ; C2068134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s13, v1 ; 100C020D v_mov_b32_e32 v7, 1.0 ; 7E0E02F2 v_mov_b32_e32 v8, 0 ; 7E100280 exp 15, 32, 0, 0, 0, v6, v5, v8, v7 ; F800020F 07080506 s_buffer_load_dword s14, s[0:3], 0x31 ; C2070131 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v5, s14, v2 ; 060A040E v_mul_f32_e32 v5, s12, v5 ; 100A0A0C s_buffer_load_dword s14, s[0:3], 0x36 ; C2070136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s14, v5 ; 100A0A0E s_buffer_load_dword s15, s[0:3], 0x30 ; C2078130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s15, v1 ; 060C020F v_mul_f32_e32 v6, s13, v6 ; 100C0C0D v_mul_f32_e32 v6, s14, v6 ; 100C0C0E exp 15, 33, 0, 0, 0, v6, v5, v8, v7 ; F800021F 07080506 s_buffer_load_dword s14, s[0:3], 0x33 ; C2070133 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v5, s14, v2 ; 060A040E v_mul_f32_e32 v5, s12, v5 ; 100A0A0C s_buffer_load_dword s12, s[0:3], 0x37 ; C2060137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s12, v5 ; 100A0A0C s_buffer_load_dword s14, s[0:3], 0x32 ; C2070132 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s14, v1 ; 0602020E v_mul_f32_e32 v1, s13, v1 ; 1002020D v_mul_f32_e32 v1, s12, v1 ; 1002020C exp 15, 34, 0, 0, 0, v1, v5, v8, v7 ; F800022F 07080501 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 35, 0, 0, 0, v1, v2, v8, v7 ; F800023F 07080201 s_waitcnt expcnt(0) ; BF8C070F buffer_load_format_xyzw v[0:3], v0, s[4:7], 0 idxen ; E00C2000 80010000 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v1 ; 100A0204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v2, s4, v6 ; D2820000 04180902 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 36, 0, 0, 0, v0, v5, v4, v7 ; F800024F 07040500 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v5 ; 10020A04 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v1 ; D2820001 04040900 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v4, s4, v1 ; D2820001 04040904 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s4, v1 ; 06020204 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v5 ; 10040A04 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v4, s4, v2 ; D2820002 04080904 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v5 ; 10060A04 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v4, s4, v3 ; D2820003 040C0904 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 exp 15, 37, 0, 0, 0, v3, v2, v1, v7 ; F800025F 07010203 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v6, s4, v5 ; 100C0A04 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x28 ; C2020128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v4, s4, v6 ; D2820006 04180904 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 s_buffer_load_dword s4, s[0:3], 0x25 ; C2020125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v5 ; 100E0A04 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v4, s4, v7 ; D2820007 041C0904 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mad_f32 v7, -0.5, v7, -0.5 ; D2820007 03C60EF1 exp 15, 38, 0, 0, 0, v6, v7, v8, v8 ; F800026F 08080706 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v5, s4, v5 ; 100A0A04 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v5 ; D2820000 04140900 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v4, s4, v0 ; D2820000 04000904 s_buffer_load_dword s0, s[0:3], 0x1e ; C200011E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v3, v2, v0, v1 ; F80008CF 01000203 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL IN[6], GENERIC[15], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL SAMP[10] DCL SAMP[11] DCL CONST[0..14] DCL TEMP[0..24], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 2.2000} IMM[1] FLT32 { 0.3330, 2.0000, -2.0000, 0.0000} IMM[2] FLT32 { 0.5000, -0.0000, -16.0000, -1.4427} IMM[3] FLT32 { 16.0000, 0.0050, 0.5000, -0.5000} IMM[4] FLT32 { 6.0000, 2.0000, -2.0000, 0.2500} IMM[5] FLT32 { 0.8000, 0.4545, 0.4000, 1.4427} 0: MAD TEMP[0].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[9], 2D 3: MOV TEMP[1].z, TEMP[0] 4: ABS TEMP[2].x, TEMP[0] 5: MOV TEMP[2], -TEMP[2].xxxx 6: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 7: UIF TEMP[3].xxxx :0 8: MOV TEMP[3].x, IMM[0].yyyy 9: ELSE :0 10: MOV TEMP[3].x, IMM[0].zzzz 11: ENDIF 12: MOV TEMP[3].x, TEMP[3].xxxx 13: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 14: UIF TEMP[4].xxxx :0 15: MOV TEMP[4].x, IMM[0].yyyy 16: ELSE :0 17: MOV TEMP[4].x, IMM[0].zzzz 18: ENDIF 19: MOV TEMP[3].y, TEMP[4].xxxx 20: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 21: UIF TEMP[4].xxxx :0 22: MOV TEMP[4].x, IMM[0].yyyy 23: ELSE :0 24: MOV TEMP[4].x, IMM[0].zzzz 25: ENDIF 26: MOV TEMP[3].z, TEMP[4].xxxx 27: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 28: UIF TEMP[2].xxxx :0 29: ELSE :0 30: ENDIF 31: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 32: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 33: OR TEMP[3].x, TEMP[4].xxxx, TEMP[3].yyyy 34: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx 35: KILL_IF -TEMP[3].xxxx 36: MOV TEMP[3].xy, IN[3].xyyy 37: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 2D 38: POW TEMP[4].x, TEMP[3].xxxx, IMM[0].wwww 39: POW TEMP[4].y, TEMP[3].yyyy, IMM[0].wwww 40: POW TEMP[4].z, TEMP[3].zzzz, IMM[0].wwww 41: MOV TEMP[2].xyz, TEMP[4].xyzx 42: RCP TEMP[3].x, CONST[0].xxxx 43: MUL TEMP[3].y, TEMP[3].xxxx, IMM[1].xxxx 44: MUL TEMP[5].w, TEMP[3].yyyy, TEMP[3].yyyy 45: MOV TEMP[6].xy, IN[0].xyyy 46: TEX TEMP[6], TEMP[6], SAMP[0], 2D 47: MOV TEMP[7].w, TEMP[6].wwzw 48: MOV TEMP[8].xy, IN[0].xyyy 49: TEX TEMP[8].x, TEMP[8], SAMP[1], 2D 50: MAD TEMP[9].xy, TEMP[6], IMM[1].yyyy, IMM[0].yyyy 51: MOV TEMP[7].xy, TEMP[9].xyxx 52: MUL TEMP[10].yz, TEMP[3].yyyy, TEMP[7].xxyw 53: MOV TEMP[9].z, TEMP[10].zyzz 54: MAD TEMP[8].z, TEMP[8].xxxx, IMM[1].yyyy, IMM[0].yyyy 55: MOV TEMP[8].z, TEMP[8].zzzz 56: MOV TEMP[8].xy, TEMP[6].zwzz 57: MOV TEMP[6].xy, IN[1].xyyy 58: TEX TEMP[6], TEMP[6], SAMP[2], 2D 59: MOV TEMP[11].w, TEMP[6].wwww 60: MOV TEMP[12].xy, IN[1].xyyy 61: TEX TEMP[12].xw, TEMP[12], SAMP[3], 2D 62: MOV TEMP[13].w, TEMP[12].wwww 63: ADD TEMP[14].zw, TEMP[6].xyxy, TEMP[6].xyxy 64: MOV TEMP[7].zw, TEMP[14].wwzw 65: MAD TEMP[14].zw, TEMP[7], IMM[0].yxyx, IMM[0].yxxy 66: MOV TEMP[7].zw, TEMP[14].wwzw 67: MUL TEMP[14].zw, TEMP[3].yyyy, TEMP[7] 68: MOV TEMP[7].zw, TEMP[14].wwzw 69: MAD TEMP[15].xy, TEMP[7], TEMP[3].yyyy, TEMP[14].zwzw 70: MOV TEMP[7].xy, TEMP[15].xyxx 71: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].zzzz, IMM[0].xxxx 72: MOV TEMP[13].z, TEMP[12].zzzz 73: MOV TEMP[13].xy, TEMP[6].zwzz 74: MUL TEMP[6].xyz, TEMP[5].wwww, TEMP[13] 75: MOV TEMP[11].xyz, TEMP[6].xyzx 76: MAD TEMP[6].xyz, TEMP[8], TEMP[5].wwww, TEMP[11] 77: MOV TEMP[8].xyz, TEMP[6].xyzx 78: ADD TEMP[6].xw, TEMP[10].yyzz, TEMP[10].yyzz 79: MOV TEMP[9].xw, TEMP[6].xxxw 80: MUL TEMP[6].xy, TEMP[14].zwzw, TEMP[9].xwzw 81: MOV TEMP[11].xy, TEMP[6].xyxx 82: DP2 TEMP[6].x, TEMP[10].yzzz, TEMP[14].wzzz 83: MOV TEMP[11].z, TEMP[6].xxxx 84: ADD TEMP[6].xyz, TEMP[8], TEMP[11] 85: MOV TEMP[8].xyz, TEMP[6].xyzx 86: MOV TEMP[6].xy, IN[2].xyyy 87: TEX TEMP[6], TEMP[6], SAMP[2], 2D 88: MOV TEMP[11].zw, TEMP[6].wwzw 89: MOV TEMP[12].xy, IN[2].xyyy 90: TEX TEMP[12].xw, TEMP[12], SAMP[3], 2D 91: MOV TEMP[13].xw, TEMP[12].xxxw 92: MAD TEMP[15].xy, TEMP[6].yxzw, IMM[1].yyyy, IMM[0].yyyy 93: MOV TEMP[11].xy, TEMP[15].xyxx 94: MUL TEMP[15].yz, TEMP[3].yyyy, TEMP[11].xxyw 95: MOV TEMP[13].yz, TEMP[15].zyzz 96: MAD TEMP[3].xy, TEMP[11].yxzw, TEMP[3].yyyy, TEMP[7] 97: MOV TEMP[3].xy, TEMP[3].xyxx 98: MAD TEMP[12].z, TEMP[12].xxxx, IMM[1].yyyy, IMM[0].yyyy 99: MOV TEMP[12].z, TEMP[12].zzzz 100: MOV TEMP[12].xy, TEMP[6].zwzz 101: MAD TEMP[5].xyz, TEMP[12], TEMP[5].wwww, TEMP[8] 102: MOV TEMP[8].xyz, TEMP[5].xyzx 103: MUL TEMP[5].xy, TEMP[9].xwzw, TEMP[13].zyzw 104: MOV TEMP[11].xy, TEMP[5].xyxx 105: DP2 TEMP[5].x, TEMP[10].yzzz, TEMP[15].yzzz 106: MOV TEMP[11].z, TEMP[5].xxxx 107: ADD TEMP[5].xyz, TEMP[8], TEMP[11] 108: MOV TEMP[9].xyz, TEMP[5].xyzx 109: MUL TEMP[5].yw, TEMP[7].xzzw, TEMP[13].xzzy 110: MOV TEMP[1].yw, TEMP[5].wyww 111: ADD TEMP[5].xy, TEMP[1].ywzw, TEMP[1].ywzw 112: MOV TEMP[8].xy, TEMP[5].xyxx 113: DP2 TEMP[5].x, TEMP[14].zwww, TEMP[15].yzzz 114: MOV TEMP[8].z, TEMP[5].xxxx 115: ADD TEMP[5].xyz, TEMP[9], TEMP[8] 116: MOV TEMP[7].xyz, TEMP[5].xyzx 117: MAD TEMP[5].xyz, TEMP[3].xyxw, -TEMP[3].xyyw, TEMP[7] 118: MOV TEMP[7].xy, TEMP[5].xyzx 119: MOV TEMP[3].z, IMM[0].xxxx 120: DP3 TEMP[6].x, CONST[1].xyzz, TEMP[3].xyzz 121: MOV_SAT TEMP[6].x, TEMP[6].xxxx 122: ADD TEMP[10].xyz, CONST[8], -IN[4] 123: MOV TEMP[9].xyz, TEMP[10].xyzx 124: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 125: MAX TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 126: RSQ TEMP[10].x, TEMP[10].xxxx 127: MUL TEMP[14].xyz, TEMP[10].xxxx, TEMP[9] 128: MAD TEMP[15].xyz, TEMP[9], TEMP[10].xxxx, -CONST[5] 129: MOV TEMP[11].xyz, TEMP[15].xyzx 130: MAD TEMP[15].xyz, CONST[5].wwww, TEMP[11], CONST[5] 131: MOV TEMP[11].xyz, TEMP[15].xyzx 132: DP3 TEMP[15].x, TEMP[15].xyzz, TEMP[15].xyzz 133: MAX TEMP[15].x, TEMP[15].xxxx, IMM[1].wwww 134: RSQ TEMP[15].x, TEMP[15].xxxx 135: MAD TEMP[15].xyz, TEMP[11], TEMP[15].xxxx, CONST[1] 136: MOV TEMP[11].xyz, TEMP[15].xyzx 137: RCP TEMP[15].x, TEMP[15].zzzz 138: MAD TEMP[15].xy, TEMP[11], TEMP[15].xxxx, -TEMP[3] 139: RCP TEMP[16].x, CONST[3].wwww 140: ADD TEMP[17].zw, TEMP[16].xxxx, TEMP[5].xyxy 141: MUL TEMP[18].w, TEMP[5].zzzz, TEMP[5].zzzz 142: MOV TEMP[7].w, TEMP[18].wwww 143: MAD TEMP[19].w, TEMP[17].zzzz, TEMP[17].wwww, -TEMP[18].wwww 144: MUL TEMP[20].w, TEMP[15].xxxx, TEMP[15].xxxx 145: ADD TEMP[5].z, TEMP[5].zzzz, TEMP[5].zzzz 146: MOV TEMP[7].z, TEMP[5].zzzz 147: MUL TEMP[21].x, TEMP[15].xxxx, TEMP[5].zzzz 148: MAD TEMP[22].z, TEMP[17].zzzz, TEMP[15].yyyy, -TEMP[21].xxxx 149: MUL TEMP[22].z, TEMP[15].yyyy, TEMP[22].zzzz 150: MAD TEMP[17].z, TEMP[20].wwww, TEMP[17].wwww, TEMP[22].zzzz 151: MUL TEMP[17].z, TEMP[17].zzzz, IMM[2].xxxx 152: RCP TEMP[22].x, TEMP[19].wwww 153: MUL TEMP[23].x, TEMP[22].xxxx, TEMP[17].zzzz 154: MOV TEMP[24].x, -TEMP[19].wwww 155: FSGE TEMP[24].x, TEMP[24].xxxx, IMM[0].zzzz 156: UIF TEMP[24].xxxx :0 157: MOV TEMP[24].x, IMM[0].xxxx 158: ELSE :0 159: MOV TEMP[24].x, IMM[2].yyyy 160: ENDIF 161: MAD TEMP[17].z, TEMP[17].zzzz, TEMP[22].xxxx, IMM[2].zzzz 162: FSGE TEMP[17].x, TEMP[17].zzzz, IMM[0].zzzz 163: UIF TEMP[17].xxxx :0 164: MOV TEMP[17].x, IMM[0].xxxx 165: ELSE :0 166: MOV TEMP[17].x, IMM[2].yyyy 167: ENDIF 168: ADD TEMP[17].z, TEMP[17].xxxx, TEMP[24].xxxx 169: MUL TEMP[22].w, TEMP[23].xxxx, IMM[2].wwww 170: EX2 TEMP[22].x, TEMP[22].wwww 171: MAX TEMP[19].x, TEMP[19].wwww, IMM[1].wwww 172: RSQ TEMP[19].x, TEMP[19].xxxx 173: MUL TEMP[19].w, TEMP[19].xxxx, TEMP[22].xxxx 174: MOV TEMP[9].w, TEMP[19].wwww 175: MAD TEMP[16].xy, TEMP[16].xxxx, IMM[3].xxxx, TEMP[7] 176: MAD TEMP[22].w, TEMP[16].xxxx, TEMP[16].yyyy, -TEMP[18].wwww 177: MAD TEMP[21].x, TEMP[16].xxxx, TEMP[15].yyyy, -TEMP[21].xxxx 178: MUL TEMP[15].x, TEMP[15].yyyy, TEMP[21].xxxx 179: MAD TEMP[15].w, TEMP[20].wwww, TEMP[16].yyyy, TEMP[15].xxxx 180: MUL TEMP[15].w, TEMP[15].wwww, IMM[2].xxxx 181: RCP TEMP[11].x, TEMP[22].wwww 182: MUL TEMP[16].y, TEMP[15].wwww, TEMP[11].xxxx 183: MOV TEMP[20].x, -TEMP[22].wwww 184: FSGE TEMP[20].x, TEMP[20].xxxx, IMM[0].zzzz 185: UIF TEMP[20].xxxx :0 186: MOV TEMP[20].x, IMM[0].xxxx 187: ELSE :0 188: MOV TEMP[20].x, IMM[2].yyyy 189: ENDIF 190: MAD TEMP[11].w, TEMP[15].wwww, TEMP[11].xxxx, IMM[2].zzzz 191: FSGE TEMP[11].x, TEMP[11].wwww, IMM[0].zzzz 192: UIF TEMP[11].xxxx :0 193: MOV TEMP[11].x, IMM[0].xxxx 194: ELSE :0 195: MOV TEMP[11].x, IMM[2].yyyy 196: ENDIF 197: ADD TEMP[11].w, TEMP[11].xxxx, TEMP[20].xxxx 198: MOV TEMP[8].w, TEMP[11].wwww 199: MUL TEMP[15].x, TEMP[16].yyyy, IMM[2].wwww 200: MAX TEMP[16].x, TEMP[22].wwww, IMM[1].wwww 201: RSQ TEMP[16].x, TEMP[16].xxxx 202: EX2 TEMP[15].x, TEMP[15].xxxx 203: MUL TEMP[15].w, TEMP[16].xxxx, TEMP[15].xxxx 204: MUL TEMP[15].w, TEMP[15].wwww, IMM[3].yyyy 205: MOV TEMP[2].w, TEMP[15].wwww 206: MAD TEMP[10].xyz, TEMP[9], TEMP[10].xxxx, -CONST[6] 207: MOV TEMP[9].xyz, TEMP[10].xyzx 208: MAD TEMP[10].xyz, CONST[6].wwww, TEMP[9], CONST[6] 209: MOV TEMP[9].xyz, TEMP[10].xyzx 210: DP3 TEMP[10].x, TEMP[10].xyzz, TEMP[10].xyzz 211: MAX TEMP[10].x, TEMP[10].xxxx, IMM[1].wwww 212: RSQ TEMP[10].x, TEMP[10].xxxx 213: MAD TEMP[10].xyz, TEMP[9], TEMP[10].xxxx, CONST[2] 214: MOV TEMP[9].xyz, TEMP[10].xyzx 215: RCP TEMP[10].x, TEMP[10].zzzz 216: MAD TEMP[10].xy, TEMP[9], TEMP[10].xxxx, -TEMP[3] 217: RCP TEMP[16].x, CONST[4].wwww 218: ADD TEMP[16].xy, TEMP[16].xxxx, TEMP[7] 219: MAD TEMP[18].w, TEMP[16].xxxx, TEMP[16].yyyy, -TEMP[18].wwww 220: MUL TEMP[20].w, TEMP[10].xxxx, TEMP[10].xxxx 221: MUL TEMP[5].z, TEMP[5].zzzz, TEMP[10].xxxx 222: MAD TEMP[5].z, TEMP[16].xxxx, TEMP[10].yyyy, -TEMP[5].zzzz 223: MUL TEMP[5].z, TEMP[10].yyyy, TEMP[5].zzzz 224: MAD TEMP[5].z, TEMP[20].wwww, TEMP[16].yyyy, TEMP[5].zzzz 225: MUL TEMP[5].z, TEMP[5].zzzz, IMM[2].xxxx 226: RCP TEMP[10].x, TEMP[18].wwww 227: MUL TEMP[16].x, TEMP[10].xxxx, TEMP[5].zzzz 228: MOV TEMP[20].x, -TEMP[18].wwww 229: FSGE TEMP[20].x, TEMP[20].xxxx, IMM[0].zzzz 230: UIF TEMP[20].xxxx :0 231: MOV TEMP[20].x, IMM[0].xxxx 232: ELSE :0 233: MOV TEMP[20].x, IMM[2].yyyy 234: ENDIF 235: MAD TEMP[5].z, TEMP[5].zzzz, TEMP[10].xxxx, IMM[2].zzzz 236: FSGE TEMP[5].x, TEMP[5].zzzz, IMM[0].zzzz 237: UIF TEMP[5].xxxx :0 238: MOV TEMP[5].x, IMM[0].xxxx 239: ELSE :0 240: MOV TEMP[5].x, IMM[2].yyyy 241: ENDIF 242: ADD TEMP[5].z, TEMP[5].xxxx, TEMP[20].xxxx 243: MOV TEMP[7].z, TEMP[5].zzzz 244: MUL TEMP[10].w, TEMP[16].xxxx, IMM[2].wwww 245: EX2 TEMP[10].x, TEMP[10].wwww 246: MAX TEMP[16].x, TEMP[18].wwww, IMM[1].wwww 247: RSQ TEMP[16].x, TEMP[16].xxxx 248: MUL TEMP[10].w, TEMP[10].xxxx, TEMP[16].xxxx 249: MOV TEMP[7].w, TEMP[10].wwww 250: MUL TEMP[16].xyz, CONST[10].xyww, IN[4].yyyy 251: MOV TEMP[9].xyz, TEMP[16].xyzx 252: MAD TEMP[16].xyz, IN[4].xxxx, CONST[9].xyww, TEMP[9] 253: MOV TEMP[9].xyz, TEMP[16].xyzx 254: MAD TEMP[16].xyz, IN[4].zzzz, CONST[11].xyww, TEMP[9] 255: MOV TEMP[9].xyz, TEMP[16].xyzx 256: ADD TEMP[16].xyz, TEMP[9], CONST[12].xyww 257: MOV TEMP[9].xy, TEMP[16].xyzx 258: RCP TEMP[16].x, TEMP[16].zzzz 259: MOV TEMP[9].z, TEMP[16].xxxx 260: MUL TEMP[16].xy, TEMP[16].xxxx, TEMP[9] 261: MOV TEMP[9].xy, TEMP[16].xyxx 262: MAD TEMP[16].xy, TEMP[9], IMM[3].zwzw, IMM[3].wwww 263: MOV TEMP[9].xy, TEMP[16].xyxx 264: MAD TEMP[16].xy, TEMP[7], IMM[4].xxxx, TEMP[9] 265: MOV TEMP[7].xy, TEMP[16].xyxx 266: MAD TEMP[16].xy, TEMP[18].wwww, IMM[1].yyyy, TEMP[7] 267: MOV TEMP[16].xy, TEMP[16].xyyy 268: TEX TEMP[16], TEMP[16], SAMP[4], 2D 269: MAD TEMP[20].xy, TEMP[18].wwww, IMM[4].yzzw, TEMP[7] 270: MOV TEMP[20].xy, TEMP[20].xyyy 271: TEX TEMP[20], TEMP[20], SAMP[4], 2D 272: MUL TEMP[12], TEMP[20], IMM[4].wwww 273: MAD TEMP[13], TEMP[16], IMM[4].wwww, TEMP[12] 274: MAD TEMP[12].xy, TEMP[18].wwww, IMM[4].zxzw, TEMP[7] 275: MOV TEMP[12].xy, TEMP[12].xyyy 276: TEX TEMP[12], TEMP[12], SAMP[4], 2D 277: MAD TEMP[13], TEMP[12], IMM[4].wwww, TEMP[13] 278: MAD TEMP[12].xy, TEMP[18].wwww, IMM[1].zzzz, TEMP[7] 279: MOV TEMP[12].xy, TEMP[12].xyyy 280: TEX TEMP[12], TEMP[12], SAMP[4], 2D 281: MAD TEMP[13], TEMP[12], IMM[4].wwww, TEMP[13] 282: ABS TEMP[12].x, TEMP[13].xxxx 283: LG2 TEMP[9].x, TEMP[12].xxxx 284: ABS TEMP[12].x, TEMP[13].yyyy 285: LG2 TEMP[12].x, TEMP[12].xxxx 286: MOV TEMP[9].y, TEMP[12].xxxx 287: ABS TEMP[12].x, TEMP[13].zzzz 288: LG2 TEMP[12].x, TEMP[12].xxxx 289: MOV TEMP[9].z, TEMP[12].xxxx 290: MUL TEMP[12].xyz, TEMP[9], IMM[0].wwww 291: EX2 TEMP[13].x, TEMP[12].xxxx 292: EX2 TEMP[16].x, TEMP[12].yyyy 293: MOV TEMP[13].y, TEMP[16].xxxx 294: EX2 TEMP[12].x, TEMP[12].zzzz 295: MOV TEMP[13].z, TEMP[12].xxxx 296: DP3 TEMP[3].x, TEMP[3].xyzz, TEMP[14].xyzz 297: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].xxxx 298: MUL TEMP[12].x, TEMP[3].wwww, TEMP[3].wwww 299: MUL TEMP[12].x, TEMP[12].xxxx, TEMP[12].xxxx 300: MUL TEMP[14].y, TEMP[3].wwww, TEMP[12].xxxx 301: MUL TEMP[16].xyz, TEMP[19].wwww, CONST[3] 302: MOV TEMP[17], -TEMP[17].zzzz 303: FSGE TEMP[18].x, TEMP[17].xxxx, IMM[0].zzzz 304: UIF TEMP[18].xxxx :0 305: MOV TEMP[18].x, TEMP[16].xxxx 306: ELSE :0 307: MOV TEMP[18].x, IMM[2].yyyy 308: ENDIF 309: MOV TEMP[18].x, TEMP[18].xxxx 310: FSGE TEMP[19].x, TEMP[17].yyyy, IMM[0].zzzz 311: UIF TEMP[19].xxxx :0 312: MOV TEMP[19].x, TEMP[16].yyyy 313: ELSE :0 314: MOV TEMP[19].x, IMM[2].yyyy 315: ENDIF 316: MOV TEMP[18].y, TEMP[19].xxxx 317: FSGE TEMP[19].x, TEMP[17].zzzz, IMM[0].zzzz 318: UIF TEMP[19].xxxx :0 319: MOV TEMP[16].x, TEMP[16].zzzz 320: ELSE :0 321: MOV TEMP[16].x, IMM[2].yyyy 322: ENDIF 323: MOV TEMP[18].z, TEMP[16].xxxx 324: FSGE TEMP[16].x, TEMP[17].wwww, IMM[0].zzzz 325: UIF TEMP[16].xxxx :0 326: ELSE :0 327: ENDIF 328: MOV TEMP[9].xyz, TEMP[18].xyzx 329: MUL TEMP[10].xyz, TEMP[10].wwww, CONST[4] 330: MOV TEMP[8].xyz, TEMP[10].xyzx 331: MUL TEMP[10].xyz, TEMP[8], IMM[5].xxxx 332: MOV TEMP[5], -TEMP[5].zzzz 333: FSGE TEMP[16].x, TEMP[5].xxxx, IMM[0].zzzz 334: UIF TEMP[16].xxxx :0 335: MOV TEMP[16].x, TEMP[10].xxxx 336: ELSE :0 337: MOV TEMP[16].x, IMM[2].yyyy 338: ENDIF 339: MOV TEMP[16].x, TEMP[16].xxxx 340: FSGE TEMP[17].x, TEMP[5].yyyy, IMM[0].zzzz 341: UIF TEMP[17].xxxx :0 342: MOV TEMP[17].x, TEMP[10].yyyy 343: ELSE :0 344: MOV TEMP[17].x, IMM[2].yyyy 345: ENDIF 346: MOV TEMP[16].y, TEMP[17].xxxx 347: FSGE TEMP[17].x, TEMP[5].zzzz, IMM[0].zzzz 348: UIF TEMP[17].xxxx :0 349: MOV TEMP[10].x, TEMP[10].zzzz 350: ELSE :0 351: MOV TEMP[10].x, IMM[2].yyyy 352: ENDIF 353: MOV TEMP[16].z, TEMP[10].xxxx 354: FSGE TEMP[5].x, TEMP[5].wwww, IMM[0].zzzz 355: UIF TEMP[5].xxxx :0 356: ELSE :0 357: ENDIF 358: MOV TEMP[8].xyz, TEMP[16].xyzx 359: ADD TEMP[5].xyz, TEMP[9], TEMP[8] 360: MAD TEMP[4].yzw, TEMP[5].xxyz, TEMP[14].yyyy, TEMP[4].xxyz 361: MOV TEMP[7].w, TEMP[4].zyzw 362: MUL TEMP[5].xyz, TEMP[2], CONST[3] 363: MOV TEMP[2].xyz, TEMP[5].xyzx 364: MUL TEMP[5].xyz, TEMP[15].wwww, TEMP[2] 365: MOV TEMP[8], -TEMP[11].wwww 366: FSGE TEMP[10].x, TEMP[8].xxxx, IMM[0].zzzz 367: UIF TEMP[10].xxxx :0 368: MOV TEMP[10].x, TEMP[5].xxxx 369: ELSE :0 370: MOV TEMP[10].x, IMM[2].yyyy 371: ENDIF 372: MOV TEMP[10].x, TEMP[10].xxxx 373: FSGE TEMP[11].x, TEMP[8].yyyy, IMM[0].zzzz 374: UIF TEMP[11].xxxx :0 375: MOV TEMP[11].x, TEMP[5].yyyy 376: ELSE :0 377: MOV TEMP[11].x, IMM[2].yyyy 378: ENDIF 379: MOV TEMP[10].y, TEMP[11].xxxx 380: FSGE TEMP[11].x, TEMP[8].zzzz, IMM[0].zzzz 381: UIF TEMP[11].xxxx :0 382: MOV TEMP[5].x, TEMP[5].zzzz 383: ELSE :0 384: MOV TEMP[5].x, IMM[2].yyyy 385: ENDIF 386: MOV TEMP[10].z, TEMP[5].xxxx 387: FSGE TEMP[5].x, TEMP[8].wwww, IMM[0].zzzz 388: UIF TEMP[5].xxxx :0 389: ELSE :0 390: ENDIF 391: MOV TEMP[2].xyz, TEMP[10].xyzx 392: MAD TEMP[4].xyz, TEMP[4].yzww, TEMP[6].xxxx, TEMP[2] 393: MOV TEMP[2].xyz, TEMP[4].xyzx 394: MAD TEMP[4].xyz, TEMP[13], CONST[14].xxxx, TEMP[2] 395: MOV TEMP[2].xyz, TEMP[4].xyzx 396: MOV TEMP[4].xy, IN[3].xyyy 397: TEX TEMP[4].w, TEMP[4], SAMP[5], 2D 398: MOV TEMP[9].w, TEMP[4].wwww 399: MAD TEMP[3].y, TEMP[3].wwww, TEMP[12].xxxx, IMM[2].xxxx 400: MUL TEMP[3].xyz, TEMP[3].yyyy, TEMP[2] 401: ADD TEMP[5], TEMP[13].wwww, IMM[0].xxxx 402: MOV_SAT TEMP[5], TEMP[5] 403: MUL TEMP[4].w, TEMP[4].wwww, TEMP[5].yyyy 404: MOV TEMP[4].w, TEMP[4].wwww 405: ABS TEMP[5].x, TEMP[3].xxxx 406: LG2 TEMP[7].x, TEMP[5].xxxx 407: ABS TEMP[5].x, TEMP[3].yyyy 408: LG2 TEMP[5].x, TEMP[5].xxxx 409: MOV TEMP[7].y, TEMP[5].xxxx 410: ABS TEMP[3].x, TEMP[3].zzzz 411: LG2 TEMP[3].x, TEMP[3].xxxx 412: MOV TEMP[7].z, TEMP[3].xxxx 413: MUL TEMP[3].xyz, TEMP[7], IMM[5].yyyy 414: EX2 TEMP[7].x, TEMP[3].xxxx 415: EX2 TEMP[5].x, TEMP[3].yyyy 416: MOV TEMP[7].y, TEMP[5].xxxx 417: EX2 TEMP[3].x, TEMP[3].zzzz 418: MOV TEMP[7].z, TEMP[3].xxxx 419: MOV TEMP[3].xyz, TEMP[7].xyzz 420: TEX TEMP[3], TEMP[3], SAMP[11], 3D 421: MOV TEMP[2].w, TEMP[3].wwww 422: MAD TEMP[5].xy, IN[6], IMM[0].xyxx, IMM[0].zxzz 423: MOV TEMP[5].xy, TEMP[5].xyyy 424: TEX TEMP[5], TEMP[5], SAMP[8], 2D 425: MOV TEMP[7].w, TEMP[5].wwww 426: ADD TEMP[6].y, -TEMP[5].wwww, IMM[0].xxxx 427: MAD TEMP[3].xyz, TEMP[3], TEMP[6].yyyy, TEMP[5] 428: MOV TEMP[2].xyz, TEMP[3].xyzx 429: LRP TEMP[2].xyz, TEMP[0].xxxx, TEMP[2], IMM[5].zzzz 430: MOV TEMP[7].xyz, TEMP[2].xyzx 431: MOV TEMP[2].xyz, TEMP[2].xyzz 432: TEX TEMP[2], TEMP[2], SAMP[10], 3D 433: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 434: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[2], TEMP[7] 435: MOV TEMP[9].xyz, TEMP[0].xyzx 436: RCP TEMP[1].x, IN[5].zzzz 437: MUL TEMP[2].x, TEMP[1].xxxx, IN[5].yyyy 438: MAD TEMP[2].x, TEMP[2].xxxx, IMM[2].xxxx, IMM[2].xxxx 439: MOV TEMP[1].x, TEMP[2].xxxx 440: MOV TEMP[1].y, CONST[7].wwww 441: MOV TEMP[2].xy, TEMP[1].xyyy 442: TEX TEMP[2].x, TEMP[2], SAMP[7], 2D 443: MOV TEMP[1].x, TEMP[2].xxxx 444: ADD TEMP[3].yzw, -CONST[8].xxyz, IN[4].xxyz 445: MOV TEMP[1].w, TEMP[3].zyzw 446: DP3 TEMP[5].x, TEMP[3].yzww, TEMP[3].yzww 447: MOV TEMP[1].y, TEMP[5].xxxx 448: MUL TEMP[1].yz, TEMP[1].xyww, CONST[7].xyxw 449: MUL TEMP[5].z, TEMP[1].zzzz, IMM[5].wwww 450: EX2 TEMP[5].x, TEMP[5].zzzz 451: ADD TEMP[5].z, -TEMP[5].xxxx, IMM[0].xxxx 452: MUL TEMP[1].y, TEMP[5].zzzz, TEMP[1].yyyy 453: RCP TEMP[3].x, TEMP[3].wwww 454: MUL TEMP[1].y, TEMP[3].xxxx, TEMP[1].yyyy 455: MUL TEMP[1].y, TEMP[1].yyyy, IMM[5].wwww 456: EX2 TEMP[1].x, TEMP[1].yyyy 457: MOV_SAT TEMP[1].x, TEMP[1].xxxx 458: ADD TEMP[1].y, -TEMP[1].xxxx, IMM[0].xxxx 459: MUL TEMP[1].x, TEMP[1].yyyy, TEMP[2].xxxx 460: ADD TEMP[0].yzw, -TEMP[0].xxyz, CONST[13].xxyz 461: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[9] 462: MOV TEMP[4].xyz, TEMP[0].xyzx 463: MOV OUT[0], TEMP[4] 464: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 60) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 108) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 124) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 132) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 136) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 156) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 172) %59 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %60 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %61 = call float @llvm.SI.load.const(<16 x i8> %23, i32 188) %62 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %63 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %64 = call float @llvm.SI.load.const(<16 x i8> %23, i32 204) %65 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %66 = call float @llvm.SI.load.const(<16 x i8> %23, i32 212) %67 = call float @llvm.SI.load.const(<16 x i8> %23, i32 216) %68 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %69 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %70 = load <8 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %72 = load <4 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %74 = load <8 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %76 = load <4 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %78 = load <8 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %80 = load <4 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %82 = load <8 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %84 = load <4 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %86 = load <8 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %88 = load <4 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %90 = load <8 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %92 = load <4 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %94 = load <8 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %96 = load <4 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %98 = load <8 x i32> addrspace(2)* %97, !tbaa !0 %99 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %100 = load <4 x i32> addrspace(2)* %99, !tbaa !0 %101 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %102 = load <8 x i32> addrspace(2)* %101, !tbaa !0 %103 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %104 = load <4 x i32> addrspace(2)* %103, !tbaa !0 %105 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %106 = load <8 x i32> addrspace(2)* %105, !tbaa !0 %107 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %108 = load <4 x i32> addrspace(2)* %107, !tbaa !0 %109 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 10 %110 = load <8 x i32> addrspace(2)* %109, !tbaa !0 %111 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 10 %112 = load <4 x i32> addrspace(2)* %111, !tbaa !0 %113 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 11 %114 = load <8 x i32> addrspace(2)* %113, !tbaa !0 %115 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 11 %116 = load <4 x i32> addrspace(2)* %115, !tbaa !0 %117 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %118 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %119 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %120 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %121 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %122 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %123 = call float @llvm.SI.fs.interp(i32 0, i32 3, i32 %5, <2 x i32> %7) %124 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %125 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %126 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %127 = call float @llvm.SI.fs.interp(i32 2, i32 4, i32 %5, <2 x i32> %7) %128 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %129 = call float @llvm.SI.fs.interp(i32 2, i32 5, i32 %5, <2 x i32> %7) %130 = call float @llvm.SI.fs.interp(i32 0, i32 6, i32 %5, <2 x i32> %7) %131 = call float @llvm.SI.fs.interp(i32 1, i32 6, i32 %5, <2 x i32> %7) %132 = fmul float %130, 1.000000e+00 %133 = fadd float %132, 0.000000e+00 %134 = fmul float %131, -1.000000e+00 %135 = fadd float %134, 1.000000e+00 %136 = bitcast float %133 to i32 %137 = bitcast float %135 to i32 %138 = insertelement <2 x i32> undef, i32 %136, i32 0 %139 = insertelement <2 x i32> %138, i32 %137, i32 1 %140 = bitcast <8 x i32> %106 to <32 x i8> %141 = bitcast <4 x i32> %108 to <16 x i8> %142 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %139, <32 x i8> %140, <16 x i8> %141, i32 2) %143 = extractelement <4 x float> %142, i32 0 %144 = extractelement <4 x float> %142, i32 2 %145 = call float @fabs(float %143) %146 = fsub float -0.000000e+00, %145 %147 = fsub float -0.000000e+00, %145 %148 = fsub float -0.000000e+00, %145 %149 = fsub float -0.000000e+00, %145 %150 = fcmp oge float %146, 0.000000e+00 %151 = sext i1 %150 to i32 %152 = bitcast i32 %151 to float %153 = bitcast float %152 to i32 %154 = icmp ne i32 %153, 0 %. = select i1 %154, float -1.000000e+00, float 0.000000e+00 %155 = fcmp oge float %147, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = bitcast i32 %156 to float %158 = bitcast float %157 to i32 %159 = icmp ne i32 %158, 0 %temp16.0 = select i1 %159, float -1.000000e+00, float 0.000000e+00 %160 = fcmp oge float %148, 0.000000e+00 %161 = sext i1 %160 to i32 %162 = bitcast i32 %161 to float %163 = bitcast float %162 to i32 %164 = icmp ne i32 %163, 0 %.163 = select i1 %164, float -1.000000e+00, float 0.000000e+00 %165 = fcmp oge float %149, 0.000000e+00 %166 = sext i1 %165 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = icmp ne i32 %168, 0 %170 = fcmp olt float %., 0.000000e+00 %171 = sext i1 %170 to i32 %172 = fcmp olt float %temp16.0, 0.000000e+00 %173 = sext i1 %172 to i32 %174 = fcmp olt float %.163, 0.000000e+00 %175 = sext i1 %174 to i32 %176 = bitcast i32 %171 to float %177 = bitcast i32 %173 to float %178 = bitcast i32 %175 to float %179 = bitcast float %176 to i32 %180 = bitcast float %178 to i32 %181 = or i32 %179, %180 %182 = bitcast i32 %181 to float %183 = bitcast float %182 to i32 %184 = bitcast float %177 to i32 %185 = or i32 %183, %184 %186 = bitcast i32 %185 to float %187 = bitcast float %186 to i32 %188 = and i32 %187, 1065353216 %189 = bitcast i32 %188 to float %190 = fsub float -0.000000e+00, %189 %191 = fsub float -0.000000e+00, %189 %192 = fsub float -0.000000e+00, %189 %193 = fsub float -0.000000e+00, %189 call void @llvm.AMDGPU.kill(float %190) call void @llvm.AMDGPU.kill(float %191) call void @llvm.AMDGPU.kill(float %192) call void @llvm.AMDGPU.kill(float %193) %194 = bitcast float %123 to i32 %195 = bitcast float %124 to i32 %196 = insertelement <2 x i32> undef, i32 %194, i32 0 %197 = insertelement <2 x i32> %196, i32 %195, i32 1 %198 = bitcast <8 x i32> %94 to <32 x i8> %199 = bitcast <4 x i32> %96 to <16 x i8> %200 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %197, <32 x i8> %198, <16 x i8> %199, i32 2) %201 = extractelement <4 x float> %200, i32 0 %202 = extractelement <4 x float> %200, i32 1 %203 = extractelement <4 x float> %200, i32 2 %204 = call float @llvm.pow.f32(float %201, float 0x40019999A0000000) %205 = call float @llvm.pow.f32(float %202, float 0x40019999A0000000) %206 = call float @llvm.pow.f32(float %203, float 0x40019999A0000000) %207 = fdiv float 1.000000e+00, %24 %208 = fmul float %207, 0x3FD54FDF40000000 %209 = fmul float %208, %208 %210 = bitcast float %117 to i32 %211 = bitcast float %118 to i32 %212 = insertelement <2 x i32> undef, i32 %210, i32 0 %213 = insertelement <2 x i32> %212, i32 %211, i32 1 %214 = bitcast <8 x i32> %70 to <32 x i8> %215 = bitcast <4 x i32> %72 to <16 x i8> %216 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %213, <32 x i8> %214, <16 x i8> %215, i32 2) %217 = extractelement <4 x float> %216, i32 0 %218 = extractelement <4 x float> %216, i32 1 %219 = extractelement <4 x float> %216, i32 2 %220 = extractelement <4 x float> %216, i32 3 %221 = bitcast float %117 to i32 %222 = bitcast float %118 to i32 %223 = insertelement <2 x i32> undef, i32 %221, i32 0 %224 = insertelement <2 x i32> %223, i32 %222, i32 1 %225 = bitcast <8 x i32> %74 to <32 x i8> %226 = bitcast <4 x i32> %76 to <16 x i8> %227 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %224, <32 x i8> %225, <16 x i8> %226, i32 2) %228 = extractelement <4 x float> %227, i32 0 %229 = fmul float %217, 2.000000e+00 %230 = fadd float %229, -1.000000e+00 %231 = fmul float %218, 2.000000e+00 %232 = fadd float %231, -1.000000e+00 %233 = fmul float %208, %230 %234 = fmul float %208, %232 %235 = fmul float %228, 2.000000e+00 %236 = fadd float %235, -1.000000e+00 %237 = bitcast float %119 to i32 %238 = bitcast float %120 to i32 %239 = insertelement <2 x i32> undef, i32 %237, i32 0 %240 = insertelement <2 x i32> %239, i32 %238, i32 1 %241 = bitcast <8 x i32> %78 to <32 x i8> %242 = bitcast <4 x i32> %80 to <16 x i8> %243 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %240, <32 x i8> %241, <16 x i8> %242, i32 2) %244 = extractelement <4 x float> %243, i32 0 %245 = extractelement <4 x float> %243, i32 1 %246 = extractelement <4 x float> %243, i32 2 %247 = extractelement <4 x float> %243, i32 3 %248 = bitcast float %119 to i32 %249 = bitcast float %120 to i32 %250 = insertelement <2 x i32> undef, i32 %248, i32 0 %251 = insertelement <2 x i32> %250, i32 %249, i32 1 %252 = bitcast <8 x i32> %82 to <32 x i8> %253 = bitcast <4 x i32> %84 to <16 x i8> %254 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %251, <32 x i8> %252, <16 x i8> %253, i32 2) %255 = extractelement <4 x float> %254, i32 0 %256 = fadd float %244, %244 %257 = fadd float %245, %245 %258 = fmul float %256, -1.000000e+00 %259 = fadd float %258, 1.000000e+00 %260 = fmul float %257, 1.000000e+00 %261 = fadd float %260, -1.000000e+00 %262 = fmul float %208, %259 %263 = fmul float %208, %261 %264 = fmul float %230, %208 %265 = fadd float %264, %262 %266 = fmul float %232, %208 %267 = fadd float %266, %263 %268 = fmul float %255, -2.000000e+00 %269 = fadd float %268, 1.000000e+00 %270 = fmul float %209, %246 %271 = fmul float %209, %247 %272 = fmul float %209, %269 %273 = fmul float %219, %209 %274 = fadd float %273, %270 %275 = fmul float %220, %209 %276 = fadd float %275, %271 %277 = fmul float %236, %209 %278 = fadd float %277, %272 %279 = fadd float %233, %233 %280 = fadd float %234, %234 %281 = fmul float %262, %279 %282 = fmul float %263, %280 %283 = fmul float %233, %263 %284 = fmul float %234, %262 %285 = fadd float %283, %284 %286 = fadd float %274, %281 %287 = fadd float %276, %282 %288 = fadd float %278, %285 %289 = bitcast float %121 to i32 %290 = bitcast float %122 to i32 %291 = insertelement <2 x i32> undef, i32 %289, i32 0 %292 = insertelement <2 x i32> %291, i32 %290, i32 1 %293 = bitcast <8 x i32> %78 to <32 x i8> %294 = bitcast <4 x i32> %80 to <16 x i8> %295 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %292, <32 x i8> %293, <16 x i8> %294, i32 2) %296 = extractelement <4 x float> %295, i32 0 %297 = extractelement <4 x float> %295, i32 1 %298 = extractelement <4 x float> %295, i32 2 %299 = extractelement <4 x float> %295, i32 3 %300 = bitcast float %121 to i32 %301 = bitcast float %122 to i32 %302 = insertelement <2 x i32> undef, i32 %300, i32 0 %303 = insertelement <2 x i32> %302, i32 %301, i32 1 %304 = bitcast <8 x i32> %82 to <32 x i8> %305 = bitcast <4 x i32> %84 to <16 x i8> %306 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %303, <32 x i8> %304, <16 x i8> %305, i32 2) %307 = extractelement <4 x float> %306, i32 0 %308 = fmul float %297, 2.000000e+00 %309 = fadd float %308, -1.000000e+00 %310 = fmul float %296, 2.000000e+00 %311 = fadd float %310, -1.000000e+00 %312 = fmul float %208, %309 %313 = fmul float %208, %311 %314 = fmul float %311, %208 %315 = fadd float %314, %265 %316 = fmul float %309, %208 %317 = fadd float %316, %267 %318 = fmul float %307, 2.000000e+00 %319 = fadd float %318, -1.000000e+00 %320 = fmul float %298, %209 %321 = fadd float %320, %286 %322 = fmul float %299, %209 %323 = fadd float %322, %287 %324 = fmul float %319, %209 %325 = fadd float %324, %288 %326 = fmul float %279, %313 %327 = fmul float %280, %312 %328 = fmul float %233, %312 %329 = fmul float %234, %313 %330 = fadd float %328, %329 %331 = fadd float %321, %326 %332 = fadd float %323, %327 %333 = fadd float %325, %330 %334 = fmul float %262, %313 %335 = fmul float %263, %312 %336 = fadd float %334, %334 %337 = fadd float %335, %335 %338 = fmul float %262, %312 %339 = fmul float %263, %313 %340 = fadd float %338, %339 %341 = fadd float %331, %336 %342 = fadd float %332, %337 %343 = fadd float %333, %340 %344 = fsub float -0.000000e+00, %315 %345 = fmul float %315, %344 %346 = fadd float %345, %341 %347 = fsub float -0.000000e+00, %317 %348 = fmul float %317, %347 %349 = fadd float %348, %342 %350 = fsub float -0.000000e+00, %317 %351 = fmul float %315, %350 %352 = fadd float %351, %343 %353 = fmul float %25, %315 %354 = fmul float %26, %317 %355 = fadd float %354, %353 %356 = fmul float %27, 1.000000e+00 %357 = fadd float %355, %356 %358 = call float @llvm.AMDIL.clamp.(float %357, float 0.000000e+00, float 1.000000e+00) %359 = fsub float -0.000000e+00, %125 %360 = fadd float %50, %359 %361 = fsub float -0.000000e+00, %126 %362 = fadd float %51, %361 %363 = fsub float -0.000000e+00, %127 %364 = fadd float %52, %363 %365 = fmul float %360, %360 %366 = fmul float %362, %362 %367 = fadd float %366, %365 %368 = fmul float %364, %364 %369 = fadd float %367, %368 %370 = call float @llvm.maxnum.f32(float %369, float 0x3E7AD7F2A0000000) %371 = call float @llvm.AMDGPU.rsq.clamped.f32(float %370) %372 = fmul float %371, %360 %373 = fmul float %371, %362 %374 = fmul float %371, %364 %375 = fsub float -0.000000e+00, %39 %376 = fmul float %360, %371 %377 = fadd float %376, %375 %378 = fsub float -0.000000e+00, %40 %379 = fmul float %362, %371 %380 = fadd float %379, %378 %381 = fsub float -0.000000e+00, %41 %382 = fmul float %364, %371 %383 = fadd float %382, %381 %384 = fmul float %42, %377 %385 = fadd float %384, %39 %386 = fmul float %42, %380 %387 = fadd float %386, %40 %388 = fmul float %42, %383 %389 = fadd float %388, %41 %390 = fmul float %385, %385 %391 = fmul float %387, %387 %392 = fadd float %391, %390 %393 = fmul float %389, %389 %394 = fadd float %392, %393 %395 = call float @llvm.maxnum.f32(float %394, float 0x3E7AD7F2A0000000) %396 = call float @llvm.AMDGPU.rsq.clamped.f32(float %395) %397 = fmul float %385, %396 %398 = fadd float %397, %25 %399 = fmul float %387, %396 %400 = fadd float %399, %26 %401 = fmul float %389, %396 %402 = fadd float %401, %27 %403 = fdiv float 1.000000e+00, %402 %404 = fsub float -0.000000e+00, %315 %405 = fmul float %398, %403 %406 = fadd float %405, %404 %407 = fsub float -0.000000e+00, %317 %408 = fmul float %400, %403 %409 = fadd float %408, %407 %410 = fdiv float 1.000000e+00, %34 %411 = fadd float %410, %346 %412 = fadd float %410, %349 %413 = fmul float %352, %352 %414 = fsub float -0.000000e+00, %413 %415 = fmul float %411, %412 %416 = fadd float %415, %414 %417 = fmul float %406, %406 %418 = fadd float %352, %352 %419 = fmul float %406, %418 %420 = fsub float -0.000000e+00, %419 %421 = fmul float %411, %409 %422 = fadd float %421, %420 %423 = fmul float %409, %422 %424 = fmul float %417, %412 %425 = fadd float %424, %423 %426 = fmul float %425, 5.000000e-01 %427 = fdiv float 1.000000e+00, %416 %428 = fmul float %427, %426 %429 = fsub float -0.000000e+00, %416 %430 = fcmp oge float %429, 0.000000e+00 %431 = sext i1 %430 to i32 %432 = bitcast i32 %431 to float %433 = bitcast float %432 to i32 %434 = icmp ne i32 %433, 0 %temp96.0 = select i1 %434, float 1.000000e+00, float -0.000000e+00 %435 = fmul float %426, %427 %436 = fadd float %435, -1.600000e+01 %437 = fcmp oge float %436, 0.000000e+00 %438 = sext i1 %437 to i32 %439 = bitcast i32 %438 to float %440 = bitcast float %439 to i32 %441 = icmp ne i32 %440, 0 %.164 = select i1 %441, float 1.000000e+00, float -0.000000e+00 %442 = fadd float %.164, %temp96.0 %443 = fmul float %428, 0xBFF7154CA0000000 %444 = call float @llvm.AMDIL.exp.(float %443) %445 = call float @llvm.maxnum.f32(float %416, float 0x3E7AD7F2A0000000) %446 = call float @llvm.AMDGPU.rsq.clamped.f32(float %445) %447 = fmul float %446, %444 %448 = fmul float %410, 1.600000e+01 %449 = fadd float %448, %346 %450 = fmul float %410, 1.600000e+01 %451 = fadd float %450, %349 %452 = fsub float -0.000000e+00, %413 %453 = fmul float %449, %451 %454 = fadd float %453, %452 %455 = fsub float -0.000000e+00, %419 %456 = fmul float %449, %409 %457 = fadd float %456, %455 %458 = fmul float %409, %457 %459 = fmul float %417, %451 %460 = fadd float %459, %458 %461 = fmul float %460, 5.000000e-01 %462 = fdiv float 1.000000e+00, %454 %463 = fmul float %461, %462 %464 = fsub float -0.000000e+00, %454 %465 = fcmp oge float %464, 0.000000e+00 %466 = sext i1 %465 to i32 %467 = bitcast i32 %466 to float %468 = bitcast float %467 to i32 %469 = icmp ne i32 %468, 0 %temp80.0 = select i1 %469, float 1.000000e+00, float -0.000000e+00 %470 = fmul float %461, %462 %471 = fadd float %470, -1.600000e+01 %472 = fcmp oge float %471, 0.000000e+00 %473 = sext i1 %472 to i32 %474 = bitcast i32 %473 to float %475 = bitcast float %474 to i32 %476 = icmp ne i32 %475, 0 %.165 = select i1 %476, float 1.000000e+00, float -0.000000e+00 %477 = fadd float %.165, %temp80.0 %478 = fmul float %463, 0xBFF7154CA0000000 %479 = call float @llvm.maxnum.f32(float %454, float 0x3E7AD7F2A0000000) %480 = call float @llvm.AMDGPU.rsq.clamped.f32(float %479) %481 = call float @llvm.AMDIL.exp.(float %478) %482 = fmul float %480, %481 %483 = fmul float %482, 0x3F747AE140000000 %484 = fsub float -0.000000e+00, %43 %485 = fmul float %360, %371 %486 = fadd float %485, %484 %487 = fsub float -0.000000e+00, %44 %488 = fmul float %362, %371 %489 = fadd float %488, %487 %490 = fsub float -0.000000e+00, %45 %491 = fmul float %364, %371 %492 = fadd float %491, %490 %493 = fmul float %46, %486 %494 = fadd float %493, %43 %495 = fmul float %46, %489 %496 = fadd float %495, %44 %497 = fmul float %46, %492 %498 = fadd float %497, %45 %499 = fmul float %494, %494 %500 = fmul float %496, %496 %501 = fadd float %500, %499 %502 = fmul float %498, %498 %503 = fadd float %501, %502 %504 = call float @llvm.maxnum.f32(float %503, float 0x3E7AD7F2A0000000) %505 = call float @llvm.AMDGPU.rsq.clamped.f32(float %504) %506 = fmul float %494, %505 %507 = fadd float %506, %28 %508 = fmul float %496, %505 %509 = fadd float %508, %29 %510 = fmul float %498, %505 %511 = fadd float %510, %30 %512 = fdiv float 1.000000e+00, %511 %513 = fsub float -0.000000e+00, %315 %514 = fmul float %507, %512 %515 = fadd float %514, %513 %516 = fsub float -0.000000e+00, %317 %517 = fmul float %509, %512 %518 = fadd float %517, %516 %519 = fdiv float 1.000000e+00, %38 %520 = fadd float %519, %346 %521 = fadd float %519, %349 %522 = fsub float -0.000000e+00, %413 %523 = fmul float %520, %521 %524 = fadd float %523, %522 %525 = fmul float %515, %515 %526 = fmul float %418, %515 %527 = fsub float -0.000000e+00, %526 %528 = fmul float %520, %518 %529 = fadd float %528, %527 %530 = fmul float %518, %529 %531 = fmul float %525, %521 %532 = fadd float %531, %530 %533 = fmul float %532, 5.000000e-01 %534 = fdiv float 1.000000e+00, %524 %535 = fmul float %534, %533 %536 = fsub float -0.000000e+00, %524 %537 = fcmp oge float %536, 0.000000e+00 %538 = sext i1 %537 to i32 %539 = bitcast i32 %538 to float %540 = bitcast float %539 to i32 %541 = icmp ne i32 %540, 0 %temp80.1 = select i1 %541, float 1.000000e+00, float -0.000000e+00 %542 = fmul float %533, %534 %543 = fadd float %542, -1.600000e+01 %544 = fcmp oge float %543, 0.000000e+00 %545 = sext i1 %544 to i32 %546 = bitcast i32 %545 to float %547 = bitcast float %546 to i32 %548 = icmp ne i32 %547, 0 %.166 = select i1 %548, float 1.000000e+00, float -0.000000e+00 %549 = fadd float %.166, %temp80.1 %550 = fmul float %535, 0xBFF7154CA0000000 %551 = call float @llvm.AMDIL.exp.(float %550) %552 = call float @llvm.maxnum.f32(float %524, float 0x3E7AD7F2A0000000) %553 = call float @llvm.AMDGPU.rsq.clamped.f32(float %552) %554 = fmul float %551, %553 %555 = fmul float %56, %126 %556 = fmul float %57, %126 %557 = fmul float %58, %126 %558 = fmul float %125, %53 %559 = fadd float %558, %555 %560 = fmul float %125, %54 %561 = fadd float %560, %556 %562 = fmul float %125, %55 %563 = fadd float %562, %557 %564 = fmul float %127, %59 %565 = fadd float %564, %559 %566 = fmul float %127, %60 %567 = fadd float %566, %561 %568 = fmul float %127, %61 %569 = fadd float %568, %563 %570 = fadd float %565, %62 %571 = fadd float %567, %63 %572 = fadd float %569, %64 %573 = fdiv float 1.000000e+00, %572 %574 = fmul float %573, %570 %575 = fmul float %573, %571 %576 = fmul float %574, 5.000000e-01 %577 = fadd float %576, -5.000000e-01 %578 = fmul float %575, -5.000000e-01 %579 = fadd float %578, -5.000000e-01 %580 = fmul float %346, 6.000000e+00 %581 = fadd float %580, %577 %582 = fmul float %349, 6.000000e+00 %583 = fadd float %582, %579 %584 = fmul float %524, 2.000000e+00 %585 = fadd float %584, %581 %586 = fmul float %524, 2.000000e+00 %587 = fadd float %586, %583 %588 = bitcast float %585 to i32 %589 = bitcast float %587 to i32 %590 = insertelement <2 x i32> undef, i32 %588, i32 0 %591 = insertelement <2 x i32> %590, i32 %589, i32 1 %592 = bitcast <8 x i32> %86 to <32 x i8> %593 = bitcast <4 x i32> %88 to <16 x i8> %594 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %591, <32 x i8> %592, <16 x i8> %593, i32 2) %595 = extractelement <4 x float> %594, i32 0 %596 = extractelement <4 x float> %594, i32 1 %597 = extractelement <4 x float> %594, i32 2 %598 = extractelement <4 x float> %594, i32 3 %599 = fmul float %524, 2.000000e+00 %600 = fadd float %599, %581 %601 = fmul float %524, -2.000000e+00 %602 = fadd float %601, %583 %603 = bitcast float %600 to i32 %604 = bitcast float %602 to i32 %605 = insertelement <2 x i32> undef, i32 %603, i32 0 %606 = insertelement <2 x i32> %605, i32 %604, i32 1 %607 = bitcast <8 x i32> %86 to <32 x i8> %608 = bitcast <4 x i32> %88 to <16 x i8> %609 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %606, <32 x i8> %607, <16 x i8> %608, i32 2) %610 = extractelement <4 x float> %609, i32 0 %611 = extractelement <4 x float> %609, i32 1 %612 = extractelement <4 x float> %609, i32 2 %613 = extractelement <4 x float> %609, i32 3 %614 = fmul float %610, 2.500000e-01 %615 = fmul float %611, 2.500000e-01 %616 = fmul float %612, 2.500000e-01 %617 = fmul float %613, 2.500000e-01 %618 = fmul float %595, 2.500000e-01 %619 = fadd float %618, %614 %620 = fmul float %596, 2.500000e-01 %621 = fadd float %620, %615 %622 = fmul float %597, 2.500000e-01 %623 = fadd float %622, %616 %624 = fmul float %598, 2.500000e-01 %625 = fadd float %624, %617 %626 = fmul float %524, -2.000000e+00 %627 = fadd float %626, %581 %628 = fmul float %524, 6.000000e+00 %629 = fadd float %628, %583 %630 = bitcast float %627 to i32 %631 = bitcast float %629 to i32 %632 = insertelement <2 x i32> undef, i32 %630, i32 0 %633 = insertelement <2 x i32> %632, i32 %631, i32 1 %634 = bitcast <8 x i32> %86 to <32 x i8> %635 = bitcast <4 x i32> %88 to <16 x i8> %636 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %633, <32 x i8> %634, <16 x i8> %635, i32 2) %637 = extractelement <4 x float> %636, i32 0 %638 = extractelement <4 x float> %636, i32 1 %639 = extractelement <4 x float> %636, i32 2 %640 = extractelement <4 x float> %636, i32 3 %641 = fmul float %637, 2.500000e-01 %642 = fadd float %641, %619 %643 = fmul float %638, 2.500000e-01 %644 = fadd float %643, %621 %645 = fmul float %639, 2.500000e-01 %646 = fadd float %645, %623 %647 = fmul float %640, 2.500000e-01 %648 = fadd float %647, %625 %649 = fmul float %524, -2.000000e+00 %650 = fadd float %649, %581 %651 = fmul float %524, -2.000000e+00 %652 = fadd float %651, %583 %653 = bitcast float %650 to i32 %654 = bitcast float %652 to i32 %655 = insertelement <2 x i32> undef, i32 %653, i32 0 %656 = insertelement <2 x i32> %655, i32 %654, i32 1 %657 = bitcast <8 x i32> %86 to <32 x i8> %658 = bitcast <4 x i32> %88 to <16 x i8> %659 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %656, <32 x i8> %657, <16 x i8> %658, i32 2) %660 = extractelement <4 x float> %659, i32 0 %661 = extractelement <4 x float> %659, i32 1 %662 = extractelement <4 x float> %659, i32 2 %663 = extractelement <4 x float> %659, i32 3 %664 = fmul float %660, 2.500000e-01 %665 = fadd float %664, %642 %666 = fmul float %661, 2.500000e-01 %667 = fadd float %666, %644 %668 = fmul float %662, 2.500000e-01 %669 = fadd float %668, %646 %670 = fmul float %663, 2.500000e-01 %671 = fadd float %670, %648 %672 = call float @fabs(float %665) %673 = call float @llvm.log2.f32(float %672) %674 = call float @fabs(float %667) %675 = call float @llvm.log2.f32(float %674) %676 = call float @fabs(float %669) %677 = call float @llvm.log2.f32(float %676) %678 = fmul float %673, 0x40019999A0000000 %679 = fmul float %675, 0x40019999A0000000 %680 = fmul float %677, 0x40019999A0000000 %681 = call float @llvm.AMDIL.exp.(float %678) %682 = call float @llvm.AMDIL.exp.(float %679) %683 = call float @llvm.AMDIL.exp.(float %680) %684 = fmul float %315, %372 %685 = fmul float %317, %373 %686 = fadd float %685, %684 %687 = fmul float 1.000000e+00, %374 %688 = fadd float %686, %687 %689 = fsub float -0.000000e+00, %688 %690 = fadd float %689, 1.000000e+00 %691 = fmul float %690, %690 %692 = fmul float %691, %691 %693 = fmul float %690, %692 %694 = fmul float %447, %31 %695 = fmul float %447, %32 %696 = fmul float %447, %33 %697 = fsub float -0.000000e+00, %442 %698 = fsub float -0.000000e+00, %442 %699 = fsub float -0.000000e+00, %442 %700 = fcmp oge float %697, 0.000000e+00 %701 = sext i1 %700 to i32 %702 = bitcast i32 %701 to float %703 = bitcast float %702 to i32 %704 = icmp ne i32 %703, 0 %temp72.0 = select i1 %704, float %694, float -0.000000e+00 %705 = fcmp oge float %698, 0.000000e+00 %706 = sext i1 %705 to i32 %707 = bitcast i32 %706 to float %708 = bitcast float %707 to i32 %709 = icmp ne i32 %708, 0 %.167 = select i1 %709, float %695, float -0.000000e+00 %710 = fcmp oge float %699, 0.000000e+00 %711 = sext i1 %710 to i32 %712 = bitcast i32 %711 to float %713 = bitcast float %712 to i32 %714 = icmp ne i32 %713, 0 %temp64.0 = select i1 %714, float %696, float -0.000000e+00 %715 = fmul float %554, %35 %716 = fmul float %554, %36 %717 = fmul float %554, %37 %718 = fmul float %715, 0x3FE99999A0000000 %719 = fmul float %716, 0x3FE99999A0000000 %720 = fmul float %717, 0x3FE99999A0000000 %721 = fsub float -0.000000e+00, %549 %722 = fsub float -0.000000e+00, %549 %723 = fsub float -0.000000e+00, %549 %724 = fsub float -0.000000e+00, %549 %725 = fcmp oge float %721, 0.000000e+00 %726 = sext i1 %725 to i32 %727 = bitcast i32 %726 to float %728 = bitcast float %727 to i32 %729 = icmp ne i32 %728, 0 %.168 = select i1 %729, float %718, float -0.000000e+00 %730 = fcmp oge float %722, 0.000000e+00 %731 = sext i1 %730 to i32 %732 = bitcast i32 %731 to float %733 = bitcast float %732 to i32 %734 = icmp ne i32 %733, 0 %temp68.1 = select i1 %734, float %719, float -0.000000e+00 %735 = fcmp oge float %723, 0.000000e+00 %736 = sext i1 %735 to i32 %737 = bitcast i32 %736 to float %738 = bitcast float %737 to i32 %739 = icmp ne i32 %738, 0 %.169 = select i1 %739, float %720, float -0.000000e+00 %740 = fcmp oge float %724, 0.000000e+00 %741 = sext i1 %740 to i32 %742 = bitcast i32 %741 to float %743 = bitcast float %742 to i32 %744 = icmp ne i32 %743, 0 %745 = fadd float %temp72.0, %.168 %746 = fadd float %.167, %temp68.1 %747 = fadd float %temp64.0, %.169 %748 = fmul float %745, %693 %749 = fadd float %748, %204 %750 = fmul float %746, %693 %751 = fadd float %750, %205 %752 = fmul float %747, %693 %753 = fadd float %752, %206 %754 = fmul float %204, %31 %755 = fmul float %205, %32 %756 = fmul float %206, %33 %757 = fmul float %483, %754 %758 = fmul float %483, %755 %759 = fmul float %483, %756 %760 = fsub float -0.000000e+00, %477 %761 = fsub float -0.000000e+00, %477 %762 = fsub float -0.000000e+00, %477 %763 = fcmp oge float %760, 0.000000e+00 %764 = sext i1 %763 to i32 %765 = bitcast i32 %764 to float %766 = bitcast float %765 to i32 %767 = icmp ne i32 %766, 0 %temp40.1 = select i1 %767, float %757, float -0.000000e+00 %768 = fcmp oge float %761, 0.000000e+00 %769 = sext i1 %768 to i32 %770 = bitcast i32 %769 to float %771 = bitcast float %770 to i32 %772 = icmp ne i32 %771, 0 %.170 = select i1 %772, float %758, float -0.000000e+00 %773 = fcmp oge float %762, 0.000000e+00 %774 = sext i1 %773 to i32 %775 = bitcast i32 %774 to float %776 = bitcast float %775 to i32 %777 = icmp ne i32 %776, 0 %temp20.1 = select i1 %777, float %759, float -0.000000e+00 %778 = fmul float %749, %358 %779 = fadd float %778, %temp40.1 %780 = fmul float %751, %358 %781 = fadd float %780, %.170 %782 = fmul float %753, %358 %783 = fadd float %782, %temp20.1 %784 = fmul float %681, %68 %785 = fadd float %784, %779 %786 = fmul float %682, %68 %787 = fadd float %786, %781 %788 = fmul float %683, %68 %789 = fadd float %788, %783 %790 = bitcast float %123 to i32 %791 = bitcast float %124 to i32 %792 = insertelement <2 x i32> undef, i32 %790, i32 0 %793 = insertelement <2 x i32> %792, i32 %791, i32 1 %794 = bitcast <8 x i32> %90 to <32 x i8> %795 = bitcast <4 x i32> %92 to <16 x i8> %796 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %793, <32 x i8> %794, <16 x i8> %795, i32 2) %797 = extractelement <4 x float> %796, i32 3 %798 = fmul float %690, %692 %799 = fadd float %798, 5.000000e-01 %800 = fmul float %799, %785 %801 = fmul float %799, %787 %802 = fmul float %799, %789 %803 = fadd float %671, 1.000000e+00 %804 = fadd float %671, 1.000000e+00 %805 = fadd float %671, 1.000000e+00 %806 = fadd float %671, 1.000000e+00 %807 = call float @llvm.AMDIL.clamp.(float %803, float 0.000000e+00, float 1.000000e+00) %808 = call float @llvm.AMDIL.clamp.(float %804, float 0.000000e+00, float 1.000000e+00) %809 = call float @llvm.AMDIL.clamp.(float %805, float 0.000000e+00, float 1.000000e+00) %810 = call float @llvm.AMDIL.clamp.(float %806, float 0.000000e+00, float 1.000000e+00) %811 = fmul float %797, %808 %812 = call float @fabs(float %800) %813 = call float @llvm.log2.f32(float %812) %814 = call float @fabs(float %801) %815 = call float @llvm.log2.f32(float %814) %816 = call float @fabs(float %802) %817 = call float @llvm.log2.f32(float %816) %818 = fmul float %813, 0x3FDD1743E0000000 %819 = fmul float %815, 0x3FDD1743E0000000 %820 = fmul float %817, 0x3FDD1743E0000000 %821 = call float @llvm.AMDIL.exp.(float %818) %822 = call float @llvm.AMDIL.exp.(float %819) %823 = call float @llvm.AMDIL.exp.(float %820) %824 = bitcast float %821 to i32 %825 = bitcast float %822 to i32 %826 = bitcast float %823 to i32 %827 = insertelement <4 x i32> undef, i32 %824, i32 0 %828 = insertelement <4 x i32> %827, i32 %825, i32 1 %829 = insertelement <4 x i32> %828, i32 %826, i32 2 %830 = insertelement <4 x i32> %829, i32 undef, i32 3 %831 = bitcast <8 x i32> %114 to <32 x i8> %832 = bitcast <4 x i32> %116 to <16 x i8> %833 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %830, <32 x i8> %831, <16 x i8> %832, i32 3) %834 = extractelement <4 x float> %833, i32 0 %835 = extractelement <4 x float> %833, i32 1 %836 = extractelement <4 x float> %833, i32 2 %837 = fmul float %130, 1.000000e+00 %838 = fadd float %837, 0.000000e+00 %839 = fmul float %131, -1.000000e+00 %840 = fadd float %839, 1.000000e+00 %841 = bitcast float %838 to i32 %842 = bitcast float %840 to i32 %843 = insertelement <2 x i32> undef, i32 %841, i32 0 %844 = insertelement <2 x i32> %843, i32 %842, i32 1 %845 = bitcast <8 x i32> %102 to <32 x i8> %846 = bitcast <4 x i32> %104 to <16 x i8> %847 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %844, <32 x i8> %845, <16 x i8> %846, i32 2) %848 = extractelement <4 x float> %847, i32 0 %849 = extractelement <4 x float> %847, i32 1 %850 = extractelement <4 x float> %847, i32 2 %851 = extractelement <4 x float> %847, i32 3 %852 = fsub float -0.000000e+00, %851 %853 = fadd float %852, 1.000000e+00 %854 = fmul float %834, %853 %855 = fadd float %854, %848 %856 = fmul float %835, %853 %857 = fadd float %856, %849 %858 = fmul float %836, %853 %859 = fadd float %858, %850 %860 = call float @llvm.AMDGPU.lrp(float %143, float %855, float 0x3FD99999A0000000) %861 = call float @llvm.AMDGPU.lrp(float %143, float %857, float 0x3FD99999A0000000) %862 = call float @llvm.AMDGPU.lrp(float %143, float %859, float 0x3FD99999A0000000) %863 = bitcast float %860 to i32 %864 = bitcast float %861 to i32 %865 = bitcast float %862 to i32 %866 = insertelement <4 x i32> undef, i32 %863, i32 0 %867 = insertelement <4 x i32> %866, i32 %864, i32 1 %868 = insertelement <4 x i32> %867, i32 %865, i32 2 %869 = insertelement <4 x i32> %868, i32 undef, i32 3 %870 = bitcast <8 x i32> %110 to <32 x i8> %871 = bitcast <4 x i32> %112 to <16 x i8> %872 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %869, <32 x i8> %870, <16 x i8> %871, i32 3) %873 = extractelement <4 x float> %872, i32 0 %874 = extractelement <4 x float> %872, i32 1 %875 = extractelement <4 x float> %872, i32 2 %876 = fsub float -0.000000e+00, %143 %877 = fmul float %144, %876 %878 = fadd float %877, %143 %879 = call float @llvm.AMDGPU.lrp(float %878, float %873, float %860) %880 = call float @llvm.AMDGPU.lrp(float %878, float %874, float %861) %881 = call float @llvm.AMDGPU.lrp(float %878, float %875, float %862) %882 = fdiv float 1.000000e+00, %129 %883 = fmul float %882, %128 %884 = fmul float %883, 5.000000e-01 %885 = fadd float %884, 5.000000e-01 %886 = bitcast float %885 to i32 %887 = bitcast float %49 to i32 %888 = insertelement <2 x i32> undef, i32 %886, i32 0 %889 = insertelement <2 x i32> %888, i32 %887, i32 1 %890 = bitcast <8 x i32> %98 to <32 x i8> %891 = bitcast <4 x i32> %100 to <16 x i8> %892 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %889, <32 x i8> %890, <16 x i8> %891, i32 2) %893 = extractelement <4 x float> %892, i32 0 %894 = fsub float -0.000000e+00, %50 %895 = fadd float %894, %125 %896 = fsub float -0.000000e+00, %51 %897 = fadd float %896, %126 %898 = fsub float -0.000000e+00, %52 %899 = fadd float %898, %127 %900 = fmul float %895, %895 %901 = fmul float %897, %897 %902 = fadd float %901, %900 %903 = fmul float %899, %899 %904 = fadd float %902, %903 %905 = fmul float %904, %48 %906 = fmul float %899, %47 %907 = fmul float %906, 0x3FF7154CA0000000 %908 = call float @llvm.AMDIL.exp.(float %907) %909 = fsub float -0.000000e+00, %908 %910 = fadd float %909, 1.000000e+00 %911 = fmul float %910, %905 %912 = fdiv float 1.000000e+00, %899 %913 = fmul float %912, %911 %914 = fmul float %913, 0x3FF7154CA0000000 %915 = call float @llvm.AMDIL.exp.(float %914) %916 = call float @llvm.AMDIL.clamp.(float %915, float 0.000000e+00, float 1.000000e+00) %917 = fsub float -0.000000e+00, %916 %918 = fadd float %917, 1.000000e+00 %919 = fmul float %918, %893 %920 = fsub float -0.000000e+00, %879 %921 = fadd float %920, %65 %922 = fsub float -0.000000e+00, %880 %923 = fadd float %922, %66 %924 = fsub float -0.000000e+00, %881 %925 = fadd float %924, %67 %926 = fmul float %919, %921 %927 = fadd float %926, %879 %928 = fmul float %919, %923 %929 = fadd float %928, %880 %930 = fmul float %919, %925 %931 = fadd float %930, %881 %932 = call i32 @llvm.SI.packf16(float %927, float %929) %933 = bitcast i32 %932 to float %934 = call i32 @llvm.SI.packf16(float %931, float %811) %935 = bitcast i32 %934 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %933, float %935, float %933, float %935) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 s[100:101], s[6:7] ; BEE40406 s_mov_b64 vcc, s[2:3] ; BEEA0402 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 6, [m0] ; C8081900 v_interp_p2_f32 v2, [v2], v1, 1, 6, [m0] ; C8091901 v_sub_f32_e32 v5, 1.0, v2 ; 080A04F2 v_interp_p1_f32 v2, v0, 0, 6, [m0] ; C8081800 v_interp_p2_f32 v2, [v2], v1, 0, 6, [m0] ; C8091801 v_add_f32_e32 v4, 0, v2 ; 06080480 s_load_dwordx4 s[96:99], s[4:5], 0x0 ; C0B00500 s_load_dwordx4 s[76:79], s[4:5], 0x4 ; C0A60504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[72:75], s[4:5], 0xc ; C0A4050C s_load_dwordx4 s[48:51], s[4:5], 0x10 ; C0980510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v53, s0, 0 ; 046B0000 v_writelane_b32 v53, s1, 1 ; 046B0201 v_writelane_b32 v53, s2, 2 ; 046B0402 v_writelane_b32 v53, s3, 3 ; 046B0603 s_load_dwordx4 s[60:63], s[4:5], 0x18 ; C09E0518 s_load_dwordx4 s[0:3], s[4:5], 0x1c ; C080051C s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v53, s0, 4 ; 046B0800 v_writelane_b32 v53, s1, 5 ; 046B0A01 v_writelane_b32 v53, s2, 6 ; 046B0C02 v_writelane_b32 v53, s3, 7 ; 046B0E03 s_load_dwordx4 s[36:39], s[4:5], 0x20 ; C0920520 s_load_dwordx4 s[16:19], s[4:5], 0x24 ; C0880524 s_load_dwordx4 s[32:35], s[4:5], 0x28 ; C0900528 s_load_dwordx4 s[40:43], s[4:5], 0x2c ; C094052C s_load_dwordx8 s[20:27], s[100:101], 0x48 ; C0CA6548 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:3], 5, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[20:27], s[16:19] ; F0800500 00850204 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v2|, 0 ; D00C0100 20010102 v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; D2000006 00018280 v_cmp_ne_i32_e64 s[0:1], v6, 0 ; D10A0000 00010106 v_cndmask_b32_e64 v6, 0, -1.0, s[0:1] ; D2000006 0001E680 v_cmp_lt_f32_e64 s[0:1], v6, 0 ; D0020000 00010106 v_cndmask_b32_e64 v6, 0, -1, s[0:1] ; D2000006 00018280 v_and_b32_e32 v6, 1.0, v6 ; 360C0CF2 v_xor_b32_e32 v6, 0x80000000, v6 ; 3A0C0CFF 80000000 v_cmpx_le_f32_e32 vcc, 0, v6 ; 7C260C80 v_cmpx_le_f32_e32 vcc, 0, v6 ; 7C260C80 v_cmpx_le_f32_e32 vcc, 0, v6 ; 7C260C80 v_cmpx_le_f32_e32 vcc, 0, v6 ; 7C260C80 v_interp_p1_f32 v7, v0, 1, 0, [m0] ; C81C0100 v_interp_p2_f32 v7, [v7], v1, 1, 0, [m0] ; C81D0101 v_interp_p1_f32 v6, v0, 0, 0, [m0] ; C8180000 v_interp_p2_f32 v6, [v6], v1, 0, 0, [m0] ; C8190001 s_load_dwordx8 s[0:7], s[100:101], 0x0 ; C0C06500 s_load_dwordx8 s[88:95], s[100:101], 0x8 ; C0EC6508 s_load_dwordx8 s[8:15], s[100:101], 0x10 ; C0C46510 s_load_dwordx8 s[80:87], s[100:101], 0x18 ; C0E86518 s_load_dwordx8 s[52:59], s[100:101], 0x20 ; C0DA6520 s_load_dwordx8 s[16:23], s[100:101], 0x28 ; C0C86528 s_load_dwordx8 s[64:71], s[100:101], 0x30 ; C0E06530 s_load_dwordx8 s[24:31], s[100:101], 0x38 ; C0CC6538 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[8:11], 15, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[0:7], s[96:99] ; F0800F00 03000806 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v12, 2.0, v9, -1.0 ; D282000C 03CE12F4 v_interp_p1_f32 v14, v0, 1, 1, [m0] ; C8380500 v_interp_p2_f32 v14, [v14], v1, 1, 1, [m0] ; C8390501 v_interp_p1_f32 v13, v0, 0, 1, [m0] ; C8340400 v_interp_p2_f32 v13, [v13], v1, 0, 1, [m0] ; C8350401 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[8:15], s[44:47] ; F0800F00 01620F0D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v19, 2.0, v16, -1.0 ; D2820013 03CE20F4 s_load_dwordx4 s[0:3], vcc, 0x0 ; C0806B00 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v20, s4 ; 7E285404 v_mul_f32_e32 v20, 0x3eaa7efa, v20 ; 102828FF 3EAA7EFA v_mul_f32_e32 v19, v19, v20 ; 10262913 v_mad_f32 v21, v20, v12, v19 ; D2820015 044E1914 v_interp_p1_f32 v23, v0, 1, 2, [m0] ; C85C0900 v_interp_p2_f32 v23, [v23], v1, 1, 2, [m0] ; C85D0901 v_interp_p1_f32 v22, v0, 0, 2, [m0] ; C8580800 v_interp_p2_f32 v22, [v22], v1, 0, 2, [m0] ; C8590801 image_sample v[24:27], 15, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[8:15], s[44:47] ; F0800F00 01621816 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, 2.0, v25, -1.0 ; D282001C 03CE32F4 v_mad_f32 v21, v20, v28, v21 ; D2820015 04563914 v_interp_p1_f32 v29, v0, 1, 4, [m0] ; C8741100 v_interp_p2_f32 v29, [v29], v1, 1, 4, [m0] ; C8751101 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v30, s4, v29 ; 083C3A04 v_interp_p1_f32 v31, v0, 0, 4, [m0] ; C87C1000 v_interp_p2_f32 v31, [v31], v1, 0, 4, [m0] ; C87D1001 s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v32, s5, v31 ; 08403E05 v_mul_f32_e32 v33, v32, v32 ; 10424120 v_mad_f32 v33, v30, v30, v33 ; D2820021 04863D1E v_interp_p1_f32 v34, v0, 2, 4, [m0] ; C8881200 v_interp_p2_f32 v34, [v34], v1, 2, 4, [m0] ; C8891201 s_buffer_load_dword s44, s[0:3], 0x22 ; C2160122 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v35, s44, v34 ; 0846442C v_mad_f32 v33, v35, v35, v33 ; D2820021 04864723 v_max_f32_e32 v33, 0x33d6bf95, v33 ; 204242FF 33D6BF95 v_rsq_clamp_f32_e32 v33, v33 ; 7E425921 s_buffer_load_dword s6, s[0:3], 0x19 ; C2030119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, v33, v30, -s6 ; D2820024 801A3D21 s_buffer_load_dword s7, s[0:3], 0x1b ; C203811B v_mov_b32_e32 v37, s6 ; 7E4A0206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, v36, s7, v37 ; D2820024 04940F24 s_buffer_load_dword s6, s[0:3], 0x18 ; C2030118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v37, v33, v32, -s6 ; D2820025 801A4121 v_mov_b32_e32 v38, s6 ; 7E4C0206 v_mad_f32 v37, v37, s7, v38 ; D2820025 04980F25 v_mul_f32_e32 v38, v37, v37 ; 104C4B25 v_mad_f32 v38, v36, v36, v38 ; D2820026 049A4924 s_buffer_load_dword s6, s[0:3], 0x1a ; C203011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v39, v33, v35, -s6 ; D2820027 801A4721 v_mov_b32_e32 v40, s6 ; 7E500206 v_mad_f32 v39, v39, s7, v40 ; D2820027 04A00F27 v_mad_f32 v38, v39, v39, v38 ; D2820026 049A4F27 v_max_f32_e32 v38, 0x33d6bf95, v38 ; 204C4CFF 33D6BF95 v_rsq_clamp_f32_e32 v38, v38 ; 7E4C5926 s_buffer_load_dword s6, s[0:3], 0x9 ; C2030109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v36, v36, v38, s6 ; D2820024 001A4D24 s_buffer_load_dword s6, s[0:3], 0xa ; C203010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v39, v39, v38, s6 ; D2820027 001A4D27 v_rcp_f32_e32 v39, v39 ; 7E4E5527 v_mad_f32 v36, v36, v39, -v21 ; D2820024 84564F24 v_mad_f32 v40, 2.0, v8, -1.0 ; D2820028 03CE10F4 v_mad_f32 v41, -2.0, v15, 1.0 ; D2820029 03CA1EF5 v_mul_f32_e32 v41, v41, v20 ; 10522929 v_mad_f32 v42, v20, v40, v41 ; D282002A 04A65114 v_mad_f32 v43, 2.0, v24, -1.0 ; D282002B 03CE30F4 v_mad_f32 v42, v20, v43, v42 ; D282002A 04AA5714 s_buffer_load_dword s6, s[0:3], 0x8 ; C2030108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v37, v37, v38, s6 ; D2820025 001A4D25 v_mad_f32 v37, v37, v39, -v42 ; D2820025 84AA4F25 image_sample v6, 1, 0, 0, 0, 0, 0, 0, 0, v[6:7], s[88:95], s[76:79] ; F0800100 02760606 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v6, 2.0, v6, -1.0 ; D2820006 03CE0CF4 image_sample v7, 1, 0, 0, 0, 0, 0, 0, 0, v[13:14], s[80:87], s[72:75] ; F0800100 0254070D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, -2.0, v7, 1.0 ; D2820007 03CA0EF5 v_mul_f32_e32 v13, v20, v20 ; 101A2914 v_mul_f32_e32 v7, v7, v13 ; 100E1B07 v_mad_f32 v6, v6, v13, v7 ; D2820006 041E1B06 v_mul_f32_e32 v7, v12, v20 ; 100E290C v_mul_f32_e32 v14, v41, v7 ; 101C0F29 v_mul_f32_e32 v38, v40, v20 ; 104C2928 v_mad_f32 v14, v38, v19, v14 ; D282000E 043A2726 v_add_f32_e32 v6, v14, v6 ; 060C0D0E image_sample v14, 1, 0, 0, 0, 0, 0, 0, 0, v[22:23], s[80:87], s[72:75] ; F0800100 02540E16 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v14, 2.0, v14, -1.0 ; D282000E 03CE1CF4 v_mad_f32 v6, v14, v13, v6 ; D2820006 041A1B0E v_mul_f32_e32 v14, v43, v20 ; 101C292B v_mul_f32_e32 v22, v14, v7 ; 102C0F0E v_mul_f32_e32 v23, v28, v20 ; 102E291C v_mad_f32 v22, v38, v23, v22 ; D2820016 045A2F26 v_add_f32_e32 v6, v22, v6 ; 060C0D16 v_mul_f32_e32 v22, v14, v19 ; 102C270E v_mad_f32 v22, v41, v23, v22 ; D2820016 045A2F29 v_add_f32_e32 v6, v22, v6 ; 060C0D16 v_mad_f32 v6, -v42, v21, v6 ; D2820006 241A2B2A v_add_f32_e32 v22, v6, v6 ; 062C0D06 v_mul_f32_e32 v28, v37, v22 ; 10382D25 v_mul_f32_e32 v39, v17, v13 ; 104E1B11 v_mad_f32 v39, v10, v13, v39 ; D2820027 049E1B0A v_mad_f32 v38, v20, v40, v38 ; D2820026 049A5114 v_mad_f32 v39, v41, v38, v39 ; D2820027 049E4D29 v_mad_f32 v39, v26, v13, v39 ; D2820027 049E1B1A v_mad_f32 v38, v38, v14, v39 ; D2820026 049E1D26 v_mul_f32_e32 v14, v14, v41 ; 101C530E v_mad_f32 v14, 2.0, v14, v38 ; D282000E 049A1CF4 v_mad_f32 v14, -v42, v42, v14 ; D282000E 243A552A s_buffer_load_dword s6, s[0:3], 0x13 ; C2030113 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v38, s6 ; 7E4C5406 v_add_f32_e32 v39, v14, v38 ; 064E4D0E v_mad_f32 v28, v39, v36, -v28 ; D282001C 84724927 v_mul_f32_e32 v28, v28, v36 ; 1038491C v_mul_f32_e32 v36, v37, v37 ; 10484B25 v_mul_f32_e32 v15, v18, v13 ; 101E1B12 v_mad_f32 v8, v11, v13, v15 ; D2820008 043E1B0B v_mad_f32 v7, v20, v12, v7 ; D2820007 041E1914 v_mad_f32 v8, v19, v7, v8 ; D2820008 04220F13 v_mad_f32 v8, v27, v13, v8 ; D2820008 04221B1B v_mad_f32 v7, v7, v23, v8 ; D2820007 04222F07 v_mul_f32_e32 v8, v23, v19 ; 10102717 v_mad_f32 v7, 2.0, v8, v7 ; D2820007 041E10F4 v_mad_f32 v7, -v21, v21, v7 ; D2820007 241E2B15 v_add_f32_e32 v8, v7, v38 ; 06104D07 v_mad_f32 v9, v36, v8, v28 ; D2820009 04721124 v_mul_f32_e32 v9, 0.5, v9 ; 101212F0 v_mul_f32_e32 v6, v6, v6 ; 100C0D06 v_mad_f32 v8, v39, v8, -v6 ; D2820008 841A1127 v_rcp_f32_e32 v10, v8 ; 7E145508 v_mul_f32_e32 v11, v9, v10 ; 10161509 v_mov_b32_e32 v12, 0xbfb8aa65 ; 7E1802FF BFB8AA65 v_mul_f32_e32 v11, v12, v11 ; 1016170C v_exp_f32_e32 v11, v11 ; 7E164B0B v_max_f32_e32 v13, 0x33d6bf95, v8 ; 201A10FF 33D6BF95 v_rsq_clamp_f32_e32 v13, v13 ; 7E1A590D v_mul_f32_e32 v11, v13, v11 ; 1016170D s_buffer_load_dword s6, s[0:3], 0x12 ; C2030112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s6, v11 ; 101A1606 v_mul_f32_e32 v13, 0x3f4ccccd, v13 ; 101A1AFF 3F4CCCCD v_mov_b32_e32 v15, 0xc1800000 ; 7E1E02FF C1800000 v_mad_f32 v9, v10, v9, v15 ; D2820009 043E130A v_cmp_ge_f32_e64 s[6:7], v9, 0 ; D00C0006 00010109 v_cndmask_b32_e64 v9, 0, -1, s[6:7] ; D2000809 00198280 v_cmp_ne_i32_e64 s[6:7], v9, 0 ; D10A0006 00010109 v_mov_b32_e32 v9, 0x80000000 ; 7E1202FF 80000000 v_cndmask_b32_e64 v10, v9, 1.0, s[6:7] ; D200000A 0819E509 v_cmp_ge_f32_e64 s[6:7], -v8, 0 ; D00C0006 20010108 v_cndmask_b32_e64 v16, 0, -1, s[6:7] ; D2000010 00198280 v_cmp_ne_i32_e64 s[6:7], v16, 0 ; D10A0006 00010110 v_cndmask_b32_e64 v16, v9, 1.0, s[6:7] ; D2000010 0819E509 v_add_f32_e32 v10, v16, v10 ; 06141510 v_cmp_ge_f32_e64 s[6:7], -v10, 0 ; D00C0006 2001010A v_cndmask_b32_e64 v10, 0, -1, s[6:7] ; D200000A 00198280 v_cmp_ne_i32_e64 s[46:47], v10, 0 ; D10A002E 0001010A v_cndmask_b32_e64 v10, v9, v13, s[46:47] ; D200000A 08BA1B09 s_buffer_load_dword s6, s[0:3], 0x15 ; C2030115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v33, v30, -s6 ; D282000D 801A3D21 s_buffer_load_dword s7, s[0:3], 0x17 ; C2038117 v_mov_b32_e32 v16, s6 ; 7E200206 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v13, s7, v16 ; D282000D 04400F0D s_buffer_load_dword s6, s[0:3], 0x14 ; C2030114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v16, v33, v32, -s6 ; D2820010 801A4121 v_mov_b32_e32 v17, s6 ; 7E220206 v_mad_f32 v16, v16, s7, v17 ; D2820010 04440F10 v_mul_f32_e32 v17, v16, v16 ; 10222110 v_mad_f32 v17, v13, v13, v17 ; D2820011 04461B0D s_buffer_load_dword s6, s[0:3], 0x16 ; C2030116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, v33, v35, -s6 ; D2820012 801A4721 v_mov_b32_e32 v19, s6 ; 7E260206 v_mad_f32 v18, v18, s7, v19 ; D2820012 044C0F12 v_mad_f32 v17, v18, v18, v17 ; D2820011 04462512 v_max_f32_e32 v17, 0x33d6bf95, v17 ; 202222FF 33D6BF95 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 s_buffer_load_dword s6, s[0:3], 0x5 ; C2030105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, v13, v17, s6 ; D282000D 001A230D s_buffer_load_dword s7, s[0:3], 0x6 ; C2038106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v18, v18, v17, s7 ; D2820012 001E2312 v_rcp_f32_e32 v18, v18 ; 7E245512 v_mad_f32 v13, v13, v18, -v21 ; D282000D 8456250D s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v16, v16, v17, s8 ; D2820010 00222310 v_mad_f32 v16, v16, v18, -v42 ; D2820010 84AA2510 v_mul_f32_e32 v17, v22, v16 ; 10222116 s_buffer_load_dword s9, s[0:3], 0xf ; C204810F s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v18, s9 ; 7E245409 v_add_f32_e32 v19, v14, v18 ; 0626250E v_mad_f32 v20, v19, v13, -v17 ; D2820014 84461B13 v_mul_f32_e32 v20, v20, v13 ; 10281B14 v_mul_f32_e32 v16, v16, v16 ; 10202110 v_add_f32_e32 v22, v7, v18 ; 062C2507 v_mad_f32 v20, v16, v22, v20 ; D2820014 04522D10 v_mul_f32_e32 v20, 0.5, v20 ; 102828F0 v_mad_f32 v19, v19, v22, -v6 ; D2820013 841A2D13 v_rcp_f32_e32 v22, v19 ; 7E2C5513 v_mul_f32_e32 v23, v20, v22 ; 102E2D14 v_mul_f32_e32 v23, v12, v23 ; 102E2F0C v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_max_f32_e32 v24, 0x33d6bf95, v19 ; 203026FF 33D6BF95 v_rsq_clamp_f32_e32 v24, v24 ; 7E305918 v_mul_f32_e32 v23, v23, v24 ; 102E3117 s_buffer_load_dword s9, s[0:3], 0xe ; C204810E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v24, s9, v23 ; 10302E09 v_mad_f32 v20, v22, v20, v15 ; D2820014 043E2916 v_cmp_ge_f32_e64 s[10:11], v20, 0 ; D00C000A 00010114 v_cndmask_b32_e64 v20, 0, -1, s[10:11] ; D2000014 00298280 v_cmp_ne_i32_e64 s[10:11], v20, 0 ; D10A000A 00010114 v_cndmask_b32_e64 v20, v9, 1.0, s[10:11] ; D2000014 0829E509 v_cmp_ge_f32_e64 s[10:11], -v19, 0 ; D00C000A 20010113 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[10:11], v19, 0 ; D10A000A 00010113 v_cndmask_b32_e64 v19, v9, 1.0, s[10:11] ; D2000813 0829E509 v_add_f32_e32 v19, v19, v20 ; 06262913 v_cmp_ge_f32_e64 s[10:11], -v19, 0 ; D00C000A 20010113 v_cndmask_b32_e64 v19, 0, -1, s[10:11] ; D2000813 00298280 v_cmp_ne_i32_e64 s[72:73], v19, 0 ; D10A0048 00010113 v_cndmask_b32_e64 v19, v9, v24, s[72:73] ; D2000813 09223109 v_add_f32_e32 v10, v10, v19 ; 0614270A v_mul_f32_e32 v19, v32, v33 ; 10264320 v_mul_f32_e32 v19, v19, v42 ; 10265513 v_mul_f32_e32 v20, v30, v33 ; 1028431E v_mad_f32 v19, v21, v20, v19 ; D2820013 044E2915 v_mad_f32 v19, v33, v35, v19 ; D2820013 044E4721 v_sub_f32_e32 v19, 1.0, v19 ; 082626F2 v_mul_f32_e32 v20, v19, v19 ; 10282713 v_mul_f32_e32 v20, v20, v20 ; 10282914 v_mul_f32_e32 v22, v20, v19 ; 102C2714 v_interp_p1_f32 v25, v0, 1, 3, [m0] ; C8640D00 v_interp_p2_f32 v25, [v25], v1, 1, 3, [m0] ; C8650D01 v_interp_p1_f32 v24, v0, 0, 3, [m0] ; C8600C00 v_interp_p2_f32 v24, [v24], v1, 0, 3, [m0] ; C8610C01 image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[64:71], s[60:63] ; F0800700 01F01A18 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v30, v28 ; 7E3C4F1C v_mov_b32_e32 v32, 0x400ccccd ; 7E4002FF 400CCCCD v_mul_legacy_f32_e32 v30, v32, v30 ; 0E3C3D20 v_exp_f32_e32 v30, v30 ; 7E3C4B1E v_mad_f32 v10, v10, v22, v30 ; D282000A 047A2D0A v_mov_b32_e32 v33, 0x41800000 ; 7E4202FF 41800000 v_mad_f32 v35, v18, v33, v14 ; D2820023 043A4312 v_mad_f32 v17, v35, v13, -v17 ; D2820011 84461B23 v_mul_f32_e32 v13, v17, v13 ; 101A1B11 v_mad_f32 v17, v18, v33, v7 ; D2820011 041E4312 v_mad_f32 v13, v16, v17, v13 ; D282000D 04362310 v_mul_f32_e32 v13, 0.5, v13 ; 101A1AF0 v_mad_f32 v6, v35, v17, -v6 ; D2820006 841A2323 v_rcp_f32_e32 v16, v6 ; 7E205506 v_mul_f32_e32 v17, v16, v13 ; 10221B10 v_mul_f32_e32 v12, v12, v17 ; 1018230C v_exp_f32_e32 v12, v12 ; 7E184B0C v_max_f32_e32 v17, 0x33d6bf95, v6 ; 20220CFF 33D6BF95 v_rsq_clamp_f32_e32 v17, v17 ; 7E225911 v_mul_f32_e32 v12, v12, v17 ; 1018230C v_mul_f32_e32 v12, 0x3ba3d70a, v12 ; 101818FF 3BA3D70A v_mul_f32_e32 v17, s9, v30 ; 10223C09 v_mul_f32_e32 v17, v17, v12 ; 10221911 v_mad_f32 v13, v13, v16, v15 ; D282000D 043E210D v_cmp_ge_f32_e64 s[10:11], v13, 0 ; D00C000A 0001010D v_cndmask_b32_e64 v13, 0, -1, s[10:11] ; D200080D 00298280 v_cmp_ne_i32_e64 s[10:11], v13, 0 ; D10A000A 0001010D v_cndmask_b32_e64 v13, v9, 1.0, s[10:11] ; D200080D 0829E509 v_cmp_ge_f32_e64 s[10:11], -v6, 0 ; D00C000A 20010106 v_cndmask_b32_e64 v6, 0, -1, s[10:11] ; D2000006 00298280 v_cmp_ne_i32_e64 s[10:11], v6, 0 ; D10A000A 00010106 v_cndmask_b32_e64 v6, v9, 1.0, s[10:11] ; D2000006 0829E509 v_add_f32_e32 v6, v6, v13 ; 060C1B06 v_cmp_ge_f32_e64 s[10:11], -v6, 0 ; D00C000A 20010106 v_cndmask_b32_e64 v6, 0, -1, s[10:11] ; D2000006 00298280 v_cmp_ne_i32_e64 s[60:61], v6, 0 ; D10A003C 00010106 v_cndmask_b32_e64 v6, v9, v17, s[60:61] ; D2000006 08F22309 v_mul_f32_e32 v13, s8, v42 ; 101A5408 v_mad_f32 v13, s6, v21, v13 ; D282000D 04362A06 v_add_f32_e32 v13, s7, v13 ; 061A1A07 v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_mad_f32 v6, v10, v13, v6 ; D2820006 041A1B0A s_buffer_load_dword s6, s[0:3], 0x29 ; C2030129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s6, v29 ; 10143A06 s_buffer_load_dword s6, s[0:3], 0x25 ; C2030125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v31, s6, v10 ; D282000A 04280D1F s_buffer_load_dword s6, s[0:3], 0x2d ; C203012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v34, s6, v10 ; D282000A 04280D22 s_buffer_load_dword s6, s[0:3], 0x31 ; C2030131 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v10, s6, v10 ; 06141406 s_buffer_load_dword s6, s[0:3], 0x2b ; C203012B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s6, v29 ; 101E3A06 s_buffer_load_dword s6, s[0:3], 0x27 ; C2030127 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, v31, s6, v15 ; D282000F 043C0D1F s_buffer_load_dword s6, s[0:3], 0x2f ; C203012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v15, v34, s6, v15 ; D282000F 043C0D22 s_buffer_load_dword s6, s[0:3], 0x33 ; C2030133 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v15, s6, v15 ; 061E1E06 v_rcp_f32_e32 v15, v15 ; 7E1E550F v_mul_f32_e32 v10, v10, v15 ; 10141F0A v_mad_f32 v10, -0.5, v10, -0.5 ; D282000A 03C614F1 v_mov_b32_e32 v16, 0x40c00000 ; 7E2002FF 40C00000 v_mad_f32 v7, v7, v16, v10 ; D2820007 042A2107 v_mad_f32 v18, -2.0, v8, v7 ; D2820012 041E10F5 s_buffer_load_dword s6, s[0:3], 0x28 ; C2030128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s6, v29 ; 10143A06 s_buffer_load_dword s6, s[0:3], 0x24 ; C2030124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v31, s6, v10 ; D282000A 04280D1F s_buffer_load_dword s6, s[0:3], 0x2c ; C203012C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v34, s6, v10 ; D282000A 04280D22 s_buffer_load_dword s6, s[0:3], 0x30 ; C2030130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v10, s6, v10 ; 06141406 v_mul_f32_e32 v10, v10, v15 ; 10141F0A v_mad_f32 v10, 0.5, v10, -0.5 ; D282000A 03C614F0 v_mad_f32 v10, v14, v16, v10 ; D282000A 042A210E v_mad_f32 v17, 2.0, v8, v10 ; D2820011 042A10F4 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[17:18], s[52:59], s[48:51] ; F0800F00 018D2311 v_mov_b32_e32 v14, 0x3e800000 ; 7E1C02FF 3E800000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v15, 0x3e800000, v37 ; 101E4AFF 3E800000 v_mad_f32 v21, 2.0, v8, v7 ; D2820015 041E10F4 v_mov_b32_e32 v39, v17 ; 7E4E0311 v_mov_b32_e32 v40, v18 ; 7E500312 v_mov_b32_e32 v40, v21 ; 7E500315 image_sample v[39:42], 15, 0, 0, 0, 0, 0, 0, 0, v[39:40], s[52:59], s[48:51] ; F0800F00 018D2727 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v41, v14, v15 ; D282000F 043E1D29 v_mad_f32 v44, v8, v16, v7 ; D282002C 041E2108 v_mad_f32 v43, -2.0, v8, v10 ; D282002B 042A10F5 image_sample v[45:48], 15, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[52:59], s[48:51] ; F0800F00 018D2D2B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v47, v14, v15 ; D2820007 043E1D2F v_mov_b32_e32 v44, v18 ; 7E580312 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[43:44], s[52:59], s[48:51] ; F0800F00 018D0F2B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v17, v14, v7 ; D2820007 041E1D11 v_log_f32_e64 v7, |v7| ; D34E0107 00000107 v_mul_f32_e32 v7, 0x400ccccd, v7 ; 100E0EFF 400CCCCD v_exp_f32_e32 v7, v7 ; 7E0E4B07 s_buffer_load_dword s6, s[0:3], 0x38 ; C2030138 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v7, s6, v6 ; D2820006 04180D07 v_mad_f32 v7, v19, v20, 0.5 ; D2820007 03C22913 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_log_f32_e64 v6, |v6| ; D34E0106 00000106 v_mul_f32_e32 v6, 0x3ee8ba1f, v6 ; 100C0CFF 3EE8BA1F v_exp_f32_e32 v51, v6 ; 7E664B06 s_buffer_load_dword s7, s[0:3], 0x11 ; C2038111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s7, v11 ; 100C1607 v_mul_f32_e32 v6, 0x3f4ccccd, v6 ; 100C0CFF 3F4CCCCD v_cndmask_b32_e64 v6, v9, v6, s[46:47] ; D2000006 08BA0D09 s_buffer_load_dword s7, s[0:3], 0xd ; C203810D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s7, v23 ; 10102E07 v_cndmask_b32_e64 v8, v9, v8, s[72:73] ; D2000008 09221109 v_add_f32_e32 v6, v6, v8 ; 060C1106 v_log_f32_e32 v8, v27 ; 7E104F1B v_mul_legacy_f32_e32 v8, v32, v8 ; 0E101120 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mad_f32 v6, v6, v22, v8 ; D2820006 04222D06 v_mul_f32_e32 v8, s7, v8 ; 10101007 v_mul_f32_e32 v8, v8, v12 ; 10101908 v_cndmask_b32_e64 v8, v9, v8, s[60:61] ; D2000008 08F21109 v_mad_f32 v6, v6, v13, v8 ; D2820006 04221B06 v_mul_f32_e32 v8, 0x3e800000, v36 ; 101048FF 3E800000 v_mad_f32 v8, v40, v14, v8 ; D2820008 04221D28 v_mad_f32 v8, v46, v14, v8 ; D2820008 04221D2E v_mad_f32 v8, v16, v14, v8 ; D2820008 04221D10 v_log_f32_e64 v8, |v8| ; D34E0108 00000108 v_mul_f32_e32 v8, 0x400ccccd, v8 ; 101010FF 400CCCCD v_exp_f32_e32 v8, v8 ; 7E104B08 v_mad_f32 v6, v8, s6, v6 ; D2820006 04180D08 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_log_f32_e64 v6, |v6| ; D34E0106 00000106 v_mul_f32_e32 v6, 0x3ee8ba1f, v6 ; 100C0CFF 3EE8BA1F v_exp_f32_e32 v50, v6 ; 7E644B06 s_buffer_load_dword s7, s[0:3], 0x10 ; C2038110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s7, v11 ; 100C1607 v_mul_f32_e32 v6, 0x3f4ccccd, v6 ; 100C0CFF 3F4CCCCD v_cndmask_b32_e64 v6, v9, v6, s[46:47] ; D2000006 08BA0D09 s_buffer_load_dword s7, s[0:3], 0xc ; C203810C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s7, v23 ; 10102E07 v_cndmask_b32_e64 v8, v9, v8, s[72:73] ; D2000008 09221109 v_add_f32_e32 v6, v6, v8 ; 060C1106 v_log_f32_e32 v8, v26 ; 7E104F1A v_mul_legacy_f32_e32 v8, v32, v8 ; 0E101120 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mad_f32 v6, v6, v22, v8 ; D2820006 04222D06 v_mul_f32_e32 v8, s7, v8 ; 10101007 v_mul_f32_e32 v8, v8, v12 ; 10101908 v_cndmask_b32_e64 v8, v9, v8, s[60:61] ; D2000008 08F21109 v_mad_f32 v6, v6, v13, v8 ; D2820006 04221B06 v_mul_f32_e32 v8, 0x3e800000, v35 ; 101046FF 3E800000 v_mad_f32 v8, v39, v14, v8 ; D2820008 04221D27 v_mad_f32 v8, v45, v14, v8 ; D2820008 04221D2D v_mad_f32 v8, v15, v14, v8 ; D2820008 04221D0F v_log_f32_e64 v8, |v8| ; D34E0108 00000108 v_mul_f32_e32 v8, 0x400ccccd, v8 ; 101010FF 400CCCCD v_exp_f32_e32 v8, v8 ; 7E104B08 v_mad_f32 v6, v8, s6, v6 ; D2820006 04180D08 v_mul_f32_e32 v6, v6, v7 ; 100C0F06 v_log_f32_e64 v6, |v6| ; D34E0106 00000106 v_mul_f32_e32 v6, 0x3ee8ba1f, v6 ; 100C0CFF 3EE8BA1F v_exp_f32_e32 v49, v6 ; 7E624B06 s_load_dwordx8 s[8:15], s[100:101], 0x58 ; C0C46558 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[49:52], s[8:15], s[40:43] ; F0800700 01420631 s_load_dwordx8 s[8:15], s[100:101], 0x40 ; C0C46540 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[8:15], s[36:39] ; F0800F00 01220904 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v4, 1.0, v12 ; 080818F2 v_mad_f32 v5, v8, v4, v11 ; D2820005 042E0908 v_sub_f32_e32 v13, 1.0, v2 ; 081A04F2 v_mul_f32_e32 v13, 0x3ecccccd, v13 ; 101A1AFF 3ECCCCCD v_mad_f32 v21, v2, v5, v13 ; D2820015 04360B02 v_mad_f32 v5, v7, v4, v10 ; D2820005 042A0907 v_mad_f32 v20, v2, v5, v13 ; D2820014 04360B02 v_mad_f32 v4, v6, v4, v9 ; D2820004 04260906 v_mad_f32 v19, v2, v4, v13 ; D2820013 04360902 s_load_dwordx8 s[8:15], s[100:101], 0x50 ; C0C46550 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[19:22], s[8:15], s[32:35] ; F0800700 01020413 v_mad_f32 v2, -v3, v2, v2 ; D2820002 240A0503 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v7, v20, v3 ; 100E0714 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v2, v5, v7 ; D2820007 041E0B02 s_buffer_load_dword s6, s[0:3], 0x35 ; C2030135 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v8, s6, v7 ; 08100E06 v_subrev_f32_e32 v9, s4, v29 ; 0A123A04 v_subrev_f32_e32 v10, s5, v31 ; 0A143E05 v_mul_f32_e32 v10, v10, v10 ; 1014150A v_mad_f32 v9, v9, v9, v10 ; D2820009 042A1309 v_subrev_f32_e32 v10, s44, v34 ; 0A14442C v_mad_f32 v9, v10, v10, v9 ; D2820009 0426150A s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s4, v9 ; 10121204 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v11, s4, v10 ; 10161404 v_mul_f32_e32 v11, 0x3fb8aa65, v11 ; 101616FF 3FB8AA65 v_exp_f32_e32 v11, v11 ; 7E164B0B v_sub_f32_e32 v11, 1.0, v11 ; 081616F2 v_mul_f32_e32 v9, v9, v11 ; 10121709 v_rcp_f32_e32 v10, v10 ; 7E14550A v_mul_f32_e32 v9, v9, v10 ; 10121509 v_mul_f32_e32 v9, 0x3fb8aa65, v9 ; 101212FF 3FB8AA65 v_exp_f32_e32 v9, v9 ; 7E124B09 v_add_f32_e64 v9, 0, v9 clamp ; D2060809 00021280 v_sub_f32_e32 v9, 1.0, v9 ; 081212F2 v_interp_p1_f32 v10, v0, 1, 5, [m0] ; C8281500 v_interp_p2_f32 v10, [v10], v1, 1, 5, [m0] ; C8291501 v_interp_p1_f32 v11, v0, 2, 5, [m0] ; C82C1600 v_interp_p2_f32 v11, [v11], v1, 2, 5, [m0] ; C82D1601 v_rcp_f32_e32 v0, v11 ; 7E00550B v_mul_f32_e32 v0, v10, v0 ; 1000010A v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_readlane_b32 s4, v53, 4 ; 02090935 v_readlane_b32 s5, v53, 5 ; 020B0B35 v_readlane_b32 s6, v53, 6 ; 020D0D35 v_readlane_b32 s7, v53, 7 ; 020F0F35 s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[24:31], s[4:7] ; F0800100 00260000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v9 ; 10001300 v_mad_f32 v1, v0, v8, v7 ; D2820001 041E1100 v_mul_f32_e32 v7, v19, v3 ; 100E0713 v_mad_f32 v7, v2, v4, v7 ; D2820007 041E0902 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v8, s4, v7 ; 08100E04 v_mad_f32 v7, v0, v8, v7 ; D2820007 041E1100 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 v_mul_f32_e32 v3, v21, v3 ; 10060715 v_mad_f32 v2, v2, v6, v3 ; D2820002 040E0D02 s_buffer_load_dword s0, s[0:3], 0x36 ; C2000136 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_mul_f32_e32 v2, 0x3e800000, v38 ; 10044CFF 3E800000 v_mad_f32 v2, v42, v14, v2 ; D2820002 040A1D2A v_mad_f32 v2, v48, v14, v2 ; D2820002 040A1D30 v_mad_f32 v2, v18, v14, v2 ; D2820002 040A1D12 v_add_f32_e32 v2, 1.0, v2 ; 060404F2 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_readlane_b32 s0, v53, 0 ; 02010135 v_readlane_b32 s1, v53, 1 ; 02030335 v_readlane_b32 s2, v53, 2 ; 02050535 v_readlane_b32 s3, v53, 3 ; 02070735 s_nop 2 ; BF800002 image_sample v3, 8, 0, 0, 0, 0, 0, 0, 0, v[24:25], s[16:23], s[0:3] ; F0800800 00040318 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL OUT[6], GENERIC[14] DCL CONST[0..13] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.0000, -1.0000, 0.5000, -0.5000} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xyz, IN[1], IMM[0].xxxx, IMM[0].yyyy 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].x, IN[0].xxxx, IMM[0].zzzz, IMM[0].zzzz 3: MOV TEMP[2].xy, CONST[4].xyxx 4: ADD TEMP[3].yz, -TEMP[2].xxyw, CONST[5].xxyw 5: MAD TEMP[4].x, TEMP[1].xxxx, TEMP[3].yyyy, CONST[4].xxxx 6: MOV TEMP[2].x, TEMP[4].xxxx 7: MAD TEMP[5].x, IN[0].yyyy, IMM[0].zzzz, IMM[0].zzzz 8: MAD TEMP[3].y, TEMP[5].xxxx, TEMP[3].zzzz, CONST[4].yyyy 9: MOV TEMP[2].y, TEMP[3].yyyy 10: MAD TEMP[5].z, IN[1].wwww, CONST[6].xxxx, CONST[7].xxxx 11: MOV TEMP[2].z, TEMP[5].zzzz 12: ADD TEMP[6].xyz, -TEMP[2], CONST[12] 13: MOV TEMP[6].xyz, TEMP[6].xyzx 14: MUL TEMP[1], TEMP[3].yyyy, CONST[1] 15: MAD TEMP[1], TEMP[4].xxxx, CONST[0], TEMP[1] 16: MAD TEMP[1], TEMP[5].zzzz, CONST[2], TEMP[1] 17: ADD TEMP[1], TEMP[1], CONST[3] 18: RCP TEMP[7].x, TEMP[1].wwww 19: MOV TEMP[2].w, TEMP[7].xxxx 20: MUL TEMP[7].xy, TEMP[1], TEMP[7].xxxx 21: MOV TEMP[7].xy, TEMP[7].xyxx 22: MOV TEMP[8], TEMP[1] 23: ADD TEMP[9].xy, TEMP[2], CONST[13].zwzw 24: MOV TEMP[1].xy, TEMP[9].xyxx 25: MUL TEMP[9].xy, TEMP[1], CONST[13] 26: MOV TEMP[9].xy, TEMP[9].xyxx 27: MUL TEMP[3].xy, TEMP[3].yyyy, CONST[9] 28: MOV TEMP[1].xy, TEMP[3].xyxx 29: MOV TEMP[2].xyz, TEMP[2].xyzx 30: MAD TEMP[3].xy, TEMP[4].xxxx, CONST[8], TEMP[1] 31: MOV TEMP[1].xy, TEMP[3].xyxx 32: MAD TEMP[3].xy, TEMP[5].zzzz, CONST[10], TEMP[1] 33: MOV TEMP[1].xy, TEMP[3].xyxx 34: ADD TEMP[3].xy, TEMP[1], CONST[11] 35: MOV TEMP[1].xy, TEMP[3].xyxx 36: MAD TEMP[1].xy, TEMP[1], IMM[0].zwyw, IMM[0].wwww 37: MOV TEMP[1].xy, TEMP[1].xyxx 38: MOV TEMP[0].w, IMM[1].xxxx 39: MOV TEMP[2].w, IMM[1].xxxx 40: MOV TEMP[6].w, IMM[1].xxxx 41: MOV TEMP[7].zw, IMM[1].xxyx 42: MOV TEMP[1].zw, IMM[1].xxyx 43: MOV TEMP[9].zw, IMM[1].xxyx 44: MOV OUT[6], TEMP[9] 45: MOV OUT[1], TEMP[0] 46: MOV OUT[2], TEMP[2] 47: MOV OUT[0], TEMP[8] 48: MOV OUT[3], TEMP[6] 49: MOV OUT[4], TEMP[7] 50: MOV OUT[5], TEMP[1] 51: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %50 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %51 = load <16 x i8> addrspace(2)* %50, !tbaa !0 %52 = add i32 %5, %7 %53 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %51, i32 0, i32 %52) %54 = extractelement <4 x float> %53, i32 0 %55 = extractelement <4 x float> %53, i32 1 %56 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %57 = load <16 x i8> addrspace(2)* %56, !tbaa !0 %58 = add i32 %5, %7 %59 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %57, i32 0, i32 %58) %60 = extractelement <4 x float> %59, i32 0 %61 = extractelement <4 x float> %59, i32 1 %62 = extractelement <4 x float> %59, i32 2 %63 = extractelement <4 x float> %59, i32 3 %64 = fmul float %60, 2.000000e+00 %65 = fadd float %64, -1.000000e+00 %66 = fmul float %61, 2.000000e+00 %67 = fadd float %66, -1.000000e+00 %68 = fmul float %62, 2.000000e+00 %69 = fadd float %68, -1.000000e+00 %70 = fmul float %54, 5.000000e-01 %71 = fadd float %70, 5.000000e-01 %72 = fsub float -0.000000e+00, %29 %73 = fadd float %72, %31 %74 = fsub float -0.000000e+00, %30 %75 = fadd float %74, %32 %76 = fmul float %71, %73 %77 = fadd float %76, %29 %78 = fmul float %55, 5.000000e-01 %79 = fadd float %78, 5.000000e-01 %80 = fmul float %79, %75 %81 = fadd float %80, %30 %82 = fmul float %63, %33 %83 = fadd float %82, %34 %84 = fsub float -0.000000e+00, %77 %85 = fadd float %84, %43 %86 = fsub float -0.000000e+00, %81 %87 = fadd float %86, %44 %88 = fsub float -0.000000e+00, %83 %89 = fadd float %88, %45 %90 = fmul float %81, %17 %91 = fmul float %81, %18 %92 = fmul float %81, %19 %93 = fmul float %81, %20 %94 = fmul float %77, %13 %95 = fadd float %94, %90 %96 = fmul float %77, %14 %97 = fadd float %96, %91 %98 = fmul float %77, %15 %99 = fadd float %98, %92 %100 = fmul float %77, %16 %101 = fadd float %100, %93 %102 = fmul float %83, %21 %103 = fadd float %102, %95 %104 = fmul float %83, %22 %105 = fadd float %104, %97 %106 = fmul float %83, %23 %107 = fadd float %106, %99 %108 = fmul float %83, %24 %109 = fadd float %108, %101 %110 = fadd float %103, %25 %111 = fadd float %105, %26 %112 = fadd float %107, %27 %113 = fadd float %109, %28 %114 = fdiv float 1.000000e+00, %113 %115 = fmul float %110, %114 %116 = fmul float %111, %114 %117 = fadd float %77, %48 %118 = fadd float %81, %49 %119 = fmul float %117, %46 %120 = fmul float %118, %47 %121 = fmul float %81, %37 %122 = fmul float %81, %38 %123 = fmul float %77, %35 %124 = fadd float %123, %121 %125 = fmul float %77, %36 %126 = fadd float %125, %122 %127 = fmul float %83, %39 %128 = fadd float %127, %124 %129 = fmul float %83, %40 %130 = fadd float %129, %126 %131 = fadd float %128, %41 %132 = fadd float %130, %42 %133 = fmul float %131, 5.000000e-01 %134 = fadd float %133, -5.000000e-01 %135 = fmul float %132, -5.000000e-01 %136 = fadd float %135, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %65, float %67, float %69, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %77, float %81, float %83, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %85, float %87, float %89, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %115, float %116, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %134, float %136, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 37, i32 0, float %119, float %120, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %110, float %111, float %112, float %113) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, 2.0, v3, -1.0 ; D2820005 03CE06F4 v_mad_f32 v6, 2.0, v2, -1.0 ; D2820006 03CE04F4 v_mad_f32 v7, 2.0, v1, -1.0 ; D2820007 03CE02F4 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 exp 15, 32, 0, 0, 0, v7, v6, v5, v8 ; F800020F 08050607 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_buffer_load_dword s9, s[0:3], 0x15 ; C2048115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s8 ; 7E0A0208 v_sub_f32_e32 v5, s9, v5 ; 080A0A09 buffer_load_format_xyzw v[9:12], v0, s[4:7], 0 idxen ; E00C2000 80010900 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, 0.5, v10, 0.5 ; D2820000 03C214F0 v_mad_f32 v0, v0, v5, s8 ; D2820000 00220B00 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_sub_f32_e32 v5, s5, v5 ; 080A0A05 v_mad_f32 v6, 0.5, v9, 0.5 ; D2820006 03C212F0 v_mad_f32 v5, v6, v5, s4 ; D2820005 00120B06 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_buffer_load_dword s5, s[0:3], 0x18 ; C2028118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v6, s4 ; 7E0C0204 v_mad_f32 v1, s5, v4, v6 ; D2820001 041A0805 exp 15, 33, 0, 0, 0, v5, v0, v1, v8 ; F800021F 08010005 s_buffer_load_dword s4, s[0:3], 0x31 ; C2020131 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_sub_f32_e32 v2, s4, v0 ; 08040004 s_buffer_load_dword s4, s[0:3], 0x30 ; C2020130 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v5 ; 08060A04 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v4, s4, v1 ; 08080204 exp 15, 34, 0, 0, 0, v3, v2, v4, v8 ; F800022F 08040203 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v0 ; 10040004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s4, v2 ; D2820002 04080905 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v1, s4, v2 ; D2820002 04080901 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v0 ; 10060004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s4, v3 ; D2820003 040C0905 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v1, s4, v3 ; D2820003 040C0901 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_rcp_f32_e32 v4, v3 ; 7E085503 v_mul_f32_e32 v6, v4, v2 ; 100C0504 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v0 ; 100E0004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s4, v7 ; D2820007 041C0905 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v1, s4, v7 ; D2820007 041C0901 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 35, 0, 0, 0, v4, v6, v9, v8 ; F800023F 08090604 s_buffer_load_dword s4, s[0:3], 0x24 ; C2020124 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v4, s4, v0 ; 10080004 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v5, s4, v4 ; D2820004 04100905 s_buffer_load_dword s4, s[0:3], 0x28 ; C2020128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v1, s4, v4 ; D2820004 04100901 s_buffer_load_dword s4, s[0:3], 0x2c ; C202012C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 v_mad_f32 v4, 0.5, v4, -0.5 ; D2820004 03C608F0 s_buffer_load_dword s4, s[0:3], 0x25 ; C2020125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v0 ; 100C0004 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s4, v6 ; D2820006 04180901 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 v_mad_f32 v6, -0.5, v6, -0.5 ; D2820006 03C60CF1 exp 15, 36, 0, 0, 0, v4, v6, v9, v8 ; F800024F 08090604 s_buffer_load_dword s4, s[0:3], 0x37 ; C2020137 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v4, s4, v0 ; 06080004 s_buffer_load_dword s4, s[0:3], 0x35 ; C2020135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v4 ; 10080804 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v5 ; 060C0A04 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v6 ; 100C0C04 exp 15, 37, 0, 0, 0, v6, v4, v9, v8 ; F800025F 08090406 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s4, v0 ; D2820000 04000905 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v1, s4, v0 ; D2820000 04000901 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v7, v2, v0, v3 ; F80008CF 03000207 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL IN[5], GENERIC[14], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..16] DCL TEMP[0..28], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0000} IMM[1] FLT32 { 2.2000, 1.0000, -1.0000, 2.0000} IMM[2] FLT32 { 0.5000, -16.0000, -1.4427, 0.0000} IMM[3] FLT32 { 0.3000, 0.5900, 0.1100, 1.4427} IMM[4] FLT32 { 0.4545, 0.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[4], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[7], 2D 3: ABS TEMP[1].x, TEMP[0] 4: MOV TEMP[2], -TEMP[1].xxxx 5: FSGE TEMP[3].x, TEMP[2].xxxx, IMM[0].zzzz 6: UIF TEMP[3].xxxx :0 7: MOV TEMP[3].x, IMM[0].yyyy 8: ELSE :0 9: MOV TEMP[3].x, IMM[0].wwww 10: ENDIF 11: MOV TEMP[3].x, TEMP[3].xxxx 12: FSGE TEMP[4].x, TEMP[2].yyyy, IMM[0].zzzz 13: UIF TEMP[4].xxxx :0 14: MOV TEMP[4].x, IMM[0].yyyy 15: ELSE :0 16: MOV TEMP[4].x, IMM[0].wwww 17: ENDIF 18: MOV TEMP[3].y, TEMP[4].xxxx 19: FSGE TEMP[4].x, TEMP[2].zzzz, IMM[0].zzzz 20: UIF TEMP[4].xxxx :0 21: MOV TEMP[4].x, IMM[0].yyyy 22: ELSE :0 23: MOV TEMP[4].x, IMM[0].wwww 24: ENDIF 25: MOV TEMP[3].z, TEMP[4].xxxx 26: FSGE TEMP[2].x, TEMP[2].wwww, IMM[0].zzzz 27: UIF TEMP[2].xxxx :0 28: MOV TEMP[2].x, IMM[0].yyyy 29: ELSE :0 30: MOV TEMP[2].x, IMM[0].wwww 31: ENDIF 32: MOV TEMP[3].w, TEMP[2].xxxx 33: MOV TEMP[2].w, TEMP[3] 34: FSLT TEMP[3].xyz, TEMP[3].xyzz, IMM[0].zzzz 35: OR TEMP[4].x, TEMP[3].xxxx, TEMP[3].zzzz 36: OR TEMP[3].x, TEMP[4].xxxx, TEMP[3].yyyy 37: AND TEMP[3].x, TEMP[3].xxxx, IMM[0].xxxx 38: KILL_IF -TEMP[3].xxxx 39: MOV TEMP[3].w, IMM[0].zzzz 40: MOV TEMP[3].x, IN[0].xxxx 41: MOV TEMP[3].y, IN[0].yyyy 42: MOV TEMP[3].z, IN[0].zzzz 43: DP4 TEMP[4].x, TEMP[3], TEMP[3] 44: RSQ TEMP[4].x, TEMP[4].xxxx 45: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 46: MOV TEMP[2].xyz, TEMP[3].xyzx 47: MOV TEMP[4].w, IMM[0].zzzz 48: MOV TEMP[4].x, IN[2].xxxx 49: MOV TEMP[4].y, IN[2].yyyy 50: MOV TEMP[4].z, IN[2].zzzz 51: DP4 TEMP[5].x, TEMP[4], TEMP[4] 52: RSQ TEMP[5].x, TEMP[5].xxxx 53: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 54: ABS TEMP[5], TEMP[2] 55: ABS TEMP[6], TEMP[2] 56: MUL TEMP[5].xyz, TEMP[5], TEMP[6] 57: MUL TEMP[6].xyw, TEMP[5].xyzz, TEMP[5].xyzz 58: ADD TEMP[7].y, TEMP[6].yyyy, TEMP[6].xxxx 59: MAD TEMP[7].y, TEMP[5].zzzz, TEMP[5].zzzz, TEMP[7].yyyy 60: RCP TEMP[7].x, TEMP[7].yyyy 61: MUL TEMP[6].xyz, TEMP[7].xxxx, TEMP[6].xyww 62: MUL TEMP[7].yw, CONST[9].xxzy, IN[1].xyzz 63: MOV TEMP[7].xy, TEMP[7].ywww 64: TEX TEMP[7], TEMP[7], SAMP[3], 2D 65: POW TEMP[8].x, TEMP[7].xxxx, IMM[1].xxxx 66: POW TEMP[8].y, TEMP[7].yyyy, IMM[1].xxxx 67: POW TEMP[8].z, TEMP[7].zzzz, IMM[1].xxxx 68: POW TEMP[8].w, TEMP[7].wwww, IMM[1].yyyy 69: MOV TEMP[7].w, TEMP[8].wwww 70: MUL TEMP[9].yw, CONST[9].xxzy, IN[1].xxzz 71: MOV TEMP[9].xy, TEMP[9].ywww 72: TEX TEMP[9], TEMP[9], SAMP[3], 2D 73: POW TEMP[10].x, TEMP[9].xxxx, IMM[1].xxxx 74: POW TEMP[10].y, TEMP[9].yyyy, IMM[1].xxxx 75: POW TEMP[10].z, TEMP[9].zzzz, IMM[1].xxxx 76: POW TEMP[10].w, TEMP[9].wwww, IMM[1].yyyy 77: MOV TEMP[9].w, TEMP[10].wwww 78: MUL TEMP[10].xyz, TEMP[6].yyyy, TEMP[10] 79: MOV TEMP[9].xyz, TEMP[10].xyzx 80: MAD TEMP[8].xyz, TEMP[8], TEMP[6].xxxx, TEMP[9] 81: MOV TEMP[7].xyz, TEMP[8].xyzx 82: MUL TEMP[8].yw, CONST[4].xxxx, IN[1].xxzy 83: MOV TEMP[8].xy, TEMP[8].ywww 84: TEX TEMP[8], TEMP[8], SAMP[1], 2D 85: POW TEMP[10].x, TEMP[8].xxxx, IMM[1].xxxx 86: POW TEMP[10].y, TEMP[8].yyyy, IMM[1].xxxx 87: POW TEMP[10].z, TEMP[8].zzzz, IMM[1].xxxx 88: POW TEMP[10].w, TEMP[8].wwww, IMM[1].yyyy 89: MAD TEMP[8].xyz, TEMP[10], TEMP[6].zzzz, TEMP[7] 90: MOV TEMP[7].xyz, TEMP[8].xyzx 91: MUL TEMP[10].yw, CONST[10].xxzy, IN[1].xyzz 92: MOV TEMP[2].w, -TEMP[3].zzzz 93: DP3 TEMP[11].x, TEMP[2].wyxx, TEMP[4].xyzz 94: MOV TEMP[9].y, TEMP[11].xxxx 95: MUL TEMP[11].xyz, TEMP[2].xzyw, IMM[1].yyzw 96: DP3 TEMP[12].x, TEMP[11].zxyy, TEMP[4].xyzz 97: MOV TEMP[9].w, TEMP[12].xxxx 98: DP3 TEMP[12].x, TEMP[3].xyzz, TEMP[4].xyzz 99: MOV TEMP[9].z, TEMP[12].xxxx 100: MUL TEMP[12].xyz, TEMP[2].zyxw, IMM[1].zyyw 101: DP3 TEMP[13].x, TEMP[12].xyzz, CONST[1].xyzz 102: MOV TEMP[13].z, TEMP[13].xxxx 103: DP3 TEMP[14].x, TEMP[11].zxyy, CONST[1].xyzz 104: MOV TEMP[13].w, TEMP[14].xxxx 105: DP3 TEMP[13].x, TEMP[3].xyzz, CONST[1].xyzz 106: DP3 TEMP[14].x, TEMP[12].xyzz, CONST[2].xyzz 107: MOV TEMP[12].y, TEMP[14].xxxx 108: DP3 TEMP[14].x, TEMP[11].zxyy, CONST[2].xyzz 109: MOV TEMP[12].w, TEMP[14].xxxx 110: DP3 TEMP[14].x, TEMP[3].xyzz, CONST[2].xyzz 111: MOV TEMP[12].z, TEMP[14].xxxx 112: MOV TEMP[14].xy, TEMP[10].ywww 113: TEX TEMP[14].zw, TEMP[14], SAMP[5], 2D 114: MOV TEMP[15].zw, TEMP[14].wwzw 115: MOV TEMP[16].xy, TEMP[10].ywww 116: TEX TEMP[16], TEMP[16], SAMP[4], 2D 117: MOV TEMP[17].w, TEMP[16].wwww 118: MUL TEMP[18].y, CONST[3].xxxx, CONST[3].xxxx 119: MAD TEMP[19].xy, TEMP[16], IMM[1].wwww, IMM[0].yyyy 120: MOV TEMP[15].xy, TEMP[19].xyxx 121: MUL TEMP[19].xy, TEMP[15], CONST[3].xxxx 122: MOV TEMP[15].xy, TEMP[19].xyxx 123: MOV TEMP[15].z, IMM[0].xxxx 124: MOV TEMP[20].w, IMM[0].zzzz 125: MOV TEMP[20].x, TEMP[19].xxxx 126: MOV TEMP[20].y, TEMP[19].yyyy 127: MOV TEMP[20].z, IMM[0].xxxx 128: DP4 TEMP[19].x, TEMP[20], TEMP[20] 129: RSQ TEMP[19].x, TEMP[19].xxxx 130: MUL TEMP[19].xyz, TEMP[20], TEMP[19].xxxx 131: MAD TEMP[14].z, TEMP[14].wwww, IMM[1].wwww, IMM[0].yyyy 132: MOV TEMP[14].z, TEMP[14].zzzz 133: MOV TEMP[14].xy, TEMP[16].zwzz 134: MUL TEMP[16].xyz, TEMP[15].xyyw, TEMP[15].xyxw 135: MOV TEMP[17].xyz, TEMP[16].xyzx 136: MAD TEMP[16].xyz, TEMP[14], TEMP[18].yyyy, -TEMP[17] 137: DP3 TEMP[20].x, TEMP[13].zwxx, TEMP[19].xyzz 138: MOV_SAT TEMP[20].x, TEMP[20].xxxx 139: DP3 TEMP[21].x, TEMP[12].ywzz, TEMP[19].xyzz 140: MOV_SAT TEMP[21].x, TEMP[21].xxxx 141: MOV TEMP[2].w, TEMP[21].xxxx 142: ADD TEMP[22].xyz, TEMP[9].ywzw, TEMP[12].ywzw 143: MOV TEMP[14].xyz, TEMP[22].xyzx 144: RCP TEMP[22].x, TEMP[22].zzzz 145: MAD TEMP[22].xy, TEMP[14], TEMP[22].xxxx, -TEMP[15] 146: RCP TEMP[23].x, CONST[6].xxxx 147: ADD TEMP[24].zw, TEMP[23].xxxx, TEMP[16].xyxy 148: MUL TEMP[25].w, TEMP[16].zzzz, TEMP[16].zzzz 149: MAD TEMP[25].w, TEMP[24].zzzz, TEMP[24].wwww, -TEMP[25].wwww 150: MUL TEMP[26].w, TEMP[22].xxxx, TEMP[22].xxxx 151: MUL TEMP[27].w, TEMP[22].yyyy, TEMP[24].wwww 152: DP2 TEMP[16].x, TEMP[22].xxxx, -TEMP[16].zzzz 153: ADD TEMP[16].x, TEMP[16].xxxx, TEMP[27].wwww 154: MUL TEMP[16].w, TEMP[22].yyyy, TEMP[16].xxxx 155: MAD TEMP[16].w, TEMP[26].wwww, TEMP[24].wwww, TEMP[16].wwww 156: MUL TEMP[16].w, TEMP[16].wwww, IMM[2].xxxx 157: RCP TEMP[22].x, TEMP[25].wwww 158: MUL TEMP[24].z, TEMP[16].wwww, TEMP[22].xxxx 159: MOV TEMP[26].x, -TEMP[25].wwww 160: FSGE TEMP[26].x, TEMP[26].xxxx, IMM[0].zzzz 161: UIF TEMP[26].xxxx :0 162: MOV TEMP[26].x, IMM[0].xxxx 163: ELSE :0 164: MOV TEMP[26].x, IMM[0].zzzz 165: ENDIF 166: MAD TEMP[16].w, TEMP[16].wwww, TEMP[22].xxxx, IMM[2].yyyy 167: FSGE TEMP[16].x, TEMP[16].wwww, IMM[0].zzzz 168: UIF TEMP[16].xxxx :0 169: MOV TEMP[16].x, IMM[0].xxxx 170: ELSE :0 171: MOV TEMP[16].x, IMM[0].zzzz 172: ENDIF 173: ADD TEMP[16].w, TEMP[16].xxxx, TEMP[26].xxxx 174: MUL TEMP[22].w, TEMP[24].zzzz, IMM[2].zzzz 175: EX2 TEMP[22].x, TEMP[22].wwww 176: MAX TEMP[24].x, TEMP[25].wwww, IMM[2].wwww 177: RSQ TEMP[24].x, TEMP[24].xxxx 178: MUL TEMP[22].w, TEMP[24].xxxx, TEMP[22].xxxx 179: MOV TEMP[16].x, -TEMP[16].wwww 180: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].zzzz 181: UIF TEMP[16].xxxx :0 182: MOV TEMP[16].x, TEMP[22].wwww 183: ELSE :0 184: MOV TEMP[16].x, IMM[0].zzzz 185: ENDIF 186: DP3 TEMP[22].x, TEMP[19].xzyy, TEMP[9].yzww 187: ADD TEMP[22].w, -TEMP[22].xxxx, IMM[0].xxxx 188: MUL TEMP[24].w, TEMP[22].wwww, TEMP[22].wwww 189: MUL TEMP[24].w, TEMP[24].wwww, TEMP[24].wwww 190: MUL TEMP[22].w, TEMP[22].wwww, TEMP[24].wwww 191: MOV TEMP[7].w, TEMP[22].wwww 192: MUL TEMP[24].xyz, TEMP[2].zyxw, IMM[1].zzyw 193: DP3 TEMP[15].x, TEMP[19].xyzz, TEMP[24].xyzz 194: DP3 TEMP[24].x, TEMP[19].yxzz, TEMP[3].xyyy 195: MOV TEMP[15].y, TEMP[24].xxxx 196: DP3 TEMP[19].x, TEMP[19].xyzz, TEMP[3].xzzz 197: MOV TEMP[15].z, TEMP[19].xxxx 198: MOV TEMP[19].xyz, TEMP[15].xyzz 199: TEX TEMP[19], TEMP[19], SAMP[0], CUBE 200: POW TEMP[24].x, TEMP[19].xxxx, IMM[1].xxxx 201: POW TEMP[24].y, TEMP[19].yyyy, IMM[1].xxxx 202: POW TEMP[24].z, TEMP[19].zzzz, IMM[1].xxxx 203: POW TEMP[24].w, TEMP[19].wwww, IMM[1].yyyy 204: MOV TEMP[15].w, TEMP[24].wwww 205: ADD TEMP[19].xyz, TEMP[20].xxxx, TEMP[24] 206: MOV TEMP[15].xyz, TEMP[19].xyzx 207: MUL TEMP[19].xyz, TEMP[7], TEMP[15] 208: MOV TEMP[15].xyz, TEMP[19].xyzx 209: MUL TEMP[16], TEMP[16].xxxx, TEMP[22].wwww 210: MOV_SAT TEMP[16], TEMP[16] 211: MAD TEMP[15].xyz, TEMP[16].wwww, TEMP[21].xxxx, TEMP[15] 212: MUL TEMP[16].xy, CONST[10], IN[1].xzzw 213: MUL TEMP[19].xyz, TEMP[2].zyxw, IMM[1].yyzw 214: DP3 TEMP[9].x, TEMP[19].zxyy, TEMP[4].xyzz 215: DP3 TEMP[20].x, TEMP[19].zxyy, CONST[1].xyzz 216: MOV TEMP[13].y, TEMP[20].xxxx 217: DP3 TEMP[12].x, TEMP[19].zxyy, CONST[2].xyzz 218: MOV TEMP[20].xy, TEMP[16].xyyy 219: TEX TEMP[20].w, TEMP[20], SAMP[5], 2D 220: MOV TEMP[14].w, TEMP[20].wwww 221: MOV TEMP[16].xy, TEMP[16].xyyy 222: TEX TEMP[16], TEMP[16], SAMP[4], 2D 223: MOV TEMP[17].zw, TEMP[16].wwzw 224: MAD TEMP[21].xy, TEMP[16], IMM[1].wwww, IMM[0].yyyy 225: MOV TEMP[17].xy, TEMP[21].xyxx 226: MUL TEMP[21].xy, TEMP[17], CONST[3].xxxx 227: MOV TEMP[14].xy, TEMP[21].xyxx 228: MOV TEMP[22].w, IMM[0].zzzz 229: MOV TEMP[22].x, TEMP[21].xxxx 230: MOV TEMP[22].y, TEMP[21].yyyy 231: MOV TEMP[22].z, IMM[0].xxxx 232: DP4 TEMP[21].x, TEMP[22], TEMP[22] 233: RSQ TEMP[21].x, TEMP[21].xxxx 234: MUL TEMP[21].xyz, TEMP[22], TEMP[21].xxxx 235: MAD TEMP[20].z, TEMP[20].wwww, IMM[1].wwww, IMM[0].yyyy 236: MOV TEMP[20].z, TEMP[20].zzzz 237: MOV TEMP[20].xy, TEMP[16].zwzz 238: MUL TEMP[16].xyz, TEMP[14].xyyw, TEMP[14].xyxw 239: MOV TEMP[17].xyz, TEMP[16].xyzx 240: MAD TEMP[16].xyz, TEMP[20], TEMP[18].yyyy, -TEMP[17] 241: MOV TEMP[17].xyz, TEMP[16].xyzx 242: DP3 TEMP[20].x, TEMP[13].ywxx, TEMP[21].xyzz 243: MOV_SAT TEMP[20].x, TEMP[20].xxxx 244: DP3 TEMP[22].x, TEMP[12].xwzz, TEMP[21].xyzz 245: MOV_SAT TEMP[22].x, TEMP[22].xxxx 246: ADD TEMP[24].yzw, TEMP[9].xxwz, TEMP[12].xxwz 247: MOV TEMP[13].yzw, TEMP[24].zyzw 248: RCP TEMP[24].x, TEMP[24].wwww 249: MAD TEMP[24].yz, TEMP[13], TEMP[24].xxxx, -TEMP[14].xxyw 250: ADD TEMP[25].xy, TEMP[23].xxxx, TEMP[17] 251: MUL TEMP[26].w, TEMP[16].zzzz, TEMP[16].zzzz 252: MAD TEMP[26].w, TEMP[25].xxxx, TEMP[25].yyyy, -TEMP[26].wwww 253: MUL TEMP[27].w, TEMP[24].yyyy, TEMP[24].yyyy 254: MUL TEMP[28].w, TEMP[24].zzzz, TEMP[25].yyyy 255: DP2 TEMP[16].x, TEMP[24].yyyy, -TEMP[16].zzzz 256: ADD TEMP[16].x, TEMP[16].xxxx, TEMP[28].wwww 257: MUL TEMP[16].w, TEMP[24].zzzz, TEMP[16].xxxx 258: MAD TEMP[16].w, TEMP[27].wwww, TEMP[25].yyyy, TEMP[16].wwww 259: MUL TEMP[16].w, TEMP[16].wwww, IMM[2].xxxx 260: RCP TEMP[24].x, TEMP[26].wwww 261: MUL TEMP[25].w, TEMP[16].wwww, TEMP[24].xxxx 262: MOV TEMP[12].w, TEMP[25].wwww 263: MOV TEMP[27].x, -TEMP[26].wwww 264: FSGE TEMP[27].x, TEMP[27].xxxx, IMM[0].zzzz 265: UIF TEMP[27].xxxx :0 266: MOV TEMP[27].x, IMM[0].xxxx 267: ELSE :0 268: MOV TEMP[27].x, IMM[0].zzzz 269: ENDIF 270: MAD TEMP[16].w, TEMP[16].wwww, TEMP[24].xxxx, IMM[2].yyyy 271: FSGE TEMP[16].x, TEMP[16].wwww, IMM[0].zzzz 272: UIF TEMP[16].xxxx :0 273: MOV TEMP[16].x, IMM[0].xxxx 274: ELSE :0 275: MOV TEMP[16].x, IMM[0].zzzz 276: ENDIF 277: ADD TEMP[16].w, TEMP[16].xxxx, TEMP[27].xxxx 278: MUL TEMP[24].w, TEMP[25].wwww, IMM[2].zzzz 279: EX2 TEMP[24].x, TEMP[24].wwww 280: MAX TEMP[25].x, TEMP[26].wwww, IMM[2].wwww 281: RSQ TEMP[25].x, TEMP[25].xxxx 282: MUL TEMP[24].w, TEMP[25].xxxx, TEMP[24].xxxx 283: MOV TEMP[16].x, -TEMP[16].wwww 284: FSGE TEMP[16].x, TEMP[16].xxxx, IMM[0].zzzz 285: UIF TEMP[16].xxxx :0 286: MOV TEMP[16].x, TEMP[24].wwww 287: ELSE :0 288: MOV TEMP[16].x, IMM[0].zzzz 289: ENDIF 290: DP3 TEMP[24].x, TEMP[21].xzyy, TEMP[9].xzww 291: ADD TEMP[24].w, -TEMP[24].xxxx, IMM[0].xxxx 292: MUL TEMP[25].w, TEMP[24].wwww, TEMP[24].wwww 293: MUL TEMP[25].w, TEMP[25].wwww, TEMP[25].wwww 294: MOV TEMP[9].w, TEMP[25].wwww 295: MUL TEMP[24].w, TEMP[24].wwww, TEMP[25].wwww 296: MUL TEMP[25].yzw, TEMP[3].xxyx, IMM[0].yyyx 297: DP3 TEMP[17].x, TEMP[21].xyzz, TEMP[25].yzww 298: DP3 TEMP[25].x, TEMP[21].yzxx, TEMP[3].xyzz 299: MOV TEMP[17].y, TEMP[25].xxxx 300: DP3 TEMP[21].x, TEMP[21].xyzz, TEMP[3].yzzz 301: MOV TEMP[17].z, TEMP[21].xxxx 302: MOV TEMP[21].xyz, TEMP[17].xyzz 303: TEX TEMP[21], TEMP[21], SAMP[0], CUBE 304: POW TEMP[25].x, TEMP[21].xxxx, IMM[1].xxxx 305: POW TEMP[25].y, TEMP[21].yyyy, IMM[1].xxxx 306: POW TEMP[25].z, TEMP[21].zzzz, IMM[1].xxxx 307: POW TEMP[25].w, TEMP[21].wwww, IMM[1].yyyy 308: MOV TEMP[17].w, TEMP[25].wwzw 309: ADD TEMP[20].yzw, TEMP[20].xxxx, TEMP[25].xxyz 310: MOV TEMP[13].yzw, TEMP[20].zyzw 311: MUL TEMP[20].yzw, TEMP[8].xxyz, TEMP[13] 312: MOV TEMP[13].yzw, TEMP[20].zyzw 313: MUL TEMP[16], TEMP[16].xxxx, TEMP[24].wwww 314: MOV_SAT TEMP[16], TEMP[16] 315: MAD TEMP[16].yzw, TEMP[16].wwww, TEMP[22].xxxx, TEMP[13] 316: MUL TEMP[20].xy, CONST[5].xxxx, IN[1] 317: DP3 TEMP[9].x, TEMP[19].xyzz, TEMP[4].xyzz 318: DP3 TEMP[21].x, TEMP[11].xyzz, TEMP[4].xyzz 319: MOV TEMP[9].y, TEMP[21].xxxx 320: DP3 TEMP[14].x, TEMP[19].xyzz, CONST[1].xyzz 321: DP3 TEMP[21].x, TEMP[11].xyzz, CONST[1].xyzz 322: MOV TEMP[14].y, TEMP[21].xxxx 323: DP3 TEMP[12].x, TEMP[19].xyzz, CONST[2].xyzz 324: DP3 TEMP[19].x, TEMP[11].xyzz, CONST[2].xyzz 325: MOV TEMP[12].y, TEMP[19].xxxx 326: MOV TEMP[19].xy, TEMP[20].xyyy 327: TEX TEMP[19], TEMP[19], SAMP[2], 2D 328: MOV TEMP[10].zw, TEMP[19].wwzw 329: MAD TEMP[20].xy, TEMP[19], IMM[1].wwww, IMM[0].yyyy 330: MOV TEMP[10].xy, TEMP[20].xyxx 331: MUL TEMP[20].xy, TEMP[10], CONST[3].xxxx 332: MOV TEMP[17].xy, TEMP[20].xyxx 333: MOV TEMP[17].z, IMM[0].xxxx 334: MOV TEMP[21].w, IMM[0].zzzz 335: MOV TEMP[21].x, TEMP[20].xxxx 336: MOV TEMP[21].y, TEMP[20].yyyy 337: MOV TEMP[21].z, IMM[0].xxxx 338: DP4 TEMP[20].x, TEMP[21], TEMP[21] 339: RSQ TEMP[20].x, TEMP[20].xxxx 340: MUL TEMP[20].xyz, TEMP[21], TEMP[20].xxxx 341: MAD TEMP[19].xyz, TEMP[19].zwww, IMM[1].yywz, IMM[0].zzyy 342: MOV TEMP[10].xyz, TEMP[19].xyzx 343: MUL TEMP[19].xyz, TEMP[17].xyyw, TEMP[17].xyxw 344: MOV TEMP[11].xyz, TEMP[19].xyzx 345: MAD TEMP[11].xyz, TEMP[10], TEMP[18].yyyy, -TEMP[11] 346: MOV TEMP[10].xyz, TEMP[11].xyzx 347: MOV TEMP[14].z, TEMP[13].xxxx 348: DP3 TEMP[14].x, TEMP[14].xyzz, TEMP[20].xyzz 349: MOV_SAT TEMP[14].x, TEMP[14].xxxx 350: DP3 TEMP[18].x, TEMP[12].xyzz, TEMP[20].xyzz 351: MOV_SAT TEMP[18].x, TEMP[18].xxxx 352: ADD TEMP[19].xyz, TEMP[9], TEMP[12] 353: MOV TEMP[12].xyz, TEMP[19].xyzx 354: RCP TEMP[19].x, TEMP[19].zzzz 355: MAD TEMP[12].xy, TEMP[12], TEMP[19].xxxx, -TEMP[17] 356: ADD TEMP[10].xy, TEMP[23].xxxx, TEMP[10] 357: MUL TEMP[17].w, TEMP[11].zzzz, TEMP[11].zzzz 358: MAD TEMP[17].w, TEMP[10].xxxx, TEMP[10].yyyy, -TEMP[17].wwww 359: MUL TEMP[19].w, TEMP[12].xxxx, TEMP[12].xxxx 360: MUL TEMP[21].w, TEMP[12].yyyy, TEMP[10].yyyy 361: DP2 TEMP[11].x, TEMP[12].xxxx, -TEMP[11].zzzz 362: ADD TEMP[11].x, TEMP[11].xxxx, TEMP[21].wwww 363: MUL TEMP[11].w, TEMP[12].yyyy, TEMP[11].xxxx 364: MAD TEMP[10].w, TEMP[19].wwww, TEMP[10].yyyy, TEMP[11].wwww 365: MUL TEMP[10].w, TEMP[10].wwww, IMM[2].xxxx 366: RCP TEMP[11].x, TEMP[17].wwww 367: MUL TEMP[12].w, TEMP[10].wwww, TEMP[11].xxxx 368: MOV TEMP[7].w, TEMP[12].wwww 369: MOV TEMP[19].x, -TEMP[17].wwww 370: FSGE TEMP[19].x, TEMP[19].xxxx, IMM[0].zzzz 371: UIF TEMP[19].xxxx :0 372: MOV TEMP[19].x, IMM[0].xxxx 373: ELSE :0 374: MOV TEMP[19].x, IMM[0].zzzz 375: ENDIF 376: MAD TEMP[10].w, TEMP[10].wwww, TEMP[11].xxxx, IMM[2].yyyy 377: FSGE TEMP[10].x, TEMP[10].wwww, IMM[0].zzzz 378: UIF TEMP[10].xxxx :0 379: MOV TEMP[10].x, IMM[0].xxxx 380: ELSE :0 381: MOV TEMP[10].x, IMM[0].zzzz 382: ENDIF 383: ADD TEMP[10].w, TEMP[10].xxxx, TEMP[19].xxxx 384: MUL TEMP[11].w, TEMP[12].wwww, IMM[2].zzzz 385: EX2 TEMP[11].x, TEMP[11].wwww 386: MAX TEMP[12].x, TEMP[17].wwww, IMM[2].wwww 387: RSQ TEMP[12].x, TEMP[12].xxxx 388: MUL TEMP[11].w, TEMP[12].xxxx, TEMP[11].xxxx 389: MOV TEMP[10].x, -TEMP[10].wwww 390: FSGE TEMP[10].x, TEMP[10].xxxx, IMM[0].zzzz 391: UIF TEMP[10].xxxx :0 392: MOV TEMP[10].x, TEMP[11].wwww 393: ELSE :0 394: MOV TEMP[10].x, IMM[0].zzzz 395: ENDIF 396: MOV TEMP[2].w, TEMP[10].xxxx 397: DP3 TEMP[11].x, TEMP[20].xyzz, TEMP[9].xyzz 398: ADD TEMP[11].w, -TEMP[11].xxxx, IMM[0].xxxx 399: MUL TEMP[12].w, TEMP[11].wwww, TEMP[11].wwww 400: MUL TEMP[12].w, TEMP[12].wwww, TEMP[12].wwww 401: MUL TEMP[11].w, TEMP[11].wwww, TEMP[12].wwww 402: DP3 TEMP[9].x, TEMP[20].yzxx, TEMP[3].xxzz 403: DP3 TEMP[12].x, TEMP[20].xzyy, TEMP[3].yyzz 404: MOV TEMP[9].y, TEMP[12].xxxx 405: MUL TEMP[12].xyz, TEMP[2], IMM[1].zzyw 406: DP3 TEMP[12].x, TEMP[20].xyzz, TEMP[12].xyzz 407: MOV TEMP[9].z, TEMP[12].xxxx 408: MOV TEMP[12].xyz, TEMP[9].xyzz 409: TEX TEMP[12], TEMP[12], SAMP[0], CUBE 410: POW TEMP[17].x, TEMP[12].xxxx, IMM[1].xxxx 411: POW TEMP[17].y, TEMP[12].yyyy, IMM[1].xxxx 412: POW TEMP[17].z, TEMP[12].zzzz, IMM[1].xxxx 413: POW TEMP[17].w, TEMP[12].wwww, IMM[1].yyyy 414: MOV TEMP[9].w, TEMP[17].wwww 415: ADD TEMP[12].xyz, TEMP[14].xxxx, TEMP[17] 416: MOV TEMP[9].xyz, TEMP[12].xyzx 417: MUL TEMP[12].xyz, TEMP[7], TEMP[9] 418: MOV TEMP[9].xyz, TEMP[12].xyzx 419: MUL TEMP[10], TEMP[10].xxxx, TEMP[11].wwww 420: MOV_SAT TEMP[10], TEMP[10] 421: MAD TEMP[10].xyz, TEMP[10].yyyy, TEMP[18].xxxx, TEMP[9] 422: MOV TEMP[9].xyz, TEMP[10].xyzx 423: MUL TEMP[10].xyz, TEMP[6].yyyy, TEMP[16].yzww 424: MAD TEMP[10].xyw, TEMP[15].xyzz, TEMP[6].xxxx, TEMP[10].xyzz 425: MOV TEMP[5].w, TEMP[10].xyxw 426: MAD TEMP[6].xyz, TEMP[9], TEMP[6].zzzz, TEMP[10].xyww 427: MOV TEMP[5].xyz, TEMP[6].xyzx 428: DP3 TEMP[6].x, TEMP[8].xyzz, IMM[3].xyzz 429: ADD TEMP[6].y, -TEMP[6].xxxx, IMM[0].xxxx 430: MUL TEMP[6].xyz, TEMP[6].yyyy, CONST[7] 431: MOV TEMP[7].xyz, TEMP[6].xyzx 432: MOV_SAT TEMP[6].x, TEMP[13].xxxx 433: MOV TEMP[8].xyz, TEMP[3].xyzz 434: TEX TEMP[8], TEMP[8], SAMP[0], CUBE 435: POW TEMP[9].x, TEMP[8].xxxx, IMM[1].xxxx 436: POW TEMP[9].y, TEMP[8].yyyy, IMM[1].xxxx 437: POW TEMP[9].z, TEMP[8].zzzz, IMM[1].xxxx 438: POW TEMP[9].w, TEMP[8].wwww, IMM[1].yyyy 439: MOV TEMP[2].w, TEMP[9].wwww 440: ADD TEMP[6].xyz, TEMP[9], TEMP[6].xxxx 441: MOV TEMP[2].xyz, TEMP[6].xyzx 442: MAD TEMP[6].xyz, TEMP[7], TEMP[2], TEMP[5] 443: MAD TEMP[8].x, IN[3].yyyy, IMM[2].xxxx, IMM[2].xxxx 444: MOV TEMP[5].x, TEMP[8].xxxx 445: MOV TEMP[5].y, CONST[16].wwww 446: MOV TEMP[8].xy, TEMP[5].xyyy 447: TEX TEMP[8].x, TEMP[8], SAMP[6], 2D 448: MOV TEMP[5].x, TEMP[8].xxxx 449: ADD TEMP[9].yzw, -CONST[0].xxyz, IN[1].xxyz 450: DP3 TEMP[10].x, TEMP[9].yzww, TEMP[9].yzww 451: MUL TEMP[10].y, TEMP[10].xxxx, CONST[16].yyyy 452: MUL TEMP[11].w, TEMP[9].wwww, CONST[16].xxxx 453: MUL TEMP[11].w, TEMP[11].wwww, IMM[3].wwww 454: EX2 TEMP[11].x, TEMP[11].wwww 455: ADD TEMP[11].w, -TEMP[11].xxxx, IMM[0].xxxx 456: MUL TEMP[10].y, TEMP[11].wwww, TEMP[10].yyyy 457: RCP TEMP[9].x, TEMP[9].wwww 458: MUL TEMP[9].y, TEMP[9].xxxx, TEMP[10].yyyy 459: MUL TEMP[9].y, TEMP[9].yyyy, IMM[3].wwww 460: EX2 TEMP[9].x, TEMP[9].yyyy 461: MOV_SAT TEMP[9].x, TEMP[9].xxxx 462: ADD TEMP[9].y, -TEMP[9].xxxx, IMM[0].xxxx 463: MUL TEMP[10].w, TEMP[9].yyyy, TEMP[8].xxxx 464: ADD TEMP[11].w, -CONST[8].xxxx, IN[1].zzzz 465: MOV TEMP[1].w, TEMP[11].wwww 466: FSGE TEMP[12].x, TEMP[11].wwww, IMM[0].zzzz 467: UIF TEMP[12].xxxx :0 468: MOV TEMP[12].x, IMM[0].xxxx 469: ELSE :0 470: MOV TEMP[12].x, IMM[0].zzzz 471: ENDIF 472: MOV TEMP[12].w, TEMP[12].xxxx 473: ADD TEMP[13].w, CONST[8].xxxx, -IN[1].zzzz 474: MOV TEMP[7].xyz, CONST[11].xyzx 475: ADD TEMP[14].xyz, -TEMP[7], CONST[12] 476: MUL TEMP[15], TEMP[13].wwww, CONST[13].xxxx 477: MOV_SAT TEMP[15], TEMP[15] 478: MAD TEMP[14].yzw, TEMP[15].yyyy, TEMP[14].xxyz, CONST[11].xxyz 479: MOV TEMP[5].yzw, TEMP[14].zyzw 480: MUL TEMP[14].yzw, TEMP[6].xxyz, TEMP[5] 481: RCP TEMP[7].x, -TEMP[4].zzzz 482: MUL TEMP[7].w, TEMP[13].wwww, TEMP[7].xxxx 483: MUL TEMP[7].xyw, TEMP[7].wwww, TEMP[4].xyzz 484: MOV TEMP[3].w, TEMP[7].xyxw 485: DP3 TEMP[3].x, TEMP[7].xyww, TEMP[7].xyww 486: MAX TEMP[7].x, TEMP[3].xxxx, IMM[2].wwww 487: RSQ TEMP[13].x, TEMP[7].xxxx 488: MUL TEMP[13].x, TEMP[13].xxxx, TEMP[7].xxxx 489: CMP TEMP[3].x, -TEMP[7].xxxx, TEMP[13].xxxx, IMM[0].zzzz 490: MUL TEMP[7].x, -TEMP[3].xxxx, CONST[14].xxxx 491: MUL TEMP[7].x, TEMP[7].xxxx, IMM[3].wwww 492: EX2 TEMP[7].x, TEMP[7].xxxx 493: MUL TEMP[13].y, TEMP[4].zzzz, TEMP[4].zzzz 494: MUL TEMP[13].y, TEMP[13].yyyy, TEMP[13].yyyy 495: MAD TEMP[4].y, TEMP[4].zzzz, -TEMP[13].yyyy, IMM[0].xxxx 496: ADD TEMP[4].y, -TEMP[4].yyyy, IMM[0].xxxx 497: MUL TEMP[4].x, TEMP[7].xxxx, TEMP[4].yyyy 498: MUL TEMP[4].xyz, TEMP[4].xxxx, TEMP[14].yzww 499: MOV TEMP[3].xyz, TEMP[4].xyzx 500: MAD TEMP[4].y, TEMP[8].xxxx, -TEMP[9].yyyy, IMM[0].xxxx 501: MUL TEMP[4].xyz, TEMP[4].yyyy, TEMP[3] 502: MOV TEMP[3].w, IMM[0].zzzz 503: FSGE TEMP[7].x, TEMP[11].wwww, IMM[0].zzzz 504: UIF TEMP[7].xxxx :0 505: MOV TEMP[7].x, TEMP[6].xxxx 506: ELSE :0 507: MOV TEMP[7].x, TEMP[4].xxxx 508: ENDIF 509: FSGE TEMP[8].x, TEMP[11].wwww, IMM[0].zzzz 510: UIF TEMP[8].xxxx :0 511: MOV TEMP[8].x, TEMP[6].yyyy 512: ELSE :0 513: MOV TEMP[8].x, TEMP[4].yyyy 514: ENDIF 515: FSGE TEMP[9].x, TEMP[11].wwww, IMM[0].zzzz 516: UIF TEMP[9].xxxx :0 517: MOV TEMP[6].x, TEMP[6].zzzz 518: ELSE :0 519: MOV TEMP[6].x, TEMP[4].zzzz 520: ENDIF 521: FSGE TEMP[4].x, TEMP[11].wwww, IMM[0].zzzz 522: UIF TEMP[4].xxxx :0 523: MOV TEMP[4].x, TEMP[10].wwww 524: ELSE :0 525: MOV TEMP[4].x, IMM[0].zzzz 526: ENDIF 527: MOV TEMP[2].w, TEMP[4].xxxx 528: ABS TEMP[7].x, TEMP[7].xxxx 529: LG2 TEMP[3].x, TEMP[7].xxxx 530: ABS TEMP[7].x, TEMP[8].xxxx 531: LG2 TEMP[7].x, TEMP[7].xxxx 532: MOV TEMP[3].y, TEMP[7].xxxx 533: ABS TEMP[6].x, TEMP[6].xxxx 534: LG2 TEMP[6].x, TEMP[6].xxxx 535: MOV TEMP[3].z, TEMP[6].xxxx 536: MUL TEMP[6].xyz, TEMP[3], IMM[4].xxxx 537: EX2 TEMP[3].x, TEMP[6].xxxx 538: EX2 TEMP[7].x, TEMP[6].yyyy 539: MOV TEMP[3].y, TEMP[7].xxxx 540: EX2 TEMP[6].x, TEMP[6].zzzz 541: MOV TEMP[3].z, TEMP[6].xxxx 542: MOV TEMP[6].xy, IN[5].xyyy 543: TEX TEMP[6], TEMP[6], SAMP[8], 2D 544: MOV TEMP[5].w, TEMP[6].wwww 545: LRP TEMP[3].xyz, TEMP[0].xxxx, TEMP[3], TEMP[6] 546: MOV TEMP[2].xyz, TEMP[3].xyzx 547: MOV TEMP[3].xyz, TEMP[3].xyzz 548: TEX TEMP[3], TEMP[3], SAMP[9], 3D 549: MAD TEMP[0].x, TEMP[0].zzzz, -TEMP[0].xxxx, TEMP[0].xxxx 550: LRP TEMP[0].xyz, TEMP[0].xxxx, TEMP[3], TEMP[2] 551: MOV TEMP[5].xyz, TEMP[0].xyzx 552: ADD TEMP[0].xyz, -TEMP[5], CONST[15] 553: MOV TEMP[1].xyz, TEMP[0].xyzx 554: MAD TEMP[0].xyz, TEMP[4].xxxx, TEMP[1], TEMP[5] 555: MOV TEMP[12].xyz, TEMP[0].xyzx 556: MOV OUT[0], TEMP[12] 557: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 24) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 116) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 120) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %23, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %23, i32 160) %44 = call float @llvm.SI.load.const(<16 x i8> %23, i32 164) %45 = call float @llvm.SI.load.const(<16 x i8> %23, i32 176) %46 = call float @llvm.SI.load.const(<16 x i8> %23, i32 180) %47 = call float @llvm.SI.load.const(<16 x i8> %23, i32 184) %48 = call float @llvm.SI.load.const(<16 x i8> %23, i32 192) %49 = call float @llvm.SI.load.const(<16 x i8> %23, i32 196) %50 = call float @llvm.SI.load.const(<16 x i8> %23, i32 200) %51 = call float @llvm.SI.load.const(<16 x i8> %23, i32 208) %52 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %53 = call float @llvm.SI.load.const(<16 x i8> %23, i32 240) %54 = call float @llvm.SI.load.const(<16 x i8> %23, i32 244) %55 = call float @llvm.SI.load.const(<16 x i8> %23, i32 248) %56 = call float @llvm.SI.load.const(<16 x i8> %23, i32 256) %57 = call float @llvm.SI.load.const(<16 x i8> %23, i32 260) %58 = call float @llvm.SI.load.const(<16 x i8> %23, i32 268) %59 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %60 = load <8 x i32> addrspace(2)* %59, !tbaa !0 %61 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %62 = load <4 x i32> addrspace(2)* %61, !tbaa !0 %63 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %64 = load <8 x i32> addrspace(2)* %63, !tbaa !0 %65 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %66 = load <4 x i32> addrspace(2)* %65, !tbaa !0 %67 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %68 = load <8 x i32> addrspace(2)* %67, !tbaa !0 %69 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %70 = load <4 x i32> addrspace(2)* %69, !tbaa !0 %71 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %72 = load <8 x i32> addrspace(2)* %71, !tbaa !0 %73 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %74 = load <4 x i32> addrspace(2)* %73, !tbaa !0 %75 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %76 = load <8 x i32> addrspace(2)* %75, !tbaa !0 %77 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %78 = load <4 x i32> addrspace(2)* %77, !tbaa !0 %79 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %80 = load <8 x i32> addrspace(2)* %79, !tbaa !0 %81 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %82 = load <4 x i32> addrspace(2)* %81, !tbaa !0 %83 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %84 = load <8 x i32> addrspace(2)* %83, !tbaa !0 %85 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %86 = load <4 x i32> addrspace(2)* %85, !tbaa !0 %87 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %88 = load <8 x i32> addrspace(2)* %87, !tbaa !0 %89 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %90 = load <4 x i32> addrspace(2)* %89, !tbaa !0 %91 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %92 = load <8 x i32> addrspace(2)* %91, !tbaa !0 %93 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %94 = load <4 x i32> addrspace(2)* %93, !tbaa !0 %95 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %96 = load <8 x i32> addrspace(2)* %95, !tbaa !0 %97 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %98 = load <4 x i32> addrspace(2)* %97, !tbaa !0 %99 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %100 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %101 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %102 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %103 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %104 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %105 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %106 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %107 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %108 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %109 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %110 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %111 = call float @llvm.SI.fs.interp(i32 0, i32 5, i32 %5, <2 x i32> %7) %112 = call float @llvm.SI.fs.interp(i32 1, i32 5, i32 %5, <2 x i32> %7) %113 = fmul float %109, 1.000000e+00 %114 = fadd float %113, 0.000000e+00 %115 = fmul float %110, -1.000000e+00 %116 = fadd float %115, 1.000000e+00 %117 = bitcast float %114 to i32 %118 = bitcast float %116 to i32 %119 = insertelement <2 x i32> undef, i32 %117, i32 0 %120 = insertelement <2 x i32> %119, i32 %118, i32 1 %121 = bitcast <8 x i32> %88 to <32 x i8> %122 = bitcast <4 x i32> %90 to <16 x i8> %123 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %120, <32 x i8> %121, <16 x i8> %122, i32 2) %124 = extractelement <4 x float> %123, i32 0 %125 = extractelement <4 x float> %123, i32 2 %126 = call float @fabs(float %124) %127 = fsub float -0.000000e+00, %126 %128 = fsub float -0.000000e+00, %126 %129 = fsub float -0.000000e+00, %126 %130 = fsub float -0.000000e+00, %126 %131 = fcmp oge float %127, 0.000000e+00 %132 = sext i1 %131 to i32 %133 = bitcast i32 %132 to float %134 = bitcast float %133 to i32 %135 = icmp ne i32 %134, 0 %. = select i1 %135, float -1.000000e+00, float -0.000000e+00 %136 = fcmp oge float %128, 0.000000e+00 %137 = sext i1 %136 to i32 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = icmp ne i32 %139, 0 %temp16.0 = select i1 %140, float -1.000000e+00, float -0.000000e+00 %141 = fcmp oge float %129, 0.000000e+00 %142 = sext i1 %141 to i32 %143 = bitcast i32 %142 to float %144 = bitcast float %143 to i32 %145 = icmp ne i32 %144, 0 %.167 = select i1 %145, float -1.000000e+00, float -0.000000e+00 %146 = fcmp oge float %130, 0.000000e+00 %147 = sext i1 %146 to i32 %148 = bitcast i32 %147 to float %149 = bitcast float %148 to i32 %150 = icmp ne i32 %149, 0 %temp8.0 = select i1 %150, float -1.000000e+00, float -0.000000e+00 %151 = fcmp olt float %., 0.000000e+00 %152 = sext i1 %151 to i32 %153 = fcmp olt float %temp16.0, 0.000000e+00 %154 = sext i1 %153 to i32 %155 = fcmp olt float %.167, 0.000000e+00 %156 = sext i1 %155 to i32 %157 = bitcast i32 %152 to float %158 = bitcast i32 %154 to float %159 = bitcast i32 %156 to float %160 = bitcast float %157 to i32 %161 = bitcast float %159 to i32 %162 = or i32 %160, %161 %163 = bitcast i32 %162 to float %164 = bitcast float %163 to i32 %165 = bitcast float %158 to i32 %166 = or i32 %164, %165 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = and i32 %168, 1065353216 %170 = bitcast i32 %169 to float %171 = fsub float -0.000000e+00, %170 %172 = fsub float -0.000000e+00, %170 %173 = fsub float -0.000000e+00, %170 %174 = fsub float -0.000000e+00, %170 call void @llvm.AMDGPU.kill(float %171) call void @llvm.AMDGPU.kill(float %172) call void @llvm.AMDGPU.kill(float %173) call void @llvm.AMDGPU.kill(float %174) %175 = fmul float %99, %99 %176 = fmul float %100, %100 %177 = fadd float %175, %176 %178 = fmul float %101, %101 %179 = fadd float %177, %178 %180 = fmul float 0.000000e+00, 0.000000e+00 %181 = fadd float %179, %180 %182 = call float @llvm.AMDGPU.rsq.clamped.f32(float %181) %183 = fmul float %99, %182 %184 = fmul float %100, %182 %185 = fmul float %101, %182 %186 = fmul float %105, %105 %187 = fmul float %106, %106 %188 = fadd float %186, %187 %189 = fmul float %107, %107 %190 = fadd float %188, %189 %191 = fmul float 0.000000e+00, 0.000000e+00 %192 = fadd float %190, %191 %193 = call float @llvm.AMDGPU.rsq.clamped.f32(float %192) %194 = fmul float %105, %193 %195 = fmul float %106, %193 %196 = fmul float %107, %193 %197 = call float @fabs(float %183) %198 = call float @fabs(float %184) %199 = call float @fabs(float %185) %200 = call float @fabs(float %temp8.0) %201 = call float @fabs(float %183) %202 = call float @fabs(float %184) %203 = call float @fabs(float %185) %204 = call float @fabs(float %temp8.0) %205 = fmul float %197, %201 %206 = fmul float %198, %202 %207 = fmul float %199, %203 %208 = fmul float %205, %205 %209 = fmul float %206, %206 %210 = fmul float %207, %207 %211 = fadd float %209, %208 %212 = fmul float %207, %207 %213 = fadd float %212, %211 %214 = fdiv float 1.000000e+00, %213 %215 = fmul float %214, %208 %216 = fmul float %214, %209 %217 = fmul float %214, %210 %218 = fmul float %41, %103 %219 = fmul float %42, %104 %220 = bitcast float %218 to i32 %221 = bitcast float %219 to i32 %222 = insertelement <2 x i32> undef, i32 %220, i32 0 %223 = insertelement <2 x i32> %222, i32 %221, i32 1 %224 = bitcast <8 x i32> %72 to <32 x i8> %225 = bitcast <4 x i32> %74 to <16 x i8> %226 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %223, <32 x i8> %224, <16 x i8> %225, i32 2) %227 = extractelement <4 x float> %226, i32 0 %228 = extractelement <4 x float> %226, i32 1 %229 = extractelement <4 x float> %226, i32 2 %230 = call float @llvm.pow.f32(float %227, float 0x40019999A0000000) %231 = call float @llvm.pow.f32(float %228, float 0x40019999A0000000) %232 = call float @llvm.pow.f32(float %229, float 0x40019999A0000000) %233 = fmul float %41, %102 %234 = fmul float %42, %104 %235 = bitcast float %233 to i32 %236 = bitcast float %234 to i32 %237 = insertelement <2 x i32> undef, i32 %235, i32 0 %238 = insertelement <2 x i32> %237, i32 %236, i32 1 %239 = bitcast <8 x i32> %72 to <32 x i8> %240 = bitcast <4 x i32> %74 to <16 x i8> %241 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %238, <32 x i8> %239, <16 x i8> %240, i32 2) %242 = extractelement <4 x float> %241, i32 0 %243 = extractelement <4 x float> %241, i32 1 %244 = extractelement <4 x float> %241, i32 2 %245 = call float @llvm.pow.f32(float %242, float 0x40019999A0000000) %246 = call float @llvm.pow.f32(float %243, float 0x40019999A0000000) %247 = call float @llvm.pow.f32(float %244, float 0x40019999A0000000) %248 = fmul float %216, %245 %249 = fmul float %216, %246 %250 = fmul float %216, %247 %251 = fmul float %230, %215 %252 = fadd float %251, %248 %253 = fmul float %231, %215 %254 = fadd float %253, %249 %255 = fmul float %232, %215 %256 = fadd float %255, %250 %257 = fmul float %34, %102 %258 = fmul float %34, %103 %259 = bitcast float %257 to i32 %260 = bitcast float %258 to i32 %261 = insertelement <2 x i32> undef, i32 %259, i32 0 %262 = insertelement <2 x i32> %261, i32 %260, i32 1 %263 = bitcast <8 x i32> %64 to <32 x i8> %264 = bitcast <4 x i32> %66 to <16 x i8> %265 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %262, <32 x i8> %263, <16 x i8> %264, i32 2) %266 = extractelement <4 x float> %265, i32 0 %267 = extractelement <4 x float> %265, i32 1 %268 = extractelement <4 x float> %265, i32 2 %269 = extractelement <4 x float> %265, i32 3 %270 = call float @llvm.pow.f32(float %266, float 0x40019999A0000000) %271 = call float @llvm.pow.f32(float %267, float 0x40019999A0000000) %272 = call float @llvm.pow.f32(float %268, float 0x40019999A0000000) %273 = fmul float %270, %217 %274 = fadd float %273, %252 %275 = fmul float %271, %217 %276 = fadd float %275, %254 %277 = fmul float %272, %217 %278 = fadd float %277, %256 %279 = fmul float %43, %103 %280 = fmul float %44, %104 %281 = fsub float -0.000000e+00, %185 %282 = fmul float %281, %194 %283 = fmul float %184, %195 %284 = fadd float %283, %282 %285 = fmul float %183, %196 %286 = fadd float %284, %285 %287 = fmul float %183, 1.000000e+00 %288 = fmul float %185, 1.000000e+00 %289 = fmul float %184, -1.000000e+00 %290 = fmul float %289, %194 %291 = fmul float %287, %195 %292 = fadd float %291, %290 %293 = fmul float %288, %196 %294 = fadd float %292, %293 %295 = fmul float %183, %194 %296 = fmul float %184, %195 %297 = fadd float %296, %295 %298 = fmul float %185, %196 %299 = fadd float %297, %298 %300 = fmul float %185, -1.000000e+00 %301 = fmul float %184, 1.000000e+00 %302 = fmul float %183, 1.000000e+00 %303 = fmul float %300, %27 %304 = fmul float %301, %28 %305 = fadd float %304, %303 %306 = fmul float %302, %29 %307 = fadd float %305, %306 %308 = fmul float %289, %27 %309 = fmul float %287, %28 %310 = fadd float %309, %308 %311 = fmul float %288, %29 %312 = fadd float %310, %311 %313 = fmul float %183, %27 %314 = fmul float %184, %28 %315 = fadd float %314, %313 %316 = fmul float %185, %29 %317 = fadd float %315, %316 %318 = fmul float %300, %30 %319 = fmul float %301, %31 %320 = fadd float %319, %318 %321 = fmul float %302, %32 %322 = fadd float %320, %321 %323 = fmul float %289, %30 %324 = fmul float %287, %31 %325 = fadd float %324, %323 %326 = fmul float %288, %32 %327 = fadd float %325, %326 %328 = fmul float %183, %30 %329 = fmul float %184, %31 %330 = fadd float %329, %328 %331 = fmul float %185, %32 %332 = fadd float %330, %331 %333 = bitcast float %279 to i32 %334 = bitcast float %280 to i32 %335 = insertelement <2 x i32> undef, i32 %333, i32 0 %336 = insertelement <2 x i32> %335, i32 %334, i32 1 %337 = bitcast <8 x i32> %80 to <32 x i8> %338 = bitcast <4 x i32> %82 to <16 x i8> %339 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %336, <32 x i8> %337, <16 x i8> %338, i32 2) %340 = extractelement <4 x float> %339, i32 3 %341 = bitcast float %279 to i32 %342 = bitcast float %280 to i32 %343 = insertelement <2 x i32> undef, i32 %341, i32 0 %344 = insertelement <2 x i32> %343, i32 %342, i32 1 %345 = bitcast <8 x i32> %76 to <32 x i8> %346 = bitcast <4 x i32> %78 to <16 x i8> %347 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %344, <32 x i8> %345, <16 x i8> %346, i32 2) %348 = extractelement <4 x float> %347, i32 0 %349 = extractelement <4 x float> %347, i32 1 %350 = extractelement <4 x float> %347, i32 2 %351 = extractelement <4 x float> %347, i32 3 %352 = fmul float %33, %33 %353 = fmul float %348, 2.000000e+00 %354 = fadd float %353, -1.000000e+00 %355 = fmul float %349, 2.000000e+00 %356 = fadd float %355, -1.000000e+00 %357 = fmul float %354, %33 %358 = fmul float %356, %33 %359 = fmul float %357, %357 %360 = fmul float %358, %358 %361 = fadd float %359, %360 %362 = fmul float 1.000000e+00, 1.000000e+00 %363 = fadd float %361, %362 %364 = fmul float 0.000000e+00, 0.000000e+00 %365 = fadd float %363, %364 %366 = call float @llvm.AMDGPU.rsq.clamped.f32(float %365) %367 = fmul float %357, %366 %368 = fmul float %358, %366 %369 = fmul float 1.000000e+00, %366 %370 = fmul float %340, 2.000000e+00 %371 = fadd float %370, -1.000000e+00 %372 = fmul float %357, %357 %373 = fmul float %358, %358 %374 = fmul float %358, %357 %375 = fsub float -0.000000e+00, %372 %376 = fmul float %350, %352 %377 = fadd float %376, %375 %378 = fsub float -0.000000e+00, %373 %379 = fmul float %351, %352 %380 = fadd float %379, %378 %381 = fsub float -0.000000e+00, %374 %382 = fmul float %371, %352 %383 = fadd float %382, %381 %384 = fmul float %307, %367 %385 = fmul float %312, %368 %386 = fadd float %385, %384 %387 = fmul float %317, %369 %388 = fadd float %386, %387 %389 = call float @llvm.AMDIL.clamp.(float %388, float 0.000000e+00, float 1.000000e+00) %390 = fmul float %322, %367 %391 = fmul float %327, %368 %392 = fadd float %391, %390 %393 = fmul float %332, %369 %394 = fadd float %392, %393 %395 = call float @llvm.AMDIL.clamp.(float %394, float 0.000000e+00, float 1.000000e+00) %396 = fadd float %286, %322 %397 = fadd float %294, %327 %398 = fadd float %299, %332 %399 = fdiv float 1.000000e+00, %398 %400 = fsub float -0.000000e+00, %357 %401 = fmul float %396, %399 %402 = fadd float %401, %400 %403 = fsub float -0.000000e+00, %358 %404 = fmul float %397, %399 %405 = fadd float %404, %403 %406 = fdiv float 1.000000e+00, %36 %407 = fadd float %406, %377 %408 = fadd float %406, %380 %409 = fmul float %383, %383 %410 = fsub float -0.000000e+00, %409 %411 = fmul float %407, %408 %412 = fadd float %411, %410 %413 = fmul float %402, %402 %414 = fmul float %405, %408 %415 = fsub float -0.000000e+00, %383 %416 = fsub float -0.000000e+00, %383 %417 = fmul float %402, %415 %418 = fmul float %402, %416 %419 = fadd float %417, %418 %420 = fadd float %419, %414 %421 = fmul float %405, %420 %422 = fmul float %413, %408 %423 = fadd float %422, %421 %424 = fmul float %423, 5.000000e-01 %425 = fdiv float 1.000000e+00, %412 %426 = fmul float %424, %425 %427 = fsub float -0.000000e+00, %412 %428 = fcmp oge float %427, 0.000000e+00 %429 = sext i1 %428 to i32 %430 = bitcast i32 %429 to float %431 = bitcast float %430 to i32 %432 = icmp ne i32 %431, 0 %.168 = select i1 %432, float 1.000000e+00, float 0.000000e+00 %433 = fmul float %424, %425 %434 = fadd float %433, -1.600000e+01 %435 = fcmp oge float %434, 0.000000e+00 %436 = sext i1 %435 to i32 %437 = bitcast i32 %436 to float %438 = bitcast float %437 to i32 %439 = icmp ne i32 %438, 0 %temp64.0 = select i1 %439, float 1.000000e+00, float 0.000000e+00 %440 = fadd float %temp64.0, %.168 %441 = fmul float %426, 0xBFF7154CA0000000 %442 = call float @llvm.AMDIL.exp.(float %441) %443 = call float @llvm.maxnum.f32(float %412, float 0x3E7AD7F2A0000000) %444 = call float @llvm.AMDGPU.rsq.clamped.f32(float %443) %445 = fmul float %444, %442 %446 = fsub float -0.000000e+00, %440 %447 = fcmp oge float %446, 0.000000e+00 %448 = sext i1 %447 to i32 %449 = bitcast i32 %448 to float %450 = bitcast float %449 to i32 %451 = icmp ne i32 %450, 0 %.169 = select i1 %451, float %445, float 0.000000e+00 %452 = fmul float %367, %286 %453 = fmul float %369, %299 %454 = fadd float %453, %452 %455 = fmul float %368, %294 %456 = fadd float %454, %455 %457 = fsub float -0.000000e+00, %456 %458 = fadd float %457, 1.000000e+00 %459 = fmul float %458, %458 %460 = fmul float %459, %459 %461 = fmul float %458, %460 %462 = fmul float %185, -1.000000e+00 %463 = fmul float %184, -1.000000e+00 %464 = fmul float %183, 1.000000e+00 %465 = fmul float %367, %462 %466 = fmul float %368, %463 %467 = fadd float %466, %465 %468 = fmul float %369, %464 %469 = fadd float %467, %468 %470 = fmul float %368, %183 %471 = fmul float %367, %184 %472 = fadd float %471, %470 %473 = fmul float %369, %184 %474 = fadd float %472, %473 %475 = fmul float %367, %183 %476 = fmul float %368, %185 %477 = fadd float %476, %475 %478 = fmul float %369, %185 %479 = fadd float %477, %478 %480 = insertelement <4 x float> undef, float %469, i32 0 %481 = insertelement <4 x float> %480, float %474, i32 1 %482 = insertelement <4 x float> %481, float %479, i32 2 %483 = insertelement <4 x float> %482, float 0.000000e+00, i32 3 %484 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %483) %485 = extractelement <4 x float> %484, i32 0 %486 = extractelement <4 x float> %484, i32 1 %487 = extractelement <4 x float> %484, i32 2 %488 = extractelement <4 x float> %484, i32 3 %489 = call float @fabs(float %487) %490 = fdiv float 1.000000e+00, %489 %491 = fmul float %485, %490 %492 = fadd float %491, 1.500000e+00 %493 = fmul float %486, %490 %494 = fadd float %493, 1.500000e+00 %495 = bitcast float %494 to i32 %496 = bitcast float %492 to i32 %497 = bitcast float %488 to i32 %498 = insertelement <4 x i32> undef, i32 %495, i32 0 %499 = insertelement <4 x i32> %498, i32 %496, i32 1 %500 = insertelement <4 x i32> %499, i32 %497, i32 2 %501 = insertelement <4 x i32> %500, i32 undef, i32 3 %502 = bitcast <8 x i32> %60 to <32 x i8> %503 = bitcast <4 x i32> %62 to <16 x i8> %504 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %501, <32 x i8> %502, <16 x i8> %503, i32 4) %505 = extractelement <4 x float> %504, i32 0 %506 = extractelement <4 x float> %504, i32 1 %507 = extractelement <4 x float> %504, i32 2 %508 = call float @llvm.pow.f32(float %505, float 0x40019999A0000000) %509 = call float @llvm.pow.f32(float %506, float 0x40019999A0000000) %510 = call float @llvm.pow.f32(float %507, float 0x40019999A0000000) %511 = fadd float %389, %508 %512 = fadd float %389, %509 %513 = fadd float %389, %510 %514 = fmul float %274, %511 %515 = fmul float %276, %512 %516 = fmul float %278, %513 %517 = fmul float %.169, %461 %518 = fmul float %.169, %461 %519 = fmul float %.169, %461 %520 = fmul float %.169, %461 %521 = call float @llvm.AMDIL.clamp.(float %517, float 0.000000e+00, float 1.000000e+00) %522 = call float @llvm.AMDIL.clamp.(float %518, float 0.000000e+00, float 1.000000e+00) %523 = call float @llvm.AMDIL.clamp.(float %519, float 0.000000e+00, float 1.000000e+00) %524 = call float @llvm.AMDIL.clamp.(float %520, float 0.000000e+00, float 1.000000e+00) %525 = fmul float %524, %395 %526 = fadd float %525, %514 %527 = fmul float %524, %395 %528 = fadd float %527, %515 %529 = fmul float %524, %395 %530 = fadd float %529, %516 %531 = fmul float %43, %102 %532 = fmul float %44, %104 %533 = fmul float %185, 1.000000e+00 %534 = fmul float %184, 1.000000e+00 %535 = fmul float %183, -1.000000e+00 %536 = fmul float %535, %194 %537 = fmul float %533, %195 %538 = fadd float %537, %536 %539 = fmul float %534, %196 %540 = fadd float %538, %539 %541 = fmul float %535, %27 %542 = fmul float %533, %28 %543 = fadd float %542, %541 %544 = fmul float %534, %29 %545 = fadd float %543, %544 %546 = fmul float %535, %30 %547 = fmul float %533, %31 %548 = fadd float %547, %546 %549 = fmul float %534, %32 %550 = fadd float %548, %549 %551 = bitcast float %531 to i32 %552 = bitcast float %532 to i32 %553 = insertelement <2 x i32> undef, i32 %551, i32 0 %554 = insertelement <2 x i32> %553, i32 %552, i32 1 %555 = bitcast <8 x i32> %80 to <32 x i8> %556 = bitcast <4 x i32> %82 to <16 x i8> %557 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %554, <32 x i8> %555, <16 x i8> %556, i32 2) %558 = extractelement <4 x float> %557, i32 3 %559 = bitcast float %531 to i32 %560 = bitcast float %532 to i32 %561 = insertelement <2 x i32> undef, i32 %559, i32 0 %562 = insertelement <2 x i32> %561, i32 %560, i32 1 %563 = bitcast <8 x i32> %76 to <32 x i8> %564 = bitcast <4 x i32> %78 to <16 x i8> %565 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %562, <32 x i8> %563, <16 x i8> %564, i32 2) %566 = extractelement <4 x float> %565, i32 0 %567 = extractelement <4 x float> %565, i32 1 %568 = extractelement <4 x float> %565, i32 2 %569 = extractelement <4 x float> %565, i32 3 %570 = fmul float %566, 2.000000e+00 %571 = fadd float %570, -1.000000e+00 %572 = fmul float %567, 2.000000e+00 %573 = fadd float %572, -1.000000e+00 %574 = fmul float %571, %33 %575 = fmul float %573, %33 %576 = fmul float %574, %574 %577 = fmul float %575, %575 %578 = fadd float %576, %577 %579 = fmul float 1.000000e+00, 1.000000e+00 %580 = fadd float %578, %579 %581 = fmul float 0.000000e+00, 0.000000e+00 %582 = fadd float %580, %581 %583 = call float @llvm.AMDGPU.rsq.clamped.f32(float %582) %584 = fmul float %574, %583 %585 = fmul float %575, %583 %586 = fmul float 1.000000e+00, %583 %587 = fmul float %558, 2.000000e+00 %588 = fadd float %587, -1.000000e+00 %589 = fmul float %574, %574 %590 = fmul float %575, %575 %591 = fmul float %575, %574 %592 = fsub float -0.000000e+00, %589 %593 = fmul float %568, %352 %594 = fadd float %593, %592 %595 = fsub float -0.000000e+00, %590 %596 = fmul float %569, %352 %597 = fadd float %596, %595 %598 = fsub float -0.000000e+00, %591 %599 = fmul float %588, %352 %600 = fadd float %599, %598 %601 = fmul float %545, %584 %602 = fmul float %312, %585 %603 = fadd float %602, %601 %604 = fmul float %317, %586 %605 = fadd float %603, %604 %606 = call float @llvm.AMDIL.clamp.(float %605, float 0.000000e+00, float 1.000000e+00) %607 = fmul float %550, %584 %608 = fmul float %327, %585 %609 = fadd float %608, %607 %610 = fmul float %332, %586 %611 = fadd float %609, %610 %612 = call float @llvm.AMDIL.clamp.(float %611, float 0.000000e+00, float 1.000000e+00) %613 = fadd float %540, %550 %614 = fadd float %294, %327 %615 = fadd float %299, %332 %616 = fdiv float 1.000000e+00, %615 %617 = fsub float -0.000000e+00, %574 %618 = fmul float %613, %616 %619 = fadd float %618, %617 %620 = fsub float -0.000000e+00, %575 %621 = fmul float %614, %616 %622 = fadd float %621, %620 %623 = fadd float %406, %594 %624 = fadd float %406, %597 %625 = fmul float %600, %600 %626 = fsub float -0.000000e+00, %625 %627 = fmul float %623, %624 %628 = fadd float %627, %626 %629 = fmul float %619, %619 %630 = fmul float %622, %624 %631 = fsub float -0.000000e+00, %600 %632 = fsub float -0.000000e+00, %600 %633 = fmul float %619, %631 %634 = fmul float %619, %632 %635 = fadd float %633, %634 %636 = fadd float %635, %630 %637 = fmul float %622, %636 %638 = fmul float %629, %624 %639 = fadd float %638, %637 %640 = fmul float %639, 5.000000e-01 %641 = fdiv float 1.000000e+00, %628 %642 = fmul float %640, %641 %643 = fsub float -0.000000e+00, %628 %644 = fcmp oge float %643, 0.000000e+00 %645 = sext i1 %644 to i32 %646 = bitcast i32 %645 to float %647 = bitcast float %646 to i32 %648 = icmp ne i32 %647, 0 %temp108.0 = select i1 %648, float 1.000000e+00, float 0.000000e+00 %649 = fmul float %640, %641 %650 = fadd float %649, -1.600000e+01 %651 = fcmp oge float %650, 0.000000e+00 %652 = sext i1 %651 to i32 %653 = bitcast i32 %652 to float %654 = bitcast float %653 to i32 %655 = icmp ne i32 %654, 0 %.170 = select i1 %655, float 1.000000e+00, float 0.000000e+00 %656 = fadd float %.170, %temp108.0 %657 = fmul float %642, 0xBFF7154CA0000000 %658 = call float @llvm.AMDIL.exp.(float %657) %659 = call float @llvm.maxnum.f32(float %628, float 0x3E7AD7F2A0000000) %660 = call float @llvm.AMDGPU.rsq.clamped.f32(float %659) %661 = fmul float %660, %658 %662 = fsub float -0.000000e+00, %656 %663 = fcmp oge float %662, 0.000000e+00 %664 = sext i1 %663 to i32 %665 = bitcast i32 %664 to float %666 = bitcast float %665 to i32 %667 = icmp ne i32 %666, 0 %temp64.3 = select i1 %667, float %661, float 0.000000e+00 %668 = fmul float %584, %540 %669 = fmul float %586, %299 %670 = fadd float %669, %668 %671 = fmul float %585, %294 %672 = fadd float %670, %671 %673 = fsub float -0.000000e+00, %672 %674 = fadd float %673, 1.000000e+00 %675 = fmul float %674, %674 %676 = fmul float %675, %675 %677 = fmul float %674, %676 %678 = fmul float %183, -1.000000e+00 %679 = fmul float %184, -1.000000e+00 %680 = fmul float %183, 1.000000e+00 %681 = fmul float %584, %678 %682 = fmul float %585, %679 %683 = fadd float %682, %681 %684 = fmul float %586, %680 %685 = fadd float %683, %684 %686 = fmul float %585, %183 %687 = fmul float %586, %184 %688 = fadd float %687, %686 %689 = fmul float %584, %185 %690 = fadd float %688, %689 %691 = fmul float %584, %184 %692 = fmul float %585, %185 %693 = fadd float %692, %691 %694 = fmul float %586, %185 %695 = fadd float %693, %694 %696 = insertelement <4 x float> undef, float %685, i32 0 %697 = insertelement <4 x float> %696, float %690, i32 1 %698 = insertelement <4 x float> %697, float %695, i32 2 %699 = insertelement <4 x float> %698, float 0.000000e+00, i32 3 %700 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %699) %701 = extractelement <4 x float> %700, i32 0 %702 = extractelement <4 x float> %700, i32 1 %703 = extractelement <4 x float> %700, i32 2 %704 = extractelement <4 x float> %700, i32 3 %705 = call float @fabs(float %703) %706 = fdiv float 1.000000e+00, %705 %707 = fmul float %701, %706 %708 = fadd float %707, 1.500000e+00 %709 = fmul float %702, %706 %710 = fadd float %709, 1.500000e+00 %711 = bitcast float %710 to i32 %712 = bitcast float %708 to i32 %713 = bitcast float %704 to i32 %714 = insertelement <4 x i32> undef, i32 %711, i32 0 %715 = insertelement <4 x i32> %714, i32 %712, i32 1 %716 = insertelement <4 x i32> %715, i32 %713, i32 2 %717 = insertelement <4 x i32> %716, i32 undef, i32 3 %718 = bitcast <8 x i32> %60 to <32 x i8> %719 = bitcast <4 x i32> %62 to <16 x i8> %720 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %717, <32 x i8> %718, <16 x i8> %719, i32 4) %721 = extractelement <4 x float> %720, i32 0 %722 = extractelement <4 x float> %720, i32 1 %723 = extractelement <4 x float> %720, i32 2 %724 = call float @llvm.pow.f32(float %721, float 0x40019999A0000000) %725 = call float @llvm.pow.f32(float %722, float 0x40019999A0000000) %726 = call float @llvm.pow.f32(float %723, float 0x40019999A0000000) %727 = fadd float %606, %724 %728 = fadd float %606, %725 %729 = fadd float %606, %726 %730 = fmul float %274, %727 %731 = fmul float %276, %728 %732 = fmul float %278, %729 %733 = fmul float %temp64.3, %677 %734 = fmul float %temp64.3, %677 %735 = fmul float %temp64.3, %677 %736 = fmul float %temp64.3, %677 %737 = call float @llvm.AMDIL.clamp.(float %733, float 0.000000e+00, float 1.000000e+00) %738 = call float @llvm.AMDIL.clamp.(float %734, float 0.000000e+00, float 1.000000e+00) %739 = call float @llvm.AMDIL.clamp.(float %735, float 0.000000e+00, float 1.000000e+00) %740 = call float @llvm.AMDIL.clamp.(float %736, float 0.000000e+00, float 1.000000e+00) %741 = fmul float %740, %612 %742 = fadd float %741, %730 %743 = fmul float %740, %612 %744 = fadd float %743, %731 %745 = fmul float %740, %612 %746 = fadd float %745, %732 %747 = fmul float %35, %102 %748 = fmul float %35, %103 %749 = fmul float %533, %194 %750 = fmul float %534, %195 %751 = fadd float %750, %749 %752 = fmul float %535, %196 %753 = fadd float %751, %752 %754 = fmul float %287, %194 %755 = fmul float %288, %195 %756 = fadd float %755, %754 %757 = fmul float %289, %196 %758 = fadd float %756, %757 %759 = fmul float %533, %27 %760 = fmul float %534, %28 %761 = fadd float %760, %759 %762 = fmul float %535, %29 %763 = fadd float %761, %762 %764 = fmul float %287, %27 %765 = fmul float %288, %28 %766 = fadd float %765, %764 %767 = fmul float %289, %29 %768 = fadd float %766, %767 %769 = fmul float %533, %30 %770 = fmul float %534, %31 %771 = fadd float %770, %769 %772 = fmul float %535, %32 %773 = fadd float %771, %772 %774 = fmul float %287, %30 %775 = fmul float %288, %31 %776 = fadd float %775, %774 %777 = fmul float %289, %32 %778 = fadd float %776, %777 %779 = bitcast float %747 to i32 %780 = bitcast float %748 to i32 %781 = insertelement <2 x i32> undef, i32 %779, i32 0 %782 = insertelement <2 x i32> %781, i32 %780, i32 1 %783 = bitcast <8 x i32> %68 to <32 x i8> %784 = bitcast <4 x i32> %70 to <16 x i8> %785 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %782, <32 x i8> %783, <16 x i8> %784, i32 2) %786 = extractelement <4 x float> %785, i32 0 %787 = extractelement <4 x float> %785, i32 1 %788 = extractelement <4 x float> %785, i32 2 %789 = extractelement <4 x float> %785, i32 3 %790 = fmul float %786, 2.000000e+00 %791 = fadd float %790, -1.000000e+00 %792 = fmul float %787, 2.000000e+00 %793 = fadd float %792, -1.000000e+00 %794 = fmul float %791, %33 %795 = fmul float %793, %33 %796 = fmul float %794, %794 %797 = fmul float %795, %795 %798 = fadd float %796, %797 %799 = fmul float 1.000000e+00, 1.000000e+00 %800 = fadd float %798, %799 %801 = fmul float 0.000000e+00, 0.000000e+00 %802 = fadd float %800, %801 %803 = call float @llvm.AMDGPU.rsq.clamped.f32(float %802) %804 = fmul float %794, %803 %805 = fmul float %795, %803 %806 = fmul float 1.000000e+00, %803 %807 = fmul float %788, 1.000000e+00 %808 = fadd float %807, 0.000000e+00 %809 = fmul float %789, 1.000000e+00 %810 = fadd float %809, 0.000000e+00 %811 = fmul float %789, 2.000000e+00 %812 = fadd float %811, -1.000000e+00 %813 = fmul float %794, %794 %814 = fmul float %795, %795 %815 = fmul float %795, %794 %816 = fsub float -0.000000e+00, %813 %817 = fmul float %808, %352 %818 = fadd float %817, %816 %819 = fsub float -0.000000e+00, %814 %820 = fmul float %810, %352 %821 = fadd float %820, %819 %822 = fsub float -0.000000e+00, %815 %823 = fmul float %812, %352 %824 = fadd float %823, %822 %825 = fmul float %763, %804 %826 = fmul float %768, %805 %827 = fadd float %826, %825 %828 = fmul float %317, %806 %829 = fadd float %827, %828 %830 = call float @llvm.AMDIL.clamp.(float %829, float 0.000000e+00, float 1.000000e+00) %831 = fmul float %773, %804 %832 = fmul float %778, %805 %833 = fadd float %832, %831 %834 = fmul float %332, %806 %835 = fadd float %833, %834 %836 = call float @llvm.AMDIL.clamp.(float %835, float 0.000000e+00, float 1.000000e+00) %837 = fadd float %753, %773 %838 = fadd float %758, %778 %839 = fadd float %299, %332 %840 = fdiv float 1.000000e+00, %839 %841 = fsub float -0.000000e+00, %794 %842 = fmul float %837, %840 %843 = fadd float %842, %841 %844 = fsub float -0.000000e+00, %795 %845 = fmul float %838, %840 %846 = fadd float %845, %844 %847 = fadd float %406, %818 %848 = fadd float %406, %821 %849 = fmul float %824, %824 %850 = fsub float -0.000000e+00, %849 %851 = fmul float %847, %848 %852 = fadd float %851, %850 %853 = fmul float %843, %843 %854 = fmul float %846, %848 %855 = fsub float -0.000000e+00, %824 %856 = fsub float -0.000000e+00, %824 %857 = fmul float %843, %855 %858 = fmul float %843, %856 %859 = fadd float %857, %858 %860 = fadd float %859, %854 %861 = fmul float %846, %860 %862 = fmul float %853, %848 %863 = fadd float %862, %861 %864 = fmul float %863, 5.000000e-01 %865 = fdiv float 1.000000e+00, %852 %866 = fmul float %864, %865 %867 = fsub float -0.000000e+00, %852 %868 = fcmp oge float %867, 0.000000e+00 %869 = sext i1 %868 to i32 %870 = bitcast i32 %869 to float %871 = bitcast float %870 to i32 %872 = icmp ne i32 %871, 0 %.171 = select i1 %872, float 1.000000e+00, float 0.000000e+00 %873 = fmul float %864, %865 %874 = fadd float %873, -1.600000e+01 %875 = fcmp oge float %874, 0.000000e+00 %876 = sext i1 %875 to i32 %877 = bitcast i32 %876 to float %878 = bitcast float %877 to i32 %879 = icmp ne i32 %878, 0 %temp40.0 = select i1 %879, float 1.000000e+00, float 0.000000e+00 %880 = fadd float %temp40.0, %.171 %881 = fmul float %866, 0xBFF7154CA0000000 %882 = call float @llvm.AMDIL.exp.(float %881) %883 = call float @llvm.maxnum.f32(float %852, float 0x3E7AD7F2A0000000) %884 = call float @llvm.AMDGPU.rsq.clamped.f32(float %883) %885 = fmul float %884, %882 %886 = fsub float -0.000000e+00, %880 %887 = fcmp oge float %886, 0.000000e+00 %888 = sext i1 %887 to i32 %889 = bitcast i32 %888 to float %890 = bitcast float %889 to i32 %891 = icmp ne i32 %890, 0 %.172 = select i1 %891, float %885, float 0.000000e+00 %892 = fmul float %804, %753 %893 = fmul float %805, %758 %894 = fadd float %893, %892 %895 = fmul float %806, %299 %896 = fadd float %894, %895 %897 = fsub float -0.000000e+00, %896 %898 = fadd float %897, 1.000000e+00 %899 = fmul float %898, %898 %900 = fmul float %899, %899 %901 = fmul float %898, %900 %902 = fmul float %805, %183 %903 = fmul float %806, %183 %904 = fadd float %903, %902 %905 = fmul float %804, %185 %906 = fadd float %904, %905 %907 = fmul float %804, %184 %908 = fmul float %806, %184 %909 = fadd float %908, %907 %910 = fmul float %805, %185 %911 = fadd float %909, %910 %912 = fmul float %183, -1.000000e+00 %913 = fmul float %184, -1.000000e+00 %914 = fmul float %185, 1.000000e+00 %915 = fmul float %804, %912 %916 = fmul float %805, %913 %917 = fadd float %916, %915 %918 = fmul float %806, %914 %919 = fadd float %917, %918 %920 = insertelement <4 x float> undef, float %906, i32 0 %921 = insertelement <4 x float> %920, float %911, i32 1 %922 = insertelement <4 x float> %921, float %919, i32 2 %923 = insertelement <4 x float> %922, float %900, i32 3 %924 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %923) %925 = extractelement <4 x float> %924, i32 0 %926 = extractelement <4 x float> %924, i32 1 %927 = extractelement <4 x float> %924, i32 2 %928 = extractelement <4 x float> %924, i32 3 %929 = call float @fabs(float %927) %930 = fdiv float 1.000000e+00, %929 %931 = fmul float %925, %930 %932 = fadd float %931, 1.500000e+00 %933 = fmul float %926, %930 %934 = fadd float %933, 1.500000e+00 %935 = bitcast float %934 to i32 %936 = bitcast float %932 to i32 %937 = bitcast float %928 to i32 %938 = insertelement <4 x i32> undef, i32 %935, i32 0 %939 = insertelement <4 x i32> %938, i32 %936, i32 1 %940 = insertelement <4 x i32> %939, i32 %937, i32 2 %941 = insertelement <4 x i32> %940, i32 undef, i32 3 %942 = bitcast <8 x i32> %60 to <32 x i8> %943 = bitcast <4 x i32> %62 to <16 x i8> %944 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %941, <32 x i8> %942, <16 x i8> %943, i32 4) %945 = extractelement <4 x float> %944, i32 0 %946 = extractelement <4 x float> %944, i32 1 %947 = extractelement <4 x float> %944, i32 2 %948 = call float @llvm.pow.f32(float %945, float 0x40019999A0000000) %949 = call float @llvm.pow.f32(float %946, float 0x40019999A0000000) %950 = call float @llvm.pow.f32(float %947, float 0x40019999A0000000) %951 = fadd float %830, %948 %952 = fadd float %830, %949 %953 = fadd float %830, %950 %954 = fmul float %274, %951 %955 = fmul float %276, %952 %956 = fmul float %278, %953 %957 = fmul float %.172, %901 %958 = fmul float %.172, %901 %959 = fmul float %.172, %901 %960 = fmul float %.172, %901 %961 = call float @llvm.AMDIL.clamp.(float %957, float 0.000000e+00, float 1.000000e+00) %962 = call float @llvm.AMDIL.clamp.(float %958, float 0.000000e+00, float 1.000000e+00) %963 = call float @llvm.AMDIL.clamp.(float %959, float 0.000000e+00, float 1.000000e+00) %964 = call float @llvm.AMDIL.clamp.(float %960, float 0.000000e+00, float 1.000000e+00) %965 = fmul float %962, %836 %966 = fadd float %965, %954 %967 = fmul float %962, %836 %968 = fadd float %967, %955 %969 = fmul float %962, %836 %970 = fadd float %969, %956 %971 = fmul float %216, %742 %972 = fmul float %216, %744 %973 = fmul float %216, %746 %974 = fmul float %526, %215 %975 = fadd float %974, %971 %976 = fmul float %528, %215 %977 = fadd float %976, %972 %978 = fmul float %530, %215 %979 = fadd float %978, %973 %980 = fmul float %966, %217 %981 = fadd float %980, %975 %982 = fmul float %968, %217 %983 = fadd float %982, %977 %984 = fmul float %970, %217 %985 = fadd float %984, %979 %986 = fmul float %274, 0x3FD3333340000000 %987 = fmul float %276, 0x3FE2E147A0000000 %988 = fadd float %987, %986 %989 = fmul float %278, 0x3FBC28F5C0000000 %990 = fadd float %988, %989 %991 = fsub float -0.000000e+00, %990 %992 = fadd float %991, 1.000000e+00 %993 = fmul float %992, %37 %994 = fmul float %992, %38 %995 = fmul float %992, %39 %996 = call float @llvm.AMDIL.clamp.(float %317, float 0.000000e+00, float 1.000000e+00) %997 = insertelement <4 x float> undef, float %183, i32 0 %998 = insertelement <4 x float> %997, float %184, i32 1 %999 = insertelement <4 x float> %998, float %185, i32 2 %1000 = insertelement <4 x float> %999, float %269, i32 3 %1001 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %1000) %1002 = extractelement <4 x float> %1001, i32 0 %1003 = extractelement <4 x float> %1001, i32 1 %1004 = extractelement <4 x float> %1001, i32 2 %1005 = extractelement <4 x float> %1001, i32 3 %1006 = call float @fabs(float %1004) %1007 = fdiv float 1.000000e+00, %1006 %1008 = fmul float %1002, %1007 %1009 = fadd float %1008, 1.500000e+00 %1010 = fmul float %1003, %1007 %1011 = fadd float %1010, 1.500000e+00 %1012 = bitcast float %1011 to i32 %1013 = bitcast float %1009 to i32 %1014 = bitcast float %1005 to i32 %1015 = insertelement <4 x i32> undef, i32 %1012, i32 0 %1016 = insertelement <4 x i32> %1015, i32 %1013, i32 1 %1017 = insertelement <4 x i32> %1016, i32 %1014, i32 2 %1018 = insertelement <4 x i32> %1017, i32 undef, i32 3 %1019 = bitcast <8 x i32> %60 to <32 x i8> %1020 = bitcast <4 x i32> %62 to <16 x i8> %1021 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1018, <32 x i8> %1019, <16 x i8> %1020, i32 4) %1022 = extractelement <4 x float> %1021, i32 0 %1023 = extractelement <4 x float> %1021, i32 1 %1024 = extractelement <4 x float> %1021, i32 2 %1025 = call float @llvm.pow.f32(float %1022, float 0x40019999A0000000) %1026 = call float @llvm.pow.f32(float %1023, float 0x40019999A0000000) %1027 = call float @llvm.pow.f32(float %1024, float 0x40019999A0000000) %1028 = fadd float %1025, %996 %1029 = fadd float %1026, %996 %1030 = fadd float %1027, %996 %1031 = fmul float %993, %1028 %1032 = fadd float %1031, %981 %1033 = fmul float %994, %1029 %1034 = fadd float %1033, %983 %1035 = fmul float %995, %1030 %1036 = fadd float %1035, %985 %1037 = fmul float %108, 5.000000e-01 %1038 = fadd float %1037, 5.000000e-01 %1039 = bitcast float %1038 to i32 %1040 = bitcast float %58 to i32 %1041 = insertelement <2 x i32> undef, i32 %1039, i32 0 %1042 = insertelement <2 x i32> %1041, i32 %1040, i32 1 %1043 = bitcast <8 x i32> %84 to <32 x i8> %1044 = bitcast <4 x i32> %86 to <16 x i8> %1045 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1042, <32 x i8> %1043, <16 x i8> %1044, i32 2) %1046 = extractelement <4 x float> %1045, i32 0 %1047 = fsub float -0.000000e+00, %24 %1048 = fadd float %1047, %102 %1049 = fsub float -0.000000e+00, %25 %1050 = fadd float %1049, %103 %1051 = fsub float -0.000000e+00, %26 %1052 = fadd float %1051, %104 %1053 = fmul float %1048, %1048 %1054 = fmul float %1050, %1050 %1055 = fadd float %1054, %1053 %1056 = fmul float %1052, %1052 %1057 = fadd float %1055, %1056 %1058 = fmul float %1057, %57 %1059 = fmul float %1052, %56 %1060 = fmul float %1059, 0x3FF7154CA0000000 %1061 = call float @llvm.AMDIL.exp.(float %1060) %1062 = fsub float -0.000000e+00, %1061 %1063 = fadd float %1062, 1.000000e+00 %1064 = fmul float %1063, %1058 %1065 = fdiv float 1.000000e+00, %1052 %1066 = fmul float %1065, %1064 %1067 = fmul float %1066, 0x3FF7154CA0000000 %1068 = call float @llvm.AMDIL.exp.(float %1067) %1069 = call float @llvm.AMDIL.clamp.(float %1068, float 0.000000e+00, float 1.000000e+00) %1070 = fsub float -0.000000e+00, %1069 %1071 = fadd float %1070, 1.000000e+00 %1072 = fmul float %1071, %1046 %1073 = fsub float -0.000000e+00, %40 %1074 = fadd float %1073, %104 %1075 = fcmp oge float %1074, 0.000000e+00 %1076 = sext i1 %1075 to i32 %1077 = bitcast i32 %1076 to float %1078 = bitcast float %1077 to i32 %1079 = icmp ne i32 %1078, 0 %temp48.0 = select i1 %1079, float 1.000000e+00, float 0.000000e+00 %1080 = fsub float -0.000000e+00, %104 %1081 = fadd float %40, %1080 %1082 = fsub float -0.000000e+00, %45 %1083 = fadd float %1082, %48 %1084 = fsub float -0.000000e+00, %46 %1085 = fadd float %1084, %49 %1086 = fsub float -0.000000e+00, %47 %1087 = fadd float %1086, %50 %1088 = fmul float %1081, %51 %1089 = fmul float %1081, %51 %1090 = fmul float %1081, %51 %1091 = fmul float %1081, %51 %1092 = call float @llvm.AMDIL.clamp.(float %1088, float 0.000000e+00, float 1.000000e+00) %1093 = call float @llvm.AMDIL.clamp.(float %1089, float 0.000000e+00, float 1.000000e+00) %1094 = call float @llvm.AMDIL.clamp.(float %1090, float 0.000000e+00, float 1.000000e+00) %1095 = call float @llvm.AMDIL.clamp.(float %1091, float 0.000000e+00, float 1.000000e+00) %1096 = fmul float %1093, %1083 %1097 = fadd float %1096, %45 %1098 = fmul float %1093, %1085 %1099 = fadd float %1098, %46 %1100 = fmul float %1093, %1087 %1101 = fadd float %1100, %47 %1102 = fmul float %1032, %1097 %1103 = fmul float %1034, %1099 %1104 = fmul float %1036, %1101 %1105 = fsub float -0.000000e+00, %196 %1106 = fdiv float 1.000000e+00, %1105 %1107 = fmul float %1081, %1106 %1108 = fmul float %1107, %194 %1109 = fmul float %1107, %195 %1110 = fmul float %1107, %196 %1111 = fmul float %1108, %1108 %1112 = fmul float %1109, %1109 %1113 = fadd float %1112, %1111 %1114 = fmul float %1110, %1110 %1115 = fadd float %1113, %1114 %1116 = call float @llvm.maxnum.f32(float %1115, float 0x3E7AD7F2A0000000) %1117 = call float @llvm.AMDGPU.rsq.clamped.f32(float %1116) %1118 = fmul float %1117, %1116 %1119 = fsub float -0.000000e+00, %1116 %1120 = call float @llvm.AMDGPU.cndlt(float %1119, float %1118, float 0.000000e+00) %1121 = fsub float -0.000000e+00, %1120 %1122 = fmul float %1121, %52 %1123 = fmul float %1122, 0x3FF7154CA0000000 %1124 = call float @llvm.AMDIL.exp.(float %1123) %1125 = fmul float %196, %196 %1126 = fmul float %1125, %1125 %1127 = fsub float -0.000000e+00, %1126 %1128 = fmul float %196, %1127 %1129 = fadd float %1128, 1.000000e+00 %1130 = fsub float -0.000000e+00, %1129 %1131 = fadd float %1130, 1.000000e+00 %1132 = fmul float %1124, %1131 %1133 = fmul float %1132, %1102 %1134 = fmul float %1132, %1103 %1135 = fmul float %1132, %1104 %1136 = fsub float -0.000000e+00, %1071 %1137 = fmul float %1046, %1136 %1138 = fadd float %1137, 1.000000e+00 %1139 = fmul float %1138, %1133 %1140 = fmul float %1138, %1134 %1141 = fmul float %1138, %1135 %1142 = fcmp oge float %1074, 0.000000e+00 %1143 = sext i1 %1142 to i32 %1144 = bitcast i32 %1143 to float %1145 = bitcast float %1144 to i32 %1146 = icmp ne i32 %1145, 0 %.173 = select i1 %1146, float %1032, float %1139 %1147 = fcmp oge float %1074, 0.000000e+00 %1148 = sext i1 %1147 to i32 %1149 = bitcast i32 %1148 to float %1150 = bitcast float %1149 to i32 %1151 = icmp ne i32 %1150, 0 %temp32.0 = select i1 %1151, float %1034, float %1140 %1152 = fcmp oge float %1074, 0.000000e+00 %1153 = sext i1 %1152 to i32 %1154 = bitcast i32 %1153 to float %1155 = bitcast float %1154 to i32 %1156 = icmp ne i32 %1155, 0 %.174 = select i1 %1156, float %1036, float %1141 %1157 = fcmp oge float %1074, 0.000000e+00 %1158 = sext i1 %1157 to i32 %1159 = bitcast i32 %1158 to float %1160 = bitcast float %1159 to i32 %1161 = icmp ne i32 %1160, 0 %temp16.2 = select i1 %1161, float %1072, float 0.000000e+00 %1162 = call float @fabs(float %.173) %1163 = call float @llvm.log2.f32(float %1162) %1164 = call float @fabs(float %temp32.0) %1165 = call float @llvm.log2.f32(float %1164) %1166 = call float @fabs(float %.174) %1167 = call float @llvm.log2.f32(float %1166) %1168 = fmul float %1163, 0x3FDD1743E0000000 %1169 = fmul float %1165, 0x3FDD1743E0000000 %1170 = fmul float %1167, 0x3FDD1743E0000000 %1171 = call float @llvm.AMDIL.exp.(float %1168) %1172 = call float @llvm.AMDIL.exp.(float %1169) %1173 = call float @llvm.AMDIL.exp.(float %1170) %1174 = bitcast float %111 to i32 %1175 = bitcast float %112 to i32 %1176 = insertelement <2 x i32> undef, i32 %1174, i32 0 %1177 = insertelement <2 x i32> %1176, i32 %1175, i32 1 %1178 = bitcast <8 x i32> %92 to <32 x i8> %1179 = bitcast <4 x i32> %94 to <16 x i8> %1180 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %1177, <32 x i8> %1178, <16 x i8> %1179, i32 2) %1181 = extractelement <4 x float> %1180, i32 0 %1182 = extractelement <4 x float> %1180, i32 1 %1183 = extractelement <4 x float> %1180, i32 2 %1184 = call float @llvm.AMDGPU.lrp(float %124, float %1171, float %1181) %1185 = call float @llvm.AMDGPU.lrp(float %124, float %1172, float %1182) %1186 = call float @llvm.AMDGPU.lrp(float %124, float %1173, float %1183) %1187 = bitcast float %1184 to i32 %1188 = bitcast float %1185 to i32 %1189 = bitcast float %1186 to i32 %1190 = insertelement <4 x i32> undef, i32 %1187, i32 0 %1191 = insertelement <4 x i32> %1190, i32 %1188, i32 1 %1192 = insertelement <4 x i32> %1191, i32 %1189, i32 2 %1193 = insertelement <4 x i32> %1192, i32 undef, i32 3 %1194 = bitcast <8 x i32> %96 to <32 x i8> %1195 = bitcast <4 x i32> %98 to <16 x i8> %1196 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %1193, <32 x i8> %1194, <16 x i8> %1195, i32 3) %1197 = extractelement <4 x float> %1196, i32 0 %1198 = extractelement <4 x float> %1196, i32 1 %1199 = extractelement <4 x float> %1196, i32 2 %1200 = fsub float -0.000000e+00, %124 %1201 = fmul float %125, %1200 %1202 = fadd float %1201, %124 %1203 = call float @llvm.AMDGPU.lrp(float %1202, float %1197, float %1184) %1204 = call float @llvm.AMDGPU.lrp(float %1202, float %1198, float %1185) %1205 = call float @llvm.AMDGPU.lrp(float %1202, float %1199, float %1186) %1206 = fsub float -0.000000e+00, %1203 %1207 = fadd float %1206, %53 %1208 = fsub float -0.000000e+00, %1204 %1209 = fadd float %1208, %54 %1210 = fsub float -0.000000e+00, %1205 %1211 = fadd float %1210, %55 %1212 = fmul float %temp16.2, %1207 %1213 = fadd float %1212, %1203 %1214 = fmul float %temp16.2, %1209 %1215 = fadd float %1214, %1204 %1216 = fmul float %temp16.2, %1211 %1217 = fadd float %1216, %1205 %1218 = call i32 @llvm.SI.packf16(float %1213, float %1215) %1219 = bitcast i32 %1218 to float %1220 = call i32 @llvm.SI.packf16(float %1217, float %temp48.0) %1221 = bitcast i32 %1220 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %1219, float %1221, float %1219, float %1221) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readonly declare float @fabs(float) #2 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #3 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { nounwind readnone readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 4, [m0] ; C8081100 v_interp_p2_f32 v2, [v2], v1, 1, 4, [m0] ; C8091101 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_interp_p1_f32 v4, v0, 0, 4, [m0] ; C8101000 v_interp_p2_f32 v4, [v4], v1, 0, 4, [m0] ; C8111001 v_mov_b32_e32 v19, 0 ; 7E260280 v_add_f32_e32 v2, 0, v4 ; 06040880 s_load_dwordx4 s[28:31], s[4:5], 0x0 ; C08E0500 s_load_dwordx4 s[80:83], s[4:5], 0x4 ; C0A80504 s_load_dwordx4 s[40:43], s[4:5], 0x8 ; C0940508 s_load_dwordx4 s[92:95], s[4:5], 0xc ; C0AE050C s_load_dwordx4 s[68:71], s[4:5], 0x10 ; C0A20510 s_load_dwordx4 s[52:55], s[4:5], 0x14 ; C09A0514 s_load_dwordx4 s[24:27], s[4:5], 0x18 ; C08C0518 s_load_dwordx4 s[56:59], s[4:5], 0x1c ; C09C051C s_load_dwordx4 s[8:11], s[4:5], 0x20 ; C0840520 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v83, s8, 0 ; 04A70008 v_writelane_b32 v83, s9, 1 ; 04A70209 v_writelane_b32 v83, s10, 2 ; 04A7040A v_writelane_b32 v83, s11, 3 ; 04A7060B s_load_dwordx4 s[96:99], s[4:5], 0x24 ; C0B00524 s_load_dwordx8 s[32:39], s[6:7], 0x0 ; C0D00700 s_load_dwordx8 s[84:91], s[6:7], 0x8 ; C0EA0708 s_load_dwordx8 s[44:51], s[6:7], 0x10 ; C0D60710 s_load_dwordx8 s[8:15], s[6:7], 0x18 ; C0C40718 s_load_dwordx8 s[72:79], s[6:7], 0x20 ; C0E40720 s_load_dwordx8 s[60:67], s[6:7], 0x28 ; C0DE0728 s_load_dwordx8 s[16:23], s[6:7], 0x30 ; C0C80730 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v83, s16, 4 ; 04A70810 v_writelane_b32 v83, s17, 5 ; 04A70A11 v_writelane_b32 v83, s18, 6 ; 04A70C12 v_writelane_b32 v83, s19, 7 ; 04A70E13 v_writelane_b32 v83, s20, 8 ; 04A71014 v_writelane_b32 v83, s21, 9 ; 04A71215 v_writelane_b32 v83, s22, 10 ; 04A71416 v_writelane_b32 v83, s23, 11 ; 04A71617 s_load_dwordx8 s[16:23], s[6:7], 0x38 ; C0C80738 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:3], 5, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[16:23], s[56:59] ; F0800500 01C40202 s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v2|, 0 ; D00C0100 20010102 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_cmp_ne_i32_e64 s[0:1], v4, 0 ; D10A0000 00010104 v_mov_b32_e32 v12, 0x80000000 ; 7E1802FF 80000000 v_cndmask_b32_e64 v4, v12, -1.0, s[0:1] ; D2000004 0001E70C v_cmp_lt_f32_e64 s[0:1], v4, 0 ; D0020000 00010104 v_cndmask_b32_e64 v4, 0, -1, s[0:1] ; D2000004 00018280 v_and_b32_e32 v4, 1.0, v4 ; 360808F2 v_xor_b32_e32 v4, v4, v12 ; 3A081904 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_cmpx_le_f32_e32 vcc, 0, v4 ; 7C260880 v_interp_p1_f32 v5, v0, 2, 1, [m0] ; C8140600 v_interp_p2_f32 v5, [v5], v1, 2, 1, [m0] ; C8150601 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x29 ; C2020129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v21, s4, v5 ; 102A0A04 v_interp_p1_f32 v13, v0, 1, 1, [m0] ; C8340500 v_interp_p2_f32 v13, [v13], v1, 1, 1, [m0] ; C8350501 s_buffer_load_dword s59, s[0:3], 0x28 ; C21D8128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v20, s59, v13 ; 10281A3B image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[72:79], s[68:71] ; F0800F00 02321614 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, 2.0, v22, -1.0 ; D2820004 03CE2CF4 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v26, s4, v4 ; 10340804 v_mad_f32 v4, 2.0, v23, -1.0 ; D2820004 03CE2EF4 v_mul_f32_e32 v27, s4, v4 ; 10360804 v_mul_f32_e32 v28, v27, v27 ; 1038371B v_mad_f32 v4, v26, v26, v28 ; D2820004 0472351A v_add_f32_e32 v4, 1.0, v4 ; 060808F2 v_add_f32_e32 v4, 0, v4 ; 06080880 v_rsq_clamp_f32_e32 v29, v4 ; 7E3A5904 v_mul_f32_e32 v30, v29, v27 ; 103C371D v_mul_f32_e32 v31, v29, v26 ; 103E351D v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_interp_p1_f32 v6, v0, 1, 0, [m0] ; C8180100 v_interp_p2_f32 v6, [v6], v1, 1, 0, [m0] ; C8190101 v_mul_f32_e32 v7, v6, v6 ; 100E0D06 v_mad_f32 v7, v4, v4, v7 ; D2820007 041E0904 v_interp_p1_f32 v8, v0, 2, 0, [m0] ; C8200200 v_interp_p2_f32 v8, [v8], v1, 2, 0, [m0] ; C8210201 v_mad_f32 v7, v8, v8, v7 ; D2820007 041E1108 v_add_f32_e32 v7, 0, v7 ; 060E0E80 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v32, v7, v4 ; 10400907 v_mul_f32_e32 v4, v32, v31 ; 10083F20 v_mul_f32_e32 v34, v7, v8 ; 10441107 v_mad_f32 v4, v30, v34, v4 ; D2820004 0412451E v_mad_f32 v18, v29, v34, v4 ; D2820012 0412451D v_mul_f32_e32 v4, v32, v30 ; 10083D20 v_mul_f32_e32 v33, v7, v6 ; 10420D07 v_mad_f32 v4, v31, v33, v4 ; D2820004 0412431F v_mad_f32 v17, v29, v33, v4 ; D2820011 0412431D v_mul_f32_e32 v4, v34, v31 ; 10083F22 v_mad_f32 v4, v30, -v33, -v4 ; D2820004 C412431E v_mad_f32 v16, v29, v32, v4 ; D2820010 0412411D v_cubeid_f32 v39, v16, v17, v18 ; D2880027 044A2310 v_cubema_f32 v38, v16, v17, v18 ; D28E0026 044A2310 v_cubesc_f32 v37, v16, v17, v18 ; D28A0025 044A2310 v_cubetc_f32 v36, v16, v17, v18 ; D28C0024 044A2310 v_rcp_f32_e64 v4, |v38| ; D3540104 00000126 v_mov_b32_e32 v44, 0x3fc00000 ; 7E5802FF 3FC00000 v_mad_f32 v38, v36, v4, v44 ; D2820026 04B20924 v_mad_f32 v37, v37, v4, v44 ; D2820025 04B20925 image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[32:39], s[28:31] ; F0800700 00E80625 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v9, v8 ; 7E124F08 v_mov_b32_e32 v4, 0x400ccccd ; 7E0802FF 400CCCCD v_mul_legacy_f32_e32 v9, v4, v9 ; 0E121304 v_exp_f32_e32 v9, v9 ; 7E124B09 s_buffer_load_dword s16, s[0:3], 0x4 ; C2080104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v36, s16, v34 ; 10484410 s_buffer_load_dword s5, s[0:3], 0x5 ; C2028105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v33, s5, -v36 ; D282000A 84900B21 s_buffer_load_dword s56, s[0:3], 0x6 ; C21C0106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v32, s56, v10 ; D282000A 04287120 v_mul_f32_e32 v10, v31, v10 ; 1014151F v_mul_f32_e32 v11, s16, v33 ; 10164210 v_mad_f32 v11, v32, s5, -v11 ; D282000B 842C0B20 v_mad_f32 v37, v34, s56, v11 ; D2820025 042C7122 v_mad_f32 v10, v37, v30, v10 ; D282000A 042A3D25 v_mul_f32_e32 v38, s16, v32 ; 104C4010 v_mad_f32 v11, v33, s5, v38 ; D282000B 04980B21 v_mad_f32 v39, v34, s56, v11 ; D2820027 042C7122 v_mad_f32 v10, v39, v29, v10 ; D282000A 042A3B27 v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_add_f32_e32 v40, v9, v10 ; 06501509 v_mul_f32_e64 v11, |v33|, |v33| ; D210030B 00024321 v_mul_f32_e64 v9, |v32|, |v32| ; D2100309 00024120 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mad_f32 v14, v11, v11, v9 ; D282000E 0426170B v_mul_f32_e64 v41, |v34|, |v34| ; D2100329 00024522 v_mad_f32 v14, v41, v41, v14 ; D282000E 043A5329 v_rcp_f32_e32 v14, v14 ; 7E1C550E v_mul_f32_e32 v9, v9, v14 ; 10121D09 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mul_f32_e32 v11, v11, v14 ; 10161D0B s_buffer_load_dword s16, s[0:3], 0x25 ; C2080125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v43, s16, v5 ; 10560A10 v_interp_p1_f32 v15, v0, 0, 1, [m0] ; C83C0400 v_interp_p2_f32 v15, [v15], v1, 0, 1, [m0] ; C83D0401 s_buffer_load_dword s16, s[0:3], 0x24 ; C2080124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v42, s16, v15 ; 10541E10 image_sample v[45:47], 7, 0, 0, 0, 0, 0, 0, 0, v[42:43], s[8:15], s[92:95] ; F0800700 02E22D2A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v48, v47 ; 7E604F2F v_mul_legacy_f32_e32 v48, v4, v48 ; 0E606104 v_exp_f32_e32 v48, v48 ; 7E604B30 v_mul_f32_e32 v48, v48, v11 ; 10601730 v_mul_f32_e32 v42, s16, v13 ; 10541A10 image_sample v[49:51], 7, 0, 0, 0, 0, 0, 0, 0, v[42:43], s[8:15], s[92:95] ; F0800700 02E2312A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v42, v51 ; 7E544F33 v_mul_legacy_f32_e32 v42, v4, v42 ; 0E545504 v_exp_f32_e32 v42, v42 ; 7E544B2A v_mad_f32 v42, v42, v9, v48 ; D282002A 04C2132A v_mul_f32_e32 v41, v41, v41 ; 10525329 v_mul_f32_e32 v14, v41, v14 ; 101C1D29 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v53, s8, v13 ; 106A1A08 v_mul_f32_e32 v52, s8, v15 ; 10681E08 image_sample v[52:55], 15, 0, 0, 0, 0, 0, 0, 0, v[52:53], s[84:91], s[80:83] ; F0800F00 02953434 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v41, v54 ; 7E524F36 v_mul_legacy_f32_e32 v41, v4, v41 ; 0E525304 v_exp_f32_e32 v41, v41 ; 7E524B29 v_mad_f32 v41, v41, v14, v42 ; D2820029 04AA1D29 v_mul_f32_e32 v40, v40, v41 ; 10505328 s_buffer_load_dword s8, s[0:3], 0x8 ; C2040108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v42, s8, v33 ; 10544208 s_buffer_load_dword s57, s[0:3], 0x9 ; C21C8109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v42, v32, s57, -v42 ; D282002A 84A87320 s_buffer_load_dword s58, s[0:3], 0xa ; C21D010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v42, v34, s58, v42 ; D282002A 04A87522 v_interp_p1_f32 v43, v0, 0, 2, [m0] ; C8AC0800 v_interp_p2_f32 v43, [v43], v1, 0, 2, [m0] ; C8AD0801 v_interp_p1_f32 v48, v0, 1, 2, [m0] ; C8C00900 v_interp_p2_f32 v48, [v48], v1, 1, 2, [m0] ; C8C10901 v_mul_f32_e32 v56, v48, v48 ; 10706130 v_mad_f32 v56, v43, v43, v56 ; D2820038 04E2572B v_interp_p1_f32 v57, v0, 2, 2, [m0] ; C8E40A00 v_interp_p2_f32 v57, [v57], v1, 2, 2, [m0] ; C8E50A01 v_mad_f32 v56, v57, v57, v56 ; D2820038 04E27339 v_add_f32_e32 v56, 0, v56 ; 06707080 v_rsq_clamp_f32_e32 v56, v56 ; 7E705938 v_mul_f32_e32 v43, v56, v43 ; 10565738 v_mul_f32_e32 v58, v43, v33 ; 1074432B v_mul_f32_e32 v48, v56, v48 ; 10606138 v_mad_f32 v58, v32, v48, -v58 ; D282003A 84EA6120 v_mul_f32_e32 v56, v56, v57 ; 10707338 v_mad_f32 v57, v34, v56, v58 ; D2820039 04EA7122 v_add_f32_e32 v58, v42, v57 ; 0674732A v_mul_f32_e32 v59, s8, v32 ; 10764008 v_mad_f32 v60, v33, s57, v59 ; D282003C 04EC7321 v_mad_f32 v60, v34, s58, v60 ; D282003C 04F07522 v_mul_f32_e32 v61, v43, v32 ; 107A412B v_mad_f32 v62, v33, v48, v61 ; D282003E 04F66121 v_mad_f32 v62, v34, v56, v62 ; D282003E 04FA7122 v_add_f32_e32 v63, v60, v62 ; 067E7D3C v_rcp_f32_e32 v63, v63 ; 7E7E553F v_mad_f32 v64, v58, v63, -v27 ; D2820040 846E7F3A v_mul_f32_e64 v65, s4, s4 ; D2100041 00000804 v_mad_f32 v28, v25, v65, -v28 ; D282001C 84728319 s_buffer_load_dword s9, s[0:3], 0x18 ; C2048118 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v66, s9 ; 7E845409 v_add_f32_e32 v28, v28, v66 ; 0638851C v_mul_f32_e32 v67, v28, v64 ; 1086811C v_mul_f32_e32 v68, s8, v34 ; 10884408 v_mad_f32 v69, v33, s57, -v68 ; D2820045 85107321 v_mad_f32 v69, v32, s58, v69 ; D2820045 05147520 v_mul_f32_e32 v70, v43, v34 ; 108C452B v_mad_f32 v71, v33, v48, -v70 ; D2820047 851A6121 v_mad_f32 v71, v32, v56, v71 ; D2820047 051E7120 v_add_f32_e32 v72, v69, v71 ; 06908F45 v_mad_f32 v72, v72, v63, -v26 ; D2820048 846A7F48 v_mul_f32_e32 v27, v26, v27 ; 1036371A image_sample v73, 8, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[60:67], s[52:55] ; F0800800 01AF4914 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v73, 2.0, v73, -1.0 ; D2820049 03CE92F4 v_mad_f32 v27, v73, v65, -v27 ; D282001B 846E8349 v_mul_f32_e64 v73, v72, -v27 ; D2100049 40023748 v_mad_f32 v67, 2.0, v73, v67 ; D2820043 050E92F4 v_mul_f32_e32 v64, v67, v64 ; 10808143 v_mul_f32_e32 v67, v72, v72 ; 10869148 v_mad_f32 v64, v67, v28, v64 ; D2820040 05023943 v_mul_f32_e32 v64, 0.5, v64 ; 108080F0 v_mul_f32_e32 v26, v26, v26 ; 1034351A v_mad_f32 v22, v24, v65, -v26 ; D2820016 846A8318 v_add_f32_e32 v22, v22, v66 ; 062C8516 v_mul_f32_e32 v23, v27, v27 ; 102E371B v_mad_f32 v22, v22, v28, -v23 ; D2820016 845E3916 v_rcp_f32_e32 v23, v22 ; 7E2E5516 v_mul_f32_e32 v24, v23, v64 ; 10308117 v_mov_b32_e32 v25, 0xbfb8aa65 ; 7E3202FF BFB8AA65 v_mul_f32_e32 v24, v25, v24 ; 10303119 v_exp_f32_e32 v24, v24 ; 7E304B18 v_max_f32_e32 v26, 0x33d6bf95, v22 ; 20342CFF 33D6BF95 v_rsq_clamp_f32_e32 v26, v26 ; 7E34591A v_mul_f32_e32 v24, v24, v26 ; 10303518 v_mov_b32_e32 v26, 0xc1800000 ; 7E3402FF C1800000 v_mad_f32 v23, v64, v23, v26 ; D2820017 046A2F40 v_cmp_ge_f32_e64 s[8:9], v23, 0 ; D00C0008 00010117 v_cndmask_b32_e64 v23, 0, -1, s[8:9] ; D2000817 00218280 v_cmp_ne_i32_e64 s[8:9], v23, 0 ; D10A0008 00010117 v_cndmask_b32_e64 v23, 0, 1.0, s[8:9] ; D2000817 0021E480 v_cmp_ge_f32_e64 s[8:9], -v22, 0 ; D00C0008 20010116 v_cndmask_b32_e64 v22, 0, -1, s[8:9] ; D2000016 00218280 v_cmp_ne_i32_e64 s[8:9], v22, 0 ; D10A0008 00010116 v_cndmask_b32_e64 v22, 0, 1.0, s[8:9] ; D2000016 0021E480 v_add_f32_e32 v22, v22, v23 ; 062C2F16 v_cmp_ge_f32_e64 s[8:9], -v22, 0 ; D00C0008 20010116 v_cndmask_b32_e64 v22, 0, -1, s[8:9] ; D2000016 00218280 v_cmp_ne_i32_e64 s[8:9], v22, 0 ; D10A0008 00010116 v_cndmask_b32_e64 v22, 0, v24, s[8:9] ; D2000016 00223080 v_mul_f32_e32 v23, v71, v31 ; 102E3F47 v_mad_f32 v23, v29, v62, v23 ; D2820017 045E7D1D v_mad_f32 v23, v30, v57, v23 ; D2820017 045E731E v_sub_f32_e32 v23, 1.0, v23 ; 082E2EF2 v_mul_f32_e32 v24, v23, v23 ; 10302F17 v_mul_f32_e32 v24, v24, v24 ; 10303118 v_mul_f32_e32 v23, v24, v23 ; 102E2F18 v_mul_f32_e32 v22, v23, v22 ; 102C2D17 v_add_f32_e64 v22, 0, v22 clamp ; D2060816 00022C80 v_mul_f32_e32 v23, v31, v69 ; 102E8B1F v_mad_f32 v23, v42, v30, v23 ; D2820017 045E3D2A v_mad_f32 v23, v60, v29, v23 ; D2820017 045E3B3C v_add_f32_e64 v23, 0, v23 clamp ; D2060817 00022E80 v_mad_f32 v24, v22, v23, v40 ; D2820018 04A22F16 v_mul_f32_e32 v20, s59, v15 ; 10281E3B image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[72:79], s[68:71] ; F0800F00 02321B14 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v31, 2.0, v27, -1.0 ; D282001F 03CE36F4 v_mul_f32_e32 v31, s4, v31 ; 103E3E04 v_mad_f32 v40, 2.0, v28, -1.0 ; D2820028 03CE38F4 v_mul_f32_e32 v40, s4, v40 ; 10505004 v_mul_f32_e32 v64, v40, v40 ; 10805128 v_mad_f32 v67, v31, v31, v64 ; D2820043 05023F1F v_add_f32_e32 v67, 1.0, v67 ; 068686F2 v_add_f32_e32 v67, 0, v67 ; 06868680 v_rsq_clamp_f32_e32 v67, v67 ; 7E865943 v_mul_f32_e32 v69, v67, v40 ; 108A5143 v_mul_f32_e32 v71, v67, v31 ; 108E3F43 v_mul_f32_e32 v72, v33, v71 ; 10908F21 v_mad_f32 v72, v69, v34, v72 ; D2820048 05224545 v_mad_f32 v18, v67, v34, v72 ; D2820012 05224543 v_mul_f32_e32 v72, v32, v69 ; 10908B20 v_mad_f32 v72, v67, v33, v72 ; D2820048 05224343 v_mad_f32 v17, v71, v34, v72 ; D2820011 05224547 v_mul_f32_e32 v72, v32, v71 ; 10908F20 v_mad_f32 v72, v69, -v33, -v72 ; D2820048 C5224345 v_mad_f32 v16, v67, v32, v72 ; D2820010 05224143 v_cubeid_f32 v75, v16, v17, v18 ; D288004B 044A2310 v_cubema_f32 v74, v16, v17, v18 ; D28E004A 044A2310 v_cubesc_f32 v73, v16, v17, v18 ; D28A0049 044A2310 v_cubetc_f32 v72, v16, v17, v18 ; D28C0048 044A2310 v_rcp_f32_e64 v16, |v74| ; D3540110 0000014A v_mad_f32 v74, v72, v16, v44 ; D282004A 04B22148 v_mad_f32 v73, v73, v16, v44 ; D2820049 04B22149 image_sample v[16:18], 7, 0, 0, 0, 0, 0, 0, 0, v[73:76], s[32:39], s[28:31] ; F0800700 00E81049 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v19, v18 ; 7E264F12 v_mul_legacy_f32_e32 v19, v4, v19 ; 0E262704 v_exp_f32_e32 v19, v19 ; 7E264B13 v_mad_f32 v72, v34, s5, -v38 ; D2820048 84980B22 v_mad_f32 v72, v33, s56, v72 ; D2820048 05207121 v_mul_f32_e32 v72, v71, v72 ; 10909147 v_mad_f32 v37, v37, v69, v72 ; D2820025 05228B25 v_mad_f32 v37, v39, v67, v37 ; D2820025 04968727 v_add_f32_e64 v37, 0, v37 clamp ; D2060825 00024A80 v_add_f32_e32 v19, v19, v37 ; 06264B13 v_mul_f32_e32 v19, v19, v41 ; 10265313 v_mad_f32 v58, v58, v63, -v40 ; D282003A 84A27F3A v_mad_f32 v64, v30, v65, -v64 ; D2820040 8502831E v_add_f32_e32 v64, v64, v66 ; 06808540 v_mul_f32_e32 v72, v64, v58 ; 10907540 v_mad_f32 v73, v34, s57, -v59 ; D2820049 84EC7322 v_mad_f32 v73, v33, s58, v73 ; D2820049 05247521 v_mad_f32 v74, v34, v48, -v61 ; D282004A 84F66122 v_mad_f32 v74, v33, v56, v74 ; D282004A 052A7121 v_add_f32_e32 v75, v73, v74 ; 06969549 v_mad_f32 v75, v75, v63, -v31 ; D282004B 847E7F4B v_mul_f32_e32 v40, v31, v40 ; 1050511F image_sample v20, 8, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[60:67], s[52:55] ; F0800800 01AF1414 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v20, 2.0, v20, -1.0 ; D2820014 03CE28F4 v_mad_f32 v20, v20, v65, -v40 ; D2820014 84A28314 v_mul_f32_e64 v21, v75, -v20 ; D2100015 4002294B v_mad_f32 v21, 2.0, v21, v72 ; D2820015 05222AF4 v_mul_f32_e32 v21, v21, v58 ; 102A7515 v_mul_f32_e32 v40, v75, v75 ; 1050974B v_mad_f32 v21, v40, v64, v21 ; D2820015 04568128 v_mul_f32_e32 v21, 0.5, v21 ; 102A2AF0 v_mul_f32_e32 v31, v31, v31 ; 103E3F1F v_mad_f32 v27, v29, v65, -v31 ; D282001B 847E831D v_add_f32_e32 v27, v27, v66 ; 0636851B v_mul_f32_e32 v20, v20, v20 ; 10282914 v_mad_f32 v20, v27, v64, -v20 ; D2820014 8452811B v_rcp_f32_e32 v27, v20 ; 7E365514 v_mul_f32_e32 v28, v27, v21 ; 10382B1B v_mul_f32_e32 v28, v25, v28 ; 10383919 v_exp_f32_e32 v28, v28 ; 7E384B1C v_max_f32_e32 v29, 0x33d6bf95, v20 ; 203A28FF 33D6BF95 v_rsq_clamp_f32_e32 v29, v29 ; 7E3A591D v_mul_f32_e32 v28, v28, v29 ; 10383B1C v_mad_f32 v21, v21, v27, v26 ; D2820015 046A3715 v_cmp_ge_f32_e64 s[8:9], v21, 0 ; D00C0008 00010115 v_cndmask_b32_e64 v21, 0, -1, s[8:9] ; D2000815 00218280 v_cmp_ne_i32_e64 s[8:9], v21, 0 ; D10A0008 00010115 v_cndmask_b32_e64 v21, 0, 1.0, s[8:9] ; D2000815 0021E480 v_cmp_ge_f32_e64 s[8:9], -v20, 0 ; D00C0008 20010114 v_cndmask_b32_e64 v20, 0, -1, s[8:9] ; D2000014 00218280 v_cmp_ne_i32_e64 s[8:9], v20, 0 ; D10A0008 00010114 v_cndmask_b32_e64 v20, 0, 1.0, s[8:9] ; D2000014 0021E480 v_add_f32_e32 v20, v20, v21 ; 06282B14 v_cmp_ge_f32_e64 s[8:9], -v20, 0 ; D00C0008 20010114 v_cndmask_b32_e64 v20, 0, -1, s[8:9] ; D2000014 00218280 v_cmp_ne_i32_e64 s[8:9], v20, 0 ; D10A0008 00010114 v_cndmask_b32_e64 v20, 0, v28, s[8:9] ; D2000014 00223880 v_mul_f32_e32 v21, v74, v71 ; 102A8F4A v_mad_f32 v21, v67, v62, v21 ; D2820015 04567D43 v_mad_f32 v21, v69, v57, v21 ; D2820015 04567345 v_sub_f32_e32 v21, 1.0, v21 ; 082A2AF2 v_mul_f32_e32 v27, v21, v21 ; 10362B15 v_mul_f32_e32 v27, v27, v27 ; 1036371B v_mul_f32_e32 v21, v27, v21 ; 102A2B1B v_mul_f32_e32 v20, v21, v20 ; 10282915 v_add_f32_e64 v20, 0, v20 clamp ; D2060814 00022880 v_mul_f32_e32 v21, v71, v73 ; 102A9347 v_mad_f32 v21, v42, v69, v21 ; D2820015 04568B2A v_mad_f32 v21, v60, v67, v21 ; D2820015 0456873C v_add_f32_e64 v21, 0, v21 clamp ; D2060815 00022A80 v_mad_f32 v19, v20, v21, v19 ; D2820013 044E2B14 v_mul_f32_e32 v19, v19, v11 ; 10261713 v_mad_f32 v19, v24, v9, v19 ; D2820013 044E1318 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v28, s8, v13 ; 10381A08 v_mul_f32_e32 v27, s8, v15 ; 10361E08 image_sample v[27:30], 15, 0, 0, 0, 0, 0, 0, 0, v[27:28], s[44:51], s[40:43] ; F0800F00 014B1B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v24, 2.0, v27, -1.0 ; D2820018 03CE36F4 v_mul_f32_e32 v24, s4, v24 ; 10303004 v_mad_f32 v31, 2.0, v28, -1.0 ; D282001F 03CE38F4 v_mul_f32_e32 v31, s4, v31 ; 103E3E04 v_mul_f32_e32 v40, v31, v31 ; 10503F1F v_mad_f32 v42, v24, v24, v40 ; D282002A 04A23118 v_add_f32_e32 v42, 1.0, v42 ; 065454F2 v_add_f32_e32 v42, 0, v42 ; 06545480 v_rsq_clamp_f32_e32 v42, v42 ; 7E54592A v_mul_f32_e32 v57, v42, v31 ; 10723F2A v_mul_f32_e32 v58, v42, v24 ; 1074312A v_mul_f32_e32 v64, v32, v58 ; 10807520 v_mad_f32 v64, v57, -v33, -v64 ; D2820040 C5024339 v_mad_f32 v73, v42, v34, v64 ; D2820049 0502452A v_mul_f32_e32 v64, v33, v58 ; 10807521 v_mad_f32 v64, v42, v33, v64 ; D2820040 0502432A v_mad_f32 v72, v57, v34, v64 ; D2820048 05024539 v_mul_f32_e32 v64, v32, v57 ; 10807320 v_mad_f32 v64, v42, v32, v64 ; D2820040 0502412A v_mad_f32 v71, v58, v34, v64 ; D2820047 0502453A v_mad_f32 v64, v33, v48, v70 ; D2820040 051A6121 v_mad_f32 v64, -v32, v56, v64 ; D2820040 25027120 v_mul_f32_e32 v67, v64, v58 ; 10867540 v_mad_f32 v61, v34, v48, v61 ; D282003D 04F66122 v_mad_f32 v61, -v33, v56, v61 ; D282003D 24F67121 v_mad_f32 v67, v57, v61, v67 ; D2820043 050E7B39 v_mad_f32 v62, v42, v62, v67 ; D282003E 050E7D2A v_sub_f32_e32 v62, 1.0, v62 ; 087C7CF2 v_mul_f32_e32 v67, v62, v62 ; 10867D3E v_mul_f32_e32 v74, v67, v67 ; 10948743 v_cubeid_f32 v78, v71, v72, v73 ; D288004E 05269147 v_cubema_f32 v77, v71, v72, v73 ; D28E004D 05269147 v_cubesc_f32 v76, v71, v72, v73 ; D28A004C 05269147 v_cubetc_f32 v75, v71, v72, v73 ; D28C004B 05269147 v_rcp_f32_e64 v67, |v77| ; D3540143 0000014D v_mad_f32 v77, v75, v67, v44 ; D282004D 04B2874B v_mad_f32 v76, v76, v67, v44 ; D282004C 04B2874C image_sample v[75:77], 7, 0, 0, 0, 0, 0, 0, 0, v[76:79], s[32:39], s[28:31] ; F0800700 00E84B4C s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v67, v77 ; 7E864F4D v_mul_legacy_f32_e32 v67, v4, v67 ; 0E868704 v_exp_f32_e32 v67, v67 ; 7E864B43 v_mad_f32 v36, v33, s5, v36 ; D2820024 04900B21 v_mad_f32 v36, -v32, s56, v36 ; D2820024 24907120 v_mul_f32_e32 v36, v58, v36 ; 1048493A v_mad_f32 v38, v34, s5, v38 ; D2820026 04980B22 v_mad_f32 v38, -v33, s56, v38 ; D2820026 24987121 v_mad_f32 v36, v38, v57, v36 ; D2820024 04927326 v_mad_f32 v36, v39, v42, v36 ; D2820024 04925527 v_add_f32_e64 v36, 0, v36 clamp ; D2060824 00024880 v_add_f32_e32 v38, v67, v36 ; 064C4943 v_mul_f32_e32 v38, v38, v41 ; 104C5326 v_mad_f32 v59, v34, s57, v59 ; D282003B 04EC7322 v_mad_f32 v59, -v33, s58, v59 ; D282003B 24EC7521 v_add_f32_e32 v61, v59, v61 ; 067A7B3B v_mad_f32 v61, v61, v63, -v31 ; D282003D 847E7F3D v_add_f32_e32 v67, 0, v30 ; 06863C80 v_mad_f32 v40, v67, v65, -v40 ; D2820028 84A28343 v_add_f32_e32 v40, v40, v66 ; 06508528 v_mul_f32_e32 v67, v40, v61 ; 10867B28 v_mad_f32 v68, v33, s57, v68 ; D2820044 05107321 v_mad_f32 v68, -v32, s58, v68 ; D2820044 25107520 v_add_f32_e32 v64, v68, v64 ; 06808144 v_mad_f32 v63, v64, v63, -v24 ; D282003F 84627F40 v_mul_f32_e32 v31, v24, v31 ; 103E3F18 v_mad_f32 v64, 2.0, v30, -1.0 ; D2820040 03CE3CF4 v_mad_f32 v31, v64, v65, -v31 ; D282001F 847E8340 v_mul_f32_e64 v64, v63, -v31 ; D2100040 40023F3F v_mad_f32 v64, 2.0, v64, v67 ; D2820040 050E80F4 v_mul_f32_e32 v61, v64, v61 ; 107A7B40 v_mul_f32_e32 v63, v63, v63 ; 107E7F3F v_mad_f32 v61, v63, v40, v61 ; D282003D 04F6513F v_mul_f32_e32 v61, 0.5, v61 ; 107A7AF0 v_add_f32_e32 v27, 0, v29 ; 06363A80 v_mul_f32_e32 v24, v24, v24 ; 10303118 v_mad_f32 v24, v27, v65, -v24 ; D2820018 8462831B v_add_f32_e32 v24, v24, v66 ; 06308518 v_mul_f32_e32 v27, v31, v31 ; 10363F1F v_mad_f32 v24, v24, v40, -v27 ; D2820018 846E5118 v_rcp_f32_e32 v27, v24 ; 7E365518 v_mul_f32_e32 v28, v27, v61 ; 10387B1B v_mul_f32_e32 v25, v25, v28 ; 10323919 v_exp_f32_e32 v25, v25 ; 7E324B19 v_max_f32_e32 v28, 0x33d6bf95, v24 ; 203830FF 33D6BF95 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v25, v25, v28 ; 10323919 v_mad_f32 v26, v61, v27, v26 ; D282001A 046A373D v_cmp_ge_f32_e64 s[4:5], v26, 0 ; D00C0004 0001011A v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_ne_i32_e64 s[4:5], v26, 0 ; D10A0004 0001011A v_cndmask_b32_e64 v26, 0, 1.0, s[4:5] ; D200001A 0011E480 v_cmp_ge_f32_e64 s[4:5], -v24, 0 ; D00C0004 20010118 v_cndmask_b32_e64 v24, 0, -1, s[4:5] ; D2000018 00118280 v_cmp_ne_i32_e64 s[4:5], v24, 0 ; D10A0004 00010118 v_cndmask_b32_e64 v24, 0, 1.0, s[4:5] ; D2000018 0011E480 v_add_f32_e32 v24, v24, v26 ; 06303518 v_cmp_ge_f32_e64 s[4:5], -v24, 0 ; D00C0004 20010118 v_cndmask_b32_e64 v24, 0, -1, s[4:5] ; D2000018 00118280 v_cmp_ne_i32_e64 s[4:5], v24, 0 ; D10A0004 00010118 v_cndmask_b32_e64 v24, 0, v25, s[4:5] ; D2000018 00123280 v_mul_f32_e32 v25, v74, v62 ; 10327D4A v_mul_f32_e32 v24, v25, v24 ; 10303119 v_add_f32_e64 v24, 0, v24 clamp ; D2060818 00023080 v_mul_f32_e32 v25, v58, v68 ; 1032893A v_mad_f32 v25, v59, v57, v25 ; D2820019 0466733B v_mad_f32 v25, v60, v42, v25 ; D2820019 0466553C v_add_f32_e64 v25, 0, v25 clamp ; D2060819 00023280 v_mad_f32 v26, v24, v25, v38 ; D282001A 049A3318 v_mad_f32 v19, v26, v14, v19 ; D2820013 044E1D1A v_mov_b32_e32 v35, v55 ; 7E460337 v_cubeid_f32 v60, v32, v33, v34 ; D288003C 048A4320 v_cubema_f32 v59, v32, v33, v34 ; D28E003B 048A4320 v_cubesc_f32 v58, v32, v33, v34 ; D28A003A 048A4320 v_cubetc_f32 v57, v32, v33, v34 ; D28C0039 048A4320 v_rcp_f32_e64 v26, |v59| ; D354011A 0000013B v_mad_f32 v59, v57, v26, v44 ; D282003B 04B23539 v_mad_f32 v58, v58, v26, v44 ; D282003A 04B2353A image_sample v[26:28], 7, 0, 0, 0, 0, 0, 0, 0, v[58:61], s[32:39], s[28:31] ; F0800700 00E81A3A s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v29, v28 ; 7E3A4F1C v_mul_legacy_f32_e32 v29, v4, v29 ; 0E3A3B04 v_exp_f32_e32 v29, v29 ; 7E3A4B1D v_add_f32_e64 v30, 0, v39 clamp ; D206081E 00024E80 v_add_f32_e32 v29, v30, v29 ; 063A3B1E v_log_f32_e32 v31, v46 ; 7E3E4F2E v_mul_legacy_f32_e32 v31, v4, v31 ; 0E3E3F04 v_exp_f32_e32 v31, v31 ; 7E3E4B1F v_mul_f32_e32 v31, v31, v11 ; 103E171F v_log_f32_e32 v32, v50 ; 7E404F32 v_mul_legacy_f32_e32 v32, v4, v32 ; 0E404104 v_exp_f32_e32 v32, v32 ; 7E404B20 v_mad_f32 v31, v32, v9, v31 ; D282001F 047E1320 v_log_f32_e32 v32, v53 ; 7E404F35 v_mul_legacy_f32_e32 v32, v4, v32 ; 0E404104 v_exp_f32_e32 v32, v32 ; 7E404B20 v_mad_f32 v31, v32, v14, v31 ; D282001F 047E1D20 v_log_f32_e32 v32, v45 ; 7E404F2D v_mul_legacy_f32_e32 v32, v4, v32 ; 0E404104 v_exp_f32_e32 v32, v32 ; 7E404B20 v_mul_f32_e32 v32, v32, v11 ; 10401720 v_log_f32_e32 v33, v49 ; 7E424F31 v_mul_legacy_f32_e32 v33, v4, v33 ; 0E424304 v_exp_f32_e32 v33, v33 ; 7E424B21 v_mad_f32 v32, v33, v9, v32 ; D2820020 04821321 v_log_f32_e32 v33, v52 ; 7E424F34 v_mul_legacy_f32_e32 v33, v4, v33 ; 0E424304 v_exp_f32_e32 v33, v33 ; 7E424B21 v_mad_f32 v32, v33, v14, v32 ; D2820020 04821D21 v_mul_f32_e32 v33, 0x3e99999a, v32 ; 104240FF 3E99999A v_mov_b32_e32 v34, 0x3f170a3d ; 7E4402FF 3F170A3D v_mad_f32 v33, v34, v31, v33 ; D2820021 04863F22 v_mov_b32_e32 v34, 0x3de147ae ; 7E4402FF 3DE147AE v_mad_f32 v33, v34, v41, v33 ; D2820021 04865322 v_sub_f32_e32 v33, 1.0, v33 ; 084242F2 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v34, s4, v33 ; 10444204 v_mad_f32 v19, v34, v29, v19 ; D2820013 044E3B22 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v29, s4, v5 ; 083A0A04 s_buffer_load_dword s5, s[0:3], 0x34 ; C2028134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v34, s5, v29 ; 10443A05 v_add_f32_e64 v34, 0, v34 clamp ; D2060822 00024480 s_buffer_load_dword s5, s[0:3], 0x2e ; C202812E s_buffer_load_dword s8, s[0:3], 0x32 ; C2040132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v35, s5 ; 7E460205 v_sub_f32_e32 v35, s8, v35 ; 08464608 v_mad_f32 v35, v34, v35, s5 ; D2820023 00164722 v_mul_f32_e32 v35, v35, v19 ; 10462723 v_rcp_f32_e64 v38, -v56 ; D3540026 20000138 v_mul_f32_e32 v29, v38, v29 ; 103A3B26 v_mul_f32_e32 v38, v48, v29 ; 104C3B30 v_mul_f32_e32 v39, v43, v29 ; 104E3B2B v_mul_f32_e32 v39, v39, v39 ; 104E4F27 v_mad_f32 v38, v38, v38, v39 ; D2820026 049E4D26 v_mul_f32_e32 v29, v56, v29 ; 103A3B38 v_mad_f32 v29, v29, v29, v38 ; D282001D 049A3B1D v_max_f32_e32 v29, 0x33d6bf95, v29 ; 203A3AFF 33D6BF95 v_rsq_clamp_f32_e32 v38, v29 ; 7E4C591D v_mul_f32_e32 v38, v29, v38 ; 104C4D1D v_xor_b32_e32 v12, v29, v12 ; 3A18191D v_cmp_gt_f32_e32 vcc, 0, v12 ; 7C081880 v_cndmask_b32_e64 v12, 0, v38, vcc ; D200000C 01AA4C80 s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e64 v12, -v12, s5 ; D210000C 20000B0C v_mul_f32_e32 v12, 0x3fb8aa65, v12 ; 101818FF 3FB8AA65 v_exp_f32_e32 v12, v12 ; 7E184B0C v_mul_f32_e32 v29, v56, v56 ; 103A7138 v_mul_f32_e32 v29, v29, v29 ; 103A3B1D v_mad_f32 v29, -v56, v29, 1.0 ; D282001D 23CA3B38 v_sub_f32_e32 v29, 1.0, v29 ; 083A3AF2 v_mul_f32_e32 v12, v29, v12 ; 1018191D v_mul_f32_e32 v29, v35, v12 ; 103A1923 s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v13, s5, v13 ; 0A1A1A05 s_buffer_load_dword s5, s[0:3], 0x0 ; C2028100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v15, s5, v15 ; 0A1E1E05 v_mul_f32_e32 v15, v15, v15 ; 101E1F0F v_mad_f32 v13, v13, v13, v15 ; D282000D 043E1B0D s_buffer_load_dword s5, s[0:3], 0x2 ; C2028102 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v15, s5, v5 ; 0A1E0A05 v_mad_f32 v13, v15, v15, v13 ; D282000D 04361F0F s_buffer_load_dword s5, s[0:3], 0x41 ; C2028141 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s5, v13 ; 101A1A05 s_buffer_load_dword s5, s[0:3], 0x40 ; C2028140 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v35, s5, v15 ; 10461E05 v_mul_f32_e32 v35, 0x3fb8aa65, v35 ; 104646FF 3FB8AA65 v_exp_f32_e32 v35, v35 ; 7E464B23 v_sub_f32_e32 v35, 1.0, v35 ; 084646F2 v_mul_f32_e32 v13, v13, v35 ; 101A470D v_rcp_f32_e32 v15, v15 ; 7E1E550F v_mul_f32_e32 v13, v13, v15 ; 101A1F0D v_mul_f32_e32 v13, 0x3fb8aa65, v13 ; 101A1AFF 3FB8AA65 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_sub_f32_e32 v13, 1.0, v13 ; 081A1AF2 v_interp_p1_f32 v15, v0, 1, 3, [m0] ; C83C0D00 v_interp_p2_f32 v15, [v15], v1, 1, 3, [m0] ; C83D0D01 v_mad_f32 v38, 0.5, v15, 0.5 ; D2820026 03C21EF0 s_buffer_load_dword s5, s[0:3], 0x43 ; C2028143 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v39, s5 ; 7E4E0205 v_readlane_b32 s8, v83, 4 ; 02110953 v_readlane_b32 s9, v83, 5 ; 02130B53 v_readlane_b32 s10, v83, 6 ; 02150D53 v_readlane_b32 s11, v83, 7 ; 02170F53 v_readlane_b32 s12, v83, 8 ; 02191153 v_readlane_b32 s13, v83, 9 ; 021B1353 v_readlane_b32 s14, v83, 10 ; 021D1553 v_readlane_b32 s15, v83, 11 ; 021F1753 s_nop 2 ; BF800002 image_sample v15, 1, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[8:15], s[24:27] ; F0800100 00C20F26 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, -v13, v15, 1.0 ; D2820023 23CA1F0D v_mul_f32_e32 v29, v29, v35 ; 103A471D v_subrev_f32_e32 v5, s4, v5 ; 0A0A0A04 v_cmp_ge_f32_e64 s[4:5], v5, 0 ; D00C0004 00010105 v_cndmask_b32_e64 v5, 0, -1, s[4:5] ; D2000805 00118280 v_cmp_ne_i32_e64 s[4:5], v5, 0 ; D10A0004 00010105 v_cndmask_b32_e64 v5, v29, v19, s[4:5] ; D2000805 0812271D v_log_f32_e64 v5, |v5| ; D34E0105 00000105 v_mul_f32_e32 v5, 0x3ee8ba1f, v5 ; 100A0AFF 3EE8BA1F v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_sub_f32_e32 v19, 1.0, v2 ; 082604F2 v_interp_p1_f32 v39, v0, 1, 5, [m0] ; C89C1500 v_interp_p2_f32 v39, [v39], v1, 1, 5, [m0] ; C89D1501 v_interp_p1_f32 v38, v0, 0, 5, [m0] ; C8981400 v_interp_p2_f32 v38, [v38], v1, 0, 5, [m0] ; C8991401 s_load_dwordx8 s[8:15], s[6:7], 0x40 ; C0C40740 v_readlane_b32 s16, v83, 0 ; 02210153 v_readlane_b32 s17, v83, 1 ; 02230353 v_readlane_b32 s18, v83, 2 ; 02250553 v_readlane_b32 s19, v83, 3 ; 02270753 s_nop 2 ; BF800002 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[38:40], 7, 0, 0, 0, 0, 0, 0, 0, v[38:39], s[8:15], s[16:19] ; F0800700 00822626 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v40, v19 ; 10002728 v_mad_f32 v43, v2, v5, v0 ; D282002B 04020B02 v_log_f32_e32 v0, v7 ; 7E004F07 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e32 v0, v0, v10 ; 06001500 v_mul_f32_e32 v0, v0, v31 ; 10003F00 v_mad_f32 v0, v22, v23, v0 ; D2820000 04022F16 v_log_f32_e32 v1, v17 ; 7E024F11 v_mul_legacy_f32_e32 v1, v4, v1 ; 0E020304 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v1, v37 ; 06024B01 v_mul_f32_e32 v1, v1, v31 ; 10023F01 v_mad_f32 v1, v20, v21, v1 ; D2820001 04062B14 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mad_f32 v0, v0, v9, v1 ; D2820000 04061300 v_log_f32_e32 v1, v76 ; 7E024F4C v_mul_legacy_f32_e32 v1, v4, v1 ; 0E020304 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v1, v36 ; 06024901 v_mul_f32_e32 v1, v1, v31 ; 10023F01 v_mad_f32 v1, v24, v25, v1 ; D2820001 04063318 v_mad_f32 v0, v1, v14, v0 ; D2820000 04021D01 v_log_f32_e32 v1, v27 ; 7E024F1B v_mul_legacy_f32_e32 v1, v4, v1 ; 0E020304 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v30, v1 ; 0602031E s_buffer_load_dword s8, s[0:3], 0x1d ; C204011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v33 ; 100A4208 v_mad_f32 v0, v5, v1, v0 ; D2820000 04020305 s_buffer_load_dword s8, s[0:3], 0x2d ; C204012D s_buffer_load_dword s9, s[0:3], 0x31 ; C2048131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s8 ; 7E020208 v_sub_f32_e32 v1, s9, v1 ; 08020209 v_mad_f32 v1, v34, v1, s8 ; D2820001 00220322 v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mul_f32_e32 v1, v1, v35 ; 10024701 v_cndmask_b32_e64 v0, v1, v0, s[4:5] ; D2000000 08120101 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v1, v39, v19 ; 10022727 v_mad_f32 v42, v2, v0, v1 ; D282002A 04060102 v_log_f32_e32 v0, v6 ; 7E004F06 v_mul_legacy_f32_e32 v0, v4, v0 ; 0E000104 v_exp_f32_e32 v0, v0 ; 7E004B00 v_add_f32_e32 v0, v0, v10 ; 06001500 v_mul_f32_e32 v0, v0, v32 ; 10004100 v_mad_f32 v0, v22, v23, v0 ; D2820000 04022F16 v_log_f32_e32 v1, v16 ; 7E024F10 v_mul_legacy_f32_e32 v1, v4, v1 ; 0E020304 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v1, v37 ; 06024B01 v_mul_f32_e32 v1, v1, v32 ; 10024101 v_mad_f32 v1, v20, v21, v1 ; D2820001 04062B14 v_mul_f32_e32 v1, v1, v11 ; 10021701 v_mad_f32 v0, v0, v9, v1 ; D2820000 04061300 v_log_f32_e32 v1, v75 ; 7E024F4B v_mul_legacy_f32_e32 v1, v4, v1 ; 0E020304 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v1, v36 ; 06024901 v_mul_f32_e32 v1, v1, v32 ; 10024101 v_mad_f32 v1, v24, v25, v1 ; D2820001 04063318 v_mad_f32 v0, v1, v14, v0 ; D2820000 04021D01 v_log_f32_e32 v1, v26 ; 7E024F1A v_mul_legacy_f32_e32 v1, v4, v1 ; 0E020304 v_exp_f32_e32 v1, v1 ; 7E024B01 v_add_f32_e32 v1, v30, v1 ; 0602031E s_buffer_load_dword s8, s[0:3], 0x1c ; C204011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s8, v33 ; 10084208 v_mad_f32 v0, v4, v1, v0 ; D2820000 04020304 s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C s_buffer_load_dword s9, s[0:3], 0x30 ; C2048130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s8 ; 7E020208 v_sub_f32_e32 v1, s9, v1 ; 08020209 v_mad_f32 v1, v34, v1, s8 ; D2820001 00220322 v_mul_f32_e32 v1, v1, v0 ; 10020101 v_mul_f32_e32 v1, v1, v12 ; 10021901 v_mul_f32_e32 v1, v1, v35 ; 10024701 v_cndmask_b32_e64 v0, v1, v0, s[4:5] ; D2000000 08120101 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_mul_f32_e32 v1, v38, v19 ; 10022726 v_mad_f32 v41, v2, v0, v1 ; D2820029 04060102 s_load_dwordx8 s[8:15], s[6:7], 0x48 ; C0C40748 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:6], 7, 0, 0, 0, 0, 0, 0, 0, v[41:44], s[8:15], s[96:99] ; F0800700 03020429 v_mad_f32 v0, -v3, v2, v2 ; D2820000 240A0503 v_sub_f32_e32 v1, 1.0, v0 ; 080200F2 v_mul_f32_e32 v2, v42, v1 ; 1004032A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v0, v5, v2 ; D2820002 040A0B00 s_buffer_load_dword s6, s[0:3], 0x3d ; C203013D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s6, v2 ; 08060406 v_mul_f32_e32 v7, v15, v13 ; 100E1B0F v_cndmask_b32_e64 v7, 0, v7, s[4:5] ; D2000807 00120E80 v_mad_f32 v2, v7, v3, v2 ; D2820002 040A0707 v_mul_f32_e32 v3, v41, v1 ; 10060329 v_mad_f32 v3, v0, v4, v3 ; D2820003 040E0900 s_buffer_load_dword s6, s[0:3], 0x3c ; C203013C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v8, s6, v3 ; 08100606 v_mad_f32 v3, v7, v8, v3 ; D2820003 040E1107 v_cvt_pkrtz_f16_f32_e32 v2, v3, v2 ; 5E040503 v_mul_f32_e32 v1, v43, v1 ; 1002032B v_mad_f32 v0, v0, v6, v1 ; D2820000 04060D00 s_buffer_load_dword s0, s[0:3], 0x3e ; C200013E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v1, s0, v0 ; 08020000 v_mad_f32 v0, v7, v1, v0 ; D2820000 04020307 v_cndmask_b32_e64 v1, 0, 1.0, s[4:5] ; D2000801 0011E480 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v2, v0, v2, v0 ; F8001C0F 00020002 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL SAMP[0] DCL CONST[0..15] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 0.5000, 0.0000, 255.0000, -0.5000} IMM[1] FLT32 { 0.5000, -0.5000, 1.0000, 0.0000} 0: ADD TEMP[0].xy, CONST[15], IN[0] 1: MOV TEMP[1].xy, TEMP[0].xyxx 2: ADD TEMP[0].zw, TEMP[0].xyxy, CONST[15] 3: MOV TEMP[1].zw, TEMP[0].wwzw 4: MUL TEMP[0].xy, TEMP[1], CONST[9] 5: MOV TEMP[1].xy, TEMP[0].xyxx 6: ADD TEMP[2].zw, TEMP[1], IMM[0].xxxx 7: MUL TEMP[2].xy, TEMP[2].zwzw, CONST[10] 8: MOV TEMP[3].xy, TEMP[2].xyyy 9: MOV TEMP[3].w, IMM[0].yyyy 10: TXL TEMP[3].w, TEMP[3], SAMP[0], 2D 11: MUL TEMP[3].z, TEMP[3].wwww, CONST[9].zzzz 12: MUL TEMP[3].z, TEMP[3].zzzz, IMM[0].zzzz 13: MUL TEMP[2], TEMP[0].yyyy, CONST[12] 14: MAD TEMP[2], TEMP[0].xxxx, CONST[11], TEMP[2] 15: MAD TEMP[1], TEMP[3].zzzz, CONST[13], TEMP[2] 16: ADD TEMP[1], TEMP[1], CONST[14] 17: MUL TEMP[2], TEMP[1].yyyy, CONST[1] 18: MAD TEMP[2], TEMP[1].xxxx, CONST[0], TEMP[2] 19: MAD TEMP[2], TEMP[1].zzzz, CONST[2], TEMP[2] 20: MAD TEMP[2], TEMP[1].wwww, CONST[3], TEMP[2] 21: MOV TEMP[0], TEMP[2] 22: MUL TEMP[2], TEMP[1].yyyy, CONST[5] 23: MAD TEMP[2], TEMP[1].xxxx, CONST[4], TEMP[2] 24: MAD TEMP[2], TEMP[1].zzzz, CONST[6], TEMP[2] 25: MAD TEMP[2], TEMP[1].wwww, CONST[7], TEMP[2] 26: MAD TEMP[1], TEMP[2], IMM[1].xyzz, IMM[0].xxyy 27: MOV OUT[1], TEMP[1] 28: MOV OUT[0], TEMP[0] 29: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %57 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %58 = call float @llvm.SI.load.const(<16 x i8> %12, i32 208) %59 = call float @llvm.SI.load.const(<16 x i8> %12, i32 212) %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 216) %61 = call float @llvm.SI.load.const(<16 x i8> %12, i32 220) %62 = call float @llvm.SI.load.const(<16 x i8> %12, i32 224) %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 228) %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 232) %65 = call float @llvm.SI.load.const(<16 x i8> %12, i32 236) %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 240) %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 244) %68 = call float @llvm.SI.load.const(<16 x i8> %12, i32 248) %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 252) %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %75 = load <16 x i8> addrspace(2)* %74, !tbaa !0 %76 = add i32 %5, %7 %77 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %75, i32 0, i32 %76) %78 = extractelement <4 x float> %77, i32 0 %79 = extractelement <4 x float> %77, i32 1 %80 = fadd float %66, %78 %81 = fadd float %67, %79 %82 = fadd float %80, %68 %83 = fadd float %81, %69 %84 = fmul float %80, %45 %85 = fmul float %81, %46 %86 = fadd float %82, 5.000000e-01 %87 = fadd float %83, 5.000000e-01 %88 = fmul float %86, %48 %89 = fmul float %87, %49 %90 = bitcast float %88 to i32 %91 = bitcast float %89 to i32 %92 = bitcast float 0.000000e+00 to i32 %93 = insertelement <4 x i32> undef, i32 %90, i32 0 %94 = insertelement <4 x i32> %93, i32 %91, i32 1 %95 = insertelement <4 x i32> %94, i32 %92, i32 2 %96 = insertelement <4 x i32> %95, i32 undef, i32 3 %97 = bitcast <8 x i32> %71 to <32 x i8> %98 = bitcast <4 x i32> %73 to <16 x i8> %99 = call <4 x float> @llvm.SI.samplel.v4i32(<4 x i32> %96, <32 x i8> %97, <16 x i8> %98, i32 2) %100 = extractelement <4 x float> %99, i32 3 %101 = fmul float %100, %47 %102 = fmul float %101, 2.550000e+02 %103 = fmul float %85, %54 %104 = fmul float %85, %55 %105 = fmul float %85, %56 %106 = fmul float %85, %57 %107 = fmul float %84, %50 %108 = fadd float %107, %103 %109 = fmul float %84, %51 %110 = fadd float %109, %104 %111 = fmul float %84, %52 %112 = fadd float %111, %105 %113 = fmul float %84, %53 %114 = fadd float %113, %106 %115 = fmul float %102, %58 %116 = fadd float %115, %108 %117 = fmul float %102, %59 %118 = fadd float %117, %110 %119 = fmul float %102, %60 %120 = fadd float %119, %112 %121 = fmul float %102, %61 %122 = fadd float %121, %114 %123 = fadd float %116, %62 %124 = fadd float %118, %63 %125 = fadd float %120, %64 %126 = fadd float %122, %65 %127 = fmul float %124, %17 %128 = fmul float %124, %18 %129 = fmul float %124, %19 %130 = fmul float %124, %20 %131 = fmul float %123, %13 %132 = fadd float %131, %127 %133 = fmul float %123, %14 %134 = fadd float %133, %128 %135 = fmul float %123, %15 %136 = fadd float %135, %129 %137 = fmul float %123, %16 %138 = fadd float %137, %130 %139 = fmul float %125, %21 %140 = fadd float %139, %132 %141 = fmul float %125, %22 %142 = fadd float %141, %134 %143 = fmul float %125, %23 %144 = fadd float %143, %136 %145 = fmul float %125, %24 %146 = fadd float %145, %138 %147 = fmul float %126, %25 %148 = fadd float %147, %140 %149 = fmul float %126, %26 %150 = fadd float %149, %142 %151 = fmul float %126, %27 %152 = fadd float %151, %144 %153 = fmul float %126, %28 %154 = fadd float %153, %146 %155 = fmul float %124, %33 %156 = fmul float %124, %34 %157 = fmul float %124, %35 %158 = fmul float %124, %36 %159 = fmul float %123, %29 %160 = fadd float %159, %155 %161 = fmul float %123, %30 %162 = fadd float %161, %156 %163 = fmul float %123, %31 %164 = fadd float %163, %157 %165 = fmul float %123, %32 %166 = fadd float %165, %158 %167 = fmul float %125, %37 %168 = fadd float %167, %160 %169 = fmul float %125, %38 %170 = fadd float %169, %162 %171 = fmul float %125, %39 %172 = fadd float %171, %164 %173 = fmul float %125, %40 %174 = fadd float %173, %166 %175 = fmul float %126, %41 %176 = fadd float %175, %168 %177 = fmul float %126, %42 %178 = fadd float %177, %170 %179 = fmul float %126, %43 %180 = fadd float %179, %172 %181 = fmul float %126, %44 %182 = fadd float %181, %174 %183 = fmul float %176, 5.000000e-01 %184 = fadd float %183, 5.000000e-01 %185 = fmul float %178, -5.000000e-01 %186 = fadd float %185, 5.000000e-01 %187 = fmul float %180, 1.000000e+00 %188 = fadd float %187, 0.000000e+00 %189 = fmul float %182, 1.000000e+00 %190 = fadd float %189, 0.000000e+00 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %184, float %186, float %188, float %190) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %148, float %150, float %152, float %154) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.samplel.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[8:11], s[8:9], 0x0 ; C0840900 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[0:3], v0, s[8:11], 0 idxen ; E00C2000 80020000 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x3c ; C204013C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s8, v0 ; 06080008 s_buffer_load_dword s8, s[0:3], 0x24 ; C2040124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v4 ; 100A0808 s_buffer_load_dword s8, s[0:3], 0x3d ; C204013D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s8, v1 ; 06000208 s_buffer_load_dword s8, s[0:3], 0x25 ; C2040125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v0 ; 10020008 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s8, v1 ; 10040208 s_buffer_load_dword s8, s[0:3], 0x2c ; C204012C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v5, s8, v2 ; D2820002 04081105 s_buffer_load_dword s8, s[0:3], 0x3f ; C204013F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s8, v0 ; 06000008 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 s_buffer_load_dword s8, s[0:3], 0x29 ; C2040129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v0 ; 100E0008 s_buffer_load_dword s8, s[0:3], 0x3e ; C204013E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s8, v4 ; 06000808 v_add_f32_e32 v0, 0.5, v0 ; 060000F0 s_buffer_load_dword s8, s[0:3], 0x28 ; C2040128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s8, v0 ; 100C0008 v_mov_b32_e32 v8, 0 ; 7E100280 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample_l v0, 8, 0, 0, 0, 0, 0, 0, 0, v[6:9], s[12:19], s[8:11] ; F0900800 00430006 s_buffer_load_dword s4, s[0:3], 0x26 ; C2020126 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mul_f32_e32 v0, s4, v0 ; 10000004 v_mul_f32_e32 v0, 0x437f0000, v0 ; 100000FF 437F0000 s_buffer_load_dword s4, s[0:3], 0x34 ; C2020134 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x38 ; C2020138 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x31 ; C2020131 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v1 ; 10060204 s_buffer_load_dword s4, s[0:3], 0x2d ; C202012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v5, s4, v3 ; D2820003 040C0905 s_buffer_load_dword s4, s[0:3], 0x35 ; C2020135 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s4, v1 ; 100C0204 s_buffer_load_dword s4, s[0:3], 0x2e ; C202012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v6 ; D2820006 04180905 s_buffer_load_dword s4, s[0:3], 0x36 ; C2020136 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v0, s4, v6 ; D2820006 04180900 s_buffer_load_dword s4, s[0:3], 0x3a ; C202013A s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v6, s4, v6 ; 060C0C04 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x2f ; C202012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v5, s4, v1 ; D2820001 04040905 s_buffer_load_dword s4, s[0:3], 0x37 ; C2020137 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s4, s[0:3], 0x3b ; C202013B s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v4 ; D2820001 04100900 v_mad_f32 v1, -0.5, v1, 0.5 ; D2820001 03C202F1 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0x1b ; C202011B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0x1f ; C202011F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 v_add_f32_e32 v4, 0, v4 ; 06080880 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 v_add_f32_e32 v5, 0, v5 ; 060A0A80 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v3 ; 100E0604 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v2, s4, v7 ; D2820007 041C0902 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v6, s4, v7 ; D2820007 041C0906 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 v_mad_f32 v7, 0.5, v7, 0.5 ; D2820007 03C20EF0 exp 15, 32, 0, 0, 0, v7, v1, v5, v4 ; F800020F 04050107 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v3 ; 10020604 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v2, s4, v1 ; D2820001 04040902 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v6, s4, v1 ; D2820001 04040906 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v0, s4, v1 ; D2820001 04040900 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v3 ; 10080604 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v2, s4, v4 ; D2820004 04100902 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v6, s4, v4 ; D2820004 04100906 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s4, v3 ; 100A0604 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v2, s4, v5 ; D2820005 04140902 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v6, s4, v5 ; D2820005 04140906 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v0, s4, v5 ; D2820005 04140900 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v3 ; 10060604 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v2, s4, v3 ; D2820002 040C0902 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v6, s4, v2 ; D2820002 04080906 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s0, v2 ; D2820000 04080100 exp 15, 12, 0, 1, 0, v0, v5, v4, v1 ; F80008CF 01040500 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL TEMP[0], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, 0.0000} 0: MAD TEMP[0].xy, IN[0], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0], TEMP[0], SAMP[0], 2D 3: MOV OUT[0], TEMP[0] 4: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %28 = fmul float %26, 1.000000e+00 %29 = fadd float %28, 0.000000e+00 %30 = fmul float %27, -1.000000e+00 %31 = fadd float %30, 1.000000e+00 %32 = bitcast float %29 to i32 %33 = bitcast float %31 to i32 %34 = insertelement <2 x i32> undef, i32 %32, i32 0 %35 = insertelement <2 x i32> %34, i32 %33, i32 1 %36 = bitcast <8 x i32> %23 to <32 x i8> %37 = bitcast <4 x i32> %25 to <16 x i8> %38 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %35, <32 x i8> %36, <16 x i8> %37, i32 2) %39 = extractelement <4 x float> %38, i32 0 %40 = extractelement <4 x float> %38, i32 1 %41 = extractelement <4 x float> %38, i32 2 %42 = extractelement <4 x float> %38, i32 3 %43 = call i32 @llvm.SI.packf16(float %39, float %40) %44 = bitcast i32 %43 to float %45 = call i32 @llvm.SI.packf16(float %41, float %42) %46 = bitcast i32 %45 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %44, float %46, float %44, float %46) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 0, [m0] ; C8080100 v_interp_p2_f32 v2, [v2], v1, 1, 0, [m0] ; C8090101 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 v_add_f32_e32 v2, 0, v4 ; 06040880 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx8 s[4:11], s[6:7], 0x0 ; C0C20700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[0:3], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[4:11], s[0:3] ; F0800F00 00010002 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v4, v2, v3 ; 5E080702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300 exp 15, 0, 1, 1, 1, v0, v4, v0, v4 ; F8001C0F 04000400 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..254] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 1.0000, 2.0000, -1.0000, 0.0000} 0: F2I TEMP[0].x, IN[2].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: MOV TEMP[0].xyz, CONST[ADDR[0].x+7].xyzx 3: MAD TEMP[1].xy, CONST[6], IN[1].zwzw, TEMP[0] 4: MOV TEMP[1].xy, TEMP[1].xyxx 5: ADD TEMP[2].xy, TEMP[0].zzzz, -CONST[5].xzzw 6: MOV TEMP[0].xy, TEMP[2].xyxx 7: MUL TEMP[2].z, TEMP[0].zzzz, CONST[4].zzzz 8: MOV TEMP[0].z, TEMP[2].zzzz 9: LRP TEMP[2].x, TEMP[2].zzzz, CONST[4].yyyy, CONST[4].xxxx 10: MUL TEMP[3], TEMP[0], CONST[5].ywzw 11: MOV_SAT TEMP[3], TEMP[3] 12: ADD TEMP[4].y, -TEMP[3].yyyy, IMM[0].xxxx 13: MUL TEMP[3].xy, TEMP[4].yyyy, TEMP[3].xxxx 14: MOV TEMP[3].xy, TEMP[3].xyxx 15: MAD TEMP[4].xy, IN[1], IMM[0].yyyy, IMM[0].zzzz 16: MOV TEMP[0].xy, TEMP[4].xyxx 17: MAD TEMP[4].xy, TEMP[0], -TEMP[2].xxxx, IN[0] 18: MUL TEMP[2], TEMP[4].yyyy, CONST[1] 19: MAD TEMP[0], TEMP[4].xxxx, CONST[0], TEMP[2] 20: MAD TEMP[0], CONST[4].wwww, CONST[2], TEMP[0] 21: ADD TEMP[0], TEMP[0], CONST[3] 22: MOV TEMP[1].zw, IMM[0].xxwx 23: MOV TEMP[3].zw, IMM[0].xxwx 24: MOV OUT[1], TEMP[1] 25: MOV OUT[0], TEMP[0] 26: MOV OUT[2], TEMP[3] 27: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %39 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %46 = load <16 x i8> addrspace(2)* %45, !tbaa !0 %47 = add i32 %5, %7 %48 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %46, i32 0, i32 %47) %49 = extractelement <4 x float> %48, i32 0 %50 = extractelement <4 x float> %48, i32 1 %51 = extractelement <4 x float> %48, i32 2 %52 = extractelement <4 x float> %48, i32 3 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = fptosi float %57 to i32 %59 = bitcast i32 %58 to float %60 = bitcast float %59 to i32 %61 = shl i32 %60, 4 %62 = add i32 %61, 112 %63 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %62) %64 = shl i32 %60, 4 %65 = add i32 %64, 116 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = shl i32 %60, 4 %68 = add i32 %67, 120 %69 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %68) %70 = fmul float %37, %51 %71 = fadd float %70, %63 %72 = fmul float %38, %52 %73 = fadd float %72, %66 %74 = fsub float -0.000000e+00, %33 %75 = fadd float %69, %74 %76 = fsub float -0.000000e+00, %35 %77 = fadd float %69, %76 %78 = fmul float %69, %31 %79 = call float @llvm.AMDGPU.lrp(float %78, float %30, float %29) %80 = fmul float %75, %34 %81 = fmul float %77, %36 %82 = fmul float %78, %35 %83 = fmul float 0.000000e+00, %36 %84 = call float @llvm.AMDIL.clamp.(float %80, float 0.000000e+00, float 1.000000e+00) %85 = call float @llvm.AMDIL.clamp.(float %81, float 0.000000e+00, float 1.000000e+00) %86 = call float @llvm.AMDIL.clamp.(float %82, float 0.000000e+00, float 1.000000e+00) %87 = call float @llvm.AMDIL.clamp.(float %83, float 0.000000e+00, float 1.000000e+00) %88 = fsub float -0.000000e+00, %85 %89 = fadd float %88, 1.000000e+00 %90 = fmul float %89, %84 %91 = fmul float %89, %84 %92 = fmul float %49, 2.000000e+00 %93 = fadd float %92, -1.000000e+00 %94 = fmul float %50, 2.000000e+00 %95 = fadd float %94, -1.000000e+00 %96 = fsub float -0.000000e+00, %79 %97 = fmul float %93, %96 %98 = fadd float %97, %43 %99 = fsub float -0.000000e+00, %79 %100 = fmul float %95, %99 %101 = fadd float %100, %44 %102 = fmul float %101, %17 %103 = fmul float %101, %18 %104 = fmul float %101, %19 %105 = fmul float %101, %20 %106 = fmul float %98, %13 %107 = fadd float %106, %102 %108 = fmul float %98, %14 %109 = fadd float %108, %103 %110 = fmul float %98, %15 %111 = fadd float %110, %104 %112 = fmul float %98, %16 %113 = fadd float %112, %105 %114 = fmul float %32, %21 %115 = fadd float %114, %107 %116 = fmul float %32, %22 %117 = fadd float %116, %109 %118 = fmul float %32, %23 %119 = fadd float %118, %111 %120 = fmul float %32, %24 %121 = fadd float %120, %113 %122 = fadd float %115, %25 %123 = fadd float %117, %26 %124 = fadd float %119, %27 %125 = fadd float %121, %28 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %71, float %73, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %90, float %91, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %122, float %123, float %124, float %125) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_add_i32_e32 v2, 0x74, v1 ; 4A0402FF 00000074 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 buffer_load_format_xyzw v[3:6], v0, s[12:15], 0 idxen ; E00C2000 80030300 s_buffer_load_dword s8, s[0:3], 0x19 ; C2040119 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, s8, v6, v2 ; D2820002 040A0C08 v_add_i32_e32 v7, 0x70, v1 ; 4A0E02FF 00000070 buffer_load_dword v7, v7, s[0:3], 0 offen ; E0301000 80000707 s_buffer_load_dword s8, s[0:3], 0x18 ; C2040118 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v7, s8, v5, v7 ; D2820007 041E0A08 v_mov_b32_e32 v8, 1.0 ; 7E1002F2 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 32, 0, 0, 0, v7, v2, v9, v8 ; F800020F 08090207 v_add_i32_e32 v1, 0x78, v1 ; 4A0202FF 00000078 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; BF8C0000 v_subrev_f32_e32 v2, s8, v1 ; 0A040208 s_buffer_load_dword s8, s[0:3], 0x17 ; C2040117 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s8, v2 ; 10040408 v_add_f32_e64 v2, 0, v2 clamp ; D2060802 00020480 v_sub_f32_e32 v2, 1.0, v2 ; 080404F2 s_buffer_load_dword s8, s[0:3], 0x14 ; C2040114 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v7, s8, v1 ; 0A0E0208 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s8, v7 ; 100E0E08 v_add_f32_e64 v7, 0, v7 clamp ; D2060807 00020E80 v_mul_f32_e32 v2, v7, v2 ; 10040507 exp 15, 33, 0, 0, 0, v2, v2, v9, v8 ; F800021F 08090202 s_buffer_load_dword s8, s[0:3], 0x12 ; C2040112 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s8, v1 ; 10040208 v_mad_f32 v1, -v1, s8, 1.0 ; D2820001 23C81101 s_buffer_load_dword s8, s[0:3], 0x10 ; C2040110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s8, v1 ; 10020208 s_buffer_load_dword s8, s[0:3], 0x11 ; C2040111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v2, s8, v1 ; D2820001 04041102 v_mad_f32 v2, 2.0, v3, -1.0 ; D2820002 03CE06F4 buffer_load_format_xyzw v[7:10], v0, s[4:7], 0 idxen ; E00C2000 80010700 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, -v2, v1, v7 ; D2820000 241E0302 v_mad_f32 v2, 2.0, v4, -1.0 ; D2820002 03CE08F4 v_mad_f32 v1, -v2, v1, v8 ; D2820001 24220302 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v1 ; 10040204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_buffer_load_dword s5, s[0:3], 0x13 ; C2028113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v2, v3, s5, v2 ; D2820002 04080B03 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v1 ; 10060204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v4, s4 ; 7E080204 v_mad_f32 v3, v4, s5, v3 ; D2820003 040C0B04 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v4, s4, v1 ; 10080204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v4, v0, s4, v4 ; D2820004 04100900 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v5, s4 ; 7E0A0204 v_mad_f32 v4, v5, s5, v4 ; D2820004 04100B05 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v4, s4, v4 ; 06080804 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_mad_f32 v0, v1, s5, v0 ; D2820000 04000B01 s_buffer_load_dword s0, s[0:3], 0xc ; C200010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v0, v4, v3, v2 ; F80008CF 02030400 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL CONST[0] DCL TEMP[0..2], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, 0.4545, 0.0000} 0: MOV TEMP[0].xy, IN[0].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1].wwww 7: MUL TEMP[1].xyz, TEMP[1], IN[1].xxxx 8: MOV TEMP[0].xyz, TEMP[1].xyzx 9: MUL TEMP[0].xyz, TEMP[0], CONST[0] 10: ABS TEMP[1].x, TEMP[0].xxxx 11: LG2 TEMP[1].x, TEMP[1].xxxx 12: ABS TEMP[2].x, TEMP[0].yyyy 13: LG2 TEMP[2].x, TEMP[2].xxxx 14: MOV TEMP[1].y, TEMP[2].xxxx 15: ABS TEMP[0].x, TEMP[0].zzzz 16: LG2 TEMP[0].x, TEMP[0].xxxx 17: MOV TEMP[1].z, TEMP[0].xxxx 18: MUL TEMP[0].xyz, TEMP[1], IMM[0].zzzz 19: EX2 TEMP[1].x, TEMP[0].xxxx 20: EX2 TEMP[2].x, TEMP[0].yyyy 21: MOV TEMP[1].y, TEMP[2].xxxx 22: EX2 TEMP[0].x, TEMP[0].zzzz 23: MOV TEMP[1].z, TEMP[0].xxxx 24: MOV TEMP[1].w, IMM[0].yyyy 25: MOV OUT[0], TEMP[1] 26: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 8) %27 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %28 = load <8 x i32> addrspace(2)* %27, !tbaa !0 %29 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %30 = load <4 x i32> addrspace(2)* %29, !tbaa !0 %31 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %34 = bitcast float %31 to i32 %35 = bitcast float %32 to i32 %36 = insertelement <2 x i32> undef, i32 %34, i32 0 %37 = insertelement <2 x i32> %36, i32 %35, i32 1 %38 = bitcast <8 x i32> %28 to <32 x i8> %39 = bitcast <4 x i32> %30 to <16 x i8> %40 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %37, <32 x i8> %38, <16 x i8> %39, i32 2) %41 = extractelement <4 x float> %40, i32 0 %42 = extractelement <4 x float> %40, i32 1 %43 = extractelement <4 x float> %40, i32 2 %44 = call float @llvm.pow.f32(float %41, float 0x40019999A0000000) %45 = call float @llvm.pow.f32(float %42, float 0x40019999A0000000) %46 = call float @llvm.pow.f32(float %43, float 0x40019999A0000000) %47 = fmul float %44, %33 %48 = fmul float %45, %33 %49 = fmul float %46, %33 %50 = fmul float %47, %24 %51 = fmul float %48, %25 %52 = fmul float %49, %26 %53 = call float @fabs(float %50) %54 = call float @llvm.log2.f32(float %53) %55 = call float @fabs(float %51) %56 = call float @llvm.log2.f32(float %55) %57 = call float @fabs(float %52) %58 = call float @llvm.log2.f32(float %57) %59 = fmul float %54, 0x3FDD1743E0000000 %60 = fmul float %56, 0x3FDD1743E0000000 %61 = fmul float %58, 0x3FDD1743E0000000 %62 = call float @llvm.AMDIL.exp.(float %59) %63 = call float @llvm.AMDIL.exp.(float %60) %64 = call float @llvm.AMDIL.exp.(float %61) %65 = call i32 @llvm.SI.packf16(float %62, float %63) %66 = bitcast i32 %65 to float %67 = call i32 @llvm.SI.packf16(float %64, float 1.000000e+00) %68 = bitcast i32 %67 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %66, float %68, float %66, float %68) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[12:19], s[8:11] ; F0800700 00430202 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v5, v3 ; 7E0A4F03 v_mov_b32_e32 v6, 0x400ccccd ; 7E0C02FF 400CCCCD v_mul_legacy_f32_e32 v5, v6, v5 ; 0E0A0B06 v_exp_f32_e32 v5, v5 ; 7E0A4B05 v_interp_p1_f32 v7, v0, 0, 1, [m0] ; C81C0400 v_interp_p2_f32 v7, [v7], v1, 0, 1, [m0] ; C81D0401 v_mul_f32_e32 v0, v7, v5 ; 10000B07 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 v_log_f32_e64 v0, |v0| ; D34E0100 00000100 v_mul_f32_e32 v0, 0x3ee8ba1f, v0 ; 100000FF 3EE8BA1F v_exp_f32_e32 v0, v0 ; 7E004B00 v_log_f32_e32 v1, v2 ; 7E024F02 v_mul_legacy_f32_e32 v1, v6, v1 ; 0E020306 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, v7, v1 ; 10020307 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s4, v1 ; 10020204 v_log_f32_e64 v1, |v1| ; D34E0101 00000101 v_mul_f32_e32 v1, 0x3ee8ba1f, v1 ; 100202FF 3EE8BA1F v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e32 v0, v1, v0 ; 5E000101 v_log_f32_e32 v1, v4 ; 7E024F04 v_mul_legacy_f32_e32 v1, v6, v1 ; 0E020306 v_exp_f32_e32 v1, v1 ; 7E024B01 v_mul_f32_e32 v1, v7, v1 ; 10020307 s_buffer_load_dword s0, s[0:3], 0x2 ; C2000102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v1, s0, v1 ; 10020200 v_log_f32_e64 v1, |v1| ; D34E0101 00000101 v_mul_f32_e32 v1, 0x3ee8ba1f, v1 ; 100202FF 3EE8BA1F v_exp_f32_e32 v1, v1 ; 7E024B01 v_cvt_pkrtz_f16_f32_e64 v1, v1, 1.0 ; D25E0001 0001E501 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..12] DCL TEMP[0..6], LOCAL IMM[0] FLT32 { 0.0000, 0.5000, -0.5000, 0.0000} IMM[1] FLT32 { 1.0000, 0.0000, 0.0000, 0.0000} 0: MUL TEMP[0].xyz, CONST[10], IN[2].yyyy 1: MOV TEMP[0].xyz, TEMP[0].xyzx 2: MAD TEMP[1].xyz, IN[2].xxxx, CONST[9], TEMP[0] 3: MOV TEMP[0].xyz, TEMP[1].xyzx 4: MAD TEMP[1].xyz, IN[2].zzzz, CONST[11], TEMP[0] 5: MOV TEMP[0].xyz, TEMP[1].xyzx 6: DP3 TEMP[1].x, TEMP[1].xyzz, TEMP[1].xyzz 7: MAX TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 8: RSQ TEMP[1].x, TEMP[1].xxxx 9: MOV TEMP[0].w, TEMP[1].xxxx 10: MUL TEMP[1].xyz, TEMP[1].xxxx, TEMP[0] 11: MOV TEMP[1].xyz, TEMP[1].xyzx 12: MUL TEMP[0], CONST[10], IN[0].yyyy 13: MAD TEMP[0], IN[0].xxxx, CONST[9], TEMP[0] 14: MAD TEMP[0], IN[0].zzzz, CONST[11], TEMP[0] 15: ADD TEMP[0], TEMP[0], CONST[12] 16: MUL TEMP[2], TEMP[0].yyyy, CONST[1] 17: MAD TEMP[2], TEMP[0].xxxx, CONST[0], TEMP[2] 18: MAD TEMP[2], TEMP[0].zzzz, CONST[2], TEMP[2] 19: MAD TEMP[2], TEMP[0].wwww, CONST[3], TEMP[2] 20: RCP TEMP[3].x, TEMP[2].wwww 21: MOV TEMP[0].w, TEMP[3].xxxx 22: MUL TEMP[3].xy, TEMP[3].xxxx, TEMP[2] 23: MOV TEMP[3].xy, TEMP[3].xyxx 24: MOV TEMP[4], TEMP[2] 25: ADD TEMP[5].xy, TEMP[0], CONST[8].zwzw 26: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 27: MOV TEMP[5].zw, TEMP[5].wwzw 28: MUL TEMP[6].xy, TEMP[0].yyyy, CONST[5] 29: MOV TEMP[2].xy, TEMP[6].xyxx 30: MAD TEMP[6].xy, TEMP[0].xxxx, CONST[4], TEMP[2] 31: MOV TEMP[2].xy, TEMP[6].xyxx 32: MAD TEMP[6].xy, TEMP[0].zzzz, CONST[6], TEMP[2] 33: MOV TEMP[2].xy, TEMP[6].xyxx 34: MOV TEMP[6].xyz, TEMP[0].xyzx 35: ADD TEMP[2].xy, TEMP[2], CONST[7] 36: MOV TEMP[0].xy, TEMP[2].xyxx 37: MAD TEMP[0].xy, TEMP[0], IMM[0].yzww, IMM[0].zzzz 38: MOV TEMP[0].xy, TEMP[0].xyxx 39: MOV TEMP[5].xy, IN[1].xyxx 40: MOV TEMP[6].w, IMM[1].xxxx 41: MOV TEMP[3].zw, IMM[1].xxyx 42: MOV TEMP[0].zw, IMM[1].xxyx 43: MOV OUT[2], TEMP[5] 44: MOV OUT[3], TEMP[6] 45: MOV OUT[0], TEMP[4] 46: MOV OUT[1], TEMP[1] 47: MOV OUT[4], TEMP[3] 48: MOV OUT[5], TEMP[0] 49: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = call float @llvm.SI.load.const(<16 x i8> %12, i32 144) %42 = call float @llvm.SI.load.const(<16 x i8> %12, i32 148) %43 = call float @llvm.SI.load.const(<16 x i8> %12, i32 152) %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 156) %45 = call float @llvm.SI.load.const(<16 x i8> %12, i32 160) %46 = call float @llvm.SI.load.const(<16 x i8> %12, i32 164) %47 = call float @llvm.SI.load.const(<16 x i8> %12, i32 168) %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 172) %49 = call float @llvm.SI.load.const(<16 x i8> %12, i32 176) %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 180) %51 = call float @llvm.SI.load.const(<16 x i8> %12, i32 184) %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 188) %53 = call float @llvm.SI.load.const(<16 x i8> %12, i32 192) %54 = call float @llvm.SI.load.const(<16 x i8> %12, i32 196) %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 200) %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 204) %57 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %58 = load <16 x i8> addrspace(2)* %57, !tbaa !0 %59 = add i32 %5, %7 %60 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %58, i32 0, i32 %59) %61 = extractelement <4 x float> %60, i32 0 %62 = extractelement <4 x float> %60, i32 1 %63 = extractelement <4 x float> %60, i32 2 %64 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %65 = load <16 x i8> addrspace(2)* %64, !tbaa !0 %66 = add i32 %5, %7 %67 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %65, i32 0, i32 %66) %68 = extractelement <4 x float> %67, i32 0 %69 = extractelement <4 x float> %67, i32 1 %70 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %71 = load <16 x i8> addrspace(2)* %70, !tbaa !0 %72 = add i32 %5, %7 %73 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %71, i32 0, i32 %72) %74 = extractelement <4 x float> %73, i32 0 %75 = extractelement <4 x float> %73, i32 1 %76 = extractelement <4 x float> %73, i32 2 %77 = fmul float %45, %75 %78 = fmul float %46, %75 %79 = fmul float %47, %75 %80 = fmul float %74, %41 %81 = fadd float %80, %77 %82 = fmul float %74, %42 %83 = fadd float %82, %78 %84 = fmul float %74, %43 %85 = fadd float %84, %79 %86 = fmul float %76, %49 %87 = fadd float %86, %81 %88 = fmul float %76, %50 %89 = fadd float %88, %83 %90 = fmul float %76, %51 %91 = fadd float %90, %85 %92 = fmul float %87, %87 %93 = fmul float %89, %89 %94 = fadd float %93, %92 %95 = fmul float %91, %91 %96 = fadd float %94, %95 %97 = call float @llvm.maxnum.f32(float %96, float 0x3E7AD7F2A0000000) %98 = call float @llvm.AMDGPU.rsq.clamped.f32(float %97) %99 = fmul float %98, %87 %100 = fmul float %98, %89 %101 = fmul float %98, %91 %102 = fmul float %45, %62 %103 = fmul float %46, %62 %104 = fmul float %47, %62 %105 = fmul float %48, %62 %106 = fmul float %61, %41 %107 = fadd float %106, %102 %108 = fmul float %61, %42 %109 = fadd float %108, %103 %110 = fmul float %61, %43 %111 = fadd float %110, %104 %112 = fmul float %61, %44 %113 = fadd float %112, %105 %114 = fmul float %63, %49 %115 = fadd float %114, %107 %116 = fmul float %63, %50 %117 = fadd float %116, %109 %118 = fmul float %63, %51 %119 = fadd float %118, %111 %120 = fmul float %63, %52 %121 = fadd float %120, %113 %122 = fadd float %115, %53 %123 = fadd float %117, %54 %124 = fadd float %119, %55 %125 = fadd float %121, %56 %126 = fmul float %123, %17 %127 = fmul float %123, %18 %128 = fmul float %123, %19 %129 = fmul float %123, %20 %130 = fmul float %122, %13 %131 = fadd float %130, %126 %132 = fmul float %122, %14 %133 = fadd float %132, %127 %134 = fmul float %122, %15 %135 = fadd float %134, %128 %136 = fmul float %122, %16 %137 = fadd float %136, %129 %138 = fmul float %124, %21 %139 = fadd float %138, %131 %140 = fmul float %124, %22 %141 = fadd float %140, %133 %142 = fmul float %124, %23 %143 = fadd float %142, %135 %144 = fmul float %124, %24 %145 = fadd float %144, %137 %146 = fmul float %125, %25 %147 = fadd float %146, %139 %148 = fmul float %125, %26 %149 = fadd float %148, %141 %150 = fmul float %125, %27 %151 = fadd float %150, %143 %152 = fmul float %125, %28 %153 = fadd float %152, %145 %154 = fdiv float 1.000000e+00, %153 %155 = fmul float %154, %147 %156 = fmul float %154, %149 %157 = fadd float %122, %39 %158 = fadd float %123, %40 %159 = fmul float %157, %37 %160 = fmul float %158, %38 %161 = fmul float %123, %31 %162 = fmul float %123, %32 %163 = fmul float %122, %29 %164 = fadd float %163, %161 %165 = fmul float %122, %30 %166 = fadd float %165, %162 %167 = fmul float %124, %33 %168 = fadd float %167, %164 %169 = fmul float %124, %34 %170 = fadd float %169, %166 %171 = fadd float %168, %35 %172 = fadd float %170, %36 %173 = fmul float %171, 5.000000e-01 %174 = fadd float %173, -5.000000e-01 %175 = fmul float %172, -5.000000e-01 %176 = fadd float %175, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %99, float %100, float %101, float 0.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %68, float %69, float %159, float %160) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %122, float %123, float %124, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %155, float %156, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %174, float %176, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %147, float %149, float %151, float %153) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[4:7], s[8:9], 0x4 ; C0820904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s8, s[0:3], 0x29 ; C2040129 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v5, s8, v2 ; 100A0408 s_buffer_load_dword s9, s[0:3], 0x25 ; C2048125 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v1, s9, v5 ; D2820005 04141301 s_buffer_load_dword s10, s[0:3], 0x2d ; C205012D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v3, s10, v5 ; D2820005 04141503 s_buffer_load_dword s11, s[0:3], 0x28 ; C2058128 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v6, s11, v2 ; 100C040B s_buffer_load_dword s16, s[0:3], 0x24 ; C2080124 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v1, s16, v6 ; D2820006 04182101 s_buffer_load_dword s17, s[0:3], 0x2c ; C208812C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v3, s17, v6 ; D2820006 04182303 v_mul_f32_e32 v7, v6, v6 ; 100E0D06 v_mad_f32 v7, v5, v5, v7 ; D2820007 041E0B05 s_buffer_load_dword s18, s[0:3], 0x2a ; C209012A s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s18, v2 ; 10100412 s_buffer_load_dword s19, s[0:3], 0x26 ; C2098126 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v1, s19, v8 ; D2820008 04202701 s_buffer_load_dword s20, s[0:3], 0x2e ; C20A012E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v3, s20, v8 ; D2820001 04202903 v_mad_f32 v2, v1, v1, v7 ; D2820002 041E0301 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_mul_f32_e32 v1, v1, v2 ; 10020501 v_mul_f32_e32 v3, v5, v2 ; 10060505 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mov_b32_e32 v4, 0 ; 7E080280 exp 15, 32, 0, 0, 0, v2, v3, v1, v4 ; F800020F 04010302 buffer_load_format_xyzw v[5:8], v0, s[12:15], 0 idxen ; E00C2000 80030500 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v1, s8, v6 ; 10020C08 v_mad_f32 v1, v5, s9, v1 ; D2820001 04041305 v_mad_f32 v1, v7, s10, v1 ; D2820001 04041507 s_buffer_load_dword s8, s[0:3], 0x31 ; C2040131 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v1, s8, v1 ; 06020208 s_buffer_load_dword s8, s[0:3], 0x23 ; C2040123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s8, v1 ; 06040208 s_buffer_load_dword s8, s[0:3], 0x21 ; C2040121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s8, v2 ; 10040408 v_mul_f32_e32 v3, s11, v6 ; 10060C0B v_mad_f32 v3, v5, s16, v3 ; D2820003 040C2105 v_mad_f32 v3, v7, s17, v3 ; D2820003 040C2307 s_buffer_load_dword s8, s[0:3], 0x30 ; C2040130 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s8, v3 ; 06060608 s_buffer_load_dword s8, s[0:3], 0x22 ; C2040122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s8, v3 ; 06120608 s_buffer_load_dword s8, s[0:3], 0x20 ; C2040120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s8, v9 ; 10121208 buffer_load_format_xyzw v[10:13], v0, s[4:7], 0 idxen ; E00C2000 80010A00 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 33, 0, 0, 0, v10, v11, v9, v2 ; F800021F 02090B0A v_mul_f32_e32 v0, s18, v6 ; 10000C12 v_mad_f32 v0, v5, s19, v0 ; D2820000 04002705 v_mad_f32 v0, v7, s20, v0 ; D2820000 04002907 s_buffer_load_dword s4, s[0:3], 0x32 ; C2020132 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_add_f32_e32 v0, s4, v0 ; 06000004 v_mov_b32_e32 v2, 1.0 ; 7E0402F2 exp 15, 34, 0, 0, 0, v3, v1, v0, v2 ; F800022F 02000103 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v9, s4, v1 ; 10120204 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v3, s4, v9 ; D2820009 04240903 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v0, s4, v9 ; D2820009 04240900 s_buffer_load_dword s4, s[0:3], 0x2b ; C202012B s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v6 ; 10140C04 s_buffer_load_dword s4, s[0:3], 0x27 ; C2020127 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v5, s4, v10 ; D282000A 04280905 s_buffer_load_dword s4, s[0:3], 0x2f ; C202012F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, v7, s4, v10 ; D2820005 04280907 s_buffer_load_dword s4, s[0:3], 0x33 ; C2020133 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v5, s4, v5 ; 060A0A04 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v6, v5, s4, v9 ; D2820006 04240905 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v1 ; 100E0204 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v3, s4, v7 ; D2820007 041C0903 s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v5, s4, v7 ; D2820007 041C0905 v_rcp_f32_e32 v8, v7 ; 7E105507 v_mul_f32_e32 v9, v6, v8 ; 10121106 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v1 ; 10140204 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v3, s4, v10 ; D282000A 04280903 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v0, s4, v10 ; D282000A 04280900 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v10, v5, s4, v10 ; D282000A 04280905 v_mul_f32_e32 v8, v10, v8 ; 1010110A exp 15, 35, 0, 0, 0, v8, v9, v4, v2 ; F800023F 02040908 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v8, s4, v1 ; 10100204 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v3, s4, v8 ; D2820008 04200903 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v8, v0, s4, v8 ; D2820008 04200900 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s4, v8 ; 06101004 v_mad_f32 v8, 0.5, v8, -0.5 ; D2820008 03C610F0 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s4, v1 ; 10120204 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v3, s4, v9 ; D2820009 04240903 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v9, v0, s4, v9 ; D2820009 04240900 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s4, v9 ; 06121204 v_mad_f32 v9, -0.5, v9, -0.5 ; D2820009 03C612F1 exp 15, 36, 0, 0, 0, v8, v9, v4, v2 ; F800024F 02040908 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v1 ; 10020204 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v3, s4, v1 ; D2820001 04040903 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v5, s0, v0 ; D2820000 04000105 exp 15, 12, 0, 1, 0, v10, v6, v0, v7 ; F80008CF 0700060A s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[5].x, TEMP[6].xxxx, TEMP[5].yyyy 14: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 15: KILL_IF -TEMP[5].xxxx 16: MOV TEMP[5].xyz, IN[0].xyzz 17: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 18: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 19: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 20: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 21: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 22: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 23: MOV TEMP[0].xyz, TEMP[1].xyzx 24: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 25: MOV TEMP[0].xyz, TEMP[1].xyzx 26: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 27: MOV TEMP[3].yzw, TEMP[1].zyzw 28: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 29: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 30: RSQ TEMP[5].x, TEMP[5].xxxx 31: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 32: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 33: MOV TEMP[5].w, IMM[0].wwww 34: MOV TEMP[5].x, TEMP[3].yyyy 35: MOV TEMP[5].y, TEMP[3].zzzz 36: MOV TEMP[5].z, TEMP[3].wwww 37: DP4 TEMP[3].x, TEMP[5], TEMP[5] 38: RSQ TEMP[3].x, TEMP[3].xxxx 39: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 40: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 41: MOV_SAT TEMP[3].x, TEMP[3].xxxx 42: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 43: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 44: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 45: MOV TEMP[6].xyz, TEMP[5].yzww 46: TEX TEMP[6], TEMP[6], SAMP[4], CUBE 47: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 48: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 49: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 50: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 51: MOV TEMP[4].w, TEMP[7].wwww 52: MOV TEMP[6].xy, IN[1].xyyy 53: TEX TEMP[6], TEMP[6], SAMP[1], 2D 54: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 55: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 56: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 57: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 58: ABS TEMP[6].x, TEMP[8].wwww 59: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 60: MOV TEMP[5].xyz, TEMP[5].yzww 61: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 62: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 63: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 64: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 65: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 66: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 67: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 68: ABS TEMP[3].x, TEMP[3].xxxx 69: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 70: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 71: MOV TEMP[0].w, TEMP[6].wwww 72: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 73: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 74: ABS TEMP[5].x, TEMP[3].xxxx 75: LG2 TEMP[4].x, TEMP[5].xxxx 76: ABS TEMP[5].x, TEMP[3].yyyy 77: LG2 TEMP[5].x, TEMP[5].xxxx 78: MOV TEMP[4].y, TEMP[5].xxxx 79: ABS TEMP[3].x, TEMP[3].zzzz 80: LG2 TEMP[3].x, TEMP[3].xxxx 81: MOV TEMP[4].z, TEMP[3].xxxx 82: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 83: EX2 TEMP[4].x, TEMP[3].xxxx 84: EX2 TEMP[5].x, TEMP[3].yyyy 85: MOV TEMP[4].y, TEMP[5].xxxx 86: EX2 TEMP[3].x, TEMP[3].zzzz 87: MOV TEMP[4].z, TEMP[3].xxxx 88: MOV TEMP[3].xyz, TEMP[4].xyzz 89: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 90: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 91: MOV TEMP[5].xy, TEMP[5].xyyy 92: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 93: MOV TEMP[1].w, TEMP[5].wwww 94: MOV TEMP[6].xy, IN[1].zwww 95: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 96: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 97: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 98: MOV TEMP[6].xyz, TEMP[3].yzww 99: TEX TEMP[6], TEMP[6], SAMP[9], 3D 100: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 101: MOV TEMP[1].xyz, TEMP[3].xyzx 102: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 103: MOV TEMP[0].xyz, TEMP[3].xyzx 104: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 105: MOV TEMP[4].z, TEMP[3].zzzz 106: MOV TEMP[4].xy, IN[2].xyxx 107: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 108: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 109: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 110: EX2 TEMP[5].x, TEMP[5].wwww 111: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 112: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 113: RCP TEMP[3].x, TEMP[3].wwww 114: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 115: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 116: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 117: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 118: EX2 TEMP[3].x, TEMP[3].wwww 119: MOV_SAT TEMP[3].x, TEMP[3].xxxx 120: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 121: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 122: MOV TEMP[4].x, TEMP[5].xxxx 123: MOV TEMP[4].y, CONST[4].wwww 124: MOV TEMP[4].xy, TEMP[4].xyyy 125: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 126: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 127: MOV TEMP[0].w, TEMP[3].wwww 128: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 129: MOV TEMP[0].xyz, TEMP[0].xyzx 130: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 131: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 132: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 133: UIF TEMP[3].xxxx :0 134: MOV TEMP[2].x, TEMP[2].xxxx 135: ELSE :0 136: MOV TEMP[2].x, TEMP[1].xxxx 137: ENDIF 138: MOV TEMP[0].w, TEMP[2].xxxx 139: MOV OUT[0], TEMP[0] 140: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = and i32 %128, 1065353216 %130 = bitcast i32 %129 to float %131 = fsub float -0.000000e+00, %130 %132 = fsub float -0.000000e+00, %130 %133 = fsub float -0.000000e+00, %130 %134 = fsub float -0.000000e+00, %130 call void @llvm.AMDGPU.kill(float %131) call void @llvm.AMDGPU.kill(float %132) call void @llvm.AMDGPU.kill(float %133) call void @llvm.AMDGPU.kill(float %134) %135 = insertelement <4 x float> undef, float %78, i32 0 %136 = insertelement <4 x float> %135, float %79, i32 1 %137 = insertelement <4 x float> %136, float %80, i32 2 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %139 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %138) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = extractelement <4 x float> %139, i32 3 %144 = call float @fabs(float %142) %145 = fdiv float 1.000000e+00, %144 %146 = fmul float %140, %145 %147 = fadd float %146, 1.500000e+00 %148 = fmul float %141, %145 %149 = fadd float %148, 1.500000e+00 %150 = bitcast float %149 to i32 %151 = bitcast float %147 to i32 %152 = bitcast float %143 to i32 %153 = insertelement <4 x i32> undef, i32 %150, i32 0 %154 = insertelement <4 x i32> %153, i32 %151, i32 1 %155 = insertelement <4 x i32> %154, i32 %152, i32 2 %156 = insertelement <4 x i32> %155, i32 undef, i32 3 %157 = bitcast <8 x i32> %47 to <32 x i8> %158 = bitcast <4 x i32> %49 to <16 x i8> %159 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %156, <32 x i8> %157, <16 x i8> %158, i32 4) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = extractelement <4 x float> %159, i32 3 %164 = call float @llvm.pow.f32(float %160, float 0x40019999A0000000) %165 = call float @llvm.pow.f32(float %161, float 0x40019999A0000000) %166 = call float @llvm.pow.f32(float %162, float 0x40019999A0000000) %167 = call float @llvm.pow.f32(float %163, float 1.000000e+00) %168 = fmul float %102, %164 %169 = fmul float %103, %165 %170 = fmul float %104, %166 %171 = fadd float %168, %168 %172 = fadd float %169, %169 %173 = fadd float %170, %170 %174 = fsub float -0.000000e+00, %85 %175 = fadd float %24, %174 %176 = fsub float -0.000000e+00, %86 %177 = fadd float %25, %176 %178 = fsub float -0.000000e+00, %87 %179 = fadd float %26, %178 %180 = fmul float %175, %175 %181 = fmul float %177, %177 %182 = fadd float %181, %180 %183 = fmul float %179, %179 %184 = fadd float %182, %183 %185 = call float @llvm.maxnum.f32(float %184, float 0x3E7AD7F2A0000000) %186 = call float @llvm.AMDGPU.rsq.clamped.f32(float %185) %187 = fmul float %186, %175 %188 = fmul float %186, %177 %189 = fmul float %186, %179 %190 = fmul float %175, %186 %191 = fadd float %190, 0x3FAB15B580000000 %192 = fmul float %177, %186 %193 = fadd float %192, 0x3FEB126EA0000000 %194 = fmul float %179, %186 %195 = fadd float %194, 0x3FE0ED9160000000 %196 = fmul float %191, %191 %197 = fmul float %193, %193 %198 = fadd float %196, %197 %199 = fmul float %195, %195 %200 = fadd float %198, %199 %201 = fmul float 0.000000e+00, 0.000000e+00 %202 = fadd float %200, %201 %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) %204 = fmul float %191, %203 %205 = fmul float %193, %203 %206 = fmul float %195, %203 %207 = fmul float %204, %78 %208 = fmul float %205, %79 %209 = fadd float %208, %207 %210 = fmul float %206, %80 %211 = fadd float %209, %210 %212 = call float @llvm.AMDIL.clamp.(float %211, float 0.000000e+00, float 1.000000e+00) %213 = fmul float %78, %187 %214 = fmul float %79, %188 %215 = fadd float %214, %213 %216 = fmul float %80, %189 %217 = fadd float %215, %216 %218 = fadd float %217, %217 %219 = fsub float -0.000000e+00, %187 %220 = fmul float %218, %78 %221 = fadd float %220, %219 %222 = fsub float -0.000000e+00, %188 %223 = fmul float %218, %79 %224 = fadd float %223, %222 %225 = fsub float -0.000000e+00, %189 %226 = fmul float %218, %80 %227 = fadd float %226, %225 %228 = insertelement <4 x float> undef, float %221, i32 0 %229 = insertelement <4 x float> %228, float %224, i32 1 %230 = insertelement <4 x float> %229, float %227, i32 2 %231 = insertelement <4 x float> %230, float %167, i32 3 %232 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %231) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = extractelement <4 x float> %232, i32 3 %237 = call float @fabs(float %235) %238 = fdiv float 1.000000e+00, %237 %239 = fmul float %233, %238 %240 = fadd float %239, 1.500000e+00 %241 = fmul float %234, %238 %242 = fadd float %241, 1.500000e+00 %243 = bitcast float %242 to i32 %244 = bitcast float %240 to i32 %245 = bitcast float %236 to i32 %246 = insertelement <4 x i32> undef, i32 %243, i32 0 %247 = insertelement <4 x i32> %246, i32 %244, i32 1 %248 = insertelement <4 x i32> %247, i32 %245, i32 2 %249 = insertelement <4 x i32> %248, i32 undef, i32 3 %250 = bitcast <8 x i32> %55 to <32 x i8> %251 = bitcast <4 x i32> %57 to <16 x i8> %252 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %249, <32 x i8> %250, <16 x i8> %251, i32 4) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = call float @llvm.pow.f32(float %253, float 0x40019999A0000000) %257 = call float @llvm.pow.f32(float %254, float 0x40019999A0000000) %258 = call float @llvm.pow.f32(float %255, float 0x40019999A0000000) %259 = bitcast float %81 to i32 %260 = bitcast float %82 to i32 %261 = insertelement <2 x i32> undef, i32 %259, i32 0 %262 = insertelement <2 x i32> %261, i32 %260, i32 1 %263 = bitcast <8 x i32> %43 to <32 x i8> %264 = bitcast <4 x i32> %45 to <16 x i8> %265 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %262, <32 x i8> %263, <16 x i8> %264, i32 2) %266 = extractelement <4 x float> %265, i32 0 %267 = extractelement <4 x float> %265, i32 1 %268 = extractelement <4 x float> %265, i32 2 %269 = extractelement <4 x float> %265, i32 3 %270 = call float @llvm.pow.f32(float %266, float 0x40019999A0000000) %271 = call float @llvm.pow.f32(float %267, float 0x40019999A0000000) %272 = call float @llvm.pow.f32(float %268, float 0x40019999A0000000) %273 = call float @llvm.pow.f32(float %269, float 1.000000e+00) %274 = call float @fabs(float %273) %275 = call float @llvm.pow.f32(float %274, float 0x40019999A0000000) %276 = insertelement <4 x float> undef, float %221, i32 0 %277 = insertelement <4 x float> %276, float %224, i32 1 %278 = insertelement <4 x float> %277, float %227, i32 2 %279 = insertelement <4 x float> %278, float %227, i32 3 %280 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %279) %281 = extractelement <4 x float> %280, i32 0 %282 = extractelement <4 x float> %280, i32 1 %283 = extractelement <4 x float> %280, i32 2 %284 = extractelement <4 x float> %280, i32 3 %285 = call float @fabs(float %283) %286 = fdiv float 1.000000e+00, %285 %287 = fmul float %281, %286 %288 = fadd float %287, 1.500000e+00 %289 = fmul float %282, %286 %290 = fadd float %289, 1.500000e+00 %291 = bitcast float %290 to i32 %292 = bitcast float %288 to i32 %293 = bitcast float %284 to i32 %294 = insertelement <4 x i32> undef, i32 %291, i32 0 %295 = insertelement <4 x i32> %294, i32 %292, i32 1 %296 = insertelement <4 x i32> %295, i32 %293, i32 2 %297 = insertelement <4 x i32> %296, i32 undef, i32 3 %298 = bitcast <8 x i32> %51 to <32 x i8> %299 = bitcast <4 x i32> %53 to <16 x i8> %300 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %297, <32 x i8> %298, <16 x i8> %299, i32 4) %301 = extractelement <4 x float> %300, i32 0 %302 = extractelement <4 x float> %300, i32 1 %303 = extractelement <4 x float> %300, i32 2 %304 = call float @llvm.pow.f32(float %301, float 0x40019999A0000000) %305 = call float @llvm.pow.f32(float %302, float 0x40019999A0000000) %306 = call float @llvm.pow.f32(float %303, float 0x40019999A0000000) %307 = call float @llvm.AMDGPU.lrp(float %275, float %304, float %256) %308 = call float @llvm.AMDGPU.lrp(float %275, float %305, float %257) %309 = call float @llvm.AMDGPU.lrp(float %275, float %306, float %258) %310 = fmul float %275, 1.990000e+02 %311 = fadd float %310, 1.000000e+00 %312 = call float @fabs(float %212) %313 = call float @llvm.pow.f32(float %312, float %311) %314 = fmul float %311, 0x3FB99999A0000000 %315 = fmul float %314, %313 %316 = fadd float %315, %307 %317 = fmul float %314, %313 %318 = fadd float %317, %308 %319 = fmul float %314, %313 %320 = fadd float %319, %309 %321 = fmul float %316, %270 %322 = fadd float %321, %171 %323 = fmul float %318, %271 %324 = fadd float %323, %172 %325 = fmul float %320, %272 %326 = fadd float %325, %173 %327 = call float @fabs(float %322) %328 = call float @llvm.log2.f32(float %327) %329 = call float @fabs(float %324) %330 = call float @llvm.log2.f32(float %329) %331 = call float @fabs(float %326) %332 = call float @llvm.log2.f32(float %331) %333 = fmul float %328, 0x3FDD1743E0000000 %334 = fmul float %330, 0x3FDD1743E0000000 %335 = fmul float %332, 0x3FDD1743E0000000 %336 = call float @llvm.AMDIL.exp.(float %333) %337 = call float @llvm.AMDIL.exp.(float %334) %338 = call float @llvm.AMDIL.exp.(float %335) %339 = bitcast float %336 to i32 %340 = bitcast float %337 to i32 %341 = bitcast float %338 to i32 %342 = insertelement <4 x i32> undef, i32 %339, i32 0 %343 = insertelement <4 x i32> %342, i32 %340, i32 1 %344 = insertelement <4 x i32> %343, i32 %341, i32 2 %345 = insertelement <4 x i32> %344, i32 undef, i32 3 %346 = bitcast <8 x i32> %63 to <32 x i8> %347 = bitcast <4 x i32> %65 to <16 x i8> %348 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %345, <32 x i8> %346, <16 x i8> %347, i32 3) %349 = extractelement <4 x float> %348, i32 0 %350 = extractelement <4 x float> %348, i32 1 %351 = extractelement <4 x float> %348, i32 2 %352 = fmul float %89, 1.000000e+00 %353 = fadd float %352, 0.000000e+00 %354 = fmul float %90, -1.000000e+00 %355 = fadd float %354, 1.000000e+00 %356 = bitcast float %353 to i32 %357 = bitcast float %355 to i32 %358 = insertelement <2 x i32> undef, i32 %356, i32 0 %359 = insertelement <2 x i32> %358, i32 %357, i32 1 %360 = bitcast <8 x i32> %67 to <32 x i8> %361 = bitcast <4 x i32> %69 to <16 x i8> %362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %359, <32 x i8> %360, <16 x i8> %361, i32 2) %363 = extractelement <4 x float> %362, i32 0 %364 = extractelement <4 x float> %362, i32 2 %365 = bitcast float %83 to i32 %366 = bitcast float %84 to i32 %367 = insertelement <2 x i32> undef, i32 %365, i32 0 %368 = insertelement <2 x i32> %367, i32 %366, i32 1 %369 = bitcast <8 x i32> %71 to <32 x i8> %370 = bitcast <4 x i32> %73 to <16 x i8> %371 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %368, <32 x i8> %369, <16 x i8> %370, i32 2) %372 = extractelement <4 x float> %371, i32 0 %373 = extractelement <4 x float> %371, i32 1 %374 = extractelement <4 x float> %371, i32 2 %375 = call float @llvm.AMDGPU.lrp(float %363, float %349, float %372) %376 = call float @llvm.AMDGPU.lrp(float %363, float %350, float %373) %377 = call float @llvm.AMDGPU.lrp(float %363, float %351, float %374) %378 = fsub float -0.000000e+00, %363 %379 = fmul float %364, %378 %380 = fadd float %379, %363 %381 = bitcast float %375 to i32 %382 = bitcast float %376 to i32 %383 = bitcast float %377 to i32 %384 = insertelement <4 x i32> undef, i32 %381, i32 0 %385 = insertelement <4 x i32> %384, i32 %382, i32 1 %386 = insertelement <4 x i32> %385, i32 %383, i32 2 %387 = insertelement <4 x i32> %386, i32 undef, i32 3 %388 = bitcast <8 x i32> %75 to <32 x i8> %389 = bitcast <4 x i32> %77 to <16 x i8> %390 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %387, <32 x i8> %388, <16 x i8> %389, i32 3) %391 = extractelement <4 x float> %390, i32 0 %392 = extractelement <4 x float> %390, i32 1 %393 = extractelement <4 x float> %390, i32 2 %394 = call float @llvm.AMDGPU.lrp(float %380, float %391, float %375) %395 = call float @llvm.AMDGPU.lrp(float %380, float %392, float %376) %396 = call float @llvm.AMDGPU.lrp(float %380, float %393, float %377) %397 = fsub float -0.000000e+00, %394 %398 = fadd float %397, %30 %399 = fsub float -0.000000e+00, %395 %400 = fadd float %399, %31 %401 = fsub float -0.000000e+00, %396 %402 = fadd float %401, %32 %403 = fmul float %37, %87 %404 = fsub float -0.000000e+00, %33 %405 = fadd float %85, %404 %406 = fsub float -0.000000e+00, %34 %407 = fadd float %86, %406 %408 = fsub float -0.000000e+00, %35 %409 = fadd float %403, %408 %410 = fmul float %409, %27 %411 = fmul float %410, 0x3FF7154CA0000000 %412 = call float @llvm.AMDIL.exp.(float %411) %413 = fsub float -0.000000e+00, %412 %414 = fadd float %413, 1.000000e+00 %415 = fmul float %405, %405 %416 = fmul float %407, %407 %417 = fadd float %416, %415 %418 = fmul float %409, %409 %419 = fadd float %417, %418 %420 = fdiv float 1.000000e+00, %409 %421 = fmul float %419, %28 %422 = fmul float %414, %421 %423 = fmul float %420, %422 %424 = fmul float %423, 0x3FF7154CA0000000 %425 = call float @llvm.AMDIL.exp.(float %424) %426 = call float @llvm.AMDIL.clamp.(float %425, float 0.000000e+00, float 1.000000e+00) %427 = fsub float -0.000000e+00, %426 %428 = fadd float %427, 1.000000e+00 %429 = fmul float %88, 5.000000e-01 %430 = fadd float %429, 5.000000e-01 %431 = bitcast float %430 to i32 %432 = bitcast float %29 to i32 %433 = insertelement <2 x i32> undef, i32 %431, i32 0 %434 = insertelement <2 x i32> %433, i32 %432, i32 1 %435 = bitcast <8 x i32> %59 to <32 x i8> %436 = bitcast <4 x i32> %61 to <16 x i8> %437 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %434, <32 x i8> %435, <16 x i8> %436, i32 2) %438 = extractelement <4 x float> %437, i32 0 %439 = fmul float %428, %438 %440 = fmul float %439, %398 %441 = fadd float %440, %394 %442 = fmul float %439, %400 %443 = fadd float %442, %395 %444 = fmul float %439, %402 %445 = fadd float %444, %396 %446 = fmul float %107, 5.000000e-01 %447 = fsub float -0.000000e+00, %36 %448 = fadd float %447, %87 %449 = fcmp oge float %448, 0.000000e+00 %450 = sext i1 %449 to i32 %451 = bitcast i32 %450 to float %452 = bitcast float %451 to i32 %453 = icmp ne i32 %452, 0 %. = select i1 %453, float %107, float %446 %454 = call i32 @llvm.SI.packf16(float %441, float %443) %455 = bitcast i32 %454 to float %456 = call i32 @llvm.SI.packf16(float %445, float %.) %457 = bitcast i32 %456 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %455, float %457, float %455, float %457) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b64 s[100:101], s[2:3] ; BEE40402 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[64:67], s[4:5], 0x4 ; C0A00504 s_load_dwordx4 s[88:91], s[4:5], 0x8 ; C0AC0508 s_load_dwordx4 s[52:55], s[4:5], 0xc ; C09A050C s_load_dwordx4 s[68:71], s[4:5], 0x10 ; C0A20510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v47, s0, 0 ; 045F0000 v_writelane_b32 v47, s1, 1 ; 045F0201 v_writelane_b32 v47, s2, 2 ; 045F0402 v_writelane_b32 v47, s3, 3 ; 045F0603 s_load_dwordx4 s[40:43], s[4:5], 0x18 ; C0940518 s_load_dwordx4 s[28:31], s[4:5], 0x1c ; C08E051C s_load_dwordx4 s[24:27], s[4:5], 0x20 ; C08C0520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[0:7], vcc, 0x0 ; C0C06B00 s_load_dwordx8 s[72:79], vcc, 0x8 ; C0E46B08 s_load_dwordx8 s[92:99], vcc, 0x10 ; C0EE6B10 s_load_dwordx8 s[56:63], vcc, 0x18 ; C0DC6B18 s_load_dwordx8 s[80:87], vcc, 0x20 ; C0E86B20 s_load_dwordx8 s[12:19], vcc, 0x28 ; C0C66B28 s_load_dwordx8 s[44:51], vcc, 0x30 ; C0D66B30 s_load_dwordx8 s[32:39], vcc, 0x38 ; C0D06B38 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800F00 00400402 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v8, v7 ; 7E104F07 v_mul_legacy_f32_e32 v8, 1.0, v8 ; 0E1010F2 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v9, 0x7fffffff ; 7E1202FF 7FFFFFFF v_and_b32_e32 v8, v8, v9 ; 36101308 v_log_f32_e32 v8, v8 ; 7E104F08 v_mov_b32_e32 v10, 0x400ccccd ; 7E1402FF 400CCCCD v_mul_legacy_f32_e32 v8, v10, v8 ; 0E10110A v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v11, 0xbdcccccd ; 7E1602FF BDCCCCCD v_add_f32_e32 v11, v8, v11 ; 06161708 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_cmp_lt_f32_e64 s[0:1], v11, 0 ; D0020000 0001010B v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; D200080B 00018280 v_and_b32_e32 v11, 1.0, v11 ; 361616F2 v_xor_b32_e32 v11, 0x80000000, v11 ; 3A1616FF 80000000 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_cubeid_f32 v19, v12, v13, v14 ; D2880013 043A1B0C v_cubema_f32 v18, v12, v13, v14 ; D28E0012 043A1B0C v_cubesc_f32 v17, v12, v13, v14 ; D28A0011 043A1B0C v_cubetc_f32 v16, v12, v13, v14 ; D28C0010 043A1B0C v_rcp_f32_e64 v11, |v18| ; D354010B 00000112 v_mov_b32_e32 v24, 0x3fc00000 ; 7E3002FF 3FC00000 v_mad_f32 v18, v16, v11, v24 ; D2820012 04621710 v_mad_f32 v17, v17, v11, v24 ; D2820011 04621711 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[92:99], s[88:91] ; F0800F00 02D71011 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v11, v19 ; 7E164F13 v_mul_legacy_f32_e32 v11, 1.0, v11 ; 0E1616F2 v_exp_f32_e32 v23, v11 ; 7E2E4B0B v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_load_dwordx4 s[0:3], s[100:101], 0x0 ; C0806500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v25, s4, v11 ; 08321604 v_interp_p1_f32 v26, v0, 0, 2, [m0] ; C8680800 v_interp_p2_f32 v26, [v26], v1, 0, 2, [m0] ; C8690801 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s4, v26 ; 08363404 v_mul_f32_e32 v28, v27, v27 ; 1038371B v_mad_f32 v28, v25, v25, v28 ; D282001C 04723319 v_interp_p1_f32 v29, v0, 2, 2, [m0] ; C8740A00 v_interp_p2_f32 v29, [v29], v1, 2, 2, [m0] ; C8750A01 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v30, s4, v29 ; 083C3A04 v_mad_f32 v28, v30, v30, v28 ; D282001C 04723D1E v_max_f32_e32 v28, 0x33d6bf95, v28 ; 203838FF 33D6BF95 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v31, v25, v28 ; 103E3919 v_mul_f32_e32 v32, v27, v28 ; 1040391B v_mul_f32_e32 v33, v32, v12 ; 10421920 v_mad_f32 v33, v13, v31, v33 ; D2820021 04863F0D v_mul_f32_e32 v34, v30, v28 ; 1044391E v_mad_f32 v33, v14, v34, v33 ; D2820021 0486450E v_add_f32_e32 v33, v33, v33 ; 06424321 v_mad_f32 v22, v33, v14, -v34 ; D2820016 848A1D21 v_mad_f32 v21, v33, v13, -v31 ; D2820015 847E1B21 v_mad_f32 v20, v33, v12, -v32 ; D2820014 84821921 v_cubeid_f32 v34, v20, v21, v22 ; D2880022 045A2B14 v_cubema_f32 v33, v20, v21, v22 ; D28E0021 045A2B14 v_cubesc_f32 v32, v20, v21, v22 ; D28A0020 045A2B14 v_cubetc_f32 v31, v20, v21, v22 ; D28C001F 045A2B14 v_rcp_f32_e64 v39, |v33| ; D3540127 00000121 v_mad_f32 v33, v31, v39, v24 ; D2820021 04624F1F v_mad_f32 v32, v32, v39, v24 ; D2820020 04624F20 image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[80:87], s[68:71] ; F0800700 02341F20 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v34, v33 ; 7E444F21 v_mul_legacy_f32_e32 v34, v10, v34 ; 0E44450A v_exp_f32_e32 v34, v34 ; 7E444B22 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[72:79], s[64:67] ; F0800F00 02122302 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v38 ; 7E044F26 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_and_b32_e32 v2, v2, v9 ; 36041302 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v10, v2 ; 0E04050A v_exp_f32_e32 v2, v2 ; 7E044B02 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v34, v34, v3 ; 10440722 v_mov_b32_e32 v23, v22 ; 7E2E0316 v_cubeid_f32 v42, v20, v21, v22 ; D288002A 045A2B14 v_cubema_f32 v41, v20, v21, v22 ; D28E0029 045A2B14 v_cubesc_f32 v40, v20, v21, v22 ; D28A0028 045A2B14 v_cubetc_f32 v39, v20, v21, v22 ; D28C0027 045A2B14 v_rcp_f32_e64 v20, |v41| ; D3540114 00000129 v_mad_f32 v41, v39, v20, v24 ; D2820029 04622927 v_mad_f32 v40, v40, v20, v24 ; D2820028 04622928 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[56:63], s[52:55] ; F0800700 01AE1428 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v23, v22 ; 7E2E4F16 v_mul_legacy_f32_e32 v23, v10, v23 ; 0E2E2F0A v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mad_f32 v23, v2, v23, v34 ; D2820017 048A2F02 v_mov_b32_e32 v24, 0x3d58adac ; 7E3002FF 3D58ADAC v_mad_f32 v24, v27, v28, v24 ; D2820018 0462391B v_mov_b32_e32 v27, 0x3f589375 ; 7E3602FF 3F589375 v_mad_f32 v25, v25, v28, v27 ; D2820019 046E3919 v_mul_f32_e32 v27, v25, v25 ; 10363319 v_mad_f32 v27, v24, v24, v27 ; D282001B 046E3118 v_mov_b32_e32 v34, 0x3f076c8b ; 7E4402FF 3F076C8B v_mad_f32 v28, v30, v28, v34 ; D282001C 048A391E v_mad_f32 v27, v28, v28, v27 ; D282001B 046E391C v_add_f32_e32 v27, 0, v27 ; 06363680 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mul_f32_e32 v25, v27, v25 ; 1032331B v_mul_f32_e32 v24, v27, v24 ; 1030311B v_mul_f32_e32 v24, v12, v24 ; 1030310C v_mad_f32 v24, v25, v13, v24 ; D2820018 04621B19 v_mul_f32_e32 v25, v27, v28 ; 1032391B v_mad_f32 v12, v25, v14, v24 ; D282000C 04621D19 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_and_b32_e32 v9, v12, v9 ; 3612130C v_log_f32_e32 v9, v9 ; 7E124F09 v_mov_b32_e32 v12, 0x43470000 ; 7E1802FF 43470000 v_mad_f32 v12, v12, v2, 1.0 ; D282000C 03CA050C v_mul_legacy_f32_e32 v9, v12, v9 ; 0E12130C v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v12, 0x3dcccccd, v12 ; 101818FF 3DCCCCCD v_mad_f32 v13, v12, v9, v23 ; D282000D 045E130C v_log_f32_e32 v14, v37 ; 7E1C4F25 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_log_f32_e32 v14, v18 ; 7E1C4F12 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_log_f32_e32 v15, v6 ; 7E1E4F06 v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mad_f32 v13, 2.0, v14, v13 ; D282000D 04361CF4 v_log_f32_e64 v13, |v13| ; D34E010D 0000010D v_mul_f32_e32 v13, 0x3ee8ba1f, v13 ; 101A1AFF 3EE8BA1F v_exp_f32_e32 v41, v13 ; 7E524B0D v_log_f32_e32 v13, v32 ; 7E1A4F20 v_mul_legacy_f32_e32 v13, v10, v13 ; 0E1A1B0A v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, v13, v3 ; 101A070D v_log_f32_e32 v14, v21 ; 7E1C4F15 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mad_f32 v13, v2, v14, v13 ; D282000D 04361D02 v_mad_f32 v13, v12, v9, v13 ; D282000D 0436130C v_log_f32_e32 v14, v36 ; 7E1C4F24 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_log_f32_e32 v14, v17 ; 7E1C4F11 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_log_f32_e32 v15, v5 ; 7E1E4F05 v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mad_f32 v13, 2.0, v14, v13 ; D282000D 04361CF4 v_log_f32_e64 v13, |v13| ; D34E010D 0000010D v_mul_f32_e32 v13, 0x3ee8ba1f, v13 ; 101A1AFF 3EE8BA1F v_exp_f32_e32 v40, v13 ; 7E504B0D v_log_f32_e32 v13, v31 ; 7E1A4F1F v_mul_legacy_f32_e32 v13, v10, v13 ; 0E1A1B0A v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v3, v13, v3 ; 1006070D v_log_f32_e32 v13, v20 ; 7E1A4F14 v_mul_legacy_f32_e32 v13, v10, v13 ; 0E1A1B0A v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mad_f32 v2, v2, v13, v3 ; D2820002 040E1B02 v_mad_f32 v2, v12, v9, v2 ; D2820002 040A130C v_log_f32_e32 v3, v35 ; 7E064F23 v_mul_legacy_f32_e32 v3, v10, v3 ; 0E06070A v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_log_f32_e32 v3, v16 ; 7E064F10 v_mul_legacy_f32_e32 v3, v10, v3 ; 0E06070A v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v4, v10, v4 ; 0E08090A v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mad_f32 v2, 2.0, v3, v2 ; D2820002 040A06F4 v_log_f32_e64 v2, |v2| ; D34E0102 00000102 v_mul_f32_e32 v2, 0x3ee8ba1f, v2 ; 100404FF 3EE8BA1F v_exp_f32_e32 v39, v2 ; 7E4E4B02 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[40:43] ; F0800700 014B0227 v_interp_p1_f32 v5, v0, 1, 4, [m0] ; C8141100 v_interp_p2_f32 v5, [v5], v1, 1, 4, [m0] ; C8151101 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_interp_p1_f32 v7, v0, 0, 4, [m0] ; C81C1000 v_interp_p2_f32 v7, [v7], v1, 0, 4, [m0] ; C81D1001 v_add_f32_e32 v5, 0, v7 ; 060A0E80 image_sample v[5:6], 5, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[32:39], s[28:31] ; F0800500 00E80505 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v7, 1.0, v5 ; 080E0AF2 v_interp_p1_f32 v10, v0, 3, 1, [m0] ; C8280700 v_interp_p2_f32 v10, [v10], v1, 3, 1, [m0] ; C8290701 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 s_load_dwordx8 s[4:11], vcc, 0x40 ; C0C26B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[24:27] ; F0800700 00C10C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v14, v7 ; 10120F0E v_mad_f32 v17, v5, v4, v9 ; D2820011 04260905 v_mul_f32_e32 v9, v13, v7 ; 10120F0D v_mad_f32 v16, v5, v3, v9 ; D2820010 04260705 v_mul_f32_e32 v7, v12, v7 ; 100E0F0C v_mad_f32 v15, v5, v2, v7 ; D282000F 041E0505 s_load_dwordx8 s[4:11], vcc, 0x48 ; C0C26B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[20:23] ; F0800700 00A1020F v_mad_f32 v5, -v6, v5, v5 ; D2820005 24160B06 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_mul_f32_e32 v7, v16, v6 ; 100E0D10 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v5, v3, v7 ; D2820007 041E0705 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v7 ; 08120E04 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v10, s4, v11 ; 0A141604 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v11, s4, v26 ; 0A163404 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mad_f32 v10, v10, v10, v11 ; D282000A 042E150A s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v11, s4 ; 7E160204 v_mad_f32 v11, v29, s5, -v11 ; D282000B 842C0B1D v_mad_f32 v10, v11, v11, v10 ; D282000A 042A170B s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v10 ; 10141404 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v11 ; 10181604 v_mul_f32_e32 v12, 0x3fb8aa65, v12 ; 101818FF 3FB8AA65 v_exp_f32_e32 v12, v12 ; 7E184B0C v_sub_f32_e32 v12, 1.0, v12 ; 081818F2 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_rcp_f32_e32 v11, v11 ; 7E16550B v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v10, 0x3fb8aa65, v10 ; 101414FF 3FB8AA65 v_exp_f32_e32 v10, v10 ; 7E144B0A v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_mad_f32 v0, 0.5, v11, 0.5 ; D2820000 03C216F0 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_readlane_b32 s4, v47, 0 ; 0209012F v_readlane_b32 s5, v47, 1 ; 020B032F v_readlane_b32 s6, v47, 2 ; 020D052F v_readlane_b32 s7, v47, 3 ; 020F072F s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[4:7] ; F0800100 00230000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v1, v0, v9, v7 ; D2820001 041E1300 v_mul_f32_e32 v7, v15, v6 ; 100E0D0F v_mad_f32 v7, v5, v2, v7 ; D2820007 041E0505 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v7 ; 08120E04 v_mad_f32 v7, v0, v9, v7 ; D2820007 041E1300 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 v_mul_f32_e32 v6, v17, v6 ; 100C0D11 v_mad_f32 v2, v5, v4, v6 ; D2820002 041A0905 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_mul_f32_e32 v2, 0.5, v8 ; 100410F0 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s0, v29 ; 0A063A00 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v2, v2, v8, s[0:1] ; D2000002 10021102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..98] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 0.5000, -0.5000, 0.0000, 1.0000} 0: F2I TEMP[0].x, IN[2].xxxx 1: UARL ADDR[0].x, TEMP[0].xxxx 2: UARL ADDR[0].x, TEMP[0].xxxx 3: MOV TEMP[1], CONST[ADDR[0].x+9] 4: UARL ADDR[0].x, TEMP[0].xxxx 5: MAD TEMP[1].xyz, IN[0], CONST[ADDR[0].x+9].wwww, TEMP[1] 6: MOV TEMP[2].xyz, TEMP[1].xyzx 7: UARL ADDR[0].x, TEMP[0].xxxx 8: MOV TEMP[0].xyz, CONST[ADDR[0].x+9].xyzx 9: MUL TEMP[3], TEMP[1].yyyy, CONST[1] 10: MAD TEMP[3], TEMP[1].xxxx, CONST[0], TEMP[3] 11: MAD TEMP[3], TEMP[1].zzzz, CONST[2], TEMP[3] 12: ADD TEMP[3], TEMP[3], CONST[3] 13: RCP TEMP[4].x, TEMP[3].wwww 14: MOV TEMP[2].w, TEMP[4].xxxx 15: MUL TEMP[4].xy, TEMP[4].xxxx, TEMP[3] 16: MOV TEMP[4].xy, TEMP[4].xyxx 17: MOV TEMP[3], TEMP[3] 18: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 19: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 20: MOV TEMP[5].zw, TEMP[5].wwzw 21: MUL TEMP[6].yw, TEMP[1].yyyy, CONST[5].xxzy 22: MOV TEMP[2].yw, TEMP[6].wyww 23: MAD TEMP[6].xy, TEMP[1].xxxx, CONST[4], TEMP[2].ywzw 24: MOV TEMP[2].xy, TEMP[6].xyxx 25: MAD TEMP[1].xy, TEMP[1].zzzz, CONST[6], TEMP[2] 26: MOV TEMP[2].xy, TEMP[1].xyxx 27: ADD TEMP[1].xy, TEMP[2], CONST[7] 28: MOV TEMP[2].xy, TEMP[1].xyxx 29: MAD TEMP[1].xy, TEMP[2], IMM[0].xyzz, IMM[0].yyyy 30: MOV TEMP[1].xy, TEMP[1].xyxx 31: MOV TEMP[5].xy, IN[1].xyxx 32: MOV TEMP[0].w, IMM[0].wwww 33: MOV TEMP[1].zw, IMM[0].wwzw 34: MOV TEMP[4].zw, IMM[0].wwzw 35: MOV OUT[1], TEMP[5] 36: MOV OUT[0], TEMP[3] 37: MOV OUT[2], TEMP[0] 38: MOV OUT[3], TEMP[1] 39: MOV OUT[4], TEMP[4] 40: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %55 = load <16 x i8> addrspace(2)* %54, !tbaa !0 %56 = add i32 %5, %7 %57 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %55, i32 0, i32 %56) %58 = extractelement <4 x float> %57, i32 0 %59 = fptosi float %58 to i32 %60 = bitcast i32 %59 to float %61 = bitcast float %60 to i32 %62 = shl i32 %61, 4 %63 = add i32 %62, 144 %64 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %63) %65 = shl i32 %61, 4 %66 = add i32 %65, 148 %67 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %66) %68 = shl i32 %61, 4 %69 = add i32 %68, 152 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = bitcast float %60 to i32 %72 = shl i32 %71, 4 %73 = add i32 %72, 156 %74 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %73) %75 = fmul float %45, %74 %76 = fadd float %75, %64 %77 = shl i32 %71, 4 %78 = add i32 %77, 156 %79 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %78) %80 = fmul float %46, %79 %81 = fadd float %80, %67 %82 = shl i32 %71, 4 %83 = add i32 %82, 156 %84 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %83) %85 = fmul float %47, %84 %86 = fadd float %85, %70 %87 = bitcast float %60 to i32 %88 = shl i32 %87, 4 %89 = add i32 %88, 144 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = shl i32 %87, 4 %92 = add i32 %91, 148 %93 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %92) %94 = shl i32 %87, 4 %95 = add i32 %94, 152 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = fmul float %81, %17 %98 = fmul float %81, %18 %99 = fmul float %81, %19 %100 = fmul float %81, %20 %101 = fmul float %76, %13 %102 = fadd float %101, %97 %103 = fmul float %76, %14 %104 = fadd float %103, %98 %105 = fmul float %76, %15 %106 = fadd float %105, %99 %107 = fmul float %76, %16 %108 = fadd float %107, %100 %109 = fmul float %86, %21 %110 = fadd float %109, %102 %111 = fmul float %86, %22 %112 = fadd float %111, %104 %113 = fmul float %86, %23 %114 = fadd float %113, %106 %115 = fmul float %86, %24 %116 = fadd float %115, %108 %117 = fadd float %110, %25 %118 = fadd float %112, %26 %119 = fadd float %114, %27 %120 = fadd float %116, %28 %121 = fdiv float 1.000000e+00, %120 %122 = fmul float %121, %117 %123 = fmul float %121, %118 %124 = fadd float %76, %39 %125 = fadd float %81, %40 %126 = fmul float %124, %37 %127 = fmul float %125, %38 %128 = fmul float %81, %31 %129 = fmul float %81, %32 %130 = fmul float %76, %29 %131 = fadd float %130, %128 %132 = fmul float %76, %30 %133 = fadd float %132, %129 %134 = fmul float %86, %33 %135 = fadd float %134, %131 %136 = fmul float %86, %34 %137 = fadd float %136, %133 %138 = fadd float %135, %35 %139 = fadd float %137, %36 %140 = fmul float %138, 5.000000e-01 %141 = fadd float %140, -5.000000e-01 %142 = fmul float %139, -5.000000e-01 %143 = fadd float %142, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %52, float %53, float %126, float %127) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %90, float %93, float %96, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %141, float %143, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %122, float %123, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %118, float %119, float %120) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[12:15], s[8:9], 0x4 ; C0860904 s_load_dwordx4 s[8:11], s[8:9], 0x8 ; C0840908 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_add_i32_e32 v2, 0x94, v1 ; 4A0402FF 00000094 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_add_i32_e32 v3, 0x9c, v1 ; 4A0602FF 0000009C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v5, v3, v2 ; D2820008 040A0705 s_buffer_load_dword s4, s[0:3], 0x23 ; C2020123 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v9, s4, v8 ; 06121004 s_buffer_load_dword s4, s[0:3], 0x21 ; C2020121 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v9, s4, v9 ; 10121204 v_add_i32_e32 v10, 0x90, v1 ; 4A1402FF 00000090 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v11, v4, v3, v10 ; D282000B 042A0704 s_buffer_load_dword s4, s[0:3], 0x22 ; C2020122 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v12, s4, v11 ; 06181604 s_buffer_load_dword s4, s[0:3], 0x20 ; C2020120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v12 ; 10181804 buffer_load_format_xyzw v[13:16], v0, s[12:15], 0 idxen ; E00C2000 80030D00 s_waitcnt vmcnt(0) ; BF8C0770 exp 15, 32, 0, 0, 0, v13, v14, v12, v9 ; F800020F 090C0E0D v_add_i32_e32 v0, 0x98, v1 ; 4A0002FF 00000098 buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 v_mov_b32_e32 v1, 1.0 ; 7E0202F2 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 exp 15, 33, 0, 0, 0, v10, v2, v0, v1 ; F800021F 0100020A s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v8 ; 10041004 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v11, s4, v2 ; D2820002 0408090B v_mad_f32 v0, v6, v3, v0 ; D2820000 04020706 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0x1c ; C202011C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 v_mad_f32 v2, 0.5, v2, -0.5 ; D2820002 03C604F0 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v8 ; 10061004 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v11, s4, v3 ; D2820003 040C090B s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_mad_f32 v3, -0.5, v3, -0.5 ; D2820003 03C606F1 v_mov_b32_e32 v4, 0 ; 7E080280 exp 15, 34, 0, 0, 0, v2, v3, v4, v1 ; F800022F 01040302 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v8 ; 10041004 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v11, s4, v2 ; D2820002 0408090B s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v0, s4, v2 ; D2820002 04080900 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v3, s4, v8 ; 10061004 s_buffer_load_dword s4, s[0:3], 0x3 ; C2020103 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v11, s4, v3 ; D2820003 040C090B s_buffer_load_dword s4, s[0:3], 0xb ; C202010B s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v0, s4, v3 ; D2820003 040C0900 s_buffer_load_dword s4, s[0:3], 0xf ; C202010F s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v3, s4, v3 ; 06060604 v_rcp_f32_e32 v5, v3 ; 7E0A5503 v_mul_f32_e32 v6, v2, v5 ; 100C0B02 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v7, s4, v8 ; 100E1004 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v11, s4, v7 ; D2820007 041C090B s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v7, v0, s4, v7 ; D2820007 041C0900 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 exp 15, 35, 0, 0, 0, v5, v6, v4, v1 ; F800023F 01040605 s_buffer_load_dword s4, s[0:3], 0x6 ; C2020106 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v1, s4, v8 ; 10021004 s_buffer_load_dword s4, s[0:3], 0x2 ; C2020102 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v1, v11, s4, v1 ; D2820001 0404090B s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v0, s4, v1 ; D2820000 04040900 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s0, v0 ; 06000000 exp 15, 12, 0, 1, 0, v7, v2, v0, v3 ; F80008CF 03000207 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], POSITION, LINEAR DCL IN[1], GENERIC[9], PERSPECTIVE DCL IN[2], GENERIC[10], PERSPECTIVE DCL IN[3], GENERIC[11], PERSPECTIVE DCL IN[4], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL CONST[14] DCL CONST[0..5] DCL TEMP[0] DCL TEMP[1..10], LOCAL IMM[0] FLT32 { -0.1000, 0.0000, -1.0000, 1.0000} IMM[1] FLT32 { 0.0010, -0.1471, -0.2889, 0.4360} IMM[2] FLT32 { 0.6150, -0.5150, -0.1000, 0.5000} IMM[3] FLT32 { 1.0000, 1.1398, -0.3947, -0.5806} IMM[4] FLT32 { 1.0000, 2.0321, 1.4427, 0.0000} 0: MOV TEMP[0], IN[0] 1: MAD TEMP[0].y, IN[0], CONST[14].xxxx, CONST[14].yyyy 2: MAD TEMP[1].xyz, TEMP[0], CONST[5].xyxx, CONST[5].zwzz 3: MOV TEMP[2].xy, IN[1].xyyy 4: TEX TEMP[2], TEMP[2], SAMP[0], 2D 5: MOV TEMP[3].xw, TEMP[2] 6: ADD TEMP[4].x, TEMP[2].wwww, IMM[0].xxxx 7: FSGE TEMP[5].x, TEMP[4].xxxx, IMM[0].yyyy 8: UIF TEMP[5].xxxx :2 9: MOV TEMP[5].x, IMM[0].yyyy 10: ELSE :2 11: MOV TEMP[5].x, IMM[0].zzzz 12: ENDIF 13: MOV TEMP[5].x, TEMP[5].xxxx 14: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 15: UIF TEMP[6].xxxx :2 16: MOV TEMP[6].x, IMM[0].yyyy 17: ELSE :2 18: MOV TEMP[6].x, IMM[0].zzzz 19: ENDIF 20: MOV TEMP[5].y, TEMP[6].xxxx 21: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 22: UIF TEMP[6].xxxx :2 23: MOV TEMP[6].x, IMM[0].yyyy 24: ELSE :2 25: MOV TEMP[6].x, IMM[0].zzzz 26: ENDIF 27: MOV TEMP[5].z, TEMP[6].xxxx 28: FSGE TEMP[6].x, TEMP[4].xxxx, IMM[0].yyyy 29: UIF TEMP[6].xxxx :2 30: ELSE :2 31: ENDIF 32: FSLT TEMP[5].xyz, TEMP[5].xyzz, IMM[0].yyyy 33: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 34: OR TEMP[5].x, TEMP[6].xxxx, TEMP[5].yyyy 35: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].wwww 36: KILL_IF -TEMP[5].xxxx 37: MAD TEMP[5].xy, IN[3], IMM[0].wzww, IMM[0].ywyy 38: MOV TEMP[5].xy, TEMP[5].xyyy 39: TEX TEMP[5], TEMP[5], SAMP[5], 2D 40: MOV TEMP[4].z, TEMP[5] 41: ABS TEMP[6].x, TEMP[5] 42: MOV TEMP[6], -TEMP[6].xxxx 43: FSGE TEMP[7].x, TEMP[6].xxxx, IMM[0].yyyy 44: UIF TEMP[7].xxxx :2 45: MOV TEMP[7].x, IMM[0].zzzz 46: ELSE :2 47: MOV TEMP[7].x, IMM[0].yyyy 48: ENDIF 49: MOV TEMP[7].x, TEMP[7].xxxx 50: FSGE TEMP[8].x, TEMP[6].yyyy, IMM[0].yyyy 51: UIF TEMP[8].xxxx :2 52: MOV TEMP[8].x, IMM[0].zzzz 53: ELSE :2 54: MOV TEMP[8].x, IMM[0].yyyy 55: ENDIF 56: MOV TEMP[7].y, TEMP[8].xxxx 57: FSGE TEMP[8].x, TEMP[6].zzzz, IMM[0].yyyy 58: UIF TEMP[8].xxxx :2 59: MOV TEMP[8].x, IMM[0].zzzz 60: ELSE :2 61: MOV TEMP[8].x, IMM[0].yyyy 62: ENDIF 63: MOV TEMP[7].z, TEMP[8].xxxx 64: FSGE TEMP[6].x, TEMP[6].wwww, IMM[0].yyyy 65: UIF TEMP[6].xxxx :2 66: MOV TEMP[6].x, IMM[0].zzzz 67: ELSE :2 68: MOV TEMP[6].x, IMM[0].yyyy 69: ENDIF 70: MOV TEMP[7].w, TEMP[6].xxxx 71: MOV TEMP[6].z, TEMP[7] 72: FSLT TEMP[7].xyz, TEMP[7].xyzz, IMM[0].yyyy 73: OR TEMP[8].x, TEMP[7].xxxx, TEMP[7].zzzz 74: OR TEMP[7].x, TEMP[8].xxxx, TEMP[7].yyyy 75: AND TEMP[7].x, TEMP[7].xxxx, IMM[0].wwww 76: KILL_IF -TEMP[7].xxxx 77: RCP TEMP[6].x, CONST[0].xxxx 78: RCP TEMP[7].x, CONST[0].yyyy 79: MOV TEMP[6].y, TEMP[7].xxxx 80: MUL TEMP[1].yw, TEMP[6].xxzy, TEMP[1].xxzy 81: MOV TEMP[4].yw, TEMP[1].wyww 82: MUL TEMP[1].xy, IMM[1].xxxx, IN[2] 83: MOV TEMP[1].xy, TEMP[1].xyyy 84: TEX TEMP[1], TEMP[1], SAMP[1], 2D 85: MOV TEMP[6].xw, TEMP[1].xxxw 86: MAD TEMP[7].xy, TEMP[4].ywzw, IMM[0].wzww, IMM[0].ywyy 87: MOV TEMP[7].xy, TEMP[7].xyyy 88: TEX TEMP[7], TEMP[7], SAMP[2], 2D 89: MOV TEMP[8].w, TEMP[7].xyxw 90: DP3 TEMP[9].x, IMM[1].yzww, TEMP[1].xyzz 91: MOV TEMP[9].y, TEMP[9].xxxx 92: DP3 TEMP[10].x, IMM[2].xyzz, TEMP[1].xyzz 93: MOV TEMP[9].z, TEMP[10].xxxx 94: DP3 TEMP[10].x, IMM[1].yzww, TEMP[2].xyzz 95: MOV TEMP[6].y, TEMP[10].xxxx 96: DP3 TEMP[10].x, IMM[2].xyzz, TEMP[2].xyzz 97: MOV TEMP[6].z, TEMP[10].xxxx 98: LRP TEMP[1].yz, TEMP[1].wwww, TEMP[9], TEMP[6] 99: MOV TEMP[3].yz, TEMP[1].zyzz 100: DP2 TEMP[6].x, IMM[3].xyyy, TEMP[3].xzzz 101: DP3 TEMP[1].x, IMM[3].xzww, TEMP[3].xyzz 102: MOV TEMP[6].y, TEMP[1].xxxx 103: DP2 TEMP[1].x, IMM[4].xyyy, TEMP[3].xyyy 104: MOV TEMP[6].z, TEMP[1].xxxx 105: MUL TEMP[1].xyz, TEMP[7], CONST[1].xxxx 106: MOV TEMP[3].xyz, TEMP[1].xyzx 107: MAD TEMP[1].y, TEMP[7].wwww, -CONST[1].xxxx, -IMM[0].zzzz 108: MOV TEMP[7].xyz, TEMP[6].xyzz 109: TEX TEMP[7], TEMP[7], SAMP[4], 3D 110: MAD TEMP[1].xyz, TEMP[7], TEMP[1].yyyy, TEMP[3] 111: MOV TEMP[3].xyz, TEMP[1].xyzx 112: MOV TEMP[1].xy, IN[1].zwww 113: TEX TEMP[1], TEMP[1], SAMP[6], 2D 114: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[3], TEMP[1] 115: MOV TEMP[8].xyz, TEMP[1].xyzx 116: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 117: MOV TEMP[1].xyz, TEMP[1].xyzz 118: TEX TEMP[1], TEMP[1], SAMP[7], 3D 119: LRP TEMP[1].xyz, TEMP[5].xxxx, TEMP[1], TEMP[8] 120: MOV TEMP[4].xyz, TEMP[1].xyzx 121: MAD TEMP[1].x, IN[4].yyyy, IMM[2].wwww, IMM[2].wwww 122: MOV TEMP[3].x, TEMP[1].xxxx 123: MOV TEMP[3].y, CONST[4].wwww 124: MOV TEMP[1].xy, TEMP[3].xyyy 125: TEX TEMP[1].xw, TEMP[1], SAMP[3], 2D 126: MOV TEMP[6].w, TEMP[1].wwww 127: ADD TEMP[5].xyz, -CONST[2], IN[2] 128: MOV TEMP[3].z, TEMP[5].xyzx 129: DP3 TEMP[3].x, TEMP[5].xyzz, TEMP[5].xyzz 130: MUL TEMP[7].xy, TEMP[3].xzzw, CONST[4].yxzw 131: MUL TEMP[8].y, TEMP[7].yyyy, IMM[4].zzzz 132: EX2 TEMP[8].x, TEMP[8].yyyy 133: ADD TEMP[8].y, -TEMP[8].xxxx, IMM[0].wwww 134: MUL TEMP[7].x, TEMP[8].yyyy, TEMP[7].xxxx 135: RCP TEMP[5].x, TEMP[5].zzzz 136: MUL TEMP[5].x, TEMP[5].xxxx, TEMP[7].xxxx 137: MUL TEMP[5].x, TEMP[5].xxxx, IMM[4].zzzz 138: MOV TEMP[3].x, TEMP[5].xxxx 139: EX2 TEMP[5].x, TEMP[5].xxxx 140: MOV_SAT TEMP[3].x, TEMP[5].xxxx 141: ADD TEMP[3].x, -TEMP[3].xxxx, IMM[0].wwww 142: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 143: ADD TEMP[3].xyz, -TEMP[4], CONST[3] 144: MOV TEMP[6].xyz, TEMP[3].xyzx 145: MAD TEMP[1].xyz, TEMP[1].xxxx, TEMP[6], TEMP[4] 146: MOV TEMP[1].xyz, TEMP[1].xyzx 147: MOV TEMP[1].w, TEMP[2].wwww 148: MOV OUT[0], TEMP[1] 149: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 4) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 92) %40 = call float @llvm.SI.load.const(<16 x i8> %23, i32 224) %41 = call float @llvm.SI.load.const(<16 x i8> %23, i32 228) %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %77 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %78 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %84 = fmul float %15, %40 %85 = fadd float %84, %41 %86 = fmul float %14, %36 %87 = fadd float %86, %38 %88 = fmul float %85, %37 %89 = fadd float %88, %39 %90 = bitcast float %74 to i32 %91 = bitcast float %75 to i32 %92 = insertelement <2 x i32> undef, i32 %90, i32 0 %93 = insertelement <2 x i32> %92, i32 %91, i32 1 %94 = bitcast <8 x i32> %43 to <32 x i8> %95 = bitcast <4 x i32> %45 to <16 x i8> %96 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %93, <32 x i8> %94, <16 x i8> %95, i32 2) %97 = extractelement <4 x float> %96, i32 0 %98 = extractelement <4 x float> %96, i32 1 %99 = extractelement <4 x float> %96, i32 2 %100 = extractelement <4 x float> %96, i32 3 %101 = fadd float %100, 0xBFB99999A0000000 %102 = fcmp oge float %101, 0.000000e+00 %103 = sext i1 %102 to i32 %104 = bitcast i32 %103 to float %105 = bitcast float %104 to i32 %106 = icmp ne i32 %105, 0 %. = select i1 %106, float 0.000000e+00, float -1.000000e+00 %107 = fcmp oge float %101, 0.000000e+00 %108 = sext i1 %107 to i32 %109 = bitcast i32 %108 to float %110 = bitcast float %109 to i32 %111 = icmp ne i32 %110, 0 %temp24.0 = select i1 %111, float 0.000000e+00, float -1.000000e+00 %112 = fcmp oge float %101, 0.000000e+00 %113 = sext i1 %112 to i32 %114 = bitcast i32 %113 to float %115 = bitcast float %114 to i32 %116 = icmp ne i32 %115, 0 %.65 = select i1 %116, float 0.000000e+00, float -1.000000e+00 %117 = fcmp oge float %101, 0.000000e+00 %118 = sext i1 %117 to i32 %119 = bitcast i32 %118 to float %120 = bitcast float %119 to i32 %121 = icmp ne i32 %120, 0 %122 = fcmp olt float %., 0.000000e+00 %123 = sext i1 %122 to i32 %124 = fcmp olt float %temp24.0, 0.000000e+00 %125 = sext i1 %124 to i32 %126 = fcmp olt float %.65, 0.000000e+00 %127 = sext i1 %126 to i32 %128 = bitcast i32 %123 to float %129 = bitcast i32 %125 to float %130 = bitcast i32 %127 to float %131 = bitcast float %128 to i32 %132 = bitcast float %130 to i32 %133 = or i32 %131, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = bitcast float %129 to i32 %137 = or i32 %135, %136 %138 = bitcast i32 %137 to float %139 = bitcast float %138 to i32 %140 = and i32 %139, 1065353216 %141 = bitcast i32 %140 to float %142 = fsub float -0.000000e+00, %141 %143 = fsub float -0.000000e+00, %141 %144 = fsub float -0.000000e+00, %141 %145 = fsub float -0.000000e+00, %141 call void @llvm.AMDGPU.kill(float %142) call void @llvm.AMDGPU.kill(float %143) call void @llvm.AMDGPU.kill(float %144) call void @llvm.AMDGPU.kill(float %145) %146 = fmul float %81, 1.000000e+00 %147 = fadd float %146, 0.000000e+00 %148 = fmul float %82, -1.000000e+00 %149 = fadd float %148, 1.000000e+00 %150 = bitcast float %147 to i32 %151 = bitcast float %149 to i32 %152 = insertelement <2 x i32> undef, i32 %150, i32 0 %153 = insertelement <2 x i32> %152, i32 %151, i32 1 %154 = bitcast <8 x i32> %63 to <32 x i8> %155 = bitcast <4 x i32> %65 to <16 x i8> %156 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %153, <32 x i8> %154, <16 x i8> %155, i32 2) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 2 %159 = call float @fabs(float %157) %160 = fsub float -0.000000e+00, %159 %161 = fsub float -0.000000e+00, %159 %162 = fsub float -0.000000e+00, %159 %163 = fcmp oge float %160, 0.000000e+00 %164 = sext i1 %163 to i32 %165 = bitcast i32 %164 to float %166 = bitcast float %165 to i32 %167 = icmp ne i32 %166, 0 %temp28.0 = select i1 %167, float -1.000000e+00, float 0.000000e+00 %168 = fcmp oge float %161, 0.000000e+00 %169 = sext i1 %168 to i32 %170 = bitcast i32 %169 to float %171 = bitcast float %170 to i32 %172 = icmp ne i32 %171, 0 %.66 = select i1 %172, float -1.000000e+00, float 0.000000e+00 %173 = fcmp oge float %162, 0.000000e+00 %174 = sext i1 %173 to i32 %175 = bitcast i32 %174 to float %176 = bitcast float %175 to i32 %177 = icmp ne i32 %176, 0 %temp32.1 = select i1 %177, float -1.000000e+00, float 0.000000e+00 %178 = fcmp olt float %temp28.0, 0.000000e+00 %179 = sext i1 %178 to i32 %180 = fcmp olt float %.66, 0.000000e+00 %181 = sext i1 %180 to i32 %182 = fcmp olt float %temp32.1, 0.000000e+00 %183 = sext i1 %182 to i32 %184 = bitcast i32 %179 to float %185 = bitcast i32 %181 to float %186 = bitcast i32 %183 to float %187 = bitcast float %184 to i32 %188 = bitcast float %186 to i32 %189 = or i32 %187, %188 %190 = bitcast i32 %189 to float %191 = bitcast float %190 to i32 %192 = bitcast float %185 to i32 %193 = or i32 %191, %192 %194 = bitcast i32 %193 to float %195 = bitcast float %194 to i32 %196 = and i32 %195, 1065353216 %197 = bitcast i32 %196 to float %198 = fsub float -0.000000e+00, %197 %199 = fsub float -0.000000e+00, %197 %200 = fsub float -0.000000e+00, %197 %201 = fsub float -0.000000e+00, %197 call void @llvm.AMDGPU.kill(float %198) call void @llvm.AMDGPU.kill(float %199) call void @llvm.AMDGPU.kill(float %200) call void @llvm.AMDGPU.kill(float %201) %202 = fdiv float 1.000000e+00, %24 %203 = fdiv float 1.000000e+00, %25 %204 = fmul float %202, %87 %205 = fmul float %203, %89 %206 = fmul float 0x3F50624DE0000000, %78 %207 = fmul float 0x3F50624DE0000000, %79 %208 = bitcast float %206 to i32 %209 = bitcast float %207 to i32 %210 = insertelement <2 x i32> undef, i32 %208, i32 0 %211 = insertelement <2 x i32> %210, i32 %209, i32 1 %212 = bitcast <8 x i32> %47 to <32 x i8> %213 = bitcast <4 x i32> %49 to <16 x i8> %214 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %211, <32 x i8> %212, <16 x i8> %213, i32 2) %215 = extractelement <4 x float> %214, i32 0 %216 = extractelement <4 x float> %214, i32 1 %217 = extractelement <4 x float> %214, i32 2 %218 = extractelement <4 x float> %214, i32 3 %219 = fmul float %204, 1.000000e+00 %220 = fadd float %219, 0.000000e+00 %221 = fmul float %205, -1.000000e+00 %222 = fadd float %221, 1.000000e+00 %223 = bitcast float %220 to i32 %224 = bitcast float %222 to i32 %225 = insertelement <2 x i32> undef, i32 %223, i32 0 %226 = insertelement <2 x i32> %225, i32 %224, i32 1 %227 = bitcast <8 x i32> %51 to <32 x i8> %228 = bitcast <4 x i32> %53 to <16 x i8> %229 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %226, <32 x i8> %227, <16 x i8> %228, i32 2) %230 = extractelement <4 x float> %229, i32 0 %231 = extractelement <4 x float> %229, i32 1 %232 = extractelement <4 x float> %229, i32 2 %233 = extractelement <4 x float> %229, i32 3 %234 = fmul float 0xBFC2D527E0000000, %215 %235 = fmul float 0xBFD27CAEA0000000, %216 %236 = fadd float %235, %234 %237 = fmul float 0x3FDBE76C80000000, %217 %238 = fadd float %236, %237 %239 = fmul float 0x3FE3AE1480000000, %215 %240 = fmul float 0xBFE07ACC40000000, %216 %241 = fadd float %240, %239 %242 = fmul float 0xBFB99A4160000000, %217 %243 = fadd float %241, %242 %244 = fmul float 0xBFC2D527E0000000, %97 %245 = fmul float 0xBFD27CAEA0000000, %98 %246 = fadd float %245, %244 %247 = fmul float 0x3FDBE76C80000000, %99 %248 = fadd float %246, %247 %249 = fmul float 0x3FE3AE1480000000, %97 %250 = fmul float 0xBFE07ACC40000000, %98 %251 = fadd float %250, %249 %252 = fmul float 0xBFB99A4160000000, %99 %253 = fadd float %251, %252 %254 = call float @llvm.AMDGPU.lrp(float %218, float %238, float %248) %255 = call float @llvm.AMDGPU.lrp(float %218, float %243, float %253) %256 = fmul float 1.000000e+00, %97 %257 = fmul float 0x3FF23CBE60000000, %255 %258 = fadd float %256, %257 %259 = fmul float 1.000000e+00, %97 %260 = fmul float 0xBFD941F220000000, %254 %261 = fadd float %260, %259 %262 = fmul float 0xBFE2944680000000, %255 %263 = fadd float %261, %262 %264 = fmul float 1.000000e+00, %97 %265 = fmul float 0x400041C2E0000000, %254 %266 = fadd float %264, %265 %267 = fmul float %230, %26 %268 = fmul float %231, %26 %269 = fmul float %232, %26 %270 = fsub float -0.000000e+00, %26 %271 = fmul float %233, %270 %272 = fadd float %271, 1.000000e+00 %273 = bitcast float %258 to i32 %274 = bitcast float %263 to i32 %275 = bitcast float %266 to i32 %276 = insertelement <4 x i32> undef, i32 %273, i32 0 %277 = insertelement <4 x i32> %276, i32 %274, i32 1 %278 = insertelement <4 x i32> %277, i32 %275, i32 2 %279 = insertelement <4 x i32> %278, i32 undef, i32 3 %280 = bitcast <8 x i32> %59 to <32 x i8> %281 = bitcast <4 x i32> %61 to <16 x i8> %282 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %279, <32 x i8> %280, <16 x i8> %281, i32 3) %283 = extractelement <4 x float> %282, i32 0 %284 = extractelement <4 x float> %282, i32 1 %285 = extractelement <4 x float> %282, i32 2 %286 = fmul float %283, %272 %287 = fadd float %286, %267 %288 = fmul float %284, %272 %289 = fadd float %288, %268 %290 = fmul float %285, %272 %291 = fadd float %290, %269 %292 = bitcast float %76 to i32 %293 = bitcast float %77 to i32 %294 = insertelement <2 x i32> undef, i32 %292, i32 0 %295 = insertelement <2 x i32> %294, i32 %293, i32 1 %296 = bitcast <8 x i32> %67 to <32 x i8> %297 = bitcast <4 x i32> %69 to <16 x i8> %298 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %295, <32 x i8> %296, <16 x i8> %297, i32 2) %299 = extractelement <4 x float> %298, i32 0 %300 = extractelement <4 x float> %298, i32 1 %301 = extractelement <4 x float> %298, i32 2 %302 = call float @llvm.AMDGPU.lrp(float %157, float %287, float %299) %303 = call float @llvm.AMDGPU.lrp(float %157, float %289, float %300) %304 = call float @llvm.AMDGPU.lrp(float %157, float %291, float %301) %305 = fsub float -0.000000e+00, %157 %306 = fmul float %158, %305 %307 = fadd float %306, %157 %308 = bitcast float %302 to i32 %309 = bitcast float %303 to i32 %310 = bitcast float %304 to i32 %311 = insertelement <4 x i32> undef, i32 %308, i32 0 %312 = insertelement <4 x i32> %311, i32 %309, i32 1 %313 = insertelement <4 x i32> %312, i32 %310, i32 2 %314 = insertelement <4 x i32> %313, i32 undef, i32 3 %315 = bitcast <8 x i32> %71 to <32 x i8> %316 = bitcast <4 x i32> %73 to <16 x i8> %317 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %314, <32 x i8> %315, <16 x i8> %316, i32 3) %318 = extractelement <4 x float> %317, i32 0 %319 = extractelement <4 x float> %317, i32 1 %320 = extractelement <4 x float> %317, i32 2 %321 = call float @llvm.AMDGPU.lrp(float %307, float %318, float %302) %322 = call float @llvm.AMDGPU.lrp(float %307, float %319, float %303) %323 = call float @llvm.AMDGPU.lrp(float %307, float %320, float %304) %324 = fmul float %83, 5.000000e-01 %325 = fadd float %324, 5.000000e-01 %326 = bitcast float %325 to i32 %327 = bitcast float %35 to i32 %328 = insertelement <2 x i32> undef, i32 %326, i32 0 %329 = insertelement <2 x i32> %328, i32 %327, i32 1 %330 = bitcast <8 x i32> %55 to <32 x i8> %331 = bitcast <4 x i32> %57 to <16 x i8> %332 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %329, <32 x i8> %330, <16 x i8> %331, i32 2) %333 = extractelement <4 x float> %332, i32 0 %334 = fsub float -0.000000e+00, %27 %335 = fadd float %334, %78 %336 = fsub float -0.000000e+00, %28 %337 = fadd float %336, %79 %338 = fsub float -0.000000e+00, %29 %339 = fadd float %338, %80 %340 = fmul float %335, %335 %341 = fmul float %337, %337 %342 = fadd float %341, %340 %343 = fmul float %339, %339 %344 = fadd float %342, %343 %345 = fmul float %344, %34 %346 = fmul float %339, %33 %347 = fmul float %346, 0x3FF7154CA0000000 %348 = call float @llvm.AMDIL.exp.(float %347) %349 = fsub float -0.000000e+00, %348 %350 = fadd float %349, 1.000000e+00 %351 = fmul float %350, %345 %352 = fdiv float 1.000000e+00, %339 %353 = fmul float %352, %351 %354 = fmul float %353, 0x3FF7154CA0000000 %355 = call float @llvm.AMDIL.exp.(float %354) %356 = call float @llvm.AMDIL.clamp.(float %355, float 0.000000e+00, float 1.000000e+00) %357 = fsub float -0.000000e+00, %356 %358 = fadd float %357, 1.000000e+00 %359 = fmul float %358, %333 %360 = fsub float -0.000000e+00, %321 %361 = fadd float %360, %30 %362 = fsub float -0.000000e+00, %322 %363 = fadd float %362, %31 %364 = fsub float -0.000000e+00, %323 %365 = fadd float %364, %32 %366 = fmul float %359, %361 %367 = fadd float %366, %321 %368 = fmul float %359, %363 %369 = fadd float %368, %322 %370 = fmul float %359, %365 %371 = fadd float %370, %323 %372 = call i32 @llvm.SI.packf16(float %367, float %369) %373 = bitcast i32 %372 to float %374 = call i32 @llvm.SI.packf16(float %371, float %100) %375 = bitcast i32 %374 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %373, float %375, float %373, float %375) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readonly declare float @fabs(float) #2 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #3 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #3 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #3 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readonly } attributes #3 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v5, v0, 1, 0, [m0] ; C8140100 v_interp_p2_f32 v5, [v5], v1, 1, 0, [m0] ; C8150101 v_interp_p1_f32 v4, v0, 0, 0, [m0] ; C8100000 v_interp_p2_f32 v4, [v4], v1, 0, 0, [m0] ; C8110001 s_load_dwordx4 s[80:83], s[4:5], 0x0 ; C0A80500 s_load_dwordx4 s[68:71], s[4:5], 0x4 ; C0A20504 s_load_dwordx4 s[44:47], s[4:5], 0x8 ; C0960508 s_load_dwordx4 s[16:19], s[4:5], 0xc ; C088050C s_load_dwordx4 s[56:59], s[4:5], 0x10 ; C09C0510 s_load_dwordx4 s[84:87], s[4:5], 0x14 ; C0AA0514 s_load_dwordx4 s[32:35], s[4:5], 0x18 ; C0900518 s_load_dwordx4 s[20:23], s[4:5], 0x1c ; C08A051C s_load_dwordx8 s[88:95], s[6:7], 0x0 ; C0EC0700 s_load_dwordx8 s[72:79], s[6:7], 0x8 ; C0E40708 s_load_dwordx8 s[48:55], s[6:7], 0x10 ; C0D80710 s_load_dwordx8 s[8:15], s[6:7], 0x18 ; C0C40718 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v28, s8, 0 ; 04390008 v_writelane_b32 v28, s9, 1 ; 04390209 v_writelane_b32 v28, s10, 2 ; 0439040A v_writelane_b32 v28, s11, 3 ; 0439060B v_writelane_b32 v28, s12, 4 ; 0439080C v_writelane_b32 v28, s13, 5 ; 04390A0D v_writelane_b32 v28, s14, 6 ; 04390C0E v_writelane_b32 v28, s15, 7 ; 04390E0F s_load_dwordx8 s[60:67], s[6:7], 0x20 ; C0DE0720 s_load_dwordx8 s[8:15], s[6:7], 0x28 ; C0C40728 s_load_dwordx8 s[36:43], s[6:7], 0x30 ; C0D20730 s_load_dwordx8 s[24:31], s[6:7], 0x38 ; C0CC0738 image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[4:5], s[88:95], s[80:83] ; F0800F00 02960404 v_mov_b32_e32 v8, 0xbdcccccd ; 7E1002FF BDCCCCCD s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_add_f32_e32 v8, v7, v8 ; 06101107 v_cmp_ge_f32_e64 s[0:1], v8, 0 ; D00C0000 00010108 v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; D2000008 00018280 v_cmp_ne_i32_e64 s[0:1], v8, 0 ; D10A0000 00010108 v_cndmask_b32_e64 v8, -1.0, 0, s[0:1] ; D2000008 180100F3 v_cmp_lt_f32_e64 s[0:1], v8, 0 ; D0020000 00010108 v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; D2000008 00018280 v_and_b32_e32 v8, 1.0, v8 ; 361010F2 v_mov_b32_e32 v9, 0x80000000 ; 7E1202FF 80000000 v_xor_b32_e32 v8, v8, v9 ; 3A101308 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_interp_p1_f32 v8, v0, 1, 2, [m0] ; C8200900 v_interp_p2_f32 v8, [v8], v1, 1, 2, [m0] ; C8210901 v_sub_f32_e32 v11, 1.0, v8 ; 081610F2 v_interp_p1_f32 v8, v0, 0, 2, [m0] ; C8200800 v_interp_p2_f32 v8, [v8], v1, 0, 2, [m0] ; C8210801 v_add_f32_e32 v10, 0, v8 ; 06141080 image_sample v[10:11], 5, 0, 0, 0, 0, 0, 0, 0, v[10:11], s[8:15], s[84:87] ; F0800500 02A20A0A s_waitcnt vmcnt(0) ; BF8C0770 v_cmp_ge_f32_e64 s[0:1], -|v10|, 0 ; D00C0100 2001010A v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; D2000008 00018280 v_cmp_ne_i32_e64 s[0:1], v8, 0 ; D10A0000 00010108 v_cndmask_b32_e64 v8, 0, -1.0, s[0:1] ; D2000008 0001E680 v_cmp_lt_f32_e64 s[0:1], v8, 0 ; D0020000 00010108 v_cndmask_b32_e64 v8, 0, -1, s[0:1] ; D2000008 00018280 v_and_b32_e32 v8, 1.0, v8 ; 361010F2 v_xor_b32_e32 v8, v8, v9 ; 3A101308 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_cmpx_le_f32_e32 vcc, 0, v8 ; 7C261080 v_interp_p1_f32 v8, v0, 1, 1, [m0] ; C8200500 v_interp_p2_f32 v8, [v8], v1, 1, 1, [m0] ; C8210501 v_mul_f32_e32 v13, 0x3a83126f, v8 ; 101A10FF 3A83126F v_interp_p1_f32 v9, v0, 0, 1, [m0] ; C8240400 v_interp_p2_f32 v9, [v9], v1, 0, 1, [m0] ; C8250401 v_mul_f32_e32 v12, 0x3a83126f, v9 ; 101812FF 3A83126F image_sample v[12:15], 15, 0, 0, 0, 0, 0, 0, 0, v[12:13], s[72:79], s[68:71] ; F0800F00 02320C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v16, 0x3f1d70a4, v12 ; 102018FF 3F1D70A4 v_mov_b32_e32 v17, 0xbf03d662 ; 7E2202FF BF03D662 v_mad_f32 v16, v13, v17, v16 ; D2820010 0442230D v_mov_b32_e32 v18, 0xbdccd20b ; 7E2402FF BDCCD20B v_mad_f32 v16, v14, v18, v16 ; D2820010 0442250E v_mul_f32_e32 v19, 0x3f1d70a4, v4 ; 102608FF 3F1D70A4 v_mad_f32 v17, v5, v17, v19 ; D2820011 044E2305 v_mad_f32 v17, v6, v18, v17 ; D2820011 04462506 v_sub_f32_e32 v18, 1.0, v15 ; 08241EF2 v_mul_f32_e32 v17, v17, v18 ; 10222511 v_mad_f32 v16, v15, v16, v17 ; D2820010 0446210F v_mov_b32_e32 v17, 0xbe16a93f ; 7E2202FF BE16A93F v_mul_f32_e32 v19, v17, v12 ; 10261911 v_mov_b32_e32 v20, 0xbe93e575 ; 7E2802FF BE93E575 v_mad_f32 v19, v13, v20, v19 ; D2820013 044E290D v_mov_b32_e32 v21, 0x3edf3b64 ; 7E2A02FF 3EDF3B64 v_mad_f32 v19, v14, v21, v19 ; D2820013 044E2B0E v_mul_f32_e32 v17, v17, v4 ; 10220911 v_mad_f32 v17, v5, v20, v17 ; D2820011 04462905 v_mad_f32 v17, v6, v21, v17 ; D2820011 04462B06 v_mul_f32_e32 v17, v17, v18 ; 10222511 v_mad_f32 v12, v15, v19, v17 ; D282000C 0446270F v_mov_b32_e32 v13, 0xbeca0f91 ; 7E1A02FF BECA0F91 v_mad_f32 v13, v13, v12, v4 ; D282000D 0412190D v_mov_b32_e32 v14, 0xbf14a234 ; 7E1C02FF BF14A234 v_mad_f32 v18, v14, v16, v13 ; D2820012 0436210E v_mov_b32_e32 v13, 0x40020e17 ; 7E1A02FF 40020E17 v_mad_f32 v19, v13, v12, v4 ; D2820013 0412190D v_mov_b32_e32 v12, 0x3f91e5f3 ; 7E1802FF 3F91E5F3 v_mad_f32 v17, v12, v16, v4 ; D2820011 0412210C image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[60:67], s[56:59] ; F0800700 01CF0C11 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_buffer_load_dword s5, s[0:3], 0x14 ; C2028114 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v15, s4 ; 7E1E0204 v_mad_f32 v2, s5, v2, v15 ; D2820002 043E0405 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v15, s4 ; 7E1E5404 v_mad_f32 v15, v15, v2, 0 ; D282000F 0202050F s_buffer_load_dword s4, s[0:3], 0x39 ; C2020139 s_buffer_load_dword s5, s[0:3], 0x38 ; C2028138 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v2, s4 ; 7E040204 v_mad_f32 v2, s5, v3, v2 ; D2820002 040A0605 s_buffer_load_dword s4, s[0:3], 0x17 ; C2020117 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_mad_f32 v2, s5, v2, v3 ; D2820002 040E0405 s_buffer_load_dword s4, s[0:3], 0x1 ; C2020101 s_waitcnt lgkmcnt(0) ; BF8C007F v_rcp_f32_e32 v3, s4 ; 7E065404 v_mad_f32 v16, -v3, v2, 1.0 ; D2820010 23CA0503 image_sample v[15:18], 15, 0, 0, 0, 0, 0, 0, 0, v[15:16], s[48:55], s[44:47] ; F0800F00 016C0F0F s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 v_mad_f32 v2, -v18, s4, 1.0 ; D2820002 23C80912 v_mul_f32_e32 v3, s4, v17 ; 10062204 v_mad_f32 v3, v14, v2, v3 ; D2820003 040E050E v_sub_f32_e32 v19, 1.0, v10 ; 082614F2 v_interp_p1_f32 v21, v0, 3, 0, [m0] ; C8540300 v_interp_p2_f32 v21, [v21], v1, 3, 0, [m0] ; C8550301 v_interp_p1_f32 v20, v0, 2, 0, [m0] ; C8500200 v_interp_p2_f32 v20, [v20], v1, 2, 0, [m0] ; C8510201 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[36:43], s[32:35] ; F0800700 01091414 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v23, v22, v19 ; 102E2716 v_mad_f32 v25, v10, v3, v23 ; D2820019 045E070A v_mul_f32_e32 v3, s4, v16 ; 10062004 v_mad_f32 v3, v13, v2, v3 ; D2820003 040E050D v_mul_f32_e32 v27, v21, v19 ; 10362715 v_mad_f32 v24, v10, v3, v27 ; D2820018 046E070A v_mul_f32_e32 v3, s4, v15 ; 10061E04 v_mad_f32 v2, v12, v2, v3 ; D2820002 040E050C v_mul_f32_e32 v3, v20, v19 ; 10062714 v_mad_f32 v23, v10, v2, v3 ; D2820017 040E050A image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[23:26], s[24:31], s[20:23] ; F0800700 00A60C17 v_mad_f32 v2, -v11, v10, v10 ; D2820002 242A150B v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v10, v24, v3 ; 10140718 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v2, v13, v10 ; D282000A 042A1B02 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v11, s4, v10 ; 08161404 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v8, s4, v8 ; 0A101004 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v9, s4, v9 ; 0A121204 v_mul_f32_e32 v9, v9, v9 ; 10121309 v_mad_f32 v8, v8, v8, v9 ; D2820008 04261108 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v9, s4, v9 ; 0A121204 v_mad_f32 v8, v9, v9, v8 ; D2820008 04221309 s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v8 ; 10101004 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v15, s4, v9 ; 101E1204 v_mul_f32_e32 v15, 0x3fb8aa65, v15 ; 101E1EFF 3FB8AA65 v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_sub_f32_e32 v15, 1.0, v15 ; 081E1EF2 v_mul_f32_e32 v8, v8, v15 ; 10101F08 v_rcp_f32_e32 v9, v9 ; 7E125509 v_mul_f32_e32 v8, v8, v9 ; 10101308 v_mul_f32_e32 v8, 0x3fb8aa65, v8 ; 101010FF 3FB8AA65 v_exp_f32_e32 v8, v8 ; 7E104B08 v_add_f32_e64 v8, 0, v8 clamp ; D2060808 00021080 v_sub_f32_e32 v8, 1.0, v8 ; 081010F2 v_interp_p1_f32 v9, v0, 1, 3, [m0] ; C8240D00 v_interp_p2_f32 v9, [v9], v1, 1, 3, [m0] ; C8250D01 v_mad_f32 v0, 0.5, v9, 0.5 ; D2820000 03C212F0 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_readlane_b32 s4, v28, 0 ; 0209011C v_readlane_b32 s5, v28, 1 ; 020B031C v_readlane_b32 s6, v28, 2 ; 020D051C v_readlane_b32 s7, v28, 3 ; 020F071C v_readlane_b32 s8, v28, 4 ; 0211091C v_readlane_b32 s9, v28, 5 ; 02130B1C v_readlane_b32 s10, v28, 6 ; 02150D1C v_readlane_b32 s11, v28, 7 ; 02170F1C s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[4:11], s[16:19] ; F0800100 00810000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v8 ; 10001100 v_mad_f32 v1, v0, v11, v10 ; D2820001 042A1700 v_mul_f32_e32 v8, v23, v3 ; 10100717 v_mad_f32 v8, v2, v12, v8 ; D2820008 04221902 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v8 ; 08121004 v_mad_f32 v8, v0, v9, v8 ; D2820008 04221300 v_cvt_pkrtz_f16_f32_e32 v1, v8, v1 ; 5E020308 v_mul_f32_e32 v3, v25, v3 ; 10060719 v_mad_f32 v2, v2, v14, v3 ; D2820002 040E1D02 s_buffer_load_dword s0, s[0:3], 0xe ; C200010E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s0, v2 ; 08060400 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_cvt_pkrtz_f16_f32_e32 v0, v0, v7 ; 5E000F00 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..104] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} IMM[2] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+9] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[3] 112: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 113: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 114: RSQ TEMP[0].x, TEMP[0].xxxx 115: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3] 116: MOV TEMP[0].xyz, TEMP[0].xyzx 117: RCP TEMP[4].x, TEMP[1].wwww 118: MOV TEMP[2].w, TEMP[4].xxxx 119: MUL TEMP[4].xy, TEMP[1], TEMP[4].xxxx 120: MOV TEMP[4].xy, TEMP[4].xyxx 121: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 122: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 123: MOV TEMP[5].zw, TEMP[5].wwzw 124: MUL TEMP[6].xy, TEMP[2].yyyy, CONST[5] 125: MOV TEMP[3].xy, TEMP[6].xyxx 126: MAD TEMP[6].xy, TEMP[2].xxxx, CONST[4], TEMP[3] 127: MOV TEMP[3].xy, TEMP[6].xyxx 128: MAD TEMP[6].xy, TEMP[2].zzzz, CONST[6], TEMP[3] 129: MOV TEMP[3].xy, TEMP[6].xyxx 130: ADD TEMP[6].xy, TEMP[3], CONST[7] 131: MOV TEMP[3].xy, TEMP[6].xyxx 132: MAD TEMP[3].xy, TEMP[3], IMM[2].xyzz, IMM[2].yyyy 133: MOV TEMP[3].xy, TEMP[3].xyxx 134: MOV TEMP[5].xy, IN[4].xyxx 135: MOV TEMP[2].xyz, TEMP[2].xyzx 136: MOV TEMP[2].w, IMM[0].yyyy 137: MOV TEMP[4].zw, IMM[0].yyzy 138: MOV TEMP[3].zw, IMM[0].yyzy 139: MOV OUT[2], TEMP[5] 140: MOV OUT[3], TEMP[2] 141: MOV OUT[0], TEMP[1] 142: MOV OUT[1], TEMP[0] 143: MOV OUT[4], TEMP[4] 144: MOV OUT[5], TEMP[3] 145: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0 %57 = add i32 %5, %7 %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %57) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = fmul float 3.000000e+00, %59 %76 = fmul float %45, 1.000000e+00 %77 = fadd float %76, 0.000000e+00 %78 = fmul float %46, 1.000000e+00 %79 = fadd float %78, 0.000000e+00 %80 = fmul float %47, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %45, 0.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = fptosi float %75 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 144 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %86, 4 %91 = add i32 %90, 148 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %86, 4 %94 = add i32 %93, 152 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %86, 4 %97 = add i32 %96, 156 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %77, %89 %100 = fmul float %79, %92 %101 = fadd float %99, %100 %102 = fmul float %81, %95 %103 = fadd float %101, %102 %104 = fmul float %83, %98 %105 = fadd float %103, %104 %106 = fptosi float %75 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = add i32 1, %108 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = shl i32 %111, 4 %113 = add i32 %112, 144 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = shl i32 %111, 4 %116 = add i32 %115, 148 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = shl i32 %111, 4 %119 = add i32 %118, 152 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %111, 4 %122 = add i32 %121, 156 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %77, %114 %125 = fmul float %79, %117 %126 = fadd float %124, %125 %127 = fmul float %81, %120 %128 = fadd float %126, %127 %129 = fmul float %83, %123 %130 = fadd float %128, %129 %131 = fptosi float %75 to i32 %132 = bitcast i32 %131 to float %133 = bitcast float %132 to i32 %134 = add i32 2, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = shl i32 %136, 4 %138 = add i32 %137, 144 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = shl i32 %136, 4 %141 = add i32 %140, 148 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = shl i32 %136, 4 %144 = add i32 %143, 152 %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %144) %146 = shl i32 %136, 4 %147 = add i32 %146, 156 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %77, %139 %150 = fmul float %79, %142 %151 = fadd float %149, %150 %152 = fmul float %81, %145 %153 = fadd float %151, %152 %154 = fmul float %83, %148 %155 = fadd float %153, %154 %156 = fmul float %105, %52 %157 = fmul float %130, %52 %158 = fmul float %155, %52 %159 = fptosi float %75 to i32 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 144 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = shl i32 %161, 4 %166 = add i32 %165, 148 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = shl i32 %161, 4 %169 = add i32 %168, 152 %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %169) %171 = fmul float %66, %164 %172 = fmul float %67, %167 %173 = fadd float %172, %171 %174 = fmul float %68, %170 %175 = fadd float %173, %174 %176 = fptosi float %75 to i32 %177 = bitcast i32 %176 to float %178 = bitcast float %177 to i32 %179 = add i32 1, %178 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = shl i32 %181, 4 %183 = add i32 %182, 144 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = shl i32 %181, 4 %186 = add i32 %185, 148 %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %186) %188 = shl i32 %181, 4 %189 = add i32 %188, 152 %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %189) %191 = fmul float %66, %184 %192 = fmul float %67, %187 %193 = fadd float %192, %191 %194 = fmul float %68, %190 %195 = fadd float %193, %194 %196 = fptosi float %75 to i32 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = add i32 2, %198 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = shl i32 %201, 4 %203 = add i32 %202, 144 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %201, 4 %206 = add i32 %205, 148 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %201, 4 %209 = add i32 %208, 152 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %66, %204 %212 = fmul float %67, %207 %213 = fadd float %212, %211 %214 = fmul float %68, %210 %215 = fadd float %213, %214 %216 = fmul float %175, %52 %217 = fmul float %195, %52 %218 = fmul float %215, %52 %219 = fcmp olt float 0.000000e+00, %53 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 br i1 %223, label %IF, label %ENDIF IF: ; preds = %main_body %224 = fmul float 3.000000e+00, %60 %225 = fptosi float %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = shl i32 %227, 4 %229 = add i32 %228, 144 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = shl i32 %227, 4 %232 = add i32 %231, 148 %233 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %232) %234 = shl i32 %227, 4 %235 = add i32 %234, 152 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = shl i32 %227, 4 %238 = add i32 %237, 156 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %77, %230 %241 = fmul float %79, %233 %242 = fadd float %240, %241 %243 = fmul float %81, %236 %244 = fadd float %242, %243 %245 = fmul float %83, %239 %246 = fadd float %244, %245 %247 = fptosi float %224 to i32 %248 = bitcast i32 %247 to float %249 = bitcast float %248 to i32 %250 = add i32 1, %249 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 144 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = shl i32 %252, 4 %257 = add i32 %256, 148 %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %257) %259 = shl i32 %252, 4 %260 = add i32 %259, 152 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = shl i32 %252, 4 %263 = add i32 %262, 156 %264 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %263) %265 = fmul float %77, %255 %266 = fmul float %79, %258 %267 = fadd float %265, %266 %268 = fmul float %81, %261 %269 = fadd float %267, %268 %270 = fmul float %83, %264 %271 = fadd float %269, %270 %272 = fptosi float %224 to i32 %273 = bitcast i32 %272 to float %274 = bitcast float %273 to i32 %275 = add i32 2, %274 %276 = bitcast i32 %275 to float %277 = bitcast float %276 to i32 %278 = shl i32 %277, 4 %279 = add i32 %278, 144 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = shl i32 %277, 4 %282 = add i32 %281, 148 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = shl i32 %277, 4 %285 = add i32 %284, 152 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = shl i32 %277, 4 %288 = add i32 %287, 156 %289 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %288) %290 = fmul float %77, %280 %291 = fmul float %79, %283 %292 = fadd float %290, %291 %293 = fmul float %81, %286 %294 = fadd float %292, %293 %295 = fmul float %83, %289 %296 = fadd float %294, %295 %297 = fmul float %53, %246 %298 = fadd float %297, %156 %299 = fmul float %53, %271 %300 = fadd float %299, %157 %301 = fmul float %53, %296 %302 = fadd float %301, %158 %303 = fptosi float %224 to i32 %304 = bitcast i32 %303 to float %305 = bitcast float %304 to i32 %306 = shl i32 %305, 4 %307 = add i32 %306, 144 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = shl i32 %305, 4 %310 = add i32 %309, 148 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = shl i32 %305, 4 %313 = add i32 %312, 152 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %66, %308 %316 = fmul float %67, %311 %317 = fadd float %316, %315 %318 = fmul float %68, %314 %319 = fadd float %317, %318 %320 = fptosi float %224 to i32 %321 = bitcast i32 %320 to float %322 = bitcast float %321 to i32 %323 = add i32 1, %322 %324 = bitcast i32 %323 to float %325 = bitcast float %324 to i32 %326 = shl i32 %325, 4 %327 = add i32 %326, 144 %328 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %327) %329 = shl i32 %325, 4 %330 = add i32 %329, 148 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = shl i32 %325, 4 %333 = add i32 %332, 152 %334 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %333) %335 = fmul float %66, %328 %336 = fmul float %67, %331 %337 = fadd float %336, %335 %338 = fmul float %68, %334 %339 = fadd float %337, %338 %340 = fptosi float %224 to i32 %341 = bitcast i32 %340 to float %342 = bitcast float %341 to i32 %343 = add i32 2, %342 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 144 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = shl i32 %345, 4 %350 = add i32 %349, 148 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = shl i32 %345, 4 %353 = add i32 %352, 152 %354 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %353) %355 = fmul float %66, %348 %356 = fmul float %67, %351 %357 = fadd float %356, %355 %358 = fmul float %68, %354 %359 = fadd float %357, %358 %360 = fmul float %53, %319 %361 = fadd float %360, %216 %362 = fmul float %53, %339 %363 = fadd float %362, %217 %364 = fmul float %53, %359 %365 = fadd float %364, %218 %366 = fcmp olt float 0.000000e+00, %54 %367 = sext i1 %366 to i32 %368 = bitcast i32 %367 to float %369 = bitcast float %368 to i32 %370 = icmp ne i32 %369, 0 br i1 %370, label %IF70, label %ENDIF ENDIF: ; preds = %IF70, %IF, %main_body %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %428, %IF70 ], [ %224, %IF ] %temp8.0 = phi float [ %156, %main_body ], [ %502, %IF70 ], [ %298, %IF ] %temp9.0 = phi float [ %157, %main_body ], [ %504, %IF70 ], [ %300, %IF ] %temp10.0 = phi float [ %158, %main_body ], [ %506, %IF70 ], [ %302, %IF ] %temp12.0 = phi float [ %216, %main_body ], [ %565, %IF70 ], [ %361, %IF ] %temp13.0 = phi float [ %217, %main_body ], [ %567, %IF70 ], [ %363, %IF ] %temp14.0 = phi float [ %218, %main_body ], [ %569, %IF70 ], [ %365, %IF ] %371 = fmul float %temp9.0, %17 %372 = fmul float %temp9.0, %18 %373 = fmul float %temp9.0, %19 %374 = fmul float %temp9.0, %20 %375 = fmul float %temp8.0, %13 %376 = fadd float %375, %371 %377 = fmul float %temp8.0, %14 %378 = fadd float %377, %372 %379 = fmul float %temp8.0, %15 %380 = fadd float %379, %373 %381 = fmul float %temp8.0, %16 %382 = fadd float %381, %374 %383 = fmul float %temp10.0, %21 %384 = fadd float %383, %376 %385 = fmul float %temp10.0, %22 %386 = fadd float %385, %378 %387 = fmul float %temp10.0, %23 %388 = fadd float %387, %380 %389 = fmul float %temp10.0, %24 %390 = fadd float %389, %382 %391 = fadd float %384, %25 %392 = fadd float %386, %26 %393 = fadd float %388, %27 %394 = fadd float %390, %28 %395 = fmul float %temp12.0, %temp12.0 %396 = fmul float %temp13.0, %temp13.0 %397 = fadd float %396, %395 %398 = fmul float %temp14.0, %temp14.0 %399 = fadd float %397, %398 %400 = call float @llvm.maxnum.f32(float %399, float 0x3E7AD7F2A0000000) %401 = call float @llvm.AMDGPU.rsq.clamped.f32(float %400) %402 = fmul float %401, %temp12.0 %403 = fmul float %401, %temp13.0 %404 = fmul float %401, %temp14.0 %405 = fdiv float 1.000000e+00, %394 %406 = fmul float %391, %405 %407 = fmul float %392, %405 %408 = fadd float %temp8.0, %39 %409 = fadd float %temp9.0, %40 %410 = fmul float %408, %37 %411 = fmul float %409, %38 %412 = fmul float %temp9.0, %31 %413 = fmul float %temp9.0, %32 %414 = fmul float %temp8.0, %29 %415 = fadd float %414, %412 %416 = fmul float %temp8.0, %30 %417 = fadd float %416, %413 %418 = fmul float %temp10.0, %33 %419 = fadd float %418, %415 %420 = fmul float %temp10.0, %34 %421 = fadd float %420, %417 %422 = fadd float %419, %35 %423 = fadd float %421, %36 %424 = fmul float %422, 5.000000e-01 %425 = fadd float %424, -5.000000e-01 %426 = fmul float %423, -5.000000e-01 %427 = fadd float %426, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %402, float %403, float %404, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %410, float %411) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %406, float %407, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %425, float %427, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %391, float %392, float %393, float %394) ret void IF70: ; preds = %IF %428 = fmul float 3.000000e+00, %61 %429 = fptosi float %428 to i32 %430 = bitcast i32 %429 to float %431 = bitcast float %430 to i32 %432 = shl i32 %431, 4 %433 = add i32 %432, 144 %434 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %433) %435 = shl i32 %431, 4 %436 = add i32 %435, 148 %437 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %436) %438 = shl i32 %431, 4 %439 = add i32 %438, 152 %440 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %439) %441 = shl i32 %431, 4 %442 = add i32 %441, 156 %443 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %442) %444 = fmul float %77, %434 %445 = fmul float %79, %437 %446 = fadd float %444, %445 %447 = fmul float %81, %440 %448 = fadd float %446, %447 %449 = fmul float %83, %443 %450 = fadd float %448, %449 %451 = fptosi float %428 to i32 %452 = bitcast i32 %451 to float %453 = bitcast float %452 to i32 %454 = add i32 1, %453 %455 = bitcast i32 %454 to float %456 = bitcast float %455 to i32 %457 = shl i32 %456, 4 %458 = add i32 %457, 144 %459 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %458) %460 = shl i32 %456, 4 %461 = add i32 %460, 148 %462 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %461) %463 = shl i32 %456, 4 %464 = add i32 %463, 152 %465 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %464) %466 = shl i32 %456, 4 %467 = add i32 %466, 156 %468 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %467) %469 = fmul float %77, %459 %470 = fmul float %79, %462 %471 = fadd float %469, %470 %472 = fmul float %81, %465 %473 = fadd float %471, %472 %474 = fmul float %83, %468 %475 = fadd float %473, %474 %476 = fptosi float %428 to i32 %477 = bitcast i32 %476 to float %478 = bitcast float %477 to i32 %479 = add i32 2, %478 %480 = bitcast i32 %479 to float %481 = bitcast float %480 to i32 %482 = shl i32 %481, 4 %483 = add i32 %482, 144 %484 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %483) %485 = shl i32 %481, 4 %486 = add i32 %485, 148 %487 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %486) %488 = shl i32 %481, 4 %489 = add i32 %488, 152 %490 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %489) %491 = shl i32 %481, 4 %492 = add i32 %491, 156 %493 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %492) %494 = fmul float %77, %484 %495 = fmul float %79, %487 %496 = fadd float %494, %495 %497 = fmul float %81, %490 %498 = fadd float %496, %497 %499 = fmul float %83, %493 %500 = fadd float %498, %499 %501 = fmul float %54, %450 %502 = fadd float %501, %298 %503 = fmul float %54, %475 %504 = fadd float %503, %300 %505 = fmul float %54, %500 %506 = fadd float %505, %302 %507 = fptosi float %428 to i32 %508 = bitcast i32 %507 to float %509 = bitcast float %508 to i32 %510 = shl i32 %509, 4 %511 = add i32 %510, 144 %512 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %511) %513 = shl i32 %509, 4 %514 = add i32 %513, 148 %515 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %514) %516 = shl i32 %509, 4 %517 = add i32 %516, 152 %518 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %517) %519 = fmul float %66, %512 %520 = fmul float %67, %515 %521 = fadd float %520, %519 %522 = fmul float %68, %518 %523 = fadd float %521, %522 %524 = fptosi float %428 to i32 %525 = bitcast i32 %524 to float %526 = bitcast float %525 to i32 %527 = add i32 1, %526 %528 = bitcast i32 %527 to float %529 = bitcast float %528 to i32 %530 = shl i32 %529, 4 %531 = add i32 %530, 144 %532 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %531) %533 = shl i32 %529, 4 %534 = add i32 %533, 148 %535 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %534) %536 = shl i32 %529, 4 %537 = add i32 %536, 152 %538 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %537) %539 = fmul float %66, %532 %540 = fmul float %67, %535 %541 = fadd float %540, %539 %542 = fmul float %68, %538 %543 = fadd float %541, %542 %544 = fptosi float %428 to i32 %545 = bitcast i32 %544 to float %546 = bitcast float %545 to i32 %547 = add i32 2, %546 %548 = bitcast i32 %547 to float %549 = bitcast float %548 to i32 %550 = shl i32 %549, 4 %551 = add i32 %550, 144 %552 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %551) %553 = shl i32 %549, 4 %554 = add i32 %553, 148 %555 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %554) %556 = shl i32 %549, 4 %557 = add i32 %556, 152 %558 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %557) %559 = fmul float %66, %552 %560 = fmul float %67, %555 %561 = fadd float %560, %559 %562 = fmul float %68, %558 %563 = fadd float %561, %562 %564 = fmul float %54, %523 %565 = fadd float %564, %361 %566 = fmul float %54, %543 %567 = fadd float %566, %363 %568 = fmul float %54, %563 %569 = fadd float %568, %365 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v11, s[20:23], 0 idxen ; E00C2000 8005140B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v20 ; 100028FF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0xb0, v3 ; 4A0006FF 000000B0 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_format_xyzw v[15:18], v11, s[24:27], 0 idxen ; E00C2000 80060F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v15 ; 10021F00 v_add_i32_e32 v2, 0xb4, v3 ; 4A0406FF 000000B4 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v16, v2, v1 ; D2820001 04060510 v_add_i32_e32 v4, 0xb8, v3 ; 4A0806FF 000000B8 buffer_load_dword v12, v4, s[0:3], 0 offen ; E0301000 80000C04 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v17, v12, v1 ; D2820001 04061911 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v4, 0xa0, v3 ; 4A0806FF 000000A0 buffer_load_dword v13, v4, s[0:3], 0 offen ; E0301000 80000D04 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v13, v15 ; 10081F0D v_add_i32_e32 v5, 0xa4, v3 ; 4A0A06FF 000000A4 buffer_load_dword v14, v5, s[0:3], 0 offen ; E0301000 80000E05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v16, v14, v4 ; D2820004 04121D10 v_add_i32_e32 v5, 0xa8, v3 ; 4A0A06FF 000000A8 buffer_load_dword v27, v5, s[0:3], 0 offen ; E0301000 80001B05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v17, v27, v4 ; D2820004 04123711 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_add_i32_e32 v5, 0x90, v3 ; 4A0A06FF 00000090 buffer_load_dword v28, v5, s[0:3], 0 offen ; E0301000 80001C05 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v28, v15 ; 100A1F1C v_add_i32_e32 v6, 0x94, v3 ; 4A0C06FF 00000094 buffer_load_dword v29, v6, s[0:3], 0 offen ; E0301000 80001D06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v16, v29, v5 ; D2820005 04163B10 v_add_i32_e32 v6, 0x98, v3 ; 4A0C06FF 00000098 buffer_load_dword v30, v6, s[0:3], 0 offen ; E0301000 80001E06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v17, v30, v5 ; D2820005 04163D11 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 buffer_load_format_xyzw v[31:34], v11, s[12:15], 0 idxen ; E00C2000 80031F0B v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v19, 0, v32 ; 06264080 v_mul_f32_e32 v2, v2, v19 ; 10042702 v_add_f32_e32 v24, 0, v31 ; 06303E80 v_mad_f32 v0, v24, v0, v2 ; D2820000 040A0118 v_add_f32_e32 v26, 0, v33 ; 06344280 v_mad_f32 v0, v26, v12, v0 ; D2820000 0402191A v_mad_f32 v25, 0, v31, 1.0 ; D2820019 03CA3E80 v_add_i32_e32 v2, 0xbc, v3 ; 4A0406FF 000000BC buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v25, v2, v0 ; D2820000 04020519 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v2, v14, v19 ; 1004270E v_mad_f32 v2, v24, v13, v2 ; D2820002 040A1B18 v_mad_f32 v2, v26, v27, v2 ; D2820002 040A371A v_add_i32_e32 v12, 0xac, v3 ; 4A1806FF 000000AC buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v25, v12, v2 ; D2820002 040A1919 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v12, v29, v19 ; 1018271D v_mad_f32 v12, v24, v28, v12 ; D282000C 04323918 v_mad_f32 v12, v26, v30, v12 ; D282000C 04323D1A v_add_i32_e32 v3, 0x9c, v3 ; 4A0606FF 0000009C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v25, v3, v12 ; D2820003 04320719 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v27, 0, -1, s[4:5] ; D200081B 00118280 v_cmp_ne_i32_e64 s[32:33], v27, 0 ; D10A0020 0001011B s_buffer_load_dword s20, s[0:3], 0x23 ; C20A0123 s_buffer_load_dword s19, s[0:3], 0x22 ; C2098122 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s24, s[0:3], 0xd ; C20C010D s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s23, s[0:3], 0xb ; C20B810B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s26, s[0:3], 0x9 ; C20D0109 s_buffer_load_dword s18, s[0:3], 0x8 ; C2090108 s_buffer_load_dword s27, s[0:3], 0x7 ; C20D8107 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s25, s[0:3], 0x4 ; C20C8104 s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s31, s[0:3], 0x1 ; C20F8101 s_buffer_load_dword s28, s[0:3], 0x0 ; C20E0100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v21 ; 100C2AFF 40400000 v_cvt_i32_f32_e32 v27, v6 ; 7E361106 v_lshlrev_b32_e32 v27, 4, v27 ; 34363684 v_add_i32_e32 v28, 0xb4, v27 ; 4A3836FF 000000B4 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v29, 0xb0, v27 ; 4A3A36FF 000000B0 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, v29, v15 ; 103C1F1D v_mad_f32 v30, v16, v28, v30 ; D282001E 047A3910 v_add_i32_e32 v31, 0xb8, v27 ; 4A3E36FF 000000B8 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v17, v31, v30 ; D282001E 047A3F11 v_mad_f32 v1, v8, v30, v1 ; D2820001 04063D08 v_add_i32_e32 v30, 0xa4, v27 ; 4A3C36FF 000000A4 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E v_add_i32_e32 v32, 0xa0, v27 ; 4A4036FF 000000A0 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v32, v15 ; 10421F20 v_mad_f32 v33, v16, v30, v33 ; D2820021 04863D10 v_add_i32_e32 v34, 0xa8, v27 ; 4A4436FF 000000A8 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, v17, v34, v33 ; D2820021 04864511 v_mad_f32 v4, v8, v33, v4 ; D2820004 04124308 v_add_i32_e32 v33, 0x94, v27 ; 4A4236FF 00000094 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 v_add_i32_e32 v35, 0x90, v27 ; 4A4636FF 00000090 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0301000 80002323 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v35, v15 ; 10481F23 v_mad_f32 v36, v16, v33, v36 ; D2820024 04924310 v_add_i32_e32 v37, 0x98, v27 ; 4A4A36FF 00000098 buffer_load_dword v37, v37, s[0:3], 0 offen ; E0301000 80002525 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, v17, v37, v36 ; D2820024 04924B11 v_mad_f32 v5, v8, v36, v5 ; D2820005 04164908 v_mul_f32_e32 v28, v28, v19 ; 1038271C v_mad_f32 v28, v24, v29, v28 ; D282001C 04723B18 v_mad_f32 v28, v26, v31, v28 ; D282001C 04723F1A v_add_i32_e32 v29, 0xbc, v27 ; 4A3A36FF 000000BC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v0, v8, v28, v0 ; D2820000 04023908 v_mul_f32_e32 v28, v30, v19 ; 1038271E v_mad_f32 v28, v24, v32, v28 ; D282001C 04724118 v_mad_f32 v28, v26, v34, v28 ; D282001C 0472451A v_add_i32_e32 v29, 0xac, v27 ; 4A3A36FF 000000AC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v2, v8, v28, v2 ; D2820002 040A3908 v_mul_f32_e32 v28, v33, v19 ; 10382721 v_mad_f32 v28, v24, v35, v28 ; D282001C 04724718 v_mad_f32 v28, v26, v37, v28 ; D282001C 04724B1A v_add_i32_e32 v27, 0x9c, v27 ; 4A3636FF 0000009C buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v25, v27, v28 ; D282001B 04723719 v_mad_f32 v3, v8, v27, v3 ; D2820003 040E3708 v_cmp_gt_f32_e64 s[34:35], v9, 0 ; D0080022 00010109 v_cndmask_b32_e64 v27, 0, -1, s[34:35] ; D200081B 00898280 v_cmp_ne_i32_e64 s[34:35], v27, 0 ; D10A0022 0001011B s_and_saveexec_b64 s[34:35], s[34:35] ; BEA22422 s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v22 ; 100C2CFF 40400000 v_cvt_i32_f32_e32 v20, v6 ; 7E281106 v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v21, 0xb4, v20 ; 4A2A28FF 000000B4 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_add_i32_e32 v22, 0xb0, v20 ; 4A2C28FF 000000B0 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v23, v22, v15 ; 102E1F16 v_mad_f32 v23, v16, v21, v23 ; D2820017 045E2B10 v_add_i32_e32 v27, 0xb8, v20 ; 4A3628FF 000000B8 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v17, v27, v23 ; D2820017 045E3711 v_mad_f32 v1, v9, v23, v1 ; D2820001 04062F09 v_add_i32_e32 v23, 0xa4, v20 ; 4A2E28FF 000000A4 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v28, 0xa0, v20 ; 4A3828FF 000000A0 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v15 ; 103A1F1C v_mad_f32 v29, v16, v23, v29 ; D282001D 04762F10 v_add_i32_e32 v30, 0xa8, v20 ; 4A3C28FF 000000A8 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v17, v30, v29 ; D282001D 04763D11 v_mad_f32 v4, v9, v29, v4 ; D2820004 04123B09 v_add_i32_e32 v29, 0x94, v20 ; 4A3A28FF 00000094 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v31, 0x90, v20 ; 4A3E28FF 00000090 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v15 ; 10401F1F v_mad_f32 v32, v16, v29, v32 ; D2820020 04823B10 v_add_i32_e32 v33, 0x98, v20 ; 4A4228FF 00000098 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v17, v33, v32 ; D282000F 04824311 v_mad_f32 v5, v9, v15, v5 ; D2820005 04161F09 v_mul_f32_e32 v15, v21, v19 ; 101E2715 v_mad_f32 v15, v24, v22, v15 ; D282000F 043E2D18 v_mad_f32 v15, v26, v27, v15 ; D282000F 043E371A v_add_i32_e32 v16, 0xbc, v20 ; 4A2028FF 000000BC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v0, v9, v15, v0 ; D2820000 04021F09 v_mul_f32_e32 v15, v23, v19 ; 101E2717 v_mad_f32 v15, v24, v28, v15 ; D282000F 043E3918 v_mad_f32 v15, v26, v30, v15 ; D282000F 043E3D1A v_add_i32_e32 v16, 0xac, v20 ; 4A2028FF 000000AC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v2, v9, v15, v2 ; D2820002 040A1F09 v_mul_f32_e32 v15, v29, v19 ; 101E271D v_mad_f32 v15, v24, v31, v15 ; D282000F 043E3F18 v_mad_f32 v15, v26, v33, v15 ; D282000F 043E431A v_add_i32_e32 v16, 0x9c, v20 ; 4A2028FF 0000009C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v3, v9, v15, v3 ; D2820003 040E1F09 s_or_b64 exec, exec, s[34:35] ; 88FE227E s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mad_f32 v7, v4, v4, v7 ; D2820007 041E0904 v_mad_f32 v7, v1, v1, v7 ; D2820007 041E0301 v_max_f32_e32 v7, 0x33d6bf95, v7 ; 200E0EFF 33D6BF95 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 exp 15, 32, 0, 0, 0, v5, v4, v1, v6 ; F800020F 06010405 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s20, v2 ; 06020414 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_add_f32_e32 v4, s19, v3 ; 06080613 v_mul_f32_e32 v4, s21, v4 ; 10080815 exp 15, 33, 0, 0, 0, v11, v12, v4, v1 ; F800021F 01040C0B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 34, 0, 0, 0, v3, v2, v0, v1 ; F800022F 01000203 v_mul_f32_e32 v4, s30, v2 ; 1008041E v_mad_f32 v4, v3, s31, v4 ; D2820004 04103F03 v_mad_f32 v4, v0, s26, v4 ; D2820004 04103500 v_add_f32_e32 v4, s24, v4 ; 06080818 v_mul_f32_e32 v5, s27, v2 ; 100A041B v_mad_f32 v5, v3, s29, v5 ; D2820005 04143B03 v_mad_f32 v5, v0, s23, v5 ; D2820005 04142F00 v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v6, v5 ; 7E0C5505 v_mul_f32_e32 v7, v6, v4 ; 100E0906 v_mul_f32_e32 v8, s25, v2 ; 10100419 v_mad_f32 v8, v3, s28, v8 ; D2820008 04203903 v_mad_f32 v8, v0, s18, v8 ; D2820008 04202500 v_add_f32_e32 v8, s16, v8 ; 06101010 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 35, 0, 0, 0, v6, v7, v9, v1 ; F800023F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s12, v2 ; 100C040C v_mad_f32 v6, v3, s13, v6 ; D2820006 04181B03 v_mad_f32 v6, v0, s11, v6 ; D2820006 04181700 v_add_f32_e32 v6, s7, v6 ; 060C0C07 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 v_mul_f32_e32 v7, s8, v2 ; 100E0408 v_mad_f32 v7, v3, s9, v7 ; D2820007 041C1303 v_mad_f32 v7, v0, s5, v7 ; D2820007 041C0B00 v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mad_f32 v7, -0.5, v7, -0.5 ; D2820007 03C60EF1 exp 15, 36, 0, 0, 0, v6, v7, v9, v1 ; F800024F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s14, v2 ; 1002040E v_mad_f32 v1, v3, s15, v1 ; D2820001 04041F03 v_mad_f32 v0, v0, s10, v1 ; D2820000 04041500 v_add_f32_e32 v0, s6, v0 ; 06000006 exp 15, 12, 0, 1, 0, v8, v4, v0, v5 ; F80008CF 05000408 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[5].x, TEMP[6].xxxx, TEMP[5].yyyy 14: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 15: KILL_IF -TEMP[5].xxxx 16: MOV TEMP[5].xyz, IN[0].xyzz 17: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 18: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 19: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 20: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 21: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 22: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 23: MOV TEMP[0].xyz, TEMP[1].xyzx 24: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 25: MOV TEMP[0].xyz, TEMP[1].xyzx 26: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 27: MOV TEMP[3].yzw, TEMP[1].zyzw 28: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 29: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 30: RSQ TEMP[5].x, TEMP[5].xxxx 31: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 32: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 33: MOV TEMP[5].w, IMM[0].wwww 34: MOV TEMP[5].x, TEMP[3].yyyy 35: MOV TEMP[5].y, TEMP[3].zzzz 36: MOV TEMP[5].z, TEMP[3].wwww 37: DP4 TEMP[3].x, TEMP[5], TEMP[5] 38: RSQ TEMP[3].x, TEMP[3].xxxx 39: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 40: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 41: MOV_SAT TEMP[3].x, TEMP[3].xxxx 42: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 43: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 44: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 45: MOV TEMP[6].xyz, TEMP[5].yzww 46: TEX TEMP[6], TEMP[6], SAMP[4], CUBE 47: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 48: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 49: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 50: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 51: MOV TEMP[4].w, TEMP[7].wwww 52: MOV TEMP[6].xy, IN[1].xyyy 53: TEX TEMP[6], TEMP[6], SAMP[1], 2D 54: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 55: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 56: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 57: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 58: ABS TEMP[6].x, TEMP[8].wwww 59: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 60: MOV TEMP[5].xyz, TEMP[5].yzww 61: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 62: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 63: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 64: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 65: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 66: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 67: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 68: ABS TEMP[3].x, TEMP[3].xxxx 69: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 70: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 71: MOV TEMP[0].w, TEMP[6].wwww 72: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 73: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 74: ABS TEMP[5].x, TEMP[3].xxxx 75: LG2 TEMP[4].x, TEMP[5].xxxx 76: ABS TEMP[5].x, TEMP[3].yyyy 77: LG2 TEMP[5].x, TEMP[5].xxxx 78: MOV TEMP[4].y, TEMP[5].xxxx 79: ABS TEMP[3].x, TEMP[3].zzzz 80: LG2 TEMP[3].x, TEMP[3].xxxx 81: MOV TEMP[4].z, TEMP[3].xxxx 82: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 83: EX2 TEMP[4].x, TEMP[3].xxxx 84: EX2 TEMP[5].x, TEMP[3].yyyy 85: MOV TEMP[4].y, TEMP[5].xxxx 86: EX2 TEMP[3].x, TEMP[3].zzzz 87: MOV TEMP[4].z, TEMP[3].xxxx 88: MOV TEMP[3].xyz, TEMP[4].xyzz 89: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 90: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 91: MOV TEMP[5].xy, TEMP[5].xyyy 92: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 93: MOV TEMP[1].w, TEMP[5].wwww 94: MOV TEMP[6].xy, IN[1].zwww 95: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 96: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 97: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 98: MOV TEMP[6].xyz, TEMP[3].yzww 99: TEX TEMP[6], TEMP[6], SAMP[9], 3D 100: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 101: MOV TEMP[1].xyz, TEMP[3].xyzx 102: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 103: MOV TEMP[0].xyz, TEMP[3].xyzx 104: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 105: MOV TEMP[4].z, TEMP[3].zzzz 106: MOV TEMP[4].xy, IN[2].xyxx 107: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 108: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 109: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 110: EX2 TEMP[5].x, TEMP[5].wwww 111: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 112: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 113: RCP TEMP[3].x, TEMP[3].wwww 114: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 115: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 116: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 117: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 118: EX2 TEMP[3].x, TEMP[3].wwww 119: MOV_SAT TEMP[3].x, TEMP[3].xxxx 120: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 121: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 122: MOV TEMP[4].x, TEMP[5].xxxx 123: MOV TEMP[4].y, CONST[4].wwww 124: MOV TEMP[4].xy, TEMP[4].xyyy 125: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 126: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 127: MOV TEMP[0].w, TEMP[3].wwww 128: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 129: MOV TEMP[0].xyz, TEMP[0].xyzx 130: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 131: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 132: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 133: UIF TEMP[3].xxxx :0 134: MOV TEMP[2].x, TEMP[2].xxxx 135: ELSE :0 136: MOV TEMP[2].x, TEMP[1].xxxx 137: ENDIF 138: MOV TEMP[0].w, TEMP[2].xxxx 139: MOV OUT[0], TEMP[0] 140: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = and i32 %128, 1065353216 %130 = bitcast i32 %129 to float %131 = fsub float -0.000000e+00, %130 %132 = fsub float -0.000000e+00, %130 %133 = fsub float -0.000000e+00, %130 %134 = fsub float -0.000000e+00, %130 call void @llvm.AMDGPU.kill(float %131) call void @llvm.AMDGPU.kill(float %132) call void @llvm.AMDGPU.kill(float %133) call void @llvm.AMDGPU.kill(float %134) %135 = insertelement <4 x float> undef, float %78, i32 0 %136 = insertelement <4 x float> %135, float %79, i32 1 %137 = insertelement <4 x float> %136, float %80, i32 2 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %139 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %138) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = extractelement <4 x float> %139, i32 3 %144 = call float @fabs(float %142) %145 = fdiv float 1.000000e+00, %144 %146 = fmul float %140, %145 %147 = fadd float %146, 1.500000e+00 %148 = fmul float %141, %145 %149 = fadd float %148, 1.500000e+00 %150 = bitcast float %149 to i32 %151 = bitcast float %147 to i32 %152 = bitcast float %143 to i32 %153 = insertelement <4 x i32> undef, i32 %150, i32 0 %154 = insertelement <4 x i32> %153, i32 %151, i32 1 %155 = insertelement <4 x i32> %154, i32 %152, i32 2 %156 = insertelement <4 x i32> %155, i32 undef, i32 3 %157 = bitcast <8 x i32> %47 to <32 x i8> %158 = bitcast <4 x i32> %49 to <16 x i8> %159 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %156, <32 x i8> %157, <16 x i8> %158, i32 4) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = extractelement <4 x float> %159, i32 3 %164 = call float @llvm.pow.f32(float %160, float 0x40019999A0000000) %165 = call float @llvm.pow.f32(float %161, float 0x40019999A0000000) %166 = call float @llvm.pow.f32(float %162, float 0x40019999A0000000) %167 = call float @llvm.pow.f32(float %163, float 1.000000e+00) %168 = fmul float %102, %164 %169 = fmul float %103, %165 %170 = fmul float %104, %166 %171 = fadd float %168, %168 %172 = fadd float %169, %169 %173 = fadd float %170, %170 %174 = fsub float -0.000000e+00, %85 %175 = fadd float %24, %174 %176 = fsub float -0.000000e+00, %86 %177 = fadd float %25, %176 %178 = fsub float -0.000000e+00, %87 %179 = fadd float %26, %178 %180 = fmul float %175, %175 %181 = fmul float %177, %177 %182 = fadd float %181, %180 %183 = fmul float %179, %179 %184 = fadd float %182, %183 %185 = call float @llvm.maxnum.f32(float %184, float 0x3E7AD7F2A0000000) %186 = call float @llvm.AMDGPU.rsq.clamped.f32(float %185) %187 = fmul float %186, %175 %188 = fmul float %186, %177 %189 = fmul float %186, %179 %190 = fmul float %175, %186 %191 = fadd float %190, 0x3FAB15B580000000 %192 = fmul float %177, %186 %193 = fadd float %192, 0x3FEB126EA0000000 %194 = fmul float %179, %186 %195 = fadd float %194, 0x3FE0ED9160000000 %196 = fmul float %191, %191 %197 = fmul float %193, %193 %198 = fadd float %196, %197 %199 = fmul float %195, %195 %200 = fadd float %198, %199 %201 = fmul float 0.000000e+00, 0.000000e+00 %202 = fadd float %200, %201 %203 = call float @llvm.AMDGPU.rsq.clamped.f32(float %202) %204 = fmul float %191, %203 %205 = fmul float %193, %203 %206 = fmul float %195, %203 %207 = fmul float %204, %78 %208 = fmul float %205, %79 %209 = fadd float %208, %207 %210 = fmul float %206, %80 %211 = fadd float %209, %210 %212 = call float @llvm.AMDIL.clamp.(float %211, float 0.000000e+00, float 1.000000e+00) %213 = fmul float %78, %187 %214 = fmul float %79, %188 %215 = fadd float %214, %213 %216 = fmul float %80, %189 %217 = fadd float %215, %216 %218 = fadd float %217, %217 %219 = fsub float -0.000000e+00, %187 %220 = fmul float %218, %78 %221 = fadd float %220, %219 %222 = fsub float -0.000000e+00, %188 %223 = fmul float %218, %79 %224 = fadd float %223, %222 %225 = fsub float -0.000000e+00, %189 %226 = fmul float %218, %80 %227 = fadd float %226, %225 %228 = insertelement <4 x float> undef, float %221, i32 0 %229 = insertelement <4 x float> %228, float %224, i32 1 %230 = insertelement <4 x float> %229, float %227, i32 2 %231 = insertelement <4 x float> %230, float %167, i32 3 %232 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %231) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = extractelement <4 x float> %232, i32 3 %237 = call float @fabs(float %235) %238 = fdiv float 1.000000e+00, %237 %239 = fmul float %233, %238 %240 = fadd float %239, 1.500000e+00 %241 = fmul float %234, %238 %242 = fadd float %241, 1.500000e+00 %243 = bitcast float %242 to i32 %244 = bitcast float %240 to i32 %245 = bitcast float %236 to i32 %246 = insertelement <4 x i32> undef, i32 %243, i32 0 %247 = insertelement <4 x i32> %246, i32 %244, i32 1 %248 = insertelement <4 x i32> %247, i32 %245, i32 2 %249 = insertelement <4 x i32> %248, i32 undef, i32 3 %250 = bitcast <8 x i32> %55 to <32 x i8> %251 = bitcast <4 x i32> %57 to <16 x i8> %252 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %249, <32 x i8> %250, <16 x i8> %251, i32 4) %253 = extractelement <4 x float> %252, i32 0 %254 = extractelement <4 x float> %252, i32 1 %255 = extractelement <4 x float> %252, i32 2 %256 = call float @llvm.pow.f32(float %253, float 0x40019999A0000000) %257 = call float @llvm.pow.f32(float %254, float 0x40019999A0000000) %258 = call float @llvm.pow.f32(float %255, float 0x40019999A0000000) %259 = bitcast float %81 to i32 %260 = bitcast float %82 to i32 %261 = insertelement <2 x i32> undef, i32 %259, i32 0 %262 = insertelement <2 x i32> %261, i32 %260, i32 1 %263 = bitcast <8 x i32> %43 to <32 x i8> %264 = bitcast <4 x i32> %45 to <16 x i8> %265 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %262, <32 x i8> %263, <16 x i8> %264, i32 2) %266 = extractelement <4 x float> %265, i32 0 %267 = extractelement <4 x float> %265, i32 1 %268 = extractelement <4 x float> %265, i32 2 %269 = extractelement <4 x float> %265, i32 3 %270 = call float @llvm.pow.f32(float %266, float 0x40019999A0000000) %271 = call float @llvm.pow.f32(float %267, float 0x40019999A0000000) %272 = call float @llvm.pow.f32(float %268, float 0x40019999A0000000) %273 = call float @llvm.pow.f32(float %269, float 1.000000e+00) %274 = call float @fabs(float %273) %275 = call float @llvm.pow.f32(float %274, float 0x40019999A0000000) %276 = insertelement <4 x float> undef, float %221, i32 0 %277 = insertelement <4 x float> %276, float %224, i32 1 %278 = insertelement <4 x float> %277, float %227, i32 2 %279 = insertelement <4 x float> %278, float %227, i32 3 %280 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %279) %281 = extractelement <4 x float> %280, i32 0 %282 = extractelement <4 x float> %280, i32 1 %283 = extractelement <4 x float> %280, i32 2 %284 = extractelement <4 x float> %280, i32 3 %285 = call float @fabs(float %283) %286 = fdiv float 1.000000e+00, %285 %287 = fmul float %281, %286 %288 = fadd float %287, 1.500000e+00 %289 = fmul float %282, %286 %290 = fadd float %289, 1.500000e+00 %291 = bitcast float %290 to i32 %292 = bitcast float %288 to i32 %293 = bitcast float %284 to i32 %294 = insertelement <4 x i32> undef, i32 %291, i32 0 %295 = insertelement <4 x i32> %294, i32 %292, i32 1 %296 = insertelement <4 x i32> %295, i32 %293, i32 2 %297 = insertelement <4 x i32> %296, i32 undef, i32 3 %298 = bitcast <8 x i32> %51 to <32 x i8> %299 = bitcast <4 x i32> %53 to <16 x i8> %300 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %297, <32 x i8> %298, <16 x i8> %299, i32 4) %301 = extractelement <4 x float> %300, i32 0 %302 = extractelement <4 x float> %300, i32 1 %303 = extractelement <4 x float> %300, i32 2 %304 = call float @llvm.pow.f32(float %301, float 0x40019999A0000000) %305 = call float @llvm.pow.f32(float %302, float 0x40019999A0000000) %306 = call float @llvm.pow.f32(float %303, float 0x40019999A0000000) %307 = call float @llvm.AMDGPU.lrp(float %275, float %304, float %256) %308 = call float @llvm.AMDGPU.lrp(float %275, float %305, float %257) %309 = call float @llvm.AMDGPU.lrp(float %275, float %306, float %258) %310 = fmul float %275, 1.990000e+02 %311 = fadd float %310, 1.000000e+00 %312 = call float @fabs(float %212) %313 = call float @llvm.pow.f32(float %312, float %311) %314 = fmul float %311, 0x3FB99999A0000000 %315 = fmul float %314, %313 %316 = fadd float %315, %307 %317 = fmul float %314, %313 %318 = fadd float %317, %308 %319 = fmul float %314, %313 %320 = fadd float %319, %309 %321 = fmul float %316, %270 %322 = fadd float %321, %171 %323 = fmul float %318, %271 %324 = fadd float %323, %172 %325 = fmul float %320, %272 %326 = fadd float %325, %173 %327 = call float @fabs(float %322) %328 = call float @llvm.log2.f32(float %327) %329 = call float @fabs(float %324) %330 = call float @llvm.log2.f32(float %329) %331 = call float @fabs(float %326) %332 = call float @llvm.log2.f32(float %331) %333 = fmul float %328, 0x3FDD1743E0000000 %334 = fmul float %330, 0x3FDD1743E0000000 %335 = fmul float %332, 0x3FDD1743E0000000 %336 = call float @llvm.AMDIL.exp.(float %333) %337 = call float @llvm.AMDIL.exp.(float %334) %338 = call float @llvm.AMDIL.exp.(float %335) %339 = bitcast float %336 to i32 %340 = bitcast float %337 to i32 %341 = bitcast float %338 to i32 %342 = insertelement <4 x i32> undef, i32 %339, i32 0 %343 = insertelement <4 x i32> %342, i32 %340, i32 1 %344 = insertelement <4 x i32> %343, i32 %341, i32 2 %345 = insertelement <4 x i32> %344, i32 undef, i32 3 %346 = bitcast <8 x i32> %63 to <32 x i8> %347 = bitcast <4 x i32> %65 to <16 x i8> %348 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %345, <32 x i8> %346, <16 x i8> %347, i32 3) %349 = extractelement <4 x float> %348, i32 0 %350 = extractelement <4 x float> %348, i32 1 %351 = extractelement <4 x float> %348, i32 2 %352 = fmul float %89, 1.000000e+00 %353 = fadd float %352, 0.000000e+00 %354 = fmul float %90, -1.000000e+00 %355 = fadd float %354, 1.000000e+00 %356 = bitcast float %353 to i32 %357 = bitcast float %355 to i32 %358 = insertelement <2 x i32> undef, i32 %356, i32 0 %359 = insertelement <2 x i32> %358, i32 %357, i32 1 %360 = bitcast <8 x i32> %67 to <32 x i8> %361 = bitcast <4 x i32> %69 to <16 x i8> %362 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %359, <32 x i8> %360, <16 x i8> %361, i32 2) %363 = extractelement <4 x float> %362, i32 0 %364 = extractelement <4 x float> %362, i32 2 %365 = bitcast float %83 to i32 %366 = bitcast float %84 to i32 %367 = insertelement <2 x i32> undef, i32 %365, i32 0 %368 = insertelement <2 x i32> %367, i32 %366, i32 1 %369 = bitcast <8 x i32> %71 to <32 x i8> %370 = bitcast <4 x i32> %73 to <16 x i8> %371 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %368, <32 x i8> %369, <16 x i8> %370, i32 2) %372 = extractelement <4 x float> %371, i32 0 %373 = extractelement <4 x float> %371, i32 1 %374 = extractelement <4 x float> %371, i32 2 %375 = call float @llvm.AMDGPU.lrp(float %363, float %349, float %372) %376 = call float @llvm.AMDGPU.lrp(float %363, float %350, float %373) %377 = call float @llvm.AMDGPU.lrp(float %363, float %351, float %374) %378 = fsub float -0.000000e+00, %363 %379 = fmul float %364, %378 %380 = fadd float %379, %363 %381 = bitcast float %375 to i32 %382 = bitcast float %376 to i32 %383 = bitcast float %377 to i32 %384 = insertelement <4 x i32> undef, i32 %381, i32 0 %385 = insertelement <4 x i32> %384, i32 %382, i32 1 %386 = insertelement <4 x i32> %385, i32 %383, i32 2 %387 = insertelement <4 x i32> %386, i32 undef, i32 3 %388 = bitcast <8 x i32> %75 to <32 x i8> %389 = bitcast <4 x i32> %77 to <16 x i8> %390 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %387, <32 x i8> %388, <16 x i8> %389, i32 3) %391 = extractelement <4 x float> %390, i32 0 %392 = extractelement <4 x float> %390, i32 1 %393 = extractelement <4 x float> %390, i32 2 %394 = call float @llvm.AMDGPU.lrp(float %380, float %391, float %375) %395 = call float @llvm.AMDGPU.lrp(float %380, float %392, float %376) %396 = call float @llvm.AMDGPU.lrp(float %380, float %393, float %377) %397 = fsub float -0.000000e+00, %394 %398 = fadd float %397, %30 %399 = fsub float -0.000000e+00, %395 %400 = fadd float %399, %31 %401 = fsub float -0.000000e+00, %396 %402 = fadd float %401, %32 %403 = fmul float %37, %87 %404 = fsub float -0.000000e+00, %33 %405 = fadd float %85, %404 %406 = fsub float -0.000000e+00, %34 %407 = fadd float %86, %406 %408 = fsub float -0.000000e+00, %35 %409 = fadd float %403, %408 %410 = fmul float %409, %27 %411 = fmul float %410, 0x3FF7154CA0000000 %412 = call float @llvm.AMDIL.exp.(float %411) %413 = fsub float -0.000000e+00, %412 %414 = fadd float %413, 1.000000e+00 %415 = fmul float %405, %405 %416 = fmul float %407, %407 %417 = fadd float %416, %415 %418 = fmul float %409, %409 %419 = fadd float %417, %418 %420 = fdiv float 1.000000e+00, %409 %421 = fmul float %419, %28 %422 = fmul float %414, %421 %423 = fmul float %420, %422 %424 = fmul float %423, 0x3FF7154CA0000000 %425 = call float @llvm.AMDIL.exp.(float %424) %426 = call float @llvm.AMDIL.clamp.(float %425, float 0.000000e+00, float 1.000000e+00) %427 = fsub float -0.000000e+00, %426 %428 = fadd float %427, 1.000000e+00 %429 = fmul float %88, 5.000000e-01 %430 = fadd float %429, 5.000000e-01 %431 = bitcast float %430 to i32 %432 = bitcast float %29 to i32 %433 = insertelement <2 x i32> undef, i32 %431, i32 0 %434 = insertelement <2 x i32> %433, i32 %432, i32 1 %435 = bitcast <8 x i32> %59 to <32 x i8> %436 = bitcast <4 x i32> %61 to <16 x i8> %437 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %434, <32 x i8> %435, <16 x i8> %436, i32 2) %438 = extractelement <4 x float> %437, i32 0 %439 = fmul float %428, %438 %440 = fmul float %439, %398 %441 = fadd float %440, %394 %442 = fmul float %439, %400 %443 = fadd float %442, %395 %444 = fmul float %439, %402 %445 = fadd float %444, %396 %446 = fmul float %107, 5.000000e-01 %447 = fsub float -0.000000e+00, %36 %448 = fadd float %447, %87 %449 = fcmp oge float %448, 0.000000e+00 %450 = sext i1 %449 to i32 %451 = bitcast i32 %450 to float %452 = bitcast float %451 to i32 %453 = icmp ne i32 %452, 0 %. = select i1 %453, float %107, float %446 %454 = call i32 @llvm.SI.packf16(float %441, float %443) %455 = bitcast i32 %454 to float %456 = call i32 @llvm.SI.packf16(float %445, float %.) %457 = bitcast i32 %456 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %455, float %457, float %455, float %457) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b64 s[100:101], s[2:3] ; BEE40402 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[64:67], s[4:5], 0x4 ; C0A00504 s_load_dwordx4 s[88:91], s[4:5], 0x8 ; C0AC0508 s_load_dwordx4 s[52:55], s[4:5], 0xc ; C09A050C s_load_dwordx4 s[68:71], s[4:5], 0x10 ; C0A20510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v47, s0, 0 ; 045F0000 v_writelane_b32 v47, s1, 1 ; 045F0201 v_writelane_b32 v47, s2, 2 ; 045F0402 v_writelane_b32 v47, s3, 3 ; 045F0603 s_load_dwordx4 s[40:43], s[4:5], 0x18 ; C0940518 s_load_dwordx4 s[28:31], s[4:5], 0x1c ; C08E051C s_load_dwordx4 s[24:27], s[4:5], 0x20 ; C08C0520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[0:7], vcc, 0x0 ; C0C06B00 s_load_dwordx8 s[72:79], vcc, 0x8 ; C0E46B08 s_load_dwordx8 s[92:99], vcc, 0x10 ; C0EE6B10 s_load_dwordx8 s[56:63], vcc, 0x18 ; C0DC6B18 s_load_dwordx8 s[80:87], vcc, 0x20 ; C0E86B20 s_load_dwordx8 s[12:19], vcc, 0x28 ; C0C66B28 s_load_dwordx8 s[44:51], vcc, 0x30 ; C0D66B30 s_load_dwordx8 s[32:39], vcc, 0x38 ; C0D06B38 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800F00 00400402 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v8, v7 ; 7E104F07 v_mul_legacy_f32_e32 v8, 1.0, v8 ; 0E1010F2 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v9, 0x7fffffff ; 7E1202FF 7FFFFFFF v_and_b32_e32 v8, v8, v9 ; 36101308 v_log_f32_e32 v8, v8 ; 7E104F08 v_mov_b32_e32 v10, 0x400ccccd ; 7E1402FF 400CCCCD v_mul_legacy_f32_e32 v8, v10, v8 ; 0E10110A v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v11, 0xbdcccccd ; 7E1602FF BDCCCCCD v_add_f32_e32 v11, v8, v11 ; 06161708 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_cmp_lt_f32_e64 s[0:1], v11, 0 ; D0020000 0001010B v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; D200080B 00018280 v_and_b32_e32 v11, 1.0, v11 ; 361616F2 v_xor_b32_e32 v11, 0x80000000, v11 ; 3A1616FF 80000000 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_cubeid_f32 v19, v12, v13, v14 ; D2880013 043A1B0C v_cubema_f32 v18, v12, v13, v14 ; D28E0012 043A1B0C v_cubesc_f32 v17, v12, v13, v14 ; D28A0011 043A1B0C v_cubetc_f32 v16, v12, v13, v14 ; D28C0010 043A1B0C v_rcp_f32_e64 v11, |v18| ; D354010B 00000112 v_mov_b32_e32 v24, 0x3fc00000 ; 7E3002FF 3FC00000 v_mad_f32 v18, v16, v11, v24 ; D2820012 04621710 v_mad_f32 v17, v17, v11, v24 ; D2820011 04621711 image_sample v[16:19], 15, 0, 0, 0, 0, 0, 0, 0, v[17:20], s[92:99], s[88:91] ; F0800F00 02D71011 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v11, v19 ; 7E164F13 v_mul_legacy_f32_e32 v11, 1.0, v11 ; 0E1616F2 v_exp_f32_e32 v23, v11 ; 7E2E4B0B v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_load_dwordx4 s[0:3], s[100:101], 0x0 ; C0806500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v25, s4, v11 ; 08321604 v_interp_p1_f32 v26, v0, 0, 2, [m0] ; C8680800 v_interp_p2_f32 v26, [v26], v1, 0, 2, [m0] ; C8690801 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v27, s4, v26 ; 08363404 v_mul_f32_e32 v28, v27, v27 ; 1038371B v_mad_f32 v28, v25, v25, v28 ; D282001C 04723319 v_interp_p1_f32 v29, v0, 2, 2, [m0] ; C8740A00 v_interp_p2_f32 v29, [v29], v1, 2, 2, [m0] ; C8750A01 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v30, s4, v29 ; 083C3A04 v_mad_f32 v28, v30, v30, v28 ; D282001C 04723D1E v_max_f32_e32 v28, 0x33d6bf95, v28 ; 203838FF 33D6BF95 v_rsq_clamp_f32_e32 v28, v28 ; 7E38591C v_mul_f32_e32 v31, v25, v28 ; 103E3919 v_mul_f32_e32 v32, v27, v28 ; 1040391B v_mul_f32_e32 v33, v32, v12 ; 10421920 v_mad_f32 v33, v13, v31, v33 ; D2820021 04863F0D v_mul_f32_e32 v34, v30, v28 ; 1044391E v_mad_f32 v33, v14, v34, v33 ; D2820021 0486450E v_add_f32_e32 v33, v33, v33 ; 06424321 v_mad_f32 v22, v33, v14, -v34 ; D2820016 848A1D21 v_mad_f32 v21, v33, v13, -v31 ; D2820015 847E1B21 v_mad_f32 v20, v33, v12, -v32 ; D2820014 84821921 v_cubeid_f32 v34, v20, v21, v22 ; D2880022 045A2B14 v_cubema_f32 v33, v20, v21, v22 ; D28E0021 045A2B14 v_cubesc_f32 v32, v20, v21, v22 ; D28A0020 045A2B14 v_cubetc_f32 v31, v20, v21, v22 ; D28C001F 045A2B14 v_rcp_f32_e64 v39, |v33| ; D3540127 00000121 v_mad_f32 v33, v31, v39, v24 ; D2820021 04624F1F v_mad_f32 v32, v32, v39, v24 ; D2820020 04624F20 image_sample v[31:33], 7, 0, 0, 0, 0, 0, 0, 0, v[32:35], s[80:87], s[68:71] ; F0800700 02341F20 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v34, v33 ; 7E444F21 v_mul_legacy_f32_e32 v34, v10, v34 ; 0E44450A v_exp_f32_e32 v34, v34 ; 7E444B22 image_sample v[35:38], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[72:79], s[64:67] ; F0800F00 02122302 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v38 ; 7E044F26 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_and_b32_e32 v2, v2, v9 ; 36041302 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v10, v2 ; 0E04050A v_exp_f32_e32 v2, v2 ; 7E044B02 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v34, v34, v3 ; 10440722 v_mov_b32_e32 v23, v22 ; 7E2E0316 v_cubeid_f32 v42, v20, v21, v22 ; D288002A 045A2B14 v_cubema_f32 v41, v20, v21, v22 ; D28E0029 045A2B14 v_cubesc_f32 v40, v20, v21, v22 ; D28A0028 045A2B14 v_cubetc_f32 v39, v20, v21, v22 ; D28C0027 045A2B14 v_rcp_f32_e64 v20, |v41| ; D3540114 00000129 v_mad_f32 v41, v39, v20, v24 ; D2820029 04622927 v_mad_f32 v40, v40, v20, v24 ; D2820028 04622928 image_sample v[20:22], 7, 0, 0, 0, 0, 0, 0, 0, v[40:43], s[56:63], s[52:55] ; F0800700 01AE1428 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v23, v22 ; 7E2E4F16 v_mul_legacy_f32_e32 v23, v10, v23 ; 0E2E2F0A v_exp_f32_e32 v23, v23 ; 7E2E4B17 v_mad_f32 v23, v2, v23, v34 ; D2820017 048A2F02 v_mov_b32_e32 v24, 0x3d58adac ; 7E3002FF 3D58ADAC v_mad_f32 v24, v27, v28, v24 ; D2820018 0462391B v_mov_b32_e32 v27, 0x3f589375 ; 7E3602FF 3F589375 v_mad_f32 v25, v25, v28, v27 ; D2820019 046E3919 v_mul_f32_e32 v27, v25, v25 ; 10363319 v_mad_f32 v27, v24, v24, v27 ; D282001B 046E3118 v_mov_b32_e32 v34, 0x3f076c8b ; 7E4402FF 3F076C8B v_mad_f32 v28, v30, v28, v34 ; D282001C 048A391E v_mad_f32 v27, v28, v28, v27 ; D282001B 046E391C v_add_f32_e32 v27, 0, v27 ; 06363680 v_rsq_clamp_f32_e32 v27, v27 ; 7E36591B v_mul_f32_e32 v25, v27, v25 ; 1032331B v_mul_f32_e32 v24, v27, v24 ; 1030311B v_mul_f32_e32 v24, v12, v24 ; 1030310C v_mad_f32 v24, v25, v13, v24 ; D2820018 04621B19 v_mul_f32_e32 v25, v27, v28 ; 1032391B v_mad_f32 v12, v25, v14, v24 ; D282000C 04621D19 v_add_f32_e64 v12, 0, v12 clamp ; D206080C 00021880 v_and_b32_e32 v9, v12, v9 ; 3612130C v_log_f32_e32 v9, v9 ; 7E124F09 v_mov_b32_e32 v12, 0x43470000 ; 7E1802FF 43470000 v_mad_f32 v12, v12, v2, 1.0 ; D282000C 03CA050C v_mul_legacy_f32_e32 v9, v12, v9 ; 0E12130C v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v12, 0x3dcccccd, v12 ; 101818FF 3DCCCCCD v_mad_f32 v13, v12, v9, v23 ; D282000D 045E130C v_log_f32_e32 v14, v37 ; 7E1C4F25 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_log_f32_e32 v14, v18 ; 7E1C4F12 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_log_f32_e32 v15, v6 ; 7E1E4F06 v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mad_f32 v13, 2.0, v14, v13 ; D282000D 04361CF4 v_log_f32_e64 v13, |v13| ; D34E010D 0000010D v_mul_f32_e32 v13, 0x3ee8ba1f, v13 ; 101A1AFF 3EE8BA1F v_exp_f32_e32 v41, v13 ; 7E524B0D v_log_f32_e32 v13, v32 ; 7E1A4F20 v_mul_legacy_f32_e32 v13, v10, v13 ; 0E1A1B0A v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v13, v13, v3 ; 101A070D v_log_f32_e32 v14, v21 ; 7E1C4F15 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mad_f32 v13, v2, v14, v13 ; D282000D 04361D02 v_mad_f32 v13, v12, v9, v13 ; D282000D 0436130C v_log_f32_e32 v14, v36 ; 7E1C4F24 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_mul_f32_e32 v13, v14, v13 ; 101A1B0E v_log_f32_e32 v14, v17 ; 7E1C4F11 v_mul_legacy_f32_e32 v14, v10, v14 ; 0E1C1D0A v_exp_f32_e32 v14, v14 ; 7E1C4B0E v_log_f32_e32 v15, v5 ; 7E1E4F05 v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_mul_f32_e32 v14, v14, v15 ; 101C1F0E v_mad_f32 v13, 2.0, v14, v13 ; D282000D 04361CF4 v_log_f32_e64 v13, |v13| ; D34E010D 0000010D v_mul_f32_e32 v13, 0x3ee8ba1f, v13 ; 101A1AFF 3EE8BA1F v_exp_f32_e32 v40, v13 ; 7E504B0D v_log_f32_e32 v13, v31 ; 7E1A4F1F v_mul_legacy_f32_e32 v13, v10, v13 ; 0E1A1B0A v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mul_f32_e32 v3, v13, v3 ; 1006070D v_log_f32_e32 v13, v20 ; 7E1A4F14 v_mul_legacy_f32_e32 v13, v10, v13 ; 0E1A1B0A v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_mad_f32 v2, v2, v13, v3 ; D2820002 040E1B02 v_mad_f32 v2, v12, v9, v2 ; D2820002 040A130C v_log_f32_e32 v3, v35 ; 7E064F23 v_mul_legacy_f32_e32 v3, v10, v3 ; 0E06070A v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_log_f32_e32 v3, v16 ; 7E064F10 v_mul_legacy_f32_e32 v3, v10, v3 ; 0E06070A v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v4, v10, v4 ; 0E08090A v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mad_f32 v2, 2.0, v3, v2 ; D2820002 040A06F4 v_log_f32_e64 v2, |v2| ; D34E0102 00000102 v_mul_f32_e32 v2, 0x3ee8ba1f, v2 ; 100404FF 3EE8BA1F v_exp_f32_e32 v39, v2 ; 7E4E4B02 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[39:42], s[44:51], s[40:43] ; F0800700 014B0227 v_interp_p1_f32 v5, v0, 1, 4, [m0] ; C8141100 v_interp_p2_f32 v5, [v5], v1, 1, 4, [m0] ; C8151101 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_interp_p1_f32 v7, v0, 0, 4, [m0] ; C81C1000 v_interp_p2_f32 v7, [v7], v1, 0, 4, [m0] ; C81D1001 v_add_f32_e32 v5, 0, v7 ; 060A0E80 image_sample v[5:6], 5, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[32:39], s[28:31] ; F0800500 00E80505 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v7, 1.0, v5 ; 080E0AF2 v_interp_p1_f32 v10, v0, 3, 1, [m0] ; C8280700 v_interp_p2_f32 v10, [v10], v1, 3, 1, [m0] ; C8290701 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 s_load_dwordx8 s[4:11], vcc, 0x40 ; C0C26B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[24:27] ; F0800700 00C10C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v14, v7 ; 10120F0E v_mad_f32 v17, v5, v4, v9 ; D2820011 04260905 v_mul_f32_e32 v9, v13, v7 ; 10120F0D v_mad_f32 v16, v5, v3, v9 ; D2820010 04260705 v_mul_f32_e32 v7, v12, v7 ; 100E0F0C v_mad_f32 v15, v5, v2, v7 ; D282000F 041E0505 s_load_dwordx8 s[4:11], vcc, 0x48 ; C0C26B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[4:11], s[20:23] ; F0800700 00A1020F v_mad_f32 v5, -v6, v5, v5 ; D2820005 24160B06 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_mul_f32_e32 v7, v16, v6 ; 100E0D10 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v5, v3, v7 ; D2820007 041E0705 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v7 ; 08120E04 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v10, s4, v11 ; 0A141604 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v11, s4, v26 ; 0A163404 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mad_f32 v10, v10, v10, v11 ; D282000A 042E150A s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v11, s4 ; 7E160204 v_mad_f32 v11, v29, s5, -v11 ; D282000B 842C0B1D v_mad_f32 v10, v11, v11, v10 ; D282000A 042A170B s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v10 ; 10141404 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v11 ; 10181604 v_mul_f32_e32 v12, 0x3fb8aa65, v12 ; 101818FF 3FB8AA65 v_exp_f32_e32 v12, v12 ; 7E184B0C v_sub_f32_e32 v12, 1.0, v12 ; 081818F2 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_rcp_f32_e32 v11, v11 ; 7E16550B v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v10, 0x3fb8aa65, v10 ; 101414FF 3FB8AA65 v_exp_f32_e32 v10, v10 ; 7E144B0A v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_mad_f32 v0, 0.5, v11, 0.5 ; D2820000 03C216F0 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_readlane_b32 s4, v47, 0 ; 0209012F v_readlane_b32 s5, v47, 1 ; 020B032F v_readlane_b32 s6, v47, 2 ; 020D052F v_readlane_b32 s7, v47, 3 ; 020F072F s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[4:7] ; F0800100 00230000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v1, v0, v9, v7 ; D2820001 041E1300 v_mul_f32_e32 v7, v15, v6 ; 100E0D0F v_mad_f32 v7, v5, v2, v7 ; D2820007 041E0505 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v7 ; 08120E04 v_mad_f32 v7, v0, v9, v7 ; D2820007 041E1300 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 v_mul_f32_e32 v6, v17, v6 ; 100C0D11 v_mad_f32 v2, v5, v4, v6 ; D2820002 041A0905 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_mul_f32_e32 v2, 0.5, v8 ; 100410F0 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s0, v29 ; 0A063A00 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v2, v2, v8, s[0:1] ; D2000002 10021102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL OUT[5], GENERIC[13] DCL CONST[0..104] DCL TEMP[0..6], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} IMM[2] FLT32 { 0.5000, -0.5000, 0.0000, 0.0000} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+9] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+9] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+9] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+9] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+9].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[1] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[0], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[2], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[3] 112: DP3 TEMP[0].x, TEMP[3].xyzz, TEMP[3].xyzz 113: MAX TEMP[0].x, TEMP[0].xxxx, IMM[0].wwww 114: RSQ TEMP[0].x, TEMP[0].xxxx 115: MUL TEMP[0].xyz, TEMP[0].xxxx, TEMP[3] 116: MOV TEMP[0].xyz, TEMP[0].xyzx 117: RCP TEMP[4].x, TEMP[1].wwww 118: MOV TEMP[2].w, TEMP[4].xxxx 119: MUL TEMP[4].xy, TEMP[1], TEMP[4].xxxx 120: MOV TEMP[4].xy, TEMP[4].xyxx 121: ADD TEMP[5].xy, TEMP[2], CONST[8].zwzw 122: MUL TEMP[5].zw, TEMP[5].xyxy, CONST[8].xyxy 123: MOV TEMP[5].zw, TEMP[5].wwzw 124: MUL TEMP[6].xy, TEMP[2].yyyy, CONST[5] 125: MOV TEMP[3].xy, TEMP[6].xyxx 126: MAD TEMP[6].xy, TEMP[2].xxxx, CONST[4], TEMP[3] 127: MOV TEMP[3].xy, TEMP[6].xyxx 128: MAD TEMP[6].xy, TEMP[2].zzzz, CONST[6], TEMP[3] 129: MOV TEMP[3].xy, TEMP[6].xyxx 130: ADD TEMP[6].xy, TEMP[3], CONST[7] 131: MOV TEMP[3].xy, TEMP[6].xyxx 132: MAD TEMP[3].xy, TEMP[3], IMM[2].xyzz, IMM[2].yyyy 133: MOV TEMP[3].xy, TEMP[3].xyxx 134: MOV TEMP[5].xy, IN[4].xyxx 135: MOV TEMP[2].xyz, TEMP[2].xyzx 136: MOV TEMP[2].w, IMM[0].yyyy 137: MOV TEMP[4].zw, IMM[0].yyzy 138: MOV TEMP[3].zw, IMM[0].yyzy 139: MOV OUT[2], TEMP[5] 140: MOV OUT[3], TEMP[2] 141: MOV OUT[0], TEMP[1] 142: MOV OUT[1], TEMP[0] 143: MOV OUT[4], TEMP[4] 144: MOV OUT[5], TEMP[3] 145: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 0) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 4) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 8) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 12) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 16) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 20) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 24) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 28) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 32) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 36) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 40) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 44) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 60) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %32 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %33 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %37 = call float @llvm.SI.load.const(<16 x i8> %12, i32 128) %38 = call float @llvm.SI.load.const(<16 x i8> %12, i32 132) %39 = call float @llvm.SI.load.const(<16 x i8> %12, i32 136) %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 140) %41 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %42 = load <16 x i8> addrspace(2)* %41, !tbaa !0 %43 = add i32 %5, %7 %44 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %42, i32 0, i32 %43) %45 = extractelement <4 x float> %44, i32 0 %46 = extractelement <4 x float> %44, i32 1 %47 = extractelement <4 x float> %44, i32 2 %48 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %49 = load <16 x i8> addrspace(2)* %48, !tbaa !0 %50 = add i32 %5, %7 %51 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %49, i32 0, i32 %50) %52 = extractelement <4 x float> %51, i32 0 %53 = extractelement <4 x float> %51, i32 1 %54 = extractelement <4 x float> %51, i32 2 %55 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %56 = load <16 x i8> addrspace(2)* %55, !tbaa !0 %57 = add i32 %5, %7 %58 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %56, i32 0, i32 %57) %59 = extractelement <4 x float> %58, i32 0 %60 = extractelement <4 x float> %58, i32 1 %61 = extractelement <4 x float> %58, i32 2 %62 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %63 = load <16 x i8> addrspace(2)* %62, !tbaa !0 %64 = add i32 %5, %7 %65 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %63, i32 0, i32 %64) %66 = extractelement <4 x float> %65, i32 0 %67 = extractelement <4 x float> %65, i32 1 %68 = extractelement <4 x float> %65, i32 2 %69 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %70 = load <16 x i8> addrspace(2)* %69, !tbaa !0 %71 = add i32 %5, %7 %72 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %70, i32 0, i32 %71) %73 = extractelement <4 x float> %72, i32 0 %74 = extractelement <4 x float> %72, i32 1 %75 = fmul float 3.000000e+00, %59 %76 = fmul float %45, 1.000000e+00 %77 = fadd float %76, 0.000000e+00 %78 = fmul float %46, 1.000000e+00 %79 = fadd float %78, 0.000000e+00 %80 = fmul float %47, 1.000000e+00 %81 = fadd float %80, 0.000000e+00 %82 = fmul float %45, 0.000000e+00 %83 = fadd float %82, 1.000000e+00 %84 = fptosi float %75 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = shl i32 %86, 4 %88 = add i32 %87, 144 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = shl i32 %86, 4 %91 = add i32 %90, 148 %92 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %91) %93 = shl i32 %86, 4 %94 = add i32 %93, 152 %95 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %94) %96 = shl i32 %86, 4 %97 = add i32 %96, 156 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fmul float %77, %89 %100 = fmul float %79, %92 %101 = fadd float %99, %100 %102 = fmul float %81, %95 %103 = fadd float %101, %102 %104 = fmul float %83, %98 %105 = fadd float %103, %104 %106 = fptosi float %75 to i32 %107 = bitcast i32 %106 to float %108 = bitcast float %107 to i32 %109 = add i32 1, %108 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = shl i32 %111, 4 %113 = add i32 %112, 144 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = shl i32 %111, 4 %116 = add i32 %115, 148 %117 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %116) %118 = shl i32 %111, 4 %119 = add i32 %118, 152 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = shl i32 %111, 4 %122 = add i32 %121, 156 %123 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %122) %124 = fmul float %77, %114 %125 = fmul float %79, %117 %126 = fadd float %124, %125 %127 = fmul float %81, %120 %128 = fadd float %126, %127 %129 = fmul float %83, %123 %130 = fadd float %128, %129 %131 = fptosi float %75 to i32 %132 = bitcast i32 %131 to float %133 = bitcast float %132 to i32 %134 = add i32 2, %133 %135 = bitcast i32 %134 to float %136 = bitcast float %135 to i32 %137 = shl i32 %136, 4 %138 = add i32 %137, 144 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = shl i32 %136, 4 %141 = add i32 %140, 148 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = shl i32 %136, 4 %144 = add i32 %143, 152 %145 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %144) %146 = shl i32 %136, 4 %147 = add i32 %146, 156 %148 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %147) %149 = fmul float %77, %139 %150 = fmul float %79, %142 %151 = fadd float %149, %150 %152 = fmul float %81, %145 %153 = fadd float %151, %152 %154 = fmul float %83, %148 %155 = fadd float %153, %154 %156 = fmul float %105, %52 %157 = fmul float %130, %52 %158 = fmul float %155, %52 %159 = fptosi float %75 to i32 %160 = bitcast i32 %159 to float %161 = bitcast float %160 to i32 %162 = shl i32 %161, 4 %163 = add i32 %162, 144 %164 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %163) %165 = shl i32 %161, 4 %166 = add i32 %165, 148 %167 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %166) %168 = shl i32 %161, 4 %169 = add i32 %168, 152 %170 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %169) %171 = fmul float %66, %164 %172 = fmul float %67, %167 %173 = fadd float %172, %171 %174 = fmul float %68, %170 %175 = fadd float %173, %174 %176 = fptosi float %75 to i32 %177 = bitcast i32 %176 to float %178 = bitcast float %177 to i32 %179 = add i32 1, %178 %180 = bitcast i32 %179 to float %181 = bitcast float %180 to i32 %182 = shl i32 %181, 4 %183 = add i32 %182, 144 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = shl i32 %181, 4 %186 = add i32 %185, 148 %187 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %186) %188 = shl i32 %181, 4 %189 = add i32 %188, 152 %190 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %189) %191 = fmul float %66, %184 %192 = fmul float %67, %187 %193 = fadd float %192, %191 %194 = fmul float %68, %190 %195 = fadd float %193, %194 %196 = fptosi float %75 to i32 %197 = bitcast i32 %196 to float %198 = bitcast float %197 to i32 %199 = add i32 2, %198 %200 = bitcast i32 %199 to float %201 = bitcast float %200 to i32 %202 = shl i32 %201, 4 %203 = add i32 %202, 144 %204 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %203) %205 = shl i32 %201, 4 %206 = add i32 %205, 148 %207 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %206) %208 = shl i32 %201, 4 %209 = add i32 %208, 152 %210 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %209) %211 = fmul float %66, %204 %212 = fmul float %67, %207 %213 = fadd float %212, %211 %214 = fmul float %68, %210 %215 = fadd float %213, %214 %216 = fmul float %175, %52 %217 = fmul float %195, %52 %218 = fmul float %215, %52 %219 = fcmp olt float 0.000000e+00, %53 %220 = sext i1 %219 to i32 %221 = bitcast i32 %220 to float %222 = bitcast float %221 to i32 %223 = icmp ne i32 %222, 0 br i1 %223, label %IF, label %ENDIF IF: ; preds = %main_body %224 = fmul float 3.000000e+00, %60 %225 = fptosi float %224 to i32 %226 = bitcast i32 %225 to float %227 = bitcast float %226 to i32 %228 = shl i32 %227, 4 %229 = add i32 %228, 144 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = shl i32 %227, 4 %232 = add i32 %231, 148 %233 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %232) %234 = shl i32 %227, 4 %235 = add i32 %234, 152 %236 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %235) %237 = shl i32 %227, 4 %238 = add i32 %237, 156 %239 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %238) %240 = fmul float %77, %230 %241 = fmul float %79, %233 %242 = fadd float %240, %241 %243 = fmul float %81, %236 %244 = fadd float %242, %243 %245 = fmul float %83, %239 %246 = fadd float %244, %245 %247 = fptosi float %224 to i32 %248 = bitcast i32 %247 to float %249 = bitcast float %248 to i32 %250 = add i32 1, %249 %251 = bitcast i32 %250 to float %252 = bitcast float %251 to i32 %253 = shl i32 %252, 4 %254 = add i32 %253, 144 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = shl i32 %252, 4 %257 = add i32 %256, 148 %258 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %257) %259 = shl i32 %252, 4 %260 = add i32 %259, 152 %261 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %260) %262 = shl i32 %252, 4 %263 = add i32 %262, 156 %264 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %263) %265 = fmul float %77, %255 %266 = fmul float %79, %258 %267 = fadd float %265, %266 %268 = fmul float %81, %261 %269 = fadd float %267, %268 %270 = fmul float %83, %264 %271 = fadd float %269, %270 %272 = fptosi float %224 to i32 %273 = bitcast i32 %272 to float %274 = bitcast float %273 to i32 %275 = add i32 2, %274 %276 = bitcast i32 %275 to float %277 = bitcast float %276 to i32 %278 = shl i32 %277, 4 %279 = add i32 %278, 144 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = shl i32 %277, 4 %282 = add i32 %281, 148 %283 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %282) %284 = shl i32 %277, 4 %285 = add i32 %284, 152 %286 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %285) %287 = shl i32 %277, 4 %288 = add i32 %287, 156 %289 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %288) %290 = fmul float %77, %280 %291 = fmul float %79, %283 %292 = fadd float %290, %291 %293 = fmul float %81, %286 %294 = fadd float %292, %293 %295 = fmul float %83, %289 %296 = fadd float %294, %295 %297 = fmul float %53, %246 %298 = fadd float %297, %156 %299 = fmul float %53, %271 %300 = fadd float %299, %157 %301 = fmul float %53, %296 %302 = fadd float %301, %158 %303 = fptosi float %224 to i32 %304 = bitcast i32 %303 to float %305 = bitcast float %304 to i32 %306 = shl i32 %305, 4 %307 = add i32 %306, 144 %308 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %307) %309 = shl i32 %305, 4 %310 = add i32 %309, 148 %311 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %310) %312 = shl i32 %305, 4 %313 = add i32 %312, 152 %314 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %313) %315 = fmul float %66, %308 %316 = fmul float %67, %311 %317 = fadd float %316, %315 %318 = fmul float %68, %314 %319 = fadd float %317, %318 %320 = fptosi float %224 to i32 %321 = bitcast i32 %320 to float %322 = bitcast float %321 to i32 %323 = add i32 1, %322 %324 = bitcast i32 %323 to float %325 = bitcast float %324 to i32 %326 = shl i32 %325, 4 %327 = add i32 %326, 144 %328 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %327) %329 = shl i32 %325, 4 %330 = add i32 %329, 148 %331 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %330) %332 = shl i32 %325, 4 %333 = add i32 %332, 152 %334 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %333) %335 = fmul float %66, %328 %336 = fmul float %67, %331 %337 = fadd float %336, %335 %338 = fmul float %68, %334 %339 = fadd float %337, %338 %340 = fptosi float %224 to i32 %341 = bitcast i32 %340 to float %342 = bitcast float %341 to i32 %343 = add i32 2, %342 %344 = bitcast i32 %343 to float %345 = bitcast float %344 to i32 %346 = shl i32 %345, 4 %347 = add i32 %346, 144 %348 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %347) %349 = shl i32 %345, 4 %350 = add i32 %349, 148 %351 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %350) %352 = shl i32 %345, 4 %353 = add i32 %352, 152 %354 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %353) %355 = fmul float %66, %348 %356 = fmul float %67, %351 %357 = fadd float %356, %355 %358 = fmul float %68, %354 %359 = fadd float %357, %358 %360 = fmul float %53, %319 %361 = fadd float %360, %216 %362 = fmul float %53, %339 %363 = fadd float %362, %217 %364 = fmul float %53, %359 %365 = fadd float %364, %218 %366 = fcmp olt float 0.000000e+00, %54 %367 = sext i1 %366 to i32 %368 = bitcast i32 %367 to float %369 = bitcast float %368 to i32 %370 = icmp ne i32 %369, 0 br i1 %370, label %IF70, label %ENDIF ENDIF: ; preds = %IF70, %IF, %main_body %temp3.0 = phi float [ 0.000000e+00, %main_body ], [ %428, %IF70 ], [ %224, %IF ] %temp8.0 = phi float [ %156, %main_body ], [ %502, %IF70 ], [ %298, %IF ] %temp9.0 = phi float [ %157, %main_body ], [ %504, %IF70 ], [ %300, %IF ] %temp10.0 = phi float [ %158, %main_body ], [ %506, %IF70 ], [ %302, %IF ] %temp12.0 = phi float [ %216, %main_body ], [ %565, %IF70 ], [ %361, %IF ] %temp13.0 = phi float [ %217, %main_body ], [ %567, %IF70 ], [ %363, %IF ] %temp14.0 = phi float [ %218, %main_body ], [ %569, %IF70 ], [ %365, %IF ] %371 = fmul float %temp9.0, %17 %372 = fmul float %temp9.0, %18 %373 = fmul float %temp9.0, %19 %374 = fmul float %temp9.0, %20 %375 = fmul float %temp8.0, %13 %376 = fadd float %375, %371 %377 = fmul float %temp8.0, %14 %378 = fadd float %377, %372 %379 = fmul float %temp8.0, %15 %380 = fadd float %379, %373 %381 = fmul float %temp8.0, %16 %382 = fadd float %381, %374 %383 = fmul float %temp10.0, %21 %384 = fadd float %383, %376 %385 = fmul float %temp10.0, %22 %386 = fadd float %385, %378 %387 = fmul float %temp10.0, %23 %388 = fadd float %387, %380 %389 = fmul float %temp10.0, %24 %390 = fadd float %389, %382 %391 = fadd float %384, %25 %392 = fadd float %386, %26 %393 = fadd float %388, %27 %394 = fadd float %390, %28 %395 = fmul float %temp12.0, %temp12.0 %396 = fmul float %temp13.0, %temp13.0 %397 = fadd float %396, %395 %398 = fmul float %temp14.0, %temp14.0 %399 = fadd float %397, %398 %400 = call float @llvm.maxnum.f32(float %399, float 0x3E7AD7F2A0000000) %401 = call float @llvm.AMDGPU.rsq.clamped.f32(float %400) %402 = fmul float %401, %temp12.0 %403 = fmul float %401, %temp13.0 %404 = fmul float %401, %temp14.0 %405 = fdiv float 1.000000e+00, %394 %406 = fmul float %391, %405 %407 = fmul float %392, %405 %408 = fadd float %temp8.0, %39 %409 = fadd float %temp9.0, %40 %410 = fmul float %408, %37 %411 = fmul float %409, %38 %412 = fmul float %temp9.0, %31 %413 = fmul float %temp9.0, %32 %414 = fmul float %temp8.0, %29 %415 = fadd float %414, %412 %416 = fmul float %temp8.0, %30 %417 = fadd float %416, %413 %418 = fmul float %temp10.0, %33 %419 = fadd float %418, %415 %420 = fmul float %temp10.0, %34 %421 = fadd float %420, %417 %422 = fadd float %419, %35 %423 = fadd float %421, %36 %424 = fmul float %422, 5.000000e-01 %425 = fadd float %424, -5.000000e-01 %426 = fmul float %423, -5.000000e-01 %427 = fadd float %426, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %402, float %403, float %404, float %temp3.0) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %73, float %74, float %410, float %411) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %406, float %407, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 36, i32 0, float %425, float %427, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %391, float %392, float %393, float %394) ret void IF70: ; preds = %IF %428 = fmul float 3.000000e+00, %61 %429 = fptosi float %428 to i32 %430 = bitcast i32 %429 to float %431 = bitcast float %430 to i32 %432 = shl i32 %431, 4 %433 = add i32 %432, 144 %434 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %433) %435 = shl i32 %431, 4 %436 = add i32 %435, 148 %437 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %436) %438 = shl i32 %431, 4 %439 = add i32 %438, 152 %440 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %439) %441 = shl i32 %431, 4 %442 = add i32 %441, 156 %443 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %442) %444 = fmul float %77, %434 %445 = fmul float %79, %437 %446 = fadd float %444, %445 %447 = fmul float %81, %440 %448 = fadd float %446, %447 %449 = fmul float %83, %443 %450 = fadd float %448, %449 %451 = fptosi float %428 to i32 %452 = bitcast i32 %451 to float %453 = bitcast float %452 to i32 %454 = add i32 1, %453 %455 = bitcast i32 %454 to float %456 = bitcast float %455 to i32 %457 = shl i32 %456, 4 %458 = add i32 %457, 144 %459 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %458) %460 = shl i32 %456, 4 %461 = add i32 %460, 148 %462 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %461) %463 = shl i32 %456, 4 %464 = add i32 %463, 152 %465 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %464) %466 = shl i32 %456, 4 %467 = add i32 %466, 156 %468 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %467) %469 = fmul float %77, %459 %470 = fmul float %79, %462 %471 = fadd float %469, %470 %472 = fmul float %81, %465 %473 = fadd float %471, %472 %474 = fmul float %83, %468 %475 = fadd float %473, %474 %476 = fptosi float %428 to i32 %477 = bitcast i32 %476 to float %478 = bitcast float %477 to i32 %479 = add i32 2, %478 %480 = bitcast i32 %479 to float %481 = bitcast float %480 to i32 %482 = shl i32 %481, 4 %483 = add i32 %482, 144 %484 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %483) %485 = shl i32 %481, 4 %486 = add i32 %485, 148 %487 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %486) %488 = shl i32 %481, 4 %489 = add i32 %488, 152 %490 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %489) %491 = shl i32 %481, 4 %492 = add i32 %491, 156 %493 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %492) %494 = fmul float %77, %484 %495 = fmul float %79, %487 %496 = fadd float %494, %495 %497 = fmul float %81, %490 %498 = fadd float %496, %497 %499 = fmul float %83, %493 %500 = fadd float %498, %499 %501 = fmul float %54, %450 %502 = fadd float %501, %298 %503 = fmul float %54, %475 %504 = fadd float %503, %300 %505 = fmul float %54, %500 %506 = fadd float %505, %302 %507 = fptosi float %428 to i32 %508 = bitcast i32 %507 to float %509 = bitcast float %508 to i32 %510 = shl i32 %509, 4 %511 = add i32 %510, 144 %512 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %511) %513 = shl i32 %509, 4 %514 = add i32 %513, 148 %515 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %514) %516 = shl i32 %509, 4 %517 = add i32 %516, 152 %518 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %517) %519 = fmul float %66, %512 %520 = fmul float %67, %515 %521 = fadd float %520, %519 %522 = fmul float %68, %518 %523 = fadd float %521, %522 %524 = fptosi float %428 to i32 %525 = bitcast i32 %524 to float %526 = bitcast float %525 to i32 %527 = add i32 1, %526 %528 = bitcast i32 %527 to float %529 = bitcast float %528 to i32 %530 = shl i32 %529, 4 %531 = add i32 %530, 144 %532 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %531) %533 = shl i32 %529, 4 %534 = add i32 %533, 148 %535 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %534) %536 = shl i32 %529, 4 %537 = add i32 %536, 152 %538 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %537) %539 = fmul float %66, %532 %540 = fmul float %67, %535 %541 = fadd float %540, %539 %542 = fmul float %68, %538 %543 = fadd float %541, %542 %544 = fptosi float %428 to i32 %545 = bitcast i32 %544 to float %546 = bitcast float %545 to i32 %547 = add i32 2, %546 %548 = bitcast i32 %547 to float %549 = bitcast float %548 to i32 %550 = shl i32 %549, 4 %551 = add i32 %550, 144 %552 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %551) %553 = shl i32 %549, 4 %554 = add i32 %553, 148 %555 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %554) %556 = shl i32 %549, 4 %557 = add i32 %556, 152 %558 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %557) %559 = fmul float %66, %552 %560 = fmul float %67, %555 %561 = fadd float %560, %559 %562 = fmul float %68, %558 %563 = fadd float %561, %562 %564 = fmul float %54, %523 %565 = fadd float %564, %361 %566 = fmul float %54, %543 %567 = fadd float %566, %363 %568 = fmul float %54, %563 %569 = fadd float %568, %365 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v11, s10, v0 ; 4A16000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v11, s[20:23], 0 idxen ; E00C2000 8005140B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v20 ; 100028FF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v3, 4, v0 ; 34060084 v_add_i32_e32 v0, 0xb0, v3 ; 4A0006FF 000000B0 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v0, v0, s[0:3], 0 offen ; E0301000 80000000 buffer_load_format_xyzw v[15:18], v11, s[24:27], 0 idxen ; E00C2000 80060F0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v0, v15 ; 10021F00 v_add_i32_e32 v2, 0xb4, v3 ; 4A0406FF 000000B4 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v16, v2, v1 ; D2820001 04060510 v_add_i32_e32 v4, 0xb8, v3 ; 4A0806FF 000000B8 buffer_load_dword v12, v4, s[0:3], 0 offen ; E0301000 80000C04 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v17, v12, v1 ; D2820001 04061911 buffer_load_format_xyzw v[7:10], v11, s[16:19], 0 idxen ; E00C2000 8004070B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v7, v1 ; 10020307 v_add_i32_e32 v4, 0xa0, v3 ; 4A0806FF 000000A0 buffer_load_dword v13, v4, s[0:3], 0 offen ; E0301000 80000D04 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v13, v15 ; 10081F0D v_add_i32_e32 v5, 0xa4, v3 ; 4A0A06FF 000000A4 buffer_load_dword v14, v5, s[0:3], 0 offen ; E0301000 80000E05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v16, v14, v4 ; D2820004 04121D10 v_add_i32_e32 v5, 0xa8, v3 ; 4A0A06FF 000000A8 buffer_load_dword v27, v5, s[0:3], 0 offen ; E0301000 80001B05 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v17, v27, v4 ; D2820004 04123711 v_mul_f32_e32 v4, v7, v4 ; 10080907 v_add_i32_e32 v5, 0x90, v3 ; 4A0A06FF 00000090 buffer_load_dword v28, v5, s[0:3], 0 offen ; E0301000 80001C05 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v28, v15 ; 100A1F1C v_add_i32_e32 v6, 0x94, v3 ; 4A0C06FF 00000094 buffer_load_dword v29, v6, s[0:3], 0 offen ; E0301000 80001D06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v16, v29, v5 ; D2820005 04163B10 v_add_i32_e32 v6, 0x98, v3 ; 4A0C06FF 00000098 buffer_load_dword v30, v6, s[0:3], 0 offen ; E0301000 80001E06 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v17, v30, v5 ; D2820005 04163D11 v_mul_f32_e32 v5, v7, v5 ; 100A0B07 buffer_load_format_xyzw v[31:34], v11, s[12:15], 0 idxen ; E00C2000 80031F0B v_mov_b32_e32 v6, 0 ; 7E0C0280 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v19, 0, v32 ; 06264080 v_mul_f32_e32 v2, v2, v19 ; 10042702 v_add_f32_e32 v24, 0, v31 ; 06303E80 v_mad_f32 v0, v24, v0, v2 ; D2820000 040A0118 v_add_f32_e32 v26, 0, v33 ; 06344280 v_mad_f32 v0, v26, v12, v0 ; D2820000 0402191A v_mad_f32 v25, 0, v31, 1.0 ; D2820019 03CA3E80 v_add_i32_e32 v2, 0xbc, v3 ; 4A0406FF 000000BC buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v25, v2, v0 ; D2820000 04020519 v_mul_f32_e32 v0, v7, v0 ; 10000107 v_mul_f32_e32 v2, v14, v19 ; 1004270E v_mad_f32 v2, v24, v13, v2 ; D2820002 040A1B18 v_mad_f32 v2, v26, v27, v2 ; D2820002 040A371A v_add_i32_e32 v12, 0xac, v3 ; 4A1806FF 000000AC buffer_load_dword v12, v12, s[0:3], 0 offen ; E0301000 80000C0C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v25, v12, v2 ; D2820002 040A1919 v_mul_f32_e32 v2, v7, v2 ; 10040507 v_mul_f32_e32 v12, v29, v19 ; 1018271D v_mad_f32 v12, v24, v28, v12 ; D282000C 04323918 v_mad_f32 v12, v26, v30, v12 ; D282000C 04323D1A v_add_i32_e32 v3, 0x9c, v3 ; 4A0606FF 0000009C buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v25, v3, v12 ; D2820003 04320719 v_mul_f32_e32 v3, v7, v3 ; 10060707 buffer_load_format_xyzw v[11:14], v11, s[4:7], 0 idxen ; E00C2000 80010B0B v_cmp_gt_f32_e64 s[4:5], v8, 0 ; D0080004 00010108 v_cndmask_b32_e64 v27, 0, -1, s[4:5] ; D200081B 00118280 v_cmp_ne_i32_e64 s[32:33], v27, 0 ; D10A0020 0001011B s_buffer_load_dword s20, s[0:3], 0x23 ; C20A0123 s_buffer_load_dword s19, s[0:3], 0x22 ; C2098122 s_buffer_load_dword s22, s[0:3], 0x21 ; C20B0121 s_buffer_load_dword s21, s[0:3], 0x20 ; C20A8120 s_buffer_load_dword s4, s[0:3], 0x1d ; C202011D s_buffer_load_dword s7, s[0:3], 0x1c ; C203811C s_buffer_load_dword s5, s[0:3], 0x19 ; C2028119 s_buffer_load_dword s11, s[0:3], 0x18 ; C2058118 s_buffer_load_dword s8, s[0:3], 0x15 ; C2040115 s_buffer_load_dword s12, s[0:3], 0x14 ; C2060114 s_buffer_load_dword s9, s[0:3], 0x11 ; C2048111 s_buffer_load_dword s13, s[0:3], 0x10 ; C2068110 s_buffer_load_dword s17, s[0:3], 0xf ; C208810F s_buffer_load_dword s6, s[0:3], 0xe ; C203010E s_buffer_load_dword s24, s[0:3], 0xd ; C20C010D s_buffer_load_dword s16, s[0:3], 0xc ; C208010C s_buffer_load_dword s23, s[0:3], 0xb ; C20B810B s_buffer_load_dword s10, s[0:3], 0xa ; C205010A s_buffer_load_dword s26, s[0:3], 0x9 ; C20D0109 s_buffer_load_dword s18, s[0:3], 0x8 ; C2090108 s_buffer_load_dword s27, s[0:3], 0x7 ; C20D8107 s_buffer_load_dword s14, s[0:3], 0x6 ; C2070106 s_buffer_load_dword s30, s[0:3], 0x5 ; C20F0105 s_buffer_load_dword s25, s[0:3], 0x4 ; C20C8104 s_buffer_load_dword s29, s[0:3], 0x3 ; C20E8103 s_buffer_load_dword s15, s[0:3], 0x2 ; C2078102 s_buffer_load_dword s31, s[0:3], 0x1 ; C20F8101 s_buffer_load_dword s28, s[0:3], 0x0 ; C20E0100 s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[32:33], s[32:33] ; BEA02420 s_xor_b64 s[32:33], exec, s[32:33] ; 89A0207E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v21 ; 100C2AFF 40400000 v_cvt_i32_f32_e32 v27, v6 ; 7E361106 v_lshlrev_b32_e32 v27, 4, v27 ; 34363684 v_add_i32_e32 v28, 0xb4, v27 ; 4A3836FF 000000B4 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v29, 0xb0, v27 ; 4A3A36FF 000000B0 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v30, v29, v15 ; 103C1F1D v_mad_f32 v30, v16, v28, v30 ; D282001E 047A3910 v_add_i32_e32 v31, 0xb8, v27 ; 4A3E36FF 000000B8 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v30, v17, v31, v30 ; D282001E 047A3F11 v_mad_f32 v1, v8, v30, v1 ; D2820001 04063D08 v_add_i32_e32 v30, 0xa4, v27 ; 4A3C36FF 000000A4 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E v_add_i32_e32 v32, 0xa0, v27 ; 4A4036FF 000000A0 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v33, v32, v15 ; 10421F20 v_mad_f32 v33, v16, v30, v33 ; D2820021 04863D10 v_add_i32_e32 v34, 0xa8, v27 ; 4A4436FF 000000A8 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v33, v17, v34, v33 ; D2820021 04864511 v_mad_f32 v4, v8, v33, v4 ; D2820004 04124308 v_add_i32_e32 v33, 0x94, v27 ; 4A4236FF 00000094 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 v_add_i32_e32 v35, 0x90, v27 ; 4A4636FF 00000090 buffer_load_dword v35, v35, s[0:3], 0 offen ; E0301000 80002323 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v36, v35, v15 ; 10481F23 v_mad_f32 v36, v16, v33, v36 ; D2820024 04924310 v_add_i32_e32 v37, 0x98, v27 ; 4A4A36FF 00000098 buffer_load_dword v37, v37, s[0:3], 0 offen ; E0301000 80002525 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v36, v17, v37, v36 ; D2820024 04924B11 v_mad_f32 v5, v8, v36, v5 ; D2820005 04164908 v_mul_f32_e32 v28, v28, v19 ; 1038271C v_mad_f32 v28, v24, v29, v28 ; D282001C 04723B18 v_mad_f32 v28, v26, v31, v28 ; D282001C 04723F1A v_add_i32_e32 v29, 0xbc, v27 ; 4A3A36FF 000000BC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v0, v8, v28, v0 ; D2820000 04023908 v_mul_f32_e32 v28, v30, v19 ; 1038271E v_mad_f32 v28, v24, v32, v28 ; D282001C 04724118 v_mad_f32 v28, v26, v34, v28 ; D282001C 0472451A v_add_i32_e32 v29, 0xac, v27 ; 4A3A36FF 000000AC buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v25, v29, v28 ; D282001C 04723B19 v_mad_f32 v2, v8, v28, v2 ; D2820002 040A3908 v_mul_f32_e32 v28, v33, v19 ; 10382721 v_mad_f32 v28, v24, v35, v28 ; D282001C 04724718 v_mad_f32 v28, v26, v37, v28 ; D282001C 04724B1A v_add_i32_e32 v27, 0x9c, v27 ; 4A3636FF 0000009C buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v25, v27, v28 ; D282001B 04723719 v_mad_f32 v3, v8, v27, v3 ; D2820003 040E3708 v_cmp_gt_f32_e64 s[34:35], v9, 0 ; D0080022 00010109 v_cndmask_b32_e64 v27, 0, -1, s[34:35] ; D200081B 00898280 v_cmp_ne_i32_e64 s[34:35], v27, 0 ; D10A0022 0001011B s_and_saveexec_b64 s[34:35], s[34:35] ; BEA22422 s_xor_b64 s[34:35], exec, s[34:35] ; 89A2227E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v6, 0x40400000, v22 ; 100C2CFF 40400000 v_cvt_i32_f32_e32 v20, v6 ; 7E281106 v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v21, 0xb4, v20 ; 4A2A28FF 000000B4 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_add_i32_e32 v22, 0xb0, v20 ; 4A2C28FF 000000B0 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v23, v22, v15 ; 102E1F16 v_mad_f32 v23, v16, v21, v23 ; D2820017 045E2B10 v_add_i32_e32 v27, 0xb8, v20 ; 4A3628FF 000000B8 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v17, v27, v23 ; D2820017 045E3711 v_mad_f32 v1, v9, v23, v1 ; D2820001 04062F09 v_add_i32_e32 v23, 0xa4, v20 ; 4A2E28FF 000000A4 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v28, 0xa0, v20 ; 4A3828FF 000000A0 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v15 ; 103A1F1C v_mad_f32 v29, v16, v23, v29 ; D282001D 04762F10 v_add_i32_e32 v30, 0xa8, v20 ; 4A3C28FF 000000A8 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v17, v30, v29 ; D282001D 04763D11 v_mad_f32 v4, v9, v29, v4 ; D2820004 04123B09 v_add_i32_e32 v29, 0x94, v20 ; 4A3A28FF 00000094 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v31, 0x90, v20 ; 4A3E28FF 00000090 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v15 ; 10401F1F v_mad_f32 v32, v16, v29, v32 ; D2820020 04823B10 v_add_i32_e32 v33, 0x98, v20 ; 4A4228FF 00000098 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v17, v33, v32 ; D282000F 04824311 v_mad_f32 v5, v9, v15, v5 ; D2820005 04161F09 v_mul_f32_e32 v15, v21, v19 ; 101E2715 v_mad_f32 v15, v24, v22, v15 ; D282000F 043E2D18 v_mad_f32 v15, v26, v27, v15 ; D282000F 043E371A v_add_i32_e32 v16, 0xbc, v20 ; 4A2028FF 000000BC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v0, v9, v15, v0 ; D2820000 04021F09 v_mul_f32_e32 v15, v23, v19 ; 101E2717 v_mad_f32 v15, v24, v28, v15 ; D282000F 043E3918 v_mad_f32 v15, v26, v30, v15 ; D282000F 043E3D1A v_add_i32_e32 v16, 0xac, v20 ; 4A2028FF 000000AC buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v2, v9, v15, v2 ; D2820002 040A1F09 v_mul_f32_e32 v15, v29, v19 ; 101E271D v_mad_f32 v15, v24, v31, v15 ; D282000F 043E3F18 v_mad_f32 v15, v26, v33, v15 ; D282000F 043E431A v_add_i32_e32 v16, 0x9c, v20 ; 4A2028FF 0000009C buffer_load_dword v16, v16, s[0:3], 0 offen ; E0301000 80001010 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v15, v25, v16, v15 ; D282000F 043E2119 v_mad_f32 v3, v9, v15, v3 ; D2820003 040E1F09 s_or_b64 exec, exec, s[34:35] ; 88FE227E s_or_b64 exec, exec, s[32:33] ; 88FE207E v_mul_f32_e32 v7, v5, v5 ; 100E0B05 v_mad_f32 v7, v4, v4, v7 ; D2820007 041E0904 v_mad_f32 v7, v1, v1, v7 ; D2820007 041E0301 v_max_f32_e32 v7, 0x33d6bf95, v7 ; 200E0EFF 33D6BF95 v_rsq_clamp_f32_e32 v7, v7 ; 7E0E5907 v_mul_f32_e32 v1, v1, v7 ; 10020F01 v_mul_f32_e32 v4, v4, v7 ; 10080F04 v_mul_f32_e32 v5, v5, v7 ; 100A0F05 exp 15, 32, 0, 0, 0, v5, v4, v1, v6 ; F800020F 06010405 s_waitcnt expcnt(0) ; BF8C070F v_add_f32_e32 v1, s20, v2 ; 06020414 v_mul_f32_e32 v1, s22, v1 ; 10020216 v_add_f32_e32 v4, s19, v3 ; 06080613 v_mul_f32_e32 v4, s21, v4 ; 10080815 exp 15, 33, 0, 0, 0, v11, v12, v4, v1 ; F800021F 01040C0B s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v1, 1.0 ; 7E0202F2 exp 15, 34, 0, 0, 0, v3, v2, v0, v1 ; F800022F 01000203 v_mul_f32_e32 v4, s30, v2 ; 1008041E v_mad_f32 v4, v3, s31, v4 ; D2820004 04103F03 v_mad_f32 v4, v0, s26, v4 ; D2820004 04103500 v_add_f32_e32 v4, s24, v4 ; 06080818 v_mul_f32_e32 v5, s27, v2 ; 100A041B v_mad_f32 v5, v3, s29, v5 ; D2820005 04143B03 v_mad_f32 v5, v0, s23, v5 ; D2820005 04142F00 v_add_f32_e32 v5, s17, v5 ; 060A0A11 v_rcp_f32_e32 v6, v5 ; 7E0C5505 v_mul_f32_e32 v7, v6, v4 ; 100E0906 v_mul_f32_e32 v8, s25, v2 ; 10100419 v_mad_f32 v8, v3, s28, v8 ; D2820008 04203903 v_mad_f32 v8, v0, s18, v8 ; D2820008 04202500 v_add_f32_e32 v8, s16, v8 ; 06101010 v_mul_f32_e32 v6, v6, v8 ; 100C1106 v_mov_b32_e32 v9, 0 ; 7E120280 exp 15, 35, 0, 0, 0, v6, v7, v9, v1 ; F800023F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v6, s12, v2 ; 100C040C v_mad_f32 v6, v3, s13, v6 ; D2820006 04181B03 v_mad_f32 v6, v0, s11, v6 ; D2820006 04181700 v_add_f32_e32 v6, s7, v6 ; 060C0C07 v_mad_f32 v6, 0.5, v6, -0.5 ; D2820006 03C60CF0 v_mul_f32_e32 v7, s8, v2 ; 100E0408 v_mad_f32 v7, v3, s9, v7 ; D2820007 041C1303 v_mad_f32 v7, v0, s5, v7 ; D2820007 041C0B00 v_add_f32_e32 v7, s4, v7 ; 060E0E04 v_mad_f32 v7, -0.5, v7, -0.5 ; D2820007 03C60EF1 exp 15, 36, 0, 0, 0, v6, v7, v9, v1 ; F800024F 01090706 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v1, s14, v2 ; 1002040E v_mad_f32 v1, v3, s15, v1 ; D2820001 04041F03 v_mad_f32 v0, v0, s10, v1 ; D2820000 04041500 v_add_f32_e32 v0, s6, v0 ; 06000006 exp 15, 12, 0, 1, 0, v8, v4, v0, v5 ; F80008CF 05000408 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL IN[4], GENERIC[13], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL SAMP[6] DCL SAMP[7] DCL SAMP[8] DCL SAMP[9] DCL CONST[0..8] DCL TEMP[0..9], LOCAL IMM[0] FLT32 { 2.2000, 1.0000, -0.1000, 0.0000} IMM[1] FLT32 { 0.0000, 0.0529, 0.8460, 0.5290} IMM[2] FLT32 { 199.0000, 0.1000, 0.4545, 1.4427} IMM[3] FLT32 { 1.0000, -1.0000, 0.5000, 0.0000} 0: MOV TEMP[0].xy, IN[1].xyyy 1: TEX TEMP[0], TEMP[0], SAMP[0], 2D 2: POW TEMP[1].x, TEMP[0].xxxx, IMM[0].xxxx 3: POW TEMP[1].y, TEMP[0].yyyy, IMM[0].xxxx 4: POW TEMP[1].z, TEMP[0].zzzz, IMM[0].xxxx 5: POW TEMP[1].w, TEMP[0].wwww, IMM[0].yyyy 6: MOV TEMP[0].w, TEMP[1] 7: ABS TEMP[2].x, TEMP[1].wwww 8: POW TEMP[2].x, TEMP[2].xxxx, IMM[0].xxxx 9: MOV TEMP[3].x, TEMP[2].xxxx 10: ADD TEMP[4].xyz, TEMP[2].xxxx, IMM[0].zzzz 11: FSLT TEMP[5].xyz, TEMP[4].xyzz, IMM[0].wwww 12: OR TEMP[6].x, TEMP[5].xxxx, TEMP[5].zzzz 13: OR TEMP[5].x, TEMP[6].xxxx, TEMP[5].yyyy 14: AND TEMP[5].x, TEMP[5].xxxx, IMM[0].yyyy 15: KILL_IF -TEMP[5].xxxx 16: MOV TEMP[5].xyz, IN[0].xyzz 17: TEX TEMP[5], TEMP[5], SAMP[2], CUBE 18: POW TEMP[6].x, TEMP[5].xxxx, IMM[0].xxxx 19: POW TEMP[6].y, TEMP[5].yyyy, IMM[0].xxxx 20: POW TEMP[6].z, TEMP[5].zzzz, IMM[0].xxxx 21: POW TEMP[6].w, TEMP[5].wwww, IMM[0].yyyy 22: MUL TEMP[1].xyz, TEMP[1], TEMP[6] 23: MOV TEMP[0].xyz, TEMP[1].xyzx 24: ADD TEMP[1].xyz, TEMP[0], TEMP[0] 25: MOV TEMP[0].xyz, TEMP[1].xyzx 26: ADD TEMP[1].yzw, CONST[3].xxyz, -IN[2].xxyz 27: MOV TEMP[3].yzw, TEMP[1].zyzw 28: DP3 TEMP[5].x, TEMP[1].yzww, TEMP[1].yzww 29: MAX TEMP[5].x, TEMP[5].xxxx, IMM[1].xxxx 30: RSQ TEMP[5].x, TEMP[5].xxxx 31: MUL TEMP[1].xyz, TEMP[5].xxxx, TEMP[1].yzww 32: MAD TEMP[3].yzw, TEMP[3], TEMP[5].xxxx, IMM[1].yyzw 33: MOV TEMP[5].w, IMM[0].wwww 34: MOV TEMP[5].x, TEMP[3].yyyy 35: MOV TEMP[5].y, TEMP[3].zzzz 36: MOV TEMP[5].z, TEMP[3].wwww 37: DP4 TEMP[3].x, TEMP[5], TEMP[5] 38: RSQ TEMP[3].x, TEMP[3].xxxx 39: MUL TEMP[3].xyz, TEMP[5], TEMP[3].xxxx 40: DP3 TEMP[3].x, TEMP[3].xyzz, IN[0].xyzz 41: MOV_SAT TEMP[3].x, TEMP[3].xxxx 42: DP3 TEMP[5].x, IN[0].xyzz, TEMP[1].xyzz 43: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 44: MAD TEMP[5].yzw, TEMP[5].yyyy, IN[0].xxyz, -TEMP[1].xxyz 45: MOV TEMP[6].xy, TEMP[5].yzzz 46: TEX TEMP[6], TEMP[6], SAMP[4], 2D 47: POW TEMP[7].x, TEMP[6].xxxx, IMM[0].xxxx 48: POW TEMP[7].y, TEMP[6].yyyy, IMM[0].xxxx 49: POW TEMP[7].z, TEMP[6].zzzz, IMM[0].xxxx 50: POW TEMP[7].w, TEMP[6].wwww, IMM[0].yyyy 51: MOV TEMP[4].w, TEMP[7].wwww 52: MOV TEMP[6].xy, IN[1].xyyy 53: TEX TEMP[6], TEMP[6], SAMP[1], 2D 54: POW TEMP[8].x, TEMP[6].xxxx, IMM[0].xxxx 55: POW TEMP[8].y, TEMP[6].yyyy, IMM[0].xxxx 56: POW TEMP[8].z, TEMP[6].zzzz, IMM[0].xxxx 57: POW TEMP[8].w, TEMP[6].wwww, IMM[0].yyyy 58: ABS TEMP[6].x, TEMP[8].wwww 59: POW TEMP[6].x, TEMP[6].xxxx, IMM[0].xxxx 60: MOV TEMP[5].xyz, TEMP[5].yzww 61: TEX TEMP[5], TEMP[5], SAMP[3], CUBE 62: POW TEMP[9].x, TEMP[5].xxxx, IMM[0].xxxx 63: POW TEMP[9].y, TEMP[5].yyyy, IMM[0].xxxx 64: POW TEMP[9].z, TEMP[5].zzzz, IMM[0].xxxx 65: POW TEMP[9].w, TEMP[5].wwww, IMM[0].yyyy 66: LRP TEMP[5].xyz, TEMP[6].xxxx, TEMP[9], TEMP[7] 67: MAD TEMP[6].y, TEMP[6].xxxx, IMM[2].xxxx, IMM[0].yyyy 68: ABS TEMP[3].x, TEMP[3].xxxx 69: POW TEMP[3].x, TEMP[3].xxxx, TEMP[6].yyyy 70: MUL TEMP[6].w, TEMP[6].yyyy, IMM[2].yyyy 71: MOV TEMP[0].w, TEMP[6].wwww 72: MAD TEMP[3].yzw, TEMP[6].wwww, TEMP[3].xxxx, TEMP[5].xxyz 73: MAD TEMP[3].xyz, TEMP[3].yzww, TEMP[8], TEMP[0] 74: ABS TEMP[5].x, TEMP[3].xxxx 75: LG2 TEMP[4].x, TEMP[5].xxxx 76: ABS TEMP[5].x, TEMP[3].yyyy 77: LG2 TEMP[5].x, TEMP[5].xxxx 78: MOV TEMP[4].y, TEMP[5].xxxx 79: ABS TEMP[3].x, TEMP[3].zzzz 80: LG2 TEMP[3].x, TEMP[3].xxxx 81: MOV TEMP[4].z, TEMP[3].xxxx 82: MUL TEMP[3].xyz, TEMP[4], IMM[2].zzzz 83: EX2 TEMP[4].x, TEMP[3].xxxx 84: EX2 TEMP[5].x, TEMP[3].yyyy 85: MOV TEMP[4].y, TEMP[5].xxxx 86: EX2 TEMP[3].x, TEMP[3].zzzz 87: MOV TEMP[4].z, TEMP[3].xxxx 88: MOV TEMP[3].xyz, TEMP[4].xyzz 89: TEX TEMP[3].xyz, TEMP[3], SAMP[6], 3D 90: MAD TEMP[5].xy, IN[4], IMM[3].xyxx, IMM[0].wyww 91: MOV TEMP[5].xy, TEMP[5].xyyy 92: TEX TEMP[5].xzw, TEMP[5], SAMP[7], 2D 93: MOV TEMP[1].w, TEMP[5].wwww 94: MOV TEMP[6].xy, IN[1].zwww 95: TEX TEMP[6].xyz, TEMP[6], SAMP[8], 2D 96: LRP TEMP[3].yzw, TEMP[5].xxxx, TEMP[3].xxyz, TEMP[6].xxyz 97: MAD TEMP[5].x, TEMP[5].zzzz, -TEMP[5].xxxx, TEMP[5].xxxx 98: MOV TEMP[6].xyz, TEMP[3].yzww 99: TEX TEMP[6], TEMP[6], SAMP[9], 3D 100: LRP TEMP[3].xyz, TEMP[5].xxxx, TEMP[6], TEMP[3].yzww 101: MOV TEMP[1].xyz, TEMP[3].xyzx 102: ADD TEMP[3].xyz, -TEMP[1], CONST[5] 103: MOV TEMP[0].xyz, TEMP[3].xyzx 104: MUL TEMP[3].z, CONST[8].xxxx, IN[2].zzzz 105: MOV TEMP[4].z, TEMP[3].zzzz 106: MOV TEMP[4].xy, IN[2].xyxx 107: ADD TEMP[3].yzw, TEMP[4].xxyz, -CONST[6].xxyz 108: MUL TEMP[5].w, TEMP[3].wwww, CONST[4].xxxx 109: MUL TEMP[5].w, TEMP[5].wwww, IMM[2].wwww 110: EX2 TEMP[5].x, TEMP[5].wwww 111: ADD TEMP[5].w, -TEMP[5].xxxx, IMM[0].yyyy 112: DP3 TEMP[6].x, TEMP[3].yzww, TEMP[3].yzww 113: RCP TEMP[3].x, TEMP[3].wwww 114: MUL TEMP[6].y, TEMP[6].xxxx, CONST[4].yyyy 115: MUL TEMP[5].w, TEMP[5].wwww, TEMP[6].yyyy 116: MUL TEMP[3].w, TEMP[3].xxxx, TEMP[5].wwww 117: MUL TEMP[3].w, TEMP[3].wwww, IMM[2].wwww 118: EX2 TEMP[3].x, TEMP[3].wwww 119: MOV_SAT TEMP[3].x, TEMP[3].xxxx 120: ADD TEMP[3].w, -TEMP[3].xxxx, IMM[0].yyyy 121: MAD TEMP[5].x, IN[3].yyyy, IMM[3].zzzz, IMM[3].zzzz 122: MOV TEMP[4].x, TEMP[5].xxxx 123: MOV TEMP[4].y, CONST[4].wwww 124: MOV TEMP[4].xy, TEMP[4].xyyy 125: TEX TEMP[4].x, TEMP[4], SAMP[5], 2D 126: MUL TEMP[3].w, TEMP[3].wwww, TEMP[4].xxxx 127: MOV TEMP[0].w, TEMP[3].wwww 128: MAD TEMP[0].xyz, TEMP[3].wwww, TEMP[0], TEMP[1] 129: MOV TEMP[0].xyz, TEMP[0].xyzx 130: MUL TEMP[1].x, TEMP[2].xxxx, IMM[3].zzzz 131: ADD TEMP[3].y, -CONST[7].xxxx, IN[2].zzzz 132: FSGE TEMP[3].x, TEMP[3].yyyy, IMM[0].wwww 133: UIF TEMP[3].xxxx :0 134: MOV TEMP[2].x, TEMP[2].xxxx 135: ELSE :0 136: MOV TEMP[2].x, TEMP[1].xxxx 137: ENDIF 138: MOV TEMP[0].w, TEMP[2].xxxx 139: MOV OUT[0], TEMP[0] 140: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 76) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 96) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 100) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 104) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 112) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 128) %38 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %39 = load <8 x i32> addrspace(2)* %38, !tbaa !0 %40 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %41 = load <4 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %43 = load <8 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %45 = load <4 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %47 = load <8 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %49 = load <4 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %51 = load <8 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %53 = load <4 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %55 = load <8 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %57 = load <4 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %59 = load <8 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %61 = load <4 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 6 %63 = load <8 x i32> addrspace(2)* %62, !tbaa !0 %64 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 6 %65 = load <4 x i32> addrspace(2)* %64, !tbaa !0 %66 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 7 %67 = load <8 x i32> addrspace(2)* %66, !tbaa !0 %68 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 7 %69 = load <4 x i32> addrspace(2)* %68, !tbaa !0 %70 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 8 %71 = load <8 x i32> addrspace(2)* %70, !tbaa !0 %72 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 8 %73 = load <4 x i32> addrspace(2)* %72, !tbaa !0 %74 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 9 %75 = load <8 x i32> addrspace(2)* %74, !tbaa !0 %76 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 9 %77 = load <4 x i32> addrspace(2)* %76, !tbaa !0 %78 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %79 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %80 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %81 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %82 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %83 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %84 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %85 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %86 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %87 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %88 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %89 = call float @llvm.SI.fs.interp(i32 0, i32 4, i32 %5, <2 x i32> %7) %90 = call float @llvm.SI.fs.interp(i32 1, i32 4, i32 %5, <2 x i32> %7) %91 = bitcast float %81 to i32 %92 = bitcast float %82 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %39 to <32 x i8> %96 = bitcast <4 x i32> %41 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = call float @llvm.pow.f32(float %98, float 0x40019999A0000000) %103 = call float @llvm.pow.f32(float %99, float 0x40019999A0000000) %104 = call float @llvm.pow.f32(float %100, float 0x40019999A0000000) %105 = call float @llvm.pow.f32(float %101, float 1.000000e+00) %106 = call float @fabs(float %105) %107 = call float @llvm.pow.f32(float %106, float 0x40019999A0000000) %108 = fadd float %107, 0xBFB99999A0000000 %109 = fadd float %107, 0xBFB99999A0000000 %110 = fadd float %107, 0xBFB99999A0000000 %111 = fcmp olt float %108, 0.000000e+00 %112 = sext i1 %111 to i32 %113 = fcmp olt float %109, 0.000000e+00 %114 = sext i1 %113 to i32 %115 = fcmp olt float %110, 0.000000e+00 %116 = sext i1 %115 to i32 %117 = bitcast i32 %112 to float %118 = bitcast i32 %114 to float %119 = bitcast i32 %116 to float %120 = bitcast float %117 to i32 %121 = bitcast float %119 to i32 %122 = or i32 %120, %121 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = bitcast float %118 to i32 %126 = or i32 %124, %125 %127 = bitcast i32 %126 to float %128 = bitcast float %127 to i32 %129 = and i32 %128, 1065353216 %130 = bitcast i32 %129 to float %131 = fsub float -0.000000e+00, %130 %132 = fsub float -0.000000e+00, %130 %133 = fsub float -0.000000e+00, %130 %134 = fsub float -0.000000e+00, %130 call void @llvm.AMDGPU.kill(float %131) call void @llvm.AMDGPU.kill(float %132) call void @llvm.AMDGPU.kill(float %133) call void @llvm.AMDGPU.kill(float %134) %135 = insertelement <4 x float> undef, float %78, i32 0 %136 = insertelement <4 x float> %135, float %79, i32 1 %137 = insertelement <4 x float> %136, float %80, i32 2 %138 = insertelement <4 x float> %137, float 0.000000e+00, i32 3 %139 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %138) %140 = extractelement <4 x float> %139, i32 0 %141 = extractelement <4 x float> %139, i32 1 %142 = extractelement <4 x float> %139, i32 2 %143 = extractelement <4 x float> %139, i32 3 %144 = call float @fabs(float %142) %145 = fdiv float 1.000000e+00, %144 %146 = fmul float %140, %145 %147 = fadd float %146, 1.500000e+00 %148 = fmul float %141, %145 %149 = fadd float %148, 1.500000e+00 %150 = bitcast float %149 to i32 %151 = bitcast float %147 to i32 %152 = bitcast float %143 to i32 %153 = insertelement <4 x i32> undef, i32 %150, i32 0 %154 = insertelement <4 x i32> %153, i32 %151, i32 1 %155 = insertelement <4 x i32> %154, i32 %152, i32 2 %156 = insertelement <4 x i32> %155, i32 undef, i32 3 %157 = bitcast <8 x i32> %47 to <32 x i8> %158 = bitcast <4 x i32> %49 to <16 x i8> %159 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %156, <32 x i8> %157, <16 x i8> %158, i32 4) %160 = extractelement <4 x float> %159, i32 0 %161 = extractelement <4 x float> %159, i32 1 %162 = extractelement <4 x float> %159, i32 2 %163 = call float @llvm.pow.f32(float %160, float 0x40019999A0000000) %164 = call float @llvm.pow.f32(float %161, float 0x40019999A0000000) %165 = call float @llvm.pow.f32(float %162, float 0x40019999A0000000) %166 = fmul float %102, %163 %167 = fmul float %103, %164 %168 = fmul float %104, %165 %169 = fadd float %166, %166 %170 = fadd float %167, %167 %171 = fadd float %168, %168 %172 = fsub float -0.000000e+00, %85 %173 = fadd float %24, %172 %174 = fsub float -0.000000e+00, %86 %175 = fadd float %25, %174 %176 = fsub float -0.000000e+00, %87 %177 = fadd float %26, %176 %178 = fmul float %173, %173 %179 = fmul float %175, %175 %180 = fadd float %179, %178 %181 = fmul float %177, %177 %182 = fadd float %180, %181 %183 = call float @llvm.maxnum.f32(float %182, float 0x3E7AD7F2A0000000) %184 = call float @llvm.AMDGPU.rsq.clamped.f32(float %183) %185 = fmul float %184, %173 %186 = fmul float %184, %175 %187 = fmul float %184, %177 %188 = fmul float %173, %184 %189 = fadd float %188, 0x3FAB15B580000000 %190 = fmul float %175, %184 %191 = fadd float %190, 0x3FEB126EA0000000 %192 = fmul float %177, %184 %193 = fadd float %192, 0x3FE0ED9160000000 %194 = fmul float %189, %189 %195 = fmul float %191, %191 %196 = fadd float %194, %195 %197 = fmul float %193, %193 %198 = fadd float %196, %197 %199 = fmul float 0.000000e+00, 0.000000e+00 %200 = fadd float %198, %199 %201 = call float @llvm.AMDGPU.rsq.clamped.f32(float %200) %202 = fmul float %189, %201 %203 = fmul float %191, %201 %204 = fmul float %193, %201 %205 = fmul float %202, %78 %206 = fmul float %203, %79 %207 = fadd float %206, %205 %208 = fmul float %204, %80 %209 = fadd float %207, %208 %210 = call float @llvm.AMDIL.clamp.(float %209, float 0.000000e+00, float 1.000000e+00) %211 = fmul float %78, %185 %212 = fmul float %79, %186 %213 = fadd float %212, %211 %214 = fmul float %80, %187 %215 = fadd float %213, %214 %216 = fadd float %215, %215 %217 = fsub float -0.000000e+00, %185 %218 = fmul float %216, %78 %219 = fadd float %218, %217 %220 = fsub float -0.000000e+00, %186 %221 = fmul float %216, %79 %222 = fadd float %221, %220 %223 = fsub float -0.000000e+00, %187 %224 = fmul float %216, %80 %225 = fadd float %224, %223 %226 = bitcast float %219 to i32 %227 = bitcast float %222 to i32 %228 = insertelement <2 x i32> undef, i32 %226, i32 0 %229 = insertelement <2 x i32> %228, i32 %227, i32 1 %230 = bitcast <8 x i32> %55 to <32 x i8> %231 = bitcast <4 x i32> %57 to <16 x i8> %232 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %229, <32 x i8> %230, <16 x i8> %231, i32 2) %233 = extractelement <4 x float> %232, i32 0 %234 = extractelement <4 x float> %232, i32 1 %235 = extractelement <4 x float> %232, i32 2 %236 = call float @llvm.pow.f32(float %233, float 0x40019999A0000000) %237 = call float @llvm.pow.f32(float %234, float 0x40019999A0000000) %238 = call float @llvm.pow.f32(float %235, float 0x40019999A0000000) %239 = bitcast float %81 to i32 %240 = bitcast float %82 to i32 %241 = insertelement <2 x i32> undef, i32 %239, i32 0 %242 = insertelement <2 x i32> %241, i32 %240, i32 1 %243 = bitcast <8 x i32> %43 to <32 x i8> %244 = bitcast <4 x i32> %45 to <16 x i8> %245 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %242, <32 x i8> %243, <16 x i8> %244, i32 2) %246 = extractelement <4 x float> %245, i32 0 %247 = extractelement <4 x float> %245, i32 1 %248 = extractelement <4 x float> %245, i32 2 %249 = extractelement <4 x float> %245, i32 3 %250 = call float @llvm.pow.f32(float %246, float 0x40019999A0000000) %251 = call float @llvm.pow.f32(float %247, float 0x40019999A0000000) %252 = call float @llvm.pow.f32(float %248, float 0x40019999A0000000) %253 = call float @llvm.pow.f32(float %249, float 1.000000e+00) %254 = call float @fabs(float %253) %255 = call float @llvm.pow.f32(float %254, float 0x40019999A0000000) %256 = insertelement <4 x float> undef, float %219, i32 0 %257 = insertelement <4 x float> %256, float %222, i32 1 %258 = insertelement <4 x float> %257, float %225, i32 2 %259 = insertelement <4 x float> %258, float %225, i32 3 %260 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %259) %261 = extractelement <4 x float> %260, i32 0 %262 = extractelement <4 x float> %260, i32 1 %263 = extractelement <4 x float> %260, i32 2 %264 = extractelement <4 x float> %260, i32 3 %265 = call float @fabs(float %263) %266 = fdiv float 1.000000e+00, %265 %267 = fmul float %261, %266 %268 = fadd float %267, 1.500000e+00 %269 = fmul float %262, %266 %270 = fadd float %269, 1.500000e+00 %271 = bitcast float %270 to i32 %272 = bitcast float %268 to i32 %273 = bitcast float %264 to i32 %274 = insertelement <4 x i32> undef, i32 %271, i32 0 %275 = insertelement <4 x i32> %274, i32 %272, i32 1 %276 = insertelement <4 x i32> %275, i32 %273, i32 2 %277 = insertelement <4 x i32> %276, i32 undef, i32 3 %278 = bitcast <8 x i32> %51 to <32 x i8> %279 = bitcast <4 x i32> %53 to <16 x i8> %280 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %277, <32 x i8> %278, <16 x i8> %279, i32 4) %281 = extractelement <4 x float> %280, i32 0 %282 = extractelement <4 x float> %280, i32 1 %283 = extractelement <4 x float> %280, i32 2 %284 = call float @llvm.pow.f32(float %281, float 0x40019999A0000000) %285 = call float @llvm.pow.f32(float %282, float 0x40019999A0000000) %286 = call float @llvm.pow.f32(float %283, float 0x40019999A0000000) %287 = call float @llvm.AMDGPU.lrp(float %255, float %284, float %236) %288 = call float @llvm.AMDGPU.lrp(float %255, float %285, float %237) %289 = call float @llvm.AMDGPU.lrp(float %255, float %286, float %238) %290 = fmul float %255, 1.990000e+02 %291 = fadd float %290, 1.000000e+00 %292 = call float @fabs(float %210) %293 = call float @llvm.pow.f32(float %292, float %291) %294 = fmul float %291, 0x3FB99999A0000000 %295 = fmul float %294, %293 %296 = fadd float %295, %287 %297 = fmul float %294, %293 %298 = fadd float %297, %288 %299 = fmul float %294, %293 %300 = fadd float %299, %289 %301 = fmul float %296, %250 %302 = fadd float %301, %169 %303 = fmul float %298, %251 %304 = fadd float %303, %170 %305 = fmul float %300, %252 %306 = fadd float %305, %171 %307 = call float @fabs(float %302) %308 = call float @llvm.log2.f32(float %307) %309 = call float @fabs(float %304) %310 = call float @llvm.log2.f32(float %309) %311 = call float @fabs(float %306) %312 = call float @llvm.log2.f32(float %311) %313 = fmul float %308, 0x3FDD1743E0000000 %314 = fmul float %310, 0x3FDD1743E0000000 %315 = fmul float %312, 0x3FDD1743E0000000 %316 = call float @llvm.AMDIL.exp.(float %313) %317 = call float @llvm.AMDIL.exp.(float %314) %318 = call float @llvm.AMDIL.exp.(float %315) %319 = bitcast float %316 to i32 %320 = bitcast float %317 to i32 %321 = bitcast float %318 to i32 %322 = insertelement <4 x i32> undef, i32 %319, i32 0 %323 = insertelement <4 x i32> %322, i32 %320, i32 1 %324 = insertelement <4 x i32> %323, i32 %321, i32 2 %325 = insertelement <4 x i32> %324, i32 undef, i32 3 %326 = bitcast <8 x i32> %63 to <32 x i8> %327 = bitcast <4 x i32> %65 to <16 x i8> %328 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %325, <32 x i8> %326, <16 x i8> %327, i32 3) %329 = extractelement <4 x float> %328, i32 0 %330 = extractelement <4 x float> %328, i32 1 %331 = extractelement <4 x float> %328, i32 2 %332 = fmul float %89, 1.000000e+00 %333 = fadd float %332, 0.000000e+00 %334 = fmul float %90, -1.000000e+00 %335 = fadd float %334, 1.000000e+00 %336 = bitcast float %333 to i32 %337 = bitcast float %335 to i32 %338 = insertelement <2 x i32> undef, i32 %336, i32 0 %339 = insertelement <2 x i32> %338, i32 %337, i32 1 %340 = bitcast <8 x i32> %67 to <32 x i8> %341 = bitcast <4 x i32> %69 to <16 x i8> %342 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %339, <32 x i8> %340, <16 x i8> %341, i32 2) %343 = extractelement <4 x float> %342, i32 0 %344 = extractelement <4 x float> %342, i32 2 %345 = bitcast float %83 to i32 %346 = bitcast float %84 to i32 %347 = insertelement <2 x i32> undef, i32 %345, i32 0 %348 = insertelement <2 x i32> %347, i32 %346, i32 1 %349 = bitcast <8 x i32> %71 to <32 x i8> %350 = bitcast <4 x i32> %73 to <16 x i8> %351 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %348, <32 x i8> %349, <16 x i8> %350, i32 2) %352 = extractelement <4 x float> %351, i32 0 %353 = extractelement <4 x float> %351, i32 1 %354 = extractelement <4 x float> %351, i32 2 %355 = call float @llvm.AMDGPU.lrp(float %343, float %329, float %352) %356 = call float @llvm.AMDGPU.lrp(float %343, float %330, float %353) %357 = call float @llvm.AMDGPU.lrp(float %343, float %331, float %354) %358 = fsub float -0.000000e+00, %343 %359 = fmul float %344, %358 %360 = fadd float %359, %343 %361 = bitcast float %355 to i32 %362 = bitcast float %356 to i32 %363 = bitcast float %357 to i32 %364 = insertelement <4 x i32> undef, i32 %361, i32 0 %365 = insertelement <4 x i32> %364, i32 %362, i32 1 %366 = insertelement <4 x i32> %365, i32 %363, i32 2 %367 = insertelement <4 x i32> %366, i32 undef, i32 3 %368 = bitcast <8 x i32> %75 to <32 x i8> %369 = bitcast <4 x i32> %77 to <16 x i8> %370 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %367, <32 x i8> %368, <16 x i8> %369, i32 3) %371 = extractelement <4 x float> %370, i32 0 %372 = extractelement <4 x float> %370, i32 1 %373 = extractelement <4 x float> %370, i32 2 %374 = call float @llvm.AMDGPU.lrp(float %360, float %371, float %355) %375 = call float @llvm.AMDGPU.lrp(float %360, float %372, float %356) %376 = call float @llvm.AMDGPU.lrp(float %360, float %373, float %357) %377 = fsub float -0.000000e+00, %374 %378 = fadd float %377, %30 %379 = fsub float -0.000000e+00, %375 %380 = fadd float %379, %31 %381 = fsub float -0.000000e+00, %376 %382 = fadd float %381, %32 %383 = fmul float %37, %87 %384 = fsub float -0.000000e+00, %33 %385 = fadd float %85, %384 %386 = fsub float -0.000000e+00, %34 %387 = fadd float %86, %386 %388 = fsub float -0.000000e+00, %35 %389 = fadd float %383, %388 %390 = fmul float %389, %27 %391 = fmul float %390, 0x3FF7154CA0000000 %392 = call float @llvm.AMDIL.exp.(float %391) %393 = fsub float -0.000000e+00, %392 %394 = fadd float %393, 1.000000e+00 %395 = fmul float %385, %385 %396 = fmul float %387, %387 %397 = fadd float %396, %395 %398 = fmul float %389, %389 %399 = fadd float %397, %398 %400 = fdiv float 1.000000e+00, %389 %401 = fmul float %399, %28 %402 = fmul float %394, %401 %403 = fmul float %400, %402 %404 = fmul float %403, 0x3FF7154CA0000000 %405 = call float @llvm.AMDIL.exp.(float %404) %406 = call float @llvm.AMDIL.clamp.(float %405, float 0.000000e+00, float 1.000000e+00) %407 = fsub float -0.000000e+00, %406 %408 = fadd float %407, 1.000000e+00 %409 = fmul float %88, 5.000000e-01 %410 = fadd float %409, 5.000000e-01 %411 = bitcast float %410 to i32 %412 = bitcast float %29 to i32 %413 = insertelement <2 x i32> undef, i32 %411, i32 0 %414 = insertelement <2 x i32> %413, i32 %412, i32 1 %415 = bitcast <8 x i32> %59 to <32 x i8> %416 = bitcast <4 x i32> %61 to <16 x i8> %417 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %414, <32 x i8> %415, <16 x i8> %416, i32 2) %418 = extractelement <4 x float> %417, i32 0 %419 = fmul float %408, %418 %420 = fmul float %419, %378 %421 = fadd float %420, %374 %422 = fmul float %419, %380 %423 = fadd float %422, %375 %424 = fmul float %419, %382 %425 = fadd float %424, %376 %426 = fmul float %107, 5.000000e-01 %427 = fsub float -0.000000e+00, %36 %428 = fadd float %427, %87 %429 = fcmp oge float %428, 0.000000e+00 %430 = sext i1 %429 to i32 %431 = bitcast i32 %430 to float %432 = bitcast float %431 to i32 %433 = icmp ne i32 %432, 0 %. = select i1 %433, float %107, float %426 %434 = call i32 @llvm.SI.packf16(float %421, float %423) %435 = bitcast i32 %434 to float %436 = call i32 @llvm.SI.packf16(float %425, float %.) %437 = bitcast i32 %436 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %435, float %437, float %435, float %437) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone readonly declare float @llvm.pow.f32(float, float) #2 ; Function Attrs: readonly declare float @fabs(float) #3 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #4 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #4 ; Function Attrs: readnone declare float @llvm.AMDGPU.lrp(float, float, float) #4 ; Function Attrs: nounwind readnone readonly declare float @llvm.log2.f32(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #4 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { nounwind readnone readonly } attributes #3 = { readonly } attributes #4 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b64 vcc, s[6:7] ; BEEA0406 s_mov_b64 s[100:101], s[2:3] ; BEE40402 s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v3, v0, 1, 1, [m0] ; C80C0500 v_interp_p2_f32 v3, [v3], v1, 1, 1, [m0] ; C80D0501 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 s_load_dwordx4 s[8:11], s[4:5], 0x0 ; C0840500 s_load_dwordx4 s[64:67], s[4:5], 0x4 ; C0A00504 s_load_dwordx4 s[52:55], s[4:5], 0x8 ; C09A0508 s_load_dwordx4 s[80:83], s[4:5], 0xc ; C0A8050C s_load_dwordx4 s[68:71], s[4:5], 0x10 ; C0A20510 s_load_dwordx4 s[0:3], s[4:5], 0x14 ; C0800514 s_waitcnt lgkmcnt(0) ; BF8C007F v_writelane_b32 v44, s0, 0 ; 04590000 v_writelane_b32 v44, s1, 1 ; 04590201 v_writelane_b32 v44, s2, 2 ; 04590402 v_writelane_b32 v44, s3, 3 ; 04590603 s_load_dwordx4 s[40:43], s[4:5], 0x18 ; C0940518 s_load_dwordx4 s[28:31], s[4:5], 0x1c ; C08E051C s_load_dwordx4 s[24:27], s[4:5], 0x20 ; C08C0520 s_load_dwordx4 s[20:23], s[4:5], 0x24 ; C08A0524 s_load_dwordx8 s[0:7], vcc, 0x0 ; C0C06B00 s_load_dwordx8 s[72:79], vcc, 0x8 ; C0E46B08 s_load_dwordx8 s[56:63], vcc, 0x10 ; C0DC6B10 s_load_dwordx8 s[92:99], vcc, 0x18 ; C0EE6B18 s_load_dwordx8 s[84:91], vcc, 0x20 ; C0EA6B20 s_load_dwordx8 s[12:19], vcc, 0x28 ; C0C66B28 s_load_dwordx8 s[44:51], vcc, 0x30 ; C0D66B30 s_load_dwordx8 s[32:39], vcc, 0x38 ; C0D06B38 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[4:7], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[0:7], s[8:11] ; F0800F00 00400402 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v8, v7 ; 7E104F07 v_mul_legacy_f32_e32 v8, 1.0, v8 ; 0E1010F2 v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v9, 0x7fffffff ; 7E1202FF 7FFFFFFF v_and_b32_e32 v8, v8, v9 ; 36101308 v_log_f32_e32 v8, v8 ; 7E104F08 v_mov_b32_e32 v10, 0x400ccccd ; 7E1402FF 400CCCCD v_mul_legacy_f32_e32 v8, v10, v8 ; 0E10110A v_exp_f32_e32 v8, v8 ; 7E104B08 v_mov_b32_e32 v11, 0xbdcccccd ; 7E1602FF BDCCCCCD v_add_f32_e32 v11, v8, v11 ; 06161708 v_mov_b32_e32 v15, 0 ; 7E1E0280 v_cmp_lt_f32_e64 s[0:1], v11, 0 ; D0020000 0001010B v_cndmask_b32_e64 v11, 0, -1, s[0:1] ; D200080B 00018280 v_and_b32_e32 v11, 1.0, v11 ; 361616F2 v_xor_b32_e32 v11, 0x80000000, v11 ; 3A1616FF 80000000 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_cmpx_le_f32_e32 vcc, 0, v11 ; 7C261680 v_interp_p1_f32 v11, v0, 1, 2, [m0] ; C82C0900 v_interp_p2_f32 v11, [v11], v1, 1, 2, [m0] ; C82D0901 s_load_dwordx4 s[0:3], s[100:101], 0x0 ; C0806500 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v16, s4, v11 ; 08201604 v_interp_p1_f32 v17, v0, 0, 2, [m0] ; C8440800 v_interp_p2_f32 v17, [v17], v1, 0, 2, [m0] ; C8450801 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v18, s4, v17 ; 08242204 v_mul_f32_e32 v19, v18, v18 ; 10262512 v_mad_f32 v19, v16, v16, v19 ; D2820013 044E2110 v_interp_p1_f32 v20, v0, 2, 2, [m0] ; C8500A00 v_interp_p2_f32 v20, [v20], v1, 2, 2, [m0] ; C8510A01 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v21, s4, v20 ; 082A2804 v_mad_f32 v19, v21, v21, v19 ; D2820013 044E2B15 v_max_f32_e32 v19, 0x33d6bf95, v19 ; 202626FF 33D6BF95 v_rsq_clamp_f32_e32 v19, v19 ; 7E265913 v_mul_f32_e32 v22, v16, v19 ; 102C2710 v_mul_f32_e32 v23, v18, v19 ; 102E2712 v_interp_p1_f32 v12, v0, 0, 0, [m0] ; C8300000 v_interp_p2_f32 v12, [v12], v1, 0, 0, [m0] ; C8310001 v_mul_f32_e32 v24, v23, v12 ; 10301917 v_interp_p1_f32 v13, v0, 1, 0, [m0] ; C8340100 v_interp_p2_f32 v13, [v13], v1, 1, 0, [m0] ; C8350101 v_mad_f32 v24, v13, v22, v24 ; D2820018 04622D0D v_mul_f32_e32 v25, v21, v19 ; 10322715 v_interp_p1_f32 v14, v0, 2, 0, [m0] ; C8380200 v_interp_p2_f32 v14, [v14], v1, 2, 0, [m0] ; C8390201 v_mad_f32 v24, v14, v25, v24 ; D2820018 0462330E v_add_f32_e32 v24, v24, v24 ; 06303118 v_mad_f32 v27, v24, v14, -v25 ; D282001B 84661D18 v_mad_f32 v26, v24, v13, -v22 ; D282001A 845A1B18 v_mad_f32 v25, v24, v12, -v23 ; D2820019 845E1918 v_mov_b32_e32 v28, v27 ; 7E38031B v_cubeid_f32 v32, v25, v26, v27 ; D2880020 046E3519 v_cubema_f32 v31, v25, v26, v27 ; D28E001F 046E3519 v_cubesc_f32 v30, v25, v26, v27 ; D28A001E 046E3519 v_cubetc_f32 v29, v25, v26, v27 ; D28C001D 046E3519 v_rcp_f32_e64 v22, |v31| ; D3540116 0000011F v_mov_b32_e32 v23, 0x3fc00000 ; 7E2E02FF 3FC00000 v_mad_f32 v31, v29, v22, v23 ; D282001F 045E2D1D v_mad_f32 v30, v30, v22, v23 ; D282001E 045E2D1E image_sample v[29:31], 7, 0, 0, 0, 0, 0, 0, 0, v[30:33], s[92:99], s[80:83] ; F0800700 02971D1E s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v22, v31 ; 7E2C4F1F v_mul_legacy_f32_e32 v22, v10, v22 ; 0E2C2D0A v_exp_f32_e32 v22, v22 ; 7E2C4B16 image_sample v[24:26], 7, 0, 0, 0, 0, 0, 0, 0, v[25:26], s[84:91], s[68:71] ; F0800700 02351819 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v27, v26 ; 7E364F1A v_mul_legacy_f32_e32 v27, v10, v27 ; 0E36370A v_exp_f32_e32 v27, v27 ; 7E364B1B image_sample v[32:35], 15, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[72:79], s[64:67] ; F0800F00 02122002 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v2, v35 ; 7E044F23 v_mul_legacy_f32_e32 v2, 1.0, v2 ; 0E0404F2 v_exp_f32_e32 v2, v2 ; 7E044B02 v_and_b32_e32 v2, v2, v9 ; 36041302 v_log_f32_e32 v2, v2 ; 7E044F02 v_mul_legacy_f32_e32 v2, v10, v2 ; 0E04050A v_exp_f32_e32 v2, v2 ; 7E044B02 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_mul_f32_e32 v27, v27, v3 ; 1036071B v_mad_f32 v22, v2, v22, v27 ; D2820016 046E2D02 v_mov_b32_e32 v27, 0x3d58adac ; 7E3602FF 3D58ADAC v_mad_f32 v18, v18, v19, v27 ; D2820012 046E2712 v_mov_b32_e32 v27, 0x3f589375 ; 7E3602FF 3F589375 v_mad_f32 v16, v16, v19, v27 ; D2820010 046E2710 v_mul_f32_e32 v27, v16, v16 ; 10362110 v_mad_f32 v27, v18, v18, v27 ; D282001B 046E2512 v_mov_b32_e32 v28, 0x3f076c8b ; 7E3802FF 3F076C8B v_mad_f32 v19, v21, v19, v28 ; D2820013 04722715 v_mad_f32 v21, v19, v19, v27 ; D2820015 046E2713 v_add_f32_e32 v21, 0, v21 ; 062A2A80 v_rsq_clamp_f32_e32 v21, v21 ; 7E2A5915 v_mul_f32_e32 v16, v21, v16 ; 10202115 v_mul_f32_e32 v18, v21, v18 ; 10242515 v_mul_f32_e32 v18, v12, v18 ; 1024250C v_mad_f32 v16, v16, v13, v18 ; D2820010 044A1B10 v_mul_f32_e32 v18, v21, v19 ; 10242715 v_mad_f32 v16, v18, v14, v16 ; D2820010 04421D12 v_add_f32_e64 v16, 0, v16 clamp ; D2060810 00022080 v_and_b32_e32 v9, v16, v9 ; 36121310 v_log_f32_e32 v9, v9 ; 7E124F09 v_mov_b32_e32 v16, 0x43470000 ; 7E2002FF 43470000 v_mad_f32 v16, v16, v2, 1.0 ; D2820010 03CA0510 v_mul_legacy_f32_e32 v9, v16, v9 ; 0E121310 v_exp_f32_e32 v9, v9 ; 7E124B09 v_mul_f32_e32 v16, 0x3dcccccd, v16 ; 102020FF 3DCCCCCD v_mad_f32 v18, v16, v9, v22 ; D2820012 045A1310 v_log_f32_e32 v19, v34 ; 7E264F22 v_mul_legacy_f32_e32 v19, v10, v19 ; 0E26270A v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v18, v19, v18 ; 10242513 v_cubeid_f32 v39, v12, v13, v14 ; D2880027 043A1B0C v_cubema_f32 v38, v12, v13, v14 ; D28E0026 043A1B0C v_cubesc_f32 v37, v12, v13, v14 ; D28A0025 043A1B0C v_cubetc_f32 v36, v12, v13, v14 ; D28C0024 043A1B0C v_rcp_f32_e64 v12, |v38| ; D354010C 00000126 v_mad_f32 v38, v36, v12, v23 ; D2820026 045E1924 v_mad_f32 v37, v37, v12, v23 ; D2820025 045E1925 image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[37:40], s[56:63], s[52:55] ; F0800700 01AE0C25 s_waitcnt vmcnt(0) ; BF8C0770 v_log_f32_e32 v15, v14 ; 7E1E4F0E v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_log_f32_e32 v19, v6 ; 7E264F06 v_mul_legacy_f32_e32 v19, v10, v19 ; 0E26270A v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v15, v15, v19 ; 101E270F v_mad_f32 v15, 2.0, v15, v18 ; D282000F 044A1EF4 v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_mul_f32_e32 v15, 0x3ee8ba1f, v15 ; 101E1EFF 3EE8BA1F v_exp_f32_e32 v38, v15 ; 7E4C4B0F v_log_f32_e32 v15, v30 ; 7E1E4F1E v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_log_f32_e32 v18, v25 ; 7E244F19 v_mul_legacy_f32_e32 v18, v10, v18 ; 0E24250A v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v18, v18, v3 ; 10240712 v_mad_f32 v15, v2, v15, v18 ; D282000F 044A1F02 v_mad_f32 v15, v16, v9, v15 ; D282000F 043E1310 v_log_f32_e32 v18, v33 ; 7E244F21 v_mul_legacy_f32_e32 v18, v10, v18 ; 0E24250A v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v15, v18, v15 ; 101E1F12 v_log_f32_e32 v18, v13 ; 7E244F0D v_mul_legacy_f32_e32 v18, v10, v18 ; 0E24250A v_exp_f32_e32 v18, v18 ; 7E244B12 v_log_f32_e32 v19, v5 ; 7E264F05 v_mul_legacy_f32_e32 v19, v10, v19 ; 0E26270A v_exp_f32_e32 v19, v19 ; 7E264B13 v_mul_f32_e32 v18, v18, v19 ; 10242712 v_mad_f32 v15, 2.0, v18, v15 ; D282000F 043E24F4 v_log_f32_e64 v15, |v15| ; D34E010F 0000010F v_mul_f32_e32 v15, 0x3ee8ba1f, v15 ; 101E1EFF 3EE8BA1F v_exp_f32_e32 v37, v15 ; 7E4A4B0F v_log_f32_e32 v15, v29 ; 7E1E4F1D v_mul_legacy_f32_e32 v15, v10, v15 ; 0E1E1F0A v_exp_f32_e32 v15, v15 ; 7E1E4B0F v_log_f32_e32 v18, v24 ; 7E244F18 v_mul_legacy_f32_e32 v18, v10, v18 ; 0E24250A v_exp_f32_e32 v18, v18 ; 7E244B12 v_mul_f32_e32 v3, v18, v3 ; 10060712 v_mad_f32 v2, v2, v15, v3 ; D2820002 040E1F02 v_mad_f32 v2, v16, v9, v2 ; D2820002 040A1310 v_log_f32_e32 v3, v32 ; 7E064F20 v_mul_legacy_f32_e32 v3, v10, v3 ; 0E06070A v_exp_f32_e32 v3, v3 ; 7E064B03 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_log_f32_e32 v3, v12 ; 7E064F0C v_mul_legacy_f32_e32 v3, v10, v3 ; 0E06070A v_exp_f32_e32 v3, v3 ; 7E064B03 v_log_f32_e32 v4, v4 ; 7E084F04 v_mul_legacy_f32_e32 v4, v10, v4 ; 0E08090A v_exp_f32_e32 v4, v4 ; 7E084B04 v_mul_f32_e32 v3, v3, v4 ; 10060903 v_mad_f32 v2, 2.0, v3, v2 ; D2820002 040A06F4 v_log_f32_e64 v2, |v2| ; D34E0102 00000102 v_mul_f32_e32 v2, 0x3ee8ba1f, v2 ; 100404FF 3EE8BA1F v_exp_f32_e32 v36, v2 ; 7E484B02 image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[36:39], s[44:51], s[40:43] ; F0800700 014B0224 v_interp_p1_f32 v5, v0, 1, 4, [m0] ; C8141100 v_interp_p2_f32 v5, [v5], v1, 1, 4, [m0] ; C8151101 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_interp_p1_f32 v7, v0, 0, 4, [m0] ; C81C1000 v_interp_p2_f32 v7, [v7], v1, 0, 4, [m0] ; C81D1001 v_add_f32_e32 v5, 0, v7 ; 060A0E80 image_sample v[5:6], 5, 0, 0, 0, 0, 0, 0, 0, v[5:6], s[32:39], s[28:31] ; F0800500 00E80505 s_waitcnt vmcnt(0) ; BF8C0770 v_sub_f32_e32 v7, 1.0, v5 ; 080E0AF2 v_interp_p1_f32 v10, v0, 3, 1, [m0] ; C8280700 v_interp_p2_f32 v10, [v10], v1, 3, 1, [m0] ; C8290701 v_interp_p1_f32 v9, v0, 2, 1, [m0] ; C8240600 v_interp_p2_f32 v9, [v9], v1, 2, 1, [m0] ; C8250601 s_load_dwordx8 s[4:11], vcc, 0x40 ; C0C26B40 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[12:14], 7, 0, 0, 0, 0, 0, 0, 0, v[9:10], s[4:11], s[24:27] ; F0800700 00C10C09 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v9, v14, v7 ; 10120F0E v_mad_f32 v23, v5, v4, v9 ; D2820017 04260905 v_mul_f32_e32 v9, v13, v7 ; 10120F0D v_mad_f32 v22, v5, v3, v9 ; D2820016 04260705 v_mul_f32_e32 v7, v12, v7 ; 100E0F0C v_mad_f32 v21, v5, v2, v7 ; D2820015 041E0505 s_load_dwordx8 s[4:11], vcc, 0x48 ; C0C26B48 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[2:4], 7, 0, 0, 0, 0, 0, 0, 0, v[21:24], s[4:11], s[20:23] ; F0800700 00A10215 v_mad_f32 v5, -v6, v5, v5 ; D2820005 24160B06 v_sub_f32_e32 v6, 1.0, v5 ; 080C0AF2 v_mul_f32_e32 v7, v22, v6 ; 100E0D16 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v7, v5, v3, v7 ; D2820007 041E0705 s_buffer_load_dword s4, s[0:3], 0x15 ; C2020115 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v7 ; 08120E04 s_buffer_load_dword s4, s[0:3], 0x19 ; C2020119 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v10, s4, v11 ; 0A141604 s_buffer_load_dword s4, s[0:3], 0x18 ; C2020118 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v11, s4, v17 ; 0A162204 v_mul_f32_e32 v11, v11, v11 ; 1016170B v_mad_f32 v10, v10, v10, v11 ; D282000A 042E150A s_buffer_load_dword s4, s[0:3], 0x1a ; C202011A s_buffer_load_dword s5, s[0:3], 0x20 ; C2028120 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v11, s4 ; 7E160204 v_mad_f32 v11, v20, s5, -v11 ; D282000B 842C0B14 v_mad_f32 v10, v11, v11, v10 ; D282000A 042A170B s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v10, s4, v10 ; 10141404 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v12, s4, v11 ; 10181604 v_mul_f32_e32 v12, 0x3fb8aa65, v12 ; 101818FF 3FB8AA65 v_exp_f32_e32 v12, v12 ; 7E184B0C v_sub_f32_e32 v12, 1.0, v12 ; 081818F2 v_mul_f32_e32 v10, v10, v12 ; 1014190A v_rcp_f32_e32 v11, v11 ; 7E16550B v_mul_f32_e32 v10, v10, v11 ; 1014170A v_mul_f32_e32 v10, 0x3fb8aa65, v10 ; 101414FF 3FB8AA65 v_exp_f32_e32 v10, v10 ; 7E144B0A v_add_f32_e64 v10, 0, v10 clamp ; D206080A 00021480 v_sub_f32_e32 v10, 1.0, v10 ; 081414F2 v_interp_p1_f32 v11, v0, 1, 3, [m0] ; C82C0D00 v_interp_p2_f32 v11, [v11], v1, 1, 3, [m0] ; C82D0D01 v_mad_f32 v0, 0.5, v11, 0.5 ; D2820000 03C216F0 s_buffer_load_dword s4, s[0:3], 0x13 ; C2020113 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 v_readlane_b32 s4, v44, 0 ; 0209012C v_readlane_b32 s5, v44, 1 ; 020B032C v_readlane_b32 s6, v44, 2 ; 020D052C v_readlane_b32 s7, v44, 3 ; 020F072C s_nop 2 ; BF800002 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[4:7] ; F0800100 00230000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v10 ; 10001500 v_mad_f32 v1, v0, v9, v7 ; D2820001 041E1300 v_mul_f32_e32 v7, v21, v6 ; 100E0D15 v_mad_f32 v7, v5, v2, v7 ; D2820007 041E0505 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v9, s4, v7 ; 08120E04 v_mad_f32 v7, v0, v9, v7 ; D2820007 041E1300 v_cvt_pkrtz_f16_f32_e32 v1, v7, v1 ; 5E020307 v_mul_f32_e32 v6, v23, v6 ; 100C0D17 v_mad_f32 v2, v5, v4, v6 ; D2820002 041A0905 s_buffer_load_dword s4, s[0:3], 0x16 ; C2020116 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 v_mul_f32_e32 v2, 0.5, v8 ; 100410F0 s_buffer_load_dword s0, s[0:3], 0x1c ; C200011C s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v3, s0, v20 ; 0A062800 v_cmp_ge_f32_e64 s[0:1], v3, 0 ; D00C0000 00010103 v_cndmask_b32_e64 v3, 0, -1, s[0:1] ; D2000803 00018280 v_cmp_ne_i32_e64 s[0:1], v3, 0 ; D10A0000 00010103 v_cndmask_b32_e64 v2, v2, v8, s[0:1] ; D2000002 10021102 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL IN[2] DCL IN[3] DCL IN[4] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL OUT[3], GENERIC[11] DCL OUT[4], GENERIC[12] DCL CONST[0..103] DCL TEMP[0..5], LOCAL DCL ADDR[0] IMM[0] FLT32 { 3.0000, 1.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 0, 0} 0: MUL TEMP[0].x, IMM[0].xxxx, IN[2].xxxx 1: MAD TEMP[1], IN[0].xyzx, IMM[0].yyyz, IMM[0].zzzy 2: F2I TEMP[2].x, TEMP[0].xxxx 3: UARL ADDR[0].x, TEMP[2].xxxx 4: UARL ADDR[0].x, TEMP[2].xxxx 5: DP4 TEMP[2].x, TEMP[1], CONST[ADDR[0].x+8] 6: F2I TEMP[3].x, TEMP[0].xxxx 7: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 8: UARL ADDR[0].x, TEMP[3].xxxx 9: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+8] 10: MOV TEMP[2].y, TEMP[3].xxxx 11: F2I TEMP[3].x, TEMP[0].xxxx 12: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 13: UARL ADDR[0].x, TEMP[3].xxxx 14: DP4 TEMP[3].x, TEMP[1], CONST[ADDR[0].x+8] 15: MOV TEMP[2].z, TEMP[3].xxxx 16: MUL TEMP[3].xyz, TEMP[2], IN[1].xxxx 17: MOV TEMP[2].xyz, TEMP[3].xyzx 18: F2I TEMP[3].x, TEMP[0].xxxx 19: UARL ADDR[0].x, TEMP[3].xxxx 20: UARL ADDR[0].x, TEMP[3].xxxx 21: DP3 TEMP[3].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 22: F2I TEMP[4].x, TEMP[0].xxxx 23: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 24: UARL ADDR[0].x, TEMP[4].xxxx 25: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 26: MOV TEMP[3].y, TEMP[4].xxxx 27: F2I TEMP[0].x, TEMP[0].xxxx 28: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 29: UARL ADDR[0].x, TEMP[0].xxxx 30: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 31: MOV TEMP[3].z, TEMP[0].xxxx 32: MUL TEMP[0].xyz, TEMP[3], IN[1].xxxx 33: MOV TEMP[3].xyz, TEMP[0].xyzx 34: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].yyyy 35: UIF TEMP[0].xxxx :0 36: MUL TEMP[0].w, IMM[0].xxxx, IN[2].yyyy 37: MOV TEMP[2].w, TEMP[0].wwww 38: F2I TEMP[4].x, TEMP[0].wwww 39: UARL ADDR[0].x, TEMP[4].xxxx 40: UARL ADDR[0].x, TEMP[4].xxxx 41: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+8] 42: F2I TEMP[5].x, TEMP[0].wwww 43: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 44: UARL ADDR[0].x, TEMP[5].xxxx 45: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 46: MOV TEMP[4].y, TEMP[5].xxxx 47: F2I TEMP[5].x, TEMP[0].wwww 48: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 49: UARL ADDR[0].x, TEMP[5].xxxx 50: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 51: MOV TEMP[4].z, TEMP[5].xxxx 52: MAD TEMP[5].xyz, IN[1].yyyy, TEMP[4], TEMP[2] 53: MOV TEMP[2].xyz, TEMP[5].xyzx 54: F2I TEMP[5].x, TEMP[0].wwww 55: UARL ADDR[0].x, TEMP[5].xxxx 56: UARL ADDR[0].x, TEMP[5].xxxx 57: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 58: F2I TEMP[5].x, TEMP[0].wwww 59: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 60: UARL ADDR[0].x, TEMP[5].xxxx 61: DP3 TEMP[5].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 62: MOV TEMP[4].y, TEMP[5].xxxx 63: F2I TEMP[0].x, TEMP[0].wwww 64: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 65: UARL ADDR[0].x, TEMP[0].xxxx 66: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 67: MOV TEMP[4].z, TEMP[0].xxxx 68: MAD TEMP[0].xyz, IN[1].yyyy, TEMP[4], TEMP[3] 69: MOV TEMP[3].xyz, TEMP[0].xyzx 70: FSLT TEMP[0].x, IMM[0].zzzz, IN[1].zzzz 71: UIF TEMP[0].xxxx :0 72: MUL TEMP[0].w, IMM[0].xxxx, IN[2].zzzz 73: MOV TEMP[2].w, TEMP[0].wwww 74: F2I TEMP[5].x, TEMP[0].wwww 75: UARL ADDR[0].x, TEMP[5].xxxx 76: UARL ADDR[0].x, TEMP[5].xxxx 77: DP4 TEMP[4].x, TEMP[1], CONST[ADDR[0].x+8] 78: F2I TEMP[5].x, TEMP[0].wwww 79: UADD TEMP[5].x, IMM[1].xxxx, TEMP[5].xxxx 80: UARL ADDR[0].x, TEMP[5].xxxx 81: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 82: MOV TEMP[4].y, TEMP[5].xxxx 83: F2I TEMP[5].x, TEMP[0].wwww 84: UADD TEMP[5].x, IMM[1].yyyy, TEMP[5].xxxx 85: UARL ADDR[0].x, TEMP[5].xxxx 86: DP4 TEMP[5].x, TEMP[1], CONST[ADDR[0].x+8] 87: MOV TEMP[4].z, TEMP[5].xxxx 88: MAD TEMP[4].xyz, IN[1].zzzz, TEMP[4], TEMP[2] 89: MOV TEMP[2].xyz, TEMP[4].xyzx 90: F2I TEMP[4].x, TEMP[0].wwww 91: UARL ADDR[0].x, TEMP[4].xxxx 92: UARL ADDR[0].x, TEMP[4].xxxx 93: DP3 TEMP[1].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 94: F2I TEMP[4].x, TEMP[0].wwww 95: UADD TEMP[4].x, IMM[1].xxxx, TEMP[4].xxxx 96: UARL ADDR[0].x, TEMP[4].xxxx 97: DP3 TEMP[4].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 98: MOV TEMP[1].y, TEMP[4].xxxx 99: F2I TEMP[0].x, TEMP[0].wwww 100: UADD TEMP[0].x, IMM[1].yyyy, TEMP[0].xxxx 101: UARL ADDR[0].x, TEMP[0].xxxx 102: DP3 TEMP[0].x, IN[3].xyzz, CONST[ADDR[0].x+8].xyzz 103: MOV TEMP[1].z, TEMP[0].xxxx 104: MAD TEMP[0].xyz, IN[1].zzzz, TEMP[1], TEMP[3] 105: MOV TEMP[3].xyz, TEMP[0].xyzx 106: ENDIF 107: ENDIF 108: MUL TEMP[1], TEMP[2].yyyy, CONST[5] 109: MAD TEMP[1], TEMP[2].xxxx, CONST[4], TEMP[1] 110: MAD TEMP[1], TEMP[2].zzzz, CONST[6], TEMP[1] 111: ADD TEMP[1], TEMP[1], CONST[7] 112: ADD TEMP[0].xyz, TEMP[2], -CONST[3] 113: MOV TEMP[0].xyz, TEMP[0].xyzx 114: MOV TEMP[3].w, IN[4].xxxx 115: MOV TEMP[0].w, IN[4].yyyy 116: MOV TEMP[2].xyz, TEMP[2].xyzx 117: MOV TEMP[4].xyz, TEMP[1].xywx 118: MOV TEMP[2].w, IMM[0].yyyy 119: MOV TEMP[4].w, IMM[0].yyyy 120: MOV OUT[1], TEMP[3] 121: MOV OUT[0], TEMP[1] 122: MOV OUT[2], TEMP[0] 123: MOV OUT[3], TEMP[2] 124: MOV OUT[4], TEMP[4] 125: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 48) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 52) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 56) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 64) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 68) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 72) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 76) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 80) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 84) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 88) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 92) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 96) %25 = call float @llvm.SI.load.const(<16 x i8> %12, i32 100) %26 = call float @llvm.SI.load.const(<16 x i8> %12, i32 104) %27 = call float @llvm.SI.load.const(<16 x i8> %12, i32 108) %28 = call float @llvm.SI.load.const(<16 x i8> %12, i32 112) %29 = call float @llvm.SI.load.const(<16 x i8> %12, i32 116) %30 = call float @llvm.SI.load.const(<16 x i8> %12, i32 120) %31 = call float @llvm.SI.load.const(<16 x i8> %12, i32 124) %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 0 %37 = extractelement <4 x float> %35, i32 1 %38 = extractelement <4 x float> %35, i32 2 %39 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %40 = load <16 x i8> addrspace(2)* %39, !tbaa !0 %41 = add i32 %5, %7 %42 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %40, i32 0, i32 %41) %43 = extractelement <4 x float> %42, i32 0 %44 = extractelement <4 x float> %42, i32 1 %45 = extractelement <4 x float> %42, i32 2 %46 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 2 %47 = load <16 x i8> addrspace(2)* %46, !tbaa !0 %48 = add i32 %5, %7 %49 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %47, i32 0, i32 %48) %50 = extractelement <4 x float> %49, i32 0 %51 = extractelement <4 x float> %49, i32 1 %52 = extractelement <4 x float> %49, i32 2 %53 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 3 %54 = load <16 x i8> addrspace(2)* %53, !tbaa !0 %55 = add i32 %5, %7 %56 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %54, i32 0, i32 %55) %57 = extractelement <4 x float> %56, i32 0 %58 = extractelement <4 x float> %56, i32 1 %59 = extractelement <4 x float> %56, i32 2 %60 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 4 %61 = load <16 x i8> addrspace(2)* %60, !tbaa !0 %62 = add i32 %5, %7 %63 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %61, i32 0, i32 %62) %64 = extractelement <4 x float> %63, i32 0 %65 = extractelement <4 x float> %63, i32 1 %66 = fmul float 3.000000e+00, %50 %67 = fmul float %36, 1.000000e+00 %68 = fadd float %67, 0.000000e+00 %69 = fmul float %37, 1.000000e+00 %70 = fadd float %69, 0.000000e+00 %71 = fmul float %38, 1.000000e+00 %72 = fadd float %71, 0.000000e+00 %73 = fmul float %36, 0.000000e+00 %74 = fadd float %73, 1.000000e+00 %75 = fptosi float %66 to i32 %76 = bitcast i32 %75 to float %77 = bitcast float %76 to i32 %78 = shl i32 %77, 4 %79 = add i32 %78, 128 %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %79) %81 = shl i32 %77, 4 %82 = add i32 %81, 132 %83 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %82) %84 = shl i32 %77, 4 %85 = add i32 %84, 136 %86 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %85) %87 = shl i32 %77, 4 %88 = add i32 %87, 140 %89 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %88) %90 = fmul float %68, %80 %91 = fmul float %70, %83 %92 = fadd float %90, %91 %93 = fmul float %72, %86 %94 = fadd float %92, %93 %95 = fmul float %74, %89 %96 = fadd float %94, %95 %97 = fptosi float %66 to i32 %98 = bitcast i32 %97 to float %99 = bitcast float %98 to i32 %100 = add i32 1, %99 %101 = bitcast i32 %100 to float %102 = bitcast float %101 to i32 %103 = shl i32 %102, 4 %104 = add i32 %103, 128 %105 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %104) %106 = shl i32 %102, 4 %107 = add i32 %106, 132 %108 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %107) %109 = shl i32 %102, 4 %110 = add i32 %109, 136 %111 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %110) %112 = shl i32 %102, 4 %113 = add i32 %112, 140 %114 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %113) %115 = fmul float %68, %105 %116 = fmul float %70, %108 %117 = fadd float %115, %116 %118 = fmul float %72, %111 %119 = fadd float %117, %118 %120 = fmul float %74, %114 %121 = fadd float %119, %120 %122 = fptosi float %66 to i32 %123 = bitcast i32 %122 to float %124 = bitcast float %123 to i32 %125 = add i32 2, %124 %126 = bitcast i32 %125 to float %127 = bitcast float %126 to i32 %128 = shl i32 %127, 4 %129 = add i32 %128, 128 %130 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %129) %131 = shl i32 %127, 4 %132 = add i32 %131, 132 %133 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %132) %134 = shl i32 %127, 4 %135 = add i32 %134, 136 %136 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %135) %137 = shl i32 %127, 4 %138 = add i32 %137, 140 %139 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %138) %140 = fmul float %68, %130 %141 = fmul float %70, %133 %142 = fadd float %140, %141 %143 = fmul float %72, %136 %144 = fadd float %142, %143 %145 = fmul float %74, %139 %146 = fadd float %144, %145 %147 = fmul float %96, %43 %148 = fmul float %121, %43 %149 = fmul float %146, %43 %150 = fptosi float %66 to i32 %151 = bitcast i32 %150 to float %152 = bitcast float %151 to i32 %153 = shl i32 %152, 4 %154 = add i32 %153, 128 %155 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %154) %156 = shl i32 %152, 4 %157 = add i32 %156, 132 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = shl i32 %152, 4 %160 = add i32 %159, 136 %161 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %160) %162 = fmul float %57, %155 %163 = fmul float %58, %158 %164 = fadd float %163, %162 %165 = fmul float %59, %161 %166 = fadd float %164, %165 %167 = fptosi float %66 to i32 %168 = bitcast i32 %167 to float %169 = bitcast float %168 to i32 %170 = add i32 1, %169 %171 = bitcast i32 %170 to float %172 = bitcast float %171 to i32 %173 = shl i32 %172, 4 %174 = add i32 %173, 128 %175 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %174) %176 = shl i32 %172, 4 %177 = add i32 %176, 132 %178 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %177) %179 = shl i32 %172, 4 %180 = add i32 %179, 136 %181 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %180) %182 = fmul float %57, %175 %183 = fmul float %58, %178 %184 = fadd float %183, %182 %185 = fmul float %59, %181 %186 = fadd float %184, %185 %187 = fptosi float %66 to i32 %188 = bitcast i32 %187 to float %189 = bitcast float %188 to i32 %190 = add i32 2, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = shl i32 %192, 4 %194 = add i32 %193, 128 %195 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %194) %196 = shl i32 %192, 4 %197 = add i32 %196, 132 %198 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %197) %199 = shl i32 %192, 4 %200 = add i32 %199, 136 %201 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %200) %202 = fmul float %57, %195 %203 = fmul float %58, %198 %204 = fadd float %203, %202 %205 = fmul float %59, %201 %206 = fadd float %204, %205 %207 = fmul float %166, %43 %208 = fmul float %186, %43 %209 = fmul float %206, %43 %210 = fcmp olt float 0.000000e+00, %44 %211 = sext i1 %210 to i32 %212 = bitcast i32 %211 to float %213 = bitcast float %212 to i32 %214 = icmp ne i32 %213, 0 br i1 %214, label %IF, label %ENDIF IF: ; preds = %main_body %215 = fmul float 3.000000e+00, %51 %216 = fptosi float %215 to i32 %217 = bitcast i32 %216 to float %218 = bitcast float %217 to i32 %219 = shl i32 %218, 4 %220 = add i32 %219, 128 %221 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %220) %222 = shl i32 %218, 4 %223 = add i32 %222, 132 %224 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %223) %225 = shl i32 %218, 4 %226 = add i32 %225, 136 %227 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %226) %228 = shl i32 %218, 4 %229 = add i32 %228, 140 %230 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %229) %231 = fmul float %68, %221 %232 = fmul float %70, %224 %233 = fadd float %231, %232 %234 = fmul float %72, %227 %235 = fadd float %233, %234 %236 = fmul float %74, %230 %237 = fadd float %235, %236 %238 = fptosi float %215 to i32 %239 = bitcast i32 %238 to float %240 = bitcast float %239 to i32 %241 = add i32 1, %240 %242 = bitcast i32 %241 to float %243 = bitcast float %242 to i32 %244 = shl i32 %243, 4 %245 = add i32 %244, 128 %246 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %245) %247 = shl i32 %243, 4 %248 = add i32 %247, 132 %249 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %248) %250 = shl i32 %243, 4 %251 = add i32 %250, 136 %252 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %251) %253 = shl i32 %243, 4 %254 = add i32 %253, 140 %255 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %254) %256 = fmul float %68, %246 %257 = fmul float %70, %249 %258 = fadd float %256, %257 %259 = fmul float %72, %252 %260 = fadd float %258, %259 %261 = fmul float %74, %255 %262 = fadd float %260, %261 %263 = fptosi float %215 to i32 %264 = bitcast i32 %263 to float %265 = bitcast float %264 to i32 %266 = add i32 2, %265 %267 = bitcast i32 %266 to float %268 = bitcast float %267 to i32 %269 = shl i32 %268, 4 %270 = add i32 %269, 128 %271 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %270) %272 = shl i32 %268, 4 %273 = add i32 %272, 132 %274 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %273) %275 = shl i32 %268, 4 %276 = add i32 %275, 136 %277 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %276) %278 = shl i32 %268, 4 %279 = add i32 %278, 140 %280 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %279) %281 = fmul float %68, %271 %282 = fmul float %70, %274 %283 = fadd float %281, %282 %284 = fmul float %72, %277 %285 = fadd float %283, %284 %286 = fmul float %74, %280 %287 = fadd float %285, %286 %288 = fmul float %44, %237 %289 = fadd float %288, %147 %290 = fmul float %44, %262 %291 = fadd float %290, %148 %292 = fmul float %44, %287 %293 = fadd float %292, %149 %294 = fptosi float %215 to i32 %295 = bitcast i32 %294 to float %296 = bitcast float %295 to i32 %297 = shl i32 %296, 4 %298 = add i32 %297, 128 %299 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %298) %300 = shl i32 %296, 4 %301 = add i32 %300, 132 %302 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %301) %303 = shl i32 %296, 4 %304 = add i32 %303, 136 %305 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %304) %306 = fmul float %57, %299 %307 = fmul float %58, %302 %308 = fadd float %307, %306 %309 = fmul float %59, %305 %310 = fadd float %308, %309 %311 = fptosi float %215 to i32 %312 = bitcast i32 %311 to float %313 = bitcast float %312 to i32 %314 = add i32 1, %313 %315 = bitcast i32 %314 to float %316 = bitcast float %315 to i32 %317 = shl i32 %316, 4 %318 = add i32 %317, 128 %319 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %318) %320 = shl i32 %316, 4 %321 = add i32 %320, 132 %322 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %321) %323 = shl i32 %316, 4 %324 = add i32 %323, 136 %325 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %324) %326 = fmul float %57, %319 %327 = fmul float %58, %322 %328 = fadd float %327, %326 %329 = fmul float %59, %325 %330 = fadd float %328, %329 %331 = fptosi float %215 to i32 %332 = bitcast i32 %331 to float %333 = bitcast float %332 to i32 %334 = add i32 2, %333 %335 = bitcast i32 %334 to float %336 = bitcast float %335 to i32 %337 = shl i32 %336, 4 %338 = add i32 %337, 128 %339 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %338) %340 = shl i32 %336, 4 %341 = add i32 %340, 132 %342 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %341) %343 = shl i32 %336, 4 %344 = add i32 %343, 136 %345 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %344) %346 = fmul float %57, %339 %347 = fmul float %58, %342 %348 = fadd float %347, %346 %349 = fmul float %59, %345 %350 = fadd float %348, %349 %351 = fmul float %44, %310 %352 = fadd float %351, %207 %353 = fmul float %44, %330 %354 = fadd float %353, %208 %355 = fmul float %44, %350 %356 = fadd float %355, %209 %357 = fcmp olt float 0.000000e+00, %45 %358 = sext i1 %357 to i32 %359 = bitcast i32 %358 to float %360 = bitcast float %359 to i32 %361 = icmp ne i32 %360, 0 br i1 %361, label %IF66, label %ENDIF ENDIF: ; preds = %IF66, %IF, %main_body %temp8.0 = phi float [ %147, %main_body ], [ %466, %IF66 ], [ %289, %IF ] %temp9.0 = phi float [ %148, %main_body ], [ %468, %IF66 ], [ %291, %IF ] %temp10.0 = phi float [ %149, %main_body ], [ %470, %IF66 ], [ %293, %IF ] %temp12.0 = phi float [ %207, %main_body ], [ %529, %IF66 ], [ %352, %IF ] %temp13.0 = phi float [ %208, %main_body ], [ %531, %IF66 ], [ %354, %IF ] %temp14.0 = phi float [ %209, %main_body ], [ %533, %IF66 ], [ %356, %IF ] %362 = fmul float %temp9.0, %20 %363 = fmul float %temp9.0, %21 %364 = fmul float %temp9.0, %22 %365 = fmul float %temp9.0, %23 %366 = fmul float %temp8.0, %16 %367 = fadd float %366, %362 %368 = fmul float %temp8.0, %17 %369 = fadd float %368, %363 %370 = fmul float %temp8.0, %18 %371 = fadd float %370, %364 %372 = fmul float %temp8.0, %19 %373 = fadd float %372, %365 %374 = fmul float %temp10.0, %24 %375 = fadd float %374, %367 %376 = fmul float %temp10.0, %25 %377 = fadd float %376, %369 %378 = fmul float %temp10.0, %26 %379 = fadd float %378, %371 %380 = fmul float %temp10.0, %27 %381 = fadd float %380, %373 %382 = fadd float %375, %28 %383 = fadd float %377, %29 %384 = fadd float %379, %30 %385 = fadd float %381, %31 %386 = fsub float -0.000000e+00, %13 %387 = fadd float %temp8.0, %386 %388 = fsub float -0.000000e+00, %14 %389 = fadd float %temp9.0, %388 %390 = fsub float -0.000000e+00, %15 %391 = fadd float %temp10.0, %390 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %temp12.0, float %temp13.0, float %temp14.0, float %64) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %387, float %389, float %391, float %65) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 34, i32 0, float %temp8.0, float %temp9.0, float %temp10.0, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 35, i32 0, float %382, float %383, float %385, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %382, float %383, float %384, float %385) ret void IF66: ; preds = %IF %392 = fmul float 3.000000e+00, %52 %393 = fptosi float %392 to i32 %394 = bitcast i32 %393 to float %395 = bitcast float %394 to i32 %396 = shl i32 %395, 4 %397 = add i32 %396, 128 %398 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %397) %399 = shl i32 %395, 4 %400 = add i32 %399, 132 %401 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %400) %402 = shl i32 %395, 4 %403 = add i32 %402, 136 %404 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %403) %405 = shl i32 %395, 4 %406 = add i32 %405, 140 %407 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %406) %408 = fmul float %68, %398 %409 = fmul float %70, %401 %410 = fadd float %408, %409 %411 = fmul float %72, %404 %412 = fadd float %410, %411 %413 = fmul float %74, %407 %414 = fadd float %412, %413 %415 = fptosi float %392 to i32 %416 = bitcast i32 %415 to float %417 = bitcast float %416 to i32 %418 = add i32 1, %417 %419 = bitcast i32 %418 to float %420 = bitcast float %419 to i32 %421 = shl i32 %420, 4 %422 = add i32 %421, 128 %423 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %422) %424 = shl i32 %420, 4 %425 = add i32 %424, 132 %426 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %425) %427 = shl i32 %420, 4 %428 = add i32 %427, 136 %429 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %428) %430 = shl i32 %420, 4 %431 = add i32 %430, 140 %432 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %431) %433 = fmul float %68, %423 %434 = fmul float %70, %426 %435 = fadd float %433, %434 %436 = fmul float %72, %429 %437 = fadd float %435, %436 %438 = fmul float %74, %432 %439 = fadd float %437, %438 %440 = fptosi float %392 to i32 %441 = bitcast i32 %440 to float %442 = bitcast float %441 to i32 %443 = add i32 2, %442 %444 = bitcast i32 %443 to float %445 = bitcast float %444 to i32 %446 = shl i32 %445, 4 %447 = add i32 %446, 128 %448 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %447) %449 = shl i32 %445, 4 %450 = add i32 %449, 132 %451 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %450) %452 = shl i32 %445, 4 %453 = add i32 %452, 136 %454 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %453) %455 = shl i32 %445, 4 %456 = add i32 %455, 140 %457 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %456) %458 = fmul float %68, %448 %459 = fmul float %70, %451 %460 = fadd float %458, %459 %461 = fmul float %72, %454 %462 = fadd float %460, %461 %463 = fmul float %74, %457 %464 = fadd float %462, %463 %465 = fmul float %45, %414 %466 = fadd float %465, %289 %467 = fmul float %45, %439 %468 = fadd float %467, %291 %469 = fmul float %45, %464 %470 = fadd float %469, %293 %471 = fptosi float %392 to i32 %472 = bitcast i32 %471 to float %473 = bitcast float %472 to i32 %474 = shl i32 %473, 4 %475 = add i32 %474, 128 %476 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %475) %477 = shl i32 %473, 4 %478 = add i32 %477, 132 %479 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %478) %480 = shl i32 %473, 4 %481 = add i32 %480, 136 %482 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %481) %483 = fmul float %57, %476 %484 = fmul float %58, %479 %485 = fadd float %484, %483 %486 = fmul float %59, %482 %487 = fadd float %485, %486 %488 = fptosi float %392 to i32 %489 = bitcast i32 %488 to float %490 = bitcast float %489 to i32 %491 = add i32 1, %490 %492 = bitcast i32 %491 to float %493 = bitcast float %492 to i32 %494 = shl i32 %493, 4 %495 = add i32 %494, 128 %496 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %495) %497 = shl i32 %493, 4 %498 = add i32 %497, 132 %499 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %498) %500 = shl i32 %493, 4 %501 = add i32 %500, 136 %502 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %501) %503 = fmul float %57, %496 %504 = fmul float %58, %499 %505 = fadd float %504, %503 %506 = fmul float %59, %502 %507 = fadd float %505, %506 %508 = fptosi float %392 to i32 %509 = bitcast i32 %508 to float %510 = bitcast float %509 to i32 %511 = add i32 2, %510 %512 = bitcast i32 %511 to float %513 = bitcast float %512 to i32 %514 = shl i32 %513, 4 %515 = add i32 %514, 128 %516 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %515) %517 = shl i32 %513, 4 %518 = add i32 %517, 132 %519 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %518) %520 = shl i32 %513, 4 %521 = add i32 %520, 136 %522 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %521) %523 = fmul float %57, %516 %524 = fmul float %58, %519 %525 = fadd float %524, %523 %526 = fmul float %59, %522 %527 = fadd float %525, %526 %528 = fmul float %45, %487 %529 = fadd float %528, %352 %530 = fmul float %45, %507 %531 = fadd float %530, %354 %532 = fmul float %45, %527 %533 = fadd float %532, %356 br label %ENDIF } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v14, s10, v0 ; 4A1C000A s_load_dwordx4 s[12:15], s[8:9], 0x0 ; C0860900 s_load_dwordx4 s[16:19], s[8:9], 0x4 ; C0880904 s_load_dwordx4 s[20:23], s[8:9], 0x8 ; C08A0908 s_load_dwordx4 s[24:27], s[8:9], 0xc ; C08C090C s_load_dwordx4 s[4:7], s[8:9], 0x10 ; C0820910 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[20:23], v14, s[20:23], 0 idxen ; E00C2000 8005140E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, 0x40400000, v20 ; 100028FF 40400000 v_cvt_i32_f32_e32 v0, v0 ; 7E001100 v_lshlrev_b32_e32 v5, 4, v0 ; 340A0084 v_add_i32_e32 v0, 0xa0, v5 ; 4A000AFF 000000A0 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v0, s[0:3], 0 offen ; E0301000 80000200 buffer_load_format_xyzw v[10:13], v14, s[24:27], 0 idxen ; E00C2000 80060A0E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v2, v10 ; 10001502 v_add_i32_e32 v1, 0xa4, v5 ; 4A020AFF 000000A4 buffer_load_dword v4, v1, s[0:3], 0 offen ; E0301000 80000401 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v11, v4, v0 ; D2820000 0402090B v_add_i32_e32 v1, 0xa8, v5 ; 4A020AFF 000000A8 buffer_load_dword v15, v1, s[0:3], 0 offen ; E0301000 80000F01 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v12, v15, v0 ; D2820000 04021F0C buffer_load_format_xyzw v[6:9], v14, s[16:19], 0 idxen ; E00C2000 8004060E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v6, v0 ; 10000106 v_add_i32_e32 v1, 0x90, v5 ; 4A020AFF 00000090 buffer_load_dword v16, v1, s[0:3], 0 offen ; E0301000 80001001 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v1, v16, v10 ; 10021510 v_add_i32_e32 v3, 0x94, v5 ; 4A060AFF 00000094 buffer_load_dword v17, v3, s[0:3], 0 offen ; E0301000 80001103 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v11, v17, v1 ; D2820001 0406230B v_add_i32_e32 v3, 0x98, v5 ; 4A060AFF 00000098 buffer_load_dword v26, v3, s[0:3], 0 offen ; E0301000 80001A03 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v12, v26, v1 ; D2820001 0406350C v_mul_f32_e32 v1, v6, v1 ; 10020306 v_add_i32_e32 v3, 0x80, v5 ; 4A060AFF 00000080 buffer_load_dword v27, v3, s[0:3], 0 offen ; E0301000 80001B03 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v27, v10 ; 1006151B v_add_i32_e32 v18, 0x84, v5 ; 4A240AFF 00000084 buffer_load_dword v28, v18, s[0:3], 0 offen ; E0301000 80001C12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v11, v28, v3 ; D2820003 040E390B v_add_i32_e32 v18, 0x88, v5 ; 4A240AFF 00000088 buffer_load_dword v29, v18, s[0:3], 0 offen ; E0301000 80001D12 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v12, v29, v3 ; D2820003 040E3B0C v_mul_f32_e32 v3, v6, v3 ; 10060706 buffer_load_format_xyzw v[30:33], v14, s[12:15], 0 idxen ; E00C2000 80031E0E s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v18, 0, v31 ; 06243E80 v_mul_f32_e32 v4, v4, v18 ; 10082504 v_add_f32_e32 v19, 0, v30 ; 06263C80 v_mad_f32 v2, v19, v2, v4 ; D2820002 04120513 v_add_f32_e32 v25, 0, v32 ; 06324080 v_mad_f32 v2, v25, v15, v2 ; D2820002 040A1F19 v_mad_f32 v24, 0, v30, 1.0 ; D2820018 03CA3C80 v_add_i32_e32 v4, 0xac, v5 ; 4A080AFF 000000AC buffer_load_dword v4, v4, s[0:3], 0 offen ; E0301000 80000404 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v2, v24, v4, v2 ; D2820002 040A0918 v_mul_f32_e32 v2, v6, v2 ; 10040506 v_mul_f32_e32 v4, v17, v18 ; 10082511 v_mad_f32 v4, v19, v16, v4 ; D2820004 04122113 v_mad_f32 v4, v25, v26, v4 ; D2820004 04123519 v_add_i32_e32 v15, 0x9c, v5 ; 4A1E0AFF 0000009C buffer_load_dword v15, v15, s[0:3], 0 offen ; E0301000 80000F0F s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v4, v24, v15, v4 ; D2820004 04121F18 v_mul_f32_e32 v4, v6, v4 ; 10080906 v_mul_f32_e32 v15, v28, v18 ; 101E251C v_mad_f32 v15, v19, v27, v15 ; D282000F 043E3713 v_mad_f32 v15, v25, v29, v15 ; D282000F 043E3B19 v_add_i32_e32 v5, 0x8c, v5 ; 4A0A0AFF 0000008C buffer_load_dword v5, v5, s[0:3], 0 offen ; E0301000 80000505 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v5, v24, v5, v15 ; D2820005 043E0B18 v_mul_f32_e32 v5, v6, v5 ; 100A0B06 buffer_load_format_xyzw v[14:17], v14, s[4:7], 0 idxen ; E00C2000 80010E0E v_cmp_gt_f32_e64 s[4:5], v7, 0 ; D0080004 00010107 v_cndmask_b32_e64 v26, 0, -1, s[4:5] ; D200001A 00118280 v_cmp_ne_i32_e64 s[24:25], v26, 0 ; D10A0018 0001011A s_buffer_load_dword s9, s[0:3], 0x1f ; C204811F s_buffer_load_dword s4, s[0:3], 0x1e ; C202011E s_buffer_load_dword s7, s[0:3], 0x1d ; C203811D s_buffer_load_dword s6, s[0:3], 0x1c ; C203011C s_buffer_load_dword s13, s[0:3], 0x1b ; C206811B s_buffer_load_dword s5, s[0:3], 0x1a ; C202811A s_buffer_load_dword s12, s[0:3], 0x19 ; C2060119 s_buffer_load_dword s10, s[0:3], 0x18 ; C2050118 s_buffer_load_dword s17, s[0:3], 0x17 ; C2088117 s_buffer_load_dword s8, s[0:3], 0x16 ; C2040116 s_buffer_load_dword s15, s[0:3], 0x15 ; C2078115 s_buffer_load_dword s14, s[0:3], 0x14 ; C2070114 s_buffer_load_dword s19, s[0:3], 0x13 ; C2098113 s_buffer_load_dword s11, s[0:3], 0x12 ; C2058112 s_buffer_load_dword s18, s[0:3], 0x11 ; C2090111 s_buffer_load_dword s16, s[0:3], 0x10 ; C2080110 s_buffer_load_dword s20, s[0:3], 0xe ; C20A010E s_buffer_load_dword s21, s[0:3], 0xd ; C20A810D s_buffer_load_dword s22, s[0:3], 0xc ; C20B010C s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070 s_and_saveexec_b64 s[24:25], s[24:25] ; BE982418 s_xor_b64 s[24:25], exec, s[24:25] ; 8998187E s_cbranch_execz BB0_3 ; BF880000 v_mul_f32_e32 v26, 0x40400000, v21 ; 10342AFF 40400000 v_cvt_i32_f32_e32 v26, v26 ; 7E34111A v_lshlrev_b32_e32 v26, 4, v26 ; 34343484 v_add_i32_e32 v27, 0xa4, v26 ; 4A3634FF 000000A4 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B v_add_i32_e32 v28, 0xa0, v26 ; 4A3834FF 000000A0 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v29, v28, v10 ; 103A151C v_mad_f32 v29, v11, v27, v29 ; D282001D 0476370B v_add_i32_e32 v30, 0xa8, v26 ; 4A3C34FF 000000A8 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v29, v12, v30, v29 ; D282001D 04763D0C v_mad_f32 v0, v7, v29, v0 ; D2820000 04023B07 v_add_i32_e32 v29, 0x94, v26 ; 4A3A34FF 00000094 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D v_add_i32_e32 v31, 0x90, v26 ; 4A3E34FF 00000090 buffer_load_dword v31, v31, s[0:3], 0 offen ; E0301000 80001F1F s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v32, v31, v10 ; 1040151F v_mad_f32 v32, v11, v29, v32 ; D2820020 04823B0B v_add_i32_e32 v33, 0x98, v26 ; 4A4234FF 00000098 buffer_load_dword v33, v33, s[0:3], 0 offen ; E0301000 80002121 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v32, v12, v33, v32 ; D2820020 0482430C v_mad_f32 v1, v7, v32, v1 ; D2820001 04064107 v_add_i32_e32 v32, 0x84, v26 ; 4A4034FF 00000084 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 v_add_i32_e32 v34, 0x80, v26 ; 4A4434FF 00000080 buffer_load_dword v34, v34, s[0:3], 0 offen ; E0301000 80002222 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v35, v34, v10 ; 10461522 v_mad_f32 v35, v11, v32, v35 ; D2820023 048E410B v_add_i32_e32 v36, 0x88, v26 ; 4A4834FF 00000088 buffer_load_dword v36, v36, s[0:3], 0 offen ; E0301000 80002424 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v35, v12, v36, v35 ; D2820023 048E490C v_mad_f32 v3, v7, v35, v3 ; D2820003 040E4707 v_mul_f32_e32 v27, v27, v18 ; 1036251B v_mad_f32 v27, v19, v28, v27 ; D282001B 046E3913 v_mad_f32 v27, v25, v30, v27 ; D282001B 046E3D19 v_add_i32_e32 v28, 0xac, v26 ; 4A3834FF 000000AC buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v24, v28, v27 ; D282001B 046E3918 v_mad_f32 v2, v7, v27, v2 ; D2820002 040A3707 v_mul_f32_e32 v27, v29, v18 ; 1036251D v_mad_f32 v27, v19, v31, v27 ; D282001B 046E3F13 v_mad_f32 v27, v25, v33, v27 ; D282001B 046E4319 v_add_i32_e32 v28, 0x9c, v26 ; 4A3834FF 0000009C buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v27, v24, v28, v27 ; D282001B 046E3918 v_mad_f32 v4, v7, v27, v4 ; D2820004 04123707 v_mul_f32_e32 v27, v32, v18 ; 10362520 v_mad_f32 v27, v19, v34, v27 ; D282001B 046E4513 v_mad_f32 v27, v25, v36, v27 ; D282001B 046E4919 v_add_i32_e32 v26, 0x8c, v26 ; 4A3434FF 0000008C buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v26, v24, v26, v27 ; D282001A 046E3518 v_mad_f32 v5, v7, v26, v5 ; D2820005 04163507 v_cmp_gt_f32_e64 s[26:27], v8, 0 ; D008001A 00010108 v_cndmask_b32_e64 v26, 0, -1, s[26:27] ; D200001A 00698280 v_cmp_ne_i32_e64 s[26:27], v26, 0 ; D10A001A 0001011A s_and_saveexec_b64 s[26:27], s[26:27] ; BE9A241A s_xor_b64 s[26:27], exec, s[26:27] ; 899A1A7E s_cbranch_execz BB0_2 ; BF880000 v_mul_f32_e32 v20, 0x40400000, v22 ; 10282CFF 40400000 v_cvt_i32_f32_e32 v20, v20 ; 7E281114 v_lshlrev_b32_e32 v20, 4, v20 ; 34282884 v_add_i32_e32 v21, 0xa4, v20 ; 4A2A28FF 000000A4 buffer_load_dword v21, v21, s[0:3], 0 offen ; E0301000 80001515 v_add_i32_e32 v22, 0xa0, v20 ; 4A2C28FF 000000A0 buffer_load_dword v22, v22, s[0:3], 0 offen ; E0301000 80001616 s_waitcnt vmcnt(0) expcnt(0) ; BF8C0700 v_mul_f32_e32 v23, v22, v10 ; 102E1516 v_mad_f32 v23, v11, v21, v23 ; D2820017 045E2B0B v_add_i32_e32 v26, 0xa8, v20 ; 4A3428FF 000000A8 buffer_load_dword v26, v26, s[0:3], 0 offen ; E0301000 80001A1A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v23, v12, v26, v23 ; D2820017 045E350C v_mad_f32 v0, v8, v23, v0 ; D2820000 04022F08 v_add_i32_e32 v23, 0x94, v20 ; 4A2E28FF 00000094 buffer_load_dword v23, v23, s[0:3], 0 offen ; E0301000 80001717 v_add_i32_e32 v27, 0x90, v20 ; 4A3628FF 00000090 buffer_load_dword v27, v27, s[0:3], 0 offen ; E0301000 80001B1B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v28, v27, v10 ; 1038151B v_mad_f32 v28, v11, v23, v28 ; D282001C 04722F0B v_add_i32_e32 v29, 0x98, v20 ; 4A3A28FF 00000098 buffer_load_dword v29, v29, s[0:3], 0 offen ; E0301000 80001D1D s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v28, v12, v29, v28 ; D282001C 04723B0C v_mad_f32 v1, v8, v28, v1 ; D2820001 04063908 v_add_i32_e32 v28, 0x84, v20 ; 4A3828FF 00000084 buffer_load_dword v28, v28, s[0:3], 0 offen ; E0301000 80001C1C v_add_i32_e32 v30, 0x80, v20 ; 4A3C28FF 00000080 buffer_load_dword v30, v30, s[0:3], 0 offen ; E0301000 80001E1E s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v31, v30, v10 ; 103E151E v_mad_f32 v31, v11, v28, v31 ; D282001F 047E390B v_add_i32_e32 v32, 0x88, v20 ; 4A4028FF 00000088 buffer_load_dword v32, v32, s[0:3], 0 offen ; E0301000 80002020 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v12, v32, v31 ; D282000A 047E410C v_mad_f32 v3, v8, v10, v3 ; D2820003 040E1508 v_mul_f32_e32 v10, v21, v18 ; 10142515 v_mad_f32 v10, v19, v22, v10 ; D282000A 042A2D13 v_mad_f32 v10, v25, v26, v10 ; D282000A 042A3519 v_add_i32_e32 v11, 0xac, v20 ; 4A1628FF 000000AC buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v24, v11, v10 ; D282000A 042A1718 v_mad_f32 v2, v8, v10, v2 ; D2820002 040A1508 v_mul_f32_e32 v10, v23, v18 ; 10142517 v_mad_f32 v10, v19, v27, v10 ; D282000A 042A3713 v_mad_f32 v10, v25, v29, v10 ; D282000A 042A3B19 v_add_i32_e32 v11, 0x9c, v20 ; 4A1628FF 0000009C buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v24, v11, v10 ; D282000A 042A1718 v_mad_f32 v4, v8, v10, v4 ; D2820004 04121508 v_mul_f32_e32 v10, v28, v18 ; 1014251C v_mad_f32 v10, v19, v30, v10 ; D282000A 042A3D13 v_mad_f32 v10, v25, v32, v10 ; D282000A 042A4119 v_add_i32_e32 v11, 0x8c, v20 ; 4A1628FF 0000008C buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v24, v11, v10 ; D282000A 042A1718 v_mad_f32 v5, v8, v10, v5 ; D2820005 04161508 s_or_b64 exec, exec, s[26:27] ; 88FE1A7E s_or_b64 exec, exec, s[24:25] ; 88FE187E exp 15, 32, 0, 0, 0, v3, v1, v0, v14 ; F800020F 0E000103 s_waitcnt expcnt(0) ; BF8C070F v_subrev_f32_e32 v0, s20, v2 ; 0A000414 v_subrev_f32_e32 v1, s21, v4 ; 0A020815 v_subrev_f32_e32 v3, s22, v5 ; 0A060A16 exp 15, 33, 0, 0, 0, v3, v1, v0, v15 ; F800021F 0F000103 s_waitcnt expcnt(0) ; BF8C070F v_mov_b32_e32 v0, 1.0 ; 7E0002F2 exp 15, 34, 0, 0, 0, v5, v4, v2, v0 ; F800022F 00020405 v_mul_f32_e32 v1, s17, v4 ; 10020811 v_mad_f32 v1, v5, s19, v1 ; D2820001 04042705 v_mad_f32 v1, v2, s13, v1 ; D2820001 04041B02 v_add_f32_e32 v1, s9, v1 ; 06020209 v_mul_f32_e32 v3, s15, v4 ; 1006080F v_mad_f32 v3, v5, s18, v3 ; D2820003 040C2505 v_mad_f32 v3, v2, s12, v3 ; D2820003 040C1902 v_add_f32_e32 v3, s7, v3 ; 06060607 v_mul_f32_e32 v6, s14, v4 ; 100C080E v_mad_f32 v6, v5, s16, v6 ; D2820006 04182105 v_mad_f32 v6, v2, s10, v6 ; D2820006 04181502 v_add_f32_e32 v6, s6, v6 ; 060C0C06 exp 15, 35, 0, 0, 0, v6, v3, v1, v0 ; F800023F 00010306 s_waitcnt expcnt(0) ; BF8C070F v_mul_f32_e32 v0, s8, v4 ; 10000808 v_mad_f32 v0, v5, s11, v0 ; D2820000 04001705 v_mad_f32 v0, v2, s5, v0 ; D2820000 04000B02 v_add_f32_e32 v0, s4, v0 ; 06000004 exp 15, 12, 0, 1, 0, v6, v3, v0, v1 ; F80008CF 01000306 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL IN[2], GENERIC[11], PERSPECTIVE DCL IN[3], GENERIC[12], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL SAMP[2] DCL SAMP[3] DCL SAMP[4] DCL SAMP[5] DCL CONST[0..5] DCL TEMP[0..8], LOCAL IMM[0] FLT32 { 1.4427, 1.0000, 0.5000, 0.0000} IMM[1] FLT32 { 0.0000, 0.3300, 0.0000, 0.0000} 0: ADD TEMP[0].xyz, -CONST[3], IN[2] 1: MUL TEMP[1].w, TEMP[0].zzzz, CONST[1].xxxx 2: MUL TEMP[1].w, TEMP[1].wwww, IMM[0].xxxx 3: EX2 TEMP[1].x, TEMP[1].wwww 4: ADD TEMP[1].w, -TEMP[1].xxxx, IMM[0].yyyy 5: DP3 TEMP[2].x, TEMP[0].xyzz, TEMP[0].xyzz 6: RCP TEMP[3].x, TEMP[0].zzzz 7: MUL TEMP[2].x, TEMP[2].xxxx, CONST[1].yyyy 8: MUL TEMP[1].x, TEMP[1].wwww, TEMP[2].xxxx 9: MUL TEMP[1].x, TEMP[3].xxxx, TEMP[1].xxxx 10: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 11: MOV TEMP[0].x, TEMP[1].xxxx 12: EX2 TEMP[1].x, TEMP[1].xxxx 13: MOV_SAT TEMP[0].x, TEMP[1].xxxx 14: ADD TEMP[1].x, -TEMP[0].xxxx, IMM[0].yyyy 15: RCP TEMP[2].x, IN[3].zzzz 16: MUL TEMP[2].y, TEMP[2].xxxx, IN[3].yyyy 17: MAD TEMP[2].x, TEMP[2].yyyy, IMM[0].zzzz, IMM[0].zzzz 18: MOV TEMP[2].x, TEMP[2].xxxx 19: MOV TEMP[2].y, CONST[1].wwww 20: MOV TEMP[3].xy, TEMP[2].xyyy 21: TEX TEMP[3].x, TEMP[3], SAMP[5], 2D 22: MUL TEMP[1].x, TEMP[1].xxxx, TEMP[3].xxxx 23: MOV TEMP[0].x, TEMP[1].xxxx 24: MOV TEMP[3].w, IMM[0].wwww 25: MOV TEMP[3].x, IN[1].xxxx 26: MOV TEMP[3].y, IN[1].yyyy 27: MOV TEMP[3].z, IN[1].zzzz 28: DP4 TEMP[4].x, TEMP[3], TEMP[3] 29: RSQ TEMP[4].x, TEMP[4].xxxx 30: MUL TEMP[3].xyz, TEMP[3], TEMP[4].xxxx 31: MOV TEMP[4].w, IMM[0].wwww 32: MOV TEMP[4].x, IN[0].xxxx 33: MOV TEMP[4].y, IN[0].yyyy 34: MOV TEMP[4].z, IN[0].zzzz 35: DP4 TEMP[5].x, TEMP[4], TEMP[4] 36: RSQ TEMP[5].x, TEMP[5].xxxx 37: MUL TEMP[4].xyz, TEMP[4], TEMP[5].xxxx 38: DP3 TEMP[5].x, TEMP[3].xyzz, TEMP[4].xyzz 39: ADD TEMP[5].y, TEMP[5].xxxx, TEMP[5].xxxx 40: MAD TEMP[3].yzw, TEMP[4].xxyz, -TEMP[5].yyyy, TEMP[3].xxyz 41: MOV TEMP[4].xyz, TEMP[4].xyzz 42: TEX TEMP[4], TEMP[4], SAMP[2], CUBE 43: MUL TEMP[4].xyz, TEMP[4], TEMP[4] 44: MOV TEMP[5].xyz, TEMP[3].yzww 45: TEX TEMP[5], TEMP[5], SAMP[4], CUBE 46: MOV TEMP[6].w, TEMP[5].wwww 47: MOV TEMP[3].xyz, TEMP[3].yzww 48: TEX TEMP[3].xyz, TEMP[3], SAMP[3], CUBE 49: MUL TEMP[7].yzw, TEMP[3].xxyz, TEMP[3].xxyz 50: MOV TEMP[0].yzw, TEMP[7].zyzw 51: MAD TEMP[5].xyz, TEMP[5], TEMP[5], -TEMP[7].yzww 52: MOV TEMP[3].x, IN[0].wwww 53: MOV TEMP[3].y, IN[1].wwww 54: MOV TEMP[7].xy, TEMP[3].xyyy 55: TEX TEMP[7], TEMP[7], SAMP[1], 2D 56: MOV TEMP[8].xy, TEMP[3].xyyy 57: TEX TEMP[8], TEMP[8], SAMP[0], 2D 58: MUL TEMP[3], TEMP[8], TEMP[8] 59: MUL TEMP[7], TEMP[7], TEMP[7] 60: MUL TEMP[8].w, TEMP[7].wwww, TEMP[7].wwww 61: MUL TEMP[8], TEMP[8].wwww, TEMP[8].wwww 62: MOV_SAT TEMP[8], TEMP[8] 63: MOV TEMP[2].w, TEMP[8].wwww 64: MAD TEMP[5].yzw, TEMP[8].wwww, TEMP[5].xxyz, TEMP[0] 65: MOV TEMP[0].yzw, TEMP[5].zyzw 66: MUL TEMP[5].yzw, TEMP[0], TEMP[7].xxyz 67: MOV TEMP[0].yzw, TEMP[5].zyzw 68: MUL TEMP[5].xyz, CONST[4], CONST[4] 69: MOV TEMP[6].xyz, TEMP[5].xyzx 70: MOV TEMP[7].xyz, CONST[4].xyzx 71: MAD TEMP[5].xyz, TEMP[7], -TEMP[7], IMM[0].yyyy 72: MOV TEMP[7].xyz, TEMP[5].xyzx 73: MAD TEMP[5].xyz, TEMP[3].wwww, TEMP[7], TEMP[6] 74: MOV TEMP[6].xyz, TEMP[5].xyzx 75: MUL TEMP[3].xyz, TEMP[3], TEMP[6] 76: MAD TEMP[0].yzw, TEMP[3].xxyz, TEMP[4].xxyz, TEMP[0] 77: MAX TEMP[3].x, TEMP[0].yyyy, IMM[1].xxxx 78: RSQ TEMP[3].x, TEMP[3].xxxx 79: RCP TEMP[2].x, TEMP[3].xxxx 80: MAX TEMP[3].x, TEMP[0].zzzz, IMM[1].xxxx 81: RSQ TEMP[3].x, TEMP[3].xxxx 82: MAX TEMP[0].x, TEMP[0].wwww, IMM[1].xxxx 83: RSQ TEMP[4].x, TEMP[0].xxxx 84: MUL TEMP[4].x, TEMP[4].xxxx, TEMP[0].xxxx 85: CMP TEMP[4].x, -TEMP[0].xxxx, TEMP[4].xxxx, IMM[0].wwww 86: MOV TEMP[2].z, TEMP[4].xxxx 87: RCP TEMP[0].x, TEMP[3].xxxx 88: MOV TEMP[2].y, TEMP[0].xxxx 89: ADD TEMP[0].yzw, -TEMP[2].xxyz, CONST[2].xxyz 90: MAD TEMP[0].xyz, TEMP[1].xxxx, TEMP[0].yzww, TEMP[2] 91: MOV TEMP[0].xyz, TEMP[0].xyzx 92: ADD TEMP[1].x, -CONST[0].xxxx, IN[2].zzzz 93: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].wwww 94: UIF TEMP[1].xxxx :0 95: MOV TEMP[1].x, IMM[0].yyyy 96: ELSE :0 97: MOV TEMP[1].x, IMM[1].yyyy 98: ENDIF 99: ADD TEMP[2].y, -CONST[5].xxxx, CONST[5].yyyy 100: MAD TEMP[2].y, CONST[5].zzzz, TEMP[2].yyyy, CONST[5].xxxx 101: MUL TEMP[1].w, TEMP[2].yyyy, TEMP[1].xxxx 102: MOV TEMP[0].w, TEMP[1].wwww 103: MOV OUT[0], TEMP[0] 104: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %23 = load <16 x i8> addrspace(2)* %22, !tbaa !0 %24 = call float @llvm.SI.load.const(<16 x i8> %23, i32 0) %25 = call float @llvm.SI.load.const(<16 x i8> %23, i32 16) %26 = call float @llvm.SI.load.const(<16 x i8> %23, i32 20) %27 = call float @llvm.SI.load.const(<16 x i8> %23, i32 28) %28 = call float @llvm.SI.load.const(<16 x i8> %23, i32 32) %29 = call float @llvm.SI.load.const(<16 x i8> %23, i32 36) %30 = call float @llvm.SI.load.const(<16 x i8> %23, i32 40) %31 = call float @llvm.SI.load.const(<16 x i8> %23, i32 48) %32 = call float @llvm.SI.load.const(<16 x i8> %23, i32 52) %33 = call float @llvm.SI.load.const(<16 x i8> %23, i32 56) %34 = call float @llvm.SI.load.const(<16 x i8> %23, i32 64) %35 = call float @llvm.SI.load.const(<16 x i8> %23, i32 68) %36 = call float @llvm.SI.load.const(<16 x i8> %23, i32 72) %37 = call float @llvm.SI.load.const(<16 x i8> %23, i32 80) %38 = call float @llvm.SI.load.const(<16 x i8> %23, i32 84) %39 = call float @llvm.SI.load.const(<16 x i8> %23, i32 88) %40 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %41 = load <8 x i32> addrspace(2)* %40, !tbaa !0 %42 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %43 = load <4 x i32> addrspace(2)* %42, !tbaa !0 %44 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %45 = load <8 x i32> addrspace(2)* %44, !tbaa !0 %46 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %47 = load <4 x i32> addrspace(2)* %46, !tbaa !0 %48 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 2 %49 = load <8 x i32> addrspace(2)* %48, !tbaa !0 %50 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 2 %51 = load <4 x i32> addrspace(2)* %50, !tbaa !0 %52 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 3 %53 = load <8 x i32> addrspace(2)* %52, !tbaa !0 %54 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 3 %55 = load <4 x i32> addrspace(2)* %54, !tbaa !0 %56 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 4 %57 = load <8 x i32> addrspace(2)* %56, !tbaa !0 %58 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 4 %59 = load <4 x i32> addrspace(2)* %58, !tbaa !0 %60 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 5 %61 = load <8 x i32> addrspace(2)* %60, !tbaa !0 %62 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 5 %63 = load <4 x i32> addrspace(2)* %62, !tbaa !0 %64 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %65 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %66 = call float @llvm.SI.fs.interp(i32 2, i32 0, i32 %5, <2 x i32> %7) %67 = call float @llvm.SI.fs.interp(i32 3, i32 0, i32 %5, <2 x i32> %7) %68 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %69 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %70 = call float @llvm.SI.fs.interp(i32 2, i32 1, i32 %5, <2 x i32> %7) %71 = call float @llvm.SI.fs.interp(i32 3, i32 1, i32 %5, <2 x i32> %7) %72 = call float @llvm.SI.fs.interp(i32 0, i32 2, i32 %5, <2 x i32> %7) %73 = call float @llvm.SI.fs.interp(i32 1, i32 2, i32 %5, <2 x i32> %7) %74 = call float @llvm.SI.fs.interp(i32 2, i32 2, i32 %5, <2 x i32> %7) %75 = call float @llvm.SI.fs.interp(i32 1, i32 3, i32 %5, <2 x i32> %7) %76 = call float @llvm.SI.fs.interp(i32 2, i32 3, i32 %5, <2 x i32> %7) %77 = fsub float -0.000000e+00, %31 %78 = fadd float %77, %72 %79 = fsub float -0.000000e+00, %32 %80 = fadd float %79, %73 %81 = fsub float -0.000000e+00, %33 %82 = fadd float %81, %74 %83 = fmul float %82, %25 %84 = fmul float %83, 0x3FF7154CA0000000 %85 = call float @llvm.AMDIL.exp.(float %84) %86 = fsub float -0.000000e+00, %85 %87 = fadd float %86, 1.000000e+00 %88 = fmul float %78, %78 %89 = fmul float %80, %80 %90 = fadd float %89, %88 %91 = fmul float %82, %82 %92 = fadd float %90, %91 %93 = fdiv float 1.000000e+00, %82 %94 = fmul float %92, %26 %95 = fmul float %87, %94 %96 = fmul float %93, %95 %97 = fmul float %96, 0x3FF7154CA0000000 %98 = call float @llvm.AMDIL.exp.(float %97) %99 = call float @llvm.AMDIL.clamp.(float %98, float 0.000000e+00, float 1.000000e+00) %100 = fsub float -0.000000e+00, %99 %101 = fadd float %100, 1.000000e+00 %102 = fdiv float 1.000000e+00, %76 %103 = fmul float %102, %75 %104 = fmul float %103, 5.000000e-01 %105 = fadd float %104, 5.000000e-01 %106 = bitcast float %105 to i32 %107 = bitcast float %27 to i32 %108 = insertelement <2 x i32> undef, i32 %106, i32 0 %109 = insertelement <2 x i32> %108, i32 %107, i32 1 %110 = bitcast <8 x i32> %61 to <32 x i8> %111 = bitcast <4 x i32> %63 to <16 x i8> %112 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %109, <32 x i8> %110, <16 x i8> %111, i32 2) %113 = extractelement <4 x float> %112, i32 0 %114 = fmul float %101, %113 %115 = fmul float %68, %68 %116 = fmul float %69, %69 %117 = fadd float %115, %116 %118 = fmul float %70, %70 %119 = fadd float %117, %118 %120 = fmul float 0.000000e+00, 0.000000e+00 %121 = fadd float %119, %120 %122 = call float @llvm.AMDGPU.rsq.clamped.f32(float %121) %123 = fmul float %68, %122 %124 = fmul float %69, %122 %125 = fmul float %70, %122 %126 = fmul float %64, %64 %127 = fmul float %65, %65 %128 = fadd float %126, %127 %129 = fmul float %66, %66 %130 = fadd float %128, %129 %131 = fmul float 0.000000e+00, 0.000000e+00 %132 = fadd float %130, %131 %133 = call float @llvm.AMDGPU.rsq.clamped.f32(float %132) %134 = fmul float %64, %133 %135 = fmul float %65, %133 %136 = fmul float %66, %133 %137 = fmul float %123, %134 %138 = fmul float %124, %135 %139 = fadd float %138, %137 %140 = fmul float %125, %136 %141 = fadd float %139, %140 %142 = fadd float %141, %141 %143 = fsub float -0.000000e+00, %142 %144 = fmul float %134, %143 %145 = fadd float %144, %123 %146 = fsub float -0.000000e+00, %142 %147 = fmul float %135, %146 %148 = fadd float %147, %124 %149 = fsub float -0.000000e+00, %142 %150 = fmul float %136, %149 %151 = fadd float %150, %125 %152 = insertelement <4 x float> undef, float %134, i32 0 %153 = insertelement <4 x float> %152, float %135, i32 1 %154 = insertelement <4 x float> %153, float %136, i32 2 %155 = insertelement <4 x float> %154, float 0.000000e+00, i32 3 %156 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %155) %157 = extractelement <4 x float> %156, i32 0 %158 = extractelement <4 x float> %156, i32 1 %159 = extractelement <4 x float> %156, i32 2 %160 = extractelement <4 x float> %156, i32 3 %161 = call float @fabs(float %159) %162 = fdiv float 1.000000e+00, %161 %163 = fmul float %157, %162 %164 = fadd float %163, 1.500000e+00 %165 = fmul float %158, %162 %166 = fadd float %165, 1.500000e+00 %167 = bitcast float %166 to i32 %168 = bitcast float %164 to i32 %169 = bitcast float %160 to i32 %170 = insertelement <4 x i32> undef, i32 %167, i32 0 %171 = insertelement <4 x i32> %170, i32 %168, i32 1 %172 = insertelement <4 x i32> %171, i32 %169, i32 2 %173 = insertelement <4 x i32> %172, i32 undef, i32 3 %174 = bitcast <8 x i32> %49 to <32 x i8> %175 = bitcast <4 x i32> %51 to <16 x i8> %176 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %173, <32 x i8> %174, <16 x i8> %175, i32 4) %177 = extractelement <4 x float> %176, i32 0 %178 = extractelement <4 x float> %176, i32 1 %179 = extractelement <4 x float> %176, i32 2 %180 = fmul float %177, %177 %181 = fmul float %178, %178 %182 = fmul float %179, %179 %183 = insertelement <4 x float> undef, float %145, i32 0 %184 = insertelement <4 x float> %183, float %148, i32 1 %185 = insertelement <4 x float> %184, float %151, i32 2 %186 = insertelement <4 x float> %185, float 0.000000e+00, i32 3 %187 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %186) %188 = extractelement <4 x float> %187, i32 0 %189 = extractelement <4 x float> %187, i32 1 %190 = extractelement <4 x float> %187, i32 2 %191 = extractelement <4 x float> %187, i32 3 %192 = call float @fabs(float %190) %193 = fdiv float 1.000000e+00, %192 %194 = fmul float %188, %193 %195 = fadd float %194, 1.500000e+00 %196 = fmul float %189, %193 %197 = fadd float %196, 1.500000e+00 %198 = bitcast float %197 to i32 %199 = bitcast float %195 to i32 %200 = bitcast float %191 to i32 %201 = insertelement <4 x i32> undef, i32 %198, i32 0 %202 = insertelement <4 x i32> %201, i32 %199, i32 1 %203 = insertelement <4 x i32> %202, i32 %200, i32 2 %204 = insertelement <4 x i32> %203, i32 undef, i32 3 %205 = bitcast <8 x i32> %57 to <32 x i8> %206 = bitcast <4 x i32> %59 to <16 x i8> %207 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %204, <32 x i8> %205, <16 x i8> %206, i32 4) %208 = extractelement <4 x float> %207, i32 0 %209 = extractelement <4 x float> %207, i32 1 %210 = extractelement <4 x float> %207, i32 2 %211 = insertelement <4 x float> undef, float %145, i32 0 %212 = insertelement <4 x float> %211, float %148, i32 1 %213 = insertelement <4 x float> %212, float %151, i32 2 %214 = insertelement <4 x float> %213, float %151, i32 3 %215 = call <4 x float> @llvm.AMDGPU.cube(<4 x float> %214) %216 = extractelement <4 x float> %215, i32 0 %217 = extractelement <4 x float> %215, i32 1 %218 = extractelement <4 x float> %215, i32 2 %219 = extractelement <4 x float> %215, i32 3 %220 = call float @fabs(float %218) %221 = fdiv float 1.000000e+00, %220 %222 = fmul float %216, %221 %223 = fadd float %222, 1.500000e+00 %224 = fmul float %217, %221 %225 = fadd float %224, 1.500000e+00 %226 = bitcast float %225 to i32 %227 = bitcast float %223 to i32 %228 = bitcast float %219 to i32 %229 = insertelement <4 x i32> undef, i32 %226, i32 0 %230 = insertelement <4 x i32> %229, i32 %227, i32 1 %231 = insertelement <4 x i32> %230, i32 %228, i32 2 %232 = insertelement <4 x i32> %231, i32 undef, i32 3 %233 = bitcast <8 x i32> %53 to <32 x i8> %234 = bitcast <4 x i32> %55 to <16 x i8> %235 = call <4 x float> @llvm.SI.sample.v4i32(<4 x i32> %232, <32 x i8> %233, <16 x i8> %234, i32 4) %236 = extractelement <4 x float> %235, i32 0 %237 = extractelement <4 x float> %235, i32 1 %238 = extractelement <4 x float> %235, i32 2 %239 = fmul float %236, %236 %240 = fmul float %237, %237 %241 = fmul float %238, %238 %242 = fsub float -0.000000e+00, %239 %243 = fmul float %208, %208 %244 = fadd float %243, %242 %245 = fsub float -0.000000e+00, %240 %246 = fmul float %209, %209 %247 = fadd float %246, %245 %248 = fsub float -0.000000e+00, %241 %249 = fmul float %210, %210 %250 = fadd float %249, %248 %251 = bitcast float %67 to i32 %252 = bitcast float %71 to i32 %253 = insertelement <2 x i32> undef, i32 %251, i32 0 %254 = insertelement <2 x i32> %253, i32 %252, i32 1 %255 = bitcast <8 x i32> %45 to <32 x i8> %256 = bitcast <4 x i32> %47 to <16 x i8> %257 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %254, <32 x i8> %255, <16 x i8> %256, i32 2) %258 = extractelement <4 x float> %257, i32 0 %259 = extractelement <4 x float> %257, i32 1 %260 = extractelement <4 x float> %257, i32 2 %261 = extractelement <4 x float> %257, i32 3 %262 = bitcast float %67 to i32 %263 = bitcast float %71 to i32 %264 = insertelement <2 x i32> undef, i32 %262, i32 0 %265 = insertelement <2 x i32> %264, i32 %263, i32 1 %266 = bitcast <8 x i32> %41 to <32 x i8> %267 = bitcast <4 x i32> %43 to <16 x i8> %268 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %265, <32 x i8> %266, <16 x i8> %267, i32 2) %269 = extractelement <4 x float> %268, i32 0 %270 = extractelement <4 x float> %268, i32 1 %271 = extractelement <4 x float> %268, i32 2 %272 = extractelement <4 x float> %268, i32 3 %273 = fmul float %269, %269 %274 = fmul float %270, %270 %275 = fmul float %271, %271 %276 = fmul float %272, %272 %277 = fmul float %258, %258 %278 = fmul float %259, %259 %279 = fmul float %260, %260 %280 = fmul float %261, %261 %281 = fmul float %280, %280 %282 = fmul float %281, %281 %283 = fmul float %281, %281 %284 = fmul float %281, %281 %285 = fmul float %281, %281 %286 = call float @llvm.AMDIL.clamp.(float %282, float 0.000000e+00, float 1.000000e+00) %287 = call float @llvm.AMDIL.clamp.(float %283, float 0.000000e+00, float 1.000000e+00) %288 = call float @llvm.AMDIL.clamp.(float %284, float 0.000000e+00, float 1.000000e+00) %289 = call float @llvm.AMDIL.clamp.(float %285, float 0.000000e+00, float 1.000000e+00) %290 = fmul float %289, %244 %291 = fadd float %290, %239 %292 = fmul float %289, %247 %293 = fadd float %292, %240 %294 = fmul float %289, %250 %295 = fadd float %294, %241 %296 = fmul float %291, %277 %297 = fmul float %293, %278 %298 = fmul float %295, %279 %299 = fmul float %34, %34 %300 = fmul float %35, %35 %301 = fmul float %36, %36 %302 = fsub float -0.000000e+00, %34 %303 = fmul float %34, %302 %304 = fadd float %303, 1.000000e+00 %305 = fsub float -0.000000e+00, %35 %306 = fmul float %35, %305 %307 = fadd float %306, 1.000000e+00 %308 = fsub float -0.000000e+00, %36 %309 = fmul float %36, %308 %310 = fadd float %309, 1.000000e+00 %311 = fmul float %276, %304 %312 = fadd float %311, %299 %313 = fmul float %276, %307 %314 = fadd float %313, %300 %315 = fmul float %276, %310 %316 = fadd float %315, %301 %317 = fmul float %273, %312 %318 = fmul float %274, %314 %319 = fmul float %275, %316 %320 = fmul float %317, %180 %321 = fadd float %320, %296 %322 = fmul float %318, %181 %323 = fadd float %322, %297 %324 = fmul float %319, %182 %325 = fadd float %324, %298 %326 = call float @llvm.maxnum.f32(float %321, float 0x3E7AD7F2A0000000) %327 = call float @llvm.AMDGPU.rsq.clamped.f32(float %326) %328 = fdiv float 1.000000e+00, %327 %329 = call float @llvm.maxnum.f32(float %323, float 0x3E7AD7F2A0000000) %330 = call float @llvm.AMDGPU.rsq.clamped.f32(float %329) %331 = call float @llvm.maxnum.f32(float %325, float 0x3E7AD7F2A0000000) %332 = call float @llvm.AMDGPU.rsq.clamped.f32(float %331) %333 = fmul float %332, %331 %334 = fsub float -0.000000e+00, %331 %335 = call float @llvm.AMDGPU.cndlt(float %334, float %333, float 0.000000e+00) %336 = fdiv float 1.000000e+00, %330 %337 = fsub float -0.000000e+00, %328 %338 = fadd float %337, %28 %339 = fsub float -0.000000e+00, %336 %340 = fadd float %339, %29 %341 = fsub float -0.000000e+00, %335 %342 = fadd float %341, %30 %343 = fmul float %114, %338 %344 = fadd float %343, %328 %345 = fmul float %114, %340 %346 = fadd float %345, %336 %347 = fmul float %114, %342 %348 = fadd float %347, %335 %349 = fsub float -0.000000e+00, %24 %350 = fadd float %349, %74 %351 = fcmp oge float %350, 0.000000e+00 %352 = sext i1 %351 to i32 %353 = bitcast i32 %352 to float %354 = bitcast float %353 to i32 %355 = icmp ne i32 %354, 0 %. = select i1 %355, float 1.000000e+00, float 0x3FD51EB860000000 %356 = fsub float -0.000000e+00, %37 %357 = fadd float %356, %38 %358 = fmul float %39, %357 %359 = fadd float %358, %37 %360 = fmul float %359, %. %361 = call i32 @llvm.SI.packf16(float %344, float %346) %362 = bitcast i32 %361 to float %363 = call i32 @llvm.SI.packf16(float %348, float %360) %364 = bitcast i32 %363 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %362, float %364, float %362, float %364) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.exp.(float) #2 ; Function Attrs: readnone declare float @llvm.AMDIL.clamp.(float, float, float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.AMDGPU.rsq.clamped.f32(float) #1 ; Function Attrs: readnone declare <4 x float> @llvm.AMDGPU.cube(<4 x float>) #2 ; Function Attrs: readnone declare float @fabs(float) #2 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v4i32(<4 x i32>, <32 x i8>, <16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare float @llvm.maxnum.f32(float, float) #1 ; Function Attrs: readnone declare float @llvm.AMDGPU.cndlt(float, float, float) #2 ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 0, 0, [m0] ; C8080000 v_interp_p2_f32 v2, [v2], v1, 0, 0, [m0] ; C8090001 v_interp_p1_f32 v3, v0, 1, 0, [m0] ; C80C0100 v_interp_p2_f32 v3, [v3], v1, 1, 0, [m0] ; C80D0101 v_mul_f32_e32 v4, v3, v3 ; 10080703 v_mad_f32 v4, v2, v2, v4 ; D2820004 04120502 v_interp_p1_f32 v5, v0, 2, 0, [m0] ; C8140200 v_interp_p2_f32 v5, [v5], v1, 2, 0, [m0] ; C8150201 v_mad_f32 v4, v5, v5, v4 ; D2820004 04120B05 v_mov_b32_e32 v9, 0 ; 7E120280 v_add_f32_e32 v4, 0, v4 ; 06080880 v_rsq_clamp_f32_e32 v4, v4 ; 7E085904 v_mul_f32_e32 v10, v4, v2 ; 10140504 v_interp_p1_f32 v2, v0, 0, 1, [m0] ; C8080400 v_interp_p2_f32 v2, [v2], v1, 0, 1, [m0] ; C8090401 v_interp_p1_f32 v14, v0, 1, 1, [m0] ; C8380500 v_interp_p2_f32 v14, [v14], v1, 1, 1, [m0] ; C8390501 v_mul_f32_e32 v15, v14, v14 ; 101E1D0E v_mad_f32 v15, v2, v2, v15 ; D282000F 043E0502 v_interp_p1_f32 v16, v0, 2, 1, [m0] ; C8400600 v_interp_p2_f32 v16, [v16], v1, 2, 1, [m0] ; C8410601 v_mad_f32 v15, v16, v16, v15 ; D282000F 043E2110 v_add_f32_e32 v15, 0, v15 ; 061E1E80 v_rsq_clamp_f32_e32 v15, v15 ; 7E1E590F v_mul_f32_e32 v17, v15, v2 ; 1022050F v_mul_f32_e32 v17, v10, v17 ; 1022230A v_mul_f32_e32 v11, v4, v3 ; 10160704 v_mul_f32_e32 v3, v15, v14 ; 10061D0F v_mad_f32 v3, v3, v11, v17 ; D2820003 04461703 v_mul_f32_e32 v12, v4, v5 ; 10180B04 v_mul_f32_e32 v4, v15, v16 ; 1008210F v_mad_f32 v3, v4, v12, v3 ; D2820003 040E1904 v_add_f32_e32 v3, v3, v3 ; 06060703 v_mul_f32_e32 v4, v3, v12 ; 10081903 v_mad_f32 v8, v16, v15, -v4 ; D2820008 84121F10 v_mul_f32_e32 v4, v3, v11 ; 10081703 v_mad_f32 v7, v14, v15, -v4 ; D2820007 84121F0E v_mul_f32_e32 v3, v3, v10 ; 10061503 v_mad_f32 v6, v2, v15, -v3 ; D2820006 840E1F02 v_cubeid_f32 v17, v6, v7, v8 ; D2880011 04220F06 v_cubema_f32 v16, v6, v7, v8 ; D28E0010 04220F06 v_cubesc_f32 v15, v6, v7, v8 ; D28A000F 04220F06 v_cubetc_f32 v14, v6, v7, v8 ; D28C000E 04220F06 v_rcp_f32_e64 v2, |v16| ; D3540102 00000110 v_mov_b32_e32 v3, 0x3fc00000 ; 7E0602FF 3FC00000 v_mad_f32 v16, v14, v2, v3 ; D2820010 040E050E v_mad_f32 v15, v15, v2, v3 ; D282000F 040E050F s_load_dwordx4 s[20:23], s[4:5], 0x0 ; C08A0500 s_load_dwordx4 s[44:47], s[4:5], 0x4 ; C0960504 s_load_dwordx4 s[24:27], s[4:5], 0x8 ; C08C0508 s_load_dwordx4 s[48:51], s[4:5], 0xc ; C098050C s_load_dwordx4 s[52:55], s[4:5], 0x10 ; C09A0510 s_load_dwordx4 s[8:11], s[4:5], 0x14 ; C0840514 s_load_dwordx8 s[28:35], s[6:7], 0x0 ; C0CE0700 s_load_dwordx8 s[56:63], s[6:7], 0x8 ; C0DC0708 s_load_dwordx8 s[36:43], s[6:7], 0x10 ; C0D20710 s_load_dwordx8 s[64:71], s[6:7], 0x18 ; C0E00718 s_load_dwordx8 s[72:79], s[6:7], 0x20 ; C0E40720 s_load_dwordx8 s[12:19], s[6:7], 0x28 ; C0C60728 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v[14:16], 7, 0, 0, 0, 0, 0, 0, 0, v[15:18], s[72:79], s[52:55] ; F0800700 01B20E0F v_mov_b32_e32 v17, v6 ; 7E220306 v_mov_b32_e32 v18, v7 ; 7E240307 v_mov_b32_e32 v19, v8 ; 7E260308 v_mov_b32_e32 v20, v9 ; 7E280309 v_mov_b32_e32 v20, v8 ; 7E280308 v_cubeid_f32 v24, v17, v18, v19 ; D2880018 044E2511 v_cubema_f32 v23, v17, v18, v19 ; D28E0017 044E2511 v_cubesc_f32 v22, v17, v18, v19 ; D28A0016 044E2511 v_cubetc_f32 v21, v17, v18, v19 ; D28C0015 044E2511 v_rcp_f32_e64 v2, |v23| ; D3540102 00000117 v_mad_f32 v23, v21, v2, v3 ; D2820017 040E0515 v_mad_f32 v22, v22, v2, v3 ; D2820016 040E0516 image_sample v[17:19], 7, 0, 0, 0, 0, 0, 0, 0, v[22:25], s[64:71], s[48:51] ; F0800700 01901116 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v18, v18 ; 10042512 v_mad_f32 v4, v15, v15, -v2 ; D2820004 840A1F0F v_interp_p1_f32 v21, v0, 3, 1, [m0] ; C8540700 v_interp_p2_f32 v21, [v21], v1, 3, 1, [m0] ; C8550701 v_interp_p1_f32 v20, v0, 3, 0, [m0] ; C8500300 v_interp_p2_f32 v20, [v20], v1, 3, 0, [m0] ; C8510301 image_sample v[22:25], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[56:63], s[44:47] ; F0800F00 016E1614 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v5, v25, v25 ; 100A3319 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_mul_f32_e32 v5, v5, v5 ; 100A0B05 v_add_f32_e64 v5, 0, v5 clamp ; D2060805 00020A80 v_mad_f32 v2, v5, v4, v2 ; D2820002 040A0905 v_mul_f32_e32 v4, v23, v23 ; 10082F17 v_mul_f32_e32 v2, v4, v2 ; 10040504 v_mov_b32_e32 v13, v9 ; 7E1A0309 v_cubeid_f32 v29, v10, v11, v12 ; D288001D 0432170A v_cubema_f32 v28, v10, v11, v12 ; D28E001C 0432170A v_cubesc_f32 v27, v10, v11, v12 ; D28A001B 0432170A v_cubetc_f32 v26, v10, v11, v12 ; D28C001A 0432170A v_rcp_f32_e64 v4, |v28| ; D3540104 0000011C v_mad_f32 v28, v26, v4, v3 ; D282001C 040E091A v_mad_f32 v27, v27, v4, v3 ; D282001B 040E091B image_sample v[6:8], 7, 0, 0, 0, 0, 0, 0, 0, v[27:30], s[36:43], s[24:27] ; F0800700 00C9061B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v7, v7 ; 10060F07 image_sample v[9:12], 15, 0, 0, 0, 0, 0, 0, 0, v[20:21], s[28:35], s[20:23] ; F0800F00 00A70914 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v4, v12, v12 ; 1008190C s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F s_buffer_load_dword s4, s[0:3], 0x11 ; C2020111 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, -s4, s4, 1.0 ; D282000D 23C80804 v_mul_f32_e64 v20, s4, s4 ; D2100014 00000804 v_mad_f32 v13, v4, v13, v20 ; D282000D 04521B04 v_mul_f32_e32 v20, v10, v10 ; 1028150A v_mul_f32_e32 v13, v13, v20 ; 101A290D v_mad_f32 v2, v13, v3, v2 ; D2820002 040A070D v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_rcp_f32_e32 v2, v2 ; 7E045502 s_buffer_load_dword s4, s[0:3], 0x9 ; C2020109 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_interp_p1_f32 v13, v0, 1, 2, [m0] ; C8340900 v_interp_p2_f32 v13, [v13], v1, 1, 2, [m0] ; C8350901 s_buffer_load_dword s4, s[0:3], 0xd ; C202010D s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v13, s4, v13 ; 0A1A1A04 v_interp_p1_f32 v20, v0, 0, 2, [m0] ; C8500800 v_interp_p2_f32 v20, [v20], v1, 0, 2, [m0] ; C8510801 s_buffer_load_dword s4, s[0:3], 0xc ; C202010C s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v20, s4, v20 ; 0A282804 v_mul_f32_e32 v20, v20, v20 ; 10282914 v_mad_f32 v13, v13, v13, v20 ; D282000D 04521B0D v_interp_p1_f32 v20, v0, 2, 2, [m0] ; C8500A00 v_interp_p2_f32 v20, [v20], v1, 2, 2, [m0] ; C8510A01 s_buffer_load_dword s4, s[0:3], 0xe ; C202010E s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v21, s4, v20 ; 0A2A2804 v_mad_f32 v13, v21, v21, v13 ; D282000D 04362B15 s_buffer_load_dword s4, s[0:3], 0x5 ; C2020105 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v13, s4, v13 ; 101A1A04 s_buffer_load_dword s4, s[0:3], 0x4 ; C2020104 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v26, s4, v21 ; 10342A04 v_mul_f32_e32 v26, 0x3fb8aa65, v26 ; 103434FF 3FB8AA65 v_exp_f32_e32 v26, v26 ; 7E344B1A v_sub_f32_e32 v26, 1.0, v26 ; 083434F2 v_mul_f32_e32 v13, v13, v26 ; 101A350D v_rcp_f32_e32 v21, v21 ; 7E2A5515 v_mul_f32_e32 v13, v13, v21 ; 101A2B0D v_mul_f32_e32 v13, 0x3fb8aa65, v13 ; 101A1AFF 3FB8AA65 v_exp_f32_e32 v13, v13 ; 7E1A4B0D v_add_f32_e64 v13, 0, v13 clamp ; D206080D 00021A80 v_sub_f32_e32 v13, 1.0, v13 ; 081A1AF2 v_interp_p1_f32 v21, v0, 1, 3, [m0] ; C8540D00 v_interp_p2_f32 v21, [v21], v1, 1, 3, [m0] ; C8550D01 v_interp_p1_f32 v26, v0, 2, 3, [m0] ; C8680E00 v_interp_p2_f32 v26, [v26], v1, 2, 3, [m0] ; C8690E01 v_rcp_f32_e32 v0, v26 ; 7E00551A v_mul_f32_e32 v0, v21, v0 ; 10000115 v_mad_f32 v0, 0.5, v0, 0.5 ; D2820000 03C200F0 s_buffer_load_dword s4, s[0:3], 0x7 ; C2020107 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v1, s4 ; 7E020204 image_sample v0, 1, 0, 0, 0, 0, 0, 0, 0, v[0:1], s[12:19], s[8:11] ; F0800100 00430000 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v0, v13 ; 10001B00 v_mad_f32 v1, v0, v3, v2 ; D2820001 040A0700 v_mul_f32_e32 v2, v17, v17 ; 10042311 v_mad_f32 v3, v14, v14, -v2 ; D2820003 840A1D0E v_mad_f32 v2, v5, v3, v2 ; D2820002 040A0705 v_mul_f32_e32 v3, v22, v22 ; 10062D16 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, v6, v6 ; 10060D06 s_buffer_load_dword s4, s[0:3], 0x10 ; C2020110 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v13, -s4, s4, 1.0 ; D282000D 23C80804 v_mul_f32_e64 v21, s4, s4 ; D2100015 00000804 v_mad_f32 v13, v4, v13, v21 ; D282000D 04561B04 v_mul_f32_e32 v21, v9, v9 ; 102A1309 v_mul_f32_e32 v13, v13, v21 ; 101A2B0D v_mad_f32 v2, v13, v3, v2 ; D2820002 040A070D v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v2, v2 ; 7E045902 v_rcp_f32_e32 v2, v2 ; 7E045502 s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108 s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v2, v0, v3, v2 ; D2820002 040A0700 v_cvt_pkrtz_f16_f32_e32 v1, v2, v1 ; 5E020302 v_mul_f32_e32 v2, v19, v19 ; 10042713 v_mad_f32 v3, v16, v16, -v2 ; D2820003 840A2110 v_mad_f32 v2, v5, v3, v2 ; D2820002 040A0705 v_mul_f32_e32 v3, v24, v24 ; 10063118 v_mul_f32_e32 v2, v3, v2 ; 10040503 v_mul_f32_e32 v3, v8, v8 ; 10061108 s_buffer_load_dword s4, s[0:3], 0x12 ; C2020112 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v5, -s4, s4, 1.0 ; D2820005 23C80804 v_mul_f32_e64 v6, s4, s4 ; D2100006 00000804 v_mad_f32 v4, v4, v5, v6 ; D2820004 041A0B04 v_mul_f32_e32 v5, v11, v11 ; 100A170B v_mul_f32_e32 v4, v4, v5 ; 10080B04 v_mad_f32 v2, v4, v3, v2 ; D2820002 040A0704 v_max_f32_e32 v2, 0x33d6bf95, v2 ; 200404FF 33D6BF95 v_rsq_clamp_f32_e32 v3, v2 ; 7E065902 v_mul_f32_e32 v3, v2, v3 ; 10060702 v_xor_b32_e32 v2, 0x80000000, v2 ; 3A0404FF 80000000 v_cmp_gt_f32_e32 vcc, 0, v2 ; 7C080480 v_cndmask_b32_e64 v2, 0, v3, vcc ; D2000002 01AA0680 s_buffer_load_dword s4, s[0:3], 0xa ; C202010A s_waitcnt lgkmcnt(0) ; BF8C007F v_sub_f32_e32 v3, s4, v2 ; 08060404 v_mad_f32 v0, v0, v3, v2 ; D2820000 040A0700 s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100 s_waitcnt lgkmcnt(0) ; BF8C007F v_subrev_f32_e32 v2, s4, v20 ; 0A042804 v_cmp_ge_f32_e64 s[4:5], v2, 0 ; D00C0004 00010102 v_cndmask_b32_e64 v2, 0, -1, s[4:5] ; D2000002 00118280 v_cmp_ne_i32_e64 s[4:5], v2, 0 ; D10A0004 00010102 v_mov_b32_e32 v2, 0x3ea8f5c3 ; 7E0402FF 3EA8F5C3 v_cndmask_b32_e64 v2, v2, 1.0, s[4:5] ; D2000002 1011E502 s_buffer_load_dword s4, s[0:3], 0x14 ; C2020114 s_buffer_load_dword s5, s[0:3], 0x15 ; C2028115 s_waitcnt lgkmcnt(0) ; BF8C007F v_mov_b32_e32 v3, s4 ; 7E060204 v_sub_f32_e32 v3, s5, v3 ; 08060605 s_buffer_load_dword s0, s[0:3], 0x16 ; C2000116 v_mov_b32_e32 v4, s4 ; 7E080204 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v3, v3, s0, v4 ; D2820003 04100103 v_mul_f32_e32 v2, v2, v3 ; 10040702 v_cvt_pkrtz_f16_f32_e32 v0, v0, v2 ; 5E000500 exp 15, 0, 1, 1, 1, v1, v0, v1, v0 ; F8001C0F 00010001 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL CONST[0..95] DCL TEMP[0..2], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.0000, 0.0000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} 0: FRC TEMP[0].x, IN[1].zzzz 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].zzzz 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: MUL TEMP[2].xyz, IN[0].yyyy, CONST[ADDR[0].x].xyww 7: MOV TEMP[0].xyz, TEMP[2].xyzx 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: MAD TEMP[2].xyz, IN[0].xxxx, CONST[ADDR[0].x].xyww, TEMP[0] 11: MOV TEMP[0].xyz, TEMP[2].xyzx 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: MAD TEMP[0].xyz, IN[0].zzzz, CONST[ADDR[0].x].xyww, TEMP[0] 16: F2I TEMP[1].x, TEMP[1].xxxx 17: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 18: UARL ADDR[0].x, TEMP[1].xxxx 19: ADD TEMP[0].xyw, TEMP[0].xyzz, CONST[ADDR[0].x] 20: MOV TEMP[0].xyw, TEMP[0].xyxw 21: MOV TEMP[0].z, IMM[0].yyyy 22: MOV OUT[0], TEMP[0] 23: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %14 = load <16 x i8> addrspace(2)* %13, !tbaa !0 %15 = add i32 %5, %7 %16 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %14, i32 0, i32 %15) %17 = extractelement <4 x float> %16, i32 0 %18 = extractelement <4 x float> %16, i32 1 %19 = extractelement <4 x float> %16, i32 2 %20 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %21 = load <16 x i8> addrspace(2)* %20, !tbaa !0 %22 = add i32 %5, %7 %23 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %21, i32 0, i32 %22) %24 = extractelement <4 x float> %23, i32 2 %25 = call float @llvm.AMDIL.fraction.(float %24) %26 = fsub float -0.000000e+00, %25 %27 = fadd float %26, %24 %28 = fmul float %27, 4.000000e+00 %29 = fptosi float %28 to i32 %30 = bitcast i32 %29 to float %31 = bitcast float %30 to i32 %32 = add i32 1, %31 %33 = bitcast i32 %32 to float %34 = bitcast float %33 to i32 %35 = shl i32 %34, 4 %36 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %35) %37 = fmul float %18, %36 %38 = shl i32 %34, 4 %39 = add i32 %38, 4 %40 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %39) %41 = fmul float %18, %40 %42 = shl i32 %34, 4 %43 = add i32 %42, 12 %44 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %43) %45 = fmul float %18, %44 %46 = fptosi float %28 to i32 %47 = bitcast i32 %46 to float %48 = bitcast float %47 to i32 %49 = shl i32 %48, 4 %50 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %49) %51 = fmul float %17, %50 %52 = fadd float %51, %37 %53 = shl i32 %48, 4 %54 = add i32 %53, 4 %55 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %54) %56 = fmul float %17, %55 %57 = fadd float %56, %41 %58 = shl i32 %48, 4 %59 = add i32 %58, 12 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %17, %60 %62 = fadd float %61, %45 %63 = fptosi float %28 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = add i32 2, %65 %67 = bitcast i32 %66 to float %68 = bitcast float %67 to i32 %69 = shl i32 %68, 4 %70 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %69) %71 = fmul float %19, %70 %72 = fadd float %71, %52 %73 = shl i32 %68, 4 %74 = add i32 %73, 4 %75 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %74) %76 = fmul float %19, %75 %77 = fadd float %76, %57 %78 = shl i32 %68, 4 %79 = add i32 %78, 12 %80 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %79) %81 = fmul float %19, %80 %82 = fadd float %81, %62 %83 = fptosi float %28 to i32 %84 = bitcast i32 %83 to float %85 = bitcast float %84 to i32 %86 = add i32 3, %85 %87 = bitcast i32 %86 to float %88 = bitcast float %87 to i32 %89 = shl i32 %88, 4 %90 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %89) %91 = fadd float %72, %90 %92 = shl i32 %88, 4 %93 = add i32 %92, 4 %94 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %93) %95 = fadd float %77, %94 %96 = shl i32 %88, 4 %97 = add i32 %96, 12 %98 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %97) %99 = fadd float %82, %98 call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %91, float %95, float 0.000000e+00, float %99) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v3 ; 7E0A4103 v_subrev_f32_e32 v1, v5, v3 ; 0A020705 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v1, s[0:3], 0 offen ; E0301000 80000201 v_add_i32_e32 v3, 16, v1 ; 4A060290 buffer_load_dword v4, v3, s[0:3], 0 offen ; E0301000 80000403 buffer_load_format_xyzw v[5:8], v0, s[4:7], 0 idxen ; E00C2000 80010500 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v4, v6 ; 10000D04 v_mad_f32 v0, v5, v2, v0 ; D2820000 04020505 v_add_i32_e32 v2, 32, v1 ; 4A0402A0 buffer_load_dword v4, v2, s[0:3], 0 offen ; E0301000 80000402 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v7, v4, v0 ; D2820000 04020907 v_add_i32_e32 v4, 48, v1 ; 4A0802B0 buffer_load_dword v9, v4, s[0:3], 0 offen ; E0301000 80000904 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v9, v0 ; 06000109 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v6 ; 10140D0A v_mad_f32 v9, v5, v9, v10 ; D2820009 042A1305 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v7, v10, v9 ; D2820009 04261507 v_or_b32_e32 v10, 12, v4 ; 3814088C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, v10, v9 ; 0612130A v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v3, 4, v3 ; 38060684 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v6 ; 10060D03 v_mad_f32 v1, v5, v1, v3 ; D2820001 040E0305 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v7, v2, v1 ; D2820001 04060507 v_or_b32_e32 v2, 4, v4 ; 38040884 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v2, v1 ; 06020302 v_mov_b32_e32 v2, 0 ; 7E040280 exp 15, 12, 0, 1, 0, v0, v1, v2, v9 ; F80008CF 09020100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL OUT[0], COLOR IMM[0] FLT32 { 0.0000, 0.0000, 0.0000, 0.0000} 0: MOV OUT[0], IMM[0].xxxx 1: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %23 = bitcast i32 %22 to float %24 = call i32 @llvm.SI.packf16(float 0.000000e+00, float 0.000000e+00) %25 = bitcast i32 %24 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %23, float %25, float %23, float %25) ret void } ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } Shader Disassembly: v_cvt_pkrtz_f16_f32_e64 v0, 0, 0 ; D25E0000 00010080 exp 15, 0, 1, 1, 1, v0, v0, v0, v0 ; F8001C0F 00000000 s_endpgm ; BF810000 SHADER KEY instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0} as_es = 0 VERT DCL IN[0] DCL IN[1] DCL OUT[0], POSITION DCL OUT[1], GENERIC[9] DCL OUT[2], GENERIC[10] DCL CONST[0..196] DCL TEMP[0..4], LOCAL DCL ADDR[0] IMM[0] FLT32 { 4.0000, 0.5000, -0.5000, 0.0000} IMM[1] INT32 {1, 2, 3, 0} IMM[2] FLT32 { 0.0000, 1.0000, 0.0000, 0.0000} 0: FRC TEMP[0].x, IN[1].zzzz 1: ADD TEMP[1].x, -TEMP[0].xxxx, IN[1].zzzz 2: MUL TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 3: F2I TEMP[2].x, TEMP[1].xxxx 4: UADD TEMP[2].x, IMM[1].xxxx, TEMP[2].xxxx 5: UARL ADDR[0].x, TEMP[2].xxxx 6: UARL ADDR[0].x, TEMP[2].xxxx 7: MUL TEMP[0], IN[0].yyyy, CONST[ADDR[0].x] 8: F2I TEMP[2].x, TEMP[1].xxxx 9: UARL ADDR[0].x, TEMP[2].xxxx 10: UARL ADDR[0].x, TEMP[2].xxxx 11: MAD TEMP[0], IN[0].xxxx, CONST[ADDR[0].x], TEMP[0] 12: F2I TEMP[2].x, TEMP[1].xxxx 13: UADD TEMP[2].x, IMM[1].yyyy, TEMP[2].xxxx 14: UARL ADDR[0].x, TEMP[2].xxxx 15: UARL ADDR[0].x, TEMP[2].xxxx 16: MAD TEMP[0], IN[0].zzzz, CONST[ADDR[0].x], TEMP[0] 17: F2I TEMP[2].x, TEMP[1].xxxx 18: UADD TEMP[2].x, IMM[1].zzzz, TEMP[2].xxxx 19: UARL ADDR[0].x, TEMP[2].xxxx 20: UARL ADDR[0].x, TEMP[2].xxxx 21: ADD TEMP[2], TEMP[0], CONST[ADDR[0].x] 22: F2I TEMP[3].x, TEMP[1].xxxx 23: UADD TEMP[3].x, IMM[1].xxxx, TEMP[3].xxxx 24: UARL ADDR[0].x, TEMP[3].xxxx 25: MUL TEMP[3].xyz, IN[0].yyyy, CONST[ADDR[0].x+96] 26: MOV TEMP[0].xyz, TEMP[3].xyzx 27: F2I TEMP[3].x, TEMP[1].xxxx 28: UARL ADDR[0].x, TEMP[3].xxxx 29: MAD TEMP[3].xyz, IN[0].xxxx, CONST[ADDR[0].x+96], TEMP[0] 30: MOV TEMP[0].xyz, TEMP[3].xyzx 31: F2I TEMP[3].x, TEMP[1].xxxx 32: UADD TEMP[3].x, IMM[1].yyyy, TEMP[3].xxxx 33: UARL ADDR[0].x, TEMP[3].xxxx 34: MAD TEMP[3].xyz, IN[0].zzzz, CONST[ADDR[0].x+96], TEMP[0] 35: MOV TEMP[0].xyz, TEMP[3].xyzx 36: F2I TEMP[1].x, TEMP[1].xxxx 37: UADD TEMP[1].x, IMM[1].zzzz, TEMP[1].xxxx 38: UARL ADDR[0].x, TEMP[1].xxxx 39: ADD TEMP[1].xyz, TEMP[0], CONST[ADDR[0].x+96] 40: MOV TEMP[0].xyz, TEMP[1].xyzx 41: ADD TEMP[3].xy, TEMP[0], CONST[196].zwzw 42: MOV TEMP[3].xy, TEMP[3].xyxx 43: MUL TEMP[3].xy, TEMP[3], CONST[196] 44: MOV TEMP[3].xy, TEMP[3].xyxx 45: MUL TEMP[4].yw, TEMP[1].yyyy, CONST[193].xxzy 46: MOV TEMP[0].yw, TEMP[4].wyww 47: MAD TEMP[4].xy, TEMP[1].xxxx, CONST[192], TEMP[0].ywzw 48: MOV TEMP[0].xy, TEMP[4].xyxx 49: MAD TEMP[1].xy, TEMP[1].zzzz, CONST[194], TEMP[0] 50: MOV TEMP[0].xy, TEMP[1].xyxx 51: ADD TEMP[1].xy, TEMP[0], CONST[195] 52: MOV TEMP[0].xy, TEMP[1].xyxx 53: MAD TEMP[0].xy, TEMP[0], IMM[0].yzzw, IMM[0].zzzz 54: MOV TEMP[0].xy, TEMP[0].xyxx 55: MOV TEMP[3].zw, IMM[2].yyxy 56: MOV TEMP[0].zw, IMM[2].yyxy 57: MOV OUT[1], TEMP[3] 58: MOV OUT[2], TEMP[0] 59: MOV OUT[0], TEMP[2] 60: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, [16 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, i32, i32, i32, i32) #0 { main_body: %11 = getelementptr [17 x <16 x i8>] addrspace(2)* %1, i32 0, i32 0 %12 = load <16 x i8> addrspace(2)* %11, !tbaa !0 %13 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3072) %14 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3076) %15 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3088) %16 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3092) %17 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3104) %18 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3108) %19 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3120) %20 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3124) %21 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3136) %22 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3140) %23 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3144) %24 = call float @llvm.SI.load.const(<16 x i8> %12, i32 3148) %25 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 0 %26 = load <16 x i8> addrspace(2)* %25, !tbaa !0 %27 = add i32 %5, %7 %28 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %26, i32 0, i32 %27) %29 = extractelement <4 x float> %28, i32 0 %30 = extractelement <4 x float> %28, i32 1 %31 = extractelement <4 x float> %28, i32 2 %32 = getelementptr [16 x <16 x i8>] addrspace(2)* %4, i32 0, i32 1 %33 = load <16 x i8> addrspace(2)* %32, !tbaa !0 %34 = add i32 %5, %7 %35 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %34) %36 = extractelement <4 x float> %35, i32 2 %37 = call float @llvm.AMDIL.fraction.(float %36) %38 = fsub float -0.000000e+00, %37 %39 = fadd float %38, %36 %40 = fmul float %39, 4.000000e+00 %41 = fptosi float %40 to i32 %42 = bitcast i32 %41 to float %43 = bitcast float %42 to i32 %44 = add i32 1, %43 %45 = bitcast i32 %44 to float %46 = bitcast float %45 to i32 %47 = shl i32 %46, 4 %48 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %47) %49 = fmul float %30, %48 %50 = shl i32 %46, 4 %51 = add i32 %50, 4 %52 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %51) %53 = fmul float %30, %52 %54 = shl i32 %46, 4 %55 = add i32 %54, 8 %56 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %55) %57 = fmul float %30, %56 %58 = shl i32 %46, 4 %59 = add i32 %58, 12 %60 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %59) %61 = fmul float %30, %60 %62 = fptosi float %40 to i32 %63 = bitcast i32 %62 to float %64 = bitcast float %63 to i32 %65 = shl i32 %64, 4 %66 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %65) %67 = fmul float %29, %66 %68 = fadd float %67, %49 %69 = shl i32 %64, 4 %70 = add i32 %69, 4 %71 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %70) %72 = fmul float %29, %71 %73 = fadd float %72, %53 %74 = shl i32 %64, 4 %75 = add i32 %74, 8 %76 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %75) %77 = fmul float %29, %76 %78 = fadd float %77, %57 %79 = shl i32 %64, 4 %80 = add i32 %79, 12 %81 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %80) %82 = fmul float %29, %81 %83 = fadd float %82, %61 %84 = fptosi float %40 to i32 %85 = bitcast i32 %84 to float %86 = bitcast float %85 to i32 %87 = add i32 2, %86 %88 = bitcast i32 %87 to float %89 = bitcast float %88 to i32 %90 = shl i32 %89, 4 %91 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %90) %92 = fmul float %31, %91 %93 = fadd float %92, %68 %94 = shl i32 %89, 4 %95 = add i32 %94, 4 %96 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %95) %97 = fmul float %31, %96 %98 = fadd float %97, %73 %99 = shl i32 %89, 4 %100 = add i32 %99, 8 %101 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %100) %102 = fmul float %31, %101 %103 = fadd float %102, %78 %104 = shl i32 %89, 4 %105 = add i32 %104, 12 %106 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %105) %107 = fmul float %31, %106 %108 = fadd float %107, %83 %109 = fptosi float %40 to i32 %110 = bitcast i32 %109 to float %111 = bitcast float %110 to i32 %112 = add i32 3, %111 %113 = bitcast i32 %112 to float %114 = bitcast float %113 to i32 %115 = shl i32 %114, 4 %116 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %115) %117 = fadd float %93, %116 %118 = shl i32 %114, 4 %119 = add i32 %118, 4 %120 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %119) %121 = fadd float %98, %120 %122 = shl i32 %114, 4 %123 = add i32 %122, 8 %124 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %123) %125 = fadd float %103, %124 %126 = shl i32 %114, 4 %127 = add i32 %126, 12 %128 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %127) %129 = fadd float %108, %128 %130 = fptosi float %40 to i32 %131 = bitcast i32 %130 to float %132 = bitcast float %131 to i32 %133 = add i32 1, %132 %134 = bitcast i32 %133 to float %135 = bitcast float %134 to i32 %136 = shl i32 %135, 4 %137 = add i32 %136, 1536 %138 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %137) %139 = fmul float %30, %138 %140 = shl i32 %135, 4 %141 = add i32 %140, 1540 %142 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %141) %143 = fmul float %30, %142 %144 = shl i32 %135, 4 %145 = add i32 %144, 1544 %146 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %145) %147 = fmul float %30, %146 %148 = fptosi float %40 to i32 %149 = bitcast i32 %148 to float %150 = bitcast float %149 to i32 %151 = shl i32 %150, 4 %152 = add i32 %151, 1536 %153 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %152) %154 = fmul float %29, %153 %155 = fadd float %154, %139 %156 = shl i32 %150, 4 %157 = add i32 %156, 1540 %158 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %157) %159 = fmul float %29, %158 %160 = fadd float %159, %143 %161 = shl i32 %150, 4 %162 = add i32 %161, 1544 %163 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %162) %164 = fmul float %29, %163 %165 = fadd float %164, %147 %166 = fptosi float %40 to i32 %167 = bitcast i32 %166 to float %168 = bitcast float %167 to i32 %169 = add i32 2, %168 %170 = bitcast i32 %169 to float %171 = bitcast float %170 to i32 %172 = shl i32 %171, 4 %173 = add i32 %172, 1536 %174 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %173) %175 = fmul float %31, %174 %176 = fadd float %175, %155 %177 = shl i32 %171, 4 %178 = add i32 %177, 1540 %179 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %178) %180 = fmul float %31, %179 %181 = fadd float %180, %160 %182 = shl i32 %171, 4 %183 = add i32 %182, 1544 %184 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %183) %185 = fmul float %31, %184 %186 = fadd float %185, %165 %187 = fptosi float %40 to i32 %188 = bitcast i32 %187 to float %189 = bitcast float %188 to i32 %190 = add i32 3, %189 %191 = bitcast i32 %190 to float %192 = bitcast float %191 to i32 %193 = shl i32 %192, 4 %194 = add i32 %193, 1536 %195 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %194) %196 = fadd float %176, %195 %197 = shl i32 %192, 4 %198 = add i32 %197, 1540 %199 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %198) %200 = fadd float %181, %199 %201 = shl i32 %192, 4 %202 = add i32 %201, 1544 %203 = call float @llvm.SI.load.const(<16 x i8> %12, i32 %202) %204 = fadd float %186, %203 %205 = fadd float %196, %23 %206 = fadd float %200, %24 %207 = fmul float %205, %21 %208 = fmul float %206, %22 %209 = fmul float %200, %15 %210 = fmul float %200, %16 %211 = fmul float %196, %13 %212 = fadd float %211, %209 %213 = fmul float %196, %14 %214 = fadd float %213, %210 %215 = fmul float %204, %17 %216 = fadd float %215, %212 %217 = fmul float %204, %18 %218 = fadd float %217, %214 %219 = fadd float %216, %19 %220 = fadd float %218, %20 %221 = fmul float %219, 5.000000e-01 %222 = fadd float %221, -5.000000e-01 %223 = fmul float %220, -5.000000e-01 %224 = fadd float %223, -5.000000e-01 call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %207, float %208, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 33, i32 0, float %222, float %224, float 0.000000e+00, float 1.000000e+00) call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %117, float %121, float %125, float %129) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.load.const(<16 x i8>, i32) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1 ; Function Attrs: readnone declare float @llvm.AMDIL.fraction.(float) #2 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } attributes #2 = { readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: v_add_i32_e32 v0, s10, v0 ; 4A00000A s_load_dwordx4 s[4:7], s[8:9], 0x0 ; C0820900 s_load_dwordx4 s[8:11], s[8:9], 0x4 ; C0840904 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_format_xyzw v[1:4], v0, s[8:11], 0 idxen ; E00C2000 80020100 s_waitcnt vmcnt(0) ; BF8C0770 v_fract_f32_e32 v5, v3 ; 7E0A4103 v_subrev_f32_e32 v1, v5, v3 ; 0A020705 v_mul_f32_e32 v1, 4.0, v1 ; 100202F6 v_cvt_i32_f32_e32 v1, v1 ; 7E021101 v_lshlrev_b32_e32 v1, 4, v1 ; 34020284 v_add_i32_e32 v2, 0x604, v1 ; 4A0402FF 00000604 s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300 s_waitcnt lgkmcnt(0) ; BF8C007F buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 v_add_i32_e32 v3, 0x614, v1 ; 4A0602FF 00000614 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 buffer_load_format_xyzw v[4:7], v0, s[4:7], 0 idxen ; E00C2000 80010400 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v0, v3, v5 ; 10000B03 v_mad_f32 v0, v4, v2, v0 ; D2820000 04020504 v_add_i32_e32 v2, 0x624, v1 ; 4A0402FF 00000624 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v2, v0 ; D2820000 04020506 v_add_i32_e32 v2, 0x634, v1 ; 4A0402FF 00000634 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v2, v0 ; 06000102 s_movk_i32 s4, 0xc4c ; B0040C4C s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v0 ; 06040004 s_movk_i32 s4, 0xc44 ; B0040C44 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v2, s4, v2 ; 10040404 v_add_i32_e32 v3, 0x600, v1 ; 4A0602FF 00000600 buffer_load_dword v3, v3, s[0:3], 0 offen ; E0301000 80000303 v_add_i32_e32 v8, 0x610, v1 ; 4A1002FF 00000610 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v8, v8, v5 ; 10100B08 v_mad_f32 v3, v4, v3, v8 ; D2820003 04220704 v_add_i32_e32 v8, 0x620, v1 ; 4A1002FF 00000620 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v3, v6, v8, v3 ; D2820003 040E1106 v_add_i32_e32 v8, 0x630, v1 ; 4A1002FF 00000630 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, v8, v3 ; 06060708 s_movk_i32 s4, 0xc48 ; B0040C48 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v8, s4, v3 ; 06100604 s_movk_i32 s4, 0xc40 ; B0040C40 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v8, s4, v8 ; 10101004 v_mov_b32_e32 v9, 1.0 ; 7E1202F2 v_mov_b32_e32 v10, 0 ; 7E140280 exp 15, 32, 0, 0, 0, v8, v2, v10, v9 ; F800020F 090A0208 s_movk_i32 s4, 0xc10 ; B0040C10 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt expcnt(0) lgkmcnt(0) ; BF8C000F v_mul_f32_e32 v2, s4, v0 ; 10040004 s_movk_i32 s4, 0xc00 ; B0040C00 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v3, s4, v2 ; D2820002 04080903 v_add_i32_e32 v8, 0x608, v1 ; 4A1002FF 00000608 buffer_load_dword v8, v8, s[0:3], 0 offen ; E0301000 80000808 v_add_i32_e32 v11, 0x618, v1 ; 4A1602FF 00000618 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v5 ; 10160B0B v_mad_f32 v8, v4, v8, v11 ; D2820008 042E1104 v_add_i32_e32 v11, 0x628, v1 ; 4A1602FF 00000628 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v8, v6, v11, v8 ; D2820008 04221706 v_add_i32_e32 v11, 0x638, v1 ; 4A1602FF 00000638 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v8, v11, v8 ; 0610110B s_movk_i32 s4, 0xc20 ; B0040C20 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v2, v8, s4, v2 ; D2820002 04080908 s_movk_i32 s4, 0xc30 ; B0040C30 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v2, s4, v2 ; 06040404 v_mad_f32 v2, 0.5, v2, -0.5 ; D2820002 03C604F0 s_movk_i32 s4, 0xc14 ; B0040C14 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mul_f32_e32 v0, s4, v0 ; 10000004 s_movk_i32 s4, 0xc04 ; B0040C04 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v3, s4, v0 ; D2820000 04000903 s_movk_i32 s4, 0xc24 ; B0040C24 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_mad_f32 v0, v8, s4, v0 ; D2820000 04000908 s_movk_i32 s4, 0xc34 ; B0040C34 s_buffer_load_dword s4, s[0:3], s4 ; C2020004 s_waitcnt lgkmcnt(0) ; BF8C007F v_add_f32_e32 v0, s4, v0 ; 06000004 v_mad_f32 v0, -0.5, v0, -0.5 ; D2820000 03C600F1 exp 15, 33, 0, 0, 0, v2, v0, v10, v9 ; F800021F 090A0002 s_waitcnt expcnt(0) ; BF8C070F buffer_load_dword v0, v1, s[0:3], 0 offen ; E0301000 80000001 v_add_i32_e32 v2, 16, v1 ; 4A040290 buffer_load_dword v3, v2, s[0:3], 0 offen ; E0301000 80000302 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v3, v3, v5 ; 10060B03 v_mad_f32 v0, v4, v0, v3 ; D2820000 040E0104 v_add_i32_e32 v3, 32, v1 ; 4A0602A0 buffer_load_dword v8, v3, s[0:3], 0 offen ; E0301000 80000803 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v0, v6, v8, v0 ; D2820000 04021106 v_add_i32_e32 v8, 48, v1 ; 4A1002B0 buffer_load_dword v9, v8, s[0:3], 0 offen ; E0301000 80000908 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v0, v9, v0 ; 06000109 v_or_b32_e32 v9, 12, v1 ; 3812028C buffer_load_dword v9, v9, s[0:3], 0 offen ; E0301000 80000909 v_or_b32_e32 v10, 12, v2 ; 3814048C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v10, v10, v5 ; 10140B0A v_mad_f32 v9, v4, v9, v10 ; D2820009 042A1304 v_or_b32_e32 v10, 12, v3 ; 3814068C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v9, v6, v10, v9 ; D2820009 04261506 v_or_b32_e32 v10, 12, v8 ; 3814108C buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v9, v10, v9 ; 0612130A v_or_b32_e32 v10, 8, v1 ; 38140288 buffer_load_dword v10, v10, s[0:3], 0 offen ; E0301000 80000A0A v_or_b32_e32 v11, 8, v2 ; 38160488 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v11, v11, v5 ; 10160B0B v_mad_f32 v10, v4, v10, v11 ; D282000A 042E1504 v_or_b32_e32 v11, 8, v3 ; 38160688 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v10, v6, v11, v10 ; D282000A 042A1706 v_or_b32_e32 v11, 8, v8 ; 38161088 buffer_load_dword v11, v11, s[0:3], 0 offen ; E0301000 80000B0B s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v10, v11, v10 ; 0614150B v_or_b32_e32 v1, 4, v1 ; 38020284 buffer_load_dword v1, v1, s[0:3], 0 offen ; E0301000 80000101 v_or_b32_e32 v2, 4, v2 ; 38040484 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mul_f32_e32 v2, v2, v5 ; 10040B02 v_mad_f32 v1, v4, v1, v2 ; D2820001 040A0304 v_or_b32_e32 v2, 4, v3 ; 38040684 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_mad_f32 v1, v6, v2, v1 ; D2820001 04060506 v_or_b32_e32 v2, 4, v8 ; 38041084 buffer_load_dword v2, v2, s[0:3], 0 offen ; E0301000 80000202 s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v1, v2, v1 ; 06020302 exp 15, 12, 0, 1, 0, v0, v1, v10, v9 ; F80008CF 090A0100 s_endpgm ; BF810000 SHADER KEY export_16bpc = 0x3 last_cbuf = 0 color_two_side = 0 alpha_func = 7 alpha_to_one = 0 poly_stipple = 0 FRAG DCL IN[0], GENERIC[9], PERSPECTIVE DCL IN[1], GENERIC[10], PERSPECTIVE DCL OUT[0], COLOR DCL SAMP[0] DCL SAMP[1] DCL TEMP[0..3], LOCAL IMM[0] FLT32 { 1.0000, -1.0000, 0.0000, -0.0100} 0: MAD TEMP[0].xy, IN[1], IMM[0].xyxx, IMM[0].zxzz 1: MOV TEMP[0].xy, TEMP[0].xyyy 2: TEX TEMP[0].y, TEMP[0], SAMP[1], 2D 3: ADD TEMP[1].x, TEMP[0].yyyy, IMM[0].wwww 4: FSGE TEMP[2].x, TEMP[1].xxxx, IMM[0].zzzz 5: UIF TEMP[2].xxxx :0 6: MOV TEMP[2].x, IMM[0].zzzz 7: ELSE :0 8: MOV TEMP[2].x, IMM[0].yyyy 9: ENDIF 10: MOV TEMP[2].x, TEMP[2].xxxx 11: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz 12: UIF TEMP[3].xxxx :0 13: MOV TEMP[3].x, IMM[0].zzzz 14: ELSE :0 15: MOV TEMP[3].x, IMM[0].yyyy 16: ENDIF 17: MOV TEMP[2].y, TEMP[3].xxxx 18: FSGE TEMP[3].x, TEMP[1].xxxx, IMM[0].zzzz 19: UIF TEMP[3].xxxx :0 20: MOV TEMP[3].x, IMM[0].zzzz 21: ELSE :0 22: MOV TEMP[3].x, IMM[0].yyyy 23: ENDIF 24: MOV TEMP[2].z, TEMP[3].xxxx 25: FSGE TEMP[1].x, TEMP[1].xxxx, IMM[0].zzzz 26: UIF TEMP[1].xxxx :0 27: ELSE :0 28: ENDIF 29: FSLT TEMP[1].xyz, TEMP[2].xyzz, IMM[0].zzzz 30: OR TEMP[2].x, TEMP[1].xxxx, TEMP[1].zzzz 31: OR TEMP[1].x, TEMP[2].xxxx, TEMP[1].yyyy 32: AND TEMP[1].x, TEMP[1].xxxx, IMM[0].xxxx 33: KILL_IF -TEMP[1].xxxx 34: MOV TEMP[1].xy, IN[0].xyyy 35: TEX TEMP[1], TEMP[1], SAMP[0], 2D 36: MUL TEMP[0].w, TEMP[0].yyyy, TEMP[1].wwww 37: MOV TEMP[0].w, TEMP[0].wwww 38: MOV TEMP[0].xyz, TEMP[1].xyzx 39: MOV OUT[0], TEMP[0] 40: END ; ModuleID = 'tgsi' define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 { main_body: %22 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 0 %23 = load <8 x i32> addrspace(2)* %22, !tbaa !0 %24 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 0 %25 = load <4 x i32> addrspace(2)* %24, !tbaa !0 %26 = getelementptr [34 x <8 x i32>] addrspace(2)* %3, i32 0, i32 1 %27 = load <8 x i32> addrspace(2)* %26, !tbaa !0 %28 = getelementptr [17 x <4 x i32>] addrspace(2)* %2, i32 0, i32 1 %29 = load <4 x i32> addrspace(2)* %28, !tbaa !0 %30 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %7) %31 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %7) %32 = call float @llvm.SI.fs.interp(i32 0, i32 1, i32 %5, <2 x i32> %7) %33 = call float @llvm.SI.fs.interp(i32 1, i32 1, i32 %5, <2 x i32> %7) %34 = fmul float %32, 1.000000e+00 %35 = fadd float %34, 0.000000e+00 %36 = fmul float %33, -1.000000e+00 %37 = fadd float %36, 1.000000e+00 %38 = bitcast float %35 to i32 %39 = bitcast float %37 to i32 %40 = insertelement <2 x i32> undef, i32 %38, i32 0 %41 = insertelement <2 x i32> %40, i32 %39, i32 1 %42 = bitcast <8 x i32> %27 to <32 x i8> %43 = bitcast <4 x i32> %29 to <16 x i8> %44 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %41, <32 x i8> %42, <16 x i8> %43, i32 2) %45 = extractelement <4 x float> %44, i32 1 %46 = fadd float %45, 0xBF847AE140000000 %47 = fcmp oge float %46, 0.000000e+00 %48 = sext i1 %47 to i32 %49 = bitcast i32 %48 to float %50 = bitcast float %49 to i32 %51 = icmp ne i32 %50, 0 %. = select i1 %51, float 0.000000e+00, float -1.000000e+00 %52 = fcmp oge float %46, 0.000000e+00 %53 = sext i1 %52 to i32 %54 = bitcast i32 %53 to float %55 = bitcast float %54 to i32 %56 = icmp ne i32 %55, 0 %temp12.0 = select i1 %56, float 0.000000e+00, float -1.000000e+00 %57 = fcmp oge float %46, 0.000000e+00 %58 = sext i1 %57 to i32 %59 = bitcast i32 %58 to float %60 = bitcast float %59 to i32 %61 = icmp ne i32 %60, 0 %.25 = select i1 %61, float 0.000000e+00, float -1.000000e+00 %62 = fcmp oge float %46, 0.000000e+00 %63 = sext i1 %62 to i32 %64 = bitcast i32 %63 to float %65 = bitcast float %64 to i32 %66 = icmp ne i32 %65, 0 %67 = fcmp olt float %., 0.000000e+00 %68 = sext i1 %67 to i32 %69 = fcmp olt float %temp12.0, 0.000000e+00 %70 = sext i1 %69 to i32 %71 = fcmp olt float %.25, 0.000000e+00 %72 = sext i1 %71 to i32 %73 = bitcast i32 %68 to float %74 = bitcast i32 %70 to float %75 = bitcast i32 %72 to float %76 = bitcast float %73 to i32 %77 = bitcast float %75 to i32 %78 = or i32 %76, %77 %79 = bitcast i32 %78 to float %80 = bitcast float %79 to i32 %81 = bitcast float %74 to i32 %82 = or i32 %80, %81 %83 = bitcast i32 %82 to float %84 = bitcast float %83 to i32 %85 = and i32 %84, 1065353216 %86 = bitcast i32 %85 to float %87 = fsub float -0.000000e+00, %86 %88 = fsub float -0.000000e+00, %86 %89 = fsub float -0.000000e+00, %86 %90 = fsub float -0.000000e+00, %86 call void @llvm.AMDGPU.kill(float %87) call void @llvm.AMDGPU.kill(float %88) call void @llvm.AMDGPU.kill(float %89) call void @llvm.AMDGPU.kill(float %90) %91 = bitcast float %30 to i32 %92 = bitcast float %31 to i32 %93 = insertelement <2 x i32> undef, i32 %91, i32 0 %94 = insertelement <2 x i32> %93, i32 %92, i32 1 %95 = bitcast <8 x i32> %23 to <32 x i8> %96 = bitcast <4 x i32> %25 to <16 x i8> %97 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %94, <32 x i8> %95, <16 x i8> %96, i32 2) %98 = extractelement <4 x float> %97, i32 0 %99 = extractelement <4 x float> %97, i32 1 %100 = extractelement <4 x float> %97, i32 2 %101 = extractelement <4 x float> %97, i32 3 %102 = fmul float %45, %101 %103 = call i32 @llvm.SI.packf16(float %98, float %99) %104 = bitcast i32 %103 to float %105 = call i32 @llvm.SI.packf16(float %100, float %102) %106 = bitcast i32 %105 to float call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %104, float %106, float %104, float %106) ret void } ; Function Attrs: nounwind readnone declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1 ; Function Attrs: nounwind readnone declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1 declare void @llvm.AMDGPU.kill(float) ; Function Attrs: nounwind readnone declare i32 @llvm.SI.packf16(float, float) #1 declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" } attributes #1 = { nounwind readnone } !0 = !{!"const", null, i32 1} Shader Disassembly: s_wqm_b64 exec, exec ; BEFE0A7E s_mov_b32 m0, s9 ; BEFC0309 v_interp_p1_f32 v2, v0, 1, 1, [m0] ; C8080500 v_interp_p2_f32 v2, [v2], v1, 1, 1, [m0] ; C8090501 v_sub_f32_e32 v3, 1.0, v2 ; 080604F2 v_interp_p1_f32 v4, v0, 0, 1, [m0] ; C8100400 v_interp_p2_f32 v4, [v4], v1, 0, 1, [m0] ; C8110401 v_add_f32_e32 v2, 0, v4 ; 06040880 s_load_dwordx4 s[0:3], s[4:5], 0x0 ; C0800500 s_load_dwordx4 s[8:11], s[4:5], 0x4 ; C0840504 s_load_dwordx8 s[12:19], s[6:7], 0x0 ; C0C60700 s_load_dwordx8 s[20:27], s[6:7], 0x8 ; C0CA0708 s_waitcnt lgkmcnt(0) ; BF8C007F image_sample v2, 2, 0, 0, 0, 0, 0, 0, 0, v[2:3], s[20:27], s[8:11] ; F0800200 00450202 v_mov_b32_e32 v3, 0xbc23d70a ; 7E0602FF BC23D70A s_waitcnt vmcnt(0) ; BF8C0770 v_add_f32_e32 v3, v2, v3 ; 06060702 v_cmp_ge_f32_e64 s[4:5], v3, 0 ; D00C0004 00010103 v_cndmask_b32_e64 v3, 0, -1, s[4:5] ; D2000803 00118280 v_cmp_ne_i32_e64 s[4:5], v3, 0 ; D10A0004 00010103 v_cndmask_b32_e64 v3, -1.0, 0, s[4:5] ; D2000803 181100F3 v_cmp_lt_f32_e64 s[4:5], v3, 0 ; D0020004 00010103 v_cndmask_b32_e64 v3, 0, -1, s[4:5] ; D2000803 00118280 v_and_b32_e32 v3, 1.0, v3 ; 360606F2 v_xor_b32_e32 v3, 0x80000000, v3 ; 3A0606FF 80000000 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_cmpx_le_f32_e32 vcc, 0, v3 ; 7C260680 v_interp_p1_f32 v4, v0, 1, 0, [m0] ; C8100100 v_interp_p2_f32 v4, [v4], v1, 1, 0, [m0] ; C8110101 v_interp_p1_f32 v3, v0, 0, 0, [m0] ; C80C0000 v_interp_p2_f32 v3, [v3], v1, 0, 0, [m0] ; C80D0001 image_sample v[3:6], 15, 0, 0, 0, 0, 0, 0, 0, v[3:4], s[12:19], s[0:3] ; F0800F00 00030303 s_waitcnt vmcnt(0) ; BF8C0770 v_cvt_pkrtz_f16_f32_e32 v0, v3, v4 ; 5E000903 v_mul_f32_e32 v1, v6, v2 ; 10020506 v_cvt_pkrtz_f16_f32_e32 v1, v5, v1 ; 5E020305 exp 15, 0, 1, 1, 1, v0, v1, v0, v1 ; F8001C0F 01000100 s_endpgm ; BF810000 radeon: Failed to allocate virtual address for buffer: radeon: size : 1048576 bytes radeon: alignment : 4096 bytes radeon: domains : 2 radeon: va : 0x0000000038733000 radeon: Failed to allocate virtual address for buffer: radeon: size : 1048576 bytes radeon: alignment : 4096 bytes radeon: domains : 2 radeon: va : 0x0000000038733000